From 1cff11974080f47864d56ef97db228c51fcb8a0c Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 5 Aug 2020 13:55:12 +0300 Subject: habanalabs: verify user input in cs_ioctl_signal_wait User input must be validated before using it to access internal structures. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/misc/habanalabs/common/command_submission.c') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index b9840e368eb5..2e3fcbc794db 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -808,6 +808,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, /* currently it is guaranteed to have only one chunk */ chunk = &cs_chunk_array[0]; + + if (chunk->queue_index >= hdev->asic_prop.max_queues) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + chunk->queue_index); + rc = -EINVAL; + goto free_cs_chunk_array; + } + q_idx = chunk->queue_index; hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; q_type = hw_queue_prop->type; -- cgit v1.2.3-59-g8ed1b From a98d73c7fae486f7fea83bdec8599d4fccb6807d Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 15 Jul 2020 08:52:39 +0300 Subject: habanalabs: Replace dma-fence mechanism with completions habanalabs driver uses dma-fence mechanism for synchronization. dma-fence mechanism was designed solely for GPUs, hence we purpose a simpler mechanism based on completions to replace current dma-fence objects. Signed-off-by: Ofir Bitton Cc: Greg Kroah-Hartman Cc: Daniel Vetter Reviewed-by: Oded Gabbay Reviewed-by: Daniel Vetter Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_submission.c | 95 +++++++++++----------- drivers/misc/habanalabs/common/context.c | 13 +-- drivers/misc/habanalabs/common/habanalabs.h | 30 +++++-- drivers/misc/habanalabs/common/hw_queue.c | 2 +- 4 files changed, 77 insertions(+), 63 deletions(-) (limited to 'drivers/misc/habanalabs/common/command_submission.c') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 2e3fcbc794db..00c1ff2e953d 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -38,26 +38,10 @@ void hl_sob_reset_error(struct kref *ref) hw_sob->q_idx, hw_sob->sob_id); } -static const char *hl_fence_get_driver_name(struct dma_fence *fence) -{ - return "HabanaLabs"; -} - -static const char *hl_fence_get_timeline_name(struct dma_fence *fence) -{ - struct hl_cs_compl *hl_cs_compl = - container_of(fence, struct hl_cs_compl, base_fence); - - return dev_name(hl_cs_compl->hdev->dev); -} - -static bool hl_fence_enable_signaling(struct dma_fence *fence) -{ - return true; -} - -static void hl_fence_release(struct dma_fence *fence) +static void hl_fence_release(struct kref *kref) { + struct hl_fence *fence = + container_of(kref, struct hl_fence, refcount); struct hl_cs_compl *hl_cs_cmpl = container_of(fence, struct hl_cs_compl, base_fence); struct hl_device *hdev = hl_cs_cmpl->hdev; @@ -99,15 +83,27 @@ static void hl_fence_release(struct dma_fence *fence) } free: - kfree_rcu(hl_cs_cmpl, base_fence.rcu); + kfree(hl_cs_cmpl); } -static const struct dma_fence_ops hl_fence_ops = { - .get_driver_name = hl_fence_get_driver_name, - .get_timeline_name = hl_fence_get_timeline_name, - .enable_signaling = hl_fence_enable_signaling, - .release = hl_fence_release -}; +void hl_fence_put(struct hl_fence *fence) +{ + if (fence) + kref_put(&fence->refcount, hl_fence_release); +} + +void hl_fence_get(struct hl_fence *fence) +{ + if (fence) + kref_get(&fence->refcount); +} + +static void hl_fence_init(struct hl_fence *fence) +{ + kref_init(&fence->refcount); + fence->error = 0; + init_completion(&fence->completion); +} static void cs_get(struct hl_cs *cs) { @@ -336,7 +332,7 @@ static void cs_do_release(struct kref *ref) * In case the wait for signal CS was submitted, the put occurs * in init_signal_wait_cs() right before hanging on the PQ. */ - dma_fence_put(cs->signal_fence); + hl_fence_put(cs->signal_fence); } /* @@ -348,19 +344,18 @@ static void cs_do_release(struct kref *ref) hl_ctx_put(cs->ctx); /* We need to mark an error for not submitted because in that case - * the dma fence release flow is different. Mainly, we don't need + * the hl fence release flow is different. Mainly, we don't need * to handle hw_sob for signal/wait */ if (cs->timedout) - dma_fence_set_error(cs->fence, -ETIMEDOUT); + cs->fence->error = -ETIMEDOUT; else if (cs->aborted) - dma_fence_set_error(cs->fence, -EIO); + cs->fence->error = -EIO; else if (!cs->submitted) - dma_fence_set_error(cs->fence, -EBUSY); - - dma_fence_signal(cs->fence); - dma_fence_put(cs->fence); + cs->fence->error = -EBUSY; + complete_all(&cs->fence->completion); + hl_fence_put(cs->fence); cs_counters_aggregate(hdev, cs->ctx); kfree(cs->jobs_in_queue_cnt); @@ -401,7 +396,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, enum hl_cs_type cs_type, struct hl_cs **cs_new) { struct hl_cs_compl *cs_cmpl; - struct dma_fence *other = NULL; + struct hl_fence *other = NULL; struct hl_cs *cs; int rc; @@ -434,7 +429,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs_cmpl->cs_seq = ctx->cs_sequence; other = ctx->cs_pending[cs_cmpl->cs_seq & (hdev->asic_prop.max_pending_cs - 1)]; - if ((other) && (!dma_fence_is_signaled(other))) { + + if (other && !completion_done(&other->completion)) { dev_dbg(hdev->dev, "Rejecting CS because of too many in-flights CS\n"); rc = -EAGAIN; @@ -448,8 +444,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, goto free_fence; } - dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock, - ctx->asid, ctx->cs_sequence); + /* init hl_fence */ + hl_fence_init(&cs_cmpl->base_fence); cs->sequence = cs_cmpl->cs_seq; @@ -458,9 +454,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, &cs_cmpl->base_fence; ctx->cs_sequence++; - dma_fence_get(&cs_cmpl->base_fence); + hl_fence_get(&cs_cmpl->base_fence); - dma_fence_put(other); + hl_fence_put(other); spin_unlock(&ctx->cs_lock); @@ -773,7 +769,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, struct hl_ctx *ctx = hpriv->ctx; struct hl_cs_chunk *cs_chunk_array, *chunk; struct hw_queue_properties *hw_queue_prop; - struct dma_fence *sig_fence = NULL; + struct hl_fence *sig_fence = NULL; struct hl_cs_job *job; struct hl_cs *cs; struct hl_cb *cb; @@ -883,14 +879,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, dev_err(hdev->dev, "CS seq 0x%llx is not of a signal CS\n", signal_seq); - dma_fence_put(sig_fence); + hl_fence_put(sig_fence); rc = -EINVAL; goto free_signal_seq_array; } - if (dma_fence_is_signaled(sig_fence)) { + if (completion_done(&sig_fence->completion)) { /* signal CS already finished */ - dma_fence_put(sig_fence); + hl_fence_put(sig_fence); rc = 0; goto free_signal_seq_array; } @@ -902,7 +898,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, rc = allocate_cs(hdev, ctx, cs_type, &cs); if (rc) { if (cs_type == CS_TYPE_WAIT) - dma_fence_put(sig_fence); + hl_fence_put(sig_fence); hl_ctx_put(ctx); goto free_signal_seq_array; } @@ -1162,7 +1158,7 @@ out: static long _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq) { - struct dma_fence *fence; + struct hl_fence *fence; unsigned long timeout; long rc; @@ -1181,12 +1177,15 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev, "Can't wait on CS %llu because current CS is at seq %llu\n", seq, ctx->cs_sequence); } else if (fence) { - rc = dma_fence_wait_timeout(fence, true, timeout); + rc = wait_for_completion_interruptible_timeout( + &fence->completion, timeout); + if (fence->error == -ETIMEDOUT) rc = -ETIMEDOUT; else if (fence->error == -EIO) rc = -EIO; - dma_fence_put(fence); + + hl_fence_put(fence); } else { dev_dbg(hdev->dev, "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index 3e375958e73b..b168a9fce817 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -23,7 +23,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) */ for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++) - dma_fence_put(ctx->cs_pending[i]); + hl_fence_put(ctx->cs_pending[i]); kfree(ctx->cs_pending); @@ -128,7 +128,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) atomic_set(&ctx->thread_ctx_switch_token, 1); ctx->thread_ctx_switch_wait_token = 0; ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs, - sizeof(struct dma_fence *), + sizeof(struct hl_fence *), GFP_KERNEL); if (!ctx->cs_pending) return -ENOMEM; @@ -184,10 +184,10 @@ int hl_ctx_put(struct hl_ctx *ctx) return kref_put(&ctx->refcount, hl_ctx_do_release); } -struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) +struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) { struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop; - struct dma_fence *fence; + struct hl_fence *fence; spin_lock(&ctx->cs_lock); @@ -201,8 +201,9 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) return NULL; } - fence = dma_fence_get( - ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]); + fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]; + hl_fence_get(fence); + spin_unlock(&ctx->cs_lock); return fence; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 278c45a0b255..f97eebc64979 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -342,9 +341,22 @@ struct asic_fixed_properties { u8 completion_queues_count; }; +/** + * struct hl_fence - software synchronization primitive + * @completion: fence is implemented using completion + * @refcount: refcount for this fence + * @error: mark this fence with error + * + */ +struct hl_fence { + struct completion completion; + struct kref refcount; + int error; +}; + /** * struct hl_cs_compl - command submission completion object. - * @base_fence: kernel fence object. + * @base_fence: hl fence object. * @lock: spinlock to protect fence. * @hdev: habanalabs device structure. * @hw_sob: the H/W SOB used in this signal/wait CS. @@ -353,7 +365,7 @@ struct asic_fixed_properties { * @sob_val: the SOB value that is used in this signal/wait CS. */ struct hl_cs_compl { - struct dma_fence base_fence; + struct hl_fence base_fence; spinlock_t lock; struct hl_device *hdev; struct hl_hw_sob *hw_sob; @@ -800,7 +812,7 @@ struct hl_va_range { * @hdev: pointer to the device structure. * @refcount: reference counter for the context. Context is released only when * this hits 0l. It is incremented on CS and CS_WAIT. - * @cs_pending: array of DMA fence objects representing pending CS. + * @cs_pending: array of hl fence objects representing pending CS. * @host_va_range: holds available virtual addresses for host mappings. * @host_huge_va_range: holds available virtual addresses for host mappings * with huge pages. @@ -832,7 +844,7 @@ struct hl_ctx { struct hl_fpriv *hpriv; struct hl_device *hdev; struct kref refcount; - struct dma_fence **cs_pending; + struct hl_fence **cs_pending; struct hl_va_range *host_va_range; struct hl_va_range *host_huge_va_range; struct hl_va_range *dram_va_range; @@ -919,8 +931,8 @@ struct hl_cs { struct list_head job_list; spinlock_t job_lock; struct kref refcount; - struct dma_fence *fence; - struct dma_fence *signal_fence; + struct hl_fence *fence; + struct hl_fence *signal_fence; struct work_struct finish_work; struct delayed_work work_tdr; struct list_head mirror_node; @@ -1739,7 +1751,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx); void hl_ctx_do_release(struct kref *ref); void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx); int hl_ctx_put(struct hl_ctx *ctx); -struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq); +struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq); void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr); void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr); @@ -1781,6 +1793,8 @@ void hl_cs_rollback_all(struct hl_device *hdev); struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, enum hl_queue_type queue_type, bool is_kernel_allocated_cb); void hl_sob_reset_error(struct kref *ref); +void hl_fence_put(struct hl_fence *fence); +void hl_fence_get(struct hl_fence *fence); void goya_set_asic_funcs(struct hl_device *hdev); void gaudi_set_asic_funcs(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 287681646071..65b9aa69a83e 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -474,7 +474,7 @@ static void init_signal_wait_cs(struct hl_cs *cs) * wait CS was submitted. */ mb(); - dma_fence_put(cs->signal_fence); + hl_fence_put(cs->signal_fence); cs->signal_fence = NULL; } } -- cgit v1.2.3-59-g8ed1b From bd4ef3729213280522694ff714e28192e486487d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 12 Aug 2020 10:19:28 +0300 Subject: habanalabs: eliminate redundant else condition If both parts of if-else are goto statements, we can remove the else and put the else goto statement after the if statement. Reported-by: kernel test robot Reviewed-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/misc/habanalabs/common/command_submission.c') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 00c1ff2e953d..a811a9fdf13b 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -686,8 +686,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, rc = -ENOMEM; if (is_kernel_allocated_cb) goto release_cb; - else - goto free_cs_object; + + goto free_cs_object; } job->id = i + 1; -- cgit v1.2.3-59-g8ed1b From 975ab7b32b90c97046ddbdd53798391b7d8a6a1e Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 1 Sep 2020 11:22:05 +0300 Subject: habanalabs: count dropped CS because max CS in-flight There is a case where the user reaches the maximum number of CS in-flight. In that case, the driver rejects the new CS of the user with EAGAIN. Count that event so the user can query the driver later to see if it happened. Reviewed-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 5 ++++- include/uapi/misc/habanalabs.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/misc/habanalabs/common/command_submission.c') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index a811a9fdf13b..470bffbe9bdc 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -252,6 +252,8 @@ static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx) ctx->cs_counters.parsing_drop_cnt; hdev->aggregated_cs_counters.queue_full_drop_cnt += ctx->cs_counters.queue_full_drop_cnt; + hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt += + ctx->cs_counters.max_cs_in_flight_drop_cnt; } static void cs_do_release(struct kref *ref) @@ -431,8 +433,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, (hdev->asic_prop.max_pending_cs - 1)]; if (other && !completion_done(&other->completion)) { - dev_dbg(hdev->dev, + dev_dbg_ratelimited(hdev->dev, "Rejecting CS because of too many in-flights CS\n"); + ctx->cs_counters.max_cs_in_flight_drop_cnt++; rc = -EAGAIN; goto free_fence; } diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a2dcad29340f..69fb44d35292 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -401,12 +401,14 @@ struct hl_info_sync_manager { * @parsing_drop_cnt: dropped due to error in packet parsing * @queue_full_drop_cnt: dropped due to queue full * @device_in_reset_drop_cnt: dropped due to device in reset + * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight */ struct hl_cs_counters { __u64 out_of_mem_drop_cnt; __u64 parsing_drop_cnt; __u64 queue_full_drop_cnt; __u64 device_in_reset_drop_cnt; + __u64 max_cs_in_flight_drop_cnt; }; struct hl_info_cs_counters { -- cgit v1.2.3-59-g8ed1b From 681a22f55f1506023da06ebf660a4a252b35bc93 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 7 Sep 2020 18:08:51 +0300 Subject: habanalabs: allow to wait on CS without sleep The user sometimes wants to check if a CS has completed to clean resources. In that case, the user doesn't want to sleep but just to check if the CS has finished and continue with his code. Add a new definition to the API of the wait on CS. The new definition says that if the timeout is 0, the driver won't sleep at all but return immediately after checking if the CS has finished. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 7 +++++-- include/uapi/misc/habanalabs.h | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/misc/habanalabs/common/command_submission.c') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 470bffbe9bdc..b2b974ecc431 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -1180,8 +1180,11 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev, "Can't wait on CS %llu because current CS is at seq %llu\n", seq, ctx->cs_sequence); } else if (fence) { - rc = wait_for_completion_interruptible_timeout( - &fence->completion, timeout); + if (!timeout_us) + rc = completion_done(&fence->completion); + else + rc = wait_for_completion_interruptible_timeout( + &fence->completion, timeout); if (fence->error == -ETIMEDOUT) rc = -ETIMEDOUT; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 69fb44d35292..d449f8a31ce6 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -914,6 +914,9 @@ struct hl_debug_args { * inside the kernel until the CS has finished or until the user-requested * timeout has expired. * + * If the timeout value is 0, the driver won't sleep at all. It will check + * the status of the CS and return immediately + * * The return value of the IOCTL is a standard Linux error code. The possible * values are: * -- cgit v1.2.3-59-g8ed1b