aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/common/command_submission.c
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2020-11-26 13:16:55 +0100
committerPeter Zijlstra <peterz@infradead.org>2020-11-26 13:16:55 +0100
commit20c7775aecea04d8ca322039969d49dcf568e0e9 (patch)
tree138c057839197c9021043353e994815c0250e669 /drivers/misc/habanalabs/common/command_submission.c
parentperf/x86/intel: Add event constraint for CYCLE_ACTIVITY.STALLS_MEM_ANY (diff)
parentMerge tag 'media/v5.10-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media (diff)
downloadlinux-dev-20c7775aecea04d8ca322039969d49dcf568e0e9.tar.xz
linux-dev-20c7775aecea04d8ca322039969d49dcf568e0e9.zip
Merge remote-tracking branch 'origin/master' into perf/core
Further perf/core patches will depend on: d3f7b1bb2040 ("mm/gup: fix gup_fast with dynamic page table folding") which is already in Linus' tree.
Diffstat (limited to 'drivers/misc/habanalabs/common/command_submission.c')
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c115
1 files changed, 64 insertions, 51 deletions
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index b9840e368eb5..b2b974ecc431 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -38,26 +38,10 @@ void hl_sob_reset_error(struct kref *ref)
hw_sob->q_idx, hw_sob->sob_id);
}
-static const char *hl_fence_get_driver_name(struct dma_fence *fence)
-{
- return "HabanaLabs";
-}
-
-static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
-{
- struct hl_cs_compl *hl_cs_compl =
- container_of(fence, struct hl_cs_compl, base_fence);
-
- return dev_name(hl_cs_compl->hdev->dev);
-}
-
-static bool hl_fence_enable_signaling(struct dma_fence *fence)
-{
- return true;
-}
-
-static void hl_fence_release(struct dma_fence *fence)
+static void hl_fence_release(struct kref *kref)
{
+ struct hl_fence *fence =
+ container_of(kref, struct hl_fence, refcount);
struct hl_cs_compl *hl_cs_cmpl =
container_of(fence, struct hl_cs_compl, base_fence);
struct hl_device *hdev = hl_cs_cmpl->hdev;
@@ -99,15 +83,27 @@ static void hl_fence_release(struct dma_fence *fence)
}
free:
- kfree_rcu(hl_cs_cmpl, base_fence.rcu);
+ kfree(hl_cs_cmpl);
+}
+
+void hl_fence_put(struct hl_fence *fence)
+{
+ if (fence)
+ kref_put(&fence->refcount, hl_fence_release);
+}
+
+void hl_fence_get(struct hl_fence *fence)
+{
+ if (fence)
+ kref_get(&fence->refcount);
}
-static const struct dma_fence_ops hl_fence_ops = {
- .get_driver_name = hl_fence_get_driver_name,
- .get_timeline_name = hl_fence_get_timeline_name,
- .enable_signaling = hl_fence_enable_signaling,
- .release = hl_fence_release
-};
+static void hl_fence_init(struct hl_fence *fence)
+{
+ kref_init(&fence->refcount);
+ fence->error = 0;
+ init_completion(&fence->completion);
+}
static void cs_get(struct hl_cs *cs)
{
@@ -256,6 +252,8 @@ static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
ctx->cs_counters.parsing_drop_cnt;
hdev->aggregated_cs_counters.queue_full_drop_cnt +=
ctx->cs_counters.queue_full_drop_cnt;
+ hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt +=
+ ctx->cs_counters.max_cs_in_flight_drop_cnt;
}
static void cs_do_release(struct kref *ref)
@@ -336,7 +334,7 @@ static void cs_do_release(struct kref *ref)
* In case the wait for signal CS was submitted, the put occurs
* in init_signal_wait_cs() right before hanging on the PQ.
*/
- dma_fence_put(cs->signal_fence);
+ hl_fence_put(cs->signal_fence);
}
/*
@@ -348,19 +346,18 @@ static void cs_do_release(struct kref *ref)
hl_ctx_put(cs->ctx);
/* We need to mark an error for not submitted because in that case
- * the dma fence release flow is different. Mainly, we don't need
+ * the hl fence release flow is different. Mainly, we don't need
* to handle hw_sob for signal/wait
*/
if (cs->timedout)
- dma_fence_set_error(cs->fence, -ETIMEDOUT);
+ cs->fence->error = -ETIMEDOUT;
else if (cs->aborted)
- dma_fence_set_error(cs->fence, -EIO);
+ cs->fence->error = -EIO;
else if (!cs->submitted)
- dma_fence_set_error(cs->fence, -EBUSY);
-
- dma_fence_signal(cs->fence);
- dma_fence_put(cs->fence);
+ cs->fence->error = -EBUSY;
+ complete_all(&cs->fence->completion);
+ hl_fence_put(cs->fence);
cs_counters_aggregate(hdev, cs->ctx);
kfree(cs->jobs_in_queue_cnt);
@@ -401,7 +398,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
enum hl_cs_type cs_type, struct hl_cs **cs_new)
{
struct hl_cs_compl *cs_cmpl;
- struct dma_fence *other = NULL;
+ struct hl_fence *other = NULL;
struct hl_cs *cs;
int rc;
@@ -434,9 +431,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs_cmpl->cs_seq = ctx->cs_sequence;
other = ctx->cs_pending[cs_cmpl->cs_seq &
(hdev->asic_prop.max_pending_cs - 1)];
- if ((other) && (!dma_fence_is_signaled(other))) {
- dev_dbg(hdev->dev,
+
+ if (other && !completion_done(&other->completion)) {
+ dev_dbg_ratelimited(hdev->dev,
"Rejecting CS because of too many in-flights CS\n");
+ ctx->cs_counters.max_cs_in_flight_drop_cnt++;
rc = -EAGAIN;
goto free_fence;
}
@@ -448,8 +447,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
goto free_fence;
}
- dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
- ctx->asid, ctx->cs_sequence);
+ /* init hl_fence */
+ hl_fence_init(&cs_cmpl->base_fence);
cs->sequence = cs_cmpl->cs_seq;
@@ -458,9 +457,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
&cs_cmpl->base_fence;
ctx->cs_sequence++;
- dma_fence_get(&cs_cmpl->base_fence);
+ hl_fence_get(&cs_cmpl->base_fence);
- dma_fence_put(other);
+ hl_fence_put(other);
spin_unlock(&ctx->cs_lock);
@@ -690,8 +689,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = -ENOMEM;
if (is_kernel_allocated_cb)
goto release_cb;
- else
- goto free_cs_object;
+
+ goto free_cs_object;
}
job->id = i + 1;
@@ -773,7 +772,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
struct hl_ctx *ctx = hpriv->ctx;
struct hl_cs_chunk *cs_chunk_array, *chunk;
struct hw_queue_properties *hw_queue_prop;
- struct dma_fence *sig_fence = NULL;
+ struct hl_fence *sig_fence = NULL;
struct hl_cs_job *job;
struct hl_cs *cs;
struct hl_cb *cb;
@@ -808,6 +807,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
/* currently it is guaranteed to have only one chunk */
chunk = &cs_chunk_array[0];
+
+ if (chunk->queue_index >= hdev->asic_prop.max_queues) {
+ dev_err(hdev->dev, "Queue index %d is invalid\n",
+ chunk->queue_index);
+ rc = -EINVAL;
+ goto free_cs_chunk_array;
+ }
+
q_idx = chunk->queue_index;
hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
q_type = hw_queue_prop->type;
@@ -875,14 +882,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
dev_err(hdev->dev,
"CS seq 0x%llx is not of a signal CS\n",
signal_seq);
- dma_fence_put(sig_fence);
+ hl_fence_put(sig_fence);
rc = -EINVAL;
goto free_signal_seq_array;
}
- if (dma_fence_is_signaled(sig_fence)) {
+ if (completion_done(&sig_fence->completion)) {
/* signal CS already finished */
- dma_fence_put(sig_fence);
+ hl_fence_put(sig_fence);
rc = 0;
goto free_signal_seq_array;
}
@@ -894,7 +901,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
rc = allocate_cs(hdev, ctx, cs_type, &cs);
if (rc) {
if (cs_type == CS_TYPE_WAIT)
- dma_fence_put(sig_fence);
+ hl_fence_put(sig_fence);
hl_ctx_put(ctx);
goto free_signal_seq_array;
}
@@ -1154,7 +1161,7 @@ out:
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
struct hl_ctx *ctx, u64 timeout_us, u64 seq)
{
- struct dma_fence *fence;
+ struct hl_fence *fence;
unsigned long timeout;
long rc;
@@ -1173,12 +1180,18 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
"Can't wait on CS %llu because current CS is at seq %llu\n",
seq, ctx->cs_sequence);
} else if (fence) {
- rc = dma_fence_wait_timeout(fence, true, timeout);
+ if (!timeout_us)
+ rc = completion_done(&fence->completion);
+ else
+ rc = wait_for_completion_interruptible_timeout(
+ &fence->completion, timeout);
+
if (fence->error == -ETIMEDOUT)
rc = -ETIMEDOUT;
else if (fence->error == -EIO)
rc = -EIO;
- dma_fence_put(fence);
+
+ hl_fence_put(fence);
} else {
dev_dbg(hdev->dev,
"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",