aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/common
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/common')
-rw-r--r--drivers/misc/habanalabs/common/command_buffer.c55
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c1016
-rw-r--r--drivers/misc/habanalabs/common/context.c4
-rw-r--r--drivers/misc/habanalabs/common/debugfs.c310
-rw-r--r--drivers/misc/habanalabs/common/device.c198
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c167
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h458
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c34
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_ioctl.c77
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c273
-rw-r--r--drivers/misc/habanalabs/common/hwmon.c31
-rw-r--r--drivers/misc/habanalabs/common/memory.c333
-rw-r--r--drivers/misc/habanalabs/common/mmu.c273
-rw-r--r--drivers/misc/habanalabs/common/mmu_v1.c139
-rw-r--r--drivers/misc/habanalabs/common/pci.c16
-rw-r--r--drivers/misc/habanalabs/common/sysfs.c18
16 files changed, 2228 insertions, 1174 deletions
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index ada570f35a41..6f6a904ab6ca 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -11,7 +11,6 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
-#include <linux/genalloc.h>
static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
@@ -68,9 +67,9 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
bus_addr = cb->bus_address;
offset = 0;
list_for_each_entry(va_block, &cb->va_block_list, node) {
- rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size,
- list_is_last(&va_block->node,
- &cb->va_block_list));
+ rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
+ va_block->size, list_is_last(&va_block->node,
+ &cb->va_block_list));
if (rc) {
dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
va_block->start);
@@ -93,7 +92,7 @@ err_va_umap:
list_for_each_entry(va_block, &cb->va_block_list, node) {
if (offset <= 0)
break;
- hl_mmu_unmap(ctx, va_block->start, va_block->size,
+ hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
offset <= va_block->size);
offset -= va_block->size;
}
@@ -120,7 +119,7 @@ static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
mutex_lock(&ctx->mmu_lock);
list_for_each_entry(va_block, &cb->va_block_list, node)
- if (hl_mmu_unmap(ctx, va_block->start, va_block->size,
+ if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
list_is_last(&va_block->node,
&cb->va_block_list)))
dev_warn_ratelimited(hdev->dev,
@@ -376,17 +375,49 @@ int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle)
return rc;
}
+/**
+ * hl_cb_info() - read the CS usage counter of a command buffer
+ * @hdev: habanalabs device, used for error reporting
+ * @mgr: CB manager whose IDR is searched
+ * @cb_handle: CB handle as it was handed to user space
+ * @usage_cnt: filled with the current value of the CB's cs_cnt counter
+ *
+ * Return: 0 on success, -EINVAL if no CB matches the handle (in which case
+ * @usage_cnt is left untouched).
+ */
+static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+			u64 cb_handle, u32 *usage_cnt)
+{
+	/* User space received the handle in a form suitable for mmap, so it
+	 * has to be shifted back to the id that the IDR module allocated.
+	 */
+	u32 idr_id = (u32) (cb_handle >> PAGE_SHIFT);
+	struct hl_cb *cb;
+	int rc = -EINVAL;
+
+	spin_lock(&mgr->cb_lock);
+
+	cb = idr_find(&mgr->cb_handles, idr_id);
+	if (cb) {
+		*usage_cnt = atomic_read(&cb->cs_cnt);
+		rc = 0;
+	} else {
+		dev_err(hdev->dev,
+			"CB info failed, no match to handle 0x%x\n", idr_id);
+	}
+
+	spin_unlock(&mgr->cb_lock);
+
+	return rc;
+}
+
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
union hl_cb_args *args = data;
struct hl_device *hdev = hpriv->hdev;
+ enum hl_device_status status;
u64 handle = 0;
+ u32 usage_cnt = 0;
int rc;
- if (hl_device_disabled_or_in_reset(hdev)) {
+ if (!hl_device_operational(hdev, &status)) {
dev_warn_ratelimited(hdev->dev,
"Device is %s. Can't execute CB IOCTL\n",
- atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ hdev->status[status]);
return -EBUSY;
}
@@ -413,6 +444,13 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
args->in.cb_handle);
break;
+ case HL_CB_OP_INFO:
+ rc = hl_cb_info(hdev, &hpriv->cb_mgr, args->in.cb_handle,
+ &usage_cnt);
+ memset(args, 0, sizeof(*args));
+ args->out.usage_cnt = usage_cnt;
+ break;
+
default:
rc = -ENOTTY;
break;
@@ -517,6 +555,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
}
cb->mmap_size = cb->size;
+ vma->vm_pgoff = handle;
return 0;
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index b2b974ecc431..beb482310a58 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -11,11 +11,25 @@
#include <linux/uaccess.h>
#include <linux/slab.h>
-#define HL_CS_FLAGS_SIG_WAIT (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)
+#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
+ HL_CS_FLAGS_COLLECTIVE_WAIT)
+
+/**
+ * enum hl_cs_wait_status - outcome reported when waiting on a CS
+ * @CS_WAIT_STATUS_BUSY: the CS has not completed yet
+ * @CS_WAIT_STATUS_COMPLETED: the CS has completed
+ * @CS_WAIT_STATUS_GONE: the CS has completed and its fence is already gone
+ */
+enum hl_cs_wait_status {
+	CS_WAIT_STATUS_BUSY = 0,
+	CS_WAIT_STATUS_COMPLETED = 1,
+	CS_WAIT_STATUS_GONE = 2,
+};
static void job_wq_completion(struct work_struct *work);
-static long _hl_cs_wait_ioctl(struct hl_device *hdev,
- struct hl_ctx *ctx, u64 timeout_us, u64 seq);
+static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+ u64 timeout_us, u64 seq,
+ enum hl_cs_wait_status *status, s64 *timestamp);
static void cs_do_release(struct kref *ref);
static void hl_sob_reset(struct kref *ref)
@@ -38,6 +52,38 @@ void hl_sob_reset_error(struct kref *ref)
hw_sob->q_idx, hw_sob->sob_id);
}
+/**
+ * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
+ * @sob_base: sob base id
+ * @sob_mask: sob user mask, each bit represents a sob offset from sob base
+ * @mask: generated mask, written only on success
+ *
+ * Return: 0 if the given parameters are valid, -EINVAL if @sob_mask is empty
+ * or if its most significant set bit does not fit in the monitor starting
+ * from the offset of @sob_base.
+ */
+int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
+{
+	/* offset of the base sob inside its group of 8 */
+	int base_offset = sob_base & 0x7;
+	int msb;
+
+	if (!sob_mask)
+		return -EINVAL;
+
+	/* A mask of exactly 0x1 selects the base sob by its own offset */
+	if (sob_mask == 0x1) {
+		*mask = ~(1 << base_offset);
+		return 0;
+	}
+
+	/* locate the msb in order to verify that the sob range is valid */
+	msb = BITS_PER_BYTE - 1;
+	while (msb >= 0 && !(BIT(msb) & sob_mask))
+		msb--;
+
+	if (msb > (HL_MAX_SOBS_PER_MONITOR - base_offset - 1))
+		return -EINVAL;
+
+	*mask = ~sob_mask;
+
+	return 0;
+}
+
static void hl_fence_release(struct kref *kref)
{
struct hl_fence *fence =
@@ -53,7 +99,8 @@ static void hl_fence_release(struct kref *kref)
goto free;
if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
- (hl_cs_cmpl->type == CS_TYPE_WAIT)) {
+ (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
+ (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
dev_dbg(hdev->dev,
"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
@@ -80,6 +127,10 @@ static void hl_fence_release(struct kref *kref)
* hence the above scenario is avoided.
*/
kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
+
+ if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
+ hdev->asic_funcs->reset_sob_group(hdev,
+ hl_cs_cmpl->sob_group);
}
free:
@@ -102,10 +153,11 @@ static void hl_fence_init(struct hl_fence *fence)
{
kref_init(&fence->refcount);
fence->error = 0;
+ fence->timestamp = ktime_set(0, 0);
init_completion(&fence->completion);
}
-static void cs_get(struct hl_cs *cs)
+void cs_get(struct hl_cs *cs)
{
kref_get(&cs->refcount);
}
@@ -120,6 +172,18 @@ static void cs_put(struct hl_cs *cs)
kref_put(&cs->refcount, cs_do_release);
}
+/* kref release callback: frees the job that embeds @ref as its refcount */
+static void cs_job_do_release(struct kref *ref)
+{
+	kfree(container_of(ref, struct hl_cs_job, refcount));
+}
+
+/* Drop one reference on @job, with cs_job_do_release() as release callback */
+static void cs_job_put(struct hl_cs_job *job)
+{
+	struct kref *job_ref = &job->refcount;
+
+	kref_put(job_ref, cs_job_do_release);
+}
+
static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
/*
@@ -169,10 +233,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
job->patched_cb = parser.patched_cb;
job->job_cb_size = parser.patched_cb_size;
job->contains_dma_pkt = parser.contains_dma_pkt;
-
- spin_lock(&job->patched_cb->lock);
- job->patched_cb->cs_cnt++;
- spin_unlock(&job->patched_cb->lock);
+ atomic_inc(&job->patched_cb->cs_cnt);
}
/*
@@ -180,9 +241,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
* original CB anymore because it was already parsed and
* won't be accessed again for this CS
*/
- spin_lock(&job->user_cb->lock);
- job->user_cb->cs_cnt--;
- spin_unlock(&job->user_cb->lock);
+ atomic_dec(&job->user_cb->cs_cnt);
hl_cb_put(job->user_cb);
job->user_cb = NULL;
} else if (!rc) {
@@ -192,7 +251,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
return rc;
}
-static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
+static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
{
struct hl_cs *cs = job->cs;
@@ -204,10 +263,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
* created, so we need to check it's not NULL
*/
if (job->patched_cb) {
- spin_lock(&job->patched_cb->lock);
- job->patched_cb->cs_cnt--;
- spin_unlock(&job->patched_cb->lock);
-
+ atomic_dec(&job->patched_cb->cs_cnt);
hl_cb_put(job->patched_cb);
}
}
@@ -215,13 +271,12 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
* enabled, the user CB isn't released in cs_parser() and thus should be
* released here.
+ * This is also true for INT queue jobs which were allocated by the driver
*/
- if (job->queue_type == QUEUE_TYPE_HW &&
- job->is_kernel_allocated_cb && hdev->mmu_enable) {
- spin_lock(&job->user_cb->lock);
- job->user_cb->cs_cnt--;
- spin_unlock(&job->user_cb->lock);
-
+ if (job->is_kernel_allocated_cb &&
+ ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
+ job->queue_type == QUEUE_TYPE_INT)) {
+ atomic_dec(&job->user_cb->cs_cnt);
hl_cb_put(job->user_cb);
}
@@ -239,27 +294,12 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
job->queue_type == QUEUE_TYPE_HW)
cs_put(cs);
- kfree(job);
-}
-
-static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
-{
- hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
- ctx->cs_counters.device_in_reset_drop_cnt;
- hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
- ctx->cs_counters.out_of_mem_drop_cnt;
- hdev->aggregated_cs_counters.parsing_drop_cnt +=
- ctx->cs_counters.parsing_drop_cnt;
- hdev->aggregated_cs_counters.queue_full_drop_cnt +=
- ctx->cs_counters.queue_full_drop_cnt;
- hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt +=
- ctx->cs_counters.max_cs_in_flight_drop_cnt;
+ cs_job_put(job);
}
static void cs_do_release(struct kref *ref)
{
- struct hl_cs *cs = container_of(ref, struct hl_cs,
- refcount);
+ struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
struct hl_device *hdev = cs->ctx->hdev;
struct hl_cs_job *job, *tmp;
@@ -268,77 +308,78 @@ static void cs_do_release(struct kref *ref)
/*
* Although if we reached here it means that all external jobs have
* finished, because each one of them took refcnt to CS, we still
- * need to go over the internal jobs and free them. Otherwise, we
+ * need to go over the internal jobs and complete them. Otherwise, we
* will have leaked memory and what's worse, the CS object (and
* potentially the CTX object) could be released, while the JOB
* still holds a pointer to them (but no reference).
*/
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
- free_job(hdev, job);
+ complete_job(hdev, job);
- /* We also need to update CI for internal queues */
- if (cs->submitted) {
- hdev->asic_funcs->hw_queues_lock(hdev);
+ if (!cs->submitted) {
+ /* In case the wait for signal CS was submitted, the put occurs
+ * in init_signal_wait_cs() or collective_wait_init_cs()
+ * right before hanging on the PQ.
+ */
+ if (cs->type == CS_TYPE_WAIT ||
+ cs->type == CS_TYPE_COLLECTIVE_WAIT)
+ hl_fence_put(cs->signal_fence);
- hdev->cs_active_cnt--;
- if (!hdev->cs_active_cnt) {
- struct hl_device_idle_busy_ts *ts;
+ goto out;
+ }
- ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
- ts->busy_to_idle_ts = ktime_get();
+ hdev->asic_funcs->hw_queues_lock(hdev);
- if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
- hdev->idle_busy_ts_idx = 0;
- } else if (hdev->cs_active_cnt < 0) {
- dev_crit(hdev->dev, "CS active cnt %d is negative\n",
- hdev->cs_active_cnt);
- }
+ hdev->cs_active_cnt--;
+ if (!hdev->cs_active_cnt) {
+ struct hl_device_idle_busy_ts *ts;
- hdev->asic_funcs->hw_queues_unlock(hdev);
+ ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
+ ts->busy_to_idle_ts = ktime_get();
- hl_int_hw_queue_update_ci(cs);
+ if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
+ hdev->idle_busy_ts_idx = 0;
+ } else if (hdev->cs_active_cnt < 0) {
+ dev_crit(hdev->dev, "CS active cnt %d is negative\n",
+ hdev->cs_active_cnt);
+ }
- spin_lock(&hdev->hw_queues_mirror_lock);
- /* remove CS from hw_queues mirror list */
- list_del_init(&cs->mirror_node);
- spin_unlock(&hdev->hw_queues_mirror_lock);
+ hdev->asic_funcs->hw_queues_unlock(hdev);
- /*
- * Don't cancel TDR in case this CS was timedout because we
- * might be running from the TDR context
- */
- if ((!cs->timedout) &&
- (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
- struct hl_cs *next;
+ /* Need to update CI for internal queues */
+ hl_int_hw_queue_update_ci(cs);
- if (cs->tdr_active)
- cancel_delayed_work_sync(&cs->work_tdr);
+ /* remove CS from CS mirror list */
+ spin_lock(&hdev->cs_mirror_lock);
+ list_del_init(&cs->mirror_node);
+ spin_unlock(&hdev->cs_mirror_lock);
- spin_lock(&hdev->hw_queues_mirror_lock);
+ /* Don't cancel TDR in case this CS was timedout because we might be
+ * running from the TDR context
+ */
+ if (!cs->timedout && hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
+ struct hl_cs *next;
- /* queue TDR for next CS */
- next = list_first_entry_or_null(
- &hdev->hw_queues_mirror_list,
- struct hl_cs, mirror_node);
+ if (cs->tdr_active)
+ cancel_delayed_work_sync(&cs->work_tdr);
- if ((next) && (!next->tdr_active)) {
- next->tdr_active = true;
- schedule_delayed_work(&next->work_tdr,
- hdev->timeout_jiffies);
- }
+ spin_lock(&hdev->cs_mirror_lock);
- spin_unlock(&hdev->hw_queues_mirror_lock);
+ /* queue TDR for next CS */
+ next = list_first_entry_or_null(&hdev->cs_mirror_list,
+ struct hl_cs, mirror_node);
+
+ if (next && !next->tdr_active) {
+ next->tdr_active = true;
+ schedule_delayed_work(&next->work_tdr,
+ hdev->timeout_jiffies);
}
- } else if (cs->type == CS_TYPE_WAIT) {
- /*
- * In case the wait for signal CS was submitted, the put occurs
- * in init_signal_wait_cs() right before hanging on the PQ.
- */
- hl_fence_put(cs->signal_fence);
+
+ spin_unlock(&hdev->cs_mirror_lock);
}
- /*
- * Must be called before hl_ctx_put because inside we use ctx to get
+out:
+ /* Must be called before hl_ctx_put because inside we use ctx to get
* the device
*/
hl_debugfs_remove_cs(cs);
@@ -356,9 +397,10 @@ static void cs_do_release(struct kref *ref)
else if (!cs->submitted)
cs->fence->error = -EBUSY;
+ if (cs->timestamp)
+ cs->fence->timestamp = ktime_get();
complete_all(&cs->fence->completion);
hl_fence_put(cs->fence);
- cs_counters_aggregate(hdev, cs->ctx);
kfree(cs->jobs_in_queue_cnt);
kfree(cs);
@@ -384,24 +426,51 @@ static void cs_timedout(struct work_struct *work)
hdev = cs->ctx->hdev;
- dev_err(hdev->dev,
- "Command submission %llu has not finished in time!\n",
- cs->sequence);
+ switch (cs->type) {
+ case CS_TYPE_SIGNAL:
+ dev_err(hdev->dev,
+ "Signal command submission %llu has not finished in time!\n",
+ cs->sequence);
+ break;
+
+ case CS_TYPE_WAIT:
+ dev_err(hdev->dev,
+ "Wait command submission %llu has not finished in time!\n",
+ cs->sequence);
+ break;
+
+ case CS_TYPE_COLLECTIVE_WAIT:
+ dev_err(hdev->dev,
+ "Collective Wait command submission %llu has not finished in time!\n",
+ cs->sequence);
+ break;
+
+ default:
+ dev_err(hdev->dev,
+ "Command submission %llu has not finished in time!\n",
+ cs->sequence);
+ break;
+ }
cs_put(cs);
if (hdev->reset_on_lockup)
hl_device_reset(hdev, false, false);
+ else
+ hdev->needs_reset = true;
}
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
enum hl_cs_type cs_type, struct hl_cs **cs_new)
{
- struct hl_cs_compl *cs_cmpl;
+ struct hl_cs_counters_atomic *cntr;
struct hl_fence *other = NULL;
+ struct hl_cs_compl *cs_cmpl;
struct hl_cs *cs;
int rc;
+ cntr = &hdev->aggregated_cs_counters;
+
cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
if (!cs)
return -ENOMEM;
@@ -435,7 +504,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
if (other && !completion_done(&other->completion)) {
dev_dbg_ratelimited(hdev->dev,
"Rejecting CS because of too many in-flights CS\n");
- ctx->cs_counters.max_cs_in_flight_drop_cnt++;
+ atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
+ atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
rc = -EAGAIN;
goto free_fence;
}
@@ -480,7 +550,7 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
struct hl_cs_job *job, *tmp;
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
- free_job(hdev, job);
+ complete_job(hdev, job);
}
void hl_cs_rollback_all(struct hl_device *hdev)
@@ -493,8 +563,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
flush_workqueue(hdev->cq_wq[i]);
/* Make sure we don't have leftovers in the H/W queues mirror list */
- list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
- mirror_node) {
+ list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
cs_get(cs);
cs->aborted = true;
dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
@@ -512,7 +581,7 @@ static void job_wq_completion(struct work_struct *work)
struct hl_device *hdev = cs->ctx->hdev;
/* job is no longer needed */
- free_job(hdev, job);
+ complete_job(hdev, job);
}
static int validate_queue_index(struct hl_device *hdev,
@@ -547,9 +616,36 @@ static int validate_queue_index(struct hl_device *hdev,
return -EINVAL;
}
- *queue_type = hw_queue_prop->type;
- *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;
+ /* When hw queue type isn't QUEUE_TYPE_HW,
+ * the USER_ALLOC_CB flag shall be treated as "don't care".
+ */
+ if (hw_queue_prop->type == QUEUE_TYPE_HW) {
+ if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
+ if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
+ dev_err(hdev->dev,
+ "Queue index %d doesn't support user CB\n",
+ chunk->queue_index);
+ return -EINVAL;
+ }
+
+ *is_kernel_allocated_cb = false;
+ } else {
+ if (!(hw_queue_prop->cb_alloc_flags &
+ CB_ALLOC_KERNEL)) {
+ dev_err(hdev->dev,
+ "Queue index %d doesn't support kernel CB\n",
+ chunk->queue_index);
+ return -EINVAL;
+ }
+
+ *is_kernel_allocated_cb = true;
+ }
+ } else {
+ *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
+ & CB_ALLOC_KERNEL);
+ }
+ *queue_type = hw_queue_prop->type;
return 0;
}
@@ -573,9 +669,7 @@ static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
goto release_cb;
}
- spin_lock(&cb->lock);
- cb->cs_cnt++;
- spin_unlock(&cb->lock);
+ atomic_inc(&cb->cs_cnt);
return cb;
@@ -593,6 +687,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
if (!job)
return NULL;
+ kref_init(&job->refcount);
job->queue_type = queue_type;
job->is_kernel_allocated_cb = is_kernel_allocated_cb;
@@ -605,42 +700,115 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
return job;
}
-static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
- u32 num_chunks, u64 *cs_seq)
+/*
+ * Translate the CS type flags into an enum hl_cs_type. The flags are tested
+ * in the order SIGNAL, WAIT, COLLECTIVE_WAIT and the first one that is set
+ * wins; CS_TYPE_DEFAULT is returned when none of them is set.
+ */
+static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
+{
+	if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
+		return CS_TYPE_SIGNAL;
+
+	if (cs_type_flags & HL_CS_FLAGS_WAIT)
+		return CS_TYPE_WAIT;
+
+	if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
+		return CS_TYPE_COLLECTIVE_WAIT;
+
+	return CS_TYPE_DEFAULT;
+}
+
+static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
{
struct hl_device *hdev = hpriv->hdev;
- struct hl_cs_chunk *cs_chunk_array;
- struct hl_cs_job *job;
- struct hl_cs *cs;
- struct hl_cb *cb;
- bool int_queues_only = true;
- u32 size_to_copy;
- int rc, i;
+ struct hl_ctx *ctx = hpriv->ctx;
+ u32 cs_type_flags, num_chunks;
+ enum hl_device_status status;
+ enum hl_cs_type cs_type;
- *cs_seq = ULLONG_MAX;
+ if (!hl_device_operational(hdev, &status)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Device is %s. Can't submit new CS\n",
+ hdev->status[status]);
+ return -EBUSY;
+ }
+
+ cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
+
+ if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
+ dev_err(hdev->dev,
+ "CS type flags are mutually exclusive, context %d\n",
+ ctx->asid);
+ return -EINVAL;
+ }
+
+ cs_type = hl_cs_get_cs_type(cs_type_flags);
+ num_chunks = args->in.num_chunks_execute;
+
+ if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
+ !hdev->supports_sync_stream)) {
+ dev_err(hdev->dev, "Sync stream CS is not supported\n");
+ return -EINVAL;
+ }
+
+ if (cs_type == CS_TYPE_DEFAULT) {
+ if (!num_chunks) {
+ dev_err(hdev->dev,
+ "Got execute CS with 0 chunks, context %d\n",
+ ctx->asid);
+ return -EINVAL;
+ }
+ } else if (num_chunks != 1) {
+ dev_err(hdev->dev,
+ "Sync stream CS mandates one chunk only, context %d\n",
+ ctx->asid);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hl_cs_copy_chunk_array(struct hl_device *hdev,
+ struct hl_cs_chunk **cs_chunk_array,
+ void __user *chunks, u32 num_chunks)
+{
+ u32 size_to_copy;
if (num_chunks > HL_MAX_JOBS_PER_CS) {
dev_err(hdev->dev,
"Number of chunks can NOT be larger than %d\n",
HL_MAX_JOBS_PER_CS);
- rc = -EINVAL;
- goto out;
+ return -EINVAL;
}
- cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
+ *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
GFP_ATOMIC);
- if (!cs_chunk_array) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!*cs_chunk_array)
+ return -ENOMEM;
size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
- if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
+ if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
- rc = -EFAULT;
- goto free_cs_chunk_array;
+ kfree(*cs_chunk_array);
+ return -EFAULT;
}
+ return 0;
+}
+
+static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
+ u32 num_chunks, u64 *cs_seq, bool timestamp)
+{
+ bool int_queues_only = true;
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_cs_chunk *cs_chunk_array;
+ struct hl_cs_counters_atomic *cntr;
+ struct hl_cs_job *job;
+ struct hl_cs *cs;
+ struct hl_cb *cb;
+ int rc, i;
+
+ cntr = &hdev->aggregated_cs_counters;
+ *cs_seq = ULLONG_MAX;
+
+ rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+ if (rc)
+ goto out;
+
/* increment refcnt for context */
hl_ctx_get(hdev, hpriv->ctx);
@@ -650,6 +818,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
goto free_cs_chunk_array;
}
+ cs->timestamp = !!timestamp;
*cs_seq = cs->sequence;
hl_debugfs_add_cs(cs);
@@ -663,14 +832,17 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = validate_queue_index(hdev, chunk, &queue_type,
&is_kernel_allocated_cb);
if (rc) {
- hpriv->ctx->cs_counters.parsing_drop_cnt++;
+ atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
+ atomic64_inc(&cntr->parsing_drop_cnt);
goto free_cs_object;
}
if (is_kernel_allocated_cb) {
cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
if (!cb) {
- hpriv->ctx->cs_counters.parsing_drop_cnt++;
+ atomic64_inc(
+ &hpriv->ctx->cs_counters.parsing_drop_cnt);
+ atomic64_inc(&cntr->parsing_drop_cnt);
rc = -EINVAL;
goto free_cs_object;
}
@@ -684,7 +856,9 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
job = hl_cs_allocate_job(hdev, queue_type,
is_kernel_allocated_cb);
if (!job) {
- hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
+ atomic64_inc(
+ &hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&cntr->out_of_mem_drop_cnt);
dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM;
if (is_kernel_allocated_cb)
@@ -717,7 +891,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = cs_parser(hpriv, job);
if (rc) {
- hpriv->ctx->cs_counters.parsing_drop_cnt++;
+ atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
+ atomic64_inc(&cntr->parsing_drop_cnt);
dev_err(hdev->dev,
"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
cs->ctx->asid, cs->sequence, job->id, rc);
@@ -726,7 +901,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
}
if (int_queues_only) {
- hpriv->ctx->cs_counters.parsing_drop_cnt++;
+ atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
+ atomic64_inc(&cntr->parsing_drop_cnt);
dev_err(hdev->dev,
"Reject CS %d.%llu because only internal queues jobs are present\n",
cs->ctx->asid, cs->sequence);
@@ -747,9 +923,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
goto put_cs;
release_cb:
- spin_lock(&cb->lock);
- cb->cs_cnt--;
- spin_unlock(&cb->lock);
+ atomic_dec(&cb->cs_cnt);
hl_cb_put(cb);
free_cs_object:
cs_rollback(hdev, cs);
@@ -764,47 +938,234 @@ out:
return rc;
}
-static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
- void __user *chunks, u32 num_chunks,
+static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
u64 *cs_seq)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
- struct hl_cs_chunk *cs_chunk_array, *chunk;
- struct hw_queue_properties *hw_queue_prop;
- struct hl_fence *sig_fence = NULL;
- struct hl_cs_job *job;
- struct hl_cs *cs;
- struct hl_cb *cb;
- enum hl_queue_type q_type;
- u64 *signal_seq_arr = NULL, signal_seq;
- u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
- int rc;
+ bool need_soft_reset = false;
+ int rc = 0, do_ctx_switch;
+ void __user *chunks;
+ u32 num_chunks, tmp;
+ int ret;
- *cs_seq = ULLONG_MAX;
+ do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
- if (num_chunks > HL_MAX_JOBS_PER_CS) {
+ if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
+ mutex_lock(&hpriv->restore_phase_mutex);
+
+ if (do_ctx_switch) {
+ rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
+ if (rc) {
+ dev_err_ratelimited(hdev->dev,
+ "Failed to switch to context %d, rejecting CS! %d\n",
+ ctx->asid, rc);
+ /*
+ * If we timed out, or if the device is not IDLE
+ * while we want to do context-switch (-EBUSY),
+ * we need to soft-reset because QMAN is
+ * probably stuck. However, we can't call
+ * reset here directly because that would
+ * deadlock, so we do it at the very end of
+ * this function
+ */
+ if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
+ need_soft_reset = true;
+ mutex_unlock(&hpriv->restore_phase_mutex);
+ goto out;
+ }
+ }
+
+ hdev->asic_funcs->restore_phase_topology(hdev);
+
+ chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
+ num_chunks = args->in.num_chunks_restore;
+
+ if (!num_chunks) {
+ dev_dbg(hdev->dev,
+ "Need to run restore phase but restore CS is empty\n");
+ rc = 0;
+ } else {
+ rc = cs_ioctl_default(hpriv, chunks, num_chunks,
+ cs_seq, false);
+ }
+
+ mutex_unlock(&hpriv->restore_phase_mutex);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to submit restore CS for context %d (%d)\n",
+ ctx->asid, rc);
+ goto out;
+ }
+
+ /* Need to wait for restore completion before execution phase */
+ if (num_chunks) {
+ enum hl_cs_wait_status status;
+wait_again:
+ ret = _hl_cs_wait_ioctl(hdev, ctx,
+ jiffies_to_usecs(hdev->timeout_jiffies),
+ *cs_seq, &status, NULL);
+ if (ret) {
+ if (ret == -ERESTARTSYS) {
+ usleep_range(100, 200);
+ goto wait_again;
+ }
+
+ dev_err(hdev->dev,
+ "Restore CS for context %d failed to complete %d\n",
+ ctx->asid, ret);
+ rc = -ENOEXEC;
+ goto out;
+ }
+ }
+
+ ctx->thread_ctx_switch_wait_token = 1;
+
+ } else if (!ctx->thread_ctx_switch_wait_token) {
+ rc = hl_poll_timeout_memory(hdev,
+ &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
+ 100, jiffies_to_usecs(hdev->timeout_jiffies), false);
+
+ if (rc == -ETIMEDOUT) {
+ dev_err(hdev->dev,
+ "context switch phase timeout (%d)\n", tmp);
+ goto out;
+ }
+ }
+
+out:
+ if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
+ hl_device_reset(hdev, false, false);
+
+ return rc;
+}
+
+static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
+ struct hl_cs_chunk *chunk, u64 *signal_seq)
+{
+ u64 *signal_seq_arr = NULL;
+ u32 size_to_copy, signal_seq_arr_len;
+ int rc = 0;
+
+ signal_seq_arr_len = chunk->num_signal_seq_arr;
+
+ /* currently only one signal seq is supported */
+ if (signal_seq_arr_len != 1) {
dev_err(hdev->dev,
- "Number of chunks can NOT be larger than %d\n",
- HL_MAX_JOBS_PER_CS);
- rc = -EINVAL;
- goto out;
+ "Wait for signal CS supports only one signal CS seq\n");
+ return -EINVAL;
}
- cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
+ signal_seq_arr = kmalloc_array(signal_seq_arr_len,
+ sizeof(*signal_seq_arr),
GFP_ATOMIC);
- if (!cs_chunk_array) {
- rc = -ENOMEM;
+ if (!signal_seq_arr)
+ return -ENOMEM;
+
+ size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
+ if (copy_from_user(signal_seq_arr,
+ u64_to_user_ptr(chunk->signal_seq_arr),
+ size_to_copy)) {
+ dev_err(hdev->dev,
+ "Failed to copy signal seq array from user\n");
+ rc = -EFAULT;
goto out;
}
- size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
- if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
- dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
- rc = -EFAULT;
- goto free_cs_chunk_array;
+ /* currently it is guaranteed to have only one signal seq */
+ *signal_seq = signal_seq_arr[0];
+
+out:
+ kfree(signal_seq_arr);
+
+ return rc;
+}
+
+/*
+ * cs_ioctl_signal_wait_create_jobs() - create the job of a signal/wait CS
+ * @hdev: habanalabs device
+ * @ctx: context whose out-of-memory drop counter is bumped on failure
+ * @cs: the CS that the new job is attached to
+ * @q_type: type of the queue the job is submitted to
+ * @q_idx: index of the queue the job is submitted to
+ *
+ * Allocates one job and one kernel CB, ties them together and appends the
+ * job to the job list of @cs. The CB size is the wait CB size when cs->type
+ * is CS_TYPE_WAIT and the signal CB size for every other type.
+ *
+ * Return: 0 on success, -ENOMEM if the job could not be allocated, -EFAULT
+ * if the kernel CB could not be created.
+ */
+static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
+		struct hl_ctx *ctx, struct hl_cs *cs, enum hl_queue_type q_type,
+		u32 q_idx)
+{
+	struct hl_cs_counters_atomic *cntr;
+	struct hl_cs_job *job;
+	struct hl_cb *cb;
+	u32 cb_size;
+
+	cntr = &hdev->aggregated_cs_counters;
+
+	job = hl_cs_allocate_job(hdev, q_type, true);
+	if (!job) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&cntr->out_of_mem_drop_cnt);
+		dev_err(hdev->dev, "Failed to allocate a new job\n");
+		return -ENOMEM;
+	}
+
+	if (cs->type == CS_TYPE_WAIT)
+		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
+	else
+		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
+
+	cb = hl_cb_kernel_create(hdev, cb_size,
+				q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
+	if (!cb) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&cntr->out_of_mem_drop_cnt);
+		/* The job is refcounted since hl_cs_allocate_job() initialized
+		 * its kref, so release it through the refcount rather than
+		 * with a raw kfree().
+		 */
+		cs_job_put(job);
+		return -EFAULT;
+	}
+
+	job->id = 0;
+	job->cs = cs;
+	job->user_cb = cb;
+	atomic_inc(&job->user_cb->cs_cnt);
+	job->user_cb_size = cb_size;
+	job->hw_queue_id = q_idx;
+
+	/*
+	 * No need for parsing, the user CB is the patched CB.
+	 * We call hl_cb_destroy() for two reasons - we don't need the CB in
+	 * the CB idr anymore, and we need to decrement its refcount as it was
+	 * incremented inside hl_cb_kernel_create().
+	 */
+	job->patched_cb = job->user_cb;
+	job->job_cb_size = job->user_cb_size;
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+	/* increment refcount as for external queues we get completion */
+	cs_get(cs);
+
+	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
+
+	list_add_tail(&job->cs_node, &cs->job_list);
+
+	hl_debugfs_add_job(hdev, job);
+
+	return 0;
+}
+
+static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
+ void __user *chunks, u32 num_chunks,
+ u64 *cs_seq, bool timestamp)
+{
+ struct hl_cs_chunk *cs_chunk_array, *chunk;
+ struct hw_queue_properties *hw_queue_prop;
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_cs_compl *sig_waitcs_cmpl;
+ u32 q_idx, collective_engine_id = 0;
+ struct hl_fence *sig_fence = NULL;
+ struct hl_ctx *ctx = hpriv->ctx;
+ enum hl_queue_type q_type;
+ struct hl_cs *cs;
+ u64 signal_seq;
+ int rc;
+
+ *cs_seq = ULLONG_MAX;
+
+ rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+ if (rc)
+ goto out;
+
/* currently it is guaranteed to have only one chunk */
chunk = &cs_chunk_array[0];
@@ -819,60 +1180,43 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
q_type = hw_queue_prop->type;
- if ((q_idx >= hdev->asic_prop.max_queues) ||
- (!hw_queue_prop->supports_sync_stream)) {
- dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
+ if (!hw_queue_prop->supports_sync_stream) {
+ dev_err(hdev->dev,
+ "Queue index %d does not support sync stream operations\n",
+ q_idx);
rc = -EINVAL;
goto free_cs_chunk_array;
}
- if (cs_type == CS_TYPE_WAIT) {
- struct hl_cs_compl *sig_waitcs_cmpl;
-
- signal_seq_arr_len = chunk->num_signal_seq_arr;
-
- /* currently only one signal seq is supported */
- if (signal_seq_arr_len != 1) {
+ if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
+ if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
dev_err(hdev->dev,
- "Wait for signal CS supports only one signal CS seq\n");
+ "Queue index %d is invalid\n", q_idx);
rc = -EINVAL;
goto free_cs_chunk_array;
}
- signal_seq_arr = kmalloc_array(signal_seq_arr_len,
- sizeof(*signal_seq_arr),
- GFP_ATOMIC);
- if (!signal_seq_arr) {
- rc = -ENOMEM;
- goto free_cs_chunk_array;
- }
+ collective_engine_id = chunk->collective_engine_id;
+ }
- size_to_copy = chunk->num_signal_seq_arr *
- sizeof(*signal_seq_arr);
- if (copy_from_user(signal_seq_arr,
- u64_to_user_ptr(chunk->signal_seq_arr),
- size_to_copy)) {
- dev_err(hdev->dev,
- "Failed to copy signal seq array from user\n");
- rc = -EFAULT;
- goto free_signal_seq_array;
- }
+ if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
+ rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq);
+ if (rc)
+ goto free_cs_chunk_array;
- /* currently it is guaranteed to have only one signal seq */
- signal_seq = signal_seq_arr[0];
sig_fence = hl_ctx_get_fence(ctx, signal_seq);
if (IS_ERR(sig_fence)) {
dev_err(hdev->dev,
"Failed to get signal CS with seq 0x%llx\n",
signal_seq);
rc = PTR_ERR(sig_fence);
- goto free_signal_seq_array;
+ goto free_cs_chunk_array;
}
if (!sig_fence) {
/* signal CS already finished */
rc = 0;
- goto free_signal_seq_array;
+ goto free_cs_chunk_array;
}
sig_waitcs_cmpl =
@@ -884,14 +1228,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
signal_seq);
hl_fence_put(sig_fence);
rc = -EINVAL;
- goto free_signal_seq_array;
+ goto free_cs_chunk_array;
}
if (completion_done(&sig_fence->completion)) {
/* signal CS already finished */
hl_fence_put(sig_fence);
rc = 0;
- goto free_signal_seq_array;
+ goto free_cs_chunk_array;
}
}
@@ -900,70 +1244,37 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
rc = allocate_cs(hdev, ctx, cs_type, &cs);
if (rc) {
- if (cs_type == CS_TYPE_WAIT)
+ if (cs_type == CS_TYPE_WAIT ||
+ cs_type == CS_TYPE_COLLECTIVE_WAIT)
hl_fence_put(sig_fence);
hl_ctx_put(ctx);
- goto free_signal_seq_array;
+ goto free_cs_chunk_array;
}
+ cs->timestamp = !!timestamp;
+
/*
* Save the signal CS fence for later initialization right before
* hanging the wait CS on the queue.
*/
- if (cs->type == CS_TYPE_WAIT)
+ if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT)
cs->signal_fence = sig_fence;
hl_debugfs_add_cs(cs);
*cs_seq = cs->sequence;
- job = hl_cs_allocate_job(hdev, q_type, true);
- if (!job) {
- ctx->cs_counters.out_of_mem_drop_cnt++;
- dev_err(hdev->dev, "Failed to allocate a new job\n");
- rc = -ENOMEM;
- goto put_cs;
- }
-
- if (cs->type == CS_TYPE_WAIT)
- cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
+ if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
+ rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
+ q_idx);
+ else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
+ rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
+ cs, q_idx, collective_engine_id);
else
- cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
-
- cb = hl_cb_kernel_create(hdev, cb_size,
- q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
- if (!cb) {
- ctx->cs_counters.out_of_mem_drop_cnt++;
- kfree(job);
- rc = -EFAULT;
- goto put_cs;
- }
-
- job->id = 0;
- job->cs = cs;
- job->user_cb = cb;
- job->user_cb->cs_cnt++;
- job->user_cb_size = cb_size;
- job->hw_queue_id = q_idx;
-
- /*
- * No need in parsing, user CB is the patched CB.
- * We call hl_cb_destroy() out of two reasons - we don't need the CB in
- * the CB idr anymore and to decrement its refcount as it was
- * incremented inside hl_cb_kernel_create().
- */
- job->patched_cb = job->user_cb;
- job->job_cb_size = job->user_cb_size;
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
-
- cs->jobs_in_queue_cnt[job->hw_queue_id]++;
-
- list_add_tail(&job->cs_node, &cs->job_list);
-
- /* increment refcount as for external queues we get completion */
- cs_get(cs);
+ rc = -EINVAL;
- hl_debugfs_add_job(hdev, job);
+ if (rc)
+ goto free_cs_object;
rc = hl_hw_queue_schedule_cs(cs);
if (rc) {
@@ -984,9 +1295,6 @@ free_cs_object:
put_cs:
/* We finished with the CS in this function, so put the ref */
cs_put(cs);
-free_signal_seq_array:
- if (cs_type == CS_TYPE_WAIT)
- kfree(signal_seq_arr);
free_cs_chunk_array:
kfree(cs_chunk_array);
out:
@@ -995,156 +1303,39 @@ out:
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
- struct hl_device *hdev = hpriv->hdev;
union hl_cs_args *args = data;
- struct hl_ctx *ctx = hpriv->ctx;
- void __user *chunks_execute, *chunks_restore;
enum hl_cs_type cs_type;
- u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
u64 cs_seq = ULONG_MAX;
- int rc, do_ctx_switch;
- bool need_soft_reset = false;
-
- if (hl_device_disabled_or_in_reset(hdev)) {
- dev_warn_ratelimited(hdev->dev,
- "Device is %s. Can't submit new CS\n",
- atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
- rc = -EBUSY;
- goto out;
- }
-
- sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;
+ void __user *chunks;
+ u32 num_chunks;
+ int rc;
- if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
- dev_err(hdev->dev,
- "Signal and wait CS flags are mutually exclusive, context %d\n",
- ctx->asid);
- rc = -EINVAL;
+ rc = hl_cs_sanity_checks(hpriv, args);
+ if (rc)
goto out;
- }
- if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
- (!hdev->supports_sync_stream))) {
- dev_err(hdev->dev, "Sync stream CS is not supported\n");
- rc = -EINVAL;
+ rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
+ if (rc)
goto out;
- }
- if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
- cs_type = CS_TYPE_SIGNAL;
- else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
- cs_type = CS_TYPE_WAIT;
- else
- cs_type = CS_TYPE_DEFAULT;
-
- chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
- num_chunks_execute = args->in.num_chunks_execute;
-
- if (cs_type == CS_TYPE_DEFAULT) {
- if (!num_chunks_execute) {
- dev_err(hdev->dev,
- "Got execute CS with 0 chunks, context %d\n",
- ctx->asid);
- rc = -EINVAL;
- goto out;
- }
- } else if (num_chunks_execute != 1) {
- dev_err(hdev->dev,
- "Sync stream CS mandates one chunk only, context %d\n",
- ctx->asid);
- rc = -EINVAL;
- goto out;
+ cs_type = hl_cs_get_cs_type(args->in.cs_flags &
+ ~HL_CS_FLAGS_FORCE_RESTORE);
+ chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
+ num_chunks = args->in.num_chunks_execute;
+
+ switch (cs_type) {
+ case CS_TYPE_SIGNAL:
+ case CS_TYPE_WAIT:
+ case CS_TYPE_COLLECTIVE_WAIT:
+ rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
+ &cs_seq, args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
+ break;
+ default:
+ rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
+ args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
+ break;
}
- do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
-
- if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
- long ret;
-
- chunks_restore =
- (void __user *) (uintptr_t) args->in.chunks_restore;
- num_chunks_restore = args->in.num_chunks_restore;
-
- mutex_lock(&hpriv->restore_phase_mutex);
-
- if (do_ctx_switch) {
- rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
- if (rc) {
- dev_err_ratelimited(hdev->dev,
- "Failed to switch to context %d, rejecting CS! %d\n",
- ctx->asid, rc);
- /*
- * If we timedout, or if the device is not IDLE
- * while we want to do context-switch (-EBUSY),
- * we need to soft-reset because QMAN is
- * probably stuck. However, we can't call to
- * reset here directly because of deadlock, so
- * need to do it at the very end of this
- * function
- */
- if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
- need_soft_reset = true;
- mutex_unlock(&hpriv->restore_phase_mutex);
- goto out;
- }
- }
-
- hdev->asic_funcs->restore_phase_topology(hdev);
-
- if (!num_chunks_restore) {
- dev_dbg(hdev->dev,
- "Need to run restore phase but restore CS is empty\n");
- rc = 0;
- } else {
- rc = cs_ioctl_default(hpriv, chunks_restore,
- num_chunks_restore, &cs_seq);
- }
-
- mutex_unlock(&hpriv->restore_phase_mutex);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to submit restore CS for context %d (%d)\n",
- ctx->asid, rc);
- goto out;
- }
-
- /* Need to wait for restore completion before execution phase */
- if (num_chunks_restore) {
- ret = _hl_cs_wait_ioctl(hdev, ctx,
- jiffies_to_usecs(hdev->timeout_jiffies),
- cs_seq);
- if (ret <= 0) {
- dev_err(hdev->dev,
- "Restore CS for context %d failed to complete %ld\n",
- ctx->asid, ret);
- rc = -ENOEXEC;
- goto out;
- }
- }
-
- ctx->thread_ctx_switch_wait_token = 1;
- } else if (!ctx->thread_ctx_switch_wait_token) {
- u32 tmp;
-
- rc = hl_poll_timeout_memory(hdev,
- &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
- 100, jiffies_to_usecs(hdev->timeout_jiffies), false);
-
- if (rc == -ETIMEDOUT) {
- dev_err(hdev->dev,
- "context switch phase timeout (%d)\n", tmp);
- goto out;
- }
- }
-
- if (cs_type == CS_TYPE_DEFAULT)
- rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
- &cs_seq);
- else
- rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
- num_chunks_execute, &cs_seq);
-
out:
if (rc != -EAGAIN) {
memset(args, 0, sizeof(*args));
@@ -1152,18 +1343,20 @@ out:
args->out.seq = cs_seq;
}
- if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
- hl_device_reset(hdev, false, false);
-
return rc;
}
-static long _hl_cs_wait_ioctl(struct hl_device *hdev,
- struct hl_ctx *ctx, u64 timeout_us, u64 seq)
+static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+ u64 timeout_us, u64 seq,
+ enum hl_cs_wait_status *status, s64 *timestamp)
{
struct hl_fence *fence;
unsigned long timeout;
- long rc;
+ int rc = 0;
+ long completion_rc;
+
+ if (timestamp)
+ *timestamp = 0;
if (timeout_us == MAX_SCHEDULE_TIMEOUT)
timeout = timeout_us;
@@ -1181,11 +1374,20 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
seq, ctx->cs_sequence);
} else if (fence) {
if (!timeout_us)
- rc = completion_done(&fence->completion);
+ completion_rc = completion_done(&fence->completion);
else
- rc = wait_for_completion_interruptible_timeout(
+ completion_rc =
+ wait_for_completion_interruptible_timeout(
&fence->completion, timeout);
+ if (completion_rc > 0) {
+ *status = CS_WAIT_STATUS_COMPLETED;
+ if (timestamp)
+ *timestamp = ktime_to_ns(fence->timestamp);
+ } else {
+ *status = CS_WAIT_STATUS_BUSY;
+ }
+
if (fence->error == -ETIMEDOUT)
rc = -ETIMEDOUT;
else if (fence->error == -EIO)
@@ -1196,7 +1398,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
dev_dbg(hdev->dev,
"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
seq, ctx->cs_sequence);
- rc = 1;
+ *status = CS_WAIT_STATUS_GONE;
}
hl_ctx_put(ctx);
@@ -1208,14 +1410,17 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
struct hl_device *hdev = hpriv->hdev;
union hl_wait_cs_args *args = data;
+ enum hl_cs_wait_status status;
u64 seq = args->in.seq;
- long rc;
+ s64 timestamp;
+ int rc;
- rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);
+ rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
+ &status, &timestamp);
memset(args, 0, sizeof(*args));
- if (rc < 0) {
+ if (rc) {
if (rc == -ERESTARTSYS) {
dev_err_ratelimited(hdev->dev,
"user process got signal while waiting for CS handle %llu\n",
@@ -1236,10 +1441,23 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return rc;
}
- if (rc == 0)
- args->out.status = HL_WAIT_CS_STATUS_BUSY;
- else
+ if (timestamp) {
+ args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
+ args->out.timestamp_nsec = timestamp;
+ }
+
+ switch (status) {
+ case CS_WAIT_STATUS_GONE:
+ args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
+ fallthrough;
+ case CS_WAIT_STATUS_COMPLETED:
args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
+ break;
+ case CS_WAIT_STATUS_BUSY:
+ default:
+ args->out.status = HL_WAIT_CS_STATUS_BUSY;
+ break;
+ }
return 0;
}
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index 7a59dd7c6450..f65e6559149b 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -40,10 +40,14 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
hl_device_set_debug_mode(hdev, false);
+ hdev->asic_funcs->ctx_fini(ctx);
hl_cb_va_pool_fini(ctx);
hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
+ /* Scrub both SRAM and DRAM */
+ hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
+
if ((!hdev->pldm) && (hdev->pdev) &&
(!hdev->asic_funcs->is_device_idle(hdev,
&idle_mask, NULL)))
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 912ddfa360b1..cef716643979 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -22,9 +22,10 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
u8 i2c_reg, long *val)
{
struct cpucp_packet pkt;
+ u64 result;
int rc;
- if (hl_device_disabled_or_in_reset(hdev))
+ if (!hl_device_operational(hdev, NULL))
return -EBUSY;
memset(&pkt, 0, sizeof(pkt));
@@ -36,7 +37,9 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
pkt.i2c_reg = i2c_reg;
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- 0, val);
+ 0, &result);
+
+ *val = (long) result;
if (rc)
dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
@@ -50,7 +53,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
struct cpucp_packet pkt;
int rc;
- if (hl_device_disabled_or_in_reset(hdev))
+ if (!hl_device_operational(hdev, NULL))
return -EBUSY;
memset(&pkt, 0, sizeof(pkt));
@@ -76,7 +79,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
struct cpucp_packet pkt;
int rc;
- if (hl_device_disabled_or_in_reset(hdev))
+ if (!hl_device_operational(hdev, NULL))
return;
memset(&pkt, 0, sizeof(pkt));
@@ -113,7 +116,7 @@ static int command_buffers_show(struct seq_file *s, void *data)
" %03llu %d 0x%08x %d %d %d\n",
cb->id, cb->ctx->asid, cb->size,
kref_read(&cb->refcount),
- cb->mmap, cb->cs_cnt);
+ cb->mmap, atomic_read(&cb->cs_cnt));
}
spin_unlock(&dev_entry->cb_spinlock);
@@ -168,18 +171,19 @@ static int command_submission_jobs_show(struct seq_file *s, void *data)
if (first) {
first = false;
seq_puts(s, "\n");
- seq_puts(s, " JOB ID CS ID CTX ASID H/W Queue\n");
- seq_puts(s, "---------------------------------------\n");
+ seq_puts(s, " JOB ID CS ID CTX ASID JOB RefCnt H/W Queue\n");
+ seq_puts(s, "----------------------------------------------------\n");
}
if (job->cs)
seq_printf(s,
- " %02d %llu %d %d\n",
+ " %02d %llu %d %d %d\n",
job->id, job->cs->sequence, job->cs->ctx->asid,
- job->hw_queue_id);
+ kref_read(&job->refcount), job->hw_queue_id);
else
seq_printf(s,
- " %02d 0 %d %d\n",
- job->id, HL_KERNEL_ASID_ID, job->hw_queue_id);
+ " %02d 0 %d %d %d\n",
+ job->id, HL_KERNEL_ASID_ID,
+ kref_read(&job->refcount), job->hw_queue_id);
}
spin_unlock(&dev_entry->cs_job_spinlock);
@@ -300,93 +304,15 @@ static int vm_show(struct seq_file *s, void *data)
return 0;
}
-/* these inline functions are copied from mmu.c */
-static inline u64 get_hop0_addr(struct hl_ctx *ctx)
-{
- return ctx->hdev->asic_prop.mmu_pgt_addr +
- (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
-static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
- u64 virt_addr, u64 mask, u64 shift)
-{
- return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
- ((virt_addr & mask) >> shift);
-}
-
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_specs,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
- mmu_specs->hop0_shift);
-}
-
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_specs,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
- mmu_specs->hop1_shift);
-}
-
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_specs,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
- mmu_specs->hop2_shift);
-}
-
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_specs,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
- mmu_specs->hop3_shift);
-}
-
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_specs,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
- mmu_specs->hop4_shift);
-}
-
-static inline u64 get_hop5_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_specs,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop5_mask,
- mmu_specs->hop5_shift);
-}
-
-static inline u64 get_next_hop_addr(u64 curr_pte)
-{
- if (curr_pte & PAGE_PRESENT_MASK)
- return curr_pte & HOP_PHYS_ADDR_MASK;
- else
- return ULLONG_MAX;
-}
-
static int mmu_show(struct seq_file *s, void *data)
{
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct hl_mmu_properties *mmu_prop;
struct hl_ctx *ctx;
- bool is_dram_addr;
-
- u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
- hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
- hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0,
- hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0,
- hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0,
- hop5_addr = 0, hop5_pte_addr = 0, hop5_pte = 0,
- virt_addr = dev_entry->mmu_addr;
+ struct hl_mmu_hop_info hops_info;
+ u64 virt_addr = dev_entry->mmu_addr;
+ int i;
if (!hdev->mmu_enable)
return 0;
@@ -401,132 +327,24 @@ static int mmu_show(struct seq_file *s, void *data)
return 0;
}
- is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
- prop->dmmu.start_addr,
- prop->dmmu.end_addr);
-
- /* shifts and masks are the same in PMMU and HPMMU, use one of them */
- mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
- mutex_lock(&ctx->mmu_lock);
-
- /* the following lookup is copied from unmap() in mmu.c */
-
- hop0_addr = get_hop0_addr(ctx);
- hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
- hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
- hop1_addr = get_next_hop_addr(hop0_pte);
-
- if (hop1_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
- hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
- hop2_addr = get_next_hop_addr(hop1_pte);
-
- if (hop2_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
- hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
- hop3_addr = get_next_hop_addr(hop2_pte);
-
- if (hop3_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
- hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
-
- if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) {
- if (!(hop3_pte & LAST_MASK)) {
- hop4_addr = get_next_hop_addr(hop3_pte);
-
- if (hop4_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop,
- hop4_addr, virt_addr);
- hop4_pte = hdev->asic_funcs->read_pte(hdev,
- hop4_pte_addr);
- if (!(hop4_pte & PAGE_PRESENT_MASK))
- goto not_mapped;
- } else {
- if (!(hop3_pte & PAGE_PRESENT_MASK))
- goto not_mapped;
- }
- } else {
- hop4_addr = get_next_hop_addr(hop3_pte);
-
- if (hop4_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop,
- hop4_addr, virt_addr);
- hop4_pte = hdev->asic_funcs->read_pte(hdev,
- hop4_pte_addr);
- if (!(hop4_pte & LAST_MASK)) {
- hop5_addr = get_next_hop_addr(hop4_pte);
-
- if (hop5_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop5_pte_addr = get_hop5_pte_addr(ctx, mmu_prop,
- hop5_addr, virt_addr);
- hop5_pte = hdev->asic_funcs->read_pte(hdev,
- hop5_pte_addr);
- if (!(hop5_pte & PAGE_PRESENT_MASK))
- goto not_mapped;
- } else {
- if (!(hop4_pte & PAGE_PRESENT_MASK))
- goto not_mapped;
- }
+ if (hl_mmu_get_tlb_info(ctx, virt_addr, &hops_info)) {
+ dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+ virt_addr);
+ return 0;
}
seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
dev_entry->mmu_asid, dev_entry->mmu_addr);
- seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr);
- seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr);
- seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte);
-
- seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr);
- seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr);
- seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte);
-
- seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr);
- seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr);
- seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte);
-
- seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr);
- seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr);
- seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte);
-
- if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) {
- if (!(hop3_pte & LAST_MASK)) {
- seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
- seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
- seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
- }
- } else {
- seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
- seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
- seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
-
- if (!(hop4_pte & LAST_MASK)) {
- seq_printf(s, "hop5_addr: 0x%llx\n", hop5_addr);
- seq_printf(s, "hop5_pte_addr: 0x%llx\n", hop5_pte_addr);
- seq_printf(s, "hop5_pte: 0x%llx\n", hop5_pte);
- }
+ for (i = 0 ; i < hops_info.used_hops ; i++) {
+ seq_printf(s, "hop%d_addr: 0x%llx\n",
+ i, hops_info.hop_info[i].hop_addr);
+ seq_printf(s, "hop%d_pte_addr: 0x%llx\n",
+ i, hops_info.hop_info[i].hop_pte_addr);
+ seq_printf(s, "hop%d_pte: 0x%llx\n",
+ i, hops_info.hop_info[i].hop_pte_val);
}
- goto out;
-
-not_mapped:
- dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
- virt_addr);
-out:
- mutex_unlock(&ctx->mmu_lock);
-
return 0;
}
@@ -597,7 +415,7 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
if (!hdev->mmu_enable)
goto out;
- if (hdev->dram_supports_virtual_memory &&
+ if (prop->dram_supports_virtual_memory &&
(addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
return true;
@@ -616,78 +434,20 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
u64 *phys_addr)
{
struct hl_ctx *ctx = hdev->compute_ctx;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct hl_mmu_properties *mmu_prop;
- u64 hop_addr, hop_pte_addr, hop_pte;
- u64 offset_mask = HOP4_MASK | FLAGS_MASK;
int rc = 0;
- bool is_dram_addr;
if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
return -EINVAL;
}
- is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
- prop->dmmu.start_addr,
- prop->dmmu.end_addr);
-
- /* shifts and masks are the same in PMMU and HPMMU, use one of them */
- mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
- mutex_lock(&ctx->mmu_lock);
-
- /* hop 0 */
- hop_addr = get_hop0_addr(ctx);
- hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
- hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
- /* hop 1 */
- hop_addr = get_next_hop_addr(hop_pte);
- if (hop_addr == ULLONG_MAX)
- goto not_mapped;
- hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
- hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
- /* hop 2 */
- hop_addr = get_next_hop_addr(hop_pte);
- if (hop_addr == ULLONG_MAX)
- goto not_mapped;
- hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
- hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
- /* hop 3 */
- hop_addr = get_next_hop_addr(hop_pte);
- if (hop_addr == ULLONG_MAX)
- goto not_mapped;
- hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
- hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
- if (!(hop_pte & LAST_MASK)) {
- /* hop 4 */
- hop_addr = get_next_hop_addr(hop_pte);
- if (hop_addr == ULLONG_MAX)
- goto not_mapped;
- hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
- virt_addr);
- hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
- offset_mask = FLAGS_MASK;
+ rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr);
+ if (rc) {
+ dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+ virt_addr);
+ rc = -EINVAL;
}
- if (!(hop_pte & PAGE_PRESENT_MASK))
- goto not_mapped;
-
- *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask);
-
- goto out;
-
-not_mapped:
- dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
- virt_addr);
- rc = -EINVAL;
-out:
- mutex_unlock(&ctx->mmu_lock);
return rc;
}
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 20572224099a..5871162a8442 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -10,20 +10,9 @@
#include "habanalabs.h"
#include <linux/pci.h>
-#include <linux/sched/signal.h>
#include <linux/hwmon.h>
#include <uapi/misc/habanalabs.h>
-#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10)
-
-bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
-{
- if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
- return true;
- else
- return false;
-}
-
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
enum hl_device_status status;
@@ -32,12 +21,34 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
status = HL_DEVICE_STATUS_MALFUNCTION;
else if (atomic_read(&hdev->in_reset))
status = HL_DEVICE_STATUS_IN_RESET;
+ else if (hdev->needs_reset)
+ status = HL_DEVICE_STATUS_NEEDS_RESET;
else
status = HL_DEVICE_STATUS_OPERATIONAL;
return status;
}
+bool hl_device_operational(struct hl_device *hdev,
+ enum hl_device_status *status)
+{
+ enum hl_device_status current_status;
+
+ current_status = hl_device_status(hdev);
+ if (status)
+ *status = current_status;
+
+ switch (current_status) {
+ case HL_DEVICE_STATUS_IN_RESET:
+ case HL_DEVICE_STATUS_MALFUNCTION:
+ case HL_DEVICE_STATUS_NEEDS_RESET:
+ return false;
+ case HL_DEVICE_STATUS_OPERATIONAL:
+ default:
+ return true;
+ }
+}
+
static void hpriv_release(struct kref *ref)
{
struct hl_fpriv *hpriv;
@@ -231,16 +242,36 @@ delete_cdev_device:
static void device_cdev_sysfs_del(struct hl_device *hdev)
{
- /* device_release() won't be called so must free devices explicitly */
- if (!hdev->cdev_sysfs_created) {
- kfree(hdev->dev_ctrl);
- kfree(hdev->dev);
- return;
- }
+ if (!hdev->cdev_sysfs_created)
+ goto put_devices;
hl_sysfs_fini(hdev);
cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
cdev_device_del(&hdev->cdev, hdev->dev);
+
+put_devices:
+ put_device(hdev->dev);
+ put_device(hdev->dev_ctrl);
+}
+
+static void device_hard_reset_pending(struct work_struct *work)
+{
+ struct hl_device_reset_work *device_reset_work =
+ container_of(work, struct hl_device_reset_work,
+ reset_work.work);
+ struct hl_device *hdev = device_reset_work->hdev;
+ int rc;
+
+ rc = hl_device_reset(hdev, true, true);
+ if ((rc == -EBUSY) && !hdev->device_fini_pending) {
+ dev_info(hdev->dev,
+ "Could not reset device. will try again in %u seconds",
+ HL_PENDING_RESET_PER_SEC);
+
+ queue_delayed_work(device_reset_work->wq,
+ &device_reset_work->reset_work,
+ msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
+ }
}
/*
@@ -327,17 +358,32 @@ static int device_early_init(struct hl_device *hdev)
hl_cb_mgr_init(&hdev->kernel_cb_mgr);
+ hdev->device_reset_work.wq =
+ create_singlethread_workqueue("hl_device_reset");
+ if (!hdev->device_reset_work.wq) {
+ rc = -ENOMEM;
+ dev_err(hdev->dev, "Failed to create device reset WQ\n");
+ goto free_cb_mgr;
+ }
+
+ INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
+ device_hard_reset_pending);
+ hdev->device_reset_work.hdev = hdev;
+ hdev->device_fini_pending = 0;
+
mutex_init(&hdev->send_cpu_message_lock);
mutex_init(&hdev->debug_lock);
mutex_init(&hdev->mmu_cache_lock);
- INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
- spin_lock_init(&hdev->hw_queues_mirror_lock);
+ INIT_LIST_HEAD(&hdev->cs_mirror_list);
+ spin_lock_init(&hdev->cs_mirror_lock);
INIT_LIST_HEAD(&hdev->fpriv_list);
mutex_init(&hdev->fpriv_list_lock);
atomic_set(&hdev->in_reset, 0);
return 0;
+free_cb_mgr:
+ hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
free_idle_busy_ts_arr:
kfree(hdev->idle_busy_ts_arr);
free_chip_info:
@@ -380,6 +426,7 @@ static void device_early_fini(struct hl_device *hdev)
kfree(hdev->hl_chip_info);
destroy_workqueue(hdev->eq_wq);
+ destroy_workqueue(hdev->device_reset_work.wq);
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
destroy_workqueue(hdev->cq_wq[i]);
@@ -412,7 +459,7 @@ static void hl_device_heartbeat(struct work_struct *work)
struct hl_device *hdev = container_of(work, struct hl_device,
work_heartbeat.work);
- if (hl_device_disabled_or_in_reset(hdev))
+ if (!hl_device_operational(hdev, NULL))
goto reschedule;
if (!hdev->asic_funcs->send_heartbeat(hdev))
@@ -758,16 +805,12 @@ disable_device:
return rc;
}
-static int device_kill_open_processes(struct hl_device *hdev)
+static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
{
- u16 pending_total, pending_cnt;
struct hl_fpriv *hpriv;
struct task_struct *task = NULL;
+ u32 pending_cnt;
- if (hdev->pldm)
- pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
- else
- pending_total = HL_PENDING_RESET_PER_SEC;
/* Giving time for user to close FD, and for processes that are inside
* hl_device_open to finish
@@ -775,6 +818,19 @@ static int device_kill_open_processes(struct hl_device *hdev)
if (!list_empty(&hdev->fpriv_list))
ssleep(1);
+ if (timeout) {
+ pending_cnt = timeout;
+ } else {
+ if (hdev->process_kill_trial_cnt) {
+ /* Processes have been already killed */
+ pending_cnt = 1;
+ goto wait_for_processes;
+ } else {
+ /* Wait a small period after process kill */
+ pending_cnt = HL_PENDING_RESET_PER_SEC;
+ }
+ }
+
mutex_lock(&hdev->fpriv_list_lock);
/* This section must be protected because we are dereferencing
@@ -794,16 +850,18 @@ static int device_kill_open_processes(struct hl_device *hdev)
mutex_unlock(&hdev->fpriv_list_lock);
- /* We killed the open users, but because the driver cleans up after the
- * user contexts are closed (e.g. mmu mappings), we need to wait again
- * to make sure the cleaning phase is finished before continuing with
- * the reset
+ /*
+ * We killed the open users, but that doesn't mean they are closed.
+ * It could be that they are running a long cleanup phase in the driver
+ * e.g. MMU unmappings, or running other long teardown flow even before
+ * our cleanup.
+ * Therefore we need to wait again to make sure they are closed before
+ * continuing with the reset.
*/
- pending_cnt = pending_total;
-
+wait_for_processes:
while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
- dev_info(hdev->dev,
+ dev_dbg(hdev->dev,
"Waiting for all unmap operations to finish before hard reset\n");
pending_cnt--;
@@ -811,18 +869,17 @@ static int device_kill_open_processes(struct hl_device *hdev)
ssleep(1);
}
- return list_empty(&hdev->fpriv_list) ? 0 : -EBUSY;
-}
+ /* All processes exited successfully */
+ if (list_empty(&hdev->fpriv_list))
+ return 0;
-static void device_hard_reset_pending(struct work_struct *work)
-{
- struct hl_device_reset_work *device_reset_work =
- container_of(work, struct hl_device_reset_work, reset_work);
- struct hl_device *hdev = device_reset_work->hdev;
+ /* Give up waiting for processes to exit */
+ if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
+ return -ETIME;
- hl_device_reset(hdev, true, true);
+ hdev->process_kill_trial_cnt++;
- kfree(device_reset_work);
+ return -EBUSY;
}
/*
@@ -859,6 +916,10 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
hard_reset = true;
}
+ /* Re-entry of reset thread */
+ if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
+ goto kill_processes;
+
/*
* Prevent concurrency in this function - only one reset should be
* done at any given time. Only need to perform this if we didn't
@@ -904,26 +965,17 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
again:
if ((hard_reset) && (!from_hard_reset_thread)) {
- struct hl_device_reset_work *device_reset_work;
-
hdev->hard_reset_pending = true;
- device_reset_work = kzalloc(sizeof(*device_reset_work),
- GFP_ATOMIC);
- if (!device_reset_work) {
- rc = -ENOMEM;
- goto out_err;
- }
+ hdev->process_kill_trial_cnt = 0;
/*
* Because the reset function can't run from interrupt or
* from heartbeat work, we need to call the reset function
* from a dedicated work
*/
- INIT_WORK(&device_reset_work->reset_work,
- device_hard_reset_pending);
- device_reset_work->hdev = hdev;
- schedule_work(&device_reset_work->reset_work);
+ queue_delayed_work(hdev->device_reset_work.wq,
+ &hdev->device_reset_work.reset_work, 0);
return 0;
}
@@ -949,12 +1001,25 @@ again:
/* Go over all the queues, release all CS and their jobs */
hl_cs_rollback_all(hdev);
+kill_processes:
if (hard_reset) {
/* Kill processes here after CS rollback. This is because the
* process can't really exit until all its CSs are done, which
* is what we do in cs rollback
*/
- rc = device_kill_open_processes(hdev);
+ rc = device_kill_open_processes(hdev, 0);
+
+ if (rc == -EBUSY) {
+ if (hdev->device_fini_pending) {
+ dev_crit(hdev->dev,
+ "Failed to kill all open processes, stopping hard reset\n");
+ goto out_err;
+ }
+
+ /* signal reset thread to reschedule */
+ return rc;
+ }
+
if (rc) {
dev_crit(hdev->dev,
"Failed to kill all open processes, stopping hard reset\n");
@@ -1089,6 +1154,7 @@ again:
}
atomic_set(&hdev->in_reset, 0);
+ hdev->needs_reset = false;
if (hard_reset)
hdev->hard_reset_cnt++;
@@ -1261,13 +1327,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hl_debugfs_add_device(hdev);
- if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
- dev_info(hdev->dev,
- "H/W state is dirty, must reset before initializing\n");
- hdev->asic_funcs->halt_engines(hdev, true);
- hdev->asic_funcs->hw_fini(hdev, true);
- }
-
/*
* From this point, in case of an error, add char devices and create
* sysfs nodes as part of the error flow, to allow debugging.
@@ -1371,9 +1430,9 @@ sw_fini:
early_fini:
device_early_fini(hdev);
free_dev_ctrl:
- kfree(hdev->dev_ctrl);
+ put_device(hdev->dev_ctrl);
free_dev:
- kfree(hdev->dev);
+ put_device(hdev->dev);
out_disabled:
hdev->disabled = true;
if (add_cdev_sysfs_on_err)
@@ -1398,11 +1457,14 @@ out_disabled:
*/
void hl_device_fini(struct hl_device *hdev)
{
- int i, rc;
ktime_t timeout;
+ int i, rc;
dev_info(hdev->dev, "Removing device\n");
+ hdev->device_fini_pending = 1;
+ flush_delayed_work(&hdev->device_reset_work.reset_work);
+
/*
* This function is competing with the reset function, so try to
* take the reset atomic and if we are already in middle of reset,
@@ -1458,7 +1520,11 @@ void hl_device_fini(struct hl_device *hdev)
* can't really exit until all its CSs are done, which is what we
* do in cs rollback
*/
- rc = device_kill_open_processes(hdev);
+ dev_info(hdev->dev,
+ "Waiting for all processes to exit (timeout of %u seconds)",
+ HL_PENDING_RESET_LONG_SEC);
+
+ rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC);
if (rc)
dev_crit(hdev->dev, "Failed to kill all open processes\n");
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index cd41c7ceb0e7..0e1c629e9800 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -9,8 +9,6 @@
#include "../include/common/hl_boot_if.h"
#include <linux/firmware.h>
-#include <linux/genalloc.h>
-#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/slab.h>
#define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */
@@ -20,16 +18,18 @@
* @hdev: pointer to hl_device structure.
* @fw_name: the firmware image name
* @dst: IO memory mapped address space to copy firmware to
+ * @src_offset: offset in src FW to copy from
+ * @size: amount of bytes to copy (0 to copy the whole binary)
*
* Copy fw code from firmware file to device memory.
*
* Return: 0 on success, non-zero for failure.
*/
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
- void __iomem *dst)
+ void __iomem *dst, u32 src_offset, u32 size)
{
const struct firmware *fw;
- const u64 *fw_data;
+ const void *fw_data;
size_t fw_size;
int rc;
@@ -57,9 +57,20 @@ int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
goto out;
}
- fw_data = (const u64 *) fw->data;
+ if (size - src_offset > fw_size) {
+ dev_err(hdev->dev,
+ "size to copy(%u) and offset(%u) are invalid\n",
+ size, src_offset);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (size)
+ fw_size = size;
+
+ fw_data = (const void *) fw->data;
- memcpy_toio(dst, fw_data, fw_size);
+ memcpy_toio(dst, fw_data + src_offset, fw_size);
out:
release_firmware(fw);
@@ -77,7 +88,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
}
int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
- u16 len, u32 timeout, long *result)
+ u16 len, u32 timeout, u64 *result)
{
struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr;
@@ -132,7 +143,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
>> CPUCP_PKT_CTL_OPCODE_SHIFT);
rc = -EIO;
} else if (result) {
- *result = (long) le64_to_cpu(pkt->result);
+ *result = le64_to_cpu(pkt->result);
}
out:
@@ -146,7 +157,7 @@ out:
int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
{
struct cpucp_packet pkt;
- long result;
+ u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -169,7 +180,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
{
struct cpucp_unmask_irq_arr_packet *pkt;
size_t total_pkt_size;
- long result;
+ u64 result;
int rc;
total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
@@ -208,7 +219,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
int hl_fw_test_cpu_queue(struct hl_device *hdev)
{
struct cpucp_packet test_pkt = {};
- long result;
+ u64 result;
int rc;
test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
@@ -221,7 +232,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev)
if (!rc) {
if (result != CPUCP_PACKET_FENCE_VAL)
dev_err(hdev->dev,
- "CPU queue test failed (0x%08lX)\n", result);
+ "CPU queue test failed (%#08llx)\n", result);
} else {
dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
}
@@ -252,7 +263,7 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
int hl_fw_send_heartbeat(struct hl_device *hdev)
{
struct cpucp_packet hb_pkt = {};
- long result;
+ u64 result;
int rc;
hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
@@ -268,13 +279,14 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
return rc;
}
-int hl_fw_cpucp_info_get(struct hl_device *hdev)
+int hl_fw_cpucp_info_get(struct hl_device *hdev,
+ u32 cpu_security_boot_status_reg)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct cpucp_packet pkt = {};
void *cpucp_info_cpu_addr;
dma_addr_t cpucp_info_dma_addr;
- long result;
+ u64 result;
int rc;
cpucp_info_cpu_addr =
@@ -313,6 +325,11 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev)
goto out;
}
+ /* Read FW application security bits again */
+ if (hdev->asic_prop.fw_security_status_valid)
+ hdev->asic_prop.fw_app_security_map =
+ RREG32(cpu_security_boot_status_reg);
+
out:
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
sizeof(struct cpucp_info), cpucp_info_cpu_addr);
@@ -325,7 +342,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
struct cpucp_packet pkt = {};
void *eeprom_info_cpu_addr;
dma_addr_t eeprom_info_dma_addr;
- long result;
+ u64 result;
int rc;
eeprom_info_cpu_addr =
@@ -368,7 +385,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters)
{
struct cpucp_packet pkt = {};
- long result;
+ u64 result;
int rc;
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
@@ -415,7 +432,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
{
struct cpucp_packet pkt = {};
- long result;
+ u64 result;
int rc;
pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET <<
@@ -435,9 +452,36 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
return rc;
}
-static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+ u16 *pll_freq_arr)
{
- u32 err_val;
+ struct cpucp_packet pkt;
+ u64 result;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
+ CPUCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.pll_type = __cpu_to_le16(pll_index);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_CPUCP_INFO_TIMEOUT_USEC, &result);
+ if (rc)
+ dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
+
+ pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
+ pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
+ pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
+ pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);
+
+ return rc;
+}
+
+static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
+ u32 cpu_security_boot_status_reg)
+{
+ u32 err_val, security_val;
/* Some of the firmware status codes are deprecated in newer f/w
* versions. In those versions, the errors are reported
@@ -472,6 +516,18 @@ static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
dev_err(hdev->dev,
"Device boot error - NIC F/W initialization failed\n");
+ if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
+ dev_warn(hdev->dev,
+ "Device boot warning - security not ready\n");
+ if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
+ dev_err(hdev->dev, "Device boot error - security failure\n");
+ if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
+ dev_err(hdev->dev, "Device boot error - eFuse failure\n");
+
+ security_val = RREG32(cpu_security_boot_status_reg);
+ if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
+ dev_dbg(hdev->dev, "Device security status %#x\n",
+ security_val);
}
static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
@@ -524,10 +580,12 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
}
}
-int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
- u32 boot_err0_reg, u32 timeout)
+int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
+ u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
+ u32 timeout)
{
- u32 status;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u32 status, security_status;
int rc;
if (!hdev->cpu_enable)
@@ -557,23 +615,52 @@ int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
if (rc) {
dev_err(hdev->dev, "Failed to read preboot version\n");
detect_cpu_boot_status(hdev, status);
- fw_read_errors(hdev, boot_err0_reg);
+ fw_read_errors(hdev, boot_err0_reg,
+ cpu_security_boot_status_reg);
return -EIO;
}
- hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
+ rc = hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
+ if (rc)
+ return rc;
+
+ security_status = RREG32(cpu_security_boot_status_reg);
+
+ /* We read security status multiple times during boot:
+ * 1. preboot - we check if fw security feature is supported
+ * 2. boot cpu - we get boot cpu security status
+ * 3. FW application - we get FW application security status
+ *
+ * Preboot:
+ * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
+ * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
+ */
+ if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
+ hdev->asic_prop.fw_security_status_valid = 1;
+ prop->fw_security_disabled =
+ !(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN);
+ } else {
+ hdev->asic_prop.fw_security_status_valid = 0;
+ prop->fw_security_disabled = true;
+ }
+
+ dev_info(hdev->dev, "firmware-level security is %s\n",
+ prop->fw_security_disabled ? "disabled" : "enabled");
return 0;
}
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
- u32 boot_err0_reg, bool skip_bmc,
- u32 cpu_timeout, u32 boot_fit_timeout)
+ u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
+ bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
{
u32 status;
int rc;
+ if (!(hdev->fw_loading & FW_TYPE_BOOT_CPU))
+ return 0;
+
dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
cpu_timeout / USEC_PER_SEC);
@@ -631,17 +718,24 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
10000,
cpu_timeout);
+ dev_dbg(hdev->dev, "uboot status = %d\n", status);
+
/* Read U-Boot version now in case we will later fail */
hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
+ /* Read boot_cpu security bits */
+ if (hdev->asic_prop.fw_security_status_valid)
+ hdev->asic_prop.fw_boot_cpu_security_map =
+ RREG32(cpu_security_boot_status_reg);
+
if (rc) {
detect_cpu_boot_status(hdev, status);
rc = -EIO;
goto out;
}
- if (!hdev->fw_loading) {
- dev_info(hdev->dev, "Skip loading FW\n");
+ if (!(hdev->fw_loading & FW_TYPE_LINUX)) {
+ dev_info(hdev->dev, "Skip loading Linux F/W\n");
goto out;
}
@@ -702,10 +796,23 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
goto out;
}
+ /* Read FW application security bits */
+ if (hdev->asic_prop.fw_security_status_valid) {
+ hdev->asic_prop.fw_app_security_map =
+ RREG32(cpu_security_boot_status_reg);
+
+ if (hdev->asic_prop.fw_app_security_map &
+ CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
+ hdev->asic_prop.hard_reset_done_by_fw = true;
+ }
+
+ dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
+ hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
+
dev_info(hdev->dev, "Successfully loaded firmware to device\n");
out:
- fw_read_errors(hdev, boot_err0_reg);
+ fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
return rc;
}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 6ed974d2def0..571eda6ef5ab 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -10,6 +10,7 @@
#include "../include/common/cpucp_if.h"
#include "../include/common/qman_if.h"
+#include "../include/hw_ip/mmu/mmu_general.h"
#include <uapi/misc/habanalabs.h>
#include <linux/cdev.h>
@@ -19,6 +20,10 @@
#include <linux/scatterlist.h>
#include <linux/hashtable.h>
#include <linux/bitfield.h>
+#include <linux/genalloc.h>
+#include <linux/sched/signal.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/coresight.h>
#define HL_NAME "habanalabs"
@@ -36,7 +41,9 @@
#define HL_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFull >> PAGE_SHIFT)
#define HL_MMAP_OFFSET_VALUE_GET(off) (off & HL_MMAP_OFFSET_VALUE_MASK)
-#define HL_PENDING_RESET_PER_SEC 30
+#define HL_PENDING_RESET_PER_SEC 10
+#define HL_PENDING_RESET_MAX_TRIALS 60 /* 10 minutes */
+#define HL_PENDING_RESET_LONG_SEC 60
#define HL_HARD_RESET_MAX_TIMEOUT 120
@@ -61,15 +68,29 @@
/* MMU */
#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
+/**
+ * enum hl_mmu_page_table_location - mmu page table location
+ * @MMU_DR_PGT: page-table is located on device DRAM.
+ * @MMU_HR_PGT: page-table is located on host memory.
+ * @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported.
+ */
+enum hl_mmu_page_table_location {
+ MMU_DR_PGT = 0, /* device-dram-resident MMU PGT */
+ MMU_HR_PGT, /* host resident MMU PGT */
+ MMU_NUM_PGT_LOCATIONS /* num of PGT locations */
+};
+
/*
* HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
* HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
*/
-#define HL_RSVD_SOBS 4
-#define HL_RSVD_MONS 2
+#define HL_RSVD_SOBS 2
+#define HL_RSVD_MONS 1
-#define HL_RSVD_SOBS_IN_USE 2
-#define HL_RSVD_MONS_IN_USE 1
+/*
+ * HL_COLLECTIVE_RSVD_MSTR_MONS 'collective' reserved monitors per QMAN stream
+ */
+#define HL_COLLECTIVE_RSVD_MSTR_MONS 2
#define HL_MAX_SOB_VAL (1 << 15)
@@ -80,6 +101,28 @@
#define HL_MAX_DCORES 4
+#define HL_MAX_SOBS_PER_MONITOR 8
+
+/**
+ * struct hl_gen_wait_properties - properties for generating a wait CB
+ * @data: command buffer
+ * @q_idx: queue id is used to extract fence register address
+ * @size: offset in command buffer
+ * @sob_base: SOB base to use in this wait CB
+ * @sob_val: SOB value to wait for
+ * @mon_id: monitor to use in this wait CB
+ * @sob_mask: each bit represents a SOB offset from sob_base to be used
+ */
+struct hl_gen_wait_properties {
+ void *data;
+ u32 q_idx;
+ u32 size;
+ u16 sob_base;
+ u16 sob_val;
+ u16 mon_id;
+ u8 sob_mask;
+};
+
/**
* struct pgt_info - MMU hop page info.
* @node: hash linked-list node for the pgts shadow hash of pgts.
@@ -125,6 +168,18 @@ enum hl_fw_component {
};
/**
+ * enum hl_fw_types - F/W types to load
+ * @FW_TYPE_LINUX: Linux image for device CPU
+ * @FW_TYPE_BOOT_CPU: Boot image for device CPU
+ * @FW_TYPE_ALL_TYPES: Mask for all types
+ */
+enum hl_fw_types {
+ FW_TYPE_LINUX = 0x1,
+ FW_TYPE_BOOT_CPU = 0x2,
+ FW_TYPE_ALL_TYPES = (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU)
+};
+
+/**
* enum hl_queue_type - Supported QUEUE types.
* @QUEUE_TYPE_NA: queue is not available.
* @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the
@@ -146,7 +201,8 @@ enum hl_queue_type {
enum hl_cs_type {
CS_TYPE_DEFAULT,
CS_TYPE_SIGNAL,
- CS_TYPE_WAIT
+ CS_TYPE_WAIT,
+ CS_TYPE_COLLECTIVE_WAIT
};
/*
@@ -176,6 +232,17 @@ struct hl_outbound_pci_region {
};
/*
+ * enum queue_cb_alloc_flags - Indicates queue support for CBs that are
+ * allocated by Kernel or by User
+ * @CB_ALLOC_KERNEL: support only CBs that are allocated by Kernel
+ * @CB_ALLOC_USER: support only CBs that are allocated by User
+ */
+enum queue_cb_alloc_flags {
+ CB_ALLOC_KERNEL = 0x1,
+ CB_ALLOC_USER = 0x2
+};
+
+/*
* struct hl_hw_sob - H/W SOB info.
* @hdev: habanalabs device structure.
* @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
@@ -189,19 +256,29 @@ struct hl_hw_sob {
u32 q_idx;
};
+enum hl_collective_mode {
+ HL_COLLECTIVE_NOT_SUPPORTED = 0x0,
+ HL_COLLECTIVE_MASTER = 0x1,
+ HL_COLLECTIVE_SLAVE = 0x2
+};
+
/**
* struct hw_queue_properties - queue information.
* @type: queue type.
+ * @cb_alloc_flags: bitmap which indicates if the hw queue supports CBs
+ * that are allocated by the Kernel driver and therefore,
+ * a CB handle can be provided for jobs on this queue.
+ * Otherwise, a CB address must be provided.
+ * @collective_mode: collective mode of current queue
* @driver_only: true if only the driver is allowed to send a job to this queue,
* false otherwise.
- * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
- * queue, false otherwise (a CB address must be provided).
* @supports_sync_stream: True if queue supports sync stream
*/
struct hw_queue_properties {
enum hl_queue_type type;
+ enum queue_cb_alloc_flags cb_alloc_flags;
+ enum hl_collective_mode collective_mode;
u8 driver_only;
- u8 requires_kernel_cb;
u8 supports_sync_stream;
};
@@ -227,6 +304,8 @@ enum hl_device_hw_state {
HL_DEVICE_HW_STATE_DIRTY
};
+#define HL_MMU_VA_ALIGNMENT_NOT_NEEDED 0
+
/**
* struct hl_mmu_properties - ASIC specific MMU address translation properties.
* @start_addr: virtual start address of the memory region.
@@ -245,6 +324,8 @@ enum hl_device_hw_state {
* @hop5_mask: mask to get the PTE address in hop 5.
* @page_size: default page size used to allocate memory.
* @num_hops: The amount of hops supported by the translation table.
+ * @host_resident: Should the MMU page table reside in host memory or in the
+ * device DRAM.
*/
struct hl_mmu_properties {
u64 start_addr;
@@ -263,6 +344,7 @@ struct hl_mmu_properties {
u64 hop5_mask;
u32 page_size;
u32 num_hops;
+ u8 host_resident;
};
/**
@@ -314,6 +396,14 @@ struct hl_mmu_properties {
* @cb_pool_cb_size: size of each CB in the CB pool.
* @max_pending_cs: maximum of concurrent pending command submissions
* @max_queues: maximum amount of queues in the system
+ * @fw_boot_cpu_security_map: bitmap representation of boot cpu security status
+ * reported by FW, bit description can be found in
+ * CPU_BOOT_DEV_STS*
+ * @fw_app_security_map: bitmap representation of application security status
+ * reported by FW, bit description can be found in
+ * CPU_BOOT_DEV_STS*
+ * @collective_first_sob: first sync object available for collective use
+ * @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
* @sync_stream_first_mon: first monitor available for sync stream use
* @first_available_user_sob: first sob available for the user
@@ -322,6 +412,10 @@ struct hl_mmu_properties {
* @completion_queues_count: number of completion queues.
* @fw_security_disabled: true if security measures are disabled in firmware,
* false otherwise
+ * @fw_security_status_valid: security status bits are valid and can be fetched
+ * from BOOT_DEV_STS0
+ * @dram_supports_virtual_memory: is there an MMU towards the DRAM
+ * @hard_reset_done_by_fw: true if firmware is handling hard reset flow
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -366,6 +460,10 @@ struct asic_fixed_properties {
u32 cb_pool_cb_size;
u32 max_pending_cs;
u32 max_queues;
+ u32 fw_boot_cpu_security_map;
+ u32 fw_app_security_map;
+ u16 collective_first_sob;
+ u16 collective_first_mon;
u16 sync_stream_first_sob;
u16 sync_stream_first_mon;
u16 first_available_user_sob[HL_MAX_DCORES];
@@ -373,6 +471,9 @@ struct asic_fixed_properties {
u8 tpc_enabled_mask;
u8 completion_queues_count;
u8 fw_security_disabled;
+ u8 fw_security_status_valid;
+ u8 dram_supports_virtual_memory;
+ u8 hard_reset_done_by_fw;
};
/**
@@ -380,12 +481,14 @@ struct asic_fixed_properties {
* @completion: fence is implemented using completion
* @refcount: refcount for this fence
* @error: mark this fence with error
+ * @timestamp: timestamp upon completion
*
*/
struct hl_fence {
struct completion completion;
struct kref refcount;
int error;
+ ktime_t timestamp;
};
/**
@@ -397,6 +500,7 @@ struct hl_fence {
* @cs_seq: command submission sequence number.
* @type: type of the CS - signal/wait.
* @sob_val: the SOB value that is used in this signal/wait CS.
+ * @sob_group: the SOB group that is used in this collective wait CS.
*/
struct hl_cs_compl {
struct hl_fence base_fence;
@@ -406,6 +510,7 @@ struct hl_cs_compl {
u64 cs_seq;
enum hl_cs_type type;
u16 sob_val;
+ u16 sob_group;
};
/*
@@ -427,7 +532,7 @@ struct hl_cb_mgr {
* @refcount: reference counter for usage of the CB.
* @hdev: pointer to device this CB belongs to.
* @ctx: pointer to the CB owner's context.
- * @lock: spinlock to protect mmap/cs flows.
+ * @lock: spinlock to protect mmap flows.
* @debugfs_list: node in debugfs list of command buffers.
* @pool_list: node in pool list of command buffers.
* @va_block_list: list of virtual addresses blocks of the CB if it is mapped to
@@ -456,7 +561,7 @@ struct hl_cb {
dma_addr_t bus_address;
u32 mmap_size;
u32 size;
- u32 cs_cnt;
+ atomic_t cs_cnt;
u8 mmap;
u8 is_pool;
u8 is_internal;
@@ -468,6 +573,7 @@ struct hl_cb {
* QUEUES
*/
+struct hl_cs;
struct hl_cs_job;
/* Queue length of external and HW queues */
@@ -490,10 +596,38 @@ struct hl_cs_job;
#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M
/**
- * struct hl_hw_queue - describes a H/W transport queue.
+ * struct hl_sync_stream_properties -
+ * describes a H/W queue sync stream properties
* @hw_sob: array of the used H/W SOBs by this H/W queue.
+ * @next_sob_val: the next value to use for the currently used SOB.
+ * @base_sob_id: the base SOB id of the SOBs used by this queue.
+ * @base_mon_id: the base MON id of the MONs used by this queue.
+ * @collective_mstr_mon_id: the MON ids of the MONs used by this master queue
+ * in order to sync with all slave queues.
+ * @collective_slave_mon_id: the MON id used by this slave queue in order to
+ * sync with its master queue.
+ * @collective_sob_id: current SOB id used by this collective slave queue
+ * to signal its collective master queue upon completion.
+ * @curr_sob_offset: the id offset to the currently used SOB from the
+ * HL_RSVD_SOBS that are being used by this queue.
+ */
+struct hl_sync_stream_properties {
+ struct hl_hw_sob hw_sob[HL_RSVD_SOBS];
+ u16 next_sob_val;
+ u16 base_sob_id;
+ u16 base_mon_id;
+ u16 collective_mstr_mon_id[HL_COLLECTIVE_RSVD_MSTR_MONS];
+ u16 collective_slave_mon_id;
+ u16 collective_sob_id;
+ u8 curr_sob_offset;
+};
+
+/**
+ * struct hl_hw_queue - describes a H/W transport queue.
* @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
+ * @sync_stream_prop: sync stream queue properties
* @queue_type: type of queue.
+ * @collective_mode: collective mode of current queue
* @kernel_address: holds the queue's kernel virtual address.
* @bus_address: holds the queue's DMA address.
* @pi: holds the queue's pi value.
@@ -502,33 +636,25 @@ struct hl_cs_job;
* @cq_id: the id for the corresponding CQ for this H/W queue.
* @msi_vec: the IRQ number of the H/W queue.
* @int_queue_len: length of internal queue (number of entries).
- * @next_sob_val: the next value to use for the currently used SOB.
- * @base_sob_id: the base SOB id of the SOBs used by this queue.
- * @base_mon_id: the base MON id of the MONs used by this queue.
* @valid: is the queue valid (we have array of 32 queues, not all of them
* exist).
- * @curr_sob_offset: the id offset to the currently used SOB from the
- * HL_RSVD_SOBS that are being used by this queue.
* @supports_sync_stream: True if queue supports sync stream
*/
struct hl_hw_queue {
- struct hl_hw_sob hw_sob[HL_RSVD_SOBS];
- struct hl_cs_job **shadow_queue;
- enum hl_queue_type queue_type;
- void *kernel_address;
- dma_addr_t bus_address;
- u32 pi;
- atomic_t ci;
- u32 hw_queue_id;
- u32 cq_id;
- u32 msi_vec;
- u16 int_queue_len;
- u16 next_sob_val;
- u16 base_sob_id;
- u16 base_mon_id;
- u8 valid;
- u8 curr_sob_offset;
- u8 supports_sync_stream;
+ struct hl_cs_job **shadow_queue;
+ struct hl_sync_stream_properties sync_stream_prop;
+ enum hl_queue_type queue_type;
+ enum hl_collective_mode collective_mode;
+ void *kernel_address;
+ dma_addr_t bus_address;
+ u32 pi;
+ atomic_t ci;
+ u32 hw_queue_id;
+ u32 cq_id;
+ u32 msi_vec;
+ u16 int_queue_len;
+ u8 valid;
+ u8 supports_sync_stream;
};
/**
@@ -650,6 +776,7 @@ enum div_select_defs {
* dma_free_coherent(). This is ASIC function because
* its implementation is not trivial when the driver
* is loaded in simulation mode (not upstreamed).
+ * @scrub_device_mem: Scrub device memory given an address and size
* @get_int_queue_base: get the internal queue base address.
* @test_queues: run simple test on all queues for sanity check.
* @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
@@ -700,6 +827,7 @@ enum div_select_defs {
* @wreg: Write a register. Needed for simulator support.
* @halt_coresight: stop the ETF and ETR traces.
* @ctx_init: context dependent initialization.
+ * @ctx_fini: context dependent cleanup.
* @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
* @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
* @read_device_fw_version: read the device's firmware versions that are
@@ -711,9 +839,13 @@ enum div_select_defs {
* @gen_signal_cb: Generate a signal CB.
* @gen_wait_cb: Generate a wait CB.
* @reset_sob: Reset a SOB.
+ * @reset_sob_group: Reset SOB group
* @set_dma_mask_from_fw: set the DMA mask in the driver according to the
* firmware configuration
* @get_device_time: Get the device time.
+ * @collective_wait_init_cs: Generate collective master/slave packets
+ * and place them in the relevant cs jobs
+ * @collective_wait_create_jobs: allocate collective wait cs jobs
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -736,6 +868,7 @@ struct hl_asic_funcs {
dma_addr_t *dma_handle, gfp_t flag);
void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle);
+ int (*scrub_device_mem)(struct hl_device *hdev, u64 addr, u64 size);
void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
dma_addr_t *dma_handle, u16 *queue_len);
int (*test_queues)(struct hl_device *hdev);
@@ -794,28 +927,34 @@ struct hl_asic_funcs {
int (*get_eeprom_data)(struct hl_device *hdev, void *data,
size_t max_size);
int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
- u16 len, u32 timeout, long *result);
- enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
+ u16 len, u32 timeout, u64 *result);
int (*pci_bars_map)(struct hl_device *hdev);
int (*init_iatu)(struct hl_device *hdev);
u32 (*rreg)(struct hl_device *hdev, u32 reg);
void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
void (*halt_coresight)(struct hl_device *hdev);
int (*ctx_init)(struct hl_ctx *ctx);
+ void (*ctx_fini)(struct hl_ctx *ctx);
int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
- void (*read_device_fw_version)(struct hl_device *hdev,
+ int (*read_device_fw_version)(struct hl_device *hdev,
enum hl_fw_component fwc);
int (*load_firmware_to_device)(struct hl_device *hdev);
int (*load_boot_fit_to_device)(struct hl_device *hdev);
u32 (*get_signal_cb_size)(struct hl_device *hdev);
u32 (*get_wait_cb_size)(struct hl_device *hdev);
- void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
- void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id,
- u16 sob_val, u16 mon_id, u32 q_idx);
+ u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
+ u32 size);
+ u32 (*gen_wait_cb)(struct hl_device *hdev,
+ struct hl_gen_wait_properties *prop);
void (*reset_sob)(struct hl_device *hdev, void *data);
+ void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
void (*set_dma_mask_from_fw)(struct hl_device *hdev);
u64 (*get_device_time)(struct hl_device *hdev);
+ void (*collective_wait_init_cs)(struct hl_cs *cs);
+ int (*collective_wait_create_jobs)(struct hl_device *hdev,
+ struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
+ u32 collective_engine_id);
};
@@ -826,17 +965,48 @@ struct hl_asic_funcs {
#define HL_KERNEL_ASID_ID 0
/**
+ * enum hl_va_range_type - virtual address range type.
+ * @HL_VA_RANGE_TYPE_HOST: range type of host pages
+ * @HL_VA_RANGE_TYPE_HOST_HUGE: range type of host huge pages
+ * @HL_VA_RANGE_TYPE_DRAM: range type of dram pages
+ */
+enum hl_va_range_type {
+ HL_VA_RANGE_TYPE_HOST,
+ HL_VA_RANGE_TYPE_HOST_HUGE,
+ HL_VA_RANGE_TYPE_DRAM,
+ HL_VA_RANGE_TYPE_MAX
+};
+
+/**
* struct hl_va_range - virtual addresses range.
* @lock: protects the virtual addresses list.
* @list: list of virtual addresses blocks available for mappings.
* @start_addr: range start address.
* @end_addr: range end address.
+ * @page_size: page size of this va range.
*/
struct hl_va_range {
struct mutex lock;
struct list_head list;
u64 start_addr;
u64 end_addr;
+ u32 page_size;
+};
+
+/**
+ * struct hl_cs_counters_atomic - command submission counters
+ * @out_of_mem_drop_cnt: dropped due to memory allocation issue
+ * @parsing_drop_cnt: dropped due to error in packet parsing
+ * @queue_full_drop_cnt: dropped due to queue full
+ * @device_in_reset_drop_cnt: dropped due to device in reset
+ * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
+ */
+struct hl_cs_counters_atomic {
+ atomic64_t out_of_mem_drop_cnt;
+ atomic64_t parsing_drop_cnt;
+ atomic64_t queue_full_drop_cnt;
+ atomic64_t device_in_reset_drop_cnt;
+ atomic64_t max_cs_in_flight_drop_cnt;
};
/**
@@ -849,14 +1019,12 @@ struct hl_va_range {
* @refcount: reference counter for the context. Context is released only when
* this hits 0l. It is incremented on CS and CS_WAIT.
* @cs_pending: array of hl fence objects representing pending CS.
- * @host_va_range: holds available virtual addresses for host mappings.
- * @host_huge_va_range: holds available virtual addresses for host mappings
- * with huge pages.
- * @dram_va_range: holds available virtual addresses for DRAM mappings.
+ * @va_range: holds available virtual addresses for host and dram mappings.
* @mem_hash_lock: protects the mem_hash.
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
* MMU hash or walking the PGT requires talking this lock.
* @debugfs_list: node in debugfs list of contexts.
+ * @cs_counters: context command submission counters.
* @cb_va_pool: device VA pool for command buffers which are mapped to the
* device's MMU.
* @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
@@ -879,26 +1047,24 @@ struct hl_va_range {
struct hl_ctx {
DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
- struct hl_fpriv *hpriv;
- struct hl_device *hdev;
- struct kref refcount;
- struct hl_fence **cs_pending;
- struct hl_va_range *host_va_range;
- struct hl_va_range *host_huge_va_range;
- struct hl_va_range *dram_va_range;
- struct mutex mem_hash_lock;
- struct mutex mmu_lock;
- struct list_head debugfs_list;
- struct hl_cs_counters cs_counters;
- struct gen_pool *cb_va_pool;
- u64 cs_sequence;
- u64 *dram_default_hops;
- spinlock_t cs_lock;
- atomic64_t dram_phys_mem;
- atomic_t thread_ctx_switch_token;
- u32 thread_ctx_switch_wait_token;
- u32 asid;
- u32 handle;
+ struct hl_fpriv *hpriv;
+ struct hl_device *hdev;
+ struct kref refcount;
+ struct hl_fence **cs_pending;
+ struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX];
+ struct mutex mem_hash_lock;
+ struct mutex mmu_lock;
+ struct list_head debugfs_list;
+ struct hl_cs_counters_atomic cs_counters;
+ struct gen_pool *cb_va_pool;
+ u64 cs_sequence;
+ u64 *dram_default_hops;
+ spinlock_t cs_lock;
+ atomic64_t dram_phys_mem;
+ atomic_t thread_ctx_switch_token;
+ u32 thread_ctx_switch_wait_token;
+ u32 asid;
+ u32 handle;
};
/**
@@ -963,6 +1129,7 @@ struct hl_userptr {
* @tdr_active: true if TDR was activated for this CS (to prevent
* double TDR activation).
* @aborted: true if CS was aborted due to some device error.
+ * @timestamp: true if a timestamp must be captured upon completion
*/
struct hl_cs {
u16 *jobs_in_queue_cnt;
@@ -983,6 +1150,7 @@ struct hl_cs {
u8 timedout;
u8 tdr_active;
u8 aborted;
+ u8 timestamp;
};
/**
@@ -996,6 +1164,7 @@ struct hl_cs {
* @userptr_list: linked-list of userptr mappings that belong to this job and
* wait for completion.
* @debugfs_list: node in debugfs list of command submission jobs.
+ * @refcount: reference counter for usage of the CS job.
* @queue_type: the type of the H/W queue this job is submitted to.
* @id: the id of this job inside a CS.
* @hw_queue_id: the id of the H/W queue this job is submitted to.
@@ -1019,6 +1188,7 @@ struct hl_cs_job {
struct work_struct finish_work;
struct list_head userptr_list;
struct list_head debugfs_list;
+ struct kref refcount;
enum hl_queue_type queue_type;
u32 id;
u32 hw_queue_id;
@@ -1067,7 +1237,6 @@ struct hl_cs_parser {
u8 contains_dma_pkt;
};
-
/*
* MEMORY STRUCTURE
*/
@@ -1285,6 +1454,10 @@ struct hl_dbg_device_entry {
* DEVICES
*/
+#define HL_STR_MAX 32
+
+#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)
+
/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
* x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
*/
@@ -1428,11 +1601,13 @@ struct hwmon_chip_info;
/**
* struct hl_device_reset_work - reset workqueue task wrapper.
+ * @wq: work queue for device reset procedure.
* @reset_work: reset work to be done.
* @hdev: habanalabs device structure.
*/
struct hl_device_reset_work {
- struct work_struct reset_work;
+ struct workqueue_struct *wq;
+ struct delayed_work reset_work;
struct hl_device *hdev;
};
@@ -1446,18 +1621,78 @@ struct hl_device_idle_busy_ts {
ktime_t busy_to_idle_ts;
};
+/**
+ * struct hr_mmu_hop_addrs - used for holding per-device host-resident mmu hop
+ * information.
+ * @virt_addr: the virtual address of the hop.
+ * @phys_addr: the physical address of the hop (used by the device-mmu).
+ * @shadow_addr: The shadow of the hop used by the driver for walking the hops.
+ */
+struct hr_mmu_hop_addrs {
+ u64 virt_addr;
+ u64 phys_addr;
+ u64 shadow_addr;
+};
/**
- * struct hl_mmu_priv - used for holding per-device mmu internal information.
+ * struct hl_mmu_hr_priv - used for holding per-device mmu host-resident
+ * page-table internal information.
* @mmu_pgt_pool: pool of page tables used by MMU for allocating hops.
* @mmu_shadow_hop0: shadow array of hop0 tables.
*/
-struct hl_mmu_priv {
+struct hl_mmu_hr_priv {
+ struct gen_pool *mmu_pgt_pool;
+ struct hr_mmu_hop_addrs *mmu_shadow_hop0;
+};
+
+/**
+ * struct hl_mmu_dr_priv - used for holding per-device mmu device-resident
+ * page-table internal information.
+ * @mmu_pgt_pool: pool of page tables used by MMU for allocating hops.
+ * @mmu_shadow_hop0: shadow array of hop0 tables.
+ */
+struct hl_mmu_dr_priv {
struct gen_pool *mmu_pgt_pool;
void *mmu_shadow_hop0;
};
/**
+ * struct hl_mmu_priv - used for holding per-device mmu internal information.
+ * @dr: information on the device-resident MMU, when exists.
+ * @hr: information on the host-resident MMU, when exists.
+ */
+struct hl_mmu_priv {
+ struct hl_mmu_dr_priv dr;
+ struct hl_mmu_hr_priv hr;
+};
+
+/**
+ * struct hl_mmu_per_hop_info - A structure describing one TLB HOP and its entry
+ * that was created in order to translate a virtual address to a
+ * physical one.
+ * @hop_addr: The address of the hop.
+ * @hop_pte_addr: The address of the hop entry.
+ * @hop_pte_val: The value in the hop entry.
+ */
+struct hl_mmu_per_hop_info {
+ u64 hop_addr;
+ u64 hop_pte_addr;
+ u64 hop_pte_val;
+};
+
+/**
+ * struct hl_mmu_hop_info - A structure describing the TLB hops and their
+ * hop-entries that were created in order to translate a virtual address to a
+ * physical one.
+ * @hop_info: Array holding the per-hop information used for the translation.
+ * @used_hops: The number of hops used for the translation.
+ */
+struct hl_mmu_hop_info {
+ struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS];
+ u32 used_hops;
+};
+
+/**
* struct hl_mmu_funcs - Device related MMU functions.
* @init: initialize the MMU module.
* @fini: release the MMU module.
@@ -1468,6 +1703,9 @@ struct hl_mmu_priv {
* @flush: flush all writes from all cores to reach device MMU.
* @swap_out: marks all mapping of the given context as swapped out.
* @swap_in: marks all mapping of the given context as swapped in.
+ * @get_tlb_info: returns the list of hops and hop-entries that were
+ * created in order to translate the given virtual address to a
+ * physical one.
*/
struct hl_mmu_funcs {
int (*init)(struct hl_device *hdev);
@@ -1482,6 +1720,8 @@ struct hl_mmu_funcs {
void (*flush)(struct hl_ctx *ctx);
void (*swap_out)(struct hl_ctx *ctx);
void (*swap_in)(struct hl_ctx *ctx);
+ int (*get_tlb_info)(struct hl_ctx *ctx,
+ u64 virt_addr, struct hl_mmu_hop_info *hops);
};
/**
@@ -1497,6 +1737,7 @@ struct hl_mmu_funcs {
* @dev_ctrl: related kernel device structure for the control device
* @work_freq: delayed work to lower device frequency if possible.
* @work_heartbeat: delayed work for CPU-CP is-alive check.
+ * @device_reset_work: delayed work which performs hard reset
* @asic_name: ASIC specific name.
* @asic_type: ASIC specific type.
* @completion_queue: array of hl_cq.
@@ -1505,8 +1746,8 @@ struct hl_mmu_funcs {
* @eq_wq: work queue of event queue for executing work in process context.
* @kernel_ctx: Kernel driver context structure.
* @kernel_queues: array of hl_hw_queue.
- * @hw_queues_mirror_list: CS mirror list for TDR.
- * @hw_queues_mirror_lock: protects hw_queues_mirror_list.
+ * @cs_mirror_list: CS mirror list for TDR.
+ * @cs_mirror_lock: protects cs_mirror_list.
* @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
* @event_queue: event queue for IRQ from CPU-CP.
* @dma_pool: DMA pool for small allocations.
@@ -1525,6 +1766,7 @@ struct hl_mmu_funcs {
* @hwmon_dev: H/W monitor device.
* @pm_mng_profile: current power management profile.
* @hl_chip_info: ASIC's sensors information.
+ * @status: string description of each device status.
* @hl_debugfs: device's debugfs manager.
* @cb_pool: list of preallocated CBs.
* @cb_pool_lock: protects the CB pool.
@@ -1572,13 +1814,12 @@ struct hl_mmu_funcs {
* @heartbeat: is heartbeat sanity check towards CPU-CP enabled.
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false
* otherwise.
- * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
* @dram_default_page_mapping: is DRAM default page mapping enabled.
+ * @memory_scrub: true to perform device memory scrub in various locations,
+ * such as context-switch, context close, page free, etc.
* @pmmu_huge_range: is a different virtual addresses range used for PMMU with
* huge pages.
* @init_done: is the initialization of the device done.
- * @mmu_enable: is MMU enabled.
- * @mmu_huge_page_opt: is MMU huge pages optimization enabled.
* @device_cpu_disabled: is the device CPU disabled (due to timeouts)
* @dma_mask: the dma mask that was set for this device
* @in_debug: is device under debug. This, together with fpriv_list, enforces
@@ -1589,9 +1830,16 @@ struct hl_mmu_funcs {
* @stop_on_err: true if engines should stop on error.
* @supports_sync_stream: is sync stream supported.
* @sync_stream_queue_idx: helper index for sync stream queues initialization.
+ * @collective_mon_idx: helper index for collective initialization
* @supports_coresight: is CoreSight supported.
* @supports_soft_reset: is soft reset supported.
* @supports_cb_mapping: is mapping a CB to the device's MMU supported.
+ * @needs_reset: true if reset_on_lockup is false and device should be reset
+ * due to lockup.
+ * @process_kill_trial_cnt: number of trials reset thread tried killing
+ * user processes
+ * @device_fini_pending: true if device_fini was called and might be
+ * waiting for the reset thread to finish
*/
struct hl_device {
struct pci_dev *pdev;
@@ -1604,15 +1852,17 @@ struct hl_device {
struct device *dev_ctrl;
struct delayed_work work_freq;
struct delayed_work work_heartbeat;
- char asic_name[32];
+ struct hl_device_reset_work device_reset_work;
+ char asic_name[HL_STR_MAX];
+ char status[HL_DEV_STS_MAX][HL_STR_MAX];
enum hl_asic_type asic_type;
struct hl_cq *completion_queue;
struct workqueue_struct **cq_wq;
struct workqueue_struct *eq_wq;
struct hl_ctx *kernel_ctx;
struct hl_hw_queue *kernel_queues;
- struct list_head hw_queues_mirror_list;
- spinlock_t hw_queues_mirror_lock;
+ struct list_head cs_mirror_list;
+ spinlock_t cs_mirror_lock;
struct hl_cb_mgr kernel_cb_mgr;
struct hl_eq event_queue;
struct dma_pool *dma_pool;
@@ -1649,10 +1899,10 @@ struct hl_device {
struct hl_device_idle_busy_ts *idle_busy_ts_arr;
- struct hl_cs_counters aggregated_cs_counters;
+ struct hl_cs_counters_atomic aggregated_cs_counters;
struct hl_mmu_priv mmu_priv;
- struct hl_mmu_funcs mmu_func;
+ struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS];
atomic64_t dram_used_mem;
u64 timeout_jiffies;
@@ -1677,8 +1927,8 @@ struct hl_device {
u8 hard_reset_pending;
u8 heartbeat;
u8 reset_on_lockup;
- u8 dram_supports_virtual_memory;
u8 dram_default_page_mapping;
+ u8 memory_scrub;
u8 pmmu_huge_range;
u8 init_done;
u8 device_cpu_disabled;
@@ -1689,17 +1939,22 @@ struct hl_device {
u8 stop_on_err;
u8 supports_sync_stream;
u8 sync_stream_queue_idx;
+ u8 collective_mon_idx;
u8 supports_coresight;
u8 supports_soft_reset;
u8 supports_cb_mapping;
+ u8 needs_reset;
+ u8 process_kill_trial_cnt;
+ u8 device_fini_pending;
/* Parameters for bring-up */
+ u64 nic_ports_mask;
+ u64 fw_loading;
u8 mmu_enable;
u8 mmu_huge_page_opt;
u8 cpu_enable;
u8 reset_pcilink;
u8 cpu_queues_enable;
- u8 fw_loading;
u8 pldm;
u8 axi_drain;
u8 sram_scrambler_enable;
@@ -1707,6 +1962,7 @@ struct hl_device {
u8 hard_reset_on_fw_events;
u8 bmc_enable;
u8 rl_enable;
+ u8 reset_on_preboot_fail;
};
@@ -1793,7 +2049,8 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
int hl_device_open(struct inode *inode, struct file *filp);
int hl_device_open_ctrl(struct inode *inode, struct file *filp);
-bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
+bool hl_device_operational(struct hl_device *hdev,
+ enum hl_device_status *status);
enum hl_device_status hl_device_status(struct hl_device *hdev);
int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
@@ -1878,8 +2135,10 @@ void hl_cs_rollback_all(struct hl_device *hdev);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void hl_sob_reset_error(struct kref *ref);
+int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
void hl_fence_put(struct hl_fence *fence);
void hl_fence_get(struct hl_fence *fence);
+void cs_get(struct hl_cs *cs);
void goya_set_asic_funcs(struct hl_device *hdev);
void gaudi_set_asic_funcs(struct hl_device *hdev);
@@ -1890,6 +2149,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx);
int hl_vm_init(struct hl_device *hdev);
void hl_vm_fini(struct hl_device *hdev);
+u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
+ enum hl_va_range_type type, u32 size, u32 alignment);
+int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
+ u64 start_addr, u64 size);
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
struct hl_userptr *userptr);
void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
@@ -1903,20 +2166,26 @@ int hl_mmu_init(struct hl_device *hdev);
void hl_mmu_fini(struct hl_device *hdev);
int hl_mmu_ctx_init(struct hl_ctx *ctx);
void hl_mmu_ctx_fini(struct hl_ctx *ctx);
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
u32 page_size, bool flush_pte);
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
bool flush_pte);
+int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
+ u64 phys_addr, u32 size);
+int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
int hl_mmu_if_set_funcs(struct hl_device *hdev);
-void hl_mmu_v1_set_funcs(struct hl_device *hdev);
+void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
+int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
+int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
+ struct hl_mmu_hop_info *hops);
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
- void __iomem *dst);
+ void __iomem *dst, u32 src_offset, u32 size);
int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
- u16 len, u32 timeout, long *result);
+ u16 len, u32 timeout, u64 *result);
int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type);
int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
size_t irq_arr_size);
@@ -1926,18 +2195,22 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr);
int hl_fw_send_heartbeat(struct hl_device *hdev);
-int hl_fw_cpucp_info_get(struct hl_device *hdev);
+int hl_fw_cpucp_info_get(struct hl_device *hdev,
+ u32 cpu_security_boot_status_reg);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters);
int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
u64 *total_energy);
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+ u16 *pll_freq_arr);
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
- u32 boot_err0_reg, bool skip_bmc,
- u32 cpu_timeout, u32 boot_fit_timeout);
-int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg,
- u32 boot_err0_reg, u32 timeout);
+ u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
+ bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout);
+int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
+ u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
+ u32 timeout);
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
bool is_wc[3]);
@@ -1946,8 +2219,7 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
struct hl_inbound_pci_region *pci_region);
int hl_pci_set_outbound_region(struct hl_device *hdev,
struct hl_outbound_pci_region *pci_region);
-int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
- u32 boot_err0_reg, u32 preboot_ver_timeout);
+int hl_pci_init(struct hl_device *hdev);
void hl_pci_fini(struct hl_device *hdev);
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index f9067d3ef437..6bbb6bca6860 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -29,6 +29,7 @@ static DEFINE_MUTEX(hl_devs_idr_lock);
static int timeout_locked = 5;
static int reset_on_lockup = 1;
+static int memory_scrub = 1;
module_param(timeout_locked, int, 0444);
MODULE_PARM_DESC(timeout_locked,
@@ -38,6 +39,10 @@ module_param(reset_on_lockup, int, 0444);
MODULE_PARM_DESC(reset_on_lockup,
"Do device reset on lockup (0 = no, 1 = yes, default yes)");
+module_param(memory_scrub, int, 0444);
+MODULE_PARM_DESC(memory_scrub,
+ "Scrub device memory in various states (0 = no, 1 = yes, default yes)");
+
#define PCI_VENDOR_ID_HABANALABS 0x1da3
#define PCI_IDS_GOYA 0x0001
@@ -87,6 +92,7 @@ static enum hl_asic_type get_asic_type(u16 device)
*/
int hl_device_open(struct inode *inode, struct file *filp)
{
+ enum hl_device_status status;
struct hl_device *hdev;
struct hl_fpriv *hpriv;
int rc;
@@ -119,10 +125,10 @@ int hl_device_open(struct inode *inode, struct file *filp)
mutex_lock(&hdev->fpriv_list_lock);
- if (hl_device_disabled_or_in_reset(hdev)) {
+ if (!hl_device_operational(hdev, &status)) {
dev_err_ratelimited(hdev->dev,
- "Can't open %s because it is disabled or in reset\n",
- dev_name(hdev->dev));
+ "Can't open %s because it is %s\n",
+ dev_name(hdev->dev), hdev->status[status]);
rc = -EPERM;
goto out_err;
}
@@ -199,7 +205,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
mutex_lock(&hdev->fpriv_list_lock);
- if (hl_device_disabled_or_in_reset(hdev)) {
+ if (!hl_device_operational(hdev, NULL)) {
dev_err_ratelimited(hdev->dev_ctrl,
"Can't open %s because it is disabled or in reset\n",
dev_name(hdev->dev_ctrl));
@@ -228,19 +234,20 @@ out_err:
static void set_driver_behavior_per_device(struct hl_device *hdev)
{
- hdev->mmu_enable = 1;
hdev->cpu_enable = 1;
- hdev->fw_loading = 1;
+ hdev->fw_loading = FW_TYPE_ALL_TYPES;
hdev->cpu_queues_enable = 1;
hdev->heartbeat = 1;
+ hdev->mmu_enable = 1;
hdev->clock_gating_mask = ULONG_MAX;
-
- hdev->reset_pcilink = 0;
- hdev->axi_drain = 0;
hdev->sram_scrambler_enable = 1;
hdev->dram_scrambler_enable = 1;
hdev->bmc_enable = 1;
hdev->hard_reset_on_fw_events = 1;
+ hdev->reset_on_preboot_fail = 1;
+
+ hdev->reset_pcilink = 0;
+ hdev->axi_drain = 0;
}
/*
@@ -281,8 +288,17 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
hdev->asic_type = asic_type;
}
+ /* Assign status description string */
+ strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
+ "disabled", HL_STR_MAX);
+ strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET],
+ "in reset", HL_STR_MAX);
+ strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET],
+ "needs reset", HL_STR_MAX);
+
hdev->major = hl_major;
hdev->reset_on_lockup = reset_on_lockup;
+ hdev->memory_scrub = memory_scrub;
hdev->pldm = 0;
set_driver_behavior_per_device(hdev);
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 07317ea49129..32e6af1db4e3 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -314,20 +314,45 @@ static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ struct hl_info_cs_counters cs_counters = {0};
struct hl_device *hdev = hpriv->hdev;
- struct hl_info_cs_counters cs_counters = { {0} };
+ struct hl_cs_counters_atomic *cntr;
u32 max_size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ cntr = &hdev->aggregated_cs_counters;
if ((!max_size) || (!out))
return -EINVAL;
- memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters,
- sizeof(struct hl_cs_counters));
-
- if (hpriv->ctx)
- memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters,
- sizeof(struct hl_cs_counters));
+ cs_counters.total_out_of_mem_drop_cnt =
+ atomic64_read(&cntr->out_of_mem_drop_cnt);
+ cs_counters.total_parsing_drop_cnt =
+ atomic64_read(&cntr->parsing_drop_cnt);
+ cs_counters.total_queue_full_drop_cnt =
+ atomic64_read(&cntr->queue_full_drop_cnt);
+ cs_counters.total_device_in_reset_drop_cnt =
+ atomic64_read(&cntr->device_in_reset_drop_cnt);
+ cs_counters.total_max_cs_in_flight_drop_cnt =
+ atomic64_read(&cntr->max_cs_in_flight_drop_cnt);
+
+ if (hpriv->ctx) {
+ cs_counters.ctx_out_of_mem_drop_cnt =
+ atomic64_read(
+ &hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
+ cs_counters.ctx_parsing_drop_cnt =
+ atomic64_read(
+ &hpriv->ctx->cs_counters.parsing_drop_cnt);
+ cs_counters.ctx_queue_full_drop_cnt =
+ atomic64_read(
+ &hpriv->ctx->cs_counters.queue_full_drop_cnt);
+ cs_counters.ctx_device_in_reset_drop_cnt =
+ atomic64_read(
+ &hpriv->ctx->cs_counters.device_in_reset_drop_cnt);
+ cs_counters.ctx_max_cs_in_flight_drop_cnt =
+ atomic64_read(
+ &hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt);
+ }
return copy_to_user(out, &cs_counters,
min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
@@ -378,11 +403,32 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0;
}
+static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_pll_frequency_info freq_info = {0};
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ int rc;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ rc = hl_fw_cpucp_pll_info_get(hdev, args->pll_index, freq_info.output);
+ if (rc)
+ return rc;
+
+ return copy_to_user(out, &freq_info,
+ min((size_t) max_size, sizeof(freq_info))) ? -EFAULT : 0;
+}
+
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
+ enum hl_device_status status;
struct hl_info_args *args = data;
struct hl_device *hdev = hpriv->hdev;
+
int rc;
/*
@@ -403,10 +449,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
break;
}
- if (hl_device_disabled_or_in_reset(hdev)) {
+ if (!hl_device_operational(hdev, &status)) {
dev_warn_ratelimited(dev,
"Device is %s. Can't execute INFO IOCTL\n",
- atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ hdev->status[status]);
return -EBUSY;
}
@@ -453,6 +499,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_TOTAL_ENERGY:
return total_energy_consumption_info(hpriv, args);
+ case HL_INFO_PLL_FREQUENCY:
+ return pll_frequency_info(hpriv, args);
+
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
@@ -476,12 +525,14 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
{
struct hl_debug_args *args = data;
struct hl_device *hdev = hpriv->hdev;
+ enum hl_device_status status;
+
int rc = 0;
- if (hl_device_disabled_or_in_reset(hdev)) {
+ if (!hl_device_operational(hdev, &status)) {
dev_warn_ratelimited(hdev->dev,
"Device is %s. Can't execute DEBUG IOCTL\n",
- atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ hdev->status[status]);
return -EBUSY;
}
@@ -544,7 +595,7 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
int retcode;
if (hdev->hard_reset_pending) {
- dev_crit_ratelimited(hdev->dev_ctrl,
+ dev_crit_ratelimited(dev,
"Device HARD reset pending! Please close FD\n");
return -ENODEV;
}
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 250cf9cefc06..7caf868d1585 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -48,6 +48,11 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
return;
q = &hdev->kernel_queues[0];
+
+ /* There are no internal queues if H/W queues are being used */
+ if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
+ return;
+
for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
if (q->queue_type == QUEUE_TYPE_INT)
atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
@@ -333,7 +338,14 @@ static void int_queue_schedule_job(struct hl_cs_job *job)
bd.ctl = 0;
bd.len = cpu_to_le32(job->job_cb_size);
- bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
+
+ if (job->is_kernel_allocated_cb)
+ /* bus_address is actually a mmu mapped address
+ * allocated from an internal pool
+ */
+ bd.ptr = cpu_to_le64(job->user_cb->bus_address);
+ else
+ bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);
@@ -388,6 +400,91 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}
+static void init_signal_cs(struct hl_device *hdev,
+ struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
+{
+ struct hl_sync_stream_properties *prop;
+ struct hl_hw_sob *hw_sob;
+ u32 q_idx;
+
+ q_idx = job->hw_queue_id;
+ prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+ hw_sob = &prop->hw_sob[prop->curr_sob_offset];
+
+ cs_cmpl->hw_sob = hw_sob;
+ cs_cmpl->sob_val = prop->next_sob_val++;
+
+ dev_dbg(hdev->dev,
+ "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
+ cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
+
+ hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
+ cs_cmpl->hw_sob->sob_id, 0);
+
+ kref_get(&hw_sob->kref);
+
+ /* check for wraparound */
+ if (prop->next_sob_val == HL_MAX_SOB_VAL) {
+ /*
+ * Decrement as we reached the max value.
+ * The release function won't be called here as we've
+ * just incremented the refcount.
+ */
+ kref_put(&hw_sob->kref, hl_sob_reset_error);
+ prop->next_sob_val = 1;
+ /* only two SOBs are currently in use */
+ prop->curr_sob_offset =
+ (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
+
+ dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
+ prop->curr_sob_offset, q_idx);
+ }
+}
+
+static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
+ struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
+{
+ struct hl_cs_compl *signal_cs_cmpl;
+ struct hl_sync_stream_properties *prop;
+ struct hl_gen_wait_properties wait_prop;
+ u32 q_idx;
+
+ q_idx = job->hw_queue_id;
+ prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+
+ signal_cs_cmpl = container_of(cs->signal_fence,
+ struct hl_cs_compl,
+ base_fence);
+
+ /* copy the SOB id and value of the signal CS */
+ cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+ cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+
+ dev_dbg(hdev->dev,
+ "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
+ cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
+ prop->base_mon_id, q_idx);
+
+ wait_prop.data = (void *) job->patched_cb;
+ wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
+ wait_prop.sob_mask = 0x1;
+ wait_prop.sob_val = cs_cmpl->sob_val;
+ wait_prop.mon_id = prop->base_mon_id;
+ wait_prop.q_idx = q_idx;
+ wait_prop.size = 0;
+ hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
+
+ kref_get(&cs_cmpl->hw_sob->kref);
+ /*
+ * Must put the signal fence after the SOB refcnt increment so
+ * the SOB refcnt won't turn 0 and reset the SOB before the
+ * wait CS was submitted.
+ */
+ mb();
+ hl_fence_put(cs->signal_fence);
+ cs->signal_fence = NULL;
+}
+
/*
* init_signal_wait_cs - initialize a signal/wait CS
* @cs: pointer to the signal/wait CS
@@ -398,84 +495,18 @@ static void init_signal_wait_cs(struct hl_cs *cs)
{
struct hl_ctx *ctx = cs->ctx;
struct hl_device *hdev = ctx->hdev;
- struct hl_hw_queue *hw_queue;
+ struct hl_cs_job *job;
struct hl_cs_compl *cs_cmpl =
container_of(cs->fence, struct hl_cs_compl, base_fence);
- struct hl_hw_sob *hw_sob;
- struct hl_cs_job *job;
- u32 q_idx;
-
/* There is only one job in a signal/wait CS */
job = list_first_entry(&cs->job_list, struct hl_cs_job,
cs_node);
- q_idx = job->hw_queue_id;
- hw_queue = &hdev->kernel_queues[q_idx];
-
- if (cs->type & CS_TYPE_SIGNAL) {
- hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset];
-
- cs_cmpl->hw_sob = hw_sob;
- cs_cmpl->sob_val = hw_queue->next_sob_val++;
-
- dev_dbg(hdev->dev,
- "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
- cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
-
- hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
- cs_cmpl->hw_sob->sob_id);
-
- kref_get(&hw_sob->kref);
-
- /* check for wraparound */
- if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) {
- /*
- * Decrement as we reached the max value.
- * The release function won't be called here as we've
- * just incremented the refcount.
- */
- kref_put(&hw_sob->kref, hl_sob_reset_error);
- hw_queue->next_sob_val = 1;
- /* only two SOBs are currently in use */
- hw_queue->curr_sob_offset =
- (hw_queue->curr_sob_offset + 1) %
- HL_RSVD_SOBS_IN_USE;
-
- dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
- hw_queue->curr_sob_offset, q_idx);
- }
- } else if (cs->type & CS_TYPE_WAIT) {
- struct hl_cs_compl *signal_cs_cmpl;
-
- signal_cs_cmpl = container_of(cs->signal_fence,
- struct hl_cs_compl,
- base_fence);
-
- /* copy the the SOB id and value of the signal CS */
- cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
- cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
-
- dev_dbg(hdev->dev,
- "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
- cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
- hw_queue->base_mon_id, q_idx);
- hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb,
- cs_cmpl->hw_sob->sob_id,
- cs_cmpl->sob_val,
- hw_queue->base_mon_id,
- q_idx);
-
- kref_get(&cs_cmpl->hw_sob->kref);
- /*
- * Must put the signal fence after the SOB refcnt increment so
- * the SOB refcnt won't turn 0 and reset the SOB before the
- * wait CS was submitted.
- */
- mb();
- hl_fence_put(cs->signal_fence);
- cs->signal_fence = NULL;
- }
+ if (cs->type & CS_TYPE_SIGNAL)
+ init_signal_cs(hdev, job, cs_cmpl);
+ else if (cs->type & CS_TYPE_WAIT)
+ init_wait_cs(hdev, cs, job, cs_cmpl);
}
/*
@@ -484,19 +515,24 @@ static void init_signal_wait_cs(struct hl_cs *cs)
*/
int hl_hw_queue_schedule_cs(struct hl_cs *cs)
{
+ enum hl_device_status status;
+ struct hl_cs_counters_atomic *cntr;
struct hl_ctx *ctx = cs->ctx;
struct hl_device *hdev = ctx->hdev;
struct hl_cs_job *job, *tmp;
struct hl_hw_queue *q;
- u32 max_queues;
int rc = 0, i, cq_cnt;
+ u32 max_queues;
+
+ cntr = &hdev->aggregated_cs_counters;
hdev->asic_funcs->hw_queues_lock(hdev);
- if (hl_device_disabled_or_in_reset(hdev)) {
- ctx->cs_counters.device_in_reset_drop_cnt++;
+ if (!hl_device_operational(hdev, &status)) {
+ atomic64_inc(&cntr->device_in_reset_drop_cnt);
+ atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
dev_err(hdev->dev,
- "device is disabled or in reset, CS rejected!\n");
+ "device is %s, CS rejected!\n", hdev->status[status]);
rc = -EPERM;
goto out;
}
@@ -527,7 +563,9 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
}
if (rc) {
- ctx->cs_counters.queue_full_drop_cnt++;
+ atomic64_inc(
+ &ctx->cs_counters.queue_full_drop_cnt);
+ atomic64_inc(&cntr->queue_full_drop_cnt);
goto unroll_cq_resv;
}
@@ -538,21 +576,23 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT))
init_signal_wait_cs(cs);
+ else if (cs->type == CS_TYPE_COLLECTIVE_WAIT)
+ hdev->asic_funcs->collective_wait_init_cs(cs);
- spin_lock(&hdev->hw_queues_mirror_lock);
- list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);
+ spin_lock(&hdev->cs_mirror_lock);
+ list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
/* Queue TDR if the CS is the first entry and if timeout is wanted */
if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
- (list_first_entry(&hdev->hw_queues_mirror_list,
+ (list_first_entry(&hdev->cs_mirror_list,
struct hl_cs, mirror_node) == cs)) {
cs->tdr_active = true;
schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
- spin_unlock(&hdev->hw_queues_mirror_lock);
- } else {
- spin_unlock(&hdev->hw_queues_mirror_lock);
+
}
+ spin_unlock(&hdev->cs_mirror_lock);
+
if (!hdev->cs_active_cnt++) {
struct hl_device_idle_busy_ts *ts;
@@ -714,22 +754,56 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
{
- struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+ struct hl_sync_stream_properties *sync_stream_prop;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_hw_sob *hw_sob;
- int sob, queue_idx = hdev->sync_stream_queue_idx++;
+ int sob, reserved_mon_idx, queue_idx;
+
+ sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+
+ /* We use 'collective_mon_idx' as a running index in order to reserve
+ * monitors for collective master/slave queues.
+ * collective master queue gets 2 reserved monitors
+ * collective slave queue gets 1 reserved monitor
+ */
+ if (hdev->kernel_queues[q_idx].collective_mode ==
+ HL_COLLECTIVE_MASTER) {
+ reserved_mon_idx = hdev->collective_mon_idx;
+
+ /* reserve the first monitor for collective master queue */
+ sync_stream_prop->collective_mstr_mon_id[0] =
+ prop->collective_first_mon + reserved_mon_idx;
+
+ /* reserve the second monitor for collective master queue */
+ sync_stream_prop->collective_mstr_mon_id[1] =
+ prop->collective_first_mon + reserved_mon_idx + 1;
+
+ hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS;
+ } else if (hdev->kernel_queues[q_idx].collective_mode ==
+ HL_COLLECTIVE_SLAVE) {
+ reserved_mon_idx = hdev->collective_mon_idx++;
+
+ /* reserve a monitor for collective slave queue */
+ sync_stream_prop->collective_slave_mon_id =
+ prop->collective_first_mon + reserved_mon_idx;
+ }
+
+ if (!hdev->kernel_queues[q_idx].supports_sync_stream)
+ return;
+
+ queue_idx = hdev->sync_stream_queue_idx++;
- hw_queue->base_sob_id =
- prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
- hw_queue->base_mon_id =
- prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
- hw_queue->next_sob_val = 1;
- hw_queue->curr_sob_offset = 0;
+ sync_stream_prop->base_sob_id = prop->sync_stream_first_sob +
+ (queue_idx * HL_RSVD_SOBS);
+ sync_stream_prop->base_mon_id = prop->sync_stream_first_mon +
+ (queue_idx * HL_RSVD_MONS);
+ sync_stream_prop->next_sob_val = 1;
+ sync_stream_prop->curr_sob_offset = 0;
for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
- hw_sob = &hw_queue->hw_sob[sob];
+ hw_sob = &sync_stream_prop->hw_sob[sob];
hw_sob->hdev = hdev;
- hw_sob->sob_id = hw_queue->base_sob_id + sob;
+ hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
hw_sob->q_idx = q_idx;
kref_init(&hw_sob->kref);
}
@@ -737,15 +811,16 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
{
- struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+ struct hl_sync_stream_properties *prop =
+ &hdev->kernel_queues[q_idx].sync_stream_prop;
/*
* In case we got here due to a stuck CS, the refcnt might be bigger
* than 1 and therefore we reset it.
*/
- kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
- hw_queue->curr_sob_offset = 0;
- hw_queue->next_sob_val = 1;
+ kref_init(&prop->hw_sob[prop->curr_sob_offset].kref);
+ prop->curr_sob_offset = 0;
+ prop->next_sob_val = 1;
}
/*
@@ -788,8 +863,7 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
break;
}
- if (q->supports_sync_stream)
- sync_stream_queue_init(hdev, q->hw_queue_id);
+ sync_stream_queue_init(hdev, q->hw_queue_id);
if (rc)
return rc;
@@ -867,6 +941,7 @@ int hl_hw_queues_create(struct hl_device *hdev)
q->queue_type = asic->hw_queues_props[i].type;
q->supports_sync_stream =
asic->hw_queues_props[i].supports_sync_stream;
+ q->collective_mode = asic->hw_queues_props[i].collective_mode;
rc = queue_init(hdev, q, i);
if (rc) {
dev_err(hdev->dev,
diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c
index 2ac29cb2fe61..6b421d76b311 100644
--- a/drivers/misc/habanalabs/common/hwmon.c
+++ b/drivers/misc/habanalabs/common/hwmon.c
@@ -114,7 +114,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
struct hl_device *hdev = dev_get_drvdata(dev);
int rc;
- if (hl_device_disabled_or_in_reset(hdev))
+ if (!hl_device_operational(hdev, NULL))
return -ENODEV;
switch (type) {
@@ -192,7 +192,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
{
struct hl_device *hdev = dev_get_drvdata(dev);
- if (hl_device_disabled_or_in_reset(hdev))
+ if (!hl_device_operational(hdev, NULL))
return -ENODEV;
switch (type) {
@@ -312,6 +312,7 @@ int hl_get_temperature(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct cpucp_packet pkt;
+ u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -322,7 +323,9 @@ int hl_get_temperature(struct hl_device *hdev,
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- 0, value);
+ 0, &result);
+
+ *value = (long) result;
if (rc) {
dev_err(hdev->dev,
@@ -363,6 +366,7 @@ int hl_get_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct cpucp_packet pkt;
+ u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -373,7 +377,9 @@ int hl_get_voltage(struct hl_device *hdev,
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- 0, value);
+ 0, &result);
+
+ *value = (long) result;
if (rc) {
dev_err(hdev->dev,
@@ -389,6 +395,7 @@ int hl_get_current(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct cpucp_packet pkt;
+ u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -399,7 +406,9 @@ int hl_get_current(struct hl_device *hdev,
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- 0, value);
+ 0, &result);
+
+ *value = (long) result;
if (rc) {
dev_err(hdev->dev,
@@ -415,6 +424,7 @@ int hl_get_fan_speed(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct cpucp_packet pkt;
+ u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -425,7 +435,9 @@ int hl_get_fan_speed(struct hl_device *hdev,
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- 0, value);
+ 0, &result);
+
+ *value = (long) result;
if (rc) {
dev_err(hdev->dev,
@@ -441,6 +453,7 @@ int hl_get_pwm_info(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct cpucp_packet pkt;
+ u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -451,7 +464,9 @@ int hl_get_pwm_info(struct hl_device *hdev,
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- 0, value);
+ 0, &result);
+
+ *value = (long) result;
if (rc) {
dev_err(hdev->dev,
@@ -542,7 +557,7 @@ int hl_hwmon_init(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
- if ((hdev->hwmon_initialized) || !(hdev->fw_loading))
+ if ((hdev->hwmon_initialized) || !(hdev->cpu_queues_enable))
return 0;
if (hdev->hl_chip_info->info) {
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 84227819e4d1..cbe9da4e0211 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -11,7 +11,6 @@
#include <linux/uaccess.h>
#include <linux/slab.h>
-#include <linux/genalloc.h>
#define HL_MMU_DEBUG 0
@@ -46,7 +45,7 @@
* @ret_handle : result handle
*
* This function does the following:
- * - Allocate the requested size rounded up to 2MB pages
+ * - Allocate the requested size rounded up to 'dram_page_size' pages
* - Return unique handle
*/
static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
@@ -81,6 +80,16 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
num_pgs, total_size);
return -ENOMEM;
}
+
+ if (hdev->memory_scrub) {
+ rc = hdev->asic_funcs->scrub_device_mem(hdev, paddr,
+ total_size);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to scrub contiguous device memory\n");
+ goto pages_pack_err;
+ }
+ }
}
phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
@@ -118,6 +127,17 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
goto page_err;
}
+ if (hdev->memory_scrub) {
+ rc = hdev->asic_funcs->scrub_device_mem(hdev,
+ phys_pg_pack->pages[i],
+ page_size);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to scrub device memory\n");
+ goto page_err;
+ }
+ }
+
num_curr_pgs++;
}
}
@@ -601,6 +621,87 @@ out:
}
/*
+ * hl_reserve_va_block() - reserve a virtual block of a given size.
+ * @hdev: pointer to the habanalabs device structure.
+ * @ctx: current context
+ * @type: virtual addresses range type.
+ * @size: requested block size.
+ * @alignment: required alignment in bytes of the virtual block start address;
+ * the va_range page size is used instead when it is larger.
+ *
+ * This function does the following:
+ * - Iterate on the virtual block list to find a suitable virtual block for the
+ * given size and alignment.
+ * - Reserve the requested block and update the list.
+ * - Return the start address of the virtual block.
+ */
+u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
+ enum hl_va_range_type type, u32 size, u32 alignment)
+{
+ return get_va_block(hdev, ctx->va_range[type], size, 0,
+ max(alignment, ctx->va_range[type]->page_size));
+}
+
+/**
+ * hl_get_va_range_type() - get va_range type for the given address and size.
+ * @address: The start address of the area we want to validate.
+ * @size: The size in bytes of the area we want to validate.
+ * @type: returned va_range type
+ *
+ * Return: 0 if the area is inside a valid range, -EINVAL otherwise.
+ */
+static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
+ enum hl_va_range_type *type)
+{
+ int i;
+
+ for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX; i++) {
+ if (hl_mem_area_inside_range(address, size,
+ ctx->va_range[i]->start_addr,
+ ctx->va_range[i]->end_addr)) {
+ *type = i;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * hl_unreserve_va_block - wrapper for add_va_block for unreserving a va block
+ *
+ * @hdev: pointer to the habanalabs device structure
+ * @ctx: current context
+ * @start: start virtual address
+ * @end: end virtual address
+ *
+ * This function does the following:
+ * - Takes the list lock and calls add_va_block_locked
+ */
+int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
+ u64 start_addr, u64 size)
+{
+ enum hl_va_range_type type;
+ int rc;
+
+ rc = hl_get_va_range_type(ctx, start_addr, size, &type);
+ if (rc) {
+ dev_err(hdev->dev,
+ "cannot find va_range for va %#llx size %llu",
+ start_addr, size);
+ return rc;
+ }
+
+ rc = add_va_block(hdev, ctx->va_range[type], start_addr,
+ start_addr + size - 1);
+ if (rc)
+ dev_warn(hdev->dev,
+ "add va block failed for vaddr: 0x%llx\n", start_addr);
+
+ return rc;
+}
+
+/*
* get_sg_info - get number of pages and the DMA address from SG list
*
* @sg : the SG list
@@ -742,7 +843,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
paddr = phys_pg_pack->pages[i];
- rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+ rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size,
(i + 1) == phys_pg_pack->npages);
if (rc) {
dev_err(hdev->dev,
@@ -761,7 +862,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
err:
next_vaddr = vaddr;
for (i = 0 ; i < mapped_pg_cnt ; i++) {
- if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+ if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
(i + 1) == mapped_pg_cnt))
dev_warn_ratelimited(hdev->dev,
"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
@@ -791,7 +892,7 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
next_vaddr = vaddr;
for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
- if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+ if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
(i + 1) == phys_pg_pack->npages))
dev_warn_ratelimited(hdev->dev,
"unmap failed for vaddr: 0x%llx\n", next_vaddr);
@@ -888,7 +989,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
/* get required alignment */
if (phys_pg_pack->page_size == page_size) {
- va_range = ctx->host_va_range;
+ va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
/*
* huge page alignment may be needed in case of regular
@@ -903,7 +1004,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
* huge page alignment is needed in case of huge page
* mapping
*/
- va_range = ctx->host_huge_va_range;
+ va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
va_block_align = huge_page_size;
}
} else {
@@ -928,7 +1029,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
hint_addr = args->map_device.hint_addr;
/* DRAM VA alignment is the same as the DRAM page size */
- va_range = ctx->dram_va_range;
+ va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
va_block_align = hdev->asic_prop.dmmu.page_size;
}
@@ -1073,12 +1174,12 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
if (phys_pg_pack->page_size ==
hdev->asic_prop.pmmu.page_size)
- va_range = ctx->host_va_range;
+ va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
else
- va_range = ctx->host_huge_va_range;
+ va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
} else if (*vm_type == VM_TYPE_PHYS_PACK) {
is_userptr = false;
- va_range = ctx->dram_va_range;
+ va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
phys_pg_pack = hnode->ptr;
} else {
dev_warn(hdev->dev,
@@ -1217,6 +1318,7 @@ out:
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
{
+ enum hl_device_status status;
union hl_mem_args *args = data;
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
@@ -1224,10 +1326,10 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
u32 handle = 0;
int rc;
- if (hl_device_disabled_or_in_reset(hdev)) {
+ if (!hl_device_operational(hdev, &status)) {
dev_warn_ratelimited(hdev->dev,
"Device is %s. Can't execute MEMORY IOCTL\n",
- atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ hdev->status[status]);
return -EBUSY;
}
@@ -1236,18 +1338,35 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
switch (args->in.op) {
case HL_MEM_OP_ALLOC:
- if (!hdev->dram_supports_virtual_memory) {
- dev_err(hdev->dev, "DRAM alloc is not supported\n");
- rc = -EINVAL;
- goto out;
- }
-
if (args->in.alloc.mem_size == 0) {
dev_err(hdev->dev,
"alloc size must be larger than 0\n");
rc = -EINVAL;
goto out;
}
+
+ /* If DRAM does not support virtual memory the driver won't
+ * handle the allocation/freeing of that memory. However, for
+ * system administration/monitoring purposes, the driver will
+ * keep track of the amount of DRAM memory that is allocated
+ * and freed by the user. Because this code totally relies on
+ * the user's input, the driver can't ensure the validity
+ * of this accounting.
+ */
+ if (!hdev->asic_prop.dram_supports_virtual_memory) {
+ atomic64_add(args->in.alloc.mem_size,
+ &ctx->dram_phys_mem);
+ atomic64_add(args->in.alloc.mem_size,
+ &hdev->dram_used_mem);
+
+ dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
+ rc = 0;
+
+ memset(args, 0, sizeof(*args));
+ args->out.handle = 0;
+ goto out;
+ }
+
rc = alloc_device_memory(ctx, &args->in, &handle);
memset(args, 0, sizeof(*args));
@@ -1255,6 +1374,26 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
break;
case HL_MEM_OP_FREE:
+ /* If DRAM does not support virtual memory the driver won't
+ * handle the allocation/freeing of that memory. However, for
+ * system administration/monitoring purposes, the driver will
+ * keep track of the amount of DRAM memory that is allocated
+ * and freed by the user. Because this code totally relies on
+ * the user's input, the driver can't ensure the validity
+ * of this accounting.
+ */
+ if (!hdev->asic_prop.dram_supports_virtual_memory) {
+ atomic64_sub(args->in.alloc.mem_size,
+ &ctx->dram_phys_mem);
+ atomic64_sub(args->in.alloc.mem_size,
+ &hdev->dram_used_mem);
+
+ dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
+ rc = 0;
+
+ goto out;
+ }
+
rc = free_device_memory(ctx, args->in.free.handle);
break;
@@ -1498,7 +1637,7 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
* addresses.
*/
static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
- u64 start, u64 end)
+ u64 start, u64 end, u32 page_size)
{
int rc;
@@ -1528,6 +1667,7 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
va_range->start_addr = start;
va_range->end_addr = end;
+ va_range->page_size = page_size;
return 0;
}
@@ -1540,8 +1680,7 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
* This function does the following:
* - Frees the virtual addresses block list and its lock
*/
-static void va_range_fini(struct hl_device *hdev,
- struct hl_va_range *va_range)
+static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
{
mutex_lock(&va_range->lock);
clear_va_list_locked(hdev, &va_range->list);
@@ -1571,101 +1710,97 @@ static void va_range_fini(struct hl_device *hdev,
static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
u64 host_range_start,
u64 host_range_end,
+ u32 host_page_size,
u64 host_huge_range_start,
u64 host_huge_range_end,
+ u32 host_huge_page_size,
u64 dram_range_start,
- u64 dram_range_end)
+ u64 dram_range_end,
+ u32 dram_page_size)
{
struct hl_device *hdev = ctx->hdev;
- int rc;
-
- ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
- if (!ctx->host_va_range)
- return -ENOMEM;
-
- ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
- GFP_KERNEL);
- if (!ctx->host_huge_va_range) {
- rc = -ENOMEM;
- goto host_huge_va_range_err;
- }
-
- ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
- if (!ctx->dram_va_range) {
- rc = -ENOMEM;
- goto dram_va_range_err;
+ int i, rc;
+
+ for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) {
+ ctx->va_range[i] =
+ kzalloc(sizeof(struct hl_va_range), GFP_KERNEL);
+ if (!ctx->va_range[i]) {
+ rc = -ENOMEM;
+ goto free_va_range;
+ }
}
rc = hl_mmu_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
- goto mmu_ctx_err;
+ goto free_va_range;
}
mutex_init(&ctx->mem_hash_lock);
hash_init(ctx->mem_hash);
- mutex_init(&ctx->host_va_range->lock);
+ mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
- rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
- host_range_end);
+ rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST],
+ host_range_start, host_range_end, host_page_size);
if (rc) {
dev_err(hdev->dev, "failed to init host vm range\n");
- goto host_page_range_err;
+ goto mmu_ctx_fini;
}
if (hdev->pmmu_huge_range) {
- mutex_init(&ctx->host_huge_va_range->lock);
+ mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
- rc = va_range_init(hdev, ctx->host_huge_va_range,
- host_huge_range_start,
- host_huge_range_end);
+ rc = va_range_init(hdev,
+ ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE],
+ host_huge_range_start, host_huge_range_end,
+ host_huge_page_size);
if (rc) {
dev_err(hdev->dev,
"failed to init host huge vm range\n");
- goto host_hpage_range_err;
+ goto clear_host_va_range;
}
} else {
- ctx->host_huge_va_range = ctx->host_va_range;
+ kfree(ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);
+ ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE] =
+ ctx->va_range[HL_VA_RANGE_TYPE_HOST];
}
- mutex_init(&ctx->dram_va_range->lock);
+ mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);
- rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
- dram_range_end);
+ rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM],
+ dram_range_start, dram_range_end, dram_page_size);
if (rc) {
dev_err(hdev->dev, "failed to init dram vm range\n");
- goto dram_vm_err;
+ goto clear_host_huge_va_range;
}
hl_debugfs_add_ctx_mem_hash(hdev, ctx);
return 0;
-dram_vm_err:
- mutex_destroy(&ctx->dram_va_range->lock);
+clear_host_huge_va_range:
+ mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);
if (hdev->pmmu_huge_range) {
- mutex_lock(&ctx->host_huge_va_range->lock);
- clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
- mutex_unlock(&ctx->host_huge_va_range->lock);
+ mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
+ clear_va_list_locked(hdev,
+ &ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->list);
+ mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
}
-host_hpage_range_err:
+clear_host_va_range:
if (hdev->pmmu_huge_range)
- mutex_destroy(&ctx->host_huge_va_range->lock);
- mutex_lock(&ctx->host_va_range->lock);
- clear_va_list_locked(hdev, &ctx->host_va_range->list);
- mutex_unlock(&ctx->host_va_range->lock);
-host_page_range_err:
- mutex_destroy(&ctx->host_va_range->lock);
+ mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
+ mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
+ clear_va_list_locked(hdev, &ctx->va_range[HL_VA_RANGE_TYPE_HOST]->list);
+ mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
+mmu_ctx_fini:
+ mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
mutex_destroy(&ctx->mem_hash_lock);
hl_mmu_ctx_fini(ctx);
-mmu_ctx_err:
- kfree(ctx->dram_va_range);
-dram_va_range_err:
- kfree(ctx->host_huge_va_range);
-host_huge_va_range_err:
- kfree(ctx->host_va_range);
+free_va_range:
+ for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++)
+ kfree(ctx->va_range[i]);
return rc;
}
@@ -1675,6 +1810,7 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
u64 host_range_start, host_range_end, host_huge_range_start,
host_huge_range_end, dram_range_start, dram_range_end;
+ u32 host_page_size, host_huge_page_size, dram_page_size;
atomic64_set(&ctx->dram_phys_mem, 0);
@@ -1685,27 +1821,23 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
* In case of DRAM mapping, the returned address is the physical
* address of the memory related to the given handle.
*/
- if (ctx->hdev->mmu_enable) {
- dram_range_start = prop->dmmu.start_addr;
- dram_range_end = prop->dmmu.end_addr;
- host_range_start = prop->pmmu.start_addr;
- host_range_end = prop->pmmu.end_addr;
- host_huge_range_start = prop->pmmu_huge.start_addr;
- host_huge_range_end = prop->pmmu_huge.end_addr;
- } else {
- dram_range_start = prop->dram_user_base_address;
- dram_range_end = prop->dram_end_address;
- host_range_start = prop->dram_user_base_address;
- host_range_end = prop->dram_end_address;
- host_huge_range_start = prop->dram_user_base_address;
- host_huge_range_end = prop->dram_end_address;
- }
+ if (!ctx->hdev->mmu_enable)
+ return 0;
+
+ dram_range_start = prop->dmmu.start_addr;
+ dram_range_end = prop->dmmu.end_addr;
+ dram_page_size = prop->dmmu.page_size;
+ host_range_start = prop->pmmu.start_addr;
+ host_range_end = prop->pmmu.end_addr;
+ host_page_size = prop->pmmu.page_size;
+ host_huge_range_start = prop->pmmu_huge.start_addr;
+ host_huge_range_end = prop->pmmu_huge.end_addr;
+ host_huge_page_size = prop->pmmu_huge.page_size;
return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
- host_huge_range_start,
- host_huge_range_end,
- dram_range_start,
- dram_range_end);
+ host_page_size, host_huge_range_start,
+ host_huge_range_end, host_huge_page_size,
+ dram_range_start, dram_range_end, dram_page_size);
}
/*
@@ -1737,6 +1869,9 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
struct hlist_node *tmp_node;
int i;
+ if (!ctx->hdev->mmu_enable)
+ return;
+
hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
/*
@@ -1771,13 +1906,21 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
}
spin_unlock(&vm->idr_lock);
- va_range_fini(hdev, ctx->dram_va_range);
+ va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);
+ va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]);
+
if (hdev->pmmu_huge_range)
- va_range_fini(hdev, ctx->host_huge_va_range);
- va_range_fini(hdev, ctx->host_va_range);
+ va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);
mutex_destroy(&ctx->mem_hash_lock);
hl_mmu_ctx_fini(ctx);
+
+ /* In this case we need to clear the global accounting of DRAM usage
+ * because the user notifies us on allocations. If the user is no more,
+ * all DRAM is available
+ */
+ if (!ctx->hdev->asic_prop.dram_supports_virtual_memory)
+ atomic64_set(&ctx->hdev->dram_used_mem, 0);
}
/*
diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c
index b5058798aeb9..33ae953d3a36 100644
--- a/drivers/misc/habanalabs/common/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu.c
@@ -22,18 +22,25 @@ static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
* hl_mmu_init() - initialize the MMU module.
* @hdev: habanalabs device structure.
*
- * This function does the following:
- * - Create a pool of pages for pgt_infos.
- * - Create a shadow table for pgt
- *
* Return: 0 for success, non-zero for failure.
*/
int hl_mmu_init(struct hl_device *hdev)
{
- if (hdev->mmu_enable)
- return hdev->mmu_func.init(hdev);
+ int rc = -EOPNOTSUPP;
- return 0;
+ if (!hdev->mmu_enable)
+ return 0;
+
+ if (hdev->mmu_func[MMU_DR_PGT].init != NULL) {
+ rc = hdev->mmu_func[MMU_DR_PGT].init(hdev);
+ if (rc)
+ return rc;
+ }
+
+ if (hdev->mmu_func[MMU_HR_PGT].init != NULL)
+ rc = hdev->mmu_func[MMU_HR_PGT].init(hdev);
+
+ return rc;
}
/**
@@ -48,8 +55,14 @@ int hl_mmu_init(struct hl_device *hdev)
*/
void hl_mmu_fini(struct hl_device *hdev)
{
- if (hdev->mmu_enable)
- hdev->mmu_func.fini(hdev);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].fini != NULL)
+ hdev->mmu_func[MMU_DR_PGT].fini(hdev);
+
+ if (hdev->mmu_func[MMU_HR_PGT].fini != NULL)
+ hdev->mmu_func[MMU_HR_PGT].fini(hdev);
}
/**
@@ -63,11 +76,23 @@ void hl_mmu_fini(struct hl_device *hdev)
int hl_mmu_ctx_init(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
+ int rc = -EOPNOTSUPP;
- if (hdev->mmu_enable)
- return hdev->mmu_func.ctx_init(ctx);
+ if (!hdev->mmu_enable)
+ return 0;
- return 0;
+ mutex_init(&ctx->mmu_lock);
+
+ if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) {
+ rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx);
+ if (rc)
+ return rc;
+ }
+
+ if (hdev->mmu_func[MMU_HR_PGT].ctx_init != NULL)
+ rc = hdev->mmu_func[MMU_HR_PGT].ctx_init(ctx);
+
+ return rc;
}
/*
@@ -84,12 +109,20 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
- if (hdev->mmu_enable)
- hdev->mmu_func.ctx_fini(ctx);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].ctx_fini != NULL)
+ hdev->mmu_func[MMU_DR_PGT].ctx_fini(ctx);
+
+ if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL)
+ hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx);
+
+ mutex_destroy(&ctx->mmu_lock);
}
/*
- * hl_mmu_unmap - unmaps a virtual addr
+ * hl_mmu_unmap_page - unmaps a virtual addr
*
* @ctx: pointer to the context structure
* @virt_addr: virt addr to map from
@@ -109,7 +142,7 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
* For optimization reasons PCI flush may be requested once after unmapping of
* large area.
*/
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
bool flush_pte)
{
struct hl_device *hdev = ctx->hdev;
@@ -117,7 +150,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr;
u32 real_page_size, npages;
- int i, rc = 0;
+ int i, rc = 0, pgt_residency;
bool is_dram_addr;
if (!hdev->mmu_enable)
@@ -132,6 +165,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
else
mmu_prop = &prop->pmmu;
+ pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and unmap them separately.
@@ -150,7 +185,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
real_virt_addr = virt_addr;
for (i = 0 ; i < npages ; i++) {
- rc = hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr);
+ rc = hdev->mmu_func[pgt_residency].unmap(ctx,
+ real_virt_addr, is_dram_addr);
if (rc)
break;
@@ -158,13 +194,13 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
}
if (flush_pte)
- hdev->mmu_func.flush(ctx);
+ hdev->mmu_func[pgt_residency].flush(ctx);
return rc;
}
/*
- * hl_mmu_map - maps a virtual addr to physical addr
+ * hl_mmu_map_page - maps a virtual addr to physical addr
*
* @ctx: pointer to the context structure
* @virt_addr: virt addr to map from
@@ -185,17 +221,18 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
* For optimization reasons PCI flush may be requested once after mapping of
* large area.
*/
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
- bool flush_pte)
+int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+ u32 page_size, bool flush_pte)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr, real_phys_addr;
u32 real_page_size, npages;
- int i, rc, mapped_cnt = 0;
+ int i, rc, pgt_residency, mapped_cnt = 0;
bool is_dram_addr;
+
if (!hdev->mmu_enable)
return 0;
@@ -208,6 +245,8 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
else
mmu_prop = &prop->pmmu;
+ pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and map them separately.
@@ -216,7 +255,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
real_page_size = mmu_prop->page_size;
} else {
dev_err(hdev->dev,
- "page size of %u is not %uKB aligned, can't unmap\n",
+ "page size of %u is not %uKB aligned, can't map\n",
page_size, mmu_prop->page_size >> 10);
return -EFAULT;
@@ -231,8 +270,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
real_phys_addr = phys_addr;
for (i = 0 ; i < npages ; i++) {
- rc = hdev->mmu_func.map(ctx, real_virt_addr, real_phys_addr,
- real_page_size, is_dram_addr);
+ rc = hdev->mmu_func[pgt_residency].map(ctx,
+ real_virt_addr, real_phys_addr,
+ real_page_size, is_dram_addr);
if (rc)
goto err;
@@ -242,21 +282,124 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
}
if (flush_pte)
- hdev->mmu_func.flush(ctx);
+ hdev->mmu_func[pgt_residency].flush(ctx);
return 0;
err:
real_virt_addr = virt_addr;
for (i = 0 ; i < mapped_cnt ; i++) {
- if (hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr))
+ if (hdev->mmu_func[pgt_residency].unmap(ctx,
+ real_virt_addr, is_dram_addr))
dev_warn_ratelimited(hdev->dev,
"failed to unmap va: 0x%llx\n", real_virt_addr);
real_virt_addr += real_page_size;
}
- hdev->mmu_func.flush(ctx);
+ hdev->mmu_func[pgt_residency].flush(ctx);
+
+ return rc;
+}
+
+/*
+ * hl_mmu_map_contiguous - implements a wrapper for hl_mmu_map_page
+ *                         for mapping contiguous physical memory
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @phys_addr: phys addr to map to
+ * @size: size to map
+ *
+ * The page size is taken from the MMU range (DMMU, PMMU or huge PMMU) that
+ * contains the whole virtual area. The area is mapped page by page and the
+ * PTE flush is requested only together with the last page.
+ *
+ * Returns 0 on success, -EINVAL if the area is not inside any of the MMU
+ * ranges, or the error returned by hl_mmu_map_page(). On a mapping error the
+ * pages that were already mapped by this call are unmapped before returning.
+ */
+int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
+					u64 phys_addr, u32 size)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	/*
+	 * The offset is kept as u64 so it can neither wrap around for sizes
+	 * close to U32_MAX nor be sign-extended when added to the addresses.
+	 */
+	u64 curr_va, curr_pa, off;
+	u32 page_size;
+	bool flush_pte;
+	int rc = 0;
+
+	if (hl_mem_area_inside_range(virt_addr, size,
+			prop->dmmu.start_addr, prop->dmmu.end_addr))
+		page_size = prop->dmmu.page_size;
+	else if (hl_mem_area_inside_range(virt_addr, size,
+			prop->pmmu.start_addr, prop->pmmu.end_addr))
+		page_size = prop->pmmu.page_size;
+	else if (hl_mem_area_inside_range(virt_addr, size,
+			prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
+		page_size = prop->pmmu_huge.page_size;
+	else
+		return -EINVAL;
+
+	for (off = 0 ; off < size ; off += page_size) {
+		curr_va = virt_addr + off;
+		curr_pa = phys_addr + off;
+		/* flush only once, together with the last page of the area */
+		flush_pte = (off + page_size) >= size;
+		rc = hl_mmu_map_page(ctx, curr_va, curr_pa, page_size,
+								flush_pte);
+		if (rc) {
+			dev_err(hdev->dev,
+				"Map failed for va 0x%llx to pa 0x%llx\n",
+				curr_va, curr_pa);
+			goto unmap;
+		}
+	}
+
+	return rc;
+
+unmap:
+	/*
+	 * The page at 'off' is the one that failed and hl_mmu_map_page()
+	 * leaves nothing mapped for it on error, so only the pages in
+	 * [0, off) are unmapped, from the last one down to the first.
+	 * 'off' is a multiple of page_size, hence it can't underflow.
+	 */
+	while (off) {
+		off -= page_size;
+		curr_va = virt_addr + off;
+		/* flush only once, together with the last page to unmap */
+		flush_pte = !off;
+		if (hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte))
+			dev_warn_ratelimited(hdev->dev,
+				"failed to unmap va 0x%llx\n", curr_va);
+	}
+
+	return rc;
+}
+
+/*
+ * hl_mmu_unmap_contiguous - implements a wrapper for hl_mmu_unmap_page
+ *                           for unmapping contiguous physical memory
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to unmap
+ * @size: size to unmap
+ *
+ * The page size is taken from the MMU range (DMMU, PMMU or huge PMMU) that
+ * contains the whole virtual area. All the pages of the area are unmapped
+ * even if some of them fail, and the PTE flush is requested only together
+ * with the last page.
+ *
+ * Returns 0 on success, -EINVAL if the area is not inside any of the MMU
+ * ranges, or the first error returned by hl_mmu_unmap_page().
+ */
+int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	/*
+	 * The offset is kept as u64 so it can neither wrap around for sizes
+	 * close to U32_MAX nor be sign-extended when added to the address.
+	 */
+	u64 curr_va, off;
+	u32 page_size;
+	bool flush_pte;
+	int rc = 0, page_rc;
+
+	if (hl_mem_area_inside_range(virt_addr, size,
+			prop->dmmu.start_addr, prop->dmmu.end_addr))
+		page_size = prop->dmmu.page_size;
+	else if (hl_mem_area_inside_range(virt_addr, size,
+			prop->pmmu.start_addr, prop->pmmu.end_addr))
+		page_size = prop->pmmu.page_size;
+	else if (hl_mem_area_inside_range(virt_addr, size,
+			prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
+		page_size = prop->pmmu_huge.page_size;
+	else
+		return -EINVAL;
+
+	for (off = 0 ; off < size ; off += page_size) {
+		curr_va = virt_addr + off;
+		/* flush only once, together with the last page of the area */
+		flush_pte = (off + page_size) >= size;
+		page_rc = hl_mmu_unmap_page(ctx, curr_va, page_size,
+								flush_pte);
+		if (page_rc) {
+			dev_warn_ratelimited(hdev->dev,
+				"Unmap failed for va 0x%llx\n", curr_va);
+			/* keep going, but don't lose the first failure */
+			if (!rc)
+				rc = page_rc;
+		}
+	}
return rc;
}
@@ -271,8 +414,14 @@ void hl_mmu_swap_out(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
- if (hdev->mmu_enable)
- hdev->mmu_func.swap_out(ctx);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].swap_out != NULL)
+ hdev->mmu_func[MMU_DR_PGT].swap_out(ctx);
+
+ if (hdev->mmu_func[MMU_HR_PGT].swap_out != NULL)
+ hdev->mmu_func[MMU_HR_PGT].swap_out(ctx);
}
/*
@@ -285,8 +434,64 @@ void hl_mmu_swap_in(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
- if (hdev->mmu_enable)
- hdev->mmu_func.swap_in(ctx);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].swap_in != NULL)
+ hdev->mmu_func[MMU_DR_PGT].swap_in(ctx);
+
+ if (hdev->mmu_func[MMU_HR_PGT].swap_in != NULL)
+ hdev->mmu_func[MMU_HR_PGT].swap_in(ctx);
+}
+
+/*
+ * hl_mmu_va_to_pa - translate a device virtual address to a physical address
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to translate
+ * @phys_addr: filled with the translation result on success
+ *
+ * The hops that serve virt_addr are fetched with hl_mmu_get_tlb_info(). The
+ * result is built from the HOP_PHYS_ADDR_MASK bits of the last used hop's PTE
+ * and the FLAGS_MASK bits of virt_addr.
+ *
+ * Returns 0 on success, otherwise the error of hl_mmu_get_tlb_info(), in
+ * which case phys_addr is left untouched.
+ */
+int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr)
+{
+	struct hl_mmu_hop_info hops;
+	u64 last_pte;
+	int rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops);
+
+	if (rc)
+		return rc;
+
+	/* the PTE of the last used hop carries the phys address and flags */
+	last_pte = hops.hop_info[hops.used_hops - 1].hop_pte_val;
+
+	*phys_addr = (virt_addr & FLAGS_MASK) |
+			(last_pte & HOP_PHYS_ADDR_MASK);
+
+	return 0;
+}
+
+/*
+ * hl_mmu_get_tlb_info - collect the hops that translate a virtual address
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to look up
+ * @hops: filled by the get_tlb_info callback of the selected MMU functions
+ *
+ * The callback is taken from the host-resident or the device-resident set of
+ * MMU functions, according to the properties of the MMU that serves virt_addr,
+ * and is called while holding the context's MMU lock.
+ *
+ * Returns -EOPNOTSUPP if the MMU is disabled, otherwise the return value of
+ * the callback.
+ */
+int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
+				struct hl_mmu_hop_info *hops)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	/* host-residency is the same in PMMU and HPMMU, use one of them */
+	struct hl_mmu_properties *mmu_prop = &prop->pmmu;
+	int rc, pgt_residency;
+
+	if (!hdev->mmu_enable)
+		return -EOPNOTSUPP;
+
+	/* addresses inside the DMMU range follow the DMMU properties */
+	if (hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+					prop->dmmu.start_addr,
+					prop->dmmu.end_addr))
+		mmu_prop = &prop->dmmu;
+
+	mutex_lock(&ctx->mmu_lock);
+
+	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+	rc = hdev->mmu_func[pgt_residency].get_tlb_info(ctx, virt_addr, hops);
+
+	mutex_unlock(&ctx->mmu_lock);
+
+	return rc;
}
int hl_mmu_if_set_funcs(struct hl_device *hdev)
@@ -297,7 +502,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
switch (hdev->asic_type) {
case ASIC_GOYA:
case ASIC_GAUDI:
- hl_mmu_v1_set_funcs(hdev);
+ hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c
index 8d1eb5265419..2ce6ea89d4fa 100644
--- a/drivers/misc/habanalabs/common/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu_v1.c
@@ -8,7 +8,6 @@
#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"
-#include <linux/genalloc.h>
#include <linux/slab.h>
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
@@ -29,7 +28,7 @@ static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
struct hl_device *hdev = ctx->hdev;
- gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr,
+ gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
hdev->asic_prop.mmu_hop_table_size);
hash_del(&pgt_info->node);
kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
@@ -54,7 +53,7 @@ static u64 alloc_hop(struct hl_ctx *ctx)
if (!pgt_info)
return ULLONG_MAX;
- phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool,
+ phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
prop->mmu_hop_table_size);
if (!phys_addr) {
dev_err(hdev->dev, "failed to allocate page\n");
@@ -75,7 +74,7 @@ static u64 alloc_hop(struct hl_ctx *ctx)
return shadow_addr;
shadow_err:
- gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr,
+ gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
prop->mmu_hop_table_size);
pool_add_err:
kfree(pgt_info);
@@ -91,7 +90,7 @@ static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
- return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 +
+ return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}
@@ -263,7 +262,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
hop2_pte_addr, hop3_pte_addr, pte_val;
int rc, i, j, hop3_allocated = 0;
- if ((!hdev->dram_supports_virtual_memory) ||
+ if ((!prop->dram_supports_virtual_memory) ||
(!hdev->dram_default_page_mapping) ||
(ctx->asid == HL_KERNEL_ASID_ID))
return 0;
@@ -363,7 +362,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
hop2_pte_addr, hop3_pte_addr;
int i, j;
- if ((!hdev->dram_supports_virtual_memory) ||
+ if ((!prop->dram_supports_virtual_memory) ||
(!hdev->dram_default_page_mapping) ||
(ctx->asid == HL_KERNEL_ASID_ID))
return;
@@ -419,15 +418,15 @@ static int hl_mmu_v1_init(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
- hdev->mmu_priv.mmu_pgt_pool =
+ hdev->mmu_priv.dr.mmu_pgt_pool =
gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
- if (!hdev->mmu_priv.mmu_pgt_pool) {
+ if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
dev_err(hdev->dev, "Failed to create page gen pool\n");
return -ENOMEM;
}
- rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr +
+ rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
prop->mmu_hop0_tables_total_size,
prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
-1);
@@ -436,10 +435,10 @@ static int hl_mmu_v1_init(struct hl_device *hdev)
goto err_pool_add;
}
- hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
+ hdev->mmu_priv.dr.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
prop->mmu_hop_table_size,
GFP_KERNEL | __GFP_ZERO);
- if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) {
+ if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
rc = -ENOMEM;
goto err_pool_add;
}
@@ -449,7 +448,7 @@ static int hl_mmu_v1_init(struct hl_device *hdev)
return 0;
err_pool_add:
- gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
+ gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
return rc;
}
@@ -468,8 +467,8 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
{
/* MMU H/W fini was already done in device hw_fini() */
- kvfree(hdev->mmu_priv.mmu_shadow_hop0);
- gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
+ kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
+ gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
}
/**
@@ -482,9 +481,7 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
*/
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
- mutex_init(&ctx->mmu_lock);
hash_init(ctx->mmu_shadow_hash);
-
return dram_default_mapping_init(ctx);
}
@@ -517,8 +514,6 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
_free_hop(ctx, pgt_info);
}
-
- mutex_destroy(&ctx->mmu_lock);
}
static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
@@ -842,15 +837,114 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
}
+/*
+ * get_hop_pte_addr - get the address of the PTE of virt_addr in a given hop
+ *
+ * @ctx: pointer to the context structure
+ * @mmu_prop: properties of the MMU that serves virt_addr
+ * @hop_num: number of the hop (0 to 4)
+ * @hop_addr: address of the hop table
+ * @virt_addr: virt addr to look up
+ *
+ * Returns the value of the matching get_hopN_pte_addr() helper, or U64_MAX
+ * if hop_num is not a known hop number.
+ */
+static inline u64 get_hop_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_prop,
+					int hop_num, u64 hop_addr, u64 virt_addr)
+{
+	u64 pte_addr = U64_MAX;
+
+	switch (hop_num) {
+	case 0:
+		pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr,
+								virt_addr);
+		break;
+	case 1:
+		pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr,
+								virt_addr);
+		break;
+	case 2:
+		pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr,
+								virt_addr);
+		break;
+	case 3:
+		pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr,
+								virt_addr);
+		break;
+	case 4:
+		pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
+								virt_addr);
+		break;
+	default:
+		/* unknown hop number, keep the U64_MAX marker */
+		break;
+	}
+
+	return pte_addr;
+}
+
+/*
+ * hl_mmu_v1_get_tlb_info - collect the hops that translate a virtual address
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to look up
+ * @hops: filled with the address, PTE address and PTE value of every hop that
+ *        was walked, and with the number of used hops
+ *
+ * The walk starts at hop 0 of the context and follows the PTEs, which are
+ * read through the ASIC's read_pte callback, until a PTE that is marked as
+ * last is found. Addresses of the DMMU and huge PMMU ranges are walked with
+ * one hop less than the number of hops of their MMU.
+ *
+ * Returns 0 on success, -EINVAL if virt_addr is not inside any of the MMU
+ * ranges, or -EFAULT if the walk hits a missing hop or a non-present PTE, or
+ * ends without finding a last PTE.
+ */
+static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
+				struct hl_mmu_hop_info *hops)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_mmu_properties *mmu_prop;
+	bool is_dram_addr, is_pmmu_addr, is_pmmu_h_addr, is_huge;
+	int i, used_hops;
+
+	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+						prop->dmmu.start_addr,
+						prop->dmmu.end_addr);
+	is_pmmu_addr = hl_mem_area_inside_range(virt_addr, prop->pmmu.page_size,
+						prop->pmmu.start_addr,
+						prop->pmmu.end_addr);
+	is_pmmu_h_addr = hl_mem_area_inside_range(virt_addr,
+						prop->pmmu_huge.page_size,
+						prop->pmmu_huge.start_addr,
+						prop->pmmu_huge.end_addr);
+	if (is_dram_addr) {
+		mmu_prop = &prop->dmmu;
+		is_huge = true;
+	} else if (is_pmmu_addr) {
+		mmu_prop = &prop->pmmu;
+		is_huge = false;
+	} else if (is_pmmu_h_addr) {
+		mmu_prop = &prop->pmmu_huge;
+		is_huge = true;
+	} else {
+		return -EINVAL;
+	}
+
+	used_hops = mmu_prop->num_hops;
+
+	/* huge pages use fewer hops */
+	if (is_huge)
+		used_hops--;
+
+	hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
+	hops->hop_info[0].hop_pte_addr =
+			get_hop_pte_addr(ctx, mmu_prop, 0,
+					hops->hop_info[0].hop_addr, virt_addr);
+	hops->hop_info[0].hop_pte_val =
+			hdev->asic_funcs->read_pte(hdev,
+						hops->hop_info[0].hop_pte_addr);
+
+	for (i = 1 ; i < used_hops ; i++) {
+		hops->hop_info[i].hop_addr =
+			get_next_hop_addr(ctx,
+					hops->hop_info[i - 1].hop_pte_val);
+		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
+			return -EFAULT;
+
+		hops->hop_info[i].hop_pte_addr =
+				get_hop_pte_addr(ctx, mmu_prop, i,
+						hops->hop_info[i].hop_addr,
+						virt_addr);
+		hops->hop_info[i].hop_pte_val =
+				hdev->asic_funcs->read_pte(hdev,
+						hops->hop_info[i].hop_pte_addr);
+
+		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
+			return -EFAULT;
+
+		if (hops->hop_info[i].hop_pte_val & LAST_MASK)
+			break;
+	}
+
+	/*
+	 * If passed over all hops then no last hop was found. The walk is
+	 * bounded by used_hops (which is smaller than num_hops for huge
+	 * pages) and hop_info[] was filled only below that bound, so this is
+	 * the value to compare against before looking at hop_info[i].
+	 */
+	if (i >= used_hops)
+		return -EFAULT;
+
+	if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
+		return -EFAULT;
+
+	hops->used_hops = i + 1;
+
+	return 0;
+}
+
/*
* hl_mmu_v1_prepare - prepare mmu for working with mmu v1
*
* @hdev: pointer to the device structure
*/
-void hl_mmu_v1_set_funcs(struct hl_device *hdev)
+void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
- struct hl_mmu_funcs *mmu = &hdev->mmu_func;
-
mmu->init = hl_mmu_v1_init;
mmu->fini = hl_mmu_v1_fini;
mmu->ctx_init = hl_mmu_v1_ctx_init;
@@ -860,4 +954,5 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev)
mmu->flush = flush;
mmu->swap_out = hl_mmu_v1_swap_out;
mmu->swap_in = hl_mmu_v1_swap_in;
+ mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;
}
diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c
index 4327e5704ebb..923b2606e29f 100644
--- a/drivers/misc/habanalabs/common/pci.c
+++ b/drivers/misc/habanalabs/common/pci.c
@@ -338,17 +338,12 @@ static int hl_pci_set_dma_mask(struct hl_device *hdev)
/**
* hl_pci_init() - PCI initialization code.
* @hdev: Pointer to hl_device structure.
- * @cpu_boot_status_reg: status register of the device's CPU
- * @boot_err0_reg: boot error register of the device's CPU
- * @preboot_ver_timeout: how much to wait before bailing out on reading
- * the preboot version
*
* Set DMA masks, initialize the PCI controller and map the PCI BARs.
*
* Return: 0 on success, non-zero for failure.
*/
-int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
- u32 boot_err0_reg, u32 preboot_ver_timeout)
+int hl_pci_init(struct hl_device *hdev)
{
struct pci_dev *pdev = hdev->pdev;
int rc;
@@ -380,15 +375,6 @@ int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
if (rc)
goto unmap_pci_bars;
- /* Before continuing in the initialization, we need to read the preboot
- * version to determine whether we run with a security-enabled firmware
- * The check will be done in each ASIC's specific code
- */
- rc = hl_fw_read_preboot_ver(hdev, cpu_boot_status_reg, boot_err0_reg,
- preboot_ver_timeout);
- if (rc)
- goto unmap_pci_bars;
-
return 0;
unmap_pci_bars:
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 3ceae87016b1..4366d8f93842 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -12,7 +12,7 @@
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
{
struct cpucp_packet pkt;
- long result;
+ u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -32,10 +32,10 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
dev_err(hdev->dev,
"Failed to get frequency of PLL %d, error %d\n",
pll_index, rc);
- result = rc;
+ return rc;
}
- return result;
+ return (long) result;
}
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
@@ -62,7 +62,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
u64 hl_get_max_power(struct hl_device *hdev)
{
struct cpucp_packet pkt;
- long result;
+ u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
@@ -75,7 +75,7 @@ u64 hl_get_max_power(struct hl_device *hdev)
if (rc) {
dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
- result = rc;
+ return (u64) rc;
}
return result;
@@ -276,6 +276,8 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
str = "In reset";
else if (hdev->disabled)
str = "Malfunction";
+ else if (hdev->needs_reset)
+ str = "Needs Reset";
else
str = "Operational";
@@ -304,7 +306,7 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
struct hl_device *hdev = dev_get_drvdata(dev);
long val;
- if (hl_device_disabled_or_in_reset(hdev))
+ if (!hl_device_operational(hdev, NULL))
return -ENODEV;
val = hl_get_max_power(hdev);
@@ -319,7 +321,7 @@ static ssize_t max_power_store(struct device *dev,
unsigned long value;
int rc;
- if (hl_device_disabled_or_in_reset(hdev)) {
+ if (!hl_device_operational(hdev, NULL)) {
count = -ENODEV;
goto out;
}
@@ -347,7 +349,7 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
char *data;
int rc;
- if (hl_device_disabled_or_in_reset(hdev))
+ if (!hl_device_operational(hdev, NULL))
return -ENODEV;
if (!max_size)