Diffstat (limited to 'drivers/misc/habanalabs/common')
-rw-r--r--   drivers/misc/habanalabs/common/command_buffer.c     |   55
-rw-r--r--   drivers/misc/habanalabs/common/command_submission.c | 1016
-rw-r--r--   drivers/misc/habanalabs/common/context.c            |    4
-rw-r--r--   drivers/misc/habanalabs/common/debugfs.c            |  310
-rw-r--r--   drivers/misc/habanalabs/common/device.c             |  198
-rw-r--r--   drivers/misc/habanalabs/common/firmware_if.c        |  167
-rw-r--r--   drivers/misc/habanalabs/common/habanalabs.h         |  458
-rw-r--r--   drivers/misc/habanalabs/common/habanalabs_drv.c     |   34
-rw-r--r--   drivers/misc/habanalabs/common/habanalabs_ioctl.c   |   77
-rw-r--r--   drivers/misc/habanalabs/common/hw_queue.c            |  273
-rw-r--r--   drivers/misc/habanalabs/common/hwmon.c               |   31
-rw-r--r--   drivers/misc/habanalabs/common/memory.c              |  333
-rw-r--r--   drivers/misc/habanalabs/common/mmu.c                 |  273
-rw-r--r--   drivers/misc/habanalabs/common/mmu_v1.c              |  139
-rw-r--r--   drivers/misc/habanalabs/common/pci.c                 |   16
-rw-r--r--   drivers/misc/habanalabs/common/sysfs.c               |   18
16 files changed, 2228 insertions, 1174 deletions
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index ada570f35a41..6f6a904ab6ca 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -11,7 +11,6 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/uaccess.h> -#include <linux/genalloc.h> static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) { @@ -68,9 +67,9 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) bus_addr = cb->bus_address; offset = 0; list_for_each_entry(va_block, &cb->va_block_list, node) { - rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size, - list_is_last(&va_block->node, - &cb->va_block_list)); + rc = hl_mmu_map_page(ctx, va_block->start, bus_addr, + va_block->size, list_is_last(&va_block->node, + &cb->va_block_list)); if (rc) { dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", va_block->start); @@ -93,7 +92,7 @@ err_va_umap: list_for_each_entry(va_block, &cb->va_block_list, node) { if (offset <= 0) break; - hl_mmu_unmap(ctx, va_block->start, va_block->size, + hl_mmu_unmap_page(ctx, va_block->start, va_block->size, offset <= va_block->size); offset -= va_block->size; } @@ -120,7 +119,7 @@ static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb) mutex_lock(&ctx->mmu_lock); list_for_each_entry(va_block, &cb->va_block_list, node) - if (hl_mmu_unmap(ctx, va_block->start, va_block->size, + if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size, list_is_last(&va_block->node, &cb->va_block_list))) dev_warn_ratelimited(hdev->dev, @@ -376,17 +375,49 @@ int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle) return rc; } +static int hl_cb_info(struct hl_device *hdev, struct hl_cb_mgr *mgr, + u64 cb_handle, u32 *usage_cnt) +{ + struct hl_cb *cb; + u32 handle; + int rc = 0; + + /* The CB handle was given to user to do mmap, so need to shift it back + * to the value which was allocated by the IDR module. + */ + cb_handle >>= PAGE_SHIFT; + handle = (u32) cb_handle; + + spin_lock(&mgr->cb_lock); + + cb = idr_find(&mgr->cb_handles, handle); + if (!cb) { + dev_err(hdev->dev, + "CB info failed, no match to handle 0x%x\n", handle); + rc = -EINVAL; + goto out; + } + + *usage_cnt = atomic_read(&cb->cs_cnt); + +out: + spin_unlock(&mgr->cb_lock); + return rc; +} + int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) { union hl_cb_args *args = data; struct hl_device *hdev = hpriv->hdev; + enum hl_device_status status; u64 handle = 0; + u32 usage_cnt = 0; int rc; - if (hl_device_disabled_or_in_reset(hdev)) { + if (!hl_device_operational(hdev, &status)) { dev_warn_ratelimited(hdev->dev, "Device is %s. Can't execute CB IOCTL\n", - atomic_read(&hdev->in_reset) ? 
"in_reset" : "disabled"); + hdev->status[status]); return -EBUSY; } @@ -413,6 +444,13 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) args->in.cb_handle); break; + case HL_CB_OP_INFO: + rc = hl_cb_info(hdev, &hpriv->cb_mgr, args->in.cb_handle, + &usage_cnt); + memset(args, 0, sizeof(*args)); + args->out.usage_cnt = usage_cnt; + break; + default: rc = -ENOTTY; break; @@ -517,6 +555,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) } cb->mmap_size = cb->size; + vma->vm_pgoff = handle; return 0; diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index b2b974ecc431..beb482310a58 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -11,11 +11,25 @@ #include <linux/uaccess.h> #include <linux/slab.h> -#define HL_CS_FLAGS_SIG_WAIT (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT) +#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ + HL_CS_FLAGS_COLLECTIVE_WAIT) + +/** + * enum hl_cs_wait_status - cs wait status + * @CS_WAIT_STATUS_BUSY: cs was not completed yet + * @CS_WAIT_STATUS_COMPLETED: cs completed + * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone + */ +enum hl_cs_wait_status { + CS_WAIT_STATUS_BUSY, + CS_WAIT_STATUS_COMPLETED, + CS_WAIT_STATUS_GONE +}; static void job_wq_completion(struct work_struct *work); -static long _hl_cs_wait_ioctl(struct hl_device *hdev, - struct hl_ctx *ctx, u64 timeout_us, u64 seq); +static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, + u64 timeout_us, u64 seq, + enum hl_cs_wait_status *status, s64 *timestamp); static void cs_do_release(struct kref *ref); static void hl_sob_reset(struct kref *ref) @@ -38,6 +52,38 @@ void hl_sob_reset_error(struct kref *ref) hw_sob->q_idx, hw_sob->sob_id); } +/** + * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet + * @sob_base: sob base id + * @sob_mask: sob user mask, each bit represents a sob offset from sob base + * @mask: generated mask + * + * Return: 0 if given parameters are valid + */ +int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask) +{ + int i; + + if (sob_mask == 0) + return -EINVAL; + + if (sob_mask == 0x1) { + *mask = ~(1 << (sob_base & 0x7)); + } else { + /* find msb in order to verify sob range is valid */ + for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--) + if (BIT(i) & sob_mask) + break; + + if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1)) + return -EINVAL; + + *mask = ~sob_mask; + } + + return 0; +} + static void hl_fence_release(struct kref *kref) { struct hl_fence *fence = @@ -53,7 +99,8 @@ static void hl_fence_release(struct kref *kref) goto free; if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || - (hl_cs_cmpl->type == CS_TYPE_WAIT)) { + (hl_cs_cmpl->type == CS_TYPE_WAIT) || + (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) { dev_dbg(hdev->dev, "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n", @@ -80,6 +127,10 @@ static void hl_fence_release(struct kref *kref) * hence the above scenario is avoided. 
*/ kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); + + if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) + hdev->asic_funcs->reset_sob_group(hdev, + hl_cs_cmpl->sob_group); } free: @@ -102,10 +153,11 @@ static void hl_fence_init(struct hl_fence *fence) { kref_init(&fence->refcount); fence->error = 0; + fence->timestamp = ktime_set(0, 0); init_completion(&fence->completion); } -static void cs_get(struct hl_cs *cs) +void cs_get(struct hl_cs *cs) { kref_get(&cs->refcount); } @@ -120,6 +172,18 @@ static void cs_put(struct hl_cs *cs) kref_put(&cs->refcount, cs_do_release); } +static void cs_job_do_release(struct kref *ref) +{ + struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount); + + kfree(job); +} + +static void cs_job_put(struct hl_cs_job *job) +{ + kref_put(&job->refcount, cs_job_do_release); +} + static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) { /* @@ -169,10 +233,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) job->patched_cb = parser.patched_cb; job->job_cb_size = parser.patched_cb_size; job->contains_dma_pkt = parser.contains_dma_pkt; - - spin_lock(&job->patched_cb->lock); - job->patched_cb->cs_cnt++; - spin_unlock(&job->patched_cb->lock); + atomic_inc(&job->patched_cb->cs_cnt); } /* @@ -180,9 +241,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) * original CB anymore because it was already parsed and * won't be accessed again for this CS */ - spin_lock(&job->user_cb->lock); - job->user_cb->cs_cnt--; - spin_unlock(&job->user_cb->lock); + atomic_dec(&job->user_cb->cs_cnt); hl_cb_put(job->user_cb); job->user_cb = NULL; } else if (!rc) { @@ -192,7 +251,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) return rc; } -static void free_job(struct hl_device *hdev, struct hl_cs_job *job) +static void complete_job(struct hl_device *hdev, struct hl_cs_job *job) { struct hl_cs *cs = job->cs; @@ -204,10 +263,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) * created, so we need to check it's not NULL */ if (job->patched_cb) { - spin_lock(&job->patched_cb->lock); - job->patched_cb->cs_cnt--; - spin_unlock(&job->patched_cb->lock); - + atomic_dec(&job->patched_cb->cs_cnt); hl_cb_put(job->patched_cb); } } @@ -215,13 +271,12 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) /* For H/W queue jobs, if a user CB was allocated by driver and MMU is * enabled, the user CB isn't released in cs_parser() and thus should be * released here. 
+ * This is also true for INT queues jobs which were allocated by driver */ - if (job->queue_type == QUEUE_TYPE_HW && - job->is_kernel_allocated_cb && hdev->mmu_enable) { - spin_lock(&job->user_cb->lock); - job->user_cb->cs_cnt--; - spin_unlock(&job->user_cb->lock); - + if (job->is_kernel_allocated_cb && + ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) || + job->queue_type == QUEUE_TYPE_INT)) { + atomic_dec(&job->user_cb->cs_cnt); hl_cb_put(job->user_cb); } @@ -239,27 +294,12 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) job->queue_type == QUEUE_TYPE_HW) cs_put(cs); - kfree(job); -} - -static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx) -{ - hdev->aggregated_cs_counters.device_in_reset_drop_cnt += - ctx->cs_counters.device_in_reset_drop_cnt; - hdev->aggregated_cs_counters.out_of_mem_drop_cnt += - ctx->cs_counters.out_of_mem_drop_cnt; - hdev->aggregated_cs_counters.parsing_drop_cnt += - ctx->cs_counters.parsing_drop_cnt; - hdev->aggregated_cs_counters.queue_full_drop_cnt += - ctx->cs_counters.queue_full_drop_cnt; - hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt += - ctx->cs_counters.max_cs_in_flight_drop_cnt; + cs_job_put(job); } static void cs_do_release(struct kref *ref) { - struct hl_cs *cs = container_of(ref, struct hl_cs, - refcount); + struct hl_cs *cs = container_of(ref, struct hl_cs, refcount); struct hl_device *hdev = cs->ctx->hdev; struct hl_cs_job *job, *tmp; @@ -268,77 +308,78 @@ static void cs_do_release(struct kref *ref) /* * Although if we reached here it means that all external jobs have * finished, because each one of them took refcnt to CS, we still - * need to go over the internal jobs and free them. Otherwise, we + * need to go over the internal jobs and complete them. Otherwise, we * will have leaked memory and what's worse, the CS object (and * potentially the CTX object) could be released, while the JOB * still holds a pointer to them (but no reference). */ list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) - free_job(hdev, job); + complete_job(hdev, job); - /* We also need to update CI for internal queues */ - if (cs->submitted) { - hdev->asic_funcs->hw_queues_lock(hdev); + if (!cs->submitted) { + /* In case the wait for signal CS was submitted, the put occurs + * in init_signal_wait_cs() or collective_wait_init_cs() + * right before hanging on the PQ. 
+ */ + if (cs->type == CS_TYPE_WAIT || + cs->type == CS_TYPE_COLLECTIVE_WAIT) + hl_fence_put(cs->signal_fence); - hdev->cs_active_cnt--; - if (!hdev->cs_active_cnt) { - struct hl_device_idle_busy_ts *ts; + goto out; + } - ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++]; - ts->busy_to_idle_ts = ktime_get(); + hdev->asic_funcs->hw_queues_lock(hdev); - if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE) - hdev->idle_busy_ts_idx = 0; - } else if (hdev->cs_active_cnt < 0) { - dev_crit(hdev->dev, "CS active cnt %d is negative\n", - hdev->cs_active_cnt); - } + hdev->cs_active_cnt--; + if (!hdev->cs_active_cnt) { + struct hl_device_idle_busy_ts *ts; - hdev->asic_funcs->hw_queues_unlock(hdev); + ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++]; + ts->busy_to_idle_ts = ktime_get(); - hl_int_hw_queue_update_ci(cs); + if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE) + hdev->idle_busy_ts_idx = 0; + } else if (hdev->cs_active_cnt < 0) { + dev_crit(hdev->dev, "CS active cnt %d is negative\n", + hdev->cs_active_cnt); + } - spin_lock(&hdev->hw_queues_mirror_lock); - /* remove CS from hw_queues mirror list */ - list_del_init(&cs->mirror_node); - spin_unlock(&hdev->hw_queues_mirror_lock); + hdev->asic_funcs->hw_queues_unlock(hdev); - /* - * Don't cancel TDR in case this CS was timedout because we - * might be running from the TDR context - */ - if ((!cs->timedout) && - (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) { - struct hl_cs *next; + /* Need to update CI for internal queues */ + hl_int_hw_queue_update_ci(cs); - if (cs->tdr_active) - cancel_delayed_work_sync(&cs->work_tdr); + /* remove CS from CS mirror list */ + spin_lock(&hdev->cs_mirror_lock); + list_del_init(&cs->mirror_node); + spin_unlock(&hdev->cs_mirror_lock); - spin_lock(&hdev->hw_queues_mirror_lock); + /* Don't cancel TDR in case this CS was timedout because we might be + * running from the TDR context + */ + if (!cs->timedout && hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) { + struct hl_cs *next; - /* queue TDR for next CS */ - next = list_first_entry_or_null( - &hdev->hw_queues_mirror_list, - struct hl_cs, mirror_node); + if (cs->tdr_active) + cancel_delayed_work_sync(&cs->work_tdr); - if ((next) && (!next->tdr_active)) { - next->tdr_active = true; - schedule_delayed_work(&next->work_tdr, - hdev->timeout_jiffies); - } + spin_lock(&hdev->cs_mirror_lock); - spin_unlock(&hdev->hw_queues_mirror_lock); + /* queue TDR for next CS */ + next = list_first_entry_or_null(&hdev->cs_mirror_list, + struct hl_cs, mirror_node); + + if (next && !next->tdr_active) { + next->tdr_active = true; + schedule_delayed_work(&next->work_tdr, + hdev->timeout_jiffies); } - } else if (cs->type == CS_TYPE_WAIT) { - /* - * In case the wait for signal CS was submitted, the put occurs - * in init_signal_wait_cs() right before hanging on the PQ. 
- */ - hl_fence_put(cs->signal_fence); + + spin_unlock(&hdev->cs_mirror_lock); } - /* - * Must be called before hl_ctx_put because inside we use ctx to get +out: + /* Must be called before hl_ctx_put because inside we use ctx to get * the device */ hl_debugfs_remove_cs(cs); @@ -356,9 +397,10 @@ static void cs_do_release(struct kref *ref) else if (!cs->submitted) cs->fence->error = -EBUSY; + if (cs->timestamp) + cs->fence->timestamp = ktime_get(); complete_all(&cs->fence->completion); hl_fence_put(cs->fence); - cs_counters_aggregate(hdev, cs->ctx); kfree(cs->jobs_in_queue_cnt); kfree(cs); @@ -384,24 +426,51 @@ static void cs_timedout(struct work_struct *work) hdev = cs->ctx->hdev; - dev_err(hdev->dev, - "Command submission %llu has not finished in time!\n", - cs->sequence); + switch (cs->type) { + case CS_TYPE_SIGNAL: + dev_err(hdev->dev, + "Signal command submission %llu has not finished in time!\n", + cs->sequence); + break; + + case CS_TYPE_WAIT: + dev_err(hdev->dev, + "Wait command submission %llu has not finished in time!\n", + cs->sequence); + break; + + case CS_TYPE_COLLECTIVE_WAIT: + dev_err(hdev->dev, + "Collective Wait command submission %llu has not finished in time!\n", + cs->sequence); + break; + + default: + dev_err(hdev->dev, + "Command submission %llu has not finished in time!\n", + cs->sequence); + break; + } cs_put(cs); if (hdev->reset_on_lockup) hl_device_reset(hdev, false, false); + else + hdev->needs_reset = true; } static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, enum hl_cs_type cs_type, struct hl_cs **cs_new) { - struct hl_cs_compl *cs_cmpl; + struct hl_cs_counters_atomic *cntr; struct hl_fence *other = NULL; + struct hl_cs_compl *cs_cmpl; struct hl_cs *cs; int rc; + cntr = &hdev->aggregated_cs_counters; + cs = kzalloc(sizeof(*cs), GFP_ATOMIC); if (!cs) return -ENOMEM; @@ -435,7 +504,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, if (other && !completion_done(&other->completion)) { dev_dbg_ratelimited(hdev->dev, "Rejecting CS because of too many in-flights CS\n"); - ctx->cs_counters.max_cs_in_flight_drop_cnt++; + atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt); + atomic64_inc(&cntr->max_cs_in_flight_drop_cnt); rc = -EAGAIN; goto free_fence; } @@ -480,7 +550,7 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) struct hl_cs_job *job, *tmp; list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) - free_job(hdev, job); + complete_job(hdev, job); } void hl_cs_rollback_all(struct hl_device *hdev) @@ -493,8 +563,7 @@ void hl_cs_rollback_all(struct hl_device *hdev) flush_workqueue(hdev->cq_wq[i]); /* Make sure we don't have leftovers in the H/W queues mirror list */ - list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list, - mirror_node) { + list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { cs_get(cs); cs->aborted = true; dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", @@ -512,7 +581,7 @@ static void job_wq_completion(struct work_struct *work) struct hl_device *hdev = cs->ctx->hdev; /* job is no longer needed */ - free_job(hdev, job); + complete_job(hdev, job); } static int validate_queue_index(struct hl_device *hdev, @@ -547,9 +616,36 @@ static int validate_queue_index(struct hl_device *hdev, return -EINVAL; } - *queue_type = hw_queue_prop->type; - *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb; + /* When hw queue type isn't QUEUE_TYPE_HW, + * USER_ALLOC_CB flag shall be referred as "don't care". 
+ */ + if (hw_queue_prop->type == QUEUE_TYPE_HW) { + if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) { + if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) { + dev_err(hdev->dev, + "Queue index %d doesn't support user CB\n", + chunk->queue_index); + return -EINVAL; + } + + *is_kernel_allocated_cb = false; + } else { + if (!(hw_queue_prop->cb_alloc_flags & + CB_ALLOC_KERNEL)) { + dev_err(hdev->dev, + "Queue index %d doesn't support kernel CB\n", + chunk->queue_index); + return -EINVAL; + } + + *is_kernel_allocated_cb = true; + } + } else { + *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags + & CB_ALLOC_KERNEL); + } + *queue_type = hw_queue_prop->type; return 0; } @@ -573,9 +669,7 @@ static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, goto release_cb; } - spin_lock(&cb->lock); - cb->cs_cnt++; - spin_unlock(&cb->lock); + atomic_inc(&cb->cs_cnt); return cb; @@ -593,6 +687,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, if (!job) return NULL; + kref_init(&job->refcount); job->queue_type = queue_type; job->is_kernel_allocated_cb = is_kernel_allocated_cb; @@ -605,42 +700,115 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, return job; } -static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, - u32 num_chunks, u64 *cs_seq) +static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) +{ + if (cs_type_flags & HL_CS_FLAGS_SIGNAL) + return CS_TYPE_SIGNAL; + else if (cs_type_flags & HL_CS_FLAGS_WAIT) + return CS_TYPE_WAIT; + else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT) + return CS_TYPE_COLLECTIVE_WAIT; + else + return CS_TYPE_DEFAULT; +} + +static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) { struct hl_device *hdev = hpriv->hdev; - struct hl_cs_chunk *cs_chunk_array; - struct hl_cs_job *job; - struct hl_cs *cs; - struct hl_cb *cb; - bool int_queues_only = true; - u32 size_to_copy; - int rc, i; + struct hl_ctx *ctx = hpriv->ctx; + u32 cs_type_flags, num_chunks; + enum hl_device_status status; + enum hl_cs_type cs_type; - *cs_seq = ULLONG_MAX; + if (!hl_device_operational(hdev, &status)) { + dev_warn_ratelimited(hdev->dev, + "Device is %s. 
Can't submit new CS\n", + hdev->status[status]); + return -EBUSY; + } + + cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK; + + if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) { + dev_err(hdev->dev, + "CS type flags are mutually exclusive, context %d\n", + ctx->asid); + return -EINVAL; + } + + cs_type = hl_cs_get_cs_type(cs_type_flags); + num_chunks = args->in.num_chunks_execute; + + if (unlikely((cs_type != CS_TYPE_DEFAULT) && + !hdev->supports_sync_stream)) { + dev_err(hdev->dev, "Sync stream CS is not supported\n"); + return -EINVAL; + } + + if (cs_type == CS_TYPE_DEFAULT) { + if (!num_chunks) { + dev_err(hdev->dev, + "Got execute CS with 0 chunks, context %d\n", + ctx->asid); + return -EINVAL; + } + } else if (num_chunks != 1) { + dev_err(hdev->dev, + "Sync stream CS mandates one chunk only, context %d\n", + ctx->asid); + return -EINVAL; + } + + return 0; +} + +static int hl_cs_copy_chunk_array(struct hl_device *hdev, + struct hl_cs_chunk **cs_chunk_array, + void __user *chunks, u32 num_chunks) +{ + u32 size_to_copy; if (num_chunks > HL_MAX_JOBS_PER_CS) { dev_err(hdev->dev, "Number of chunks can NOT be larger than %d\n", HL_MAX_JOBS_PER_CS); - rc = -EINVAL; - goto out; + return -EINVAL; } - cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array), + *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), GFP_ATOMIC); - if (!cs_chunk_array) { - rc = -ENOMEM; - goto out; - } + if (!*cs_chunk_array) + return -ENOMEM; size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); - if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) { + if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) { dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); - rc = -EFAULT; - goto free_cs_chunk_array; + kfree(*cs_chunk_array); + return -EFAULT; } + return 0; +} + +static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, + u32 num_chunks, u64 *cs_seq, bool timestamp) +{ + bool int_queues_only = true; + struct hl_device *hdev = hpriv->hdev; + struct hl_cs_chunk *cs_chunk_array; + struct hl_cs_counters_atomic *cntr; + struct hl_cs_job *job; + struct hl_cs *cs; + struct hl_cb *cb; + int rc, i; + + cntr = &hdev->aggregated_cs_counters; + *cs_seq = ULLONG_MAX; + + rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks); + if (rc) + goto out; + /* increment refcnt for context */ hl_ctx_get(hdev, hpriv->ctx); @@ -650,6 +818,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, goto free_cs_chunk_array; } + cs->timestamp = !!timestamp; *cs_seq = cs->sequence; hl_debugfs_add_cs(cs); @@ -663,14 +832,17 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, rc = validate_queue_index(hdev, chunk, &queue_type, &is_kernel_allocated_cb); if (rc) { - hpriv->ctx->cs_counters.parsing_drop_cnt++; + atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); + atomic64_inc(&cntr->parsing_drop_cnt); goto free_cs_object; } if (is_kernel_allocated_cb) { cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); if (!cb) { - hpriv->ctx->cs_counters.parsing_drop_cnt++; + atomic64_inc( + &hpriv->ctx->cs_counters.parsing_drop_cnt); + atomic64_inc(&cntr->parsing_drop_cnt); rc = -EINVAL; goto free_cs_object; } @@ -684,7 +856,9 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, job = hl_cs_allocate_job(hdev, queue_type, is_kernel_allocated_cb); if (!job) { - hpriv->ctx->cs_counters.out_of_mem_drop_cnt++; + atomic64_inc( + &hpriv->ctx->cs_counters.out_of_mem_drop_cnt); + 
atomic64_inc(&cntr->out_of_mem_drop_cnt); dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; if (is_kernel_allocated_cb) @@ -717,7 +891,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, rc = cs_parser(hpriv, job); if (rc) { - hpriv->ctx->cs_counters.parsing_drop_cnt++; + atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); + atomic64_inc(&cntr->parsing_drop_cnt); dev_err(hdev->dev, "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", cs->ctx->asid, cs->sequence, job->id, rc); @@ -726,7 +901,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, } if (int_queues_only) { - hpriv->ctx->cs_counters.parsing_drop_cnt++; + atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); + atomic64_inc(&cntr->parsing_drop_cnt); dev_err(hdev->dev, "Reject CS %d.%llu because only internal queues jobs are present\n", cs->ctx->asid, cs->sequence); @@ -747,9 +923,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, goto put_cs; release_cb: - spin_lock(&cb->lock); - cb->cs_cnt--; - spin_unlock(&cb->lock); + atomic_dec(&cb->cs_cnt); hl_cb_put(cb); free_cs_object: cs_rollback(hdev, cs); @@ -764,47 +938,234 @@ out: return rc; } -static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, - void __user *chunks, u32 num_chunks, +static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, u64 *cs_seq) { struct hl_device *hdev = hpriv->hdev; struct hl_ctx *ctx = hpriv->ctx; - struct hl_cs_chunk *cs_chunk_array, *chunk; - struct hw_queue_properties *hw_queue_prop; - struct hl_fence *sig_fence = NULL; - struct hl_cs_job *job; - struct hl_cs *cs; - struct hl_cb *cb; - enum hl_queue_type q_type; - u64 *signal_seq_arr = NULL, signal_seq; - u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size; - int rc; + bool need_soft_reset = false; + int rc = 0, do_ctx_switch; + void __user *chunks; + u32 num_chunks, tmp; + int ret; - *cs_seq = ULLONG_MAX; + do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); - if (num_chunks > HL_MAX_JOBS_PER_CS) { + if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { + mutex_lock(&hpriv->restore_phase_mutex); + + if (do_ctx_switch) { + rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); + if (rc) { + dev_err_ratelimited(hdev->dev, + "Failed to switch to context %d, rejecting CS! %d\n", + ctx->asid, rc); + /* + * If we timedout, or if the device is not IDLE + * while we want to do context-switch (-EBUSY), + * we need to soft-reset because QMAN is + * probably stuck. 
However, we can't call to + * reset here directly because of deadlock, so + * need to do it at the very end of this + * function + */ + if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) + need_soft_reset = true; + mutex_unlock(&hpriv->restore_phase_mutex); + goto out; + } + } + + hdev->asic_funcs->restore_phase_topology(hdev); + + chunks = (void __user *) (uintptr_t) args->in.chunks_restore; + num_chunks = args->in.num_chunks_restore; + + if (!num_chunks) { + dev_dbg(hdev->dev, + "Need to run restore phase but restore CS is empty\n"); + rc = 0; + } else { + rc = cs_ioctl_default(hpriv, chunks, num_chunks, + cs_seq, false); + } + + mutex_unlock(&hpriv->restore_phase_mutex); + + if (rc) { + dev_err(hdev->dev, + "Failed to submit restore CS for context %d (%d)\n", + ctx->asid, rc); + goto out; + } + + /* Need to wait for restore completion before execution phase */ + if (num_chunks) { + enum hl_cs_wait_status status; +wait_again: + ret = _hl_cs_wait_ioctl(hdev, ctx, + jiffies_to_usecs(hdev->timeout_jiffies), + *cs_seq, &status, NULL); + if (ret) { + if (ret == -ERESTARTSYS) { + usleep_range(100, 200); + goto wait_again; + } + + dev_err(hdev->dev, + "Restore CS for context %d failed to complete %d\n", + ctx->asid, ret); + rc = -ENOEXEC; + goto out; + } + } + + ctx->thread_ctx_switch_wait_token = 1; + + } else if (!ctx->thread_ctx_switch_wait_token) { + rc = hl_poll_timeout_memory(hdev, + &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), + 100, jiffies_to_usecs(hdev->timeout_jiffies), false); + + if (rc == -ETIMEDOUT) { + dev_err(hdev->dev, + "context switch phase timeout (%d)\n", tmp); + goto out; + } + } + +out: + if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) + hl_device_reset(hdev, false, false); + + return rc; +} + +static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, + struct hl_cs_chunk *chunk, u64 *signal_seq) +{ + u64 *signal_seq_arr = NULL; + u32 size_to_copy, signal_seq_arr_len; + int rc = 0; + + signal_seq_arr_len = chunk->num_signal_seq_arr; + + /* currently only one signal seq is supported */ + if (signal_seq_arr_len != 1) { dev_err(hdev->dev, - "Number of chunks can NOT be larger than %d\n", - HL_MAX_JOBS_PER_CS); - rc = -EINVAL; - goto out; + "Wait for signal CS supports only one signal CS seq\n"); + return -EINVAL; } - cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array), + signal_seq_arr = kmalloc_array(signal_seq_arr_len, + sizeof(*signal_seq_arr), GFP_ATOMIC); - if (!cs_chunk_array) { - rc = -ENOMEM; + if (!signal_seq_arr) + return -ENOMEM; + + size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr); + if (copy_from_user(signal_seq_arr, + u64_to_user_ptr(chunk->signal_seq_arr), + size_to_copy)) { + dev_err(hdev->dev, + "Failed to copy signal seq array from user\n"); + rc = -EFAULT; goto out; } - size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); - if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) { - dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); - rc = -EFAULT; - goto free_cs_chunk_array; + /* currently it is guaranteed to have only one signal seq */ + *signal_seq = signal_seq_arr[0]; + +out: + kfree(signal_seq_arr); + + return rc; +} + +static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, + struct hl_ctx *ctx, struct hl_cs *cs, enum hl_queue_type q_type, + u32 q_idx) +{ + struct hl_cs_counters_atomic *cntr; + struct hl_cs_job *job; + struct hl_cb *cb; + u32 cb_size; + + cntr = &hdev->aggregated_cs_counters; + + job = hl_cs_allocate_job(hdev, q_type, true); + if (!job) { + 
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); + dev_err(hdev->dev, "Failed to allocate a new job\n"); + return -ENOMEM; } + if (cs->type == CS_TYPE_WAIT) + cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); + else + cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); + + cb = hl_cb_kernel_create(hdev, cb_size, + q_type == QUEUE_TYPE_HW && hdev->mmu_enable); + if (!cb) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); + kfree(job); + return -EFAULT; + } + + job->id = 0; + job->cs = cs; + job->user_cb = cb; + atomic_inc(&job->user_cb->cs_cnt); + job->user_cb_size = cb_size; + job->hw_queue_id = q_idx; + + /* + * No need in parsing, user CB is the patched CB. + * We call hl_cb_destroy() out of two reasons - we don't need the CB in + * the CB idr anymore and to decrement its refcount as it was + * incremented inside hl_cb_kernel_create(). + */ + job->patched_cb = job->user_cb; + job->job_cb_size = job->user_cb_size; + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + + /* increment refcount as for external queues we get completion */ + cs_get(cs); + + cs->jobs_in_queue_cnt[job->hw_queue_id]++; + + list_add_tail(&job->cs_node, &cs->job_list); + + hl_debugfs_add_job(hdev, job); + + return 0; +} + +static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, + void __user *chunks, u32 num_chunks, + u64 *cs_seq, bool timestamp) +{ + struct hl_cs_chunk *cs_chunk_array, *chunk; + struct hw_queue_properties *hw_queue_prop; + struct hl_device *hdev = hpriv->hdev; + struct hl_cs_compl *sig_waitcs_cmpl; + u32 q_idx, collective_engine_id = 0; + struct hl_fence *sig_fence = NULL; + struct hl_ctx *ctx = hpriv->ctx; + enum hl_queue_type q_type; + struct hl_cs *cs; + u64 signal_seq; + int rc; + + *cs_seq = ULLONG_MAX; + + rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks); + if (rc) + goto out; + /* currently it is guaranteed to have only one chunk */ chunk = &cs_chunk_array[0]; @@ -819,60 +1180,43 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; q_type = hw_queue_prop->type; - if ((q_idx >= hdev->asic_prop.max_queues) || - (!hw_queue_prop->supports_sync_stream)) { - dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx); + if (!hw_queue_prop->supports_sync_stream) { + dev_err(hdev->dev, + "Queue index %d does not support sync stream operations\n", + q_idx); rc = -EINVAL; goto free_cs_chunk_array; } - if (cs_type == CS_TYPE_WAIT) { - struct hl_cs_compl *sig_waitcs_cmpl; - - signal_seq_arr_len = chunk->num_signal_seq_arr; - - /* currently only one signal seq is supported */ - if (signal_seq_arr_len != 1) { + if (cs_type == CS_TYPE_COLLECTIVE_WAIT) { + if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { dev_err(hdev->dev, - "Wait for signal CS supports only one signal CS seq\n"); + "Queue index %d is invalid\n", q_idx); rc = -EINVAL; goto free_cs_chunk_array; } - signal_seq_arr = kmalloc_array(signal_seq_arr_len, - sizeof(*signal_seq_arr), - GFP_ATOMIC); - if (!signal_seq_arr) { - rc = -ENOMEM; - goto free_cs_chunk_array; - } + collective_engine_id = chunk->collective_engine_id; + } - size_to_copy = chunk->num_signal_seq_arr * - sizeof(*signal_seq_arr); - if (copy_from_user(signal_seq_arr, - u64_to_user_ptr(chunk->signal_seq_arr), - size_to_copy)) { - dev_err(hdev->dev, - "Failed to copy signal seq array from user\n"); - rc = -EFAULT; - goto 
free_signal_seq_array; - } + if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) { + rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq); + if (rc) + goto free_cs_chunk_array; - /* currently it is guaranteed to have only one signal seq */ - signal_seq = signal_seq_arr[0]; sig_fence = hl_ctx_get_fence(ctx, signal_seq); if (IS_ERR(sig_fence)) { dev_err(hdev->dev, "Failed to get signal CS with seq 0x%llx\n", signal_seq); rc = PTR_ERR(sig_fence); - goto free_signal_seq_array; + goto free_cs_chunk_array; } if (!sig_fence) { /* signal CS already finished */ rc = 0; - goto free_signal_seq_array; + goto free_cs_chunk_array; } sig_waitcs_cmpl = @@ -884,14 +1228,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, signal_seq); hl_fence_put(sig_fence); rc = -EINVAL; - goto free_signal_seq_array; + goto free_cs_chunk_array; } if (completion_done(&sig_fence->completion)) { /* signal CS already finished */ hl_fence_put(sig_fence); rc = 0; - goto free_signal_seq_array; + goto free_cs_chunk_array; } } @@ -900,70 +1244,37 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, rc = allocate_cs(hdev, ctx, cs_type, &cs); if (rc) { - if (cs_type == CS_TYPE_WAIT) + if (cs_type == CS_TYPE_WAIT || + cs_type == CS_TYPE_COLLECTIVE_WAIT) hl_fence_put(sig_fence); hl_ctx_put(ctx); - goto free_signal_seq_array; + goto free_cs_chunk_array; } + cs->timestamp = !!timestamp; + /* * Save the signal CS fence for later initialization right before * hanging the wait CS on the queue. */ - if (cs->type == CS_TYPE_WAIT) + if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) cs->signal_fence = sig_fence; hl_debugfs_add_cs(cs); *cs_seq = cs->sequence; - job = hl_cs_allocate_job(hdev, q_type, true); - if (!job) { - ctx->cs_counters.out_of_mem_drop_cnt++; - dev_err(hdev->dev, "Failed to allocate a new job\n"); - rc = -ENOMEM; - goto put_cs; - } - - if (cs->type == CS_TYPE_WAIT) - cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); + if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL) + rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type, + q_idx); + else if (cs_type == CS_TYPE_COLLECTIVE_WAIT) + rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, + cs, q_idx, collective_engine_id); else - cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); - - cb = hl_cb_kernel_create(hdev, cb_size, - q_type == QUEUE_TYPE_HW && hdev->mmu_enable); - if (!cb) { - ctx->cs_counters.out_of_mem_drop_cnt++; - kfree(job); - rc = -EFAULT; - goto put_cs; - } - - job->id = 0; - job->cs = cs; - job->user_cb = cb; - job->user_cb->cs_cnt++; - job->user_cb_size = cb_size; - job->hw_queue_id = q_idx; - - /* - * No need in parsing, user CB is the patched CB. - * We call hl_cb_destroy() out of two reasons - we don't need the CB in - * the CB idr anymore and to decrement its refcount as it was - * incremented inside hl_cb_kernel_create(). 
- */ - job->patched_cb = job->user_cb; - job->job_cb_size = job->user_cb_size; - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); - - cs->jobs_in_queue_cnt[job->hw_queue_id]++; - - list_add_tail(&job->cs_node, &cs->job_list); - - /* increment refcount as for external queues we get completion */ - cs_get(cs); + rc = -EINVAL; - hl_debugfs_add_job(hdev, job); + if (rc) + goto free_cs_object; rc = hl_hw_queue_schedule_cs(cs); if (rc) { @@ -984,9 +1295,6 @@ free_cs_object: put_cs: /* We finished with the CS in this function, so put the ref */ cs_put(cs); -free_signal_seq_array: - if (cs_type == CS_TYPE_WAIT) - kfree(signal_seq_arr); free_cs_chunk_array: kfree(cs_chunk_array); out: @@ -995,156 +1303,39 @@ out: int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) { - struct hl_device *hdev = hpriv->hdev; union hl_cs_args *args = data; - struct hl_ctx *ctx = hpriv->ctx; - void __user *chunks_execute, *chunks_restore; enum hl_cs_type cs_type; - u32 num_chunks_execute, num_chunks_restore, sig_wait_flags; u64 cs_seq = ULONG_MAX; - int rc, do_ctx_switch; - bool need_soft_reset = false; - - if (hl_device_disabled_or_in_reset(hdev)) { - dev_warn_ratelimited(hdev->dev, - "Device is %s. Can't submit new CS\n", - atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); - rc = -EBUSY; - goto out; - } - - sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT; + void __user *chunks; + u32 num_chunks; + int rc; - if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) { - dev_err(hdev->dev, - "Signal and wait CS flags are mutually exclusive, context %d\n", - ctx->asid); - rc = -EINVAL; + rc = hl_cs_sanity_checks(hpriv, args); + if (rc) goto out; - } - if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) && - (!hdev->supports_sync_stream))) { - dev_err(hdev->dev, "Sync stream CS is not supported\n"); - rc = -EINVAL; + rc = hl_cs_ctx_switch(hpriv, args, &cs_seq); + if (rc) goto out; - } - if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL) - cs_type = CS_TYPE_SIGNAL; - else if (args->in.cs_flags & HL_CS_FLAGS_WAIT) - cs_type = CS_TYPE_WAIT; - else - cs_type = CS_TYPE_DEFAULT; - - chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute; - num_chunks_execute = args->in.num_chunks_execute; - - if (cs_type == CS_TYPE_DEFAULT) { - if (!num_chunks_execute) { - dev_err(hdev->dev, - "Got execute CS with 0 chunks, context %d\n", - ctx->asid); - rc = -EINVAL; - goto out; - } - } else if (num_chunks_execute != 1) { - dev_err(hdev->dev, - "Sync stream CS mandates one chunk only, context %d\n", - ctx->asid); - rc = -EINVAL; - goto out; + cs_type = hl_cs_get_cs_type(args->in.cs_flags & + ~HL_CS_FLAGS_FORCE_RESTORE); + chunks = (void __user *) (uintptr_t) args->in.chunks_execute; + num_chunks = args->in.num_chunks_execute; + + switch (cs_type) { + case CS_TYPE_SIGNAL: + case CS_TYPE_WAIT: + case CS_TYPE_COLLECTIVE_WAIT: + rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks, + &cs_seq, args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP); + break; + default: + rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, + args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP); + break; } - do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); - - if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { - long ret; - - chunks_restore = - (void __user *) (uintptr_t) args->in.chunks_restore; - num_chunks_restore = args->in.num_chunks_restore; - - mutex_lock(&hpriv->restore_phase_mutex); - - if (do_ctx_switch) { - rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); - if (rc) { - 
dev_err_ratelimited(hdev->dev, - "Failed to switch to context %d, rejecting CS! %d\n", - ctx->asid, rc); - /* - * If we timedout, or if the device is not IDLE - * while we want to do context-switch (-EBUSY), - * we need to soft-reset because QMAN is - * probably stuck. However, we can't call to - * reset here directly because of deadlock, so - * need to do it at the very end of this - * function - */ - if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) - need_soft_reset = true; - mutex_unlock(&hpriv->restore_phase_mutex); - goto out; - } - } - - hdev->asic_funcs->restore_phase_topology(hdev); - - if (!num_chunks_restore) { - dev_dbg(hdev->dev, - "Need to run restore phase but restore CS is empty\n"); - rc = 0; - } else { - rc = cs_ioctl_default(hpriv, chunks_restore, - num_chunks_restore, &cs_seq); - } - - mutex_unlock(&hpriv->restore_phase_mutex); - - if (rc) { - dev_err(hdev->dev, - "Failed to submit restore CS for context %d (%d)\n", - ctx->asid, rc); - goto out; - } - - /* Need to wait for restore completion before execution phase */ - if (num_chunks_restore) { - ret = _hl_cs_wait_ioctl(hdev, ctx, - jiffies_to_usecs(hdev->timeout_jiffies), - cs_seq); - if (ret <= 0) { - dev_err(hdev->dev, - "Restore CS for context %d failed to complete %ld\n", - ctx->asid, ret); - rc = -ENOEXEC; - goto out; - } - } - - ctx->thread_ctx_switch_wait_token = 1; - } else if (!ctx->thread_ctx_switch_wait_token) { - u32 tmp; - - rc = hl_poll_timeout_memory(hdev, - &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), - 100, jiffies_to_usecs(hdev->timeout_jiffies), false); - - if (rc == -ETIMEDOUT) { - dev_err(hdev->dev, - "context switch phase timeout (%d)\n", tmp); - goto out; - } - } - - if (cs_type == CS_TYPE_DEFAULT) - rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute, - &cs_seq); - else - rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute, - num_chunks_execute, &cs_seq); - out: if (rc != -EAGAIN) { memset(args, 0, sizeof(*args)); @@ -1152,18 +1343,20 @@ out: args->out.seq = cs_seq; } - if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset)) - hl_device_reset(hdev, false, false); - return rc; } -static long _hl_cs_wait_ioctl(struct hl_device *hdev, - struct hl_ctx *ctx, u64 timeout_us, u64 seq) +static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, + u64 timeout_us, u64 seq, + enum hl_cs_wait_status *status, s64 *timestamp) { struct hl_fence *fence; unsigned long timeout; - long rc; + int rc = 0; + long completion_rc; + + if (timestamp) + *timestamp = 0; if (timeout_us == MAX_SCHEDULE_TIMEOUT) timeout = timeout_us; @@ -1181,11 +1374,20 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev, seq, ctx->cs_sequence); } else if (fence) { if (!timeout_us) - rc = completion_done(&fence->completion); + completion_rc = completion_done(&fence->completion); else - rc = wait_for_completion_interruptible_timeout( + completion_rc = + wait_for_completion_interruptible_timeout( &fence->completion, timeout); + if (completion_rc > 0) { + *status = CS_WAIT_STATUS_COMPLETED; + if (timestamp) + *timestamp = ktime_to_ns(fence->timestamp); + } else { + *status = CS_WAIT_STATUS_BUSY; + } + if (fence->error == -ETIMEDOUT) rc = -ETIMEDOUT; else if (fence->error == -EIO) @@ -1196,7 +1398,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev, dev_dbg(hdev->dev, "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", seq, ctx->cs_sequence); - rc = 1; + *status = CS_WAIT_STATUS_GONE; } hl_ctx_put(ctx); @@ -1208,14 +1410,17 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, 
void *data) { struct hl_device *hdev = hpriv->hdev; union hl_wait_cs_args *args = data; + enum hl_cs_wait_status status; u64 seq = args->in.seq; - long rc; + s64 timestamp; + int rc; - rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq); + rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, + &status, ×tamp); memset(args, 0, sizeof(*args)); - if (rc < 0) { + if (rc) { if (rc == -ERESTARTSYS) { dev_err_ratelimited(hdev->dev, "user process got signal while waiting for CS handle %llu\n", @@ -1236,10 +1441,23 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) return rc; } - if (rc == 0) - args->out.status = HL_WAIT_CS_STATUS_BUSY; - else + if (timestamp) { + args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; + args->out.timestamp_nsec = timestamp; + } + + switch (status) { + case CS_WAIT_STATUS_GONE: + args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; + fallthrough; + case CS_WAIT_STATUS_COMPLETED: args->out.status = HL_WAIT_CS_STATUS_COMPLETED; + break; + case CS_WAIT_STATUS_BUSY: + default: + args->out.status = HL_WAIT_CS_STATUS_BUSY; + break; + } return 0; } diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index 7a59dd7c6450..f65e6559149b 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -40,10 +40,14 @@ static void hl_ctx_fini(struct hl_ctx *ctx) if ((hdev->in_debug) && (hdev->compute_ctx == ctx)) hl_device_set_debug_mode(hdev, false); + hdev->asic_funcs->ctx_fini(ctx); hl_cb_va_pool_fini(ctx); hl_vm_ctx_fini(ctx); hl_asid_free(hdev, ctx->asid); + /* Scrub both SRAM and DRAM */ + hdev->asic_funcs->scrub_device_mem(hdev, 0, 0); + if ((!hdev->pldm) && (hdev->pdev) && (!hdev->asic_funcs->is_device_idle(hdev, &idle_mask, NULL))) diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 912ddfa360b1..cef716643979 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -22,9 +22,10 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, u8 i2c_reg, long *val) { struct cpucp_packet pkt; + u64 result; int rc; - if (hl_device_disabled_or_in_reset(hdev)) + if (!hl_device_operational(hdev, NULL)) return -EBUSY; memset(&pkt, 0, sizeof(pkt)); @@ -36,7 +37,9 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.i2c_reg = i2c_reg; rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, val); + 0, &result); + + *val = (long) result; if (rc) dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); @@ -50,7 +53,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, struct cpucp_packet pkt; int rc; - if (hl_device_disabled_or_in_reset(hdev)) + if (!hl_device_operational(hdev, NULL)) return -EBUSY; memset(&pkt, 0, sizeof(pkt)); @@ -76,7 +79,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) struct cpucp_packet pkt; int rc; - if (hl_device_disabled_or_in_reset(hdev)) + if (!hl_device_operational(hdev, NULL)) return; memset(&pkt, 0, sizeof(pkt)); @@ -113,7 +116,7 @@ static int command_buffers_show(struct seq_file *s, void *data) " %03llu %d 0x%08x %d %d %d\n", cb->id, cb->ctx->asid, cb->size, kref_read(&cb->refcount), - cb->mmap, cb->cs_cnt); + cb->mmap, atomic_read(&cb->cs_cnt)); } spin_unlock(&dev_entry->cb_spinlock); @@ -168,18 +171,19 @@ static int command_submission_jobs_show(struct seq_file *s, void *data) if (first) { first = 
false; seq_puts(s, "\n"); - seq_puts(s, " JOB ID CS ID CTX ASID H/W Queue\n"); - seq_puts(s, "---------------------------------------\n"); + seq_puts(s, " JOB ID CS ID CTX ASID JOB RefCnt H/W Queue\n"); + seq_puts(s, "----------------------------------------------------\n"); } if (job->cs) seq_printf(s, - " %02d %llu %d %d\n", + " %02d %llu %d %d %d\n", job->id, job->cs->sequence, job->cs->ctx->asid, - job->hw_queue_id); + kref_read(&job->refcount), job->hw_queue_id); else seq_printf(s, - " %02d 0 %d %d\n", - job->id, HL_KERNEL_ASID_ID, job->hw_queue_id); + " %02d 0 %d %d %d\n", + job->id, HL_KERNEL_ASID_ID, + kref_read(&job->refcount), job->hw_queue_id); } spin_unlock(&dev_entry->cs_job_spinlock); @@ -300,93 +304,15 @@ static int vm_show(struct seq_file *s, void *data) return 0; } -/* these inline functions are copied from mmu.c */ -static inline u64 get_hop0_addr(struct hl_ctx *ctx) -{ - return ctx->hdev->asic_prop.mmu_pgt_addr + - (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); -} - -static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr, u64 mask, u64 shift) -{ - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & mask) >> shift); -} - -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask, - mmu_specs->hop0_shift); -} - -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask, - mmu_specs->hop1_shift); -} - -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask, - mmu_specs->hop2_shift); -} - -static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask, - mmu_specs->hop3_shift); -} - -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask, - mmu_specs->hop4_shift); -} - -static inline u64 get_hop5_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop5_mask, - mmu_specs->hop5_shift); -} - -static inline u64 get_next_hop_addr(u64 curr_pte) -{ - if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & HOP_PHYS_ADDR_MASK; - else - return ULLONG_MAX; -} - static int mmu_show(struct seq_file *s, void *data) { struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_mmu_properties *mmu_prop; struct hl_ctx *ctx; - bool is_dram_addr; - - u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0, - hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0, - hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0, - hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0, - hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0, - hop5_addr = 0, hop5_pte_addr = 0, hop5_pte = 0, - virt_addr = dev_entry->mmu_addr; + struct hl_mmu_hop_info hops_info; + u64 virt_addr = dev_entry->mmu_addr; + int i; if (!hdev->mmu_enable) return 0; @@ -401,132 
+327,24 @@ static int mmu_show(struct seq_file *s, void *data) return 0; } - is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->dmmu.start_addr, - prop->dmmu.end_addr); - - /* shifts and masks are the same in PMMU and HPMMU, use one of them */ - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; - - mutex_lock(&ctx->mmu_lock); - - /* the following lookup is copied from unmap() in mmu.c */ - - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); - hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr); - hop1_addr = get_next_hop_addr(hop0_pte); - - if (hop1_addr == ULLONG_MAX) - goto not_mapped; - - hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); - hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr); - hop2_addr = get_next_hop_addr(hop1_pte); - - if (hop2_addr == ULLONG_MAX) - goto not_mapped; - - hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); - hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr); - hop3_addr = get_next_hop_addr(hop2_pte); - - if (hop3_addr == ULLONG_MAX) - goto not_mapped; - - hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); - hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr); - - if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) { - if (!(hop3_pte & LAST_MASK)) { - hop4_addr = get_next_hop_addr(hop3_pte); - - if (hop4_addr == ULLONG_MAX) - goto not_mapped; - - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, - hop4_addr, virt_addr); - hop4_pte = hdev->asic_funcs->read_pte(hdev, - hop4_pte_addr); - if (!(hop4_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - } else { - if (!(hop3_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - } - } else { - hop4_addr = get_next_hop_addr(hop3_pte); - - if (hop4_addr == ULLONG_MAX) - goto not_mapped; - - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, - hop4_addr, virt_addr); - hop4_pte = hdev->asic_funcs->read_pte(hdev, - hop4_pte_addr); - if (!(hop4_pte & LAST_MASK)) { - hop5_addr = get_next_hop_addr(hop4_pte); - - if (hop5_addr == ULLONG_MAX) - goto not_mapped; - - hop5_pte_addr = get_hop5_pte_addr(ctx, mmu_prop, - hop5_addr, virt_addr); - hop5_pte = hdev->asic_funcs->read_pte(hdev, - hop5_pte_addr); - if (!(hop5_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - } else { - if (!(hop4_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - } + if (hl_mmu_get_tlb_info(ctx, virt_addr, &hops_info)) { + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); + return 0; } seq_printf(s, "asid: %u, virt_addr: 0x%llx\n", dev_entry->mmu_asid, dev_entry->mmu_addr); - seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr); - seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr); - seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte); - - seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr); - seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr); - seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte); - - seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr); - seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr); - seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte); - - seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr); - seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr); - seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte); - - if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) { - if (!(hop3_pte & LAST_MASK)) { - seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr); - seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr); - seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte); - } - } else { - seq_printf(s, 
"hop4_addr: 0x%llx\n", hop4_addr); - seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr); - seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte); - - if (!(hop4_pte & LAST_MASK)) { - seq_printf(s, "hop5_addr: 0x%llx\n", hop5_addr); - seq_printf(s, "hop5_pte_addr: 0x%llx\n", hop5_pte_addr); - seq_printf(s, "hop5_pte: 0x%llx\n", hop5_pte); - } + for (i = 0 ; i < hops_info.used_hops ; i++) { + seq_printf(s, "hop%d_addr: 0x%llx\n", + i, hops_info.hop_info[i].hop_addr); + seq_printf(s, "hop%d_pte_addr: 0x%llx\n", + i, hops_info.hop_info[i].hop_pte_addr); + seq_printf(s, "hop%d_pte: 0x%llx\n", + i, hops_info.hop_info[i].hop_pte_val); } - goto out; - -not_mapped: - dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", - virt_addr); -out: - mutex_unlock(&ctx->mmu_lock); - return 0; } @@ -597,7 +415,7 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr) if (!hdev->mmu_enable) goto out; - if (hdev->dram_supports_virtual_memory && + if (prop->dram_supports_virtual_memory && (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr)) return true; @@ -616,78 +434,20 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u64 *phys_addr) { struct hl_ctx *ctx = hdev->compute_ctx; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_mmu_properties *mmu_prop; - u64 hop_addr, hop_pte_addr, hop_pte; - u64 offset_mask = HOP4_MASK | FLAGS_MASK; int rc = 0; - bool is_dram_addr; if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); return -EINVAL; } - is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->dmmu.start_addr, - prop->dmmu.end_addr); - - /* shifts and masks are the same in PMMU and HPMMU, use one of them */ - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; - - mutex_lock(&ctx->mmu_lock); - - /* hop 0 */ - hop_addr = get_hop0_addr(ctx); - hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - /* hop 1 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - /* hop 2 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - /* hop 3 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - if (!(hop_pte & LAST_MASK)) { - /* hop 4 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr, - virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - offset_mask = FLAGS_MASK; + rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr); + if (rc) { + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); + rc = -EINVAL; } - if (!(hop_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - - *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask); - - goto out; - -not_mapped: - dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", - virt_addr); - rc = -EINVAL; -out: - mutex_unlock(&ctx->mmu_lock); return rc; } diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c index 20572224099a..5871162a8442 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -10,20 +10,9 @@ #include "habanalabs.h" #include <linux/pci.h> -#include <linux/sched/signal.h> #include <linux/hwmon.h> #include <uapi/misc/habanalabs.h> -#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10) - -bool hl_device_disabled_or_in_reset(struct hl_device *hdev) -{ - if ((hdev->disabled) || (atomic_read(&hdev->in_reset))) - return true; - else - return false; -} - enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; @@ -32,12 +21,34 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) status = HL_DEVICE_STATUS_MALFUNCTION; else if (atomic_read(&hdev->in_reset)) status = HL_DEVICE_STATUS_IN_RESET; + else if (hdev->needs_reset) + status = HL_DEVICE_STATUS_NEEDS_RESET; else status = HL_DEVICE_STATUS_OPERATIONAL; return status; } +bool hl_device_operational(struct hl_device *hdev, + enum hl_device_status *status) +{ + enum hl_device_status current_status; + + current_status = hl_device_status(hdev); + if (status) + *status = current_status; + + switch (current_status) { + case HL_DEVICE_STATUS_IN_RESET: + case HL_DEVICE_STATUS_MALFUNCTION: + case HL_DEVICE_STATUS_NEEDS_RESET: + return false; + case HL_DEVICE_STATUS_OPERATIONAL: + default: + return true; + } +} + static void hpriv_release(struct kref *ref) { struct hl_fpriv *hpriv; @@ -231,16 +242,36 @@ delete_cdev_device: static void device_cdev_sysfs_del(struct hl_device *hdev) { - /* device_release() won't be called so must free devices explicitly */ - if (!hdev->cdev_sysfs_created) { - kfree(hdev->dev_ctrl); - kfree(hdev->dev); - return; - } + if (!hdev->cdev_sysfs_created) + goto put_devices; hl_sysfs_fini(hdev); cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); cdev_device_del(&hdev->cdev, hdev->dev); + +put_devices: + put_device(hdev->dev); + put_device(hdev->dev_ctrl); +} + +static void device_hard_reset_pending(struct work_struct *work) +{ + struct hl_device_reset_work *device_reset_work = + container_of(work, struct hl_device_reset_work, + reset_work.work); + struct hl_device *hdev = device_reset_work->hdev; + int rc; + + rc = hl_device_reset(hdev, true, true); + if ((rc == -EBUSY) && !hdev->device_fini_pending) { + dev_info(hdev->dev, + "Could not reset device. 
will try again in %u seconds", + HL_PENDING_RESET_PER_SEC); + + queue_delayed_work(device_reset_work->wq, + &device_reset_work->reset_work, + msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000)); + } } /* @@ -327,17 +358,32 @@ static int device_early_init(struct hl_device *hdev) hl_cb_mgr_init(&hdev->kernel_cb_mgr); + hdev->device_reset_work.wq = + create_singlethread_workqueue("hl_device_reset"); + if (!hdev->device_reset_work.wq) { + rc = -ENOMEM; + dev_err(hdev->dev, "Failed to create device reset WQ\n"); + goto free_cb_mgr; + } + + INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, + device_hard_reset_pending); + hdev->device_reset_work.hdev = hdev; + hdev->device_fini_pending = 0; + mutex_init(&hdev->send_cpu_message_lock); mutex_init(&hdev->debug_lock); mutex_init(&hdev->mmu_cache_lock); - INIT_LIST_HEAD(&hdev->hw_queues_mirror_list); - spin_lock_init(&hdev->hw_queues_mirror_lock); + INIT_LIST_HEAD(&hdev->cs_mirror_list); + spin_lock_init(&hdev->cs_mirror_lock); INIT_LIST_HEAD(&hdev->fpriv_list); mutex_init(&hdev->fpriv_list_lock); atomic_set(&hdev->in_reset, 0); return 0; +free_cb_mgr: + hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); free_idle_busy_ts_arr: kfree(hdev->idle_busy_ts_arr); free_chip_info: @@ -380,6 +426,7 @@ static void device_early_fini(struct hl_device *hdev) kfree(hdev->hl_chip_info); destroy_workqueue(hdev->eq_wq); + destroy_workqueue(hdev->device_reset_work.wq); for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) destroy_workqueue(hdev->cq_wq[i]); @@ -412,7 +459,7 @@ static void hl_device_heartbeat(struct work_struct *work) struct hl_device *hdev = container_of(work, struct hl_device, work_heartbeat.work); - if (hl_device_disabled_or_in_reset(hdev)) + if (!hl_device_operational(hdev, NULL)) goto reschedule; if (!hdev->asic_funcs->send_heartbeat(hdev)) @@ -758,16 +805,12 @@ disable_device: return rc; } -static int device_kill_open_processes(struct hl_device *hdev) +static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) { - u16 pending_total, pending_cnt; struct hl_fpriv *hpriv; struct task_struct *task = NULL; + u32 pending_cnt; - if (hdev->pldm) - pending_total = HL_PLDM_PENDING_RESET_PER_SEC; - else - pending_total = HL_PENDING_RESET_PER_SEC; /* Giving time for user to close FD, and for processes that are inside * hl_device_open to finish @@ -775,6 +818,19 @@ static int device_kill_open_processes(struct hl_device *hdev) if (!list_empty(&hdev->fpriv_list)) ssleep(1); + if (timeout) { + pending_cnt = timeout; + } else { + if (hdev->process_kill_trial_cnt) { + /* Processes have been already killed */ + pending_cnt = 1; + goto wait_for_processes; + } else { + /* Wait a small period after process kill */ + pending_cnt = HL_PENDING_RESET_PER_SEC; + } + } + mutex_lock(&hdev->fpriv_list_lock); /* This section must be protected because we are dereferencing @@ -794,16 +850,18 @@ static int device_kill_open_processes(struct hl_device *hdev) mutex_unlock(&hdev->fpriv_list_lock); - /* We killed the open users, but because the driver cleans up after the - * user contexts are closed (e.g. mmu mappings), we need to wait again - * to make sure the cleaning phase is finished before continuing with - * the reset + /* + * We killed the open users, but that doesn't mean they are closed. + * It could be that they are running a long cleanup phase in the driver + * e.g. MMU unmappings, or running other long teardown flow even before + * our cleanup. + * Therefore we need to wait again to make sure they are closed before + * continuing with the reset. 
*/ - pending_cnt = pending_total; - +wait_for_processes: while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) { - dev_info(hdev->dev, + dev_dbg(hdev->dev, "Waiting for all unmap operations to finish before hard reset\n"); pending_cnt--; @@ -811,18 +869,17 @@ static int device_kill_open_processes(struct hl_device *hdev) ssleep(1); } - return list_empty(&hdev->fpriv_list) ? 0 : -EBUSY; -} + /* All processes exited successfully */ + if (list_empty(&hdev->fpriv_list)) + return 0; -static void device_hard_reset_pending(struct work_struct *work) -{ - struct hl_device_reset_work *device_reset_work = - container_of(work, struct hl_device_reset_work, reset_work); - struct hl_device *hdev = device_reset_work->hdev; + /* Give up waiting for processes to exit */ + if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS) + return -ETIME; - hl_device_reset(hdev, true, true); + hdev->process_kill_trial_cnt++; - kfree(device_reset_work); + return -EBUSY; } /* @@ -859,6 +916,10 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, hard_reset = true; } + /* Re-entry of reset thread */ + if (from_hard_reset_thread && hdev->process_kill_trial_cnt) + goto kill_processes; + /* * Prevent concurrency in this function - only one reset should be * done at any given time. Only need to perform this if we didn't @@ -904,26 +965,17 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, again: if ((hard_reset) && (!from_hard_reset_thread)) { - struct hl_device_reset_work *device_reset_work; - hdev->hard_reset_pending = true; - device_reset_work = kzalloc(sizeof(*device_reset_work), - GFP_ATOMIC); - if (!device_reset_work) { - rc = -ENOMEM; - goto out_err; - } + hdev->process_kill_trial_cnt = 0; /* * Because the reset function can't run from interrupt or * from heartbeat work, we need to call the reset function * from a dedicated work */ - INIT_WORK(&device_reset_work->reset_work, - device_hard_reset_pending); - device_reset_work->hdev = hdev; - schedule_work(&device_reset_work->reset_work); + queue_delayed_work(hdev->device_reset_work.wq, + &hdev->device_reset_work.reset_work, 0); return 0; } @@ -949,12 +1001,25 @@ again: /* Go over all the queues, release all CS and their jobs */ hl_cs_rollback_all(hdev); +kill_processes: if (hard_reset) { /* Kill processes here after CS rollback. This is because the * process can't really exit until all its CSs are done, which * is what we do in cs rollback */ - rc = device_kill_open_processes(hdev); + rc = device_kill_open_processes(hdev, 0); + + if (rc == -EBUSY) { + if (hdev->device_fini_pending) { + dev_crit(hdev->dev, + "Failed to kill all open processes, stopping hard reset\n"); + goto out_err; + } + + /* signal reset thread to reschedule */ + return rc; + } + if (rc) { dev_crit(hdev->dev, "Failed to kill all open processes, stopping hard reset\n"); @@ -1089,6 +1154,7 @@ again: } atomic_set(&hdev->in_reset, 0); + hdev->needs_reset = false; if (hard_reset) hdev->hard_reset_cnt++; @@ -1261,13 +1327,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) hl_debugfs_add_device(hdev); - if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { - dev_info(hdev->dev, - "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->halt_engines(hdev, true); - hdev->asic_funcs->hw_fini(hdev, true); - } - /* * From this point, in case of an error, add char devices and create * sysfs nodes as part of the error flow, to allow debugging. 
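The hard-reset rework above hinges on one pattern: device_kill_open_processes() now returns -EBUSY while user processes are still closing, and the reset work re-queues itself on the new single-threaded workqueue until the processes exit, device_fini_pending is set, or HL_PENDING_RESET_MAX_TRIALS is reached. The stand-alone sketch below shows that self-rescheduling delayed-work pattern in isolation; every identifier in it (my_reset_work, my_try_reset, MY_RETRY_SEC, MY_MAX_TRIALS) is hypothetical and it is not code taken from this patch.

/*
 * Minimal sketch of the self-rescheduling reset pattern used above.
 * All names here are hypothetical and only illustrate the mechanism.
 */
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/module.h>

#define MY_RETRY_SEC	10	/* delay between retries, in seconds */
#define MY_MAX_TRIALS	60	/* give up after ~10 minutes of retries */

struct my_reset_work {
	struct workqueue_struct	*wq;
	struct delayed_work	work;
	unsigned int		trials;
	bool			fini_pending;
};

static struct my_reset_work my_rw;

static int my_try_reset(struct my_reset_work *rw)
{
	/* Stand-in for the real reset attempt: report -EBUSY while user
	 * processes are still holding the device open.
	 */
	return rw->trials < 3 ? -EBUSY : 0;
}

static void my_reset_fn(struct work_struct *work)
{
	struct my_reset_work *rw =
		container_of(work, struct my_reset_work, work.work);

	/* Re-queue ourselves while the reset is blocked, unless teardown
	 * has started or the trial budget is exhausted.
	 */
	if (my_try_reset(rw) == -EBUSY && !rw->fini_pending &&
	    ++rw->trials < MY_MAX_TRIALS)
		queue_delayed_work(rw->wq, &rw->work,
				   msecs_to_jiffies(MY_RETRY_SEC * 1000));
}

static int __init my_init(void)
{
	my_rw.wq = create_singlethread_workqueue("my_reset");
	if (!my_rw.wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&my_rw.work, my_reset_fn);
	queue_delayed_work(my_rw.wq, &my_rw.work, 0);
	return 0;
}

static void __exit my_exit(void)
{
	/* Mirror the device_fini_pending + flush_delayed_work() flow:
	 * stop re-queueing, let a pending trial finish, drop the queue.
	 */
	my_rw.fini_pending = true;
	flush_delayed_work(&my_rw.work);
	destroy_workqueue(my_rw.wq);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");

Re-queueing from inside the handler instead of sleeping in it keeps the reset workqueue free between trials, which is why the patch converts reset_work from a plain work_struct into a delayed_work owned by hl_device_reset_work.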
@@ -1371,9 +1430,9 @@ sw_fini: early_fini: device_early_fini(hdev); free_dev_ctrl: - kfree(hdev->dev_ctrl); + put_device(hdev->dev_ctrl); free_dev: - kfree(hdev->dev); + put_device(hdev->dev); out_disabled: hdev->disabled = true; if (add_cdev_sysfs_on_err) @@ -1398,11 +1457,14 @@ out_disabled: */ void hl_device_fini(struct hl_device *hdev) { - int i, rc; ktime_t timeout; + int i, rc; dev_info(hdev->dev, "Removing device\n"); + hdev->device_fini_pending = 1; + flush_delayed_work(&hdev->device_reset_work.reset_work); + /* * This function is competing with the reset function, so try to * take the reset atomic and if we are already in middle of reset, @@ -1458,7 +1520,11 @@ void hl_device_fini(struct hl_device *hdev) * can't really exit until all its CSs are done, which is what we * do in cs rollback */ - rc = device_kill_open_processes(hdev); + dev_info(hdev->dev, + "Waiting for all processes to exit (timeout of %u seconds)", + HL_PENDING_RESET_LONG_SEC); + + rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC); if (rc) dev_crit(hdev->dev, "Failed to kill all open processes\n"); diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index cd41c7ceb0e7..0e1c629e9800 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -9,8 +9,6 @@ #include "../include/common/hl_boot_if.h" #include <linux/firmware.h> -#include <linux/genalloc.h> -#include <linux/io-64-nonatomic-lo-hi.h> #include <linux/slab.h> #define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */ @@ -20,16 +18,18 @@ * @hdev: pointer to hl_device structure. * @fw_name: the firmware image name * @dst: IO memory mapped address space to copy firmware to + * @src_offset: offset in src FW to copy from + * @size: amount of bytes to copy (0 to copy the whole binary) * * Copy fw code from firmware file to device memory. * * Return: 0 on success, non-zero for failure. 
*/ int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, - void __iomem *dst) + void __iomem *dst, u32 src_offset, u32 size) { const struct firmware *fw; - const u64 *fw_data; + const void *fw_data; size_t fw_size; int rc; @@ -57,9 +57,20 @@ int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, goto out; } - fw_data = (const u64 *) fw->data; + if (size - src_offset > fw_size) { + dev_err(hdev->dev, + "size to copy(%u) and offset(%u) are invalid\n", + size, src_offset); + rc = -EINVAL; + goto out; + } + + if (size) + fw_size = size; + + fw_data = (const void *) fw->data; - memcpy_toio(dst, fw_data, fw_size); + memcpy_toio(dst, fw_data + src_offset, fw_size); out: release_firmware(fw); @@ -77,7 +88,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) } int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, - u16 len, u32 timeout, long *result) + u16 len, u32 timeout, u64 *result) { struct cpucp_packet *pkt; dma_addr_t pkt_dma_addr; @@ -132,7 +143,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, >> CPUCP_PKT_CTL_OPCODE_SHIFT); rc = -EIO; } else if (result) { - *result = (long) le64_to_cpu(pkt->result); + *result = le64_to_cpu(pkt->result); } out: @@ -146,7 +157,7 @@ out: int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) { struct cpucp_packet pkt; - long result; + u64 result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -169,7 +180,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, { struct cpucp_unmask_irq_arr_packet *pkt; size_t total_pkt_size; - long result; + u64 result; int rc; total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) + @@ -208,7 +219,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, int hl_fw_test_cpu_queue(struct hl_device *hdev) { struct cpucp_packet test_pkt = {}; - long result; + u64 result; int rc; test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << @@ -221,7 +232,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev) if (!rc) { if (result != CPUCP_PACKET_FENCE_VAL) dev_err(hdev->dev, - "CPU queue test failed (0x%08lX)\n", result); + "CPU queue test failed (%#08llx)\n", result); } else { dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc); } @@ -252,7 +263,7 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, int hl_fw_send_heartbeat(struct hl_device *hdev) { struct cpucp_packet hb_pkt = {}; - long result; + u64 result; int rc; hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << @@ -268,13 +279,14 @@ int hl_fw_send_heartbeat(struct hl_device *hdev) return rc; } -int hl_fw_cpucp_info_get(struct hl_device *hdev) +int hl_fw_cpucp_info_get(struct hl_device *hdev, + u32 cpu_security_boot_status_reg) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct cpucp_packet pkt = {}; void *cpucp_info_cpu_addr; dma_addr_t cpucp_info_dma_addr; - long result; + u64 result; int rc; cpucp_info_cpu_addr = @@ -313,6 +325,11 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev) goto out; } + /* Read FW application security bits again */ + if (hdev->asic_prop.fw_security_status_valid) + hdev->asic_prop.fw_app_security_map = + RREG32(cpu_security_boot_status_reg); + out: hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, sizeof(struct cpucp_info), cpucp_info_cpu_addr); @@ -325,7 +342,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) struct cpucp_packet pkt = {}; void *eeprom_info_cpu_addr; dma_addr_t eeprom_info_dma_addr; - long result; + u64 result; int 
rc; eeprom_info_cpu_addr = @@ -368,7 +385,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters) { struct cpucp_packet pkt = {}; - long result; + u64 result; int rc; pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET << @@ -415,7 +432,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) { struct cpucp_packet pkt = {}; - long result; + u64 result; int rc; pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET << @@ -435,9 +452,36 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) return rc; } -static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg) +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, + u16 *pll_freq_arr) { - u32 err_val; + struct cpucp_packet pkt; + u64 result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.pll_type = __cpu_to_le16(pll_index); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, &result); + if (rc) + dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc); + + pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result); + pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result); + pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result); + pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result); + + return rc; +} + +static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, + u32 cpu_security_boot_status_reg) +{ + u32 err_val, security_val; /* Some of the firmware status codes are deprecated in newer f/w * versions. In those versions, the errors are reported @@ -472,6 +516,18 @@ static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg) if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) dev_err(hdev->dev, "Device boot error - NIC F/W initialization failed\n"); + if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) + dev_warn(hdev->dev, + "Device boot warning - security not ready\n"); + if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) + dev_err(hdev->dev, "Device boot error - security failure\n"); + if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) + dev_err(hdev->dev, "Device boot error - eFuse failure\n"); + + security_val = RREG32(cpu_security_boot_status_reg); + if (security_val & CPU_BOOT_DEV_STS0_ENABLED) + dev_dbg(hdev->dev, "Device security status %#x\n", + security_val); } static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) @@ -524,10 +580,12 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) } } -int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg, - u32 boot_err0_reg, u32 timeout) +int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 cpu_security_boot_status_reg, u32 boot_err0_reg, + u32 timeout) { - u32 status; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 status, security_status; int rc; if (!hdev->cpu_enable) @@ -557,23 +615,52 @@ int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg, if (rc) { dev_err(hdev->dev, "Failed to read preboot version\n"); detect_cpu_boot_status(hdev, status); - fw_read_errors(hdev, boot_err0_reg); + fw_read_errors(hdev, boot_err0_reg, + cpu_security_boot_status_reg); return -EIO; } - hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT); + rc = hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT); + if (rc) + 
return rc; + + security_status = RREG32(cpu_security_boot_status_reg); + + /* We read security status multiple times during boot: + * 1. preboot - we check if fw security feature is supported + * 2. boot cpu - we get boot cpu security status + * 3. FW application - we get FW application security status + * + * Preboot: + * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set + * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN) + */ + if (security_status & CPU_BOOT_DEV_STS0_ENABLED) { + hdev->asic_prop.fw_security_status_valid = 1; + prop->fw_security_disabled = + !(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN); + } else { + hdev->asic_prop.fw_security_status_valid = 0; + prop->fw_security_disabled = true; + } + + dev_info(hdev->dev, "firmware-level security is %s\n", + prop->fw_security_disabled ? "disabled" : "enabled"); return 0; } int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, - u32 boot_err0_reg, bool skip_bmc, - u32 cpu_timeout, u32 boot_fit_timeout) + u32 cpu_security_boot_status_reg, u32 boot_err0_reg, + bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout) { u32 status; int rc; + if (!(hdev->fw_loading & FW_TYPE_BOOT_CPU)) + return 0; + dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n", cpu_timeout / USEC_PER_SEC); @@ -631,17 +718,24 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, 10000, cpu_timeout); + dev_dbg(hdev->dev, "uboot status = %d\n", status); + /* Read U-Boot version now in case we will later fail */ hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT); + /* Read boot_cpu security bits */ + if (hdev->asic_prop.fw_security_status_valid) + hdev->asic_prop.fw_boot_cpu_security_map = + RREG32(cpu_security_boot_status_reg); + if (rc) { detect_cpu_boot_status(hdev, status); rc = -EIO; goto out; } - if (!hdev->fw_loading) { - dev_info(hdev->dev, "Skip loading FW\n"); + if (!(hdev->fw_loading & FW_TYPE_LINUX)) { + dev_info(hdev->dev, "Skip loading Linux F/W\n"); goto out; } @@ -702,10 +796,23 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, goto out; } + /* Read FW application security bits */ + if (hdev->asic_prop.fw_security_status_valid) { + hdev->asic_prop.fw_app_security_map = + RREG32(cpu_security_boot_status_reg); + + if (hdev->asic_prop.fw_app_security_map & + CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) + hdev->asic_prop.hard_reset_done_by_fw = true; + } + + dev_dbg(hdev->dev, "Firmware hard-reset is %s\n", + hdev->asic_prop.hard_reset_done_by_fw ? 
"enabled" : "disabled"); + dev_info(hdev->dev, "Successfully loaded firmware to device\n"); out: - fw_read_errors(hdev, boot_err0_reg); + fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg); return rc; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 6ed974d2def0..571eda6ef5ab 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -10,6 +10,7 @@ #include "../include/common/cpucp_if.h" #include "../include/common/qman_if.h" +#include "../include/hw_ip/mmu/mmu_general.h" #include <uapi/misc/habanalabs.h> #include <linux/cdev.h> @@ -19,6 +20,10 @@ #include <linux/scatterlist.h> #include <linux/hashtable.h> #include <linux/bitfield.h> +#include <linux/genalloc.h> +#include <linux/sched/signal.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/coresight.h> #define HL_NAME "habanalabs" @@ -36,7 +41,9 @@ #define HL_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFull >> PAGE_SHIFT) #define HL_MMAP_OFFSET_VALUE_GET(off) (off & HL_MMAP_OFFSET_VALUE_MASK) -#define HL_PENDING_RESET_PER_SEC 30 +#define HL_PENDING_RESET_PER_SEC 10 +#define HL_PENDING_RESET_MAX_TRIALS 60 /* 10 minutes */ +#define HL_PENDING_RESET_LONG_SEC 60 #define HL_HARD_RESET_MAX_TIMEOUT 120 @@ -61,15 +68,29 @@ /* MMU */ #define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ +/** + * enum hl_mmu_page_table_locaion - mmu page table location + * @MMU_DR_PGT: page-table is located on device DRAM. + * @MMU_HR_PGT: page-table is located on host memory. + * @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported. + */ +enum hl_mmu_page_table_location { + MMU_DR_PGT = 0, /* device-dram-resident MMU PGT */ + MMU_HR_PGT, /* host resident MMU PGT */ + MMU_NUM_PGT_LOCATIONS /* num of PGT locations */ +}; + /* * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream */ -#define HL_RSVD_SOBS 4 -#define HL_RSVD_MONS 2 +#define HL_RSVD_SOBS 2 +#define HL_RSVD_MONS 1 -#define HL_RSVD_SOBS_IN_USE 2 -#define HL_RSVD_MONS_IN_USE 1 +/* + * HL_COLLECTIVE_RSVD_MSTR_MONS 'collective' reserved monitors per QMAN stream + */ +#define HL_COLLECTIVE_RSVD_MSTR_MONS 2 #define HL_MAX_SOB_VAL (1 << 15) @@ -80,6 +101,28 @@ #define HL_MAX_DCORES 4 +#define HL_MAX_SOBS_PER_MONITOR 8 + +/** + * struct hl_gen_wait_properties - properties for generating a wait CB + * @data: command buffer + * @q_idx: queue id is used to extract fence register address + * @size: offset in command buffer + * @sob_base: SOB base to use in this wait CB + * @sob_val: SOB value to wait for + * @mon_id: monitor to use in this wait CB + * @sob_mask: each bit represents a SOB offset from sob_base to be used + */ +struct hl_gen_wait_properties { + void *data; + u32 q_idx; + u32 size; + u16 sob_base; + u16 sob_val; + u16 mon_id; + u8 sob_mask; +}; + /** * struct pgt_info - MMU hop page info. * @node: hash linked-list node for the pgts shadow hash of pgts. @@ -125,6 +168,18 @@ enum hl_fw_component { }; /** + * enum hl_fw_types - F/W types to load + * @FW_TYPE_LINUX: Linux image for device CPU + * @FW_TYPE_BOOT_CPU: Boot image for device CPU + * @FW_TYPE_ALL_TYPES: Mask for all types + */ +enum hl_fw_types { + FW_TYPE_LINUX = 0x1, + FW_TYPE_BOOT_CPU = 0x2, + FW_TYPE_ALL_TYPES = (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU) +}; + +/** * enum hl_queue_type - Supported QUEUE types. * @QUEUE_TYPE_NA: queue is not available. 
* @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the @@ -146,7 +201,8 @@ enum hl_queue_type { enum hl_cs_type { CS_TYPE_DEFAULT, CS_TYPE_SIGNAL, - CS_TYPE_WAIT + CS_TYPE_WAIT, + CS_TYPE_COLLECTIVE_WAIT }; /* @@ -176,6 +232,17 @@ struct hl_outbound_pci_region { }; /* + * enum queue_cb_alloc_flags - Indicates queue support for CBs that + * allocated by Kernel or by User + * @CB_ALLOC_KERNEL: support only CBs that allocated by Kernel + * @CB_ALLOC_USER: support only CBs that allocated by User + */ +enum queue_cb_alloc_flags { + CB_ALLOC_KERNEL = 0x1, + CB_ALLOC_USER = 0x2 +}; + +/* * struct hl_hw_sob - H/W SOB info. * @hdev: habanalabs device structure. * @kref: refcount of this SOB. The SOB will reset once the refcount is zero. @@ -189,19 +256,29 @@ struct hl_hw_sob { u32 q_idx; }; +enum hl_collective_mode { + HL_COLLECTIVE_NOT_SUPPORTED = 0x0, + HL_COLLECTIVE_MASTER = 0x1, + HL_COLLECTIVE_SLAVE = 0x2 +}; + /** * struct hw_queue_properties - queue information. * @type: queue type. + * @queue_cb_alloc_flags: bitmap which indicates if the hw queue supports CB + * that allocated by the Kernel driver and therefore, + * a CB handle can be provided for jobs on this queue. + * Otherwise, a CB address must be provided. + * @collective_mode: collective mode of current queue * @driver_only: true if only the driver is allowed to send a job to this queue, * false otherwise. - * @requires_kernel_cb: true if a CB handle must be provided for jobs on this - * queue, false otherwise (a CB address must be provided). * @supports_sync_stream: True if queue supports sync stream */ struct hw_queue_properties { enum hl_queue_type type; + enum queue_cb_alloc_flags cb_alloc_flags; + enum hl_collective_mode collective_mode; u8 driver_only; - u8 requires_kernel_cb; u8 supports_sync_stream; }; @@ -227,6 +304,8 @@ enum hl_device_hw_state { HL_DEVICE_HW_STATE_DIRTY }; +#define HL_MMU_VA_ALIGNMENT_NOT_NEEDED 0 + /** * struct hl_mmu_properties - ASIC specific MMU address translation properties. * @start_addr: virtual start address of the memory region. @@ -245,6 +324,8 @@ enum hl_device_hw_state { * @hop5_mask: mask to get the PTE address in hop 5. * @page_size: default page size used to allocate memory. * @num_hops: The amount of hops supported by the translation table. + * @host_resident: Should the MMU page table reside in host memory or in the + * device DRAM. */ struct hl_mmu_properties { u64 start_addr; @@ -263,6 +344,7 @@ struct hl_mmu_properties { u64 hop5_mask; u32 page_size; u32 num_hops; + u8 host_resident; }; /** @@ -314,6 +396,14 @@ struct hl_mmu_properties { * @cb_pool_cb_size: size of each CB in the CB pool. * @max_pending_cs: maximum of concurrent pending command submissions * @max_queues: maximum amount of queues in the system + * @fw_boot_cpu_security_map: bitmap representation of boot cpu security status + * reported by FW, bit description can be found in + * CPU_BOOT_DEV_STS* + * @fw_app_security_map: bitmap representation of application security status + * reported by FW, bit description can be found in + * CPU_BOOT_DEV_STS* + * @collective_first_sob: first sync object available for collective use + * @collective_first_mon: first monitor available for collective use * @sync_stream_first_sob: first sync object available for sync stream use * @sync_stream_first_mon: first monitor available for sync stream use * @first_available_user_sob: first sob available for the user @@ -322,6 +412,10 @@ struct hl_mmu_properties { * @completion_queues_count: number of completion queues. 
* @fw_security_disabled: true if security measures are disabled in firmware, * false otherwise + * @fw_security_status_valid: security status bits are valid and can be fetched + * from BOOT_DEV_STS0 + * @dram_supports_virtual_memory: is there an MMU towards the DRAM + * @hard_reset_done_by_fw: true if firmware is handling hard reset flow */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -366,6 +460,10 @@ struct asic_fixed_properties { u32 cb_pool_cb_size; u32 max_pending_cs; u32 max_queues; + u32 fw_boot_cpu_security_map; + u32 fw_app_security_map; + u16 collective_first_sob; + u16 collective_first_mon; u16 sync_stream_first_sob; u16 sync_stream_first_mon; u16 first_available_user_sob[HL_MAX_DCORES]; @@ -373,6 +471,9 @@ struct asic_fixed_properties { u8 tpc_enabled_mask; u8 completion_queues_count; u8 fw_security_disabled; + u8 fw_security_status_valid; + u8 dram_supports_virtual_memory; + u8 hard_reset_done_by_fw; }; /** @@ -380,12 +481,14 @@ struct asic_fixed_properties { * @completion: fence is implemented using completion * @refcount: refcount for this fence * @error: mark this fence with error + * @timestamp: timestamp upon completion * */ struct hl_fence { struct completion completion; struct kref refcount; int error; + ktime_t timestamp; }; /** @@ -397,6 +500,7 @@ struct hl_fence { * @cs_seq: command submission sequence number. * @type: type of the CS - signal/wait. * @sob_val: the SOB value that is used in this signal/wait CS. + * @sob_group: the SOB group that is used in this collective wait CS. */ struct hl_cs_compl { struct hl_fence base_fence; @@ -406,6 +510,7 @@ struct hl_cs_compl { u64 cs_seq; enum hl_cs_type type; u16 sob_val; + u16 sob_group; }; /* @@ -427,7 +532,7 @@ struct hl_cb_mgr { * @refcount: reference counter for usage of the CB. * @hdev: pointer to device this CB belongs to. * @ctx: pointer to the CB owner's context. - * @lock: spinlock to protect mmap/cs flows. + * @lock: spinlock to protect mmap flows. * @debugfs_list: node in debugfs list of command buffers. * @pool_list: node in pool list of command buffers. * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to @@ -456,7 +561,7 @@ struct hl_cb { dma_addr_t bus_address; u32 mmap_size; u32 size; - u32 cs_cnt; + atomic_t cs_cnt; u8 mmap; u8 is_pool; u8 is_internal; @@ -468,6 +573,7 @@ struct hl_cb { * QUEUES */ +struct hl_cs; struct hl_cs_job; /* Queue length of external and HW queues */ @@ -490,10 +596,38 @@ struct hl_cs_job; #define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M /** - * struct hl_hw_queue - describes a H/W transport queue. + * struct hl_sync_stream_properties - + * describes a H/W queue sync stream properties * @hw_sob: array of the used H/W SOBs by this H/W queue. + * @next_sob_val: the next value to use for the currently used SOB. + * @base_sob_id: the base SOB id of the SOBs used by this queue. + * @base_mon_id: the base MON id of the MONs used by this queue. + * @collective_mstr_mon_id: the MON ids of the MONs used by this master queue + * in order to sync with all slave queues. + * @collective_slave_mon_id: the MON id used by this slave queue in order to + * sync with its master queue. + * @collective_sob_id: current SOB id used by this collective slave queue + * to signal its collective master queue upon completion. + * @curr_sob_offset: the id offset to the currently used SOB from the + * HL_RSVD_SOBS that are being used by this queue. 
+ */ +struct hl_sync_stream_properties { + struct hl_hw_sob hw_sob[HL_RSVD_SOBS]; + u16 next_sob_val; + u16 base_sob_id; + u16 base_mon_id; + u16 collective_mstr_mon_id[HL_COLLECTIVE_RSVD_MSTR_MONS]; + u16 collective_slave_mon_id; + u16 collective_sob_id; + u8 curr_sob_offset; +}; + +/** + * struct hl_hw_queue - describes a H/W transport queue. * @shadow_queue: pointer to a shadow queue that holds pointers to jobs. + * @sync_stream_prop: sync stream queue properties * @queue_type: type of queue. + * @collective_mode: collective mode of current queue * @kernel_address: holds the queue's kernel virtual address. * @bus_address: holds the queue's DMA address. * @pi: holds the queue's pi value. @@ -502,33 +636,25 @@ struct hl_cs_job; * @cq_id: the id for the corresponding CQ for this H/W queue. * @msi_vec: the IRQ number of the H/W queue. * @int_queue_len: length of internal queue (number of entries). - * @next_sob_val: the next value to use for the currently used SOB. - * @base_sob_id: the base SOB id of the SOBs used by this queue. - * @base_mon_id: the base MON id of the MONs used by this queue. * @valid: is the queue valid (we have array of 32 queues, not all of them * exist). - * @curr_sob_offset: the id offset to the currently used SOB from the - * HL_RSVD_SOBS that are being used by this queue. * @supports_sync_stream: True if queue supports sync stream */ struct hl_hw_queue { - struct hl_hw_sob hw_sob[HL_RSVD_SOBS]; - struct hl_cs_job **shadow_queue; - enum hl_queue_type queue_type; - void *kernel_address; - dma_addr_t bus_address; - u32 pi; - atomic_t ci; - u32 hw_queue_id; - u32 cq_id; - u32 msi_vec; - u16 int_queue_len; - u16 next_sob_val; - u16 base_sob_id; - u16 base_mon_id; - u8 valid; - u8 curr_sob_offset; - u8 supports_sync_stream; + struct hl_cs_job **shadow_queue; + struct hl_sync_stream_properties sync_stream_prop; + enum hl_queue_type queue_type; + enum hl_collective_mode collective_mode; + void *kernel_address; + dma_addr_t bus_address; + u32 pi; + atomic_t ci; + u32 hw_queue_id; + u32 cq_id; + u32 msi_vec; + u16 int_queue_len; + u8 valid; + u8 supports_sync_stream; }; /** @@ -650,6 +776,7 @@ enum div_select_defs { * dma_free_coherent(). This is ASIC function because * its implementation is not trivial when the driver * is loaded in simulation mode (not upstreamed). + * @scrub_device_mem: Scrub device memory given an address and size * @get_int_queue_base: get the internal queue base address. * @test_queues: run simple test on all queues for sanity check. * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool. @@ -700,6 +827,7 @@ enum div_select_defs { * @wreg: Write a register. Needed for simulator support. * @halt_coresight: stop the ETF and ETR traces. * @ctx_init: context dependent initialization. + * @ctx_fini: context dependent cleanup. * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index. * @read_device_fw_version: read the device's firmware versions that are @@ -711,9 +839,13 @@ enum div_select_defs { * @gen_signal_cb: Generate a signal CB. * @gen_wait_cb: Generate a wait CB. * @reset_sob: Reset a SOB. + * @reset_sob_group: Reset SOB group * @set_dma_mask_from_fw: set the DMA mask in the driver according to the * firmware configuration * @get_device_time: Get the device time. 
+ * @collective_wait_init_cs: Generate collective master/slave packets + * and place them in the relevant cs jobs + * @collective_wait_create_jobs: allocate collective wait cs jobs */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -736,6 +868,7 @@ struct hl_asic_funcs { dma_addr_t *dma_handle, gfp_t flag); void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size, void *cpu_addr, dma_addr_t dma_handle); + int (*scrub_device_mem)(struct hl_device *hdev, u64 addr, u64 size); void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id, dma_addr_t *dma_handle, u16 *queue_len); int (*test_queues)(struct hl_device *hdev); @@ -794,28 +927,34 @@ struct hl_asic_funcs { int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size); int (*send_cpu_message)(struct hl_device *hdev, u32 *msg, - u16 len, u32 timeout, long *result); - enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev); + u16 len, u32 timeout, u64 *result); int (*pci_bars_map)(struct hl_device *hdev); int (*init_iatu)(struct hl_device *hdev); u32 (*rreg)(struct hl_device *hdev, u32 reg); void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); void (*halt_coresight)(struct hl_device *hdev); int (*ctx_init)(struct hl_ctx *ctx); + void (*ctx_fini)(struct hl_ctx *ctx); int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx); - void (*read_device_fw_version)(struct hl_device *hdev, + int (*read_device_fw_version)(struct hl_device *hdev, enum hl_fw_component fwc); int (*load_firmware_to_device)(struct hl_device *hdev); int (*load_boot_fit_to_device)(struct hl_device *hdev); u32 (*get_signal_cb_size)(struct hl_device *hdev); u32 (*get_wait_cb_size)(struct hl_device *hdev); - void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id); - void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id, - u16 sob_val, u16 mon_id, u32 q_idx); + u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id, + u32 size); + u32 (*gen_wait_cb)(struct hl_device *hdev, + struct hl_gen_wait_properties *prop); void (*reset_sob)(struct hl_device *hdev, void *data); + void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group); void (*set_dma_mask_from_fw)(struct hl_device *hdev); u64 (*get_device_time)(struct hl_device *hdev); + void (*collective_wait_init_cs)(struct hl_cs *cs); + int (*collective_wait_create_jobs)(struct hl_device *hdev, + struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id, + u32 collective_engine_id); }; @@ -826,17 +965,48 @@ struct hl_asic_funcs { #define HL_KERNEL_ASID_ID 0 /** + * enum hl_va_range_type - virtual address range type. + * @HL_VA_RANGE_TYPE_HOST: range type of host pages + * @HL_VA_RANGE_TYPE_HOST_HUGE: range type of host huge pages + * @HL_VA_RANGE_TYPE_DRAM: range type of dram pages + */ +enum hl_va_range_type { + HL_VA_RANGE_TYPE_HOST, + HL_VA_RANGE_TYPE_HOST_HUGE, + HL_VA_RANGE_TYPE_DRAM, + HL_VA_RANGE_TYPE_MAX +}; + +/** * struct hl_va_range - virtual addresses range. * @lock: protects the virtual addresses list. * @list: list of virtual addresses blocks available for mappings. * @start_addr: range start address. * @end_addr: range end address. + * @page_size: page size of this va range. 
*/ struct hl_va_range { struct mutex lock; struct list_head list; u64 start_addr; u64 end_addr; + u32 page_size; +}; + +/** + * struct hl_cs_counters_atomic - command submission counters + * @out_of_mem_drop_cnt: dropped due to memory allocation issue + * @parsing_drop_cnt: dropped due to error in packet parsing + * @queue_full_drop_cnt: dropped due to queue full + * @device_in_reset_drop_cnt: dropped due to device in reset + * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight + */ +struct hl_cs_counters_atomic { + atomic64_t out_of_mem_drop_cnt; + atomic64_t parsing_drop_cnt; + atomic64_t queue_full_drop_cnt; + atomic64_t device_in_reset_drop_cnt; + atomic64_t max_cs_in_flight_drop_cnt; }; /** @@ -849,14 +1019,12 @@ struct hl_va_range { * @refcount: reference counter for the context. Context is released only when * this hits 0. It is incremented on CS and CS_WAIT. * @cs_pending: array of hl fence objects representing pending CS. - * @host_va_range: holds available virtual addresses for host mappings. - * @host_huge_va_range: holds available virtual addresses for host mappings - * with huge pages. - * @dram_va_range: holds available virtual addresses for DRAM mappings. + * @va_range: holds available virtual addresses for host and dram mappings. * @mem_hash_lock: protects the mem_hash. * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the * MMU hash or walking the PGT requires taking this lock. * @debugfs_list: node in debugfs list of contexts. + * @cs_counters: context command submission counters. * @cb_va_pool: device VA pool for command buffers which are mapped to the * device's MMU. * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed @@ -879,26 +1047,24 @@ struct hl_va_range { struct hl_ctx { DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS); DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS); - struct hl_fpriv *hpriv; - struct hl_device *hdev; - struct kref refcount; - struct hl_fence **cs_pending; - struct hl_va_range *host_va_range; - struct hl_va_range *host_huge_va_range; - struct hl_va_range *dram_va_range; - struct mutex mem_hash_lock; - struct mutex mmu_lock; - struct list_head debugfs_list; - struct hl_cs_counters cs_counters; - struct gen_pool *cb_va_pool; - u64 cs_sequence; - u64 *dram_default_hops; - spinlock_t cs_lock; - atomic64_t dram_phys_mem; - atomic_t thread_ctx_switch_token; - u32 thread_ctx_switch_wait_token; - u32 asid; - u32 handle; + struct hl_fpriv *hpriv; + struct hl_device *hdev; + struct kref refcount; + struct hl_fence **cs_pending; + struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX]; + struct mutex mem_hash_lock; + struct mutex mmu_lock; + struct list_head debugfs_list; + struct hl_cs_counters_atomic cs_counters; + struct gen_pool *cb_va_pool; + u64 cs_sequence; + u64 *dram_default_hops; + spinlock_t cs_lock; + atomic64_t dram_phys_mem; + atomic_t thread_ctx_switch_token; + u32 thread_ctx_switch_wait_token; + u32 asid; + u32 handle; }; /** @@ -963,6 +1129,7 @@ struct hl_userptr { * @tdr_active: true if TDR was activated for this CS (to prevent * double TDR activation). * @aborted: true if CS was aborted due to some device error. + * @timestamp: true if a timestamp must be captured upon completion */ struct hl_cs { u16 *jobs_in_queue_cnt; @@ -983,6 +1150,7 @@ struct hl_cs { u8 timedout; u8 tdr_active; u8 aborted; + u8 timestamp; }; /** @@ -996,6 +1164,7 @@ struct hl_cs { * @userptr_list: linked-list of userptr mappings that belong to this job and * wait for completion. 
* @debugfs_list: node in debugfs list of command submission jobs. + * @refcount: reference counter for usage of the CS job. * @queue_type: the type of the H/W queue this job is submitted to. * @id: the id of this job inside a CS. * @hw_queue_id: the id of the H/W queue this job is submitted to. @@ -1019,6 +1188,7 @@ struct hl_cs_job { struct work_struct finish_work; struct list_head userptr_list; struct list_head debugfs_list; + struct kref refcount; enum hl_queue_type queue_type; u32 id; u32 hw_queue_id; @@ -1067,7 +1237,6 @@ struct hl_cs_parser { u8 contains_dma_pkt; }; - /* * MEMORY STRUCTURE */ @@ -1285,6 +1454,10 @@ struct hl_dbg_device_entry { * DEVICES */ +#define HL_STR_MAX 32 + +#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1) + /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards. */ @@ -1428,11 +1601,13 @@ struct hwmon_chip_info; /** * struct hl_device_reset_work - reset workqueue task wrapper. + * @wq: work queue for device reset procedure. * @reset_work: reset work to be done. * @hdev: habanalabs device structure. */ struct hl_device_reset_work { - struct work_struct reset_work; + struct workqueue_struct *wq; + struct delayed_work reset_work; struct hl_device *hdev; }; @@ -1446,18 +1621,78 @@ struct hl_device_idle_busy_ts { ktime_t busy_to_idle_ts; }; +/** + * struct hr_mmu_hop_addrs - used for holding per-device host-resident mmu hop + * information. + * @virt_addr: the virtual address of the hop. + * @phys_addr: the physical address of the hop (used by the device-mmu). + * @shadow_addr: The shadow of the hop used by the driver for walking the hops. + */ +struct hr_mmu_hop_addrs { + u64 virt_addr; + u64 phys_addr; + u64 shadow_addr; +}; /** - * struct hl_mmu_priv - used for holding per-device mmu internal information. + * struct hl_mmu_hr_priv - used for holding per-device mmu host-resident + * page-table internal information. * @mmu_pgt_pool: pool of page tables used by MMU for allocating hops. * @mmu_shadow_hop0: shadow array of hop0 tables. */ -struct hl_mmu_priv { +struct hl_mmu_hr_priv { + struct gen_pool *mmu_pgt_pool; + struct hr_mmu_hop_addrs *mmu_shadow_hop0; +}; + +/** + * struct hl_mmu_dr_priv - used for holding per-device mmu device-resident + * page-table internal information. + * @mmu_pgt_pool: pool of page tables used by MMU for allocating hops. + * @mmu_shadow_hop0: shadow array of hop0 tables. + */ +struct hl_mmu_dr_priv { struct gen_pool *mmu_pgt_pool; void *mmu_shadow_hop0; }; /** + * struct hl_mmu_priv - used for holding per-device mmu internal information. + * @dr: information on the device-resident MMU, when exists. + * @hr: information on the host-resident MMU, when exists. + */ +struct hl_mmu_priv { + struct hl_mmu_dr_priv dr; + struct hl_mmu_hr_priv hr; +}; + +/** + * struct hl_mmu_per_hop_info - A structure describing one TLB HOP and its entry + * that was created in order to translate a virtual address to a + * physical one. + * @hop_addr: The address of the hop. + * @hop_pte_addr: The address of the hop entry. + * @hop_pte_val: The value in the hop entry. + */ +struct hl_mmu_per_hop_info { + u64 hop_addr; + u64 hop_pte_addr; + u64 hop_pte_val; +}; + +/** + * struct hl_mmu_hop_info - A structure describing the TLB hops and their + * hop-entries that were created in order to translate a virtual address to a + * physical one. + * @hop_info: Array holding the per-hop information used for the translation. 
+ * @used_hops: The number of hops used for the translation. + */ +struct hl_mmu_hop_info { + struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS]; + u32 used_hops; +}; + +/** * struct hl_mmu_funcs - Device related MMU functions. * @init: initialize the MMU module. * @fini: release the MMU module. @@ -1468,6 +1703,9 @@ struct hl_mmu_priv { * @flush: flush all writes from all cores to reach device MMU. * @swap_out: marks all mapping of the given context as swapped out. * @swap_in: marks all mapping of the given context as swapped in. + * @get_tlb_info: returns the list of hops and hop-entries used that were + * created in order to translate the giver virtual address to a + * physical one. */ struct hl_mmu_funcs { int (*init)(struct hl_device *hdev); @@ -1482,6 +1720,8 @@ struct hl_mmu_funcs { void (*flush)(struct hl_ctx *ctx); void (*swap_out)(struct hl_ctx *ctx); void (*swap_in)(struct hl_ctx *ctx); + int (*get_tlb_info)(struct hl_ctx *ctx, + u64 virt_addr, struct hl_mmu_hop_info *hops); }; /** @@ -1497,6 +1737,7 @@ struct hl_mmu_funcs { * @dev_ctrl: related kernel device structure for the control device * @work_freq: delayed work to lower device frequency if possible. * @work_heartbeat: delayed work for CPU-CP is-alive check. + * @device_reset_work: delayed work which performs hard reset * @asic_name: ASIC specific name. * @asic_type: ASIC specific type. * @completion_queue: array of hl_cq. @@ -1505,8 +1746,8 @@ struct hl_mmu_funcs { * @eq_wq: work queue of event queue for executing work in process context. * @kernel_ctx: Kernel driver context structure. * @kernel_queues: array of hl_hw_queue. - * @hw_queues_mirror_list: CS mirror list for TDR. - * @hw_queues_mirror_lock: protects hw_queues_mirror_list. + * @cs_mirror_list: CS mirror list for TDR. + * @cs_mirror_lock: protects cs_mirror_list. * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs. * @event_queue: event queue for IRQ from CPU-CP. * @dma_pool: DMA pool for small allocations. @@ -1525,6 +1766,7 @@ struct hl_mmu_funcs { * @hwmon_dev: H/W monitor device. * @pm_mng_profile: current power management profile. * @hl_chip_info: ASIC's sensors information. + * @device_status_description: device status description. * @hl_debugfs: device's debugfs manager. * @cb_pool: list of preallocated CBs. * @cb_pool_lock: protects the CB pool. @@ -1572,13 +1814,12 @@ struct hl_mmu_funcs { * @heartbeat: is heartbeat sanity check towards CPU-CP enabled. * @reset_on_lockup: true if a reset should be done in case of stuck CS, false * otherwise. - * @dram_supports_virtual_memory: is MMU enabled towards DRAM. * @dram_default_page_mapping: is DRAM default page mapping enabled. + * @memory_scrub: true to perform device memory scrub in various locations, + * such as context-switch, context close, page free, etc. * @pmmu_huge_range: is a different virtual addresses range used for PMMU with * huge pages. * @init_done: is the initialization of the device done. - * @mmu_enable: is MMU enabled. - * @mmu_huge_page_opt: is MMU huge pages optimization enabled. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) * @dma_mask: the dma mask that was set for this device * @in_debug: is device under debug. This, together with fpriv_list, enforces @@ -1589,9 +1830,16 @@ struct hl_mmu_funcs { * @stop_on_err: true if engines should stop on error. * @supports_sync_stream: is sync stream supported. * @sync_stream_queue_idx: helper index for sync stream queues initialization. 
+ * @collective_mon_idx: helper index for collective initialization * @supports_coresight: is CoreSight supported. * @supports_soft_reset: is soft reset supported. * @supports_cb_mapping: is mapping a CB to the device's MMU supported. + * @needs_reset: true if reset_on_lockup is false and device should be reset + * due to lockup. + * @process_kill_trial_cnt: number of trials reset thread tried killing + * user processes + * @device_fini_pending: true if device_fini was called and might be + * waiting for the reset thread to finish */ struct hl_device { struct pci_dev *pdev; @@ -1604,15 +1852,17 @@ struct hl_device { struct device *dev_ctrl; struct delayed_work work_freq; struct delayed_work work_heartbeat; - char asic_name[32]; + struct hl_device_reset_work device_reset_work; + char asic_name[HL_STR_MAX]; + char status[HL_DEV_STS_MAX][HL_STR_MAX]; enum hl_asic_type asic_type; struct hl_cq *completion_queue; struct workqueue_struct **cq_wq; struct workqueue_struct *eq_wq; struct hl_ctx *kernel_ctx; struct hl_hw_queue *kernel_queues; - struct list_head hw_queues_mirror_list; - spinlock_t hw_queues_mirror_lock; + struct list_head cs_mirror_list; + spinlock_t cs_mirror_lock; struct hl_cb_mgr kernel_cb_mgr; struct hl_eq event_queue; struct dma_pool *dma_pool; @@ -1649,10 +1899,10 @@ struct hl_device { struct hl_device_idle_busy_ts *idle_busy_ts_arr; - struct hl_cs_counters aggregated_cs_counters; + struct hl_cs_counters_atomic aggregated_cs_counters; struct hl_mmu_priv mmu_priv; - struct hl_mmu_funcs mmu_func; + struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; atomic64_t dram_used_mem; u64 timeout_jiffies; @@ -1677,8 +1927,8 @@ struct hl_device { u8 hard_reset_pending; u8 heartbeat; u8 reset_on_lockup; - u8 dram_supports_virtual_memory; u8 dram_default_page_mapping; + u8 memory_scrub; u8 pmmu_huge_range; u8 init_done; u8 device_cpu_disabled; @@ -1689,17 +1939,22 @@ struct hl_device { u8 stop_on_err; u8 supports_sync_stream; u8 sync_stream_queue_idx; + u8 collective_mon_idx; u8 supports_coresight; u8 supports_soft_reset; u8 supports_cb_mapping; + u8 needs_reset; + u8 process_kill_trial_cnt; + u8 device_fini_pending; /* Parameters for bring-up */ + u64 nic_ports_mask; + u64 fw_loading; u8 mmu_enable; u8 mmu_huge_page_opt; u8 cpu_enable; u8 reset_pcilink; u8 cpu_queues_enable; - u8 fw_loading; u8 pldm; u8 axi_drain; u8 sram_scrambler_enable; @@ -1707,6 +1962,7 @@ struct hl_device { u8 hard_reset_on_fw_events; u8 bmc_enable; u8 rl_enable; + u8 reset_on_preboot_fail; }; @@ -1793,7 +2049,8 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size, int hl_device_open(struct inode *inode, struct file *filp); int hl_device_open_ctrl(struct inode *inode, struct file *filp); -bool hl_device_disabled_or_in_reset(struct hl_device *hdev); +bool hl_device_operational(struct hl_device *hdev, + enum hl_device_status *status); enum hl_device_status hl_device_status(struct hl_device *hdev); int hl_device_set_debug_mode(struct hl_device *hdev, bool enable); int create_hdev(struct hl_device **dev, struct pci_dev *pdev, @@ -1878,8 +2135,10 @@ void hl_cs_rollback_all(struct hl_device *hdev); struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, enum hl_queue_type queue_type, bool is_kernel_allocated_cb); void hl_sob_reset_error(struct kref *ref); +int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask); void hl_fence_put(struct hl_fence *fence); void hl_fence_get(struct hl_fence *fence); +void cs_get(struct hl_cs *cs); void goya_set_asic_funcs(struct hl_device *hdev); void 
gaudi_set_asic_funcs(struct hl_device *hdev); @@ -1890,6 +2149,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx); int hl_vm_init(struct hl_device *hdev); void hl_vm_fini(struct hl_device *hdev); +u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, + enum hl_va_range_type type, u32 size, u32 alignment); +int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, + u64 start_addr, u64 size); int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, struct hl_userptr *userptr); void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr); @@ -1903,20 +2166,26 @@ int hl_mmu_init(struct hl_device *hdev); void hl_mmu_fini(struct hl_device *hdev); int hl_mmu_ctx_init(struct hl_ctx *ctx); void hl_mmu_ctx_fini(struct hl_ctx *ctx); -int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, +int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte); -int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, +int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flush_pte); +int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr, + u64 phys_addr, u32 size); +int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size); void hl_mmu_swap_out(struct hl_ctx *ctx); void hl_mmu_swap_in(struct hl_ctx *ctx); int hl_mmu_if_set_funcs(struct hl_device *hdev); -void hl_mmu_v1_set_funcs(struct hl_device *hdev); +void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); +int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr); +int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, + struct hl_mmu_hop_info *hops); int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, - void __iomem *dst); + void __iomem *dst, u32 src_offset, u32 size); int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode); int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, - u16 len, u32 timeout, long *result); + u16 len, u32 timeout, u64 *result); int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type); int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, size_t irq_arr_size); @@ -1926,18 +2195,22 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); int hl_fw_send_heartbeat(struct hl_device *hdev); -int hl_fw_cpucp_info_get(struct hl_device *hdev); +int hl_fw_cpucp_info_get(struct hl_device *hdev, + u32 cpu_security_boot_status_reg); int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy); +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, + u16 *pll_freq_arr); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, - u32 boot_err0_reg, bool skip_bmc, - u32 cpu_timeout, u32 boot_fit_timeout); -int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg, - u32 boot_err0_reg, u32 timeout); + u32 cpu_security_boot_status_reg, u32 boot_err0_reg, + bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout); +int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 cpu_security_boot_status_reg, u32 boot_err0_reg, + u32 timeout); int hl_pci_bars_map(struct hl_device *hdev, 
const char * const name[3], bool is_wc[3]); @@ -1946,8 +2219,7 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, struct hl_inbound_pci_region *pci_region); int hl_pci_set_outbound_region(struct hl_device *hdev, struct hl_outbound_pci_region *pci_region); -int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg, - u32 boot_err0_reg, u32 preboot_ver_timeout); +int hl_pci_init(struct hl_device *hdev); void hl_pci_fini(struct hl_device *hdev); long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index f9067d3ef437..6bbb6bca6860 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -29,6 +29,7 @@ static DEFINE_MUTEX(hl_devs_idr_lock); static int timeout_locked = 5; static int reset_on_lockup = 1; +static int memory_scrub = 1; module_param(timeout_locked, int, 0444); MODULE_PARM_DESC(timeout_locked, @@ -38,6 +39,10 @@ module_param(reset_on_lockup, int, 0444); MODULE_PARM_DESC(reset_on_lockup, "Do device reset on lockup (0 = no, 1 = yes, default yes)"); +module_param(memory_scrub, int, 0444); +MODULE_PARM_DESC(memory_scrub, + "Scrub device memory in various states (0 = no, 1 = yes, default yes)"); + #define PCI_VENDOR_ID_HABANALABS 0x1da3 #define PCI_IDS_GOYA 0x0001 @@ -87,6 +92,7 @@ static enum hl_asic_type get_asic_type(u16 device) */ int hl_device_open(struct inode *inode, struct file *filp) { + enum hl_device_status status; struct hl_device *hdev; struct hl_fpriv *hpriv; int rc; @@ -119,10 +125,10 @@ int hl_device_open(struct inode *inode, struct file *filp) mutex_lock(&hdev->fpriv_list_lock); - if (hl_device_disabled_or_in_reset(hdev)) { + if (!hl_device_operational(hdev, &status)) { dev_err_ratelimited(hdev->dev, - "Can't open %s because it is disabled or in reset\n", - dev_name(hdev->dev)); + "Can't open %s because it is %s\n", + dev_name(hdev->dev), hdev->status[status]); rc = -EPERM; goto out_err; } @@ -199,7 +205,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) mutex_lock(&hdev->fpriv_list_lock); - if (hl_device_disabled_or_in_reset(hdev)) { + if (!hl_device_operational(hdev, NULL)) { dev_err_ratelimited(hdev->dev_ctrl, "Can't open %s because it is disabled or in reset\n", dev_name(hdev->dev_ctrl)); @@ -228,19 +234,20 @@ out_err: static void set_driver_behavior_per_device(struct hl_device *hdev) { - hdev->mmu_enable = 1; hdev->cpu_enable = 1; - hdev->fw_loading = 1; + hdev->fw_loading = FW_TYPE_ALL_TYPES; hdev->cpu_queues_enable = 1; hdev->heartbeat = 1; + hdev->mmu_enable = 1; hdev->clock_gating_mask = ULONG_MAX; - - hdev->reset_pcilink = 0; - hdev->axi_drain = 0; hdev->sram_scrambler_enable = 1; hdev->dram_scrambler_enable = 1; hdev->bmc_enable = 1; hdev->hard_reset_on_fw_events = 1; + hdev->reset_on_preboot_fail = 1; + + hdev->reset_pcilink = 0; + hdev->axi_drain = 0; } /* @@ -281,8 +288,17 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->asic_type = asic_type; } + /* Assign status description string */ + strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], + "disabled", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], + "in reset", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], + "needs reset", HL_STR_MAX); + hdev->major = hl_major; hdev->reset_on_lockup = reset_on_lockup; + hdev->memory_scrub = memory_scrub; hdev->pldm = 0; set_driver_behavior_per_device(hdev); diff --git 
a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 07317ea49129..32e6af1db4e3 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -314,20 +314,45 @@ static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args) static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + struct hl_info_cs_counters cs_counters = {0}; struct hl_device *hdev = hpriv->hdev; - struct hl_info_cs_counters cs_counters = { {0} }; + struct hl_cs_counters_atomic *cntr; u32 max_size = args->return_size; - void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + cntr = &hdev->aggregated_cs_counters; if ((!max_size) || (!out)) return -EINVAL; - memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters, - sizeof(struct hl_cs_counters)); - - if (hpriv->ctx) - memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters, - sizeof(struct hl_cs_counters)); + cs_counters.total_out_of_mem_drop_cnt = + atomic64_read(&cntr->out_of_mem_drop_cnt); + cs_counters.total_parsing_drop_cnt = + atomic64_read(&cntr->parsing_drop_cnt); + cs_counters.total_queue_full_drop_cnt = + atomic64_read(&cntr->queue_full_drop_cnt); + cs_counters.total_device_in_reset_drop_cnt = + atomic64_read(&cntr->device_in_reset_drop_cnt); + cs_counters.total_max_cs_in_flight_drop_cnt = + atomic64_read(&cntr->max_cs_in_flight_drop_cnt); + + if (hpriv->ctx) { + cs_counters.ctx_out_of_mem_drop_cnt = + atomic64_read( + &hpriv->ctx->cs_counters.out_of_mem_drop_cnt); + cs_counters.ctx_parsing_drop_cnt = + atomic64_read( + &hpriv->ctx->cs_counters.parsing_drop_cnt); + cs_counters.ctx_queue_full_drop_cnt = + atomic64_read( + &hpriv->ctx->cs_counters.queue_full_drop_cnt); + cs_counters.ctx_device_in_reset_drop_cnt = + atomic64_read( + &hpriv->ctx->cs_counters.device_in_reset_drop_cnt); + cs_counters.ctx_max_cs_in_flight_drop_cnt = + atomic64_read( + &hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt); + } return copy_to_user(out, &cs_counters, min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0; @@ -378,11 +403,32 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv, min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0; } +static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_pll_frequency_info freq_info = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hl_fw_cpucp_pll_info_get(hdev, args->pll_index, freq_info.output); + if (rc) + return rc; + + return copy_to_user(out, &freq_info, + min((size_t) max_size, sizeof(freq_info))) ? -EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { + enum hl_device_status status; struct hl_info_args *args = data; struct hl_device *hdev = hpriv->hdev; + int rc; /* @@ -403,10 +449,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, break; } - if (hl_device_disabled_or_in_reset(hdev)) { + if (!hl_device_operational(hdev, &status)) { dev_warn_ratelimited(dev, "Device is %s. Can't execute INFO IOCTL\n", - atomic_read(&hdev->in_reset) ? 
"in_reset" : "disabled"); + hdev->status[status]); return -EBUSY; } @@ -453,6 +499,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_TOTAL_ENERGY: return total_energy_consumption_info(hpriv, args); + case HL_INFO_PLL_FREQUENCY: + return pll_frequency_info(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -ENOTTY; @@ -476,12 +525,14 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) { struct hl_debug_args *args = data; struct hl_device *hdev = hpriv->hdev; + enum hl_device_status status; + int rc = 0; - if (hl_device_disabled_or_in_reset(hdev)) { + if (!hl_device_operational(hdev, &status)) { dev_warn_ratelimited(hdev->dev, "Device is %s. Can't execute DEBUG IOCTL\n", - atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); + hdev->status[status]); return -EBUSY; } @@ -544,7 +595,7 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, int retcode; if (hdev->hard_reset_pending) { - dev_crit_ratelimited(hdev->dev_ctrl, + dev_crit_ratelimited(dev, "Device HARD reset pending! Please close FD\n"); return -ENODEV; } diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 250cf9cefc06..7caf868d1585 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -48,6 +48,11 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs) return; q = &hdev->kernel_queues[0]; + + /* There are no internal queues if H/W queues are being used */ + if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW) + return; + for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) { if (q->queue_type == QUEUE_TYPE_INT) atomic_add(cs->jobs_in_queue_cnt[i], &q->ci); @@ -333,7 +338,14 @@ static void int_queue_schedule_job(struct hl_cs_job *job) bd.ctl = 0; bd.len = cpu_to_le32(job->job_cb_size); - bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb); + + if (job->is_kernel_allocated_cb) + /* bus_address is actually a mmu mapped address + * allocated from an internal pool + */ + bd.ptr = cpu_to_le64(job->user_cb->bus_address); + else + bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb); pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd); @@ -388,6 +400,91 @@ static void hw_queue_schedule_job(struct hl_cs_job *job) ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); } +static void init_signal_cs(struct hl_device *hdev, + struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl) +{ + struct hl_sync_stream_properties *prop; + struct hl_hw_sob *hw_sob; + u32 q_idx; + + q_idx = job->hw_queue_id; + prop = &hdev->kernel_queues[q_idx].sync_stream_prop; + hw_sob = &prop->hw_sob[prop->curr_sob_offset]; + + cs_cmpl->hw_sob = hw_sob; + cs_cmpl->sob_val = prop->next_sob_val++; + + dev_dbg(hdev->dev, + "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n", + cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); + + hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, + cs_cmpl->hw_sob->sob_id, 0); + + kref_get(&hw_sob->kref); + + /* check for wraparound */ + if (prop->next_sob_val == HL_MAX_SOB_VAL) { + /* + * Decrement as we reached the max value. + * The release function won't be called here as we've + * just incremented the refcount. 
+ */ + kref_put(&hw_sob->kref, hl_sob_reset_error); + prop->next_sob_val = 1; + /* only two SOBs are currently in use */ + prop->curr_sob_offset = + (prop->curr_sob_offset + 1) % HL_RSVD_SOBS; + + dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", + prop->curr_sob_offset, q_idx); + } +} + +static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, + struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl) +{ + struct hl_cs_compl *signal_cs_cmpl; + struct hl_sync_stream_properties *prop; + struct hl_gen_wait_properties wait_prop; + u32 q_idx; + + q_idx = job->hw_queue_id; + prop = &hdev->kernel_queues[q_idx].sync_stream_prop; + + signal_cs_cmpl = container_of(cs->signal_fence, + struct hl_cs_compl, + base_fence); + + /* copy the SOB id and value of the signal CS */ + cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; + cs_cmpl->sob_val = signal_cs_cmpl->sob_val; + + dev_dbg(hdev->dev, + "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n", + cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, + prop->base_mon_id, q_idx); + + wait_prop.data = (void *) job->patched_cb; + wait_prop.sob_base = cs_cmpl->hw_sob->sob_id; + wait_prop.sob_mask = 0x1; + wait_prop.sob_val = cs_cmpl->sob_val; + wait_prop.mon_id = prop->base_mon_id; + wait_prop.q_idx = q_idx; + wait_prop.size = 0; + hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop); + + kref_get(&cs_cmpl->hw_sob->kref); + /* + * Must put the signal fence after the SOB refcnt increment so + * the SOB refcnt won't turn 0 and reset the SOB before the + * wait CS was submitted. + */ + mb(); + hl_fence_put(cs->signal_fence); + cs->signal_fence = NULL; +} + /* * init_signal_wait_cs - initialize a signal/wait CS * @cs: pointer to the signal/wait CS @@ -398,84 +495,18 @@ static void init_signal_wait_cs(struct hl_cs *cs) { struct hl_ctx *ctx = cs->ctx; struct hl_device *hdev = ctx->hdev; - struct hl_hw_queue *hw_queue; + struct hl_cs_job *job; struct hl_cs_compl *cs_cmpl = container_of(cs->fence, struct hl_cs_compl, base_fence); - struct hl_hw_sob *hw_sob; - struct hl_cs_job *job; - u32 q_idx; - /* There is only one job in a signal/wait CS */ job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node); - q_idx = job->hw_queue_id; - hw_queue = &hdev->kernel_queues[q_idx]; - - if (cs->type & CS_TYPE_SIGNAL) { - hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset]; - - cs_cmpl->hw_sob = hw_sob; - cs_cmpl->sob_val = hw_queue->next_sob_val++; - - dev_dbg(hdev->dev, - "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n", - cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); - - hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, - cs_cmpl->hw_sob->sob_id); - - kref_get(&hw_sob->kref); - - /* check for wraparound */ - if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) { - /* - * Decrement as we reached the max value. - * The release function won't be called here as we've - * just incremented the refcount. 
- */ - kref_put(&hw_sob->kref, hl_sob_reset_error); - hw_queue->next_sob_val = 1; - /* only two SOBs are currently in use */ - hw_queue->curr_sob_offset = - (hw_queue->curr_sob_offset + 1) % - HL_RSVD_SOBS_IN_USE; - - dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", - hw_queue->curr_sob_offset, q_idx); - } - } else if (cs->type & CS_TYPE_WAIT) { - struct hl_cs_compl *signal_cs_cmpl; - - signal_cs_cmpl = container_of(cs->signal_fence, - struct hl_cs_compl, - base_fence); - - /* copy the the SOB id and value of the signal CS */ - cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; - cs_cmpl->sob_val = signal_cs_cmpl->sob_val; - - dev_dbg(hdev->dev, - "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n", - cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, - hw_queue->base_mon_id, q_idx); - hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb, - cs_cmpl->hw_sob->sob_id, - cs_cmpl->sob_val, - hw_queue->base_mon_id, - q_idx); - - kref_get(&cs_cmpl->hw_sob->kref); - /* - * Must put the signal fence after the SOB refcnt increment so - * the SOB refcnt won't turn 0 and reset the SOB before the - * wait CS was submitted. - */ - mb(); - hl_fence_put(cs->signal_fence); - cs->signal_fence = NULL; - } + if (cs->type & CS_TYPE_SIGNAL) + init_signal_cs(hdev, job, cs_cmpl); + else if (cs->type & CS_TYPE_WAIT) + init_wait_cs(hdev, cs, job, cs_cmpl); } /* @@ -484,19 +515,24 @@ static void init_signal_wait_cs(struct hl_cs *cs) */ int hl_hw_queue_schedule_cs(struct hl_cs *cs) { + enum hl_device_status status; + struct hl_cs_counters_atomic *cntr; struct hl_ctx *ctx = cs->ctx; struct hl_device *hdev = ctx->hdev; struct hl_cs_job *job, *tmp; struct hl_hw_queue *q; - u32 max_queues; int rc = 0, i, cq_cnt; + u32 max_queues; + + cntr = &hdev->aggregated_cs_counters; hdev->asic_funcs->hw_queues_lock(hdev); - if (hl_device_disabled_or_in_reset(hdev)) { - ctx->cs_counters.device_in_reset_drop_cnt++; + if (!hl_device_operational(hdev, &status)) { + atomic64_inc(&cntr->device_in_reset_drop_cnt); + atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt); dev_err(hdev->dev, - "device is disabled or in reset, CS rejected!\n"); + "device is %s, CS rejected!\n", hdev->status[status]); rc = -EPERM; goto out; } @@ -527,7 +563,9 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) } if (rc) { - ctx->cs_counters.queue_full_drop_cnt++; + atomic64_inc( + &ctx->cs_counters.queue_full_drop_cnt); + atomic64_inc(&cntr->queue_full_drop_cnt); goto unroll_cq_resv; } @@ -538,21 +576,23 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) init_signal_wait_cs(cs); + else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) + hdev->asic_funcs->collective_wait_init_cs(cs); - spin_lock(&hdev->hw_queues_mirror_lock); - list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list); + spin_lock(&hdev->cs_mirror_lock); + list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list); /* Queue TDR if the CS is the first entry and if timeout is wanted */ if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) && - (list_first_entry(&hdev->hw_queues_mirror_list, + (list_first_entry(&hdev->cs_mirror_list, struct hl_cs, mirror_node) == cs)) { cs->tdr_active = true; schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies); - spin_unlock(&hdev->hw_queues_mirror_lock); - } else { - spin_unlock(&hdev->hw_queues_mirror_lock); + } + spin_unlock(&hdev->cs_mirror_lock); + if (!hdev->cs_active_cnt++) { struct hl_device_idle_busy_ts *ts; @@ -714,22 +754,56 @@ static int hw_queue_init(struct hl_device *hdev, struct 
hl_hw_queue *q) static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx) { - struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx]; + struct hl_sync_stream_properties *sync_stream_prop; struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_hw_sob *hw_sob; - int sob, queue_idx = hdev->sync_stream_queue_idx++; + int sob, reserved_mon_idx, queue_idx; + + sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop; + + /* We use 'collective_mon_idx' as a running index in order to reserve + * monitors for collective master/slave queues. + * collective master queue gets 2 reserved monitors + * collective slave queue gets 1 reserved monitor + */ + if (hdev->kernel_queues[q_idx].collective_mode == + HL_COLLECTIVE_MASTER) { + reserved_mon_idx = hdev->collective_mon_idx; + + /* reserve the first monitor for collective master queue */ + sync_stream_prop->collective_mstr_mon_id[0] = + prop->collective_first_mon + reserved_mon_idx; + + /* reserve the second monitor for collective master queue */ + sync_stream_prop->collective_mstr_mon_id[1] = + prop->collective_first_mon + reserved_mon_idx + 1; + + hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS; + } else if (hdev->kernel_queues[q_idx].collective_mode == + HL_COLLECTIVE_SLAVE) { + reserved_mon_idx = hdev->collective_mon_idx++; + + /* reserve a monitor for collective slave queue */ + sync_stream_prop->collective_slave_mon_id = + prop->collective_first_mon + reserved_mon_idx; + } + + if (!hdev->kernel_queues[q_idx].supports_sync_stream) + return; + + queue_idx = hdev->sync_stream_queue_idx++; - hw_queue->base_sob_id = - prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS; - hw_queue->base_mon_id = - prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS; - hw_queue->next_sob_val = 1; - hw_queue->curr_sob_offset = 0; + sync_stream_prop->base_sob_id = prop->sync_stream_first_sob + + (queue_idx * HL_RSVD_SOBS); + sync_stream_prop->base_mon_id = prop->sync_stream_first_mon + + (queue_idx * HL_RSVD_MONS); + sync_stream_prop->next_sob_val = 1; + sync_stream_prop->curr_sob_offset = 0; for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) { - hw_sob = &hw_queue->hw_sob[sob]; + hw_sob = &sync_stream_prop->hw_sob[sob]; hw_sob->hdev = hdev; - hw_sob->sob_id = hw_queue->base_sob_id + sob; + hw_sob->sob_id = sync_stream_prop->base_sob_id + sob; hw_sob->q_idx = q_idx; kref_init(&hw_sob->kref); } @@ -737,15 +811,16 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx) static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx) { - struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx]; + struct hl_sync_stream_properties *prop = + &hdev->kernel_queues[q_idx].sync_stream_prop; /* * In case we got here due to a stuck CS, the refcnt might be bigger * than 1 and therefore we reset it. 
*/ - kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref); - hw_queue->curr_sob_offset = 0; - hw_queue->next_sob_val = 1; + kref_init(&prop->hw_sob[prop->curr_sob_offset].kref); + prop->curr_sob_offset = 0; + prop->next_sob_val = 1; } /* @@ -788,8 +863,7 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q, break; } - if (q->supports_sync_stream) - sync_stream_queue_init(hdev, q->hw_queue_id); + sync_stream_queue_init(hdev, q->hw_queue_id); if (rc) return rc; @@ -867,6 +941,7 @@ int hl_hw_queues_create(struct hl_device *hdev) q->queue_type = asic->hw_queues_props[i].type; q->supports_sync_stream = asic->hw_queues_props[i].supports_sync_stream; + q->collective_mode = asic->hw_queues_props[i].collective_mode; rc = queue_init(hdev, q, i); if (rc) { dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c index 2ac29cb2fe61..6b421d76b311 100644 --- a/drivers/misc/habanalabs/common/hwmon.c +++ b/drivers/misc/habanalabs/common/hwmon.c @@ -114,7 +114,7 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, struct hl_device *hdev = dev_get_drvdata(dev); int rc; - if (hl_device_disabled_or_in_reset(hdev)) + if (!hl_device_operational(hdev, NULL)) return -ENODEV; switch (type) { @@ -192,7 +192,7 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type, { struct hl_device *hdev = dev_get_drvdata(dev); - if (hl_device_disabled_or_in_reset(hdev)) + if (!hl_device_operational(hdev, NULL)) return -ENODEV; switch (type) { @@ -312,6 +312,7 @@ int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { struct cpucp_packet pkt; + u64 result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -322,7 +323,9 @@ int hl_get_temperature(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, value); + 0, &result); + + *value = (long) result; if (rc) { dev_err(hdev->dev, @@ -363,6 +366,7 @@ int hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { struct cpucp_packet pkt; + u64 result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -373,7 +377,9 @@ int hl_get_voltage(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, value); + 0, &result); + + *value = (long) result; if (rc) { dev_err(hdev->dev, @@ -389,6 +395,7 @@ int hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { struct cpucp_packet pkt; + u64 result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -399,7 +406,9 @@ int hl_get_current(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, value); + 0, &result); + + *value = (long) result; if (rc) { dev_err(hdev->dev, @@ -415,6 +424,7 @@ int hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { struct cpucp_packet pkt; + u64 result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -425,7 +435,9 @@ int hl_get_fan_speed(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, value); + 0, &result); + + *value = (long) result; if (rc) { dev_err(hdev->dev, @@ -441,6 +453,7 @@ int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { struct cpucp_packet pkt; + u64 result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -451,7 +464,9 @@ int hl_get_pwm_info(struct hl_device *hdev, 
pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - 0, value); + 0, &result); + + *value = (long) result; if (rc) { dev_err(hdev->dev, @@ -542,7 +557,7 @@ int hl_hwmon_init(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; int rc; - if ((hdev->hwmon_initialized) || !(hdev->fw_loading)) + if ((hdev->hwmon_initialized) || !(hdev->cpu_queues_enable)) return 0; if (hdev->hl_chip_info->info) { diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 84227819e4d1..cbe9da4e0211 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -11,7 +11,6 @@ #include <linux/uaccess.h> #include <linux/slab.h> -#include <linux/genalloc.h> #define HL_MMU_DEBUG 0 @@ -46,7 +45,7 @@ * @ret_handle : result handle * * This function does the following: - * - Allocate the requested size rounded up to 2MB pages + * - Allocate the requested size rounded up to 'dram_page_size' pages * - Return unique handle */ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, @@ -81,6 +80,16 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, num_pgs, total_size); return -ENOMEM; } + + if (hdev->memory_scrub) { + rc = hdev->asic_funcs->scrub_device_mem(hdev, paddr, + total_size); + if (rc) { + dev_err(hdev->dev, + "Failed to scrub contiguous device memory\n"); + goto pages_pack_err; + } + } } phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); @@ -118,6 +127,17 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, goto page_err; } + if (hdev->memory_scrub) { + rc = hdev->asic_funcs->scrub_device_mem(hdev, + phys_pg_pack->pages[i], + page_size); + if (rc) { + dev_err(hdev->dev, + "Failed to scrub device memory\n"); + goto page_err; + } + } + num_curr_pgs++; } } @@ -601,6 +621,87 @@ out: } /* + * hl_reserve_va_block() - reserve a virtual block of a given size. + * @hdev: pointer to the habanalabs device structure. + * @ctx: current context + * @type: virtual addresses range type. + * @size: requested block size. + * @alignment: required alignment in bytes of the virtual block start address, + * 0 means no alignment. + * + * This function does the following: + * - Iterate on the virtual block list to find a suitable virtual block for the + * given size and alignment. + * - Reserve the requested block and update the list. + * - Return the start address of the virtual block. + */ +u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, + enum hl_va_range_type type, u32 size, u32 alignment) +{ + return get_va_block(hdev, ctx->va_range[type], size, 0, + max(alignment, ctx->va_range[type]->page_size)); +} + +/** + * hl_get_va_range_type() - get va_range type for the given address and size. + * @address: The start address of the area we want to validate. + * @size: The size in bytes of the area we want to validate. + * @type: returned va_range type + * + * Return: true if the area is inside a valid range, false otherwise. 
+ */ +static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size, + enum hl_va_range_type *type) +{ + int i; + + for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX; i++) { + if (hl_mem_area_inside_range(address, size, + ctx->va_range[i]->start_addr, + ctx->va_range[i]->end_addr)) { + *type = i; + return 0; + } + } + + return -EINVAL; +} + +/* + * hl_unreserve_va_block - wrapper for add_va_block for unreserving a va block + * + * @hdev: pointer to the habanalabs device structure + * @ctx: current context + * @start: start virtual address + * @end: end virtual address + * + * This function does the following: + * - Takes the list lock and calls add_va_block_locked + */ +int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, + u64 start_addr, u64 size) +{ + enum hl_va_range_type type; + int rc; + + rc = hl_get_va_range_type(ctx, start_addr, size, &type); + if (rc) { + dev_err(hdev->dev, + "cannot find va_range for va %#llx size %llu", + start_addr, size); + return rc; + } + + rc = add_va_block(hdev, ctx->va_range[type], start_addr, + start_addr + size - 1); + if (rc) + dev_warn(hdev->dev, + "add va block failed for vaddr: 0x%llx\n", start_addr); + + return rc; +} + +/* * get_sg_info - get number of pages and the DMA address from SG list * * @sg : the SG list @@ -742,7 +843,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, for (i = 0 ; i < phys_pg_pack->npages ; i++) { paddr = phys_pg_pack->pages[i]; - rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size, + rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size, (i + 1) == phys_pg_pack->npages); if (rc) { dev_err(hdev->dev, @@ -761,7 +862,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, err: next_vaddr = vaddr; for (i = 0 ; i < mapped_pg_cnt ; i++) { - if (hl_mmu_unmap(ctx, next_vaddr, page_size, + if (hl_mmu_unmap_page(ctx, next_vaddr, page_size, (i + 1) == mapped_pg_cnt)) dev_warn_ratelimited(hdev->dev, "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n", @@ -791,7 +892,7 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, next_vaddr = vaddr; for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { - if (hl_mmu_unmap(ctx, next_vaddr, page_size, + if (hl_mmu_unmap_page(ctx, next_vaddr, page_size, (i + 1) == phys_pg_pack->npages)) dev_warn_ratelimited(hdev->dev, "unmap failed for vaddr: 0x%llx\n", next_vaddr); @@ -888,7 +989,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, /* get required alignment */ if (phys_pg_pack->page_size == page_size) { - va_range = ctx->host_va_range; + va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST]; /* * huge page alignment may be needed in case of regular @@ -903,7 +1004,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, * huge page alignment is needed in case of huge page * mapping */ - va_range = ctx->host_huge_va_range; + va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]; va_block_align = huge_page_size; } } else { @@ -928,7 +1029,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, hint_addr = args->map_device.hint_addr; /* DRAM VA alignment is the same as the DRAM page size */ - va_range = ctx->dram_va_range; + va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM]; va_block_align = hdev->asic_prop.dmmu.page_size; } @@ -1073,12 +1174,12 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size) - va_range = ctx->host_va_range; + va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST]; else 
- va_range = ctx->host_huge_va_range; + va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]; } else if (*vm_type == VM_TYPE_PHYS_PACK) { is_userptr = false; - va_range = ctx->dram_va_range; + va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM]; phys_pg_pack = hnode->ptr; } else { dev_warn(hdev->dev, @@ -1217,6 +1318,7 @@ out: int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) { + enum hl_device_status status; union hl_mem_args *args = data; struct hl_device *hdev = hpriv->hdev; struct hl_ctx *ctx = hpriv->ctx; @@ -1224,10 +1326,10 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) u32 handle = 0; int rc; - if (hl_device_disabled_or_in_reset(hdev)) { + if (!hl_device_operational(hdev, &status)) { dev_warn_ratelimited(hdev->dev, "Device is %s. Can't execute MEMORY IOCTL\n", - atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); + hdev->status[status]); return -EBUSY; } @@ -1236,18 +1338,35 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) switch (args->in.op) { case HL_MEM_OP_ALLOC: - if (!hdev->dram_supports_virtual_memory) { - dev_err(hdev->dev, "DRAM alloc is not supported\n"); - rc = -EINVAL; - goto out; - } - if (args->in.alloc.mem_size == 0) { dev_err(hdev->dev, "alloc size must be larger than 0\n"); rc = -EINVAL; goto out; } + + /* If DRAM does not support virtual memory the driver won't + * handle the allocation/freeing of that memory. However, for + * system administration/monitoring purposes, the driver will + * keep track of the amount of DRAM memory that is allocated + * and freed by the user. Because this code totally relies on + * the user's input, the driver can't ensure the validity + * of this accounting. + */ + if (!hdev->asic_prop.dram_supports_virtual_memory) { + atomic64_add(args->in.alloc.mem_size, + &ctx->dram_phys_mem); + atomic64_add(args->in.alloc.mem_size, + &hdev->dram_used_mem); + + dev_dbg(hdev->dev, "DRAM alloc is not supported\n"); + rc = 0; + + memset(args, 0, sizeof(*args)); + args->out.handle = 0; + goto out; + } + rc = alloc_device_memory(ctx, &args->in, &handle); memset(args, 0, sizeof(*args)); @@ -1255,6 +1374,26 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) break; case HL_MEM_OP_FREE: + /* If DRAM does not support virtual memory the driver won't + * handle the allocation/freeing of that memory. However, for + * system administration/monitoring purposes, the driver will + * keep track of the amount of DRAM memory that is allocated + * and freed by the user. Because this code totally relies on + * the user's input, the driver can't ensure the validity + * of this accounting. + */ + if (!hdev->asic_prop.dram_supports_virtual_memory) { + atomic64_sub(args->in.alloc.mem_size, + &ctx->dram_phys_mem); + atomic64_sub(args->in.alloc.mem_size, + &hdev->dram_used_mem); + + dev_dbg(hdev->dev, "DRAM alloc is not supported\n"); + rc = 0; + + goto out; + } + rc = free_device_memory(ctx, args->in.free.handle); break; @@ -1498,7 +1637,7 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, * addresses. 
*/ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, - u64 start, u64 end) + u64 start, u64 end, u32 page_size) { int rc; @@ -1528,6 +1667,7 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, va_range->start_addr = start; va_range->end_addr = end; + va_range->page_size = page_size; return 0; } @@ -1540,8 +1680,7 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, * This function does the following: * - Frees the virtual addresses block list and its lock */ -static void va_range_fini(struct hl_device *hdev, - struct hl_va_range *va_range) +static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range) { mutex_lock(&va_range->lock); clear_va_list_locked(hdev, &va_range->list); @@ -1571,101 +1710,97 @@ static void va_range_fini(struct hl_device *hdev, static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start, u64 host_range_end, + u32 host_page_size, u64 host_huge_range_start, u64 host_huge_range_end, + u32 host_huge_page_size, u64 dram_range_start, - u64 dram_range_end) + u64 dram_range_end, + u32 dram_page_size) { struct hl_device *hdev = ctx->hdev; - int rc; - - ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL); - if (!ctx->host_va_range) - return -ENOMEM; - - ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range), - GFP_KERNEL); - if (!ctx->host_huge_va_range) { - rc = -ENOMEM; - goto host_huge_va_range_err; - } - - ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL); - if (!ctx->dram_va_range) { - rc = -ENOMEM; - goto dram_va_range_err; + int i, rc; + + for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) { + ctx->va_range[i] = + kzalloc(sizeof(struct hl_va_range), GFP_KERNEL); + if (!ctx->va_range[i]) { + rc = -ENOMEM; + goto free_va_range; + } } rc = hl_mmu_ctx_init(ctx); if (rc) { dev_err(hdev->dev, "failed to init context %d\n", ctx->asid); - goto mmu_ctx_err; + goto free_va_range; } mutex_init(&ctx->mem_hash_lock); hash_init(ctx->mem_hash); - mutex_init(&ctx->host_va_range->lock); + mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); - rc = va_range_init(hdev, ctx->host_va_range, host_range_start, - host_range_end); + rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST], + host_range_start, host_range_end, host_page_size); if (rc) { dev_err(hdev->dev, "failed to init host vm range\n"); - goto host_page_range_err; + goto mmu_ctx_fini; } if (hdev->pmmu_huge_range) { - mutex_init(&ctx->host_huge_va_range->lock); + mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); - rc = va_range_init(hdev, ctx->host_huge_va_range, - host_huge_range_start, - host_huge_range_end); + rc = va_range_init(hdev, + ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE], + host_huge_range_start, host_huge_range_end, + host_huge_page_size); if (rc) { dev_err(hdev->dev, "failed to init host huge vm range\n"); - goto host_hpage_range_err; + goto clear_host_va_range; } } else { - ctx->host_huge_va_range = ctx->host_va_range; + kfree(ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]); + ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE] = + ctx->va_range[HL_VA_RANGE_TYPE_HOST]; } - mutex_init(&ctx->dram_va_range->lock); + mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock); - rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start, - dram_range_end); + rc = va_range_init(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM], + dram_range_start, dram_range_end, dram_page_size); if (rc) { dev_err(hdev->dev, "failed to init dram vm range\n"); - goto 
dram_vm_err; + goto clear_host_huge_va_range; } hl_debugfs_add_ctx_mem_hash(hdev, ctx); return 0; -dram_vm_err: - mutex_destroy(&ctx->dram_va_range->lock); +clear_host_huge_va_range: + mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock); if (hdev->pmmu_huge_range) { - mutex_lock(&ctx->host_huge_va_range->lock); - clear_va_list_locked(hdev, &ctx->host_huge_va_range->list); - mutex_unlock(&ctx->host_huge_va_range->lock); + mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); + clear_va_list_locked(hdev, + &ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->list); + mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); } -host_hpage_range_err: +clear_host_va_range: if (hdev->pmmu_huge_range) - mutex_destroy(&ctx->host_huge_va_range->lock); - mutex_lock(&ctx->host_va_range->lock); - clear_va_list_locked(hdev, &ctx->host_va_range->list); - mutex_unlock(&ctx->host_va_range->lock); -host_page_range_err: - mutex_destroy(&ctx->host_va_range->lock); + mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock); + mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); + clear_va_list_locked(hdev, &ctx->va_range[HL_VA_RANGE_TYPE_HOST]->list); + mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); +mmu_ctx_fini: + mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock); mutex_destroy(&ctx->mem_hash_lock); hl_mmu_ctx_fini(ctx); -mmu_ctx_err: - kfree(ctx->dram_va_range); -dram_va_range_err: - kfree(ctx->host_huge_va_range); -host_huge_va_range_err: - kfree(ctx->host_va_range); +free_va_range: + for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) + kfree(ctx->va_range[i]); return rc; } @@ -1675,6 +1810,7 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; u64 host_range_start, host_range_end, host_huge_range_start, host_huge_range_end, dram_range_start, dram_range_end; + u32 host_page_size, host_huge_page_size, dram_page_size; atomic64_set(&ctx->dram_phys_mem, 0); @@ -1685,27 +1821,23 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) * In case of DRAM mapping, the returned address is the physical * address of the memory related to the given handle. 
*/ - if (ctx->hdev->mmu_enable) { - dram_range_start = prop->dmmu.start_addr; - dram_range_end = prop->dmmu.end_addr; - host_range_start = prop->pmmu.start_addr; - host_range_end = prop->pmmu.end_addr; - host_huge_range_start = prop->pmmu_huge.start_addr; - host_huge_range_end = prop->pmmu_huge.end_addr; - } else { - dram_range_start = prop->dram_user_base_address; - dram_range_end = prop->dram_end_address; - host_range_start = prop->dram_user_base_address; - host_range_end = prop->dram_end_address; - host_huge_range_start = prop->dram_user_base_address; - host_huge_range_end = prop->dram_end_address; - } + if (!ctx->hdev->mmu_enable) + return 0; + + dram_range_start = prop->dmmu.start_addr; + dram_range_end = prop->dmmu.end_addr; + dram_page_size = prop->dmmu.page_size; + host_range_start = prop->pmmu.start_addr; + host_range_end = prop->pmmu.end_addr; + host_page_size = prop->pmmu.page_size; + host_huge_range_start = prop->pmmu_huge.start_addr; + host_huge_range_end = prop->pmmu_huge.end_addr; + host_huge_page_size = prop->pmmu_huge.page_size; return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, - host_huge_range_start, - host_huge_range_end, - dram_range_start, - dram_range_end); + host_page_size, host_huge_range_start, + host_huge_range_end, host_huge_page_size, + dram_range_start, dram_range_end, dram_page_size); } /* @@ -1737,6 +1869,9 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) struct hlist_node *tmp_node; int i; + if (!ctx->hdev->mmu_enable) + return; + hl_debugfs_remove_ctx_mem_hash(hdev, ctx); /* @@ -1771,13 +1906,21 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) } spin_unlock(&vm->idr_lock); - va_range_fini(hdev, ctx->dram_va_range); + va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]); + va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]); + if (hdev->pmmu_huge_range) - va_range_fini(hdev, ctx->host_huge_va_range); - va_range_fini(hdev, ctx->host_va_range); + va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]); mutex_destroy(&ctx->mem_hash_lock); hl_mmu_ctx_fini(ctx); + + /* In this case we need to clear the global accounting of DRAM usage + * because the user notifies us on allocations. If the user is no more, + * all DRAM is available + */ + if (!ctx->hdev->asic_prop.dram_supports_virtual_memory) + atomic64_set(&ctx->hdev->dram_used_mem, 0); } /* diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c index b5058798aeb9..33ae953d3a36 100644 --- a/drivers/misc/habanalabs/common/mmu.c +++ b/drivers/misc/habanalabs/common/mmu.c @@ -22,18 +22,25 @@ static bool is_dram_va(struct hl_device *hdev, u64 virt_addr) * hl_mmu_init() - initialize the MMU module. * @hdev: habanalabs device structure. * - * This function does the following: - * - Create a pool of pages for pgt_infos. - * - Create a shadow table for pgt - * * Return: 0 for success, non-zero for failure. 
*/ int hl_mmu_init(struct hl_device *hdev) { - if (hdev->mmu_enable) - return hdev->mmu_func.init(hdev); + int rc = -EOPNOTSUPP; - return 0; + if (!hdev->mmu_enable) + return 0; + + if (hdev->mmu_func[MMU_DR_PGT].init != NULL) { + rc = hdev->mmu_func[MMU_DR_PGT].init(hdev); + if (rc) + return rc; + } + + if (hdev->mmu_func[MMU_HR_PGT].init != NULL) + rc = hdev->mmu_func[MMU_HR_PGT].init(hdev); + + return rc; } /** @@ -48,8 +55,14 @@ int hl_mmu_init(struct hl_device *hdev) */ void hl_mmu_fini(struct hl_device *hdev) { - if (hdev->mmu_enable) - hdev->mmu_func.fini(hdev); + if (!hdev->mmu_enable) + return; + + if (hdev->mmu_func[MMU_DR_PGT].fini != NULL) + hdev->mmu_func[MMU_DR_PGT].fini(hdev); + + if (hdev->mmu_func[MMU_HR_PGT].fini != NULL) + hdev->mmu_func[MMU_HR_PGT].fini(hdev); } /** @@ -63,11 +76,23 @@ void hl_mmu_fini(struct hl_device *hdev) int hl_mmu_ctx_init(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; + int rc = -EOPNOTSUPP; - if (hdev->mmu_enable) - return hdev->mmu_func.ctx_init(ctx); + if (!hdev->mmu_enable) + return 0; - return 0; + mutex_init(&ctx->mmu_lock); + + if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) { + rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx); + if (rc) + return rc; + } + + if (hdev->mmu_func[MMU_HR_PGT].ctx_init != NULL) + rc = hdev->mmu_func[MMU_HR_PGT].ctx_init(ctx); + + return rc; } /* @@ -84,12 +109,20 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; - if (hdev->mmu_enable) - hdev->mmu_func.ctx_fini(ctx); + if (!hdev->mmu_enable) + return; + + if (hdev->mmu_func[MMU_DR_PGT].ctx_fini != NULL) + hdev->mmu_func[MMU_DR_PGT].ctx_fini(ctx); + + if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL) + hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx); + + mutex_destroy(&ctx->mmu_lock); } /* - * hl_mmu_unmap - unmaps a virtual addr + * hl_mmu_unmap_page - unmaps a virtual addr * * @ctx: pointer to the context structure * @virt_addr: virt addr to map from @@ -109,7 +142,7 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) * For optimization reasons PCI flush may be requested once after unmapping of * large area. */ -int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, +int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flush_pte) { struct hl_device *hdev = ctx->hdev; @@ -117,7 +150,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, struct hl_mmu_properties *mmu_prop; u64 real_virt_addr; u32 real_page_size, npages; - int i, rc = 0; + int i, rc = 0, pgt_residency; bool is_dram_addr; if (!hdev->mmu_enable) @@ -132,6 +165,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, else mmu_prop = &prop->pmmu; + pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; + /* * The H/W handles mapping of specific page sizes. Hence if the page * size is bigger, we break it to sub-pages and unmap them separately. 
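The unmap path above deliberately leaves the PTE cache flush to its caller: hl_mmu_unmap_page() only flushes when flush_pte is set, so a caller tearing down a large region can defer the flush to the final page. A minimal sketch of that calling pattern, assuming a region that is already page-size aligned (the helper name and its arguments are illustrative, not part of this patch); this is essentially the loop that the new hl_mmu_unmap_contiguous() wrapper further down in this patch implements:

static int example_unmap_region(struct hl_ctx *ctx, u64 va, u64 size,
				u32 page_size)
{
	bool flush_pte;
	u64 off;
	int rc;

	for (off = 0 ; off < size ; off += page_size) {
		/* flush the PTEs only on the last page of the region */
		flush_pte = (off + page_size) >= size;

		rc = hl_mmu_unmap_page(ctx, va + off, page_size, flush_pte);
		if (rc)
			return rc;
	}

	return 0;
}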
@@ -150,7 +185,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, real_virt_addr = virt_addr; for (i = 0 ; i < npages ; i++) { - rc = hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr); + rc = hdev->mmu_func[pgt_residency].unmap(ctx, + real_virt_addr, is_dram_addr); if (rc) break; @@ -158,13 +194,13 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, } if (flush_pte) - hdev->mmu_func.flush(ctx); + hdev->mmu_func[pgt_residency].flush(ctx); return rc; } /* - * hl_mmu_map - maps a virtual addr to physical addr + * hl_mmu_map_page - maps a virtual addr to physical addr * * @ctx: pointer to the context structure * @virt_addr: virt addr to map from @@ -185,17 +221,18 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, * For optimization reasons PCI flush may be requested once after mapping of * large area. */ -int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, - bool flush_pte) +int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, + u32 page_size, bool flush_pte) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_mmu_properties *mmu_prop; u64 real_virt_addr, real_phys_addr; u32 real_page_size, npages; - int i, rc, mapped_cnt = 0; + int i, rc, pgt_residency, mapped_cnt = 0; bool is_dram_addr; + if (!hdev->mmu_enable) return 0; @@ -208,6 +245,8 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, else mmu_prop = &prop->pmmu; + pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; + /* * The H/W handles mapping of specific page sizes. Hence if the page * size is bigger, we break it to sub-pages and map them separately. @@ -216,7 +255,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, real_page_size = mmu_prop->page_size; } else { dev_err(hdev->dev, - "page size of %u is not %uKB aligned, can't unmap\n", + "page size of %u is not %uKB aligned, can't map\n", page_size, mmu_prop->page_size >> 10); return -EFAULT; @@ -231,8 +270,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, real_phys_addr = phys_addr; for (i = 0 ; i < npages ; i++) { - rc = hdev->mmu_func.map(ctx, real_virt_addr, real_phys_addr, - real_page_size, is_dram_addr); + rc = hdev->mmu_func[pgt_residency].map(ctx, + real_virt_addr, real_phys_addr, + real_page_size, is_dram_addr); if (rc) goto err; @@ -242,21 +282,124 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, } if (flush_pte) - hdev->mmu_func.flush(ctx); + hdev->mmu_func[pgt_residency].flush(ctx); return 0; err: real_virt_addr = virt_addr; for (i = 0 ; i < mapped_cnt ; i++) { - if (hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr)) + if (hdev->mmu_func[pgt_residency].unmap(ctx, + real_virt_addr, is_dram_addr)) dev_warn_ratelimited(hdev->dev, "failed to unmap va: 0x%llx\n", real_virt_addr); real_virt_addr += real_page_size; } - hdev->mmu_func.flush(ctx); + hdev->mmu_func[pgt_residency].flush(ctx); + + return rc; +} + +/* + * hl_mmu_map_contiguous - implements a wrapper for hl_mmu_map_page + * for mapping contiguous physical memory + * + * @ctx: pointer to the context structure + * @virt_addr: virt addr to map from + * @phys_addr: phys addr to map to + * @size: size to map + * + */ +int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr, + u64 phys_addr, u32 size) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 curr_va, 
curr_pa; + u32 page_size; + bool flush_pte; + int rc = 0, off; + + if (hl_mem_area_inside_range(virt_addr, size, + prop->dmmu.start_addr, prop->dmmu.end_addr)) + page_size = prop->dmmu.page_size; + else if (hl_mem_area_inside_range(virt_addr, size, + prop->pmmu.start_addr, prop->pmmu.end_addr)) + page_size = prop->pmmu.page_size; + else if (hl_mem_area_inside_range(virt_addr, size, + prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr)) + page_size = prop->pmmu_huge.page_size; + else + return -EINVAL; + + for (off = 0 ; off < size ; off += page_size) { + curr_va = virt_addr + off; + curr_pa = phys_addr + off; + flush_pte = (off + page_size) >= size; + rc = hl_mmu_map_page(ctx, curr_va, curr_pa, page_size, + flush_pte); + if (rc) { + dev_err(hdev->dev, + "Map failed for va 0x%llx to pa 0x%llx\n", + curr_va, curr_pa); + goto unmap; + } + } + + return rc; + +unmap: + for (; off >= 0 ; off -= page_size) { + curr_va = virt_addr + off; + flush_pte = (off - (s32) page_size) < 0; + if (hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte)) + dev_warn_ratelimited(hdev->dev, + "failed to unmap va 0x%llx\n", curr_va); + } + + return rc; +} + +/* + * hl_mmu_unmap_contiguous - implements a wrapper for hl_mmu_unmap_page + * for unmapping contiguous physical memory + * + * @ctx: pointer to the context structure + * @virt_addr: virt addr to unmap + * @size: size to unmap + * + */ +int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 curr_va; + u32 page_size; + bool flush_pte; + int rc = 0, off; + + if (hl_mem_area_inside_range(virt_addr, size, + prop->dmmu.start_addr, prop->dmmu.end_addr)) + page_size = prop->dmmu.page_size; + else if (hl_mem_area_inside_range(virt_addr, size, + prop->pmmu.start_addr, prop->pmmu.end_addr)) + page_size = prop->pmmu.page_size; + else if (hl_mem_area_inside_range(virt_addr, size, + prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr)) + page_size = prop->pmmu_huge.page_size; + else + return -EINVAL; + + for (off = 0 ; off < size ; off += page_size) { + curr_va = virt_addr + off; + flush_pte = (off + page_size) >= size; + rc = hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte); + if (rc) + dev_warn_ratelimited(hdev->dev, + "Unmap failed for va 0x%llx\n", curr_va); + } return rc; } @@ -271,8 +414,14 @@ void hl_mmu_swap_out(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; - if (hdev->mmu_enable) - hdev->mmu_func.swap_out(ctx); + if (!hdev->mmu_enable) + return; + + if (hdev->mmu_func[MMU_DR_PGT].swap_out != NULL) + hdev->mmu_func[MMU_DR_PGT].swap_out(ctx); + + if (hdev->mmu_func[MMU_HR_PGT].swap_out != NULL) + hdev->mmu_func[MMU_HR_PGT].swap_out(ctx); } /* @@ -285,8 +434,64 @@ void hl_mmu_swap_in(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; - if (hdev->mmu_enable) - hdev->mmu_func.swap_in(ctx); + if (!hdev->mmu_enable) + return; + + if (hdev->mmu_func[MMU_DR_PGT].swap_in != NULL) + hdev->mmu_func[MMU_DR_PGT].swap_in(ctx); + + if (hdev->mmu_func[MMU_HR_PGT].swap_in != NULL) + hdev->mmu_func[MMU_HR_PGT].swap_in(ctx); +} + +int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr) +{ + struct hl_mmu_hop_info hops; + u64 tmp_addr; + int rc; + + rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops); + if (rc) + return rc; + + /* last hop holds the phys address and flags */ + tmp_addr = hops.hop_info[hops.used_hops - 1].hop_pte_val; + *phys_addr = (tmp_addr & HOP_PHYS_ADDR_MASK) | (virt_addr & FLAGS_MASK); + + return 0; +} + 
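hl_mmu_va_to_pa() above builds the physical address from the last hop's PTE: the PTE supplies the physical page address, and the low bits of the virtual address supply the offset inside that page. A small worked example, assuming the usual v1 layout in which FLAGS_MASK covers the low 12 bits and HOP_PHYS_ADDR_MASK is its complement (the concrete addresses below are made up):

/* A last-hop PTE pointing at physical page 0x2_0030_0000, with its
 * valid/flag bits set in the low 12 bits, and a virtual address whose
 * offset inside the page is 0x234.
 */
u64 hop_pte_val = 0x200300003ULL;
u64 virt_addr   = 0x7f0000001234ULL;

u64 phys_addr = (hop_pte_val & ~0xFFFULL) |	/* HOP_PHYS_ADDR_MASK */
		(virt_addr & 0xFFFULL);		/* FLAGS_MASK */
/* phys_addr == 0x200300234 */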
+int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, + struct hl_mmu_hop_info *hops) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + int rc; + bool is_dram_addr; + + if (!hdev->mmu_enable) + return -EOPNOTSUPP; + + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); + + /* host-residency is the same in PMMU and HPMMU, use one of them */ + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + + mutex_lock(&ctx->mmu_lock); + + if (mmu_prop->host_resident) + rc = hdev->mmu_func[MMU_HR_PGT].get_tlb_info(ctx, + virt_addr, hops); + else + rc = hdev->mmu_func[MMU_DR_PGT].get_tlb_info(ctx, + virt_addr, hops); + + mutex_unlock(&ctx->mmu_lock); + + return rc; } int hl_mmu_if_set_funcs(struct hl_device *hdev) @@ -297,7 +502,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev) switch (hdev->asic_type) { case ASIC_GOYA: case ASIC_GAUDI: - hl_mmu_v1_set_funcs(hdev); + hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]); break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c index 8d1eb5265419..2ce6ea89d4fa 100644 --- a/drivers/misc/habanalabs/common/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu_v1.c @@ -8,7 +8,6 @@ #include "habanalabs.h" #include "../include/hw_ip/mmu/mmu_general.h" -#include <linux/genalloc.h> #include <linux/slab.h> static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); @@ -29,7 +28,7 @@ static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info) { struct hl_device *hdev = ctx->hdev; - gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr, + gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr, hdev->asic_prop.mmu_hop_table_size); hash_del(&pgt_info->node); kfree((u64 *) (uintptr_t) pgt_info->shadow_addr); @@ -54,7 +53,7 @@ static u64 alloc_hop(struct hl_ctx *ctx) if (!pgt_info) return ULLONG_MAX; - phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool, + phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_hop_table_size); if (!phys_addr) { dev_err(hdev->dev, "failed to allocate page\n"); @@ -75,7 +74,7 @@ static u64 alloc_hop(struct hl_ctx *ctx) return shadow_addr; shadow_err: - gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr, + gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size); pool_add_err: kfree(pgt_info); @@ -91,7 +90,7 @@ static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx) static inline u64 get_hop0_addr(struct hl_ctx *ctx) { - return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 + + return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 + (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); } @@ -263,7 +262,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) hop2_pte_addr, hop3_pte_addr, pte_val; int rc, i, j, hop3_allocated = 0; - if ((!hdev->dram_supports_virtual_memory) || + if ((!prop->dram_supports_virtual_memory) || (!hdev->dram_default_page_mapping) || (ctx->asid == HL_KERNEL_ASID_ID)) return 0; @@ -363,7 +362,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx) hop2_pte_addr, hop3_pte_addr; int i, j; - if ((!hdev->dram_supports_virtual_memory) || + if ((!prop->dram_supports_virtual_memory) || (!hdev->dram_default_page_mapping) || (ctx->asid == HL_KERNEL_ASID_ID)) return; @@ -419,15 +418,15 @@ static int hl_mmu_v1_init(struct hl_device 
*hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; int rc; - hdev->mmu_priv.mmu_pgt_pool = + hdev->mmu_priv.dr.mmu_pgt_pool = gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); - if (!hdev->mmu_priv.mmu_pgt_pool) { + if (!hdev->mmu_priv.dr.mmu_pgt_pool) { dev_err(hdev->dev, "Failed to create page gen pool\n"); return -ENOMEM; } - rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr + + rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr + prop->mmu_hop0_tables_total_size, prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size, -1); @@ -436,10 +435,10 @@ static int hl_mmu_v1_init(struct hl_device *hdev) goto err_pool_add; } - hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid, + hdev->mmu_priv.dr.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid, prop->mmu_hop_table_size, GFP_KERNEL | __GFP_ZERO); - if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) { + if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { rc = -ENOMEM; goto err_pool_add; } @@ -449,7 +448,7 @@ static int hl_mmu_v1_init(struct hl_device *hdev) return 0; err_pool_add: - gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool); + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); return rc; } @@ -468,8 +467,8 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) { /* MMU H/W fini was already done in device hw_fini() */ - kvfree(hdev->mmu_priv.mmu_shadow_hop0); - gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool); + kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); } /** @@ -482,9 +481,7 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) */ static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx) { - mutex_init(&ctx->mmu_lock); hash_init(ctx->mmu_shadow_hash); - return dram_default_mapping_init(ctx); } @@ -517,8 +514,6 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx) pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); _free_hop(ctx, pgt_info); } - - mutex_destroy(&ctx->mmu_lock); } static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, @@ -842,15 +837,114 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx) } +static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + int hop_num, u64 hop_addr, u64 virt_addr) +{ + switch (hop_num) { + case 0: + return get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + case 1: + return get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + case 2: + return get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + case 3: + return get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + case 4: + return get_hop4_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + default: + break; + } + return U64_MAX; +} + +static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, + struct hl_mmu_hop_info *hops) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + bool is_dram_addr, is_pmmu_addr, is_pmmu_h_addr, is_huge; + int i, used_hops; + + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); + is_pmmu_addr = hl_mem_area_inside_range(virt_addr, prop->pmmu.page_size, + prop->pmmu.start_addr, + prop->pmmu.end_addr); + is_pmmu_h_addr = hl_mem_area_inside_range(virt_addr, + prop->pmmu_huge.page_size, + prop->pmmu_huge.start_addr, + prop->pmmu_huge.end_addr); + if (is_dram_addr) { + mmu_prop = &prop->dmmu; + is_huge = true; + } else if (is_pmmu_addr) { + mmu_prop = &prop->pmmu; + is_huge = false; + } else if (is_pmmu_h_addr) { 
+ mmu_prop = &prop->pmmu_huge; + is_huge = true; + } else { + return -EINVAL; + } + + used_hops = mmu_prop->num_hops; + + /* huge pages use lesser hops */ + if (is_huge) + used_hops--; + + hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx); + hops->hop_info[0].hop_pte_addr = + get_hop_pte_addr(ctx, mmu_prop, 0, + hops->hop_info[0].hop_addr, virt_addr); + hops->hop_info[0].hop_pte_val = + hdev->asic_funcs->read_pte(hdev, + hops->hop_info[0].hop_pte_addr); + + for (i = 1 ; i < used_hops ; i++) { + hops->hop_info[i].hop_addr = + get_next_hop_addr(ctx, + hops->hop_info[i - 1].hop_pte_val); + if (hops->hop_info[i].hop_addr == ULLONG_MAX) + return -EFAULT; + + hops->hop_info[i].hop_pte_addr = + get_hop_pte_addr(ctx, mmu_prop, i, + hops->hop_info[i].hop_addr, + virt_addr); + hops->hop_info[i].hop_pte_val = + hdev->asic_funcs->read_pte(hdev, + hops->hop_info[i].hop_pte_addr); + + if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK)) + return -EFAULT; + + if (hops->hop_info[i].hop_pte_val & LAST_MASK) + break; + } + + /* if passed over all hops then no last hop was found */ + if (i == mmu_prop->num_hops) + return -EFAULT; + + if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK)) + return -EFAULT; + + hops->used_hops = i + 1; + + return 0; +} + /* * hl_mmu_v1_prepare - prepare mmu for working with mmu v1 * * @hdev: pointer to the device structure */ -void hl_mmu_v1_set_funcs(struct hl_device *hdev) +void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu) { - struct hl_mmu_funcs *mmu = &hdev->mmu_func; - mmu->init = hl_mmu_v1_init; mmu->fini = hl_mmu_v1_fini; mmu->ctx_init = hl_mmu_v1_ctx_init; @@ -860,4 +954,5 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev) mmu->flush = flush; mmu->swap_out = hl_mmu_v1_swap_out; mmu->swap_in = hl_mmu_v1_swap_in; + mmu->get_tlb_info = hl_mmu_v1_get_tlb_info; } diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c index 4327e5704ebb..923b2606e29f 100644 --- a/drivers/misc/habanalabs/common/pci.c +++ b/drivers/misc/habanalabs/common/pci.c @@ -338,17 +338,12 @@ static int hl_pci_set_dma_mask(struct hl_device *hdev) /** * hl_pci_init() - PCI initialization code. * @hdev: Pointer to hl_device structure. - * @cpu_boot_status_reg: status register of the device's CPU - * @boot_err0_reg: boot error register of the device's CPU - * @preboot_ver_timeout: how much to wait before bailing out on reading - * the preboot version * * Set DMA masks, initialize the PCI controller and map the PCI BARs. * * Return: 0 on success, non-zero for failure. 
*/ -int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg, - u32 boot_err0_reg, u32 preboot_ver_timeout) +int hl_pci_init(struct hl_device *hdev) { struct pci_dev *pdev = hdev->pdev; int rc; @@ -380,15 +375,6 @@ int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg, if (rc) goto unmap_pci_bars; - /* Before continuing in the initialization, we need to read the preboot - * version to determine whether we run with a security-enabled firmware - * The check will be done in each ASIC's specific code - */ - rc = hl_fw_read_preboot_ver(hdev, cpu_boot_status_reg, boot_err0_reg, - preboot_ver_timeout); - if (rc) - goto unmap_pci_bars; - return 0; unmap_pci_bars: diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 3ceae87016b1..4366d8f93842 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -12,7 +12,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) { struct cpucp_packet pkt; - long result; + u64 result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -32,10 +32,10 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) dev_err(hdev->dev, "Failed to get frequency of PLL %d, error %d\n", pll_index, rc); - result = rc; + return rc; } - return result; + return (long) result; } void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) @@ -62,7 +62,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) u64 hl_get_max_power(struct hl_device *hdev) { struct cpucp_packet pkt; - long result; + u64 result; int rc; memset(&pkt, 0, sizeof(pkt)); @@ -75,7 +75,7 @@ u64 hl_get_max_power(struct hl_device *hdev) if (rc) { dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); - result = rc; + return (u64) rc; } return result; @@ -276,6 +276,8 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr, str = "In reset"; else if (hdev->disabled) str = "Malfunction"; + else if (hdev->needs_reset) + str = "Needs Reset"; else str = "Operational"; @@ -304,7 +306,7 @@ static ssize_t max_power_show(struct device *dev, struct device_attribute *attr, struct hl_device *hdev = dev_get_drvdata(dev); long val; - if (hl_device_disabled_or_in_reset(hdev)) + if (!hl_device_operational(hdev, NULL)) return -ENODEV; val = hl_get_max_power(hdev); @@ -319,7 +321,7 @@ static ssize_t max_power_store(struct device *dev, unsigned long value; int rc; - if (hl_device_disabled_or_in_reset(hdev)) { + if (!hl_device_operational(hdev, NULL)) { count = -ENODEV; goto out; } @@ -347,7 +349,7 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj, char *data; int rc; - if (hl_device_disabled_or_in_reset(hdev)) + if (!hl_device_operational(hdev, NULL)) return -ENODEV; if (!max_size) |
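One pattern repeats throughout the IOCTL and sysfs hunks in this patch: hl_device_disabled_or_in_reset() is replaced by hl_device_operational(), which also reports which non-operational state the device is in, so callers print the status strings registered in create_hdev() ("disabled", "in reset", "needs reset") instead of deducing the state from hdev->in_reset. A sketch of the resulting caller shape (the function name and the "FOO" IOCTL label are placeholders, not code from this patch):

static int example_ioctl_entry(struct hl_fpriv *hpriv)
{
	struct hl_device *hdev = hpriv->hdev;
	enum hl_device_status status;

	if (!hl_device_operational(hdev, &status)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't execute FOO IOCTL\n",
			hdev->status[status]);
		return -EBUSY;
	}

	/* IOCTL-specific work goes here */
	return 0;
}

Callers that only need the boolean answer, such as the hwmon and sysfs paths above, pass NULL for the status pointer.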