aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/v3d
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/v3d')
-rw-r--r--drivers/gpu/drm/v3d/v3d_bo.c1
-rw-r--r--drivers/gpu/drm/v3d/v3d_debugfs.c46
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.c15
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.h37
-rw-r--r--drivers/gpu/drm/v3d/v3d_fence.c10
-rw-r--r--drivers/gpu/drm/v3d/v3d_gem.c212
-rw-r--r--drivers/gpu/drm/v3d/v3d_irq.c29
-rw-r--r--drivers/gpu/drm/v3d/v3d_regs.h79
-rw-r--r--drivers/gpu/drm/v3d/v3d_sched.c152
-rw-r--r--drivers/gpu/drm/v3d/v3d_trace.h121
10 files changed, 620 insertions, 82 deletions
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
index 54d96518a131..a08766d39eab 100644
--- a/drivers/gpu/drm/v3d/v3d_bo.c
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -293,6 +293,7 @@ v3d_prime_import_sg_table(struct drm_device *dev,
bo->resv = attach->dmabuf->resv;
bo->sgt = sgt;
+ obj->import_attach = attach;
v3d_bo_get_pages(bo);
v3d_mmu_insert_ptes(bo);
diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
index 4db62c545748..eb2b2d2f8553 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -71,10 +71,13 @@ static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
V3D_READ(v3d_hub_reg_defs[i].reg));
}
- for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
- seq_printf(m, "%s (0x%04x): 0x%08x\n",
- v3d_gca_reg_defs[i].name, v3d_gca_reg_defs[i].reg,
- V3D_GCA_READ(v3d_gca_reg_defs[i].reg));
+ if (v3d->ver < 41) {
+ for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ v3d_gca_reg_defs[i].name,
+ v3d_gca_reg_defs[i].reg,
+ V3D_GCA_READ(v3d_gca_reg_defs[i].reg));
+ }
}
for (core = 0; core < v3d->cores; core++) {
@@ -176,9 +179,44 @@ static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused)
return 0;
}
+static int v3d_measure_clock(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ uint32_t cycles;
+ int core = 0;
+ int measure_ms = 1000;
+
+ if (v3d->ver >= 40) {
+ V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3,
+ V3D_SET_FIELD(V3D_PCTR_CYCLE_COUNT,
+ V3D_PCTR_S0));
+ V3D_CORE_WRITE(core, V3D_V4_PCTR_0_CLR, 1);
+ V3D_CORE_WRITE(core, V3D_V4_PCTR_0_EN, 1);
+ } else {
+ V3D_CORE_WRITE(core, V3D_V3_PCTR_0_PCTRS0,
+ V3D_PCTR_CYCLE_COUNT);
+ V3D_CORE_WRITE(core, V3D_V3_PCTR_0_CLR, 1);
+ V3D_CORE_WRITE(core, V3D_V3_PCTR_0_EN,
+ V3D_V3_PCTR_0_EN_ENABLE |
+ 1);
+ }
+ msleep(measure_ms);
+ cycles = V3D_CORE_READ(core, V3D_PCTR_0_PCTR0);
+
+ seq_printf(m, "cycles: %d (%d.%d Mhz)\n",
+ cycles,
+ cycles / (measure_ms * 1000),
+ (cycles / (measure_ms * 100)) % 10);
+
+ return 0;
+}
+
static const struct drm_info_list v3d_debugfs_list[] = {
{"v3d_ident", v3d_v3d_debugfs_ident, 0},
{"v3d_regs", v3d_v3d_debugfs_regs, 0},
+ {"measure_clock", v3d_measure_clock, 0},
{"bo_stats", v3d_debugfs_bo_stats, 0},
};
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 2a85fa68ffea..f0afcec72c34 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -112,10 +112,15 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
return 0;
}
- /* Any params that aren't just register reads would go here. */
- DRM_DEBUG("Unknown parameter %d\n", args->param);
- return -EINVAL;
+ switch (args->param) {
+ case DRM_V3D_PARAM_SUPPORTS_TFU:
+ args->value = 1;
+ return 0;
+ default:
+ DRM_DEBUG("Unknown parameter %d\n", args->param);
+ return -EINVAL;
+ }
}
static int
@@ -170,7 +175,8 @@ static const struct file_operations v3d_drm_fops = {
/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
* protection between clients. Note that render nodes would be be
* able to submit CLs that could access BOs from clients authenticated
- * with the master node.
+ * with the master node. The TFU doesn't use the GMP, so it would
+ * need to stay DRM_AUTH until we do buffer size/offset validation.
*/
static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
@@ -179,6 +185,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
};
static const struct vm_operations_struct v3d_vm_ops = {
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index e6fed696ad86..dcb772a19191 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -7,19 +7,18 @@
#include <drm/drm_encoder.h>
#include <drm/drm_gem.h>
#include <drm/gpu_scheduler.h>
+#include "uapi/drm/v3d_drm.h"
#define GMP_GRANULARITY (128 * 1024)
-/* Enum for each of the V3D queues. We maintain various queue
- * tracking as an array because at some point we'll want to support
- * the TFU (texture formatting unit) as another queue.
- */
+/* Enum for each of the V3D queues. */
enum v3d_queue {
V3D_BIN,
V3D_RENDER,
+ V3D_TFU,
};
-#define V3D_MAX_QUEUES (V3D_RENDER + 1)
+#define V3D_MAX_QUEUES (V3D_TFU + 1)
struct v3d_queue_state {
struct drm_gpu_scheduler sched;
@@ -68,6 +67,7 @@ struct v3d_dev {
struct v3d_exec_info *bin_job;
struct v3d_exec_info *render_job;
+ struct v3d_tfu_job *tfu_job;
struct v3d_queue_state queue[V3D_MAX_QUEUES];
@@ -198,6 +198,11 @@ struct v3d_exec_info {
*/
struct dma_fence *bin_done_fence;
+ /* Fence for when the scheduler considers the render to be
+ * done, for when the BOs reservations should be complete.
+ */
+ struct dma_fence *render_done_fence;
+
struct kref refcount;
/* This is the array of BOs that were looked up at the start of exec. */
@@ -213,6 +218,25 @@ struct v3d_exec_info {
u32 qma, qms, qts;
};
+struct v3d_tfu_job {
+ struct drm_sched_job base;
+
+ struct drm_v3d_submit_tfu args;
+
+ /* An optional fence userspace can pass in for the job to depend on. */
+ struct dma_fence *in_fence;
+
+ /* v3d fence to be signaled by IRQ handler when the job is complete. */
+ struct dma_fence *done_fence;
+
+ struct v3d_dev *v3d;
+
+ struct kref refcount;
+
+ /* This is the array of BOs that were looked up at the start of exec. */
+ struct v3d_bo *bo[4];
+};
+
/**
* _wait_for - magic (register) wait macro
*
@@ -276,9 +300,12 @@ int v3d_gem_init(struct drm_device *dev);
void v3d_gem_destroy(struct drm_device *dev);
int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
+int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
void v3d_exec_put(struct v3d_exec_info *exec);
+void v3d_tfu_job_put(struct v3d_tfu_job *exec);
void v3d_reset(struct v3d_dev *v3d);
void v3d_invalidate_caches(struct v3d_dev *v3d);
void v3d_flush_caches(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
index 50bfcf9a8a1a..b0a2a1ae2eb1 100644
--- a/drivers/gpu/drm/v3d/v3d_fence.c
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -29,10 +29,16 @@ static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
{
struct v3d_fence *f = to_v3d_fence(fence);
- if (f->queue == V3D_BIN)
+ switch (f->queue) {
+ case V3D_BIN:
return "v3d-bin";
- else
+ case V3D_RENDER:
return "v3d-render";
+ case V3D_TFU:
+ return "v3d-tfu";
+ default:
+ return NULL;
+ }
}
const struct dma_fence_ops v3d_fence_ops = {
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 70c54774400b..05ca6319065e 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -207,32 +207,26 @@ v3d_flush_caches(struct v3d_dev *v3d)
}
static void
-v3d_attach_object_fences(struct v3d_exec_info *exec)
+v3d_attach_object_fences(struct v3d_bo **bos, int bo_count,
+ struct dma_fence *fence)
{
- struct dma_fence *out_fence = &exec->render.base.s_fence->finished;
- struct v3d_bo *bo;
int i;
- for (i = 0; i < exec->bo_count; i++) {
- bo = to_v3d_bo(&exec->bo[i]->base);
-
+ for (i = 0; i < bo_count; i++) {
/* XXX: Use shared fences for read-only objects. */
- reservation_object_add_excl_fence(bo->resv, out_fence);
+ reservation_object_add_excl_fence(bos[i]->resv, fence);
}
}
static void
-v3d_unlock_bo_reservations(struct drm_device *dev,
- struct v3d_exec_info *exec,
+v3d_unlock_bo_reservations(struct v3d_bo **bos,
+ int bo_count,
struct ww_acquire_ctx *acquire_ctx)
{
int i;
- for (i = 0; i < exec->bo_count; i++) {
- struct v3d_bo *bo = to_v3d_bo(&exec->bo[i]->base);
-
- ww_mutex_unlock(&bo->resv->lock);
- }
+ for (i = 0; i < bo_count; i++)
+ ww_mutex_unlock(&bos[i]->resv->lock);
ww_acquire_fini(acquire_ctx);
}
@@ -245,19 +239,19 @@ v3d_unlock_bo_reservations(struct drm_device *dev,
* to v3d, so we don't attach dma-buf fences to them.
*/
static int
-v3d_lock_bo_reservations(struct drm_device *dev,
- struct v3d_exec_info *exec,
+v3d_lock_bo_reservations(struct v3d_bo **bos,
+ int bo_count,
struct ww_acquire_ctx *acquire_ctx)
{
int contended_lock = -1;
int i, ret;
- struct v3d_bo *bo;
ww_acquire_init(acquire_ctx, &reservation_ww_class);
retry:
if (contended_lock != -1) {
- bo = to_v3d_bo(&exec->bo[contended_lock]->base);
+ struct v3d_bo *bo = bos[contended_lock];
+
ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
acquire_ctx);
if (ret) {
@@ -266,23 +260,20 @@ retry:
}
}
- for (i = 0; i < exec->bo_count; i++) {
+ for (i = 0; i < bo_count; i++) {
if (i == contended_lock)
continue;
- bo = to_v3d_bo(&exec->bo[i]->base);
-
- ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
+ ret = ww_mutex_lock_interruptible(&bos[i]->resv->lock,
+ acquire_ctx);
if (ret) {
int j;
- for (j = 0; j < i; j++) {
- bo = to_v3d_bo(&exec->bo[j]->base);
- ww_mutex_unlock(&bo->resv->lock);
- }
+ for (j = 0; j < i; j++)
+ ww_mutex_unlock(&bos[j]->resv->lock);
if (contended_lock != -1 && contended_lock >= i) {
- bo = to_v3d_bo(&exec->bo[contended_lock]->base);
+ struct v3d_bo *bo = bos[contended_lock];
ww_mutex_unlock(&bo->resv->lock);
}
@@ -302,12 +293,11 @@ retry:
/* Reserve space for our shared (read-only) fence references,
* before we commit the CL to the hardware.
*/
- for (i = 0; i < exec->bo_count; i++) {
- bo = to_v3d_bo(&exec->bo[i]->base);
-
- ret = reservation_object_reserve_shared(bo->resv);
+ for (i = 0; i < bo_count; i++) {
+ ret = reservation_object_reserve_shared(bos[i]->resv, 1);
if (ret) {
- v3d_unlock_bo_reservations(dev, exec, acquire_ctx);
+ v3d_unlock_bo_reservations(bos, bo_count,
+ acquire_ctx);
return ret;
}
}
@@ -409,6 +399,7 @@ v3d_exec_cleanup(struct kref *ref)
dma_fence_put(exec->render.done_fence);
dma_fence_put(exec->bin_done_fence);
+ dma_fence_put(exec->render_done_fence);
for (i = 0; i < exec->bo_count; i++)
drm_gem_object_put_unlocked(&exec->bo[i]->base);
@@ -429,6 +420,33 @@ void v3d_exec_put(struct v3d_exec_info *exec)
kref_put(&exec->refcount, v3d_exec_cleanup);
}
+static void
+v3d_tfu_job_cleanup(struct kref *ref)
+{
+ struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job,
+ refcount);
+ struct v3d_dev *v3d = job->v3d;
+ unsigned int i;
+
+ dma_fence_put(job->in_fence);
+ dma_fence_put(job->done_fence);
+
+ for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
+ if (job->bo[i])
+ drm_gem_object_put_unlocked(&job->bo[i]->base);
+ }
+
+ pm_runtime_mark_last_busy(v3d->dev);
+ pm_runtime_put_autosuspend(v3d->dev);
+
+ kfree(job);
+}
+
+void v3d_tfu_job_put(struct v3d_tfu_job *job)
+{
+ kref_put(&job->refcount, v3d_tfu_job_cleanup);
+}
+
int
v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
@@ -503,6 +521,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
struct drm_syncobj *sync_out;
int ret = 0;
+ trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
+
if (args->pad != 0) {
DRM_INFO("pad must be zero: %d\n", args->pad);
return -EINVAL;
@@ -521,12 +541,12 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
kref_init(&exec->refcount);
ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
- 0, &exec->bin.in_fence);
+ 0, 0, &exec->bin.in_fence);
if (ret == -EINVAL)
goto fail;
ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
- 0, &exec->render.in_fence);
+ 0, 0, &exec->render.in_fence);
if (ret == -EINVAL)
goto fail;
@@ -546,7 +566,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
if (ret)
goto fail;
- ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx);
+ ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count,
+ &acquire_ctx);
if (ret)
goto fail;
@@ -572,20 +593,23 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
if (ret)
goto fail_unreserve;
+ exec->render_done_fence =
+ dma_fence_get(&exec->render.base.s_fence->finished);
+
kref_get(&exec->refcount); /* put by scheduler job completion */
drm_sched_entity_push_job(&exec->render.base,
&v3d_priv->sched_entity[V3D_RENDER]);
mutex_unlock(&v3d->sched_lock);
- v3d_attach_object_fences(exec);
+ v3d_attach_object_fences(exec->bo, exec->bo_count,
+ exec->render_done_fence);
- v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
+ v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
/* Update the return sync object for the */
sync_out = drm_syncobj_find(file_priv, args->out_sync);
if (sync_out) {
- drm_syncobj_replace_fence(sync_out, 0,
- &exec->render.base.s_fence->finished);
+ drm_syncobj_replace_fence(sync_out, exec->render_done_fence);
drm_syncobj_put(sync_out);
}
@@ -595,13 +619,121 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
fail_unreserve:
mutex_unlock(&v3d->sched_lock);
- v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
+ v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
fail:
v3d_exec_put(exec);
return ret;
}
+/**
+ * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * Userspace provides the register setup for the TFU, which we don't
+ * need to validate since the TFU is behind the MMU.
+ */
+int
+v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+ struct drm_v3d_submit_tfu *args = data;
+ struct v3d_tfu_job *job;
+ struct ww_acquire_ctx acquire_ctx;
+ struct drm_syncobj *sync_out;
+ struct dma_fence *sched_done_fence;
+ int ret = 0;
+ int bo_count;
+
+ trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
+
+ job = kcalloc(1, sizeof(*job), GFP_KERNEL);
+ if (!job)
+ return -ENOMEM;
+
+ ret = pm_runtime_get_sync(v3d->dev);
+ if (ret < 0) {
+ kfree(job);
+ return ret;
+ }
+
+ kref_init(&job->refcount);
+
+ ret = drm_syncobj_find_fence(file_priv, args->in_sync,
+ 0, 0, &job->in_fence);
+ if (ret == -EINVAL)
+ goto fail;
+
+ job->args = *args;
+ job->v3d = v3d;
+
+ spin_lock(&file_priv->table_lock);
+ for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) {
+ struct drm_gem_object *bo;
+
+ if (!args->bo_handles[bo_count])
+ break;
+
+ bo = idr_find(&file_priv->object_idr,
+ args->bo_handles[bo_count]);
+ if (!bo) {
+ DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
+ bo_count, args->bo_handles[bo_count]);
+ ret = -ENOENT;
+ spin_unlock(&file_priv->table_lock);
+ goto fail;
+ }
+ drm_gem_object_get(bo);
+ job->bo[bo_count] = to_v3d_bo(bo);
+ }
+ spin_unlock(&file_priv->table_lock);
+
+ ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx);
+ if (ret)
+ goto fail;
+
+ mutex_lock(&v3d->sched_lock);
+ ret = drm_sched_job_init(&job->base,
+ &v3d_priv->sched_entity[V3D_TFU],
+ v3d_priv);
+ if (ret)
+ goto fail_unreserve;
+
+ sched_done_fence = dma_fence_get(&job->base.s_fence->finished);
+
+ kref_get(&job->refcount); /* put by scheduler job completion */
+ drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]);
+ mutex_unlock(&v3d->sched_lock);
+
+ v3d_attach_object_fences(job->bo, bo_count, sched_done_fence);
+
+ v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
+
+ /* Update the return sync object */
+ sync_out = drm_syncobj_find(file_priv, args->out_sync);
+ if (sync_out) {
+ drm_syncobj_replace_fence(sync_out, sched_done_fence);
+ drm_syncobj_put(sync_out);
+ }
+ dma_fence_put(sched_done_fence);
+
+ v3d_tfu_job_put(job);
+
+ return 0;
+
+fail_unreserve:
+ mutex_unlock(&v3d->sched_lock);
+ v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
+fail:
+ v3d_tfu_job_put(job);
+
+ return ret;
+}
+
int
v3d_gem_init(struct drm_device *dev)
{
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index e07514eb11b5..69338da70ddc 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -4,8 +4,8 @@
/**
* DOC: Interrupt management for the V3D engine
*
- * When we take a binning or rendering flush done interrupt, we need
- * to signal the fence for that job so that the scheduler can queue up
+ * When we take a bin, render, or TFU done interrupt, we need to
+ * signal the fence for that job so that the scheduler can queue up
* the next one and unblock any waiters.
*
* When we take the binner out of memory interrupt, we need to
@@ -15,6 +15,7 @@
#include "v3d_drv.h"
#include "v3d_regs.h"
+#include "v3d_trace.h"
#define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \
V3D_INT_FLDONE | \
@@ -23,7 +24,8 @@
#define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \
V3D_HUB_INT_MMU_PTI | \
- V3D_HUB_INT_MMU_CAP))
+ V3D_HUB_INT_MMU_CAP | \
+ V3D_HUB_INT_TFUC))
static void
v3d_overflow_mem_work(struct work_struct *work)
@@ -87,12 +89,20 @@ v3d_irq(int irq, void *arg)
}
if (intsts & V3D_INT_FLDONE) {
- dma_fence_signal(v3d->bin_job->bin.done_fence);
+ struct v3d_fence *fence =
+ to_v3d_fence(v3d->bin_job->bin.done_fence);
+
+ trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
+ dma_fence_signal(&fence->base);
status = IRQ_HANDLED;
}
if (intsts & V3D_INT_FRDONE) {
- dma_fence_signal(v3d->render_job->render.done_fence);
+ struct v3d_fence *fence =
+ to_v3d_fence(v3d->render_job->render.done_fence);
+
+ trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
+ dma_fence_signal(&fence->base);
status = IRQ_HANDLED;
}
@@ -117,6 +127,15 @@ v3d_hub_irq(int irq, void *arg)
/* Acknowledge the interrupts we're handling here. */
V3D_WRITE(V3D_HUB_INT_CLR, intsts);
+ if (intsts & V3D_HUB_INT_TFUC) {
+ struct v3d_fence *fence =
+ to_v3d_fence(v3d->tfu_job->done_fence);
+
+ trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
+ dma_fence_signal(&fence->base);
+ status = IRQ_HANDLED;
+ }
+
if (intsts & (V3D_HUB_INT_MMU_WRV |
V3D_HUB_INT_MMU_PTI |
V3D_HUB_INT_MMU_CAP)) {
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
index 854046565989..6ccdee9d47bd 100644
--- a/drivers/gpu/drm/v3d/v3d_regs.h
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -86,6 +86,55 @@
# define V3D_TOP_GR_BRIDGE_SW_INIT_1 0x0000c
# define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0)
+#define V3D_TFU_CS 0x00400
+/* Stops current job, empties input fifo. */
+# define V3D_TFU_CS_TFURST BIT(31)
+# define V3D_TFU_CS_CVTCT_MASK V3D_MASK(23, 16)
+# define V3D_TFU_CS_CVTCT_SHIFT 16
+# define V3D_TFU_CS_NFREE_MASK V3D_MASK(13, 8)
+# define V3D_TFU_CS_NFREE_SHIFT 8
+# define V3D_TFU_CS_BUSY BIT(0)
+
+#define V3D_TFU_SU 0x00404
+/* Interrupt when FINTTHR input slots are free (0 = disabled) */
+# define V3D_TFU_SU_FINTTHR_MASK V3D_MASK(13, 8)
+# define V3D_TFU_SU_FINTTHR_SHIFT 8
+/* Skips resetting the CRC at the start of CRC generation. */
+# define V3D_TFU_SU_CRCCHAIN BIT(4)
+/* skips writes, computes CRC of the image. miplevels must be 0. */
+# define V3D_TFU_SU_CRC BIT(3)
+# define V3D_TFU_SU_THROTTLE_MASK V3D_MASK(1, 0)
+# define V3D_TFU_SU_THROTTLE_SHIFT 0
+
+#define V3D_TFU_ICFG 0x00408
+/* Interrupt when the conversion is complete. */
+# define V3D_TFU_ICFG_IOC BIT(0)
+
+/* Input Image Address */
+#define V3D_TFU_IIA 0x0040c
+/* Input Chroma Address */
+#define V3D_TFU_ICA 0x00410
+/* Input Image Stride */
+#define V3D_TFU_IIS 0x00414
+/* Input Image U-Plane Address */
+#define V3D_TFU_IUA 0x00418
+/* Output Image Address */
+#define V3D_TFU_IOA 0x0041c
+/* Image Output Size */
+#define V3D_TFU_IOS 0x00420
+/* TFU YUV Coefficient 0 */
+#define V3D_TFU_COEF0 0x00424
+/* Use these regs instead of the defaults. */
+# define V3D_TFU_COEF0_USECOEF BIT(31)
+/* TFU YUV Coefficient 1 */
+#define V3D_TFU_COEF1 0x00428
+/* TFU YUV Coefficient 2 */
+#define V3D_TFU_COEF2 0x0042c
+/* TFU YUV Coefficient 3 */
+#define V3D_TFU_COEF3 0x00430
+
+#define V3D_TFU_CRC 0x00434
+
/* Per-MMU registers. */
#define V3D_MMUC_CONTROL 0x01000
@@ -267,6 +316,36 @@
# define V3D_PTB_BXCF_RWORDERDISA BIT(1)
# define V3D_PTB_BXCF_CLIPDISA BIT(0)
+#define V3D_V3_PCTR_0_EN 0x00674
+#define V3D_V3_PCTR_0_EN_ENABLE BIT(31)
+#define V3D_V4_PCTR_0_EN 0x00650
+/* When a bit is set, resets the counter to 0. */
+#define V3D_V3_PCTR_0_CLR 0x00670
+#define V3D_V4_PCTR_0_CLR 0x00654
+#define V3D_PCTR_0_OVERFLOW 0x00658
+
+#define V3D_V3_PCTR_0_PCTRS0 0x00684
+#define V3D_V3_PCTR_0_PCTRS15 0x00660
+#define V3D_V3_PCTR_0_PCTRSX(x) (V3D_V3_PCTR_0_PCTRS0 + \
+ 4 * (x))
+/* Each src reg muxes four counters each. */
+#define V3D_V4_PCTR_0_SRC_0_3 0x00660
+#define V3D_V4_PCTR_0_SRC_28_31 0x0067c
+# define V3D_PCTR_S0_MASK V3D_MASK(6, 0)
+# define V3D_PCTR_S0_SHIFT 0
+# define V3D_PCTR_S1_MASK V3D_MASK(14, 8)
+# define V3D_PCTR_S1_SHIFT 8
+# define V3D_PCTR_S2_MASK V3D_MASK(22, 16)
+# define V3D_PCTR_S2_SHIFT 16
+# define V3D_PCTR_S3_MASK V3D_MASK(30, 24)
+# define V3D_PCTR_S3_SHIFT 24
+# define V3D_PCTR_CYCLE_COUNT 32
+
+/* Output values of the counters. */
+#define V3D_PCTR_0_PCTR0 0x00680
+#define V3D_PCTR_0_PCTR31 0x006fc
+#define V3D_PCTR_0_PCTRX(x) (V3D_PCTR_0_PCTR0 + \
+ 4 * (x))
#define V3D_GMP_STATUS 0x00800
# define V3D_GMP_STATUS_GMPRST BIT(31)
# define V3D_GMP_STATUS_WR_COUNT_MASK V3D_MASK(30, 24)
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 9243dea6e6ad..f7508e907536 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -30,16 +30,34 @@ to_v3d_job(struct drm_sched_job *sched_job)
return container_of(sched_job, struct v3d_job, base);
}
+static struct v3d_tfu_job *
+to_tfu_job(struct drm_sched_job *sched_job)
+{
+ return container_of(sched_job, struct v3d_tfu_job, base);
+}
+
static void
v3d_job_free(struct drm_sched_job *sched_job)
{
struct v3d_job *job = to_v3d_job(sched_job);
+ drm_sched_job_cleanup(sched_job);
+
v3d_exec_put(job->exec);
}
+static void
+v3d_tfu_job_free(struct drm_sched_job *sched_job)
+{
+ struct v3d_tfu_job *job = to_tfu_job(sched_job);
+
+ drm_sched_job_cleanup(sched_job);
+
+ v3d_tfu_job_put(job);
+}
+
/**
- * Returns the fences that the bin job depends on, one by one.
+ * Returns the fences that the bin or render job depends on, one by one.
* v3d_job_run() won't be called until all of them have been signaled.
*/
static struct dma_fence *
@@ -76,6 +94,27 @@ v3d_job_dependency(struct drm_sched_job *sched_job,
return fence;
}
+/**
+ * Returns the fences that the TFU job depends on, one by one.
+ * v3d_tfu_job_run() won't be called until all of them have been
+ * signaled.
+ */
+static struct dma_fence *
+v3d_tfu_job_dependency(struct drm_sched_job *sched_job,
+ struct drm_sched_entity *s_entity)
+{
+ struct v3d_tfu_job *job = to_tfu_job(sched_job);
+ struct dma_fence *fence;
+
+ fence = job->in_fence;
+ if (fence) {
+ job->in_fence = NULL;
+ return fence;
+ }
+
+ return NULL;
+}
+
static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
{
struct v3d_job *job = to_v3d_job(sched_job);
@@ -147,31 +186,47 @@ static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
return fence;
}
-static void
-v3d_job_timedout(struct drm_sched_job *sched_job)
+static struct dma_fence *
+v3d_tfu_job_run(struct drm_sched_job *sched_job)
{
- struct v3d_job *job = to_v3d_job(sched_job);
- struct v3d_exec_info *exec = job->exec;
- struct v3d_dev *v3d = exec->v3d;
- enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
- enum v3d_queue q;
- u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
- u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
+ struct v3d_tfu_job *job = to_tfu_job(sched_job);
+ struct v3d_dev *v3d = job->v3d;
+ struct drm_device *dev = &v3d->drm;
+ struct dma_fence *fence;
- /* If the current address or return address have changed, then
- * the GPU has probably made progress and we should delay the
- * reset. This could fail if the GPU got in an infinite loop
- * in the CL, but that is pretty unlikely outside of an i-g-t
- * testcase.
- */
- if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
- job->timedout_ctca = ctca;
- job->timedout_ctra = ctra;
+ fence = v3d_fence_create(v3d, V3D_TFU);
+ if (IS_ERR(fence))
+ return NULL;
- schedule_delayed_work(&job->base.sched->work_tdr,
- job->base.sched->timeout);
- return;
+ v3d->tfu_job = job;
+ if (job->done_fence)
+ dma_fence_put(job->done_fence);
+ job->done_fence = dma_fence_get(fence);
+
+ trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
+
+ V3D_WRITE(V3D_TFU_IIA, job->args.iia);
+ V3D_WRITE(V3D_TFU_IIS, job->args.iis);
+ V3D_WRITE(V3D_TFU_ICA, job->args.ica);
+ V3D_WRITE(V3D_TFU_IUA, job->args.iua);
+ V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
+ V3D_WRITE(V3D_TFU_IOS, job->args.ios);
+ V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
+ if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
+ V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
+ V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
+ V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
}
+ /* ICFG kicks off the job. */
+ V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);
+
+ return fence;
+}
+
+static void
+v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
+{
+ enum v3d_queue q;
mutex_lock(&v3d->reset_lock);
@@ -196,6 +251,39 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
mutex_unlock(&v3d->reset_lock);
}
+static void
+v3d_job_timedout(struct drm_sched_job *sched_job)
+{
+ struct v3d_job *job = to_v3d_job(sched_job);
+ struct v3d_exec_info *exec = job->exec;
+ struct v3d_dev *v3d = exec->v3d;
+ enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+ u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
+ u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
+
+ /* If the current address or return address have changed, then
+ * the GPU has probably made progress and we should delay the
+ * reset. This could fail if the GPU got in an infinite loop
+ * in the CL, but that is pretty unlikely outside of an i-g-t
+ * testcase.
+ */
+ if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
+ job->timedout_ctca = ctca;
+ job->timedout_ctra = ctra;
+ return;
+ }
+
+ v3d_gpu_reset_for_timeout(v3d, sched_job);
+}
+
+static void
+v3d_tfu_job_timedout(struct drm_sched_job *sched_job)
+{
+ struct v3d_tfu_job *job = to_tfu_job(sched_job);
+
+ v3d_gpu_reset_for_timeout(job->v3d, sched_job);
+}
+
static const struct drm_sched_backend_ops v3d_sched_ops = {
.dependency = v3d_job_dependency,
.run_job = v3d_job_run,
@@ -203,6 +291,13 @@ static const struct drm_sched_backend_ops v3d_sched_ops = {
.free_job = v3d_job_free
};
+static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
+ .dependency = v3d_tfu_job_dependency,
+ .run_job = v3d_tfu_job_run,
+ .timedout_job = v3d_tfu_job_timedout,
+ .free_job = v3d_tfu_job_free
+};
+
int
v3d_sched_init(struct v3d_dev *v3d)
{
@@ -233,6 +328,19 @@ v3d_sched_init(struct v3d_dev *v3d)
return ret;
}
+ ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
+ &v3d_tfu_sched_ops,
+ hw_jobs_limit, job_hang_limit,
+ msecs_to_jiffies(hang_limit_ms),
+ "v3d_tfu");
+ if (ret) {
+ dev_err(v3d->dev, "Failed to create TFU scheduler: %d.",
+ ret);
+ drm_sched_fini(&v3d->queue[V3D_RENDER].sched);
+ drm_sched_fini(&v3d->queue[V3D_BIN].sched);
+ return ret;
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h
index 85dd351e1e09..edd984afa33f 100644
--- a/drivers/gpu/drm/v3d/v3d_trace.h
+++ b/drivers/gpu/drm/v3d/v3d_trace.h
@@ -12,6 +12,28 @@
#define TRACE_SYSTEM v3d
#define TRACE_INCLUDE_FILE v3d_trace
+TRACE_EVENT(v3d_submit_cl_ioctl,
+ TP_PROTO(struct drm_device *dev, u32 ct1qba, u32 ct1qea),
+ TP_ARGS(dev, ct1qba, ct1qea),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(u32, ct1qba)
+ __field(u32, ct1qea)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ __entry->ct1qba = ct1qba;
+ __entry->ct1qea = ct1qea;
+ ),
+
+ TP_printk("dev=%u, RCL 0x%08x..0x%08x",
+ __entry->dev,
+ __entry->ct1qba,
+ __entry->ct1qea)
+);
+
TRACE_EVENT(v3d_submit_cl,
TP_PROTO(struct drm_device *dev, bool is_render,
uint64_t seqno,
@@ -42,6 +64,105 @@ TRACE_EVENT(v3d_submit_cl,
__entry->ctnqea)
);
+TRACE_EVENT(v3d_bcl_irq,
+ TP_PROTO(struct drm_device *dev,
+ uint64_t seqno),
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(u64, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%u, seqno=%llu",
+ __entry->dev,
+ __entry->seqno)
+);
+
+TRACE_EVENT(v3d_rcl_irq,
+ TP_PROTO(struct drm_device *dev,
+ uint64_t seqno),
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(u64, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%u, seqno=%llu",
+ __entry->dev,
+ __entry->seqno)
+);
+
+TRACE_EVENT(v3d_tfu_irq,
+ TP_PROTO(struct drm_device *dev,
+ uint64_t seqno),
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(u64, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%u, seqno=%llu",
+ __entry->dev,
+ __entry->seqno)
+);
+
+TRACE_EVENT(v3d_submit_tfu_ioctl,
+ TP_PROTO(struct drm_device *dev, u32 iia),
+ TP_ARGS(dev, iia),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(u32, iia)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ __entry->iia = iia;
+ ),
+
+ TP_printk("dev=%u, IIA 0x%08x",
+ __entry->dev,
+ __entry->iia)
+);
+
+TRACE_EVENT(v3d_submit_tfu,
+ TP_PROTO(struct drm_device *dev,
+ uint64_t seqno),
+ TP_ARGS(dev, seqno),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(u64, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ __entry->seqno = seqno;
+ ),
+
+ TP_printk("dev=%u, seqno=%llu",
+ __entry->dev,
+ __entry->seqno)
+);
+
TRACE_EVENT(v3d_reset_begin,
TP_PROTO(struct drm_device *dev),
TP_ARGS(dev),