aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c')
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c333
1 files changed, 43 insertions, 290 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 80df9f592407..ee9612a3ee5e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -3,10 +3,9 @@
* Copyright © 2021 Intel Corporation
*/
-#include <linux/dma-fence-array.h>
-
#include <drm/ttm/ttm_bo_driver.h>
+#include "i915_deps.h"
#include "i915_drv.h"
#include "intel_memory_region.h"
#include "intel_region_ttm.h"
@@ -43,234 +42,6 @@ void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
}
#endif
-/**
- * DOC: Set of utilities to dynamically collect dependencies and
- * eventually coalesce them into a single fence which is fed into
- * the GT migration code, since it only accepts a single dependency
- * fence.
- * The single fence returned from these utilities, in the case of
- * dependencies from multiple fence contexts, a struct dma_fence_array,
- * since the i915 request code can break that up and await the individual
- * fences.
- *
- * Once we can do async unbinding, this is also needed to coalesce
- * the migration fence with the unbind fences.
- *
- * While collecting the individual dependencies, we store the refcounted
- * struct dma_fence pointers in a realloc-managed pointer array, since
- * that can be easily fed into a dma_fence_array. Other options are
- * available, like for example an xarray for similarity with drm/sched.
- * Can be changed easily if needed.
- *
- * A struct i915_deps need to be initialized using i915_deps_init().
- * If i915_deps_add_dependency() or i915_deps_add_resv() return an
- * error code they will internally call i915_deps_fini(), which frees
- * all internal references and allocations. After a call to
- * i915_deps_to_fence(), or i915_deps_sync(), the struct should similarly
- * be viewed as uninitialized.
- *
- * We might want to break this out into a separate file as a utility.
- */
-
-#define I915_DEPS_MIN_ALLOC_CHUNK 8U
-
-/**
- * struct i915_deps - Collect dependencies into a single dma-fence
- * @single: Storage for pointer if the collection is a single fence.
- * @fence: Allocated array of fence pointers if more than a single fence;
- * otherwise points to the address of @single.
- * @num_deps: Current number of dependency fences.
- * @fences_size: Size of the @fences array in number of pointers.
- * @gfp: Allocation mode.
- */
-struct i915_deps {
- struct dma_fence *single;
- struct dma_fence **fences;
- unsigned int num_deps;
- unsigned int fences_size;
- gfp_t gfp;
-};
-
-static void i915_deps_reset_fences(struct i915_deps *deps)
-{
- if (deps->fences != &deps->single)
- kfree(deps->fences);
- deps->num_deps = 0;
- deps->fences_size = 1;
- deps->fences = &deps->single;
-}
-
-static void i915_deps_init(struct i915_deps *deps, gfp_t gfp)
-{
- deps->fences = NULL;
- deps->gfp = gfp;
- i915_deps_reset_fences(deps);
-}
-
-static void i915_deps_fini(struct i915_deps *deps)
-{
- unsigned int i;
-
- for (i = 0; i < deps->num_deps; ++i)
- dma_fence_put(deps->fences[i]);
-
- if (deps->fences != &deps->single)
- kfree(deps->fences);
-}
-
-static int i915_deps_grow(struct i915_deps *deps, struct dma_fence *fence,
- const struct ttm_operation_ctx *ctx)
-{
- int ret;
-
- if (deps->num_deps >= deps->fences_size) {
- unsigned int new_size = 2 * deps->fences_size;
- struct dma_fence **new_fences;
-
- new_size = max(new_size, I915_DEPS_MIN_ALLOC_CHUNK);
- new_fences = kmalloc_array(new_size, sizeof(*new_fences), deps->gfp);
- if (!new_fences)
- goto sync;
-
- memcpy(new_fences, deps->fences,
- deps->fences_size * sizeof(*new_fences));
- swap(new_fences, deps->fences);
- if (new_fences != &deps->single)
- kfree(new_fences);
- deps->fences_size = new_size;
- }
- deps->fences[deps->num_deps++] = dma_fence_get(fence);
- return 0;
-
-sync:
- if (ctx->no_wait_gpu && !dma_fence_is_signaled(fence)) {
- ret = -EBUSY;
- goto unref;
- }
-
- ret = dma_fence_wait(fence, ctx->interruptible);
- if (ret)
- goto unref;
-
- ret = fence->error;
- if (ret)
- goto unref;
-
- return 0;
-
-unref:
- i915_deps_fini(deps);
- return ret;
-}
-
-static int i915_deps_sync(struct i915_deps *deps,
- const struct ttm_operation_ctx *ctx)
-{
- struct dma_fence **fences = deps->fences;
- unsigned int i;
- int ret = 0;
-
- for (i = 0; i < deps->num_deps; ++i, ++fences) {
- if (ctx->no_wait_gpu && !dma_fence_is_signaled(*fences)) {
- ret = -EBUSY;
- break;
- }
-
- ret = dma_fence_wait(*fences, ctx->interruptible);
- if (!ret)
- ret = (*fences)->error;
- if (ret)
- break;
- }
-
- i915_deps_fini(deps);
- return ret;
-}
-
-static int i915_deps_add_dependency(struct i915_deps *deps,
- struct dma_fence *fence,
- const struct ttm_operation_ctx *ctx)
-{
- unsigned int i;
- int ret;
-
- if (!fence)
- return 0;
-
- if (dma_fence_is_signaled(fence)) {
- ret = fence->error;
- if (ret)
- i915_deps_fini(deps);
- return ret;
- }
-
- for (i = 0; i < deps->num_deps; ++i) {
- struct dma_fence *entry = deps->fences[i];
-
- if (!entry->context || entry->context != fence->context)
- continue;
-
- if (dma_fence_is_later(fence, entry)) {
- dma_fence_put(entry);
- deps->fences[i] = dma_fence_get(fence);
- }
-
- return 0;
- }
-
- return i915_deps_grow(deps, fence, ctx);
-}
-
-static struct dma_fence *i915_deps_to_fence(struct i915_deps *deps,
- const struct ttm_operation_ctx *ctx)
-{
- struct dma_fence_array *array;
-
- if (deps->num_deps == 0)
- return NULL;
-
- if (deps->num_deps == 1) {
- deps->num_deps = 0;
- return deps->fences[0];
- }
-
- /*
- * TODO: Alter the allocation mode here to not try too hard to
- * make things async.
- */
- array = dma_fence_array_create(deps->num_deps, deps->fences, 0, 0,
- false);
- if (!array)
- return ERR_PTR(i915_deps_sync(deps, ctx));
-
- deps->fences = NULL;
- i915_deps_reset_fences(deps);
-
- return &array->base;
-}
-
-static int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv,
- bool all, const bool no_excl,
- const struct ttm_operation_ctx *ctx)
-{
- struct dma_resv_iter iter;
- struct dma_fence *fence;
-
- dma_resv_assert_held(resv);
- dma_resv_for_each_fence(&iter, resv, all, fence) {
- int ret;
-
- if (no_excl && dma_resv_iter_is_exclusive(&iter))
- continue;
-
- ret = i915_deps_add_dependency(deps, fence, ctx);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
static enum i915_cache_level
i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
struct ttm_tt *ttm)
@@ -387,7 +158,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
struct ttm_resource *dst_mem,
struct ttm_tt *dst_ttm,
struct sg_table *dst_st,
- struct dma_fence *dep)
+ const struct i915_deps *deps)
{
struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
bdev);
@@ -397,7 +168,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
enum i915_cache_level src_level, dst_level;
int ret;
- if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
+ if (!to_gt(i915)->migrate.context || intel_gt_is_wedged(to_gt(i915)))
return ERR_PTR(-EINVAL);
/* With fail_gpu_migration, we always perform a GPU clear. */
@@ -410,8 +181,8 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
!I915_SELFTEST_ONLY(fail_gpu_migration))
return ERR_PTR(-EINVAL);
- intel_engine_pm_get(i915->gt.migrate.context->engine);
- ret = intel_context_migrate_clear(i915->gt.migrate.context, dep,
+ intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
+ ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps,
dst_st->sgl, dst_level,
i915_ttm_gtt_binds_lmem(dst_mem),
0, &rq);
@@ -423,9 +194,9 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
return ERR_CAST(src_rsgt);
src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
- intel_engine_pm_get(i915->gt.migrate.context->engine);
- ret = intel_context_migrate_copy(i915->gt.migrate.context,
- dep, src_rsgt->table.sgl,
+ intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
+ ret = intel_context_migrate_copy(to_gt(i915)->migrate.context,
+ deps, src_rsgt->table.sgl,
src_level,
i915_ttm_gtt_binds_lmem(bo->resource),
dst_st->sgl, dst_level,
@@ -435,7 +206,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
i915_refct_sgt_put(src_rsgt);
}
- intel_engine_pm_put(i915->gt.migrate.context->engine);
+ intel_engine_pm_put(to_gt(i915)->migrate.context->engine);
if (ret && rq) {
i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
@@ -610,10 +381,11 @@ i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
}
static struct dma_fence *
-__i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
+__i915_ttm_move(struct ttm_buffer_object *bo,
+ const struct ttm_operation_ctx *ctx, bool clear,
struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
struct i915_refct_sgt *dst_rsgt, bool allow_accel,
- struct dma_fence *move_dep)
+ const struct i915_deps *move_deps)
{
struct i915_ttm_memcpy_work *copy_work = NULL;
struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
@@ -621,7 +393,7 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
if (allow_accel) {
fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
- &dst_rsgt->table, move_dep);
+ &dst_rsgt->table, move_deps);
/*
* We only need to intercept the error when moving to lmem.
@@ -655,8 +427,8 @@ __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
if (!IS_ERR(fence))
goto out;
- } else if (move_dep) {
- int err = dma_fence_wait(move_dep, true);
+ } else if (move_deps) {
+ int err = i915_deps_sync(move_deps, ctx);
if (err)
return ERR_PTR(err);
@@ -680,29 +452,17 @@ out:
return fence;
}
-static struct dma_fence *prev_fence(struct ttm_buffer_object *bo,
- struct ttm_operation_ctx *ctx)
+static int
+prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ struct i915_deps *deps)
{
- struct i915_deps deps;
int ret;
- /*
- * Instead of trying hard with GFP_KERNEL to allocate memory,
- * the dependency collection will just sync if it doesn't
- * succeed.
- */
- i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
- ret = i915_deps_add_dependency(&deps, bo->moving, ctx);
+ ret = i915_deps_add_dependency(deps, bo->moving, ctx);
if (!ret)
- /*
- * TODO: Only await excl fence here, and shared fences before
- * signaling the migration fence.
- */
- ret = i915_deps_add_resv(&deps, bo->base.resv, true, false, ctx);
- if (ret)
- return ERR_PTR(ret);
+ ret = i915_deps_add_resv(deps, bo->base.resv, ctx);
- return i915_deps_to_fence(&deps, ctx);
+ return ret;
}
/**
@@ -756,16 +516,18 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
- struct dma_fence *dep = prev_fence(bo, ctx);
+ struct i915_deps deps;
- if (IS_ERR(dep)) {
+ i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+ ret = prev_deps(bo, ctx, &deps);
+ if (ret) {
i915_refct_sgt_put(dst_rsgt);
- return PTR_ERR(dep);
+ return ret;
}
- migration_fence = __i915_ttm_move(bo, clear, dst_mem, bo->ttm,
- dst_rsgt, true, dep);
- dma_fence_put(dep);
+ migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm,
+ dst_rsgt, true, &deps);
+ i915_deps_fini(&deps);
}
/* We can possibly get an -ERESTARTSYS here */
@@ -826,47 +588,38 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
.interruptible = intr,
};
struct i915_refct_sgt *dst_rsgt;
- struct dma_fence *copy_fence, *dep_fence;
+ struct dma_fence *copy_fence;
struct i915_deps deps;
- int ret, shared_err;
+ int ret;
assert_object_held(dst);
assert_object_held(src);
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
- /*
- * We plan to add a shared fence only for the source. If that
- * fails, we await all source fences before commencing
- * the copy instead of only the exclusive.
- */
- shared_err = dma_resv_reserve_shared(src_bo->base.resv, 1);
- ret = i915_deps_add_resv(&deps, dst_bo->base.resv, true, false, &ctx);
- if (!ret)
- ret = i915_deps_add_resv(&deps, src_bo->base.resv,
- !!shared_err, false, &ctx);
+ ret = dma_resv_reserve_shared(src_bo->base.resv, 1);
if (ret)
return ret;
- dep_fence = i915_deps_to_fence(&deps, &ctx);
- if (IS_ERR(dep_fence))
- return PTR_ERR(dep_fence);
+ ret = i915_deps_add_resv(&deps, dst_bo->base.resv, &ctx);
+ if (ret)
+ return ret;
+
+ ret = i915_deps_add_resv(&deps, src_bo->base.resv, &ctx);
+ if (ret)
+ return ret;
dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
- copy_fence = __i915_ttm_move(src_bo, false, dst_bo->resource,
+ copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource,
dst_bo->ttm, dst_rsgt, allow_accel,
- dep_fence);
+ &deps);
+ i915_deps_fini(&deps);
i915_refct_sgt_put(dst_rsgt);
if (IS_ERR_OR_NULL(copy_fence))
return PTR_ERR_OR_ZERO(copy_fence);
dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence);
-
- /* If we failed to reserve a shared slot, add an exclusive fence */
- if (shared_err)
- dma_resv_add_excl_fence(src_bo->base.resv, copy_fence);
- else
- dma_resv_add_shared_fence(src_bo->base.resv, copy_fence);
+ dma_resv_add_shared_fence(src_bo->base.resv, copy_fence);
dma_fence_put(copy_fence);