Diffstat (limited to 'drivers/gpu/drm/i915/gem')
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_busy.c | 35
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 18
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.c | 26
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_create.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 15
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 22
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 195
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_internal.c | 44
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_mman.c | 11
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.c | 92
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.h | 48
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 46
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pages.c | 21
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_phys.c | 6
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pm.c | 6
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_region.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 195
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 137
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 19
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_throttle.c | 3
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 765
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 41
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 627
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h | 41
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 3
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_wait.c | 106
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gemfs.c | 5
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 134
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 71
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 26
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 26
34 files changed, 1984 insertions, 814 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 7358bebef15c..470fdfd61a0f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -115,8 +115,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_gem_busy *args = data;
struct drm_i915_gem_object *obj;
- struct dma_resv_list *list;
- unsigned int seq;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
int err;
err = -ENOENT;
@@ -142,27 +142,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
* to report the overall busyness. This is what the wait-ioctl does.
*
*/
-retry:
- seq = raw_read_seqcount(&obj->base.resv->seq);
-
- /* Translate the exclusive fence to the READ *and* WRITE engine */
- args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));
-
- /* Translate shared fences to READ set of engines */
- list = dma_resv_shared_list(obj->base.resv);
- if (list) {
- unsigned int shared_count = list->shared_count, i;
-
- for (i = 0; i < shared_count; ++i) {
- struct dma_fence *fence =
- rcu_dereference(list->shared[i]);
-
+ args->busy = 0;
+ dma_resv_iter_begin(&cursor, obj->base.resv, true);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ if (dma_resv_iter_is_restarted(&cursor))
+ args->busy = 0;
+
+ if (dma_resv_iter_is_exclusive(&cursor))
+ /* Translate the exclusive fence to the READ *and* WRITE engine */
+ args->busy |= busy_check_writer(fence);
+ else
+ /* Translate shared fences to READ set of engines */
args->busy |= busy_check_reader(fence);
- }
}
-
- if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
- goto retry;
+ dma_resv_iter_end(&cursor);
err = 0;
out:
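
The hunk above replaces the hand-rolled seqcount retry loop with the unlocked dma_resv iterator. A minimal sketch (not part of the patch) of the general pattern that iterator expects, where any state accumulated across fences is reset whenever the cursor reports a restart, just as args->busy is reset above:

#include <linux/dma-fence.h>
#include <linux/dma-resv.h>

/* Sketch only: report whether every fence on a reservation has signaled. */
static bool object_is_idle(struct dma_resv *resv)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	bool idle = true;

	dma_resv_iter_begin(&cursor, resv, true); /* also walk shared fences */
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		if (dma_resv_iter_is_restarted(&cursor))
			idle = true; /* concurrent update: discard prior state */

		if (!dma_fence_is_signaled(fence))
			idle = false;
	}
	dma_resv_iter_end(&cursor);

	return idle;
}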
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index f0435c6feb68..8a248003dfae 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -69,10 +69,16 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj)
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
unsigned int flags)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct clflush *clflush;
assert_object_held(obj);
+ if (IS_DGFX(i915)) {
+ WARN_ON_ONCE(obj->cache_dirty);
+ return false;
+ }
+
/*
* Stolen memory is always coherent with the GPU as it is explicitly
* marked as wc by the system, or the system is cache-coherent.
@@ -105,16 +111,24 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
if (clflush) {
i915_sw_fence_await_reservation(&clflush->base.chain,
obj->base.resv, NULL, true,
- i915_fence_timeout(to_i915(obj->base.dev)),
+ i915_fence_timeout(i915),
I915_FENCE_GFP);
dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
dma_fence_work_commit(&clflush->base);
+ /*
+ * We must have successfully populated the pages (since we are
+ * holding a pin on the pages as per the flush worker) to reach
+ * this point, which must mean we have already done the required
+ * flush-on-acquire, hence resetting cache_dirty here should be
+ * safe.
+ */
+ obj->cache_dirty = false;
} else if (obj->mm.pages) {
__do_clflush(obj);
+ obj->cache_dirty = false;
} else {
GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
}
- obj->cache_dirty = false;
return true;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index c37c9f0d8167..00327b750fbb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -237,7 +237,7 @@ static int proto_context_set_persistence(struct drm_i915_private *i915,
* colateral damage, and we should not pretend we can by
* exposing the interface.
*/
- if (!intel_has_reset_engine(&i915->gt))
+ if (!intel_has_reset_engine(to_gt(i915)))
return -ENODEV;
pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE);
@@ -254,7 +254,7 @@ static int proto_context_set_protected(struct drm_i915_private *i915,
if (!protected) {
pc->uses_protected_content = false;
- } else if (!intel_pxp_is_enabled(&i915->gt.pxp)) {
+ } else if (!intel_pxp_is_enabled(&to_gt(i915)->pxp)) {
ret = -ENODEV;
} else if ((pc->user_flags & BIT(UCONTEXT_RECOVERABLE)) ||
!(pc->user_flags & BIT(UCONTEXT_BANNABLE))) {
@@ -268,8 +268,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915,
*/
pc->pxp_wakeref = intel_runtime_pm_get(&i915->runtime_pm);
- if (!intel_pxp_is_active(&i915->gt.pxp))
- ret = intel_pxp_start(&i915->gt.pxp);
+ if (!intel_pxp_is_active(&to_gt(i915)->pxp))
+ ret = intel_pxp_start(&to_gt(i915)->pxp);
}
return ret;
@@ -479,7 +479,7 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data)
if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915) &&
!IS_ROCKETLAKE(i915) && !IS_ALDERLAKE_S(i915)) {
drm_dbg(&i915->drm,
- "Bonding on gen12+ aside from TGL, RKL, and ADL_S not supported\n");
+ "Bonding not supported on this platform\n");
return -ENODEV;
}
@@ -572,7 +572,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
intel_engine_mask_t prev_mask;
/* FIXME: This is NIY for execlists */
- if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
+ if (!(intel_uc_uses_guc_submission(&to_gt(i915)->uc)))
return -ENODEV;
if (get_user(slot, &ext->engine_index))
@@ -833,7 +833,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv,
sseu = &pc->legacy_rcs_sseu;
}
- ret = i915_gem_user_to_context_sseu(&i915->gt, &user_sseu, sseu);
+ ret = i915_gem_user_to_context_sseu(to_gt(i915), &user_sseu, sseu);
if (ret)
return ret;
@@ -1001,7 +1001,7 @@ static void free_engines_rcu(struct rcu_head *rcu)
free_engines(engines);
}
-static int __i915_sw_fence_call
+static int
engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
struct i915_gem_engines *engines =
@@ -1044,7 +1044,7 @@ static struct i915_gem_engines *alloc_engines(unsigned int count)
static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx,
struct intel_sseu rcs_sseu)
{
- const struct intel_gt *gt = &ctx->i915->gt;
+ const struct intel_gt *gt = to_gt(ctx->i915);
struct intel_engine_cs *engine;
struct i915_gem_engines *e, *err;
enum intel_engine_id id;
@@ -1521,7 +1521,7 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
* colateral damage, and we should not pretend we can by
* exposing the interface.
*/
- if (!intel_has_reset_engine(&ctx->i915->gt))
+ if (!intel_has_reset_engine(to_gt(ctx->i915)))
return -ENODEV;
i915_gem_context_clear_persistence(ctx);
@@ -1559,7 +1559,7 @@ i915_gem_create_context(struct drm_i915_private *i915,
} else if (HAS_FULL_PPGTT(i915)) {
struct i915_ppgtt *ppgtt;
- ppgtt = i915_ppgtt_create(&i915->gt, 0);
+ ppgtt = i915_ppgtt_create(to_gt(i915), 0);
if (IS_ERR(ppgtt)) {
drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n",
PTR_ERR(ppgtt));
@@ -1742,7 +1742,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
if (args->flags)
return -EINVAL;
- ppgtt = i915_ppgtt_create(&i915->gt, 0);
+ ppgtt = i915_ppgtt_create(to_gt(i915), 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
@@ -2194,7 +2194,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN)
return -EINVAL;
- ret = intel_gt_terminally_wedged(&i915->gt);
+ ret = intel_gt_terminally_wedged(to_gt(i915));
if (ret)
return ret;
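
The context.c changes are a mechanical conversion from the embedded &i915->gt pointer to the to_gt() accessor. A hedged sketch of the accessor shape this conversion assumes (the actual definition lives outside this directory and is not part of this diff):

/* Assumed shape of to_gt(): a trivial accessor for the device's primary GT. */
static inline struct intel_gt *to_gt(struct drm_i915_private *i915)
{
	return &i915->gt;
}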
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 8955d6abcef1..9402d4bf4ffc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -379,7 +379,7 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
if (ext.flags)
return -EINVAL;
- if (!intel_pxp_is_enabled(&ext_data->i915->gt.pxp))
+ if (!intel_pxp_is_enabled(&to_gt(ext_data->i915)->pxp))
return -ENODEV;
ext_data->flags |= I915_BO_PROTECTED;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index e8a58c997170..1b526039a60d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -248,8 +248,19 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
if (IS_ERR(pages))
return PTR_ERR(pages);
- /* XXX: consider doing a vmap flush or something */
- if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj))
+ /*
+ * DG1 is special here since it still snoops transactions even with
+ * CACHE_NONE. This is not the case with other HAS_SNOOP platforms. We
+ * might need to revisit this as we add new discrete platforms.
+ *
+ * XXX: Consider doing a vmap flush or something, where possible.
+ * Currently we just do a heavy handed wbinvd_on_all_cpus() here since
+ * the underlying sg_table might not even point to struct pages, so we
+ * can't just call drm_clflush_sg or similar, like we do elsewhere in
+ * the driver.
+ */
+ if (i915_gem_object_can_bypass_llc(obj) ||
+ (!HAS_LLC(i915) && !IS_DG1(i915)))
wbinvd_on_all_cpus();
sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index b684a62bf3b0..26532c07d467 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -18,10 +18,32 @@
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ if (IS_DGFX(i915))
+ return false;
+
return !(obj->cache_level == I915_CACHE_NONE ||
obj->cache_level == I915_CACHE_WT);
}
+bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ if (obj->cache_dirty)
+ return false;
+
+ if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+ return true;
+
+ if (IS_DGFX(i915))
+ return false;
+
+ /* Currently in use by HW (display engine)? Keep flushed. */
+ return i915_gem_object_is_framebuffer(obj);
+}
+
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
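
i915_gem_cpu_write_needs_clflush() replaces the static cpu_write_needs_clflush() helper that is removed from i915_gem_object.h later in this diff. A minimal sketch of the kind of caller it serves, mirroring the __start_cpu_write() helper shown in that hunk:

/* Sketch only: mark an object for CPU writes using the exported helper. */
static void mark_for_cpu_write(struct drm_i915_gem_object *obj)
{
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	if (i915_gem_cpu_write_needs_clflush(obj))
		obj->cache_dirty = true; /* flush before the GPU samples it */
}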
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cb0bf6ffd0e3..3a5b247be738 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -29,6 +29,7 @@
#include "i915_gem_ioctls.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
+#include "i915_vma_snapshot.h"
struct eb_vma {
struct i915_vma *vma;
@@ -307,11 +308,15 @@ struct i915_execbuffer {
struct eb_fence *fences;
unsigned long num_fences;
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+ struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1];
+#endif
};
static int eb_parse(struct i915_execbuffer *eb);
static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
static void eb_unpin_engine(struct i915_execbuffer *eb);
+static void eb_capture_release(struct i915_execbuffer *eb);
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
@@ -990,7 +995,7 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
}
if (!(ev->flags & EXEC_OBJECT_WRITE)) {
- err = dma_resv_reserve_shared(vma->resv, 1);
+ err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
if (err)
return err;
}
@@ -1043,6 +1048,7 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
i915_vma_put(vma);
}
+ eb_capture_release(eb);
eb_unpin_engine(eb);
}
@@ -1092,6 +1098,47 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
return &i915->ggtt;
}
+static void reloc_cache_unmap(struct reloc_cache *cache)
+{
+ void *vaddr;
+
+ if (!cache->vaddr)
+ return;
+
+ vaddr = unmask_page(cache->vaddr);
+ if (cache->vaddr & KMAP)
+ kunmap_atomic(vaddr);
+ else
+ io_mapping_unmap_atomic((void __iomem *)vaddr);
+}
+
+static void reloc_cache_remap(struct reloc_cache *cache,
+ struct drm_i915_gem_object *obj)
+{
+ void *vaddr;
+
+ if (!cache->vaddr)
+ return;
+
+ if (cache->vaddr & KMAP) {
+ struct page *page = i915_gem_object_get_page(obj, cache->page);
+
+ vaddr = kmap_atomic(page);
+ cache->vaddr = unmask_flags(cache->vaddr) |
+ (unsigned long)vaddr;
+ } else {
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+ unsigned long offset;
+
+ offset = cache->node.start;
+ if (!drm_mm_node_allocated(&cache->node))
+ offset += cache->page << PAGE_SHIFT;
+
+ cache->vaddr = (unsigned long)
+ io_mapping_map_atomic_wc(&ggtt->iomap, offset);
+ }
+}
+
static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
{
void *vaddr;
@@ -1356,10 +1403,17 @@ eb_relocate_entry(struct i915_execbuffer *eb,
* batchbuffers.
*/
if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
- GRAPHICS_VER(eb->i915) == 6) {
+ GRAPHICS_VER(eb->i915) == 6 &&
+ !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) {
+ struct i915_vma *vma = target->vma;
+
+ reloc_cache_unmap(&eb->reloc_cache);
+ mutex_lock(&vma->vm->mutex);
err = i915_vma_bind(target->vma,
target->vma->obj->cache_level,
PIN_GLOBAL, NULL);
+ mutex_unlock(&vma->vm->mutex);
+ reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
if (err)
return err;
}
@@ -1880,36 +1934,113 @@ eb_find_first_request_added(struct i915_execbuffer *eb)
return NULL;
}
-static int eb_move_to_gpu(struct i915_execbuffer *eb)
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
+/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
+static void eb_capture_stage(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
- unsigned int i = count;
- int err = 0, j;
+ unsigned int i = count, j;
+ struct i915_vma_snapshot *vsnap;
while (i--) {
struct eb_vma *ev = &eb->vma[i];
struct i915_vma *vma = ev->vma;
unsigned int flags = ev->flags;
- struct drm_i915_gem_object *obj = vma->obj;
- assert_vma_held(vma);
+ if (!(flags & EXEC_OBJECT_CAPTURE))
+ continue;
- if (flags & EXEC_OBJECT_CAPTURE) {
+ vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
+ if (!vsnap)
+ continue;
+
+ i915_vma_snapshot_init(vsnap, vma, "user");
+ for_each_batch_create_order(eb, j) {
struct i915_capture_list *capture;
- for_each_batch_create_order(eb, j) {
- if (!eb->requests[j])
- break;
+ capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+ if (!capture)
+ continue;
- capture = kmalloc(sizeof(*capture), GFP_KERNEL);
- if (capture) {
- capture->next =
- eb->requests[j]->capture_list;
- capture->vma = vma;
- eb->requests[j]->capture_list = capture;
- }
- }
+ capture->next = eb->capture_lists[j];
+ capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
+ eb->capture_lists[j] = capture;
+ }
+ i915_vma_snapshot_put(vsnap);
+ }
+}
+
+/* Commit once we're in the critical path */
+static void eb_capture_commit(struct i915_execbuffer *eb)
+{
+ unsigned int j;
+
+ for_each_batch_create_order(eb, j) {
+ struct i915_request *rq = eb->requests[j];
+
+ if (!rq)
+ break;
+
+ rq->capture_list = eb->capture_lists[j];
+ eb->capture_lists[j] = NULL;
+ }
+}
+
+/*
+ * Release anything that didn't get committed due to errors.
+ * The capture_list will otherwise be freed at request retire.
+ */
+static void eb_capture_release(struct i915_execbuffer *eb)
+{
+ unsigned int j;
+
+ for_each_batch_create_order(eb, j) {
+ if (eb->capture_lists[j]) {
+ i915_request_free_capture_list(eb->capture_lists[j]);
+ eb->capture_lists[j] = NULL;
}
+ }
+}
+
+static void eb_capture_list_clear(struct i915_execbuffer *eb)
+{
+ memset(eb->capture_lists, 0, sizeof(eb->capture_lists));
+}
+
+#else
+
+static void eb_capture_stage(struct i915_execbuffer *eb)
+{
+}
+
+static void eb_capture_commit(struct i915_execbuffer *eb)
+{
+}
+
+static void eb_capture_release(struct i915_execbuffer *eb)
+{
+}
+
+static void eb_capture_list_clear(struct i915_execbuffer *eb)
+{
+}
+
+#endif
+
+static int eb_move_to_gpu(struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i = count;
+ int err = 0, j;
+
+ while (i--) {
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+ unsigned int flags = ev->flags;
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ assert_vma_held(vma);
/*
* If the GPU is not _reading_ through the CPU cache, we need
@@ -1990,6 +2121,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
/* Unconditionally flush any chipset caches (for streaming writes). */
intel_gt_chipset_flush(eb->gt);
+ eb_capture_commit(eb);
+
return 0;
err_skip:
@@ -2164,7 +2297,7 @@ static int eb_parse(struct i915_execbuffer *eb)
goto err_trampoline;
}
- err = dma_resv_reserve_shared(shadow->resv, 1);
+ err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
if (err)
goto err_trampoline;
@@ -2276,9 +2409,9 @@ static int eb_submit(struct i915_execbuffer *eb)
return err;
}
-static int num_vcs_engines(const struct drm_i915_private *i915)
+static int num_vcs_engines(struct drm_i915_private *i915)
{
- return hweight_long(VDBOX_MASK(&i915->gt));
+ return hweight_long(VDBOX_MASK(to_gt(i915)));
}
/*
@@ -3114,7 +3247,7 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
/* Allocate a request for this batch buffer nice and early. */
eb->requests[i] = i915_request_create(eb_find_context(eb, i));
if (IS_ERR(eb->requests[i])) {
- out_fence = ERR_PTR(PTR_ERR(eb->requests[i]));
+ out_fence = ERR_CAST(eb->requests[i]);
eb->requests[i] = NULL;
return out_fence;
}
@@ -3132,13 +3265,14 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
}
/*
- * Whilst this request exists, batch_obj will be on the
- * active_list, and so will hold the active reference. Only when
- * this request is retired will the batch_obj be moved onto
- * the inactive_list and lose its active reference. Hence we do
- * not need to explicitly hold another reference here.
+ * Not really on stack, but we don't want to call
+ * kfree on the batch_snapshot when we put it, so use the
+ * _onstack interface.
*/
- eb->requests[i]->batch = eb->batches[i]->vma;
+ if (eb->batches[i]->vma)
+ i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
+ eb->batches[i]->vma,
+ "batch");
if (eb->batch_pool) {
GEM_BUG_ON(intel_context_is_parallel(eb->context));
intel_gt_buffer_pool_mark_active(eb->batch_pool,
@@ -3187,6 +3321,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.fences = NULL;
eb.num_fences = 0;
+ eb_capture_list_clear(&eb);
+
memset(eb.requests, 0, sizeof(struct i915_request *) *
ARRAY_SIZE(eb.requests));
eb.composite_fence = NULL;
@@ -3273,6 +3409,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
ww_acquire_done(&eb.ww.ctx);
+ eb_capture_stage(&eb);
out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
if (IS_ERR(out_fence)) {
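
The error-capture rework above splits allocation from publication so that nothing in the request-submission critical section needs GFP_KERNEL allocations. A condensed call-order sketch (not part of the patch) of how the four helpers fit into i915_gem_do_execbuffer():

/*
 * eb_capture_list_clear(&eb);    zero the per-batch staging slots
 * ...object lookup, relocation, ww locking...
 * eb_capture_stage(&eb);         allocate snapshots/lists with GFP_KERNEL;
 *                                allocation failures degrade capture but do
 *                                not fail the execbuf
 * eb_move_to_gpu()
 *   -> eb_capture_commit(&eb);   hand the staged lists over to the requests
 * eb_release_vmas(&eb, ...)
 *   -> eb_capture_release(&eb);  free whatever was never committed
 */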
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index a57a6b7013c2..c5150a1ee3d2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -145,24 +145,10 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
.put_pages = i915_gem_object_put_pages_internal,
};
-/**
- * i915_gem_object_create_internal: create an object with volatile pages
- * @i915: the i915 device
- * @size: the size in bytes of backing storage to allocate for the object
- *
- * Creates a new object that wraps some internal memory for private use.
- * This object is not backed by swappable storage, and as such its contents
- * are volatile and only valid whilst pinned. If the object is reaped by the
- * shrinker, its pages and data will be discarded. Equally, it is not a full
- * GEM object and so not valid for access from userspace. This makes it useful
- * for hardware interfaces like ringbuffers (which are pinned from the time
- * the request is written to the time the hardware stops accessing it), but
- * not for contexts (which need to be preserved when not active for later
- * reuse). Note that it is not cleared upon allocation.
- */
struct drm_i915_gem_object *
-i915_gem_object_create_internal(struct drm_i915_private *i915,
- phys_addr_t size)
+__i915_gem_object_create_internal(struct drm_i915_private *i915,
+ const struct drm_i915_gem_object_ops *ops,
+ phys_addr_t size)
{
static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj;
@@ -179,7 +165,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
return ERR_PTR(-ENOMEM);
drm_gem_private_object_init(&i915->drm, &obj->base, size);
- i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0);
+ i915_gem_object_init(obj, ops, &lock_class, 0);
obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
/*
@@ -199,3 +185,25 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
return obj;
}
+
+/**
+ * i915_gem_object_create_internal: create an object with volatile pages
+ * @i915: the i915 device
+ * @size: the size in bytes of backing storage to allocate for the object
+ *
+ * Creates a new object that wraps some internal memory for private use.
+ * This object is not backed by swappable storage, and as such its contents
+ * are volatile and only valid whilst pinned. If the object is reaped by the
+ * shrinker, its pages and data will be discarded. Equally, it is not a full
+ * GEM object and so not valid for access from userspace. This makes it useful
+ * for hardware interfaces like ringbuffers (which are pinned from the time
+ * the request is written to the time the hardware stops accessing it), but
+ * not for contexts (which need to be preserved when not active for later
+ * reuse). Note that it is not cleared upon allocation.
+ */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_i915_private *i915,
+ phys_addr_t size)
+{
+ return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size);
+}
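
Splitting out __i915_gem_object_create_internal() lets another backend reuse the internal-object setup with its own ops table. A hedged sketch of such a caller; every my_backend_* name below is a placeholder, not something introduced by this patch:

/* Hypothetical backend callbacks, provided elsewhere by that backend. */
static int my_backend_get_pages(struct drm_i915_gem_object *obj);
static void my_backend_put_pages(struct drm_i915_gem_object *obj,
				 struct sg_table *pages);

static const struct drm_i915_gem_object_ops my_backend_internal_ops = {
	.name = "my-backend-internal",
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = my_backend_get_pages,
	.put_pages = my_backend_put_pages,
};

static struct drm_i915_gem_object *
my_backend_create_internal(struct drm_i915_private *i915, phys_addr_t size)
{
	/* Reuse the common internal-object plumbing with custom ops. */
	return __i915_gem_object_create_internal(i915, &my_backend_internal_ops,
						 size);
}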
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 65fc6ff5f59d..aaf970c37aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -17,6 +17,7 @@
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_gem_mman.h"
+#include "i915_mm.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
#include "i915_gem_ttm.h"
@@ -72,7 +73,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
if (args->flags & ~(I915_MMAP_WC))
return -EINVAL;
- if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
+ if (args->flags & I915_MMAP_WC && !pat_enabled())
return -ENODEV;
obj = i915_gem_object_lookup(file, args->handle);
@@ -645,7 +646,7 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
goto insert;
/* Attempt to reap some mmap space from dead objects */
- err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT,
+ err = intel_gt_retire_requests_timeout(to_gt(i915), MAX_SCHEDULE_TIMEOUT,
NULL);
if (err)
goto err;
@@ -736,7 +737,7 @@ i915_gem_dumb_mmap_offset(struct drm_file *file,
if (HAS_LMEM(to_i915(dev)))
mmap_type = I915_MMAP_TYPE_FIXED;
- else if (boot_cpu_has(X86_FEATURE_PAT))
+ else if (pat_enabled())
mmap_type = I915_MMAP_TYPE_WC;
else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt))
return -ENODEV;
@@ -792,7 +793,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
break;
case I915_MMAP_OFFSET_WC:
- if (!boot_cpu_has(X86_FEATURE_PAT))
+ if (!pat_enabled())
return -ENODEV;
type = I915_MMAP_TYPE_WC;
break;
@@ -802,7 +803,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
break;
case I915_MMAP_OFFSET_UC:
- if (!boot_cpu_has(X86_FEATURE_PAT))
+ if (!pat_enabled())
return -ENODEV;
type = I915_MMAP_TYPE_UC;
break;
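
The mmap paths now gate WC/UC mappings on pat_enabled() instead of the raw CPUID feature bit. A small sketch of the distinction, assuming the x86 declaration of pat_enabled() in <asm/memtype.h>:

#include <asm/memtype.h>	/* bool pat_enabled(void), x86 only */

/*
 * Sketch only: WC mappings need PAT to actually be programmed by the
 * kernel. boot_cpu_has(X86_FEATURE_PAT) merely says the CPU advertises
 * the feature, which is not enough if PAT setup was disabled (for
 * example via the "nopat" command line option).
 */
static bool wc_mmap_supported(void)
{
	return pat_enabled();
}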
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 1e426a42a36c..d87b508b59b1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -31,6 +31,7 @@
#include "i915_gem_context.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
+#include "i915_gem_ttm.h"
#include "i915_memcpy.h"
#include "i915_trace.h"
@@ -91,7 +92,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
}
/**
- * i915_gem_object_fini - Clean up a GEM object initialization
+ * __i915_gem_object_fini - Clean up a GEM object initialization
* @obj: The gem object to cleanup
*
* This function cleans up gem object fields that are set up by
@@ -107,25 +108,29 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj)
}
/**
- * Mark up the object's coherency levels for a given cache_level
+ * i915_gem_object_set_cache_coherency - Mark up the object's coherency levels
+ * for a given cache_level
* @obj: #drm_i915_gem_object
* @cache_level: cache level
*/
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
obj->cache_level = cache_level;
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
I915_BO_CACHE_COHERENT_FOR_WRITE);
- else if (HAS_LLC(to_i915(obj->base.dev)))
+ else if (HAS_LLC(i915))
obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
else
obj->cache_coherent = 0;
obj->cache_dirty =
- !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
+ !IS_DGFX(i915);
}
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
@@ -257,6 +262,8 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
*/
void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
{
+ assert_object_held(obj);
+
if (!list_empty(&obj->vma.list)) {
struct i915_vma *vma;
@@ -323,7 +330,16 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
obj->ops->delayed_free(obj);
continue;
}
+
+ if (!i915_gem_object_trylock(obj, NULL)) {
+ /* busy, toss it back to the pile */
+ if (llist_add(&obj->freed, &i915->mm.free_list))
+ queue_delayed_work(i915->wq, &i915->mm.free_work, msecs_to_jiffies(10));
+ continue;
+ }
+
__i915_gem_object_pages_fini(obj);
+ i915_gem_object_unlock(obj);
__i915_gem_free_object(obj);
/* But keep the pointer alive for RCU-protected lookups */
@@ -343,7 +359,7 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915)
static void __i915_gem_free_work(struct work_struct *work)
{
struct drm_i915_private *i915 =
- container_of(work, struct drm_i915_private, mm.free_work);
+ container_of(work, struct drm_i915_private, mm.free_work.work);
i915_gem_flush_free_objects(i915);
}
@@ -364,15 +380,6 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
atomic_inc(&i915->mm.free_count);
/*
- * This serializes freeing with the shrinker. Since the free
- * is delayed, first by RCU then by the workqueue, we want the
- * shrinker to be able to free pages of unreferenced objects,
- * or else we may oom whilst there are plenty of deferred
- * freed objects.
- */
- i915_gem_object_make_unshrinkable(obj);
-
- /*
* Since we require blocking on struct_mutex to unbind the freed
* object from the GPU before releasing resources back to the
* system, we can not do that directly from the RCU callback (which may
@@ -384,7 +391,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
*/
if (llist_add(&obj->freed, &i915->mm.free_list))
- queue_work(i915->wq, &i915->mm.free_work);
+ queue_delayed_work(i915->wq, &i915->mm.free_work, 0);
}
void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
@@ -456,7 +463,7 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
* from can't cross a page boundary. The caller must ensure that @obj pages
* are pinned and that @obj is synced wrt. any related writes.
*
- * Returns 0 on success or -ENODEV if the type of @obj's backing store is
+ * Return: %0 on success or -ENODEV if the type of @obj's backing store is
* unsupported.
*/
int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size)
@@ -709,7 +716,7 @@ bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
void i915_gem_init__objects(struct drm_i915_private *i915)
{
- INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+ INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work);
}
void i915_objects_module_exit(void)
@@ -732,6 +739,57 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = {
.export = i915_gem_prime_export,
};
+/**
+ * i915_gem_object_get_moving_fence - Get the object's moving fence if any
+ * @obj: The object whose moving fence to get.
+ *
+ * A non-signaled moving fence means that there is an async operation
+ * pending on the object that needs to be waited on before setting up
+ * any GPU- or CPU PTEs to the object's pages.
+ *
+ * Return: A refcounted pointer to the object's moving fence if any,
+ * NULL otherwise.
+ */
+struct dma_fence *
+i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
+{
+ return dma_fence_get(i915_gem_to_ttm(obj)->moving);
+}
+
+/**
+ * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any
+ * @obj: The object whose moving fence to wait for.
+ * @intr: Whether to wait interruptibly.
+ *
+ * If the moving fence signaled without an error, it is detached from the
+ * object and put.
+ *
+ * Return: 0 if successful, -ERESTARTSYS if the wait was interrupted,
+ * negative error code if the async operation represented by the
+ * moving fence failed.
+ */
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+ bool intr)
+{
+ struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
+ int ret;
+
+ assert_object_held(obj);
+ if (!fence)
+ return 0;
+
+ ret = dma_fence_wait(fence, intr);
+ if (ret)
+ return ret;
+
+ if (fence->error)
+ return fence->error;
+
+ i915_gem_to_ttm(obj)->moving = NULL;
+ dma_fence_put(fence);
+ return 0;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 59201801cec5..f66d46882ea7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -93,7 +93,6 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915);
struct sg_table *
__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
/**
* i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
@@ -211,9 +210,13 @@ static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object
return __i915_gem_object_lock(obj, ww, true);
}
-static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww)
{
- return dma_resv_trylock(obj->base.resv);
+ if (!ww)
+ return dma_resv_trylock(obj->base.resv);
+ else
+ return ww_mutex_trylock(&obj->base.resv->lock, &ww->ctx);
}
static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
@@ -296,6 +299,12 @@ i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
}
static inline bool
+i915_gem_object_has_self_managed_shrink_list(const struct drm_i915_gem_object *obj)
+{
+ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST);
+}
+
+static inline bool
i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
{
return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY);
@@ -449,7 +458,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
}
int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+int i915_gem_object_truncate(struct drm_i915_gem_object *obj);
void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
/**
@@ -512,11 +521,18 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
i915_gem_object_unpin_pages(obj);
}
+struct dma_fence *
+i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
+
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+ bool intr);
+
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
+bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj);
int __must_check
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
@@ -533,25 +549,15 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
-static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
- if (obj->cache_dirty)
- return false;
-
- if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
- return true;
-
- /* Currently in use by HW (display engine)? Keep flushed. */
- return i915_gem_object_is_framebuffer(obj);
-}
-
static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
{
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->write_domain = I915_GEM_DOMAIN_CPU;
- if (cpu_write_needs_clflush(obj))
+ if (i915_gem_cpu_write_needs_clflush(obj))
obj->cache_dirty = true;
}
@@ -613,6 +619,14 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
enum intel_memory_type type);
+int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
+ size_t size, struct intel_memory_region *mr,
+ struct address_space *mapping,
+ unsigned int max_segment);
+void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
+ bool dirty, bool backup);
+void __shmem_writeback(size_t size, struct address_space *mapping);
+
#ifdef CONFIG_MMU_NOTIFIER
static inline bool
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
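
i915_gem_object_trylock() now takes an optional ww acquire context: passing NULL keeps the plain dma_resv_trylock() behaviour, while a non-NULL context routes through ww_mutex_trylock() against that context. A brief sketch of a caller that works either way:

/* Sketch only: opportunistically lock an object, with or without a ww ctx. */
static bool try_do_work(struct drm_i915_gem_object *obj,
			struct i915_gem_ww_ctx *ww)
{
	if (!i915_gem_object_trylock(obj, ww))
		return false;	/* contended, caller should just skip it */

	/* ...work that requires the object lock... */

	i915_gem_object_unlock(obj);
	return true;
}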
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index da85169006d4..f9f7e44099fe 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -34,9 +34,11 @@ struct i915_lut_handle {
struct drm_i915_gem_object_ops {
unsigned int flags;
-#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
-#define I915_GEM_OBJECT_IS_PROXY BIT(2)
-#define I915_GEM_OBJECT_NO_MMAP BIT(3)
+#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
+/* Skip the shrinker management in set_pages/unset_pages */
+#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST BIT(2)
+#define I915_GEM_OBJECT_IS_PROXY BIT(3)
+#define I915_GEM_OBJECT_NO_MMAP BIT(4)
/* Interface between the GEM object and its backing storage.
* get_pages() is called once prior to the use of the associated set
@@ -54,8 +56,11 @@ struct drm_i915_gem_object_ops {
int (*get_pages)(struct drm_i915_gem_object *obj);
void (*put_pages)(struct drm_i915_gem_object *obj,
struct sg_table *pages);
- void (*truncate)(struct drm_i915_gem_object *obj);
+ int (*truncate)(struct drm_i915_gem_object *obj);
void (*writeback)(struct drm_i915_gem_object *obj);
+ int (*shrinker_release_pages)(struct drm_i915_gem_object *obj,
+ bool no_gpu_wait,
+ bool should_writeback);
int (*pread)(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pread *arg);
@@ -486,9 +491,37 @@ struct drm_i915_gem_object {
* instead go through the pin/unpin interfaces.
*/
atomic_t pages_pin_count;
+
+ /**
+ * @shrink_pin: Prevents the pages from being made visible to
+ * the shrinker, while the shrink_pin is non-zero. Most users
+ * should pretty much never have to care about this, outside of
+ * some special use cases.
+ *
+ * By default most objects will start out as visible to the
+ * shrinker (if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the
+ * backing pages are attached to the object, like in
+ * __i915_gem_object_set_pages(). They will then be removed from
+ * the shrinker list once the pages are released.
+ *
+ * The @shrink_pin is incremented by calling
+ * i915_gem_object_make_unshrinkable(), which will also remove
+ * the object from the shrinker list, if the pin count was zero.
+ *
+ * Callers will then typically call
+ * i915_gem_object_make_shrinkable() or
+ * i915_gem_object_make_purgeable() to decrement the pin count,
+ * and make the pages visible again.
+ */
atomic_t shrink_pin;
/**
+ * @ttm_shrinkable: True when the object is using shmem pages
+ * underneath. Protected by the object lock.
+ */
+ bool ttm_shrinkable;
+
+ /**
* Priority list of potential placements for this object.
*/
struct intel_memory_region **placements;
@@ -512,6 +545,7 @@ struct drm_i915_gem_object {
*/
struct list_head region_link;
+ struct i915_refct_sgt *rsgt;
struct sg_table *pages;
void *mapping;
@@ -547,7 +581,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_dma_page;
/**
- * Element within i915->mm.unbound_list or i915->mm.bound_list,
+ * Element within i915->mm.shrink_list or i915->mm.purge_list,
* locked by i915->mm.obj_lock.
*/
struct list_head link;
@@ -565,7 +599,7 @@ struct drm_i915_gem_object {
} mm;
struct {
- struct sg_table *cached_io_st;
+ struct i915_refct_sgt *cached_io_rsgt;
struct i915_gem_object_page_iter get_io_page;
struct drm_i915_gem_object *backup;
bool created:1;
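
The new shrinker_release_pages() hook lets a backend take over the shrink path entirely (presumably the TTM code touched elsewhere in this diff). A hedged skeleton with the signature added above; the my_backend name and the shmem-backed assumption are placeholders, not part of the patch:

static int my_backend_shrinker_release_pages(struct drm_i915_gem_object *obj,
					     bool no_gpu_wait,
					     bool should_writeback)
{
	/* If we may not stall for the GPU, skip objects that are still busy. */
	if (no_gpu_wait && !dma_resv_test_signaled(obj->base.resv, true))
		return -EBUSY;

	/* ...backend-specific page teardown goes here... */

	/* Optionally push the now-idle shmem pages towards swap. */
	if (should_writeback)
		__shmem_writeback(obj->base.size, obj->base.filp->f_mapping);

	return 0;
}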
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 8eb1c3a6fc9c..89b70f5cde7a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -26,6 +26,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
/* Make the pages coherent with the GPU (flushing any swapin). */
if (obj->cache_dirty) {
+ WARN_ON_ONCE(IS_DGFX(i915));
obj->write_domain = 0;
if (i915_gem_object_has_struct_page(obj))
drm_clflush_sg(pages);
@@ -68,7 +69,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
shrinkable = false;
}
- if (shrinkable) {
+ if (shrinkable && !i915_gem_object_has_self_managed_shrink_list(obj)) {
struct list_head *list;
unsigned long flags;
@@ -158,11 +159,13 @@ retry:
}
/* Immediately discard the backing storage */
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj)
+int i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
drm_gem_free_mmap_offset(&obj->base);
if (obj->ops->truncate)
- obj->ops->truncate(obj);
+ return obj->ops->truncate(obj);
+
+ return 0;
}
/* Try to discard unwanted pages */
@@ -208,7 +211,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
if (i915_gem_object_is_volatile(obj))
obj->mm.madv = I915_MADV_WILLNEED;
- i915_gem_object_make_unshrinkable(obj);
+ if (!i915_gem_object_has_self_managed_shrink_list(obj))
+ i915_gem_object_make_unshrinkable(obj);
if (obj->mm.mapping) {
unmap_object(obj, page_mask_bits(obj->mm.mapping));
@@ -414,8 +418,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
}
if (!ptr) {
- if (GEM_WARN_ON(type == I915_MAP_WC &&
- !static_cpu_has(X86_FEATURE_PAT)))
+ err = i915_gem_object_wait_moving_fence(obj, true);
+ if (err) {
+ ptr = ERR_PTR(err);
+ goto err_unpin;
+ }
+
+ if (GEM_WARN_ON(type == I915_MAP_WC && !pat_enabled()))
ptr = ERR_PTR(-ENODEV);
else if (i915_gem_object_has_struct_page(obj))
ptr = i915_gem_object_map_page(obj, type);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 7986612f48fa..ca6faffcc496 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -19,6 +19,7 @@
static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
struct address_space *mapping = obj->base.filp->f_mapping;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct scatterlist *sg;
struct sg_table *st;
dma_addr_t dma;
@@ -73,7 +74,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
dst += PAGE_SIZE;
}
- intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
+ intel_gt_chipset_flush(to_gt(i915));
/* We're no longer struct page backed */
obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE;
@@ -140,6 +141,7 @@ int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj,
{
void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
char __user *user_data = u64_to_user_ptr(args->data_ptr);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
int err;
err = i915_gem_object_wait(obj,
@@ -159,7 +161,7 @@ int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj,
return -EFAULT;
drm_clflush_virt_range(vaddr, args->size);
- intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
+ intel_gt_chipset_flush(to_gt(i915));
i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 726b40e1fbb0..ac56124760e1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -35,7 +35,7 @@ void i915_gem_suspend(struct drm_i915_private *i915)
* state. Fortunately, the kernel_context is disposable and we do
* not rely on its state.
*/
- intel_gt_suspend_prepare(&i915->gt);
+ intel_gt_suspend_prepare(to_gt(i915));
i915_gem_drain_freed_objects(i915);
}
@@ -153,7 +153,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
* machine in an unusable condition.
*/
- intel_gt_suspend_late(&i915->gt);
+ intel_gt_suspend_late(to_gt(i915));
spin_lock_irqsave(&i915->mm.obj_lock, flags);
for (phase = phases; *phase; phase++) {
@@ -223,7 +223,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
* guarantee that the context image is complete. So let's just reset
* it and start again.
*/
- intel_gt_resume(&i915->gt);
+ intel_gt_resume(to_gt(i915));
ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU);
GEM_WARN_ON(ret);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
index a016ccec36f3..a4350227e9ae 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -11,7 +11,7 @@
void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
struct intel_memory_region *mem)
{
- obj->mm.region = intel_memory_region_get(mem);
+ obj->mm.region = mem;
mutex_lock(&mem->objects.lock);
list_add(&obj->mm.region_link, &mem->objects.list);
@@ -25,8 +25,6 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj)
mutex_lock(&mem->objects.lock);
list_del(&obj->mm.region_link);
mutex_unlock(&mem->objects.lock);
-
- intel_memory_region_put(mem);
}
struct drm_i915_gem_object *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index d77da59fae04..cc9fe258fba7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,62 +25,67 @@ static void check_release_pagevec(struct pagevec *pvec)
cond_resched();
}
-static int shmem_get_pages(struct drm_i915_gem_object *obj)
+void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
+ bool dirty, bool backup)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct intel_memory_region *mem = obj->mm.region;
- const unsigned long page_count = obj->base.size / PAGE_SIZE;
+ struct sgt_iter sgt_iter;
+ struct pagevec pvec;
+ struct page *page;
+
+ mapping_clear_unevictable(mapping);
+
+ pagevec_init(&pvec);
+ for_each_sgt_page(page, sgt_iter, st) {
+ if (dirty)
+ set_page_dirty(page);
+
+ if (backup)
+ mark_page_accessed(page);
+
+ if (!pagevec_add(&pvec, page))
+ check_release_pagevec(&pvec);
+ }
+ if (pagevec_count(&pvec))
+ check_release_pagevec(&pvec);
+
+ sg_free_table(st);
+}
+
+int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
+ size_t size, struct intel_memory_region *mr,
+ struct address_space *mapping,
+ unsigned int max_segment)
+{
+ const unsigned long page_count = size / PAGE_SIZE;
unsigned long i;
- struct address_space *mapping;
- struct sg_table *st;
struct scatterlist *sg;
- struct sgt_iter sgt_iter;
struct page *page;
unsigned long last_pfn = 0; /* suppress gcc warning */
- unsigned int max_segment = i915_sg_segment_size();
- unsigned int sg_page_sizes;
gfp_t noreclaim;
int ret;
/*
- * Assert that the object is not currently in any GPU domain. As it
- * wasn't in the GTT, there shouldn't be any way it could have been in
- * a GPU cache
- */
- GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
- GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
-
- /*
* If there's no chance of allocating enough pages for the whole
* object, bail early.
*/
- if (obj->base.size > resource_size(&mem->region))
+ if (size > resource_size(&mr->region))
return -ENOMEM;
- st = kmalloc(sizeof(*st), GFP_KERNEL);
- if (!st)
+ if (sg_alloc_table(st, page_count, GFP_KERNEL))
return -ENOMEM;
-rebuild_st:
- if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
- kfree(st);
- return -ENOMEM;
- }
-
/*
* Get the list of pages out of our struct file. They'll be pinned
* at this point until we release them.
*
* Fail silently without starting the shrinker
*/
- mapping = obj->base.filp->f_mapping;
mapping_set_unevictable(mapping);
noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
sg = st->sgl;
st->nents = 0;
- sg_page_sizes = 0;
for (i = 0; i < page_count; i++) {
const unsigned int shrink[] = {
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
@@ -135,10 +140,9 @@ rebuild_st:
if (!i ||
sg->length >= max_segment ||
page_to_pfn(page) != last_pfn + 1) {
- if (i) {
- sg_page_sizes |= sg->length;
+ if (i)
sg = sg_next(sg);
- }
+
st->nents++;
sg_set_page(sg, page, PAGE_SIZE, 0);
} else {
@@ -149,14 +153,67 @@ rebuild_st:
/* Check that the i965g/gm workaround works. */
GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL);
}
- if (sg) { /* loop terminated early; short sg table */
- sg_page_sizes |= sg->length;
+ if (sg) /* loop terminated early; short sg table */
sg_mark_end(sg);
- }
/* Trim unused sg entries to avoid wasting memory. */
i915_sg_trim(st);
+ return 0;
+err_sg:
+ sg_mark_end(sg);
+ if (sg != st->sgl) {
+ shmem_sg_free_table(st, mapping, false, false);
+ } else {
+ mapping_clear_unevictable(mapping);
+ sg_free_table(st);
+ }
+
+ /*
+ * shmemfs first checks if there is enough memory to allocate the page
+ * and reports ENOSPC should there be insufficient, along with the usual
+ * ENOMEM for a genuine allocation failure.
+ *
+ * We use ENOSPC in our driver to mean that we have run out of aperture
+ * space and so want to translate the error from shmemfs back to our
+ * usual understanding of ENOMEM.
+ */
+ if (ret == -ENOSPC)
+ ret = -ENOMEM;
+
+ return ret;
+}
+
+static int shmem_get_pages(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_memory_region *mem = obj->mm.region;
+ struct address_space *mapping = obj->base.filp->f_mapping;
+ const unsigned long page_count = obj->base.size / PAGE_SIZE;
+ unsigned int max_segment = i915_sg_segment_size();
+ struct sg_table *st;
+ struct sgt_iter sgt_iter;
+ struct page *page;
+ int ret;
+
+ /*
+ * Assert that the object is not currently in any GPU domain. As it
+ * wasn't in the GTT, there shouldn't be any way it could have been in
+ * a GPU cache
+ */
+ GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+ GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+
+rebuild_st:
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ return -ENOMEM;
+
+ ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, mapping,
+ max_segment);
+ if (ret)
+ goto err_st;
+
ret = i915_gem_gtt_prepare_pages(obj, st);
if (ret) {
/*
@@ -168,6 +225,7 @@ rebuild_st:
for_each_sgt_page(page, sgt_iter, st)
put_page(page);
sg_free_table(st);
+ kfree(st);
max_segment = PAGE_SIZE;
goto rebuild_st;
@@ -185,28 +243,12 @@ rebuild_st:
if (i915_gem_object_can_bypass_llc(obj))
obj->cache_dirty = true;
- __i915_gem_object_set_pages(obj, st, sg_page_sizes);
+ __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
return 0;
-err_sg:
- sg_mark_end(sg);
err_pages:
- mapping_clear_unevictable(mapping);
- if (sg != st->sgl) {
- struct pagevec pvec;
-
- pagevec_init(&pvec);
- for_each_sgt_page(page, sgt_iter, st) {
- if (!pagevec_add(&pvec, page))
- check_release_pagevec(&pvec);
- }
- if (pagevec_count(&pvec))
- check_release_pagevec(&pvec);
- }
- sg_free_table(st);
- kfree(st);
-
+ shmem_sg_free_table(st, mapping, false, false);
/*
* shmemfs first checks if there is enough memory to allocate the page
* and reports ENOSPC should there be insufficient, along with the usual
@@ -216,13 +258,16 @@ err_pages:
* space and so want to translate the error from shmemfs back to our
* usual understanding of ENOMEM.
*/
+err_st:
if (ret == -ENOSPC)
ret = -ENOMEM;
+ kfree(st);
+
return ret;
}
-static void
+static int
shmem_truncate(struct drm_i915_gem_object *obj)
{
/*
@@ -234,12 +279,12 @@ shmem_truncate(struct drm_i915_gem_object *obj)
shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
obj->mm.madv = __I915_MADV_PURGED;
obj->mm.pages = ERR_PTR(-EFAULT);
+
+ return 0;
}
-static void
-shmem_writeback(struct drm_i915_gem_object *obj)
+void __shmem_writeback(size_t size, struct address_space *mapping)
{
- struct address_space *mapping;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.nr_to_write = SWAP_CLUSTER_MAX,
@@ -255,10 +300,9 @@ shmem_writeback(struct drm_i915_gem_object *obj)
* instead of invoking writeback so they are aged and paged out
* as normal.
*/
- mapping = obj->base.filp->f_mapping;
/* Begin writeback on each dirty page */
- for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
+ for (i = 0; i < size >> PAGE_SHIFT; i++) {
struct page *page;
page = find_lock_page(mapping, i);
@@ -281,6 +325,12 @@ put:
}
}
+static void
+shmem_writeback(struct drm_i915_gem_object *obj)
+{
+ __shmem_writeback(obj->base.size, obj->base.filp->f_mapping);
+}
+
void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
struct sg_table *pages,
@@ -313,11 +363,6 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
- struct sgt_iter sgt_iter;
- struct pagevec pvec;
- struct page *page;
-
- GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev)));
__i915_gem_object_release_shmem(obj, pages, true);
i915_gem_gtt_finish_pages(obj, pages);
@@ -325,25 +370,10 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_save_bit_17_swizzle(obj, pages);
- mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
-
- pagevec_init(&pvec);
- for_each_sgt_page(page, sgt_iter, pages) {
- if (obj->mm.dirty)
- set_page_dirty(page);
-
- if (obj->mm.madv == I915_MADV_WILLNEED)
- mark_page_accessed(page);
-
- if (!pagevec_add(&pvec, page))
- check_release_pagevec(&pvec);
- }
- if (pagevec_count(&pvec))
- check_release_pagevec(&pvec);
- obj->mm.dirty = false;
-
- sg_free_table(pages);
+ shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
+ obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
kfree(pages);
+ obj->mm.dirty = false;
}
static void
@@ -634,9 +664,10 @@ static int init_shmem(struct intel_memory_region *mem)
return 0; /* Don't error, we can simply fallback to the kernel mnt */
}
-static void release_shmem(struct intel_memory_region *mem)
+static int release_shmem(struct intel_memory_region *mem)
{
i915_gemfs_fini(mem->i915);
+ return 0;
}
static const struct intel_memory_region_ops shmem_region_ops = {
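
shmem_sg_alloc_table() and shmem_sg_free_table() are now standalone helpers (declared in i915_gem_object.h earlier in this diff), so other backends can fill an sg_table straight from a shmem mapping. A hedged sketch of such a caller, with error handling abbreviated:

/* Sketch only: populate and later release an sg_table from shmem pages. */
static int fill_st_from_shmem(struct drm_i915_private *i915,
			      struct drm_i915_gem_object *obj,
			      struct sg_table *st)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	int err;

	err = shmem_sg_alloc_table(i915, st, obj->base.size, obj->mm.region,
				   mapping, i915_sg_segment_size());
	if (err)
		return err;

	/* ...map or bind the pages... */

	/* Release: pages were not dirtied, no need to mark them accessed. */
	shmem_sg_free_table(st, mapping, false, false);
	return 0;
}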
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 5ab136ffdeb2..cc927e49d21f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -15,7 +15,6 @@
#include "gt/intel_gt_requests.h"
-#include "dma_resv_utils.h"
#include "i915_trace.h"
static bool swap_available(void)
@@ -37,8 +36,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
}
-static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
- unsigned long shrink, bool trylock_vm)
+static int drop_pages(struct drm_i915_gem_object *obj,
+ unsigned long shrink, bool trylock_vm)
{
unsigned long flags;
@@ -56,19 +55,25 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
return false;
}
-static void try_to_writeback(struct drm_i915_gem_object *obj,
- unsigned int flags)
+static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
{
+ if (obj->ops->shrinker_release_pages)
+ return obj->ops->shrinker_release_pages(obj,
+ !(flags & I915_SHRINK_ACTIVE),
+ flags & I915_SHRINK_WRITEBACK);
+
switch (obj->mm.madv) {
case I915_MADV_DONTNEED:
i915_gem_object_truncate(obj);
- return;
+ return 0;
case __I915_MADV_PURGED:
- return;
+ return 0;
}
if (flags & I915_SHRINK_WRITEBACK)
i915_gem_object_writeback(obj);
+
+ return 0;
}
/**
@@ -148,7 +153,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
*/
if (shrink & I915_SHRINK_ACTIVE)
/* Retire requests to unpin all idle contexts */
- intel_gt_retire_requests(&i915->gt);
+ intel_gt_retire_requests(to_gt(i915));
/*
* As we may completely rewrite the (un)bound list whilst unbinding
@@ -209,27 +214,23 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
- err = 0;
- if (unsafe_drop_pages(obj, shrink, trylock_vm)) {
- /* May arrive from get_pages on another bo */
- if (!ww) {
- if (!i915_gem_object_trylock(obj))
- goto skip;
- } else {
- err = i915_gem_object_lock(obj, ww);
- if (err)
- goto skip;
- }
-
- if (!__i915_gem_object_put_pages(obj)) {
- try_to_writeback(obj, shrink);
- count += obj->base.size >> PAGE_SHIFT;
- }
- if (!ww)
- i915_gem_object_unlock(obj);
+ /* May arrive from get_pages on another bo */
+ if (!ww) {
+ if (!i915_gem_object_trylock(obj, NULL))
+ goto skip;
+ } else {
+ err = i915_gem_object_lock(obj, ww);
+ if (err)
+ goto skip;
}
- dma_resv_prune(obj->base.resv);
+ if (drop_pages(obj, shrink, trylock_vm) &&
+ !__i915_gem_object_put_pages(obj) &&
+ !try_to_writeback(obj, shrink))
+ count += obj->base.size >> PAGE_SHIFT;
+
+ if (!ww)
+ i915_gem_object_unlock(obj);
scanned += obj->base.size >> PAGE_SHIFT;
skip:
@@ -404,12 +405,18 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
list_for_each_entry_safe(vma, next,
&i915->ggtt.vm.bound_list, vm_link) {
unsigned long count = vma->node.size >> PAGE_SHIFT;
+ struct drm_i915_gem_object *obj = vma->obj;
if (!vma->iomap || i915_vma_is_active(vma))
continue;
+ if (!i915_gem_object_trylock(obj, NULL))
+ continue;
+
if (__i915_vma_unbind(vma) == 0)
freed_pages += count;
+
+ i915_gem_object_unlock(obj);
}
mutex_unlock(&i915->ggtt.vm.mutex);
@@ -458,6 +465,16 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+/**
+ * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
+ * default, all object types that support shrinking (see IS_SHRINKABLE) will also
+ * make the object visible to the shrinker after allocating the system memory
+ * pages.
+ * @obj: The GEM object.
+ *
+ * This is typically used for special kernel internal objects that can't be
+ * easily processed by the shrinker, for example because they are perma-pinned.
+ */
void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = obj_to_i915(obj);
@@ -482,13 +499,12 @@ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}
-static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
- struct list_head *head)
+static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
+ struct list_head *head)
{
struct drm_i915_private *i915 = obj_to_i915(obj);
unsigned long flags;
- GEM_BUG_ON(!i915_gem_object_has_pages(obj));
if (!i915_gem_object_is_shrinkable(obj))
return;
@@ -508,14 +524,67 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}
+/**
+ * __i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
+{
+ ___i915_gem_object_make_shrinkable(obj,
+ &obj_to_i915(obj)->mm.shrink_list);
+}
+
+/**
+ * __i915_gem_object_make_purgeable - Move the object to the tail of the
+ * purgeable list. Objects on this list might be swapped out. Used with
+ * DONTNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
+{
+ ___i915_gem_object_make_shrinkable(obj,
+ &obj_to_i915(obj)->mm.purge_list);
+}
+
+/**
+ * i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * MUST only be called on objects which have backing pages.
+ *
+ * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
+ */
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
{
- __i915_gem_object_make_shrinkable(obj,
- &obj_to_i915(obj)->mm.shrink_list);
+ GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+ __i915_gem_object_make_shrinkable(obj);
}
+/**
+ * i915_gem_object_make_purgeable - Move the object to the tail of the purgeable
+ * list. Used with DONTNEED objects. Unlike with shrinkable objects, the
+ * shrinker will attempt to discard the backing pages, instead of trying to swap
+ * them out.
+ * @obj: The GEM object.
+ *
+ * MUST only be called on objects which have backing pages.
+ *
+ * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
+ */
void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
{
- __i915_gem_object_make_shrinkable(obj,
- &obj_to_i915(obj)->mm.purge_list);
+ GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+ __i915_gem_object_make_purgeable(obj);
}
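Editor's note: a minimal sketch (not part of the patch; example_hide_then_expose is a hypothetical helper) of the balanced pin/unpin pattern the kernel-docs above require, for an object that already has backing pages:

    static void example_hide_then_expose(struct drm_i915_gem_object *obj)
    {
            /* Hide the object from the shrinker while it is in internal use. */
            i915_gem_object_make_unshrinkable(obj);

            /* ... use the pages without them being swapped out or discarded ... */

            /* Balance the earlier call once the pages may be reclaimed again. */
            if (obj->mm.madv == I915_MADV_WILLNEED)
                    i915_gem_object_make_shrinkable(obj);
            else
                    i915_gem_object_make_purgeable(obj);
    }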
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index ddd37ccb1362..7df50fd6cc7b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -399,7 +399,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
return 0;
}
- if (intel_vtd_active() && GRAPHICS_VER(i915) < 8) {
+ if (intel_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
drm_notice(&i915->drm,
"%s, disabling use of stolen memory\n",
"DMAR active");
@@ -488,6 +488,9 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
return 0;
}
+ /* Exclude the reserved region from driver use */
+ mem->region.end = reserved_base - 1;
+
/* It is possible for the reserved area to end before the end of stolen
* memory, so just consider the start. */
reserved_total = stolen_top - reserved_base;
@@ -653,7 +656,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem,
cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
i915_gem_object_set_cache_coherency(obj, cache_level);
- if (WARN_ON(!i915_gem_object_trylock(obj)))
+ if (WARN_ON(!i915_gem_object_trylock(obj, NULL)))
return -EBUSY;
i915_gem_object_init_memory_region(obj, mem);
@@ -720,9 +723,10 @@ static int init_stolen_smem(struct intel_memory_region *mem)
return i915_gem_init_stolen(mem);
}
-static void release_stolen_smem(struct intel_memory_region *mem)
+static int release_stolen_smem(struct intel_memory_region *mem)
{
i915_gem_cleanup_stolen(mem->i915);
+ return 0;
}
static const struct intel_memory_region_ops i915_region_stolen_smem_ops = {
@@ -759,10 +763,11 @@ err_fini:
return err;
}
-static void release_stolen_lmem(struct intel_memory_region *mem)
+static int release_stolen_lmem(struct intel_memory_region *mem)
{
io_mapping_fini(&mem->iomap);
i915_gem_cleanup_stolen(mem->i915);
+ return 0;
}
static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
@@ -778,6 +783,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
struct intel_uncore *uncore = &i915->uncore;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
struct intel_memory_region *mem;
+ resource_size_t min_page_size;
resource_size_t io_start;
resource_size_t lmem_size;
u64 lmem_base;
@@ -789,8 +795,11 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
lmem_size = pci_resource_len(pdev, 2) - lmem_base;
io_start = pci_resource_start(pdev, 2) + lmem_base;
+ min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
+ I915_GTT_PAGE_SIZE_4K;
+
mem = intel_memory_region_create(i915, lmem_base, lmem_size,
- I915_GTT_PAGE_SIZE_4K, io_start,
+ min_page_size, io_start,
type, instance,
&i915_region_stolen_lmem_ops);
if (IS_ERR(mem))
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
index 1929d6cf4150..75501db71041 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -38,12 +38,13 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
{
const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct drm_i915_private *i915 = to_i915(dev);
struct i915_gem_context *ctx;
unsigned long idx;
long ret;
/* ABI: return -EIO if already wedged */
- ret = intel_gt_terminally_wedged(&to_i915(dev)->gt);
+ ret = intel_gt_terminally_wedged(to_gt(i915));
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 74a1ffd0d7dd..923cc7ad8d70 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -14,13 +14,9 @@
#include "gem/i915_gem_object.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
#include "gem/i915_gem_ttm_pm.h"
-
-#include "gt/intel_engine_pm.h"
-#include "gt/intel_gt.h"
-#include "gt/intel_migrate.h"
-
#define I915_TTM_PRIO_PURGE 0
#define I915_TTM_PRIO_NO_PAGES 1
#define I915_TTM_PRIO_HAS_PAGES 2
@@ -34,7 +30,9 @@
* struct i915_ttm_tt - TTM page vector with additional private information
* @ttm: The base TTM page vector.
* @dev: The struct device used for dma mapping and unmapping.
- * @cached_st: The cached scatter-gather table.
+ * @cached_rsgt: The cached scatter-gather table.
+ * @is_shmem: Set if using shmem.
+ * @filp: The shmem file, if using shmem backend.
*
* Note that DMA may be going on right up to the point where the page-
* vector is unpopulated in delayed destroy. Hence keep the
@@ -45,7 +43,10 @@
struct i915_ttm_tt {
struct ttm_tt ttm;
struct device *dev;
- struct sg_table *cached_st;
+ struct i915_refct_sgt cached_rsgt;
+
+ bool is_shmem;
+ struct file *filp;
};
static const struct ttm_place sys_placement_flags = {
@@ -103,37 +104,15 @@ static int i915_ttm_err_to_gem(int err)
return err;
}
-static bool gpu_binds_iomem(struct ttm_resource *mem)
-{
- return mem->mem_type != TTM_PL_SYSTEM;
-}
-
-static bool cpu_maps_iomem(struct ttm_resource *mem)
-{
- /* Once / if we support GGTT, this is also false for cached ttm_tts */
- return mem->mem_type != TTM_PL_SYSTEM;
-}
-
-static enum i915_cache_level
-i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
- struct ttm_tt *ttm)
-{
- return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) &&
- ttm->caching == ttm_cached) ? I915_CACHE_LLC :
- I915_CACHE_NONE;
-}
-
-static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);
-
static enum ttm_caching
i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
{
/*
- * Objects only allowed in system get cached cpu-mappings.
- * Other objects get WC mapping for now. Even if in system.
+	 * Objects only allowed in system get cached cpu-mappings, as do
+	 * lmem-only buffers being evicted to system for swapping. Other objects
+	 * get WC mappings for now, even if in system.
*/
- if (obj->mm.region->type == INTEL_MEMORY_SYSTEM &&
- obj->mm.n_placements <= 1)
+ if (obj->mm.n_placements <= 1)
return ttm_cached;
return ttm_write_combined;
@@ -179,15 +158,103 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
placement->busy_placement = busy;
}
+static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+ struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+ struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ const unsigned int max_segment = i915_sg_segment_size();
+ const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
+ struct file *filp = i915_tt->filp;
+ struct sgt_iter sgt_iter;
+ struct sg_table *st;
+ struct page *page;
+ unsigned long i;
+ int err;
+
+ if (!filp) {
+ struct address_space *mapping;
+ gfp_t mask;
+
+ filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE);
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+
+ mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+
+ mapping = filp->f_mapping;
+ mapping_set_gfp_mask(mapping, mask);
+ GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+ i915_tt->filp = filp;
+ }
+
+ st = &i915_tt->cached_rsgt.table;
+ err = shmem_sg_alloc_table(i915, st, size, mr, filp->f_mapping,
+ max_segment);
+ if (err)
+ return err;
+
+ err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (err)
+ goto err_free_st;
+
+ i = 0;
+ for_each_sgt_page(page, sgt_iter, st)
+ ttm->pages[i++] = page;
+
+ if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
+ ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
+
+ return 0;
+
+err_free_st:
+ shmem_sg_free_table(st, filp->f_mapping, false, false);
+
+ return err;
+}
+
+static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
+ struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+ shmem_sg_free_table(st, file_inode(i915_tt->filp)->i_mapping,
+ backup, backup);
+}
+
+static void i915_ttm_tt_release(struct kref *ref)
+{
+ struct i915_ttm_tt *i915_tt =
+ container_of(ref, typeof(*i915_tt), cached_rsgt.kref);
+ struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+ GEM_WARN_ON(st->sgl);
+
+ kfree(i915_tt);
+}
+
+static const struct i915_refct_sgt_ops tt_rsgt_ops = {
+ .release = i915_ttm_tt_release
+};
+
static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, bo->resource->mem_type);
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ enum ttm_caching caching;
struct i915_ttm_tt *i915_tt;
int ret;
+ if (!obj)
+ return NULL;
+
i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL);
if (!i915_tt)
return NULL;
@@ -196,38 +263,66 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
man->use_tt)
page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
- ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags,
- i915_ttm_select_tt_caching(obj));
- if (ret) {
- kfree(i915_tt);
- return NULL;
+ caching = i915_ttm_select_tt_caching(obj);
+ if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
+ page_flags |= TTM_TT_FLAG_EXTERNAL |
+ TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+ i915_tt->is_shmem = true;
}
+ ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching);
+ if (ret)
+ goto err_free;
+
+ __i915_refct_sgt_init(&i915_tt->cached_rsgt, bo->base.size,
+ &tt_rsgt_ops);
+
i915_tt->dev = obj->base.dev->dev;
return &i915_tt->ttm;
+
+err_free:
+ kfree(i915_tt);
+ return NULL;
+}
+
+static int i915_ttm_tt_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+
+ if (i915_tt->is_shmem)
+ return i915_ttm_tt_shmem_populate(bdev, ttm, ctx);
+
+ return ttm_pool_alloc(&bdev->pool, ttm, ctx);
}
static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
{
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ struct sg_table *st = &i915_tt->cached_rsgt.table;
- if (i915_tt->cached_st) {
- dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
- DMA_BIDIRECTIONAL, 0);
- sg_free_table(i915_tt->cached_st);
- kfree(i915_tt->cached_st);
- i915_tt->cached_st = NULL;
+ if (st->sgl)
+ dma_unmap_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
+
+ if (i915_tt->is_shmem) {
+ i915_ttm_tt_shmem_unpopulate(ttm);
+ } else {
+ sg_free_table(st);
+ ttm_pool_free(&bdev->pool, ttm);
}
- ttm_pool_free(&bdev->pool, ttm);
}
static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
{
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ if (i915_tt->filp)
+ fput(i915_tt->filp);
+
ttm_tt_fini(ttm);
- kfree(i915_tt);
+ i915_refct_sgt_put(&i915_tt->cached_rsgt);
}
static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
@@ -235,6 +330,17 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ if (!obj)
+ return false;
+
+ /*
+	 * EXTERNAL objects should never be swapped out by TTM; instead we need
+ * to handle that ourselves. TTM will already skip such objects for us,
+ * but we would like to avoid grabbing locks for no good reason.
+ */
+ if (bo->ttm && bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
+ return false;
+
/* Will do for now. Our pinned objects are still on TTM's LRU lists */
return i915_gem_object_evictable(obj);
}
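Editor's note: with i915_ttm_to_gem() now returning NULL for buffer objects that were not created by i915 (see the i915_gem_ttm.h hunk further down), every TTM callback is expected to guard against TTM ghost objects the way eviction_valuable does above. A minimal sketch of the pattern (not part of the patch; example_callback_guard is hypothetical):

    static bool example_callback_guard(struct ttm_buffer_object *bo)
    {
            struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);

            /* A TTM ghost object: nothing i915-specific to do. */
            if (!obj)
                    return false;

            /* ... safe to dereference obj from here on ... */
            return true;
    }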
@@ -245,28 +351,19 @@ static void i915_ttm_evict_flags(struct ttm_buffer_object *bo,
*placement = i915_sys_placement;
}
-static int i915_ttm_move_notify(struct ttm_buffer_object *bo)
-{
- struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- int ret;
-
- ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
- if (ret)
- return ret;
-
- ret = __i915_gem_object_put_pages(obj);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
+/**
+ * i915_ttm_free_cached_io_rsgt - Free object cached LMEM information
+ * @obj: The GEM object
+ * This function frees any LMEM-related information that is cached on
+ * the object. For example, the radix tree for fast page lookup and the
+ * cached refcounted sg-table.
+ */
+void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj)
{
struct radix_tree_iter iter;
void __rcu **slot;
- if (!obj->ttm.cached_io_st)
+ if (!obj->ttm.cached_io_rsgt)
return;
rcu_read_lock();
@@ -274,93 +371,106 @@ static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index);
rcu_read_unlock();
- sg_free_table(obj->ttm.cached_io_st);
- kfree(obj->ttm.cached_io_st);
- obj->ttm.cached_io_st = NULL;
+ i915_refct_sgt_put(obj->ttm.cached_io_rsgt);
+ obj->ttm.cached_io_rsgt = NULL;
}
-static void
-i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
+/**
+ * i915_ttm_purge - Clear an object of its memory
+ * @obj: The object
+ *
+ * This function is called to clear an object of its memory when it is
+ * marked as not needed anymore.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int i915_ttm_purge(struct drm_i915_gem_object *obj)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct i915_ttm_tt *i915_tt =
+ container_of(bo->ttm, typeof(*i915_tt), ttm);
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ struct ttm_placement place = {};
+ int ret;
- if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
- obj->write_domain = I915_GEM_DOMAIN_WC;
- obj->read_domains = I915_GEM_DOMAIN_WC;
- } else {
- obj->write_domain = I915_GEM_DOMAIN_CPU;
- obj->read_domains = I915_GEM_DOMAIN_CPU;
- }
-}
+ if (obj->mm.madv == __I915_MADV_PURGED)
+ return 0;
-static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
-{
- struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
- unsigned int cache_level;
- unsigned int i;
+ ret = ttm_bo_validate(bo, &place, &ctx);
+ if (ret)
+ return ret;
- /*
- * If object was moved to an allowable region, update the object
- * region to consider it migrated. Note that if it's currently not
- * in an allowable region, it's evicted and we don't update the
- * object region.
- */
- if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
- for (i = 0; i < obj->mm.n_placements; ++i) {
- struct intel_memory_region *mr = obj->mm.placements[i];
-
- if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
- mr != obj->mm.region) {
- i915_gem_object_release_memory_region(obj);
- i915_gem_object_init_memory_region(obj, mr);
- break;
- }
- }
+ if (bo->ttm && i915_tt->filp) {
+ /*
+		 * The below fput() (which eventually calls shmem_truncate) might
+		 * be delayed by a worker, so when called directly to purge the
+		 * pages (like by the shrinker) we should try to be more
+ * aggressive and release the pages immediately.
+ */
+ shmem_truncate_range(file_inode(i915_tt->filp),
+ 0, (loff_t)-1);
+ fput(fetch_and_zero(&i915_tt->filp));
}
- obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
-
- obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
- I915_BO_FLAG_STRUCT_PAGE;
+ obj->write_domain = 0;
+ obj->read_domains = 0;
+ i915_ttm_adjust_gem_after_move(obj);
+ i915_ttm_free_cached_io_rsgt(obj);
+ obj->mm.madv = __I915_MADV_PURGED;
- cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
- bo->ttm);
- i915_gem_object_set_cache_coherency(obj, cache_level);
+ return 0;
}
-static void i915_ttm_purge(struct drm_i915_gem_object *obj)
+static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj,
+ bool no_wait_gpu,
+ bool should_writeback)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct i915_ttm_tt *i915_tt =
+ container_of(bo->ttm, typeof(*i915_tt), ttm);
struct ttm_operation_ctx ctx = {
.interruptible = true,
- .no_wait_gpu = false,
+ .no_wait_gpu = no_wait_gpu,
};
struct ttm_placement place = {};
int ret;
- if (obj->mm.madv == __I915_MADV_PURGED)
- return;
+ if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM)
+ return 0;
+
+ GEM_BUG_ON(!i915_tt->is_shmem);
+
+ if (!i915_tt->filp)
+ return 0;
+
+ ret = ttm_bo_wait_ctx(bo, &ctx);
+ if (ret)
+ return ret;
- /* TTM's purge interface. Note that we might be reentering. */
+ switch (obj->mm.madv) {
+ case I915_MADV_DONTNEED:
+ return i915_ttm_purge(obj);
+ case __I915_MADV_PURGED:
+ return 0;
+ }
+
+ if (bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED)
+ return 0;
+
+ bo->ttm->page_flags |= TTM_TT_FLAG_SWAPPED;
ret = ttm_bo_validate(bo, &place, &ctx);
- if (!ret) {
- obj->write_domain = 0;
- obj->read_domains = 0;
- i915_ttm_adjust_gem_after_move(obj);
- i915_ttm_free_cached_io_st(obj);
- obj->mm.madv = __I915_MADV_PURGED;
+ if (ret) {
+ bo->ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
+ return ret;
}
-}
-static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
-{
- struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- int ret = i915_ttm_move_notify(bo);
+ if (should_writeback)
+ __shmem_writeback(obj->base.size, i915_tt->filp->f_mapping);
- GEM_WARN_ON(ret);
- GEM_WARN_ON(obj->ttm.cached_io_st);
- if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
- i915_ttm_purge(obj);
+ return 0;
}
static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
@@ -369,232 +479,101 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
if (likely(obj)) {
__i915_gem_object_pages_fini(obj);
- i915_ttm_free_cached_io_st(obj);
+ i915_ttm_free_cached_io_rsgt(obj);
}
}
-static struct intel_memory_region *
-i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
-{
- struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
-
- /* There's some room for optimization here... */
- GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
- ttm_mem_type < I915_PL_LMEM0);
- if (ttm_mem_type == I915_PL_SYSTEM)
- return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
- 0);
-
- return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
- ttm_mem_type - I915_PL_LMEM0);
-}
-
-static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm)
+static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm)
{
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
struct sg_table *st;
int ret;
- if (i915_tt->cached_st)
- return i915_tt->cached_st;
-
- st = kzalloc(sizeof(*st), GFP_KERNEL);
- if (!st)
- return ERR_PTR(-ENOMEM);
+ if (i915_tt->cached_rsgt.table.sgl)
+ return i915_refct_sgt_get(&i915_tt->cached_rsgt);
+ st = &i915_tt->cached_rsgt.table;
ret = sg_alloc_table_from_pages_segment(st,
ttm->pages, ttm->num_pages,
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
i915_sg_segment_size(), GFP_KERNEL);
if (ret) {
- kfree(st);
+ st->sgl = NULL;
return ERR_PTR(ret);
}
ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
if (ret) {
sg_free_table(st);
- kfree(st);
return ERR_PTR(ret);
}
- i915_tt->cached_st = st;
- return st;
+ return i915_refct_sgt_get(&i915_tt->cached_rsgt);
}
-static struct sg_table *
+/**
+ * i915_ttm_resource_get_st - Get a refcounted sg-table pointing to the
+ * resource memory
+ * @obj: The GEM object used for sg-table caching
+ * @res: The struct ttm_resource for which an sg-table is requested.
+ *
+ * This function returns a refcounted sg-table representing the memory
+ * pointed to by @res. If @res is the object's current resource it may also
+ * cache the sg_table on the object or attempt to access an already cached
+ * sg-table. The refcounted sg-table needs to be put when no longer in use.
+ *
+ * Return: A valid pointer to a struct i915_refct_sgt or error pointer on
+ * failure.
+ */
+struct i915_refct_sgt *
i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
struct ttm_resource *res)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
- if (!gpu_binds_iomem(res))
+ if (!i915_ttm_gtt_binds_lmem(res))
return i915_ttm_tt_get_st(bo->ttm);
/*
* If CPU mapping differs, we need to add the ttm_tt pages to
* the resulting st. Might make sense for GGTT.
*/
- GEM_WARN_ON(!cpu_maps_iomem(res));
- return intel_region_ttm_resource_to_st(obj->mm.region, res);
-}
-
-static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
- bool clear,
- struct ttm_resource *dst_mem,
- struct ttm_tt *dst_ttm,
- struct sg_table *dst_st)
-{
- struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
- bdev);
- struct ttm_resource_manager *src_man =
- ttm_manager_type(bo->bdev, bo->resource->mem_type);
- struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- struct sg_table *src_st;
- struct i915_request *rq;
- struct ttm_tt *src_ttm = bo->ttm;
- enum i915_cache_level src_level, dst_level;
- int ret;
-
- if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
- return -EINVAL;
+ GEM_WARN_ON(!i915_ttm_cpu_maps_iomem(res));
+ if (bo->resource == res) {
+ if (!obj->ttm.cached_io_rsgt) {
+ struct i915_refct_sgt *rsgt;
- dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
- if (clear) {
- if (bo->type == ttm_bo_type_kernel)
- return -EINVAL;
+ rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
+ res);
+ if (IS_ERR(rsgt))
+ return rsgt;
- intel_engine_pm_get(i915->gt.migrate.context->engine);
- ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
- dst_st->sgl, dst_level,
- gpu_binds_iomem(dst_mem),
- 0, &rq);
-
- if (!ret && rq) {
- i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(rq);
- }
- intel_engine_pm_put(i915->gt.migrate.context->engine);
- } else {
- src_st = src_man->use_tt ? i915_ttm_tt_get_st(src_ttm) :
- obj->ttm.cached_io_st;
-
- src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
- intel_engine_pm_get(i915->gt.migrate.context->engine);
- ret = intel_context_migrate_copy(i915->gt.migrate.context,
- NULL, src_st->sgl, src_level,
- gpu_binds_iomem(bo->resource),
- dst_st->sgl, dst_level,
- gpu_binds_iomem(dst_mem),
- &rq);
- if (!ret && rq) {
- i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(rq);
+ obj->ttm.cached_io_rsgt = rsgt;
}
- intel_engine_pm_put(i915->gt.migrate.context->engine);
+ return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
}
- return ret;
-}
-
-static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
- struct ttm_resource *dst_mem,
- struct ttm_tt *dst_ttm,
- struct sg_table *dst_st,
- bool allow_accel)
-{
- int ret = -EINVAL;
-
- if (allow_accel)
- ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st);
- if (ret) {
- struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- struct intel_memory_region *dst_reg, *src_reg;
- union {
- struct ttm_kmap_iter_tt tt;
- struct ttm_kmap_iter_iomap io;
- } _dst_iter, _src_iter;
- struct ttm_kmap_iter *dst_iter, *src_iter;
-
- dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
- src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
- GEM_BUG_ON(!dst_reg || !src_reg);
-
- dst_iter = !cpu_maps_iomem(dst_mem) ?
- ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
- ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
- dst_st, dst_reg->region.start);
-
- src_iter = !cpu_maps_iomem(bo->resource) ?
- ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
- ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
- obj->ttm.cached_io_st,
- src_reg->region.start);
-
- ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
- }
+ return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
}
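Editor's note: the kernel-doc above asks callers to put the refcounted sg-table once they are done with it. A minimal sketch of that discipline (not part of the patch; example_peek_first_dma_addr is hypothetical):

    static int example_peek_first_dma_addr(struct drm_i915_gem_object *obj,
                                           struct ttm_resource *res,
                                           dma_addr_t *addr)
    {
            struct i915_refct_sgt *rsgt = i915_ttm_resource_get_st(obj, res);

            if (IS_ERR(rsgt))
                    return PTR_ERR(rsgt);

            *addr = sg_dma_address(rsgt->table.sgl);

            /* Drop the reference once the sg-table is no longer needed. */
            i915_refct_sgt_put(rsgt);
            return 0;
    }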
-static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
- struct ttm_operation_ctx *ctx,
- struct ttm_resource *dst_mem,
- struct ttm_place *hop)
+static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- struct ttm_resource_manager *dst_man =
- ttm_manager_type(bo->bdev, dst_mem->mem_type);
- struct ttm_tt *ttm = bo->ttm;
- struct sg_table *dst_st;
- bool clear;
int ret;
- /* Sync for now. We could do the actual copy async. */
- ret = ttm_bo_wait_ctx(bo, ctx);
- if (ret)
- return ret;
+ if (!obj)
+ return;
ret = i915_ttm_move_notify(bo);
- if (ret)
- return ret;
-
- if (obj->mm.madv != I915_MADV_WILLNEED) {
+ GEM_WARN_ON(ret);
+ GEM_WARN_ON(obj->ttm.cached_io_rsgt);
+ if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
i915_ttm_purge(obj);
- ttm_resource_free(bo, &dst_mem);
- return 0;
- }
-
- /* Populate ttm with pages if needed. Typically system memory. */
- if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) {
- ret = ttm_tt_populate(bo->bdev, ttm, ctx);
- if (ret)
- return ret;
- }
-
- dst_st = i915_ttm_resource_get_st(obj, dst_mem);
- if (IS_ERR(dst_st))
- return PTR_ERR(dst_st);
-
- clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
- if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
- __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true);
-
- ttm_bo_move_sync_cleanup(bo, dst_mem);
- i915_ttm_adjust_domains_after_move(obj);
- i915_ttm_free_cached_io_st(obj);
-
- if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) {
- obj->ttm.cached_io_st = dst_st;
- obj->ttm.get_io_page.sg_pos = dst_st->sgl;
- obj->ttm.get_io_page.sg_idx = 0;
- }
-
- i915_ttm_adjust_gem_after_move(obj);
- return 0;
}
static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem)
{
- if (!cpu_maps_iomem(mem))
+ if (!i915_ttm_cpu_maps_iomem(mem))
return 0;
mem->bus.caching = ttm_write_combined;
@@ -607,19 +586,26 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long page_offset)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- unsigned long base = obj->mm.region->iomap.base - obj->mm.region->region.start;
struct scatterlist *sg;
+ unsigned long base;
unsigned int ofs;
+ GEM_BUG_ON(!obj);
GEM_WARN_ON(bo->ttm);
+ base = obj->mm.region->iomap.base - obj->mm.region->region.start;
sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true);
return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
}
+/*
+ * All callbacks need to take care not to downcast a struct ttm_buffer_object
+ * without checking its subclass, since it might be a TTM ghost object.
+ */
static struct ttm_device_funcs i915_ttm_bo_driver = {
.ttm_tt_create = i915_ttm_tt_create,
+ .ttm_tt_populate = i915_ttm_tt_populate,
.ttm_tt_unpopulate = i915_ttm_tt_unpopulate,
.ttm_tt_destroy = i915_ttm_tt_destroy,
.eviction_valuable = i915_ttm_eviction_valuable,
@@ -649,7 +635,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
.interruptible = true,
.no_wait_gpu = false,
};
- struct sg_table *st;
int real_num_busy;
int ret;
@@ -676,7 +661,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
return i915_ttm_err_to_gem(ret);
}
- i915_ttm_adjust_lru(obj);
if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) {
ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx);
if (ret)
@@ -687,14 +671,19 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
}
if (!i915_gem_object_has_pages(obj)) {
- /* Object either has a page vector or is an iomem object */
- st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
- if (IS_ERR(st))
- return PTR_ERR(st);
+ struct i915_refct_sgt *rsgt =
+ i915_ttm_resource_get_st(obj, bo->resource);
+
+ if (IS_ERR(rsgt))
+ return PTR_ERR(rsgt);
- __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
+ GEM_BUG_ON(obj->mm.rsgt);
+ obj->mm.rsgt = rsgt;
+ __i915_gem_object_set_pages(obj, &rsgt->table,
+ i915_sg_dma_sizes(rsgt->table.sgl));
}
+ i915_ttm_adjust_lru(obj);
return ret;
}
@@ -766,12 +755,21 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
* and shrinkers will move it out if needed.
*/
- i915_ttm_adjust_lru(obj);
+ if (obj->mm.rsgt)
+ i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt));
}
-static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
+/**
+ * i915_ttm_adjust_lru - Adjust an object's position on relevant LRU lists.
+ * @obj: The object
+ */
+void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct i915_ttm_tt *i915_tt =
+ container_of(bo->ttm, typeof(*i915_tt), ttm);
+ bool shrinkable =
+ bo->ttm && i915_tt->filp && ttm_tt_is_populated(bo->ttm);
/*
* Don't manipulate the TTM LRUs while in TTM bo destruction.
@@ -781,10 +779,53 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
return;
/*
+ * We skip managing the shrinker LRU in set_pages() and just manage
+ * everything here. This does at least solve the issue with having
+	 * temporary shmem mappings (like with evicted lmem) not being visible to
+	 * the shrinker. Only our shmem objects are shrinkable; everything else
+	 * we keep as unshrinkable.
+ *
+ * To make sure everything plays nice we keep an extra shrink pin in TTM
+ * if the underlying pages are not currently shrinkable. Once we release
+ * our pin, like when the pages are moved to shmem, the pages will then
+ * be added to the shrinker LRU, assuming the caller isn't also holding
+ * a pin.
+ *
+ * TODO: consider maybe also bumping the shrinker list here when we have
+ * already unpinned it, which should give us something more like an LRU.
+ *
+ * TODO: There is a small window of opportunity for this function to
+ * get called from eviction after we've dropped the last GEM refcount,
+ * but before the TTM deleted flag is set on the object. Avoid
+ * adjusting the shrinker list in such cases, since the object is
+ * not available to the shrinker anyway due to its zero refcount.
+ * To fix this properly we should move to a TTM shrinker LRU list for
+ * these objects.
+ */
+ if (kref_get_unless_zero(&obj->base.refcount)) {
+ if (shrinkable != obj->mm.ttm_shrinkable) {
+ if (shrinkable) {
+ if (obj->mm.madv == I915_MADV_WILLNEED)
+ __i915_gem_object_make_shrinkable(obj);
+ else
+ __i915_gem_object_make_purgeable(obj);
+ } else {
+ i915_gem_object_make_unshrinkable(obj);
+ }
+
+ obj->mm.ttm_shrinkable = shrinkable;
+ }
+ i915_gem_object_put(obj);
+ }
+
+ /*
* Put on the correct LRU list depending on the MADV status
*/
spin_lock(&bo->bdev->lru_lock);
- if (obj->mm.madv != I915_MADV_WILLNEED) {
+ if (shrinkable) {
+ /* Try to keep shmem_tt from being considered for shrinking. */
+ bo->priority = TTM_MAX_BO_PRIORITY - 1;
+ } else if (obj->mm.madv != I915_MADV_WILLNEED) {
bo->priority = I915_TTM_PRIO_PURGE;
} else if (!i915_gem_object_has_pages(obj)) {
if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
@@ -823,15 +864,39 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj)
static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
{
struct vm_area_struct *area = vmf->vma;
- struct drm_i915_gem_object *obj =
- i915_ttm_to_gem(area->vm_private_data);
+ struct ttm_buffer_object *bo = area->vm_private_data;
+ struct drm_device *dev = bo->base.dev;
+ struct drm_i915_gem_object *obj;
+ vm_fault_t ret;
+ int idx;
+
+ obj = i915_ttm_to_gem(bo);
+ if (!obj)
+ return VM_FAULT_SIGBUS;
/* Sanity check that we allow writing into this object */
if (unlikely(i915_gem_object_is_readonly(obj) &&
area->vm_flags & VM_WRITE))
return VM_FAULT_SIGBUS;
- return ttm_bo_vm_fault(vmf);
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ if (drm_dev_enter(dev, &idx)) {
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
+ drm_dev_exit(idx);
+ } else {
+ ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+ }
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+ i915_ttm_adjust_lru(obj);
+
+ dma_resv_unlock(bo->base.resv);
+ return ret;
}
static int
@@ -882,13 +947,18 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
.name = "i915_gem_object_ttm",
+ .flags = I915_GEM_OBJECT_IS_SHRINKABLE |
+ I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST,
.get_pages = i915_ttm_get_pages,
.put_pages = i915_ttm_put_pages,
.truncate = i915_ttm_purge,
+ .shrinker_release_pages = i915_ttm_shrinker_release_pages,
+
.adjust_lru = i915_ttm_adjust_lru,
.delayed_free = i915_ttm_delayed_free,
.migrate = i915_ttm_migrate,
+
.mmap_offset = i915_ttm_mmap_offset,
.mmap_ops = &vm_ops_ttm,
};
@@ -901,6 +971,18 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
mutex_destroy(&obj->ttm.get_io_page.lock);
if (obj->ttm.created) {
+ /*
+		 * We freely manage the shrinker LRU outside of the mm.pages life
+ * cycle. As a result when destroying the object we should be
+ * extra paranoid and ensure we remove it from the LRU, before
+ * we free the object.
+ *
+ * Touching the ttm_shrinkable outside of the object lock here
+ * should be safe now that the last GEM object ref was dropped.
+ */
+ if (obj->mm.ttm_shrinkable)
+ i915_gem_object_make_unshrinkable(obj);
+
i915_ttm_backup_free(obj);
/* This releases all gem object bindings to the backend. */
@@ -940,10 +1022,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags);
/* Don't put on a region list until we're either locked or fully initialized. */
- obj->mm.region = intel_memory_region_get(mem);
+ obj->mm.region = mem;
INIT_LIST_HEAD(&obj->mm.region_link);
- i915_gem_object_make_unshrinkable(obj);
INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN);
mutex_init(&obj->ttm.get_io_page.lock);
bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device :
@@ -955,6 +1036,14 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
GEM_BUG_ON(page_size && obj->mm.n_placements);
/*
+ * Keep an extra shrink pin to prevent the object from being made
+ * shrinkable too early. If the ttm_tt is ever allocated in shmem, we
+ * drop the pin. The TTM backend manages the shrinker LRU itself,
+ * outside of the normal mm.pages life cycle.
+ */
+ i915_gem_object_make_unshrinkable(obj);
+
+ /*
* If this function fails, it will call the destructor, but
* our caller still owns the object. So no freeing in the
* destructor until obj->ttm.created is true.
@@ -980,6 +1069,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
static const struct intel_memory_region_ops ttm_system_region_ops = {
.init_object = __i915_gem_ttm_object_init,
+ .release = intel_region_ttm_fini,
};
struct intel_memory_region *
@@ -999,50 +1089,3 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915,
intel_memory_region_set_name(mr, "system-ttm");
return mr;
}
-
-/**
- * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to
- * another
- * @dst: The destination object
- * @src: The source object
- * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used.
- * @intr: Whether to perform waits interruptible:
- *
- * Note: The caller is responsible for assuring that the underlying
- * TTM objects are populated if needed and locked.
- *
- * Return: Zero on success. Negative error code on error. If @intr == true,
- * then it may return -ERESTARTSYS or -EINTR.
- */
-int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
- struct drm_i915_gem_object *src,
- bool allow_accel, bool intr)
-{
- struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst);
- struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src);
- struct ttm_operation_ctx ctx = {
- .interruptible = intr,
- };
- struct sg_table *dst_st;
- int ret;
-
- assert_object_held(dst);
- assert_object_held(src);
-
- /*
- * Sync for now. This will change with async moves.
- */
- ret = ttm_bo_wait_ctx(dst_bo, &ctx);
- if (!ret)
- ret = ttm_bo_wait_ctx(src_bo, &ctx);
- if (ret)
- return ret;
-
- dst_st = gpu_binds_iomem(dst_bo->resource) ?
- dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm);
-
- __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
- dst_st, allow_accel);
-
- return 0;
-}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index 0b7291dd897c..9d698ad00853 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -5,6 +5,8 @@
#ifndef _I915_GEM_TTM_H_
#define _I915_GEM_TTM_H_
+#include <drm/ttm/ttm_placement.h>
+
#include "gem/i915_gem_object_types.h"
/**
@@ -35,7 +37,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
static inline struct drm_i915_gem_object *
i915_ttm_to_gem(struct ttm_buffer_object *bo)
{
- if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy))
+ if (bo->destroy != i915_ttm_bo_destroy)
return NULL;
return container_of(bo, struct drm_i915_gem_object, __do_not_access);
@@ -47,10 +49,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
resource_size_t page_size,
unsigned int flags);
-int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
- struct drm_i915_gem_object *src,
- bool allow_accel, bool intr);
-
/* Internal I915 TTM declarations and definitions below. */
#define I915_PL_LMEM0 TTM_PL_PRIV
@@ -60,4 +58,37 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
struct ttm_placement *i915_ttm_sys_placement(void);
+void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj);
+
+struct i915_refct_sgt *
+i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
+ struct ttm_resource *res);
+
+void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);
+
+int i915_ttm_purge(struct drm_i915_gem_object *obj);
+
+/**
+ * i915_ttm_gtt_binds_lmem - Should the memory be viewed as LMEM by the GTT?
+ * @mem: struct ttm_resource representing the memory.
+ *
+ * Return: true if memory should be viewed as LMEM for GTT binding purposes,
+ * false otherwise.
+ */
+static inline bool i915_ttm_gtt_binds_lmem(struct ttm_resource *mem)
+{
+ return mem->mem_type != I915_PL_SYSTEM;
+}
+
+/**
+ * i915_ttm_cpu_maps_iomem - Should the memory be viewed as IOMEM by the CPU?
+ * @mem: struct ttm_resource representing the memory.
+ *
+ * Return: true if memory should be viewed as IOMEM for CPU mapping purposes.
+ */
+static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem)
+{
+ /* Once / if we support GGTT, this is also false for cached ttm_tts */
+ return mem->mem_type != I915_PL_SYSTEM;
+}
#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
new file mode 100644
index 000000000000..ee9612a3ee5e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -0,0 +1,627 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/ttm/ttm_bo_driver.h>
+
+#include "i915_deps.h"
+#include "i915_drv.h"
+#include "intel_memory_region.h"
+#include "intel_region_ttm.h"
+
+#include "gem/i915_gem_object.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
+
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_migrate.h"
+
+/**
+ * DOC: Selftest failure modes for failsafe migration:
+ *
+ * For fail_gpu_migration, the gpu blit scheduled is always a clear blit
+ * rather than a copy blit, and then we force the failure paths as if
+ * the blit fence returned an error.
+ *
+ * For fail_work_allocation we fail the kmalloc of the async worker and
+ * sync the gpu blit. If it then fails, or fail_gpu_migration is set to
+ * true, then a memcpy operation is performed synchronously.
+ */
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+static bool fail_gpu_migration;
+static bool fail_work_allocation;
+
+void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+ bool work_allocation)
+{
+ fail_gpu_migration = gpu_migration;
+ fail_work_allocation = work_allocation;
+}
+#endif
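Editor's note: a minimal sketch (not part of the patch; example_force_memcpy_fallback is hypothetical and only meaningful under CONFIG_DRM_I915_SELFTEST) of how a selftest can exercise the failure modes documented above:

    static void example_force_memcpy_fallback(void)
    {
            /* Treat every scheduled GPU blit as failed, but allow the async worker. */
            i915_ttm_migrate_set_failure_modes(true, false);

            /* ... trigger an object migration and verify the memcpy fallback ... */

            /* Restore normal operation. */
            i915_ttm_migrate_set_failure_modes(false, false);
    }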
+
+static enum i915_cache_level
+i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
+ struct ttm_tt *ttm)
+{
+ return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+ !i915_ttm_gtt_binds_lmem(res) &&
+ ttm->caching == ttm_cached) ? I915_CACHE_LLC :
+ I915_CACHE_NONE;
+}
+
+static struct intel_memory_region *
+i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
+{
+ struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+
+ /* There's some room for optimization here... */
+ GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
+ ttm_mem_type < I915_PL_LMEM0);
+ if (ttm_mem_type == I915_PL_SYSTEM)
+ return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
+ 0);
+
+ return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
+ ttm_mem_type - I915_PL_LMEM0);
+}
+
+/**
+ * i915_ttm_adjust_domains_after_move - Adjust the GEM domains after a
+ * TTM move
+ * @obj: The gem object
+ */
+void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+
+ if (i915_ttm_cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
+ obj->write_domain = I915_GEM_DOMAIN_WC;
+ obj->read_domains = I915_GEM_DOMAIN_WC;
+ } else {
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+ }
+}
+
+/**
+ * i915_ttm_adjust_gem_after_move - Adjust the GEM state after a TTM move
+ * @obj: The gem object
+ *
+ * Adjusts the GEM object's region, mem_flags and cache coherency after a
+ * TTM move.
+ */
+void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ unsigned int cache_level;
+ unsigned int i;
+
+ /*
+ * If object was moved to an allowable region, update the object
+ * region to consider it migrated. Note that if it's currently not
+ * in an allowable region, it's evicted and we don't update the
+ * object region.
+ */
+ if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
+ for (i = 0; i < obj->mm.n_placements; ++i) {
+ struct intel_memory_region *mr = obj->mm.placements[i];
+
+ if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
+ mr != obj->mm.region) {
+ i915_gem_object_release_memory_region(obj);
+ i915_gem_object_init_memory_region(obj, mr);
+ break;
+ }
+ }
+ }
+
+ obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
+
+ obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
+ I915_BO_FLAG_STRUCT_PAGE;
+
+ cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
+ bo->ttm);
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+}
+
+/**
+ * i915_ttm_move_notify - Prepare an object for move
+ * @bo: The ttm buffer object.
+ *
+ * This function prepares an object for move by removing all GPU bindings,
+ * removing all CPU mappings and finally releasing the pages sg-table.
+ *
+ * Return: 0 if successful, negative error code on error.
+ */
+int i915_ttm_move_notify(struct ttm_buffer_object *bo)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ int ret;
+
+ ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+ if (ret)
+ return ret;
+
+ ret = __i915_gem_object_put_pages(obj);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
+ bool clear,
+ struct ttm_resource *dst_mem,
+ struct ttm_tt *dst_ttm,
+ struct sg_table *dst_st,
+ const struct i915_deps *deps)
+{
+ struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
+ bdev);
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct i915_request *rq;
+ struct ttm_tt *src_ttm = bo->ttm;
+ enum i915_cache_level src_level, dst_level;
+ int ret;
+
+ if (!to_gt(i915)->migrate.context || intel_gt_is_wedged(to_gt(i915)))
+ return ERR_PTR(-EINVAL);
+
+ /* With fail_gpu_migration, we always perform a GPU clear. */
+ if (I915_SELFTEST_ONLY(fail_gpu_migration))
+ clear = true;
+
+ dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
+ if (clear) {
+ if (bo->type == ttm_bo_type_kernel &&
+ !I915_SELFTEST_ONLY(fail_gpu_migration))
+ return ERR_PTR(-EINVAL);
+
+ intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
+ ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps,
+ dst_st->sgl, dst_level,
+ i915_ttm_gtt_binds_lmem(dst_mem),
+ 0, &rq);
+ } else {
+ struct i915_refct_sgt *src_rsgt =
+ i915_ttm_resource_get_st(obj, bo->resource);
+
+ if (IS_ERR(src_rsgt))
+ return ERR_CAST(src_rsgt);
+
+ src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
+ intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
+ ret = intel_context_migrate_copy(to_gt(i915)->migrate.context,
+ deps, src_rsgt->table.sgl,
+ src_level,
+ i915_ttm_gtt_binds_lmem(bo->resource),
+ dst_st->sgl, dst_level,
+ i915_ttm_gtt_binds_lmem(dst_mem),
+ &rq);
+
+ i915_refct_sgt_put(src_rsgt);
+ }
+
+ intel_engine_pm_put(to_gt(i915)->migrate.context->engine);
+
+ if (ret && rq) {
+ i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(rq);
+ }
+
+ return ret ? ERR_PTR(ret) : &rq->fence;
+}
+
+/**
+ * struct i915_ttm_memcpy_arg - argument for the bo memcpy functionality.
+ * @_dst_iter: Storage space for the destination kmap iterator.
+ * @_src_iter: Storage space for the source kmap iterator.
+ * @dst_iter: Pointer to the destination kmap iterator.
+ * @src_iter: Pointer to the source kmap iterator.
+ * @clear: Whether to clear instead of copy.
+ * @src_rsgt: Refcounted scatter-gather list of source memory.
+ * @dst_rsgt: Refcounted scatter-gather list of destination memory.
+ */
+struct i915_ttm_memcpy_arg {
+ union {
+ struct ttm_kmap_iter_tt tt;
+ struct ttm_kmap_iter_iomap io;
+ } _dst_iter,
+ _src_iter;
+ struct ttm_kmap_iter *dst_iter;
+ struct ttm_kmap_iter *src_iter;
+ unsigned long num_pages;
+ bool clear;
+ struct i915_refct_sgt *src_rsgt;
+ struct i915_refct_sgt *dst_rsgt;
+};
+
+/**
+ * struct i915_ttm_memcpy_work - Async memcpy worker under a dma-fence.
+ * @fence: The dma-fence.
+ * @work: The work struct used for the memcpy work.
+ * @lock: The fence lock. Not used to protect anything else ATM.
+ * @irq_work: Low latency worker to signal the fence since it can't be done
+ * from the callback for lockdep reasons.
+ * @cb: Callback for the accelerated migration fence.
+ * @arg: The argument for the memcpy functionality.
+ */
+struct i915_ttm_memcpy_work {
+ struct dma_fence fence;
+ struct work_struct work;
+ /* The fence lock */
+ spinlock_t lock;
+ struct irq_work irq_work;
+ struct dma_fence_cb cb;
+ struct i915_ttm_memcpy_arg arg;
+};
+
+static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg)
+{
+ ttm_move_memcpy(arg->clear, arg->num_pages,
+ arg->dst_iter, arg->src_iter);
+}
+
+static void i915_ttm_memcpy_init(struct i915_ttm_memcpy_arg *arg,
+ struct ttm_buffer_object *bo, bool clear,
+ struct ttm_resource *dst_mem,
+ struct ttm_tt *dst_ttm,
+ struct i915_refct_sgt *dst_rsgt)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct intel_memory_region *dst_reg, *src_reg;
+
+ dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
+ src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
+ GEM_BUG_ON(!dst_reg || !src_reg);
+
+ arg->dst_iter = !i915_ttm_cpu_maps_iomem(dst_mem) ?
+ ttm_kmap_iter_tt_init(&arg->_dst_iter.tt, dst_ttm) :
+ ttm_kmap_iter_iomap_init(&arg->_dst_iter.io, &dst_reg->iomap,
+ &dst_rsgt->table, dst_reg->region.start);
+
+ arg->src_iter = !i915_ttm_cpu_maps_iomem(bo->resource) ?
+ ttm_kmap_iter_tt_init(&arg->_src_iter.tt, bo->ttm) :
+ ttm_kmap_iter_iomap_init(&arg->_src_iter.io, &src_reg->iomap,
+ &obj->ttm.cached_io_rsgt->table,
+ src_reg->region.start);
+ arg->clear = clear;
+ arg->num_pages = bo->base.size >> PAGE_SHIFT;
+
+ arg->dst_rsgt = i915_refct_sgt_get(dst_rsgt);
+ arg->src_rsgt = clear ? NULL :
+ i915_ttm_resource_get_st(obj, bo->resource);
+}
+
+static void i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg *arg)
+{
+ i915_refct_sgt_put(arg->src_rsgt);
+ i915_refct_sgt_put(arg->dst_rsgt);
+}
+
+static void __memcpy_work(struct work_struct *work)
+{
+ struct i915_ttm_memcpy_work *copy_work =
+ container_of(work, typeof(*copy_work), work);
+ struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
+ bool cookie = dma_fence_begin_signalling();
+
+ i915_ttm_move_memcpy(arg);
+ dma_fence_end_signalling(cookie);
+
+ dma_fence_signal(&copy_work->fence);
+
+ i915_ttm_memcpy_release(arg);
+ dma_fence_put(&copy_work->fence);
+}
+
+static void __memcpy_irq_work(struct irq_work *irq_work)
+{
+ struct i915_ttm_memcpy_work *copy_work =
+ container_of(irq_work, typeof(*copy_work), irq_work);
+ struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
+
+ dma_fence_signal(&copy_work->fence);
+ i915_ttm_memcpy_release(arg);
+ dma_fence_put(&copy_work->fence);
+}
+
+static void __memcpy_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ struct i915_ttm_memcpy_work *copy_work =
+ container_of(cb, typeof(*copy_work), cb);
+
+ if (unlikely(fence->error || I915_SELFTEST_ONLY(fail_gpu_migration))) {
+ INIT_WORK(&copy_work->work, __memcpy_work);
+ queue_work(system_unbound_wq, &copy_work->work);
+ } else {
+ init_irq_work(&copy_work->irq_work, __memcpy_irq_work);
+ irq_work_queue(&copy_work->irq_work);
+ }
+}
+
+static const char *get_driver_name(struct dma_fence *fence)
+{
+ return "i915_ttm_memcpy_work";
+}
+
+static const char *get_timeline_name(struct dma_fence *fence)
+{
+ return "unbound";
+}
+
+static const struct dma_fence_ops dma_fence_memcpy_ops = {
+ .get_driver_name = get_driver_name,
+ .get_timeline_name = get_timeline_name,
+};
+
+static struct dma_fence *
+i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
+ struct dma_fence *dep)
+{
+ int ret;
+
+ spin_lock_init(&work->lock);
+ dma_fence_init(&work->fence, &dma_fence_memcpy_ops, &work->lock, 0, 0);
+ dma_fence_get(&work->fence);
+ ret = dma_fence_add_callback(dep, &work->cb, __memcpy_cb);
+ if (ret) {
+ if (ret != -ENOENT)
+ dma_fence_wait(dep, false);
+
+ return ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? -EINVAL :
+ dep->error);
+ }
+
+ return &work->fence;
+}
+
+static struct dma_fence *
+__i915_ttm_move(struct ttm_buffer_object *bo,
+ const struct ttm_operation_ctx *ctx, bool clear,
+ struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
+ struct i915_refct_sgt *dst_rsgt, bool allow_accel,
+ const struct i915_deps *move_deps)
+{
+ struct i915_ttm_memcpy_work *copy_work = NULL;
+ struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
+ struct dma_fence *fence = ERR_PTR(-EINVAL);
+
+ if (allow_accel) {
+ fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
+ &dst_rsgt->table, move_deps);
+
+ /*
+ * We only need to intercept the error when moving to lmem.
+ * When moving to system, TTM or shmem will provide us with
+ * cleared pages.
+ */
+ if (!IS_ERR(fence) && !i915_ttm_gtt_binds_lmem(dst_mem) &&
+ !I915_SELFTEST_ONLY(fail_gpu_migration ||
+ fail_work_allocation))
+ goto out;
+ }
+
+	/* If we've scheduled a gpu migration, try to arm the error intercept. */
+ if (!IS_ERR(fence)) {
+ struct dma_fence *dep = fence;
+
+ if (!I915_SELFTEST_ONLY(fail_work_allocation))
+ copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);
+
+ if (copy_work) {
+ arg = &copy_work->arg;
+ i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
+ dst_rsgt);
+ fence = i915_ttm_memcpy_work_arm(copy_work, dep);
+ } else {
+ dma_fence_wait(dep, false);
+ fence = ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ?
+ -EINVAL : fence->error);
+ }
+ dma_fence_put(dep);
+
+ if (!IS_ERR(fence))
+ goto out;
+ } else if (move_deps) {
+ int err = i915_deps_sync(move_deps, ctx);
+
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ /* Error intercept failed or no accelerated migration to start with */
+ if (!copy_work)
+ i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
+ dst_rsgt);
+ i915_ttm_move_memcpy(arg);
+ i915_ttm_memcpy_release(arg);
+ kfree(copy_work);
+
+ return NULL;
+out:
+ if (!fence && copy_work) {
+ i915_ttm_memcpy_release(arg);
+ kfree(copy_work);
+ }
+
+ return fence;
+}
+
+static int
+prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ struct i915_deps *deps)
+{
+ int ret;
+
+ ret = i915_deps_add_dependency(deps, bo->moving, ctx);
+ if (!ret)
+ ret = i915_deps_add_resv(deps, bo->base.resv, ctx);
+
+ return ret;
+}
+
+/**
+ * i915_ttm_move - The TTM move callback used by i915.
+ * @bo: The buffer object.
+ * @evict: Whether this is an eviction.
+ * @dst_mem: The destination ttm resource.
+ * @hop: If we need multihop, what temporary memory type to move to.
+ *
+ * Return: 0 if successful, negative error code otherwise.
+ */
+int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_resource *dst_mem,
+ struct ttm_place *hop)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct ttm_resource_manager *dst_man =
+ ttm_manager_type(bo->bdev, dst_mem->mem_type);
+ struct dma_fence *migration_fence = NULL;
+ struct ttm_tt *ttm = bo->ttm;
+ struct i915_refct_sgt *dst_rsgt;
+ bool clear;
+ int ret;
+
+ if (GEM_WARN_ON(!obj)) {
+ ttm_bo_move_null(bo, dst_mem);
+ return 0;
+ }
+
+ ret = i915_ttm_move_notify(bo);
+ if (ret)
+ return ret;
+
+ if (obj->mm.madv != I915_MADV_WILLNEED) {
+ i915_ttm_purge(obj);
+ ttm_resource_free(bo, &dst_mem);
+ return 0;
+ }
+
+ /* Populate ttm with pages if needed. Typically system memory. */
+ if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) {
+ ret = ttm_tt_populate(bo->bdev, ttm, ctx);
+ if (ret)
+ return ret;
+ }
+
+ dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem);
+ if (IS_ERR(dst_rsgt))
+ return PTR_ERR(dst_rsgt);
+
+ clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
+ if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
+ struct i915_deps deps;
+
+ i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+ ret = prev_deps(bo, ctx, &deps);
+ if (ret) {
+ i915_refct_sgt_put(dst_rsgt);
+ return ret;
+ }
+
+ migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm,
+ dst_rsgt, true, &deps);
+ i915_deps_fini(&deps);
+ }
+
+ /* We can possibly get an -ERESTARTSYS here */
+ if (IS_ERR(migration_fence)) {
+ i915_refct_sgt_put(dst_rsgt);
+ return PTR_ERR(migration_fence);
+ }
+
+ if (migration_fence) {
+ ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict,
+ true, dst_mem);
+ if (ret) {
+ dma_fence_wait(migration_fence, false);
+ ttm_bo_move_sync_cleanup(bo, dst_mem);
+ }
+ dma_fence_put(migration_fence);
+ } else {
+ ttm_bo_move_sync_cleanup(bo, dst_mem);
+ }
+
+ i915_ttm_adjust_domains_after_move(obj);
+ i915_ttm_free_cached_io_rsgt(obj);
+
+ if (i915_ttm_gtt_binds_lmem(dst_mem) || i915_ttm_cpu_maps_iomem(dst_mem)) {
+ obj->ttm.cached_io_rsgt = dst_rsgt;
+ obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl;
+ obj->ttm.get_io_page.sg_idx = 0;
+ } else {
+ i915_refct_sgt_put(dst_rsgt);
+ }
+
+ i915_ttm_adjust_lru(obj);
+ i915_ttm_adjust_gem_after_move(obj);
+ return 0;
+}
+
+/**
+ * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to
+ * another
+ * @dst: The destination object
+ * @src: The source object
+ * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used.
+ * @intr: Whether to perform waits interruptibly.
+ *
+ * Note: The caller is responsible for ensuring that the underlying
+ * TTM objects are populated if needed and locked.
+ *
+ * Return: Zero on success. Negative error code on error. If @intr == true,
+ * then it may return -ERESTARTSYS or -EINTR.
+ */
+int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
+ struct drm_i915_gem_object *src,
+ bool allow_accel, bool intr)
+{
+ struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst);
+ struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src);
+ struct ttm_operation_ctx ctx = {
+ .interruptible = intr,
+ };
+ struct i915_refct_sgt *dst_rsgt;
+ struct dma_fence *copy_fence;
+ struct i915_deps deps;
+ int ret;
+
+ assert_object_held(dst);
+ assert_object_held(src);
+ i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+
+ ret = dma_resv_reserve_shared(src_bo->base.resv, 1);
+ if (ret)
+ return ret;
+
+ ret = i915_deps_add_resv(&deps, dst_bo->base.resv, &ctx);
+ if (ret)
+ return ret;
+
+ ret = i915_deps_add_resv(&deps, src_bo->base.resv, &ctx);
+ if (ret)
+ return ret;
+
+ dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
+ copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource,
+ dst_bo->ttm, dst_rsgt, allow_accel,
+ &deps);
+
+ i915_deps_fini(&deps);
+ i915_refct_sgt_put(dst_rsgt);
+ if (IS_ERR_OR_NULL(copy_fence))
+ return PTR_ERR_OR_ZERO(copy_fence);
+
+ dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence);
+ dma_resv_add_shared_fence(src_bo->base.resv, copy_fence);
+
+ dma_fence_put(copy_fence);
+
+ return 0;
+}
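For orientation, here is a minimal sketch of how a caller might drive i915_gem_obj_copy_ttm(). It is illustrative only and not part of the patch: it assumes both objects are already TTM-backed and populated, uses i915_gem_object_lock()/i915_gem_object_unlock() as the usual i915 dma-resv locking helpers, and a real caller would normally lock both objects under one i915_gem_ww_ctx to avoid deadlocks.

#include "gem/i915_gem_object.h"
#include "gem/i915_gem_ttm_move.h"

/* Illustrative sketch: copy one TTM-backed GEM object into another. */
static int example_copy(struct drm_i915_gem_object *dst,
			struct drm_i915_gem_object *src)
{
	int err;

	/* Lock both reservation objects, as the helper requires. */
	err = i915_gem_object_lock(dst, NULL);
	if (err)
		return err;
	err = i915_gem_object_lock(src, NULL);
	if (err)
		goto out_dst;

	/* Prefer the blitter (allow_accel), wait interruptibly (intr). */
	err = i915_gem_obj_copy_ttm(dst, src, true, true);

	i915_gem_object_unlock(src);
out_dst:
	i915_gem_object_unlock(dst);
	return err;
}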
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
new file mode 100644
index 000000000000..d2e7f149e05c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _I915_GEM_TTM_MOVE_H_
+#define _I915_GEM_TTM_MOVE_H_
+
+#include <linux/types.h>
+
+#include "i915_selftest.h"
+
+struct ttm_buffer_object;
+struct ttm_operation_ctx;
+struct ttm_place;
+struct ttm_resource;
+struct ttm_tt;
+
+struct drm_i915_gem_object;
+struct i915_refct_sgt;
+
+int i915_ttm_move_notify(struct ttm_buffer_object *bo);
+
+I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+ bool work_allocation));
+
+int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
+ struct drm_i915_gem_object *src,
+ bool allow_accel, bool intr);
+
+/* Internal I915 TTM declarations and definitions below. */
+
+int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_resource *dst_mem,
+ struct ttm_place *hop);
+
+void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj);
+
+void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj);
+
+#endif
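As a rough sketch of where the newly exported entry point is meant to live: i915_ttm_move() matches the signature of TTM's ->move() device callback. The structure below is abbreviated and the other callbacks are placeholders; it is not something this patch spells out.

#include <drm/ttm/ttm_device.h>

#include "gem/i915_gem_ttm_move.h"

static struct ttm_device_funcs example_ttm_driver = {
	/* ...tt_create, tt_populate, evict_flags, etc. elided... */
	.move = i915_ttm_move,
};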
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
index 3b6d14b5c604..9aad84059d56 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
@@ -12,6 +12,7 @@
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
#include "gem/i915_gem_ttm_pm.h"
/**
@@ -79,6 +80,7 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply,
err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false);
GEM_WARN_ON(err);
+ ttm_bo_wait_ctx(backup_bo, &ctx);
obj->ttm.backup = backup;
return 0;
@@ -169,6 +171,7 @@ static int i915_ttm_restore(struct i915_gem_apply_to_region *apply,
err = i915_gem_obj_copy_ttm(obj, backup, pm_apply->allow_gpu,
false);
GEM_WARN_ON(err);
+ ttm_bo_wait_ctx(backup_bo, &ctx);
obj->ttm.backup = NULL;
err = 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 3173c9f9a040..3cc01c30dd62 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -529,7 +529,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
* On almost all of the older hw, we cannot tell the GPU that
* a page is readonly.
*/
- if (!dev_priv->gt.vm->has_read_only)
+ if (!to_gt(dev_priv)->vm->has_read_only)
return -ENODEV;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index f909aaa09d9c..dab3d30c09a0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -10,7 +10,6 @@
#include "gt/intel_engine.h"
-#include "dma_resv_utils.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
@@ -25,7 +24,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
return timeout;
if (dma_fence_is_i915(fence))
- return i915_request_wait(to_request(fence), flags, timeout);
+ return i915_request_wait_timeout(to_request(fence), flags, timeout);
return dma_fence_wait_timeout(fence,
flags & I915_WAIT_INTERRUPTIBLE,
@@ -37,58 +36,22 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
unsigned int flags,
long timeout)
{
- struct dma_fence *excl;
- bool prune_fences = false;
-
- if (flags & I915_WAIT_ALL) {
- struct dma_fence **shared;
- unsigned int count, i;
- int ret;
-
- ret = dma_resv_get_fences(resv, &excl, &count, &shared);
- if (ret)
- return ret;
-
- for (i = 0; i < count; i++) {
- timeout = i915_gem_object_wait_fence(shared[i],
- flags, timeout);
- if (timeout < 0)
- break;
-
- dma_fence_put(shared[i]);
- }
-
- for (; i < count; i++)
- dma_fence_put(shared[i]);
- kfree(shared);
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ long ret = timeout ?: 1;
+
+ dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ ret = i915_gem_object_wait_fence(fence, flags, timeout);
+ if (ret <= 0)
+ break;
- /*
- * If both shared fences and an exclusive fence exist,
- * then by construction the shared fences must be later
- * than the exclusive fence. If we successfully wait for
- * all the shared fences, we know that the exclusive fence
- * must all be signaled. If all the shared fences are
- * signaled, we can prune the array and recover the
- * floating references on the fences/requests.
- */
- prune_fences = count && timeout >= 0;
- } else {
- excl = dma_resv_get_excl_unlocked(resv);
+ if (timeout)
+ timeout = ret;
}
+ dma_resv_iter_end(&cursor);
- if (excl && timeout >= 0)
- timeout = i915_gem_object_wait_fence(excl, flags, timeout);
-
- dma_fence_put(excl);
-
- /*
- * Opportunistically prune the fences iff we know they have *all* been
- * signaled.
- */
- if (prune_fences)
- dma_resv_prune(resv);
-
- return timeout;
+ return ret;
}
static void fence_set_priority(struct dma_fence *fence,
@@ -151,32 +114,13 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
unsigned int flags,
const struct i915_sched_attr *attr)
{
- struct dma_fence *excl;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
- if (flags & I915_WAIT_ALL) {
- struct dma_fence **shared;
- unsigned int count, i;
- int ret;
-
- ret = dma_resv_get_fences(obj->base.resv, &excl, &count,
- &shared);
- if (ret)
- return ret;
-
- for (i = 0; i < count; i++) {
- i915_gem_fence_wait_priority(shared[i], attr);
- dma_fence_put(shared[i]);
- }
-
- kfree(shared);
- } else {
- excl = dma_resv_get_excl_unlocked(obj->base.resv);
- }
-
- if (excl) {
- i915_gem_fence_wait_priority(excl, attr);
- dma_fence_put(excl);
- }
+ dma_resv_iter_begin(&cursor, obj->base.resv, flags & I915_WAIT_ALL);
+ dma_resv_for_each_fence_unlocked(&cursor, fence)
+ i915_gem_fence_wait_priority(fence, attr);
+ dma_resv_iter_end(&cursor);
return 0;
}
@@ -196,7 +140,11 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
timeout = i915_gem_object_wait_reservation(obj->base.resv,
flags, timeout);
- return timeout < 0 ? timeout : 0;
+
+ if (timeout < 0)
+ return timeout;
+
+ return !timeout ? -ETIME : 0;
}
static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
@@ -306,6 +254,6 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
unsigned int flags)
{
might_sleep();
- /* NOP for now. */
- return 0;
+
+ return i915_gem_object_wait_moving_fence(obj, !!(flags & I915_WAIT_INTERRUPTIBLE));
}
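The rework above replaces the hand-rolled snapshotting of exclusive and shared fences with the dma_resv_iter helpers. Below is a minimal, self-contained sketch of that unlocked iteration pattern, using plain dma_fence_wait_timeout() rather than the i915-specific wait; note that at this point in time the third argument of dma_resv_iter_begin() is a bool selecting whether shared fences are included.

#include <linux/dma-fence.h>
#include <linux/dma-resv.h>

/* Wait on every fence in @resv; include shared fences iff @all_fences. */
static long example_wait_resv(struct dma_resv *resv, bool all_fences,
			      long timeout)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	long ret = timeout ?: 1;

	dma_resv_iter_begin(&cursor, resv, all_fences);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		ret = dma_fence_wait_timeout(fence, true, timeout);
		if (ret <= 0)
			break;
		if (timeout)
			timeout = ret;
	}
	dma_resv_iter_end(&cursor);

	return ret;
}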
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
index dbdbdc344d87..7271fbf813fa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gemfs.c
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -12,6 +12,7 @@
int i915_gemfs_init(struct drm_i915_private *i915)
{
+ char huge_opt[] = "huge=within_size"; /* r/w */
struct file_system_type *type;
struct vfsmount *gemfs;
char *opts;
@@ -31,10 +32,8 @@ int i915_gemfs_init(struct drm_i915_private *i915)
*/
opts = NULL;
- if (intel_vtd_active()) {
+ if (intel_vtd_active(i915)) {
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
- static char huge_opt[] = "huge=within_size"; /* r/w */
-
opts = huge_opt;
drm_info(&i915->drm,
"Transparent Hugepage mode '%s'\n",
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index b2003133deaf..11f0aa65f8a3 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -22,6 +22,22 @@
#include "selftests/mock_region.h"
#include "selftests/i915_random.h"
+static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915,
+ struct file *file)
+{
+ struct i915_gem_context *ctx = live_context(i915, file);
+ struct i915_address_space *vm;
+
+ if (IS_ERR(ctx))
+ return ctx;
+
+ vm = ctx->vm;
+ if (vm)
+ WRITE_ONCE(vm->scrub_64K, true);
+
+ return ctx;
+}
+
static const unsigned int page_sizes[] = {
I915_GTT_PAGE_SIZE_2M,
I915_GTT_PAGE_SIZE_64K,
@@ -552,7 +568,7 @@ out_unpin:
out_put:
i915_gem_object_put(obj);
out_region:
- intel_memory_region_put(mem);
+ intel_memory_region_destroy(mem);
return err;
}
@@ -959,6 +975,8 @@ static int igt_mock_ppgtt_64K(void *arg)
__i915_gem_object_put_pages(obj);
i915_gem_object_unlock(obj);
i915_gem_object_put(obj);
+
+ i915_gem_drain_freed_objects(i915);
}
}
@@ -1080,10 +1098,6 @@ static int __igt_write_huge(struct intel_context *ce,
if (IS_ERR(vma))
return PTR_ERR(vma);
- err = i915_vma_unbind(vma);
- if (err)
- return err;
-
err = i915_vma_pin(vma, size, 0, flags | offset);
if (err) {
/*
@@ -1117,7 +1131,7 @@ out_vma_unpin:
return err;
}
-static int igt_write_huge(struct i915_gem_context *ctx,
+static int igt_write_huge(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj)
{
struct i915_gem_engines *engines;
@@ -1127,6 +1141,8 @@ static int igt_write_huge(struct i915_gem_context *ctx,
IGT_TIMEOUT(end_time);
unsigned int max_page_size;
unsigned int count;
+ struct i915_gem_context *ctx;
+ struct file *file;
u64 max;
u64 num;
u64 size;
@@ -1134,6 +1150,16 @@ static int igt_write_huge(struct i915_gem_context *ctx,
int i, n;
int err = 0;
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
size = obj->base.size;
@@ -1153,7 +1179,7 @@ static int igt_write_huge(struct i915_gem_context *ctx,
}
i915_gem_context_unlock_engines(ctx);
if (!n)
- return 0;
+ goto out;
/*
* To keep things interesting when alternating between engines in our
@@ -1215,6 +1241,8 @@ static int igt_write_huge(struct i915_gem_context *ctx,
kfree(order);
+out:
+ fput(file);
return err;
}
@@ -1277,8 +1305,7 @@ static u32 igt_random_size(struct rnd_state *prng,
static int igt_ppgtt_smoke_huge(void *arg)
{
- struct i915_gem_context *ctx = arg;
- struct drm_i915_private *i915 = ctx->i915;
+ struct drm_i915_private *i915 = arg;
struct drm_i915_gem_object *obj;
I915_RND_STATE(prng);
struct {
@@ -1302,6 +1329,7 @@ static int igt_ppgtt_smoke_huge(void *arg)
u32 min = backends[i].min;
u32 max = backends[i].max;
u32 size = max;
+
try_again:
size = igt_random_size(&prng, min, rounddown_pow_of_two(size));
@@ -1336,7 +1364,7 @@ try_again:
goto out_unpin;
}
- err = igt_write_huge(ctx, obj);
+ err = igt_write_huge(i915, obj);
if (err) {
pr_err("%s write-huge failed with size=%u, i=%d\n",
__func__, size, i);
@@ -1363,8 +1391,7 @@ out_put:
static int igt_ppgtt_sanity_check(void *arg)
{
- struct i915_gem_context *ctx = arg;
- struct drm_i915_private *i915 = ctx->i915;
+ struct drm_i915_private *i915 = arg;
unsigned int supported = INTEL_INFO(i915)->page_sizes;
struct {
igt_create_fn fn;
@@ -1431,7 +1458,7 @@ static int igt_ppgtt_sanity_check(void *arg)
if (pages)
obj->mm.page_sizes.sg = pages;
- err = igt_write_huge(ctx, obj);
+ err = igt_write_huge(i915, obj);
i915_gem_object_lock(obj, NULL);
i915_gem_object_unpin_pages(obj);
@@ -1458,15 +1485,27 @@ out:
static int igt_tmpfs_fallback(void *arg)
{
- struct i915_gem_context *ctx = arg;
- struct drm_i915_private *i915 = ctx->i915;
+ struct drm_i915_private *i915 = arg;
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
struct vfsmount *gemfs = i915->mm.gemfs;
- struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx);
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
+ struct file *file;
u32 *vaddr;
int err = 0;
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+ vm = i915_gem_context_get_eb_vm(ctx);
+
/*
* Make sure that we don't burst into a ball of flames upon falling back
* to tmpfs, which we rely on if on the off-chance we encounter a failure
@@ -1510,33 +1549,47 @@ out_restore:
i915->mm.gemfs = gemfs;
i915_vm_put(vm);
+out:
+ fput(file);
return err;
}
static int igt_shrink_thp(void *arg)
{
- struct i915_gem_context *ctx = arg;
- struct drm_i915_private *i915 = ctx->i915;
- struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx);
+ struct drm_i915_private *i915 = arg;
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
struct drm_i915_gem_object *obj;
struct i915_gem_engines_iter it;
struct intel_context *ce;
struct i915_vma *vma;
+ struct file *file;
unsigned int flags = PIN_USER;
unsigned int n;
bool should_swap;
- int err = 0;
+ int err;
+
+ if (!igt_can_allocate_thp(i915)) {
+ pr_info("missing THP support, skipping\n");
+ return 0;
+ }
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+ vm = i915_gem_context_get_eb_vm(ctx);
/*
* Sanity check shrinking huge-paged object -- make sure nothing blows
* up.
*/
- if (!igt_can_allocate_thp(i915)) {
- pr_info("missing THP support, skipping\n");
- goto out_vm;
- }
-
obj = i915_gem_object_create_shmem(i915, SZ_2M);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
@@ -1626,7 +1679,8 @@ out_put:
i915_gem_object_put(obj);
out_vm:
i915_vm_put(vm);
-
+out:
+ fput(file);
return err;
}
@@ -1651,7 +1705,7 @@ int i915_gem_huge_page_mock_selftests(void)
mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
mkwrite_device_info(dev_priv)->ppgtt_size = 48;
- ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
+ ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
if (IS_ERR(ppgtt)) {
err = PTR_ERR(ppgtt);
goto out_unlock;
@@ -1687,36 +1741,14 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_ppgtt_smoke_huge),
SUBTEST(igt_ppgtt_sanity_check),
};
- struct i915_gem_context *ctx;
- struct i915_address_space *vm;
- struct file *file;
- int err;
if (!HAS_PPGTT(i915)) {
pr_info("PPGTT not supported, skipping live-selftests\n");
return 0;
}
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
- file = mock_file(i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- ctx = live_context(i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out_file;
- }
-
- vm = ctx->vm;
- if (vm)
- WRITE_ONCE(vm->scrub_64K, true);
-
- err = i915_subtests(tests, ctx);
-
-out_file:
- fput(file);
- return err;
+ return i915_live_subtests(tests, i915);
}
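Each converted hugepage subtest now creates and tears down its own mock file and context via hugepage_ctx() instead of receiving a shared context from the suite entry point. The sketch below shows that shape, matching the hunks above; the actual test body is a placeholder and the helpers are the ones defined in huge_pages.c.

static int example_hugepage_subtest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx;
	struct file *file;
	int err = 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = hugepage_ctx(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}

	/* ...exercise huge pages against ctx here... */

out:
	fput(file);
	return err;
}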
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index 8402ed925a69..75947e9dada2 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -592,7 +592,7 @@ int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_client_tiled_blits),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index b32f7fed2d9c..3f41fe5ec9d4 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -88,9 +88,9 @@ static int live_nop_switch(void *arg)
rq = i915_request_get(this);
i915_request_add(this);
}
- if (i915_request_wait(rq, 0, HZ) < 0) {
+ if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
pr_err("Failed to populate %d contexts\n", nctx);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(to_gt(i915));
i915_request_put(rq);
err = -EIO;
goto out_file;
@@ -146,7 +146,7 @@ static int live_nop_switch(void *arg)
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
pr_err("Switching between %ld contexts timed out\n",
prime);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(to_gt(i915));
i915_request_put(rq);
break;
}
@@ -1223,7 +1223,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
return 0;
if (flags & TEST_RESET)
- igt_global_reset_lock(&i915->gt);
+ igt_global_reset_lock(to_gt(i915));
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj)) {
@@ -1306,7 +1306,7 @@ out_put:
out_unlock:
if (flags & TEST_RESET)
- igt_global_reset_unlock(&i915->gt);
+ igt_global_reset_unlock(to_gt(i915));
if (ret)
pr_err("%s: Failed with %d!\n", name, ret);
@@ -1481,10 +1481,10 @@ static int check_scratch(struct i915_address_space *vm, u64 offset)
static int write_to_scratch(struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
+ struct drm_i915_gem_object *obj,
u64 offset, u32 value)
{
struct drm_i915_private *i915 = ctx->i915;
- struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
struct i915_request *rq;
struct i915_vma *vma;
@@ -1497,15 +1497,9 @@ static int write_to_scratch(struct i915_gem_context *ctx,
if (err)
return err;
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto out;
- }
+ if (IS_ERR(cmd))
+ return PTR_ERR(cmd);
*cmd++ = MI_STORE_DWORD_IMM_GEN4;
if (GRAPHICS_VER(i915) >= 8) {
@@ -1569,17 +1563,19 @@ err_unpin:
i915_vma_unpin(vma);
out_vm:
i915_vm_put(vm);
-out:
- i915_gem_object_put(obj);
+
+ if (!err)
+ err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
+
return err;
}
static int read_from_scratch(struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
+ struct drm_i915_gem_object *obj,
u64 offset, u32 *value)
{
struct drm_i915_private *i915 = ctx->i915;
- struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
const u32 result = 0x100;
struct i915_request *rq;
@@ -1594,10 +1590,6 @@ static int read_from_scratch(struct i915_gem_context *ctx,
if (err)
return err;
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
if (GRAPHICS_VER(i915) >= 8) {
const u32 GPR0 = engine->mmio_base + 0x600;
@@ -1615,7 +1607,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out;
+ goto err_unpin;
}
memset(cmd, POISON_INUSE, PAGE_SIZE);
@@ -1651,7 +1643,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out;
+ goto err_unpin;
}
memset(cmd, POISON_INUSE, PAGE_SIZE);
@@ -1722,8 +1714,10 @@ err_unpin:
i915_vma_unpin(vma);
out_vm:
i915_vm_put(vm);
-out:
- i915_gem_object_put(obj);
+
+ if (!err)
+ err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
+
return err;
}
@@ -1757,6 +1751,7 @@ static int igt_vm_isolation(void *arg)
{
struct drm_i915_private *i915 = arg;
struct i915_gem_context *ctx_a, *ctx_b;
+ struct drm_i915_gem_object *obj_a, *obj_b;
unsigned long num_engines, count;
struct intel_engine_cs *engine;
struct igt_live_test t;
@@ -1810,6 +1805,18 @@ static int igt_vm_isolation(void *arg)
vm_total = ctx_a->vm->total;
GEM_BUG_ON(ctx_b->vm->total != vm_total);
+ obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj_a)) {
+ err = PTR_ERR(obj_a);
+ goto out_file;
+ }
+
+ obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj_b)) {
+ err = PTR_ERR(obj_b);
+ goto put_a;
+ }
+
count = 0;
num_engines = 0;
for_each_uabi_engine(engine, i915) {
@@ -1832,13 +1839,13 @@ static int igt_vm_isolation(void *arg)
I915_GTT_PAGE_SIZE, vm_total,
sizeof(u32), alignof_dword);
- err = write_to_scratch(ctx_a, engine,
+ err = write_to_scratch(ctx_a, engine, obj_a,
offset, 0xdeadbeef);
if (err == 0)
- err = read_from_scratch(ctx_b, engine,
+ err = read_from_scratch(ctx_b, engine, obj_b,
offset, &value);
if (err)
- goto out_file;
+ goto put_b;
if (value != expected) {
pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
@@ -1847,7 +1854,7 @@ static int igt_vm_isolation(void *arg)
lower_32_bits(offset),
this);
err = -EINVAL;
- goto out_file;
+ goto put_b;
}
this++;
@@ -1858,6 +1865,10 @@ static int igt_vm_isolation(void *arg)
pr_info("Checked %lu scratch offsets across %lu engines\n",
count, num_engines);
+put_b:
+ i915_gem_object_put(obj_b);
+put_a:
+ i915_gem_object_put(obj_a);
out_file:
if (igt_live_test_end(&t))
err = -EIO;
@@ -1877,7 +1888,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_vm_isolation),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 4a6bb64c3a35..3cc74b0fed06 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -102,7 +102,7 @@ static int igt_dmabuf_import_same_driver_lmem(void *arg)
obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1);
if (IS_ERR(obj)) {
pr_err("__i915_gem_object_create_user failed with err=%ld\n",
- PTR_ERR(dmabuf));
+ PTR_ERR(obj));
err = PTR_ERR(obj);
goto out_ret;
}
@@ -158,7 +158,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
regions, num_regions);
if (IS_ERR(obj)) {
pr_err("__i915_gem_object_create_user failed with err=%ld\n",
- PTR_ERR(dmabuf));
+ PTR_ERR(obj));
err = PTR_ERR(obj);
goto out_ret;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index 28a700f08b49..ecb691c81d1e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -4,6 +4,7 @@
*/
#include "gt/intel_migrate.h"
+#include "gem/i915_gem_ttm_move.h"
static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
bool fill)
@@ -227,17 +228,38 @@ out_put:
return err;
}
+static int igt_lmem_pages_failsafe_migrate(void *arg)
+{
+ int fail_gpu, fail_alloc, ret;
+
+ for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
+ for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) {
+ pr_info("Simulated failure modes: gpu: %d, alloc: %d\n",
+ fail_gpu, fail_alloc);
+ i915_ttm_migrate_set_failure_modes(fail_gpu,
+ fail_alloc);
+ ret = igt_lmem_pages_migrate(arg);
+ if (ret)
+ goto out_err;
+ }
+ }
+
+out_err:
+ i915_ttm_migrate_set_failure_modes(false, false);
+ return ret;
+}
+
int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_smem_create_migrate),
SUBTEST(igt_lmem_create_migrate),
SUBTEST(igt_same_create_migrate),
- SUBTEST(igt_lmem_pages_migrate),
+ SUBTEST(igt_lmem_pages_failsafe_migrate),
};
if (!HAS_LMEM(i915))
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
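The failsafe wrapper above follows a simple pattern that could wrap any migration subtest: arm the selftest-only failure knobs, run the subtest, and always disarm the knobs afterwards. A generic sketch of that pattern is shown below; subtest is a hypothetical callback, and the knob comes from the new i915_gem_ttm_move.h header.

#include "gem/i915_gem_ttm_move.h"

static int example_run_with_failure_modes(int (*subtest)(void *arg), void *arg)
{
	int fail_gpu, fail_alloc, ret = 0;

	for (fail_gpu = 0; fail_gpu < 2 && !ret; ++fail_gpu)
		for (fail_alloc = 0; fail_alloc < 2 && !ret; ++fail_alloc) {
			i915_ttm_migrate_set_failure_modes(fail_gpu,
							   fail_alloc);
			ret = subtest(arg);
		}

	/* Always disarm the injection knobs before returning. */
	i915_ttm_migrate_set_failure_modes(false, false);
	return ret;
}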
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 6d30cdfa80f3..743e6ab2c40b 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -84,6 +84,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
struct rnd_state *prng)
{
const unsigned long npages = obj->base.size / PAGE_SIZE;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt_view view;
struct i915_vma *vma;
unsigned long page;
@@ -141,7 +142,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
if (offset >= obj->base.size)
goto out;
- intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
cpu = kmap(p) + offset_in_page(offset);
@@ -175,6 +176,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
{
const unsigned int nreal = obj->scratch / PAGE_SIZE;
const unsigned long npages = obj->base.size / PAGE_SIZE;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_vma *vma;
unsigned long page;
int err;
@@ -234,7 +236,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
if (offset >= obj->base.size)
continue;
- intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
cpu = kmap(p) + offset_in_page(offset);
@@ -616,14 +618,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
static void disable_retire_worker(struct drm_i915_private *i915)
{
i915_gem_driver_unregister__shrinker(i915);
- intel_gt_pm_get(&i915->gt);
- cancel_delayed_work_sync(&i915->gt.requests.retire_work);
+ intel_gt_pm_get(to_gt(i915));
+ cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work);
}
static void restore_retire_worker(struct drm_i915_private *i915)
{
igt_flush_test(i915);
- intel_gt_pm_put(&i915->gt);
+ intel_gt_pm_put(to_gt(i915));
i915_gem_driver_register__shrinker(i915);
}
@@ -651,8 +653,8 @@ static int igt_mmap_offset_exhaustion(void *arg)
/* Disable background reaper */
disable_retire_worker(i915);
- GEM_BUG_ON(!i915->gt.awake);
- intel_gt_retire_requests(&i915->gt);
+ GEM_BUG_ON(!to_gt(i915)->awake);
+ intel_gt_retire_requests(to_gt(i915));
i915_gem_drain_freed_objects(i915);
/* Trim the device mmap space to only a page */
@@ -728,7 +730,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
/* Now fill with busy dead objects that we expect to reap */
for (loop = 0; loop < 3; loop++) {
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
break;
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
@@ -942,7 +944,7 @@ static int __igt_mmap(struct drm_i915_private *i915,
}
if (type == I915_MMAP_TYPE_GTT)
- intel_gt_flush_ggtt_writes(&i915->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
err = wc_check(obj);
if (err == -ENXIO)
@@ -1049,7 +1051,7 @@ static int __igt_mmap_access(struct drm_i915_private *i915,
goto out_unmap;
}
- intel_gt_flush_ggtt_writes(&i915->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
err = access_process_vm(current, addr, &x, sizeof(x), 0);
if (err != sizeof(x)) {
@@ -1065,7 +1067,7 @@ static int __igt_mmap_access(struct drm_i915_private *i915,
goto out_unmap;
}
- intel_gt_flush_ggtt_writes(&i915->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
err = __get_user(y, ptr);
if (err) {
@@ -1165,7 +1167,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
}
if (type == I915_MMAP_TYPE_GTT)
- intel_gt_flush_ggtt_writes(&i915->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;