aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c89
1 files changed, 49 insertions, 40 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index e635e1e5f4d3..f0998f1225af 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -19,6 +19,7 @@
#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
#include "i915_drv.h"
#include "i915_gem_clflush.h"
@@ -252,6 +253,7 @@ struct i915_execbuffer {
bool has_fence : 1;
bool needs_unfenced : 1;
+ struct intel_context *ce;
struct i915_request *rq;
u32 *rq_cmd;
unsigned int rq_size;
@@ -699,7 +701,9 @@ static int eb_reserve(struct i915_execbuffer *eb)
case 1:
/* Too fragmented, unbind everything and retry */
+ mutex_lock(&eb->context->vm->mutex);
err = i915_gem_evict_vm(eb->context->vm);
+ mutex_unlock(&eb->context->vm->mutex);
if (err)
return err;
break;
@@ -727,7 +731,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
return -ENOENT;
eb->gem_context = ctx;
- if (ctx->vm)
+ if (rcu_access_pointer(ctx->vm))
eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
eb->context_flags = 0;
@@ -882,6 +886,9 @@ static void eb_destroy(const struct i915_execbuffer *eb)
{
GEM_BUG_ON(eb->reloc_cache.rq);
+ if (eb->reloc_cache.ce)
+ intel_context_put(eb->reloc_cache.ce);
+
if (eb->lut_size > 0)
kfree(eb->buckets);
}
@@ -904,7 +911,8 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
cache->has_fence = cache->gen < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
- cache->node.allocated = false;
+ cache->node.flags = 0;
+ cache->ce = NULL;
cache->rq = NULL;
cache->rq_size = 0;
}
@@ -965,11 +973,13 @@ static void reloc_cache_reset(struct reloc_cache *cache)
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
io_mapping_unmap_atomic((void __iomem *)vaddr);
- if (cache->node.allocated) {
+ if (drm_mm_node_allocated(&cache->node)) {
ggtt->vm.clear_range(&ggtt->vm,
cache->node.start,
cache->node.size);
+ mutex_lock(&ggtt->vm.mutex);
drm_mm_remove_node(&cache->node);
+ mutex_unlock(&ggtt->vm.mutex);
} else {
i915_vma_unpin((struct i915_vma *)cache->node.mm);
}
@@ -1044,11 +1054,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
PIN_NOEVICT);
if (IS_ERR(vma)) {
memset(&cache->node, 0, sizeof(cache->node));
+ mutex_lock(&ggtt->vm.mutex);
err = drm_mm_insert_node_in_range
(&ggtt->vm.mm, &cache->node,
PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
0, ggtt->mappable_end,
DRM_MM_INSERT_LOW);
+ mutex_unlock(&ggtt->vm.mutex);
if (err) /* no inactive aperture space, use cpu reloc */
return NULL;
} else {
@@ -1058,7 +1070,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
}
offset = cache->node.start;
- if (cache->node.allocated) {
+ if (drm_mm_node_allocated(&cache->node)) {
ggtt->vm.insert_page(&ggtt->vm,
i915_gem_object_get_dma_address(obj, page),
offset, I915_CACHE_NONE, 0);
@@ -1147,7 +1159,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
u32 *cmd;
int err;
- pool = intel_engine_pool_get(&eb->engine->pool, PAGE_SIZE);
+ pool = intel_engine_get_pool(eb->engine, PAGE_SIZE);
if (IS_ERR(pool))
return PTR_ERR(pool);
@@ -1170,7 +1182,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_unmap;
- rq = i915_request_create(eb->context);
+ rq = intel_context_create_request(cache->ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_unpin;
@@ -1241,6 +1253,29 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
if (!intel_engine_can_store_dword(eb->engine))
return ERR_PTR(-ENODEV);
+ if (!cache->ce) {
+ struct intel_context *ce;
+
+ /*
+ * The CS pre-parser can pre-fetch commands across
+ * memory sync points and starting gen12 it is able to
+ * pre-fetch across BB_START and BB_END boundaries
+ * (within the same context). We therefore use a
+ * separate context gen12+ to guarantee that the reloc
+ * writes land before the parser gets to the target
+ * memory location.
+ */
+ if (cache->gen >= 12)
+ ce = intel_context_create(eb->context->gem_context,
+ eb->engine);
+ else
+ ce = intel_context_get(eb->context);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
+
+ cache->ce = ce;
+ }
+
err = __reloc_gpu_alloc(eb, vma, len);
if (unlikely(err))
return ERR_PTR(err);
@@ -1390,7 +1425,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
IS_GEN(eb->i915, 6)) {
err = i915_vma_bind(target, target->obj->cache_level,
- PIN_GLOBAL);
+ PIN_GLOBAL, NULL);
if (WARN_ONCE(err,
"Unexpected failure to bind target VMA!"))
return err;
@@ -1992,7 +2027,7 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
u64 shadow_batch_start;
int err;
- pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len);
+ pool = intel_engine_get_pool(eb->engine, eb->batch_len);
if (IS_ERR(pool))
return ERR_CAST(pool);
@@ -2099,6 +2134,9 @@ static int eb_submit(struct i915_execbuffer *eb)
if (err)
return err;
+ if (i915_gem_context_nopreempt(eb->gem_context))
+ eb->request->flags |= I915_REQUEST_NOPREEMPT;
+
return 0;
}
@@ -2168,35 +2206,6 @@ static struct i915_request *eb_throttle(struct intel_context *ce)
return i915_request_get(rq);
}
-static int
-__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
-{
- int err;
-
- if (likely(atomic_inc_not_zero(&ce->pin_count)))
- return 0;
-
- err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
- if (err)
- return err;
-
- err = __intel_context_do_pin(ce);
- mutex_unlock(&eb->i915->drm.struct_mutex);
-
- return err;
-}
-
-static void
-__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
-{
- if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
- return;
-
- mutex_lock(&eb->i915->drm.struct_mutex);
- intel_context_unpin(ce);
- mutex_unlock(&eb->i915->drm.struct_mutex);
-}
-
static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
{
struct intel_timeline *tl;
@@ -2216,7 +2225,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
* GGTT space, so do this first before we reserve a seqno for
* ourselves.
*/
- err = __eb_pin_context(eb, ce);
+ err = intel_context_pin(ce);
if (err)
return err;
@@ -2260,7 +2269,7 @@ err_exit:
intel_context_exit(ce);
intel_context_timeline_unlock(tl);
err_unpin:
- __eb_unpin_context(eb, ce);
+ intel_context_unpin(ce);
return err;
}
@@ -2273,7 +2282,7 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
intel_context_exit(ce);
mutex_unlock(&tl->mutex);
- __eb_unpin_context(eb, ce);
+ intel_context_unpin(ce);
}
static unsigned int