aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2021-08-12 09:43:38 +1000
committerDave Airlie <airlied@redhat.com>2021-08-12 09:56:04 +1000
commit25fed6b324ac556859d6dd0b7827cc8fb653ca99 (patch)
tree3f634521c723d6434c921fc1adbefa0e7d7e4be7 /drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
parentMerge tag 'amd-drm-next-5.15-2021-08-06' of https://gitlab.freedesktop.org/agd5f/linux into drm-next (diff)
parentdrm/i915/dg2: Add SQIDI steering (diff)
downloadlinux-dev-25fed6b324ac556859d6dd0b7827cc8fb653ca99.tar.xz
linux-dev-25fed6b324ac556859d6dd0b7827cc8fb653ca99.zip
Merge tag 'drm-intel-gt-next-2021-08-06-1' of ssh://git.freedesktop.org/git/drm/drm-intel into drm-next
UAPI Changes: - Add I915_MMAP_OFFSET_FIXED On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid type. On devices without local memory, this caching mode is invalid. As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will be used, depending on the object placement on creation. WB will be used when the object can only exist in system memory, WC otherwise. Userspace: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11888 - Reinstate the mmap ioctl for (already released) integrated Gen12 platforms Rationale: Otherwise media driver breaks eg. for ADL-P. Long term goal is still to sunset the IOCTL even for integrated and require using mmap_offset. - Reject caching/set_domain IOCTLs on discrete Expected to become immutable property of the BO - Disallow changing context parameters after first use on Gen12 and earlier - Require setting context parameters at creation on platforms after Gen12 Rationale (for both): Allow less dynamic changes to the context to simplify the implementation and avoid user shooting theirselves in the foot. - Drop I915_CONTEXT_PARAM_RINGSIZE Userspace PR for compute-driver has not been merged - Drop I915_CONTEXT_PARAM_NO_ZEROMAP Userspace PR for libdrm / Beignet was never landed - Drop CONTEXT_CLONE API Userspace PR for Mesa was never landed - Drop getparam support for I915_CONTEXT_PARAM_ENGINES Only existed for symmetry wrt. setparam, never used. - Disallow bonding of virtual engines Drop the prep work, no hardware has been released needing it. - (Implicit) Disable gpu relocations Media userspace was the last userspace to still use them. They have converted so performance can be regained with an update. Core Changes: - Merge topic branch 'topic/i915-ttm-2021-06-11' (from Maarten) - Merge topic branch 'topic/revid_steppings' (from Matt R) - Merge topic branch 'topic/xehp-dg2-definitions-2021-07-21' (from Matt R) - Backmerges drm-next (Rodrigo) Driver Changes: - Initial workarounds for ADL-P (Clint) - Preliminary code for XeHP/DG2 (Stuart, Umesh, Matt R, Prathap, Ram, Venkata, Akeem, Tvrtko, John, Lucas) - Fix ADL-S DMA mask size to 39 bits (Tejas) - Remove code for CNL (Lucas) - Add ADL-P GuC/HuC firmwares (John) - Update HuC to 7.9.3 for TGL/ADL-S/RKL (John) - Fix -EDEADLK handling regression (Ville) - Implement Wa_1508744258 for DG1 and Gen12 iGFX (Jose) - Extend Wa_1406941453 to ADL-S (Jose) - Drop unnecessary workarounds per stepping for SKL/BXT/ICL (Matt R) - Use fuse info to enable SFC on Gen12 (Venkata) - Unconditionally flush the pages on acquire on EHL/JSL (Matt A) - Probe existence of backing struct pages upon userptr creation (Chris, Matt A) - Add an intermediate GEM proto-context to delay real context creation (Jason) - Implement SINGLE_TIMELINE with a syncobj (Jason) - Set the watchdog timeout directly in intel_context_set_gem (Jason) - Disallow userspace from creating contexts with too many engines (Jason) - Revert "drm/i915/gem: Asynchronous cmdparser" (Jason) - Revert "drm/i915: Propagate errors on awaiting already signaled fences" (Jason) - Revert "drm/i915: Skip over MI_NOOP when parsing" (Jason) - Revert "drm/i915: Shrink the GEM kmem_caches upon idling" (Daniel) - Always let TTM handle object migration (Jason) - Correct the locking and pin pattern for dma-buf (Thomas H, Michael R, Jason) - Migrate to system at dma-buf attach time (Thomas, Michael R) - MAJOR refactoring of the GuC backend code to allow for enabling on Gen11+ (Matt B, John, Michal Wa., Fernando, Daniele, Vinay) - Update GuC firmware interface to v62.0.0 (John, Michal Wa., Matt B) - Add GuCRC feature to hand over the control of HW RC6 to the GuC on Gen12+ when GuC submission is enabled (Vinay, Sujaritha, Daniele, John, Tvrtko) - Use the correct IRQ during resume and eliminate DRM IRQ midlayer (Thomas Z) - Add pipelined page migration and clearing (Chris, Thomas H) - Use TTM for system memory on discrete (Thomas H) - Implement object migration for display vs. dma-buf (Thomas H) - Perform execbuffer object locking as a separate step (Thomas H) - Add support for explicit L3BANK steering (Matt, Daniele) - Remove duplicated call to ops->pread (Daniel) - Fix pagefault disabling in the first execbuf slowpath (Daniel) - Simplify userptr locking (Thomas H) - Improvements to the GuC CTB code (Matt B, John) - Make GT workaround upper bounds exclusive (Matt R) - Check for nomodeset in i915_init() first (Daniel) - Delete now unused gpu reloc code (Daniel) - Document RFC plans for GuC submission, DRM scheduler and new parallel submit uAPI (Matt B) - Reintroduce buddy allocator this time with TTM (Matt A) - Support forcing page size with LMEM (Matt A) - Add i915_sched_engine to abstract a submission queue between backends (Matt B) - Use accelerated move in TTM (Ram) - Fix memory leaks from TTM backend (Thomas H) - Introduce WW transaction helper (Thomas H) - Improve debug Kconfig texts a bit (Daniel) - Unify user object creation code (Jason) - Use a table for i915_init/exit (Jason) - Move slabs to module init/exit (Daniel) - Remove now unused i915_globals (Daniel) - Extract i915_module.c (Daniel) - Consistently use adl-p/adl-s in WA comments (Jose) - Finish INTEL_GEN and friends conversion (Lucas) - Correct variable/function namings (Lucas) - Code checker fixes (Wan, Matt A) - Tracepoint improvements (Matt B) - Kerneldoc improvements (Tvrtko, Jason, Matt A, Maarten) - Selftest improvements (Chris, Matt A, Tejas, Thomas H, John, Matt B, Rahul, Vinay) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/YQ0JmYiXhGskNcrI@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c480
1 files changed, 66 insertions, 414 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 989ff064cf3a..1aa249908b64 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -277,18 +277,9 @@ struct i915_execbuffer {
bool has_llc : 1;
bool has_fence : 1;
bool needs_unfenced : 1;
-
- struct i915_request *rq;
- u32 *rq_cmd;
- unsigned int rq_size;
- struct intel_gt_buffer_pool_node *pool;
} reloc_cache;
- struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
- struct intel_context *reloc_context;
-
u64 invalid_flags; /** Set of execobj.flags that are invalid */
- u32 context_flags; /** Set of execobj.flags to insert from the ctx */
u64 batch_len; /** Length of batch within object */
u32 batch_start_offset; /** Location within object of batch */
@@ -539,9 +530,6 @@ eb_validate_vma(struct i915_execbuffer *eb,
entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
}
- if (!(entry->flags & EXEC_OBJECT_PINNED))
- entry->flags |= eb->context_flags;
-
return 0;
}
@@ -741,17 +729,13 @@ static int eb_select_context(struct i915_execbuffer *eb)
struct i915_gem_context *ctx;
ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
- if (unlikely(!ctx))
- return -ENOENT;
+ if (unlikely(IS_ERR(ctx)))
+ return PTR_ERR(ctx);
eb->gem_context = ctx;
if (rcu_access_pointer(ctx->vm))
eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
- eb->context_flags = 0;
- if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
- eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
return 0;
}
@@ -920,21 +904,38 @@ err:
return err;
}
-static int eb_validate_vmas(struct i915_execbuffer *eb)
+static int eb_lock_vmas(struct i915_execbuffer *eb)
{
unsigned int i;
int err;
- INIT_LIST_HEAD(&eb->unbound);
-
for (i = 0; i < eb->buffer_count; i++) {
- struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
struct eb_vma *ev = &eb->vma[i];
struct i915_vma *vma = ev->vma;
err = i915_gem_object_lock(vma->obj, &eb->ww);
if (err)
return err;
+ }
+
+ return 0;
+}
+
+static int eb_validate_vmas(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+ int err;
+
+ INIT_LIST_HEAD(&eb->unbound);
+
+ err = eb_lock_vmas(eb);
+ if (err)
+ return err;
+
+ for (i = 0; i < eb->buffer_count; i++) {
+ struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
err = eb_pin_vma(eb, entry, ev);
if (err == -EDEADLK)
@@ -992,7 +993,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
}
}
-static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr)
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
{
const unsigned int count = eb->buffer_count;
unsigned int i;
@@ -1006,11 +1007,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release
eb_unreserve_vma(ev);
- if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) {
- ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT;
- i915_gem_object_userptr_submit_fini(vma->obj);
- }
-
if (final)
i915_vma_put(vma);
}
@@ -1020,8 +1016,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release
static void eb_destroy(const struct i915_execbuffer *eb)
{
- GEM_BUG_ON(eb->reloc_cache.rq);
-
if (eb->lut_size > 0)
kfree(eb->buckets);
}
@@ -1033,14 +1027,6 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
return gen8_canonical_addr((int)reloc->delta + target->node.start);
}
-static void reloc_cache_clear(struct reloc_cache *cache)
-{
- cache->rq = NULL;
- cache->rq_cmd = NULL;
- cache->pool = NULL;
- cache->rq_size = 0;
-}
-
static void reloc_cache_init(struct reloc_cache *cache,
struct drm_i915_private *i915)
{
@@ -1053,7 +1039,6 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->has_fence = cache->graphics_ver < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.flags = 0;
- reloc_cache_clear(cache);
}
static inline void *unmask_page(unsigned long p)
@@ -1075,48 +1060,10 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
return &i915->ggtt;
}
-static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache)
-{
- if (!cache->pool)
- return;
-
- /*
- * This is a bit nasty, normally we keep objects locked until the end
- * of execbuffer, but we already submit this, and have to unlock before
- * dropping the reference. Fortunately we can only hold 1 pool node at
- * a time, so this should be harmless.
- */
- i915_gem_ww_unlock_single(cache->pool->obj);
- intel_gt_buffer_pool_put(cache->pool);
- cache->pool = NULL;
-}
-
-static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache)
-{
- struct drm_i915_gem_object *obj = cache->rq->batch->obj;
-
- GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
- cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
-
- i915_gem_object_flush_map(obj);
- i915_gem_object_unpin_map(obj);
-
- intel_gt_chipset_flush(cache->rq->engine->gt);
-
- i915_request_add(cache->rq);
- reloc_cache_put_pool(eb, cache);
- reloc_cache_clear(cache);
-
- eb->reloc_pool = NULL;
-}
-
static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
{
void *vaddr;
- if (cache->rq)
- reloc_gpu_flush(eb, cache);
-
if (!cache->vaddr)
return;
@@ -1298,295 +1245,6 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
*addr = value;
}
-static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
-{
- struct drm_i915_gem_object *obj = vma->obj;
- int err;
-
- assert_vma_held(vma);
-
- if (obj->cache_dirty & ~obj->cache_coherent)
- i915_gem_clflush_object(obj, 0);
- obj->write_domain = 0;
-
- err = i915_request_await_object(rq, vma->obj, true);
- if (err == 0)
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-
- return err;
-}
-
-static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
- struct intel_engine_cs *engine,
- struct i915_vma *vma,
- unsigned int len)
-{
- struct reloc_cache *cache = &eb->reloc_cache;
- struct intel_gt_buffer_pool_node *pool = eb->reloc_pool;
- struct i915_request *rq;
- struct i915_vma *batch;
- u32 *cmd;
- int err;
-
- if (!pool) {
- pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE,
- cache->has_llc ?
- I915_MAP_WB :
- I915_MAP_WC);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
- }
- eb->reloc_pool = NULL;
-
- err = i915_gem_object_lock(pool->obj, &eb->ww);
- if (err)
- goto err_pool;
-
- cmd = i915_gem_object_pin_map(pool->obj, pool->type);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto err_pool;
- }
- intel_gt_buffer_pool_mark_used(pool);
-
- memset32(cmd, 0, pool->obj->base.size / sizeof(u32));
-
- batch = i915_vma_instance(pool->obj, vma->vm, NULL);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto err_unmap;
- }
-
- err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK);
- if (err)
- goto err_unmap;
-
- if (engine == eb->context->engine) {
- rq = i915_request_create(eb->context);
- } else {
- struct intel_context *ce = eb->reloc_context;
-
- if (!ce) {
- ce = intel_context_create(engine);
- if (IS_ERR(ce)) {
- err = PTR_ERR(ce);
- goto err_unpin;
- }
-
- i915_vm_put(ce->vm);
- ce->vm = i915_vm_get(eb->context->vm);
- eb->reloc_context = ce;
- }
-
- err = intel_context_pin_ww(ce, &eb->ww);
- if (err)
- goto err_unpin;
-
- rq = i915_request_create(ce);
- intel_context_unpin(ce);
- }
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_unpin;
- }
-
- err = intel_gt_buffer_pool_mark_active(pool, rq);
- if (err)
- goto err_request;
-
- err = reloc_move_to_gpu(rq, vma);
- if (err)
- goto err_request;
-
- err = eb->engine->emit_bb_start(rq,
- batch->node.start, PAGE_SIZE,
- cache->graphics_ver > 5 ? 0 : I915_DISPATCH_SECURE);
- if (err)
- goto skip_request;
-
- assert_vma_held(batch);
- err = i915_request_await_object(rq, batch->obj, false);
- if (err == 0)
- err = i915_vma_move_to_active(batch, rq, 0);
- if (err)
- goto skip_request;
-
- rq->batch = batch;
- i915_vma_unpin(batch);
-
- cache->rq = rq;
- cache->rq_cmd = cmd;
- cache->rq_size = 0;
- cache->pool = pool;
-
- /* Return with batch mapping (cmd) still pinned */
- return 0;
-
-skip_request:
- i915_request_set_error_once(rq, err);
-err_request:
- i915_request_add(rq);
-err_unpin:
- i915_vma_unpin(batch);
-err_unmap:
- i915_gem_object_unpin_map(pool->obj);
-err_pool:
- eb->reloc_pool = pool;
- return err;
-}
-
-static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
-{
- return engine->class != VIDEO_DECODE_CLASS || GRAPHICS_VER(engine->i915) != 6;
-}
-
-static u32 *reloc_gpu(struct i915_execbuffer *eb,
- struct i915_vma *vma,
- unsigned int len)
-{
- struct reloc_cache *cache = &eb->reloc_cache;
- u32 *cmd;
-
- if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
- reloc_gpu_flush(eb, cache);
-
- if (unlikely(!cache->rq)) {
- int err;
- struct intel_engine_cs *engine = eb->engine;
-
- /* If we need to copy for the cmdparser, we will stall anyway */
- if (eb_use_cmdparser(eb))
- return ERR_PTR(-EWOULDBLOCK);
-
- if (!reloc_can_use_engine(engine)) {
- engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
- if (!engine)
- return ERR_PTR(-ENODEV);
- }
-
- err = __reloc_gpu_alloc(eb, engine, vma, len);
- if (unlikely(err))
- return ERR_PTR(err);
- }
-
- cmd = cache->rq_cmd + cache->rq_size;
- cache->rq_size += len;
-
- return cmd;
-}
-
-static inline bool use_reloc_gpu(struct i915_vma *vma)
-{
- if (DBG_FORCE_RELOC == FORCE_GPU_RELOC)
- return true;
-
- if (DBG_FORCE_RELOC)
- return false;
-
- return !dma_resv_test_signaled(vma->resv, true);
-}
-
-static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
-{
- struct page *page;
- unsigned long addr;
-
- GEM_BUG_ON(vma->pages != vma->obj->mm.pages);
-
- page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT);
- addr = PFN_PHYS(page_to_pfn(page));
- GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */
-
- return addr + offset_in_page(offset);
-}
-
-static int __reloc_entry_gpu(struct i915_execbuffer *eb,
- struct i915_vma *vma,
- u64 offset,
- u64 target_addr)
-{
- const unsigned int ver = eb->reloc_cache.graphics_ver;
- unsigned int len;
- u32 *batch;
- u64 addr;
-
- if (ver >= 8)
- len = offset & 7 ? 8 : 5;
- else if (ver >= 4)
- len = 4;
- else
- len = 3;
-
- batch = reloc_gpu(eb, vma, len);
- if (batch == ERR_PTR(-EDEADLK))
- return -EDEADLK;
- else if (IS_ERR(batch))
- return false;
-
- addr = gen8_canonical_addr(vma->node.start + offset);
- if (ver >= 8) {
- if (offset & 7) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = lower_32_bits(target_addr);
-
- addr = gen8_canonical_addr(addr + 4);
-
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = upper_32_bits(target_addr);
- } else {
- *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = lower_32_bits(target_addr);
- *batch++ = upper_32_bits(target_addr);
- }
- } else if (ver >= 6) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = 0;
- *batch++ = addr;
- *batch++ = target_addr;
- } else if (IS_I965G(eb->i915)) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = 0;
- *batch++ = vma_phys_addr(vma, offset);
- *batch++ = target_addr;
- } else if (ver >= 4) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *batch++ = 0;
- *batch++ = addr;
- *batch++ = target_addr;
- } else if (ver >= 3 &&
- !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) {
- *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
- *batch++ = addr;
- *batch++ = target_addr;
- } else {
- *batch++ = MI_STORE_DWORD_IMM;
- *batch++ = vma_phys_addr(vma, offset);
- *batch++ = target_addr;
- }
-
- return true;
-}
-
-static int reloc_entry_gpu(struct i915_execbuffer *eb,
- struct i915_vma *vma,
- u64 offset,
- u64 target_addr)
-{
- if (eb->reloc_cache.vaddr)
- return false;
-
- if (!use_reloc_gpu(vma))
- return false;
-
- return __reloc_entry_gpu(eb, vma, offset, target_addr);
-}
-
static u64
relocate_entry(struct i915_vma *vma,
const struct drm_i915_gem_relocation_entry *reloc,
@@ -1595,32 +1253,25 @@ relocate_entry(struct i915_vma *vma,
{
u64 target_addr = relocation_target(reloc, target);
u64 offset = reloc->offset;
- int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr);
-
- if (reloc_gpu < 0)
- return reloc_gpu;
-
- if (!reloc_gpu) {
- bool wide = eb->reloc_cache.use_64bit_reloc;
- void *vaddr;
+ bool wide = eb->reloc_cache.use_64bit_reloc;
+ void *vaddr;
repeat:
- vaddr = reloc_vaddr(vma->obj, eb,
- offset >> PAGE_SHIFT);
- if (IS_ERR(vaddr))
- return PTR_ERR(vaddr);
-
- GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
- clflush_write32(vaddr + offset_in_page(offset),
- lower_32_bits(target_addr),
- eb->reloc_cache.vaddr);
-
- if (wide) {
- offset += sizeof(u32);
- target_addr >>= 32;
- wide = false;
- goto repeat;
- }
+ vaddr = reloc_vaddr(vma->obj, eb,
+ offset >> PAGE_SHIFT);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+ clflush_write32(vaddr + offset_in_page(offset),
+ lower_32_bits(target_addr),
+ eb->reloc_cache.vaddr);
+
+ if (wide) {
+ offset += sizeof(u32);
+ target_addr >>= 32;
+ wide = false;
+ goto repeat;
}
return target->node.start | UPDATE;
@@ -1992,7 +1643,7 @@ repeat:
}
/* We may process another execbuffer during the unlock... */
- eb_release_vmas(eb, false, true);
+ eb_release_vmas(eb, false);
i915_gem_ww_ctx_fini(&eb->ww);
if (rq) {
@@ -2061,9 +1712,7 @@ repeat_validate:
list_for_each_entry(ev, &eb->relocs, reloc_link) {
if (!have_copy) {
- pagefault_disable();
err = eb_relocate_vma(eb, ev);
- pagefault_enable();
if (err)
break;
} else {
@@ -2096,7 +1745,7 @@ repeat_validate:
err:
if (err == -EDEADLK) {
- eb_release_vmas(eb, false, false);
+ eb_release_vmas(eb, false);
err = i915_gem_ww_ctx_backoff(&eb->ww);
if (!err)
goto repeat_validate;
@@ -2193,7 +1842,7 @@ retry:
err:
if (err == -EDEADLK) {
- eb_release_vmas(eb, false, false);
+ eb_release_vmas(eb, false);
err = i915_gem_ww_ctx_backoff(&eb->ww);
if (!err)
goto retry;
@@ -2270,7 +1919,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
#ifdef CONFIG_MMU_NOTIFIER
if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
- spin_lock(&eb->i915->mm.notifier_lock);
+ read_lock(&eb->i915->mm.notifier_lock);
/*
* count is always at least 1, otherwise __EXEC_USERPTR_USED
@@ -2288,7 +1937,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
break;
}
- spin_unlock(&eb->i915->mm.notifier_lock);
+ read_unlock(&eb->i915->mm.notifier_lock);
}
#endif
@@ -3156,8 +2805,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.exec = exec;
eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
eb.vma[0].vma = NULL;
- eb.reloc_pool = eb.batch_pool = NULL;
- eb.reloc_context = NULL;
+ eb.batch_pool = NULL;
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -3232,7 +2880,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
err = eb_lookup_vmas(&eb);
if (err) {
- eb_release_vmas(&eb, true, true);
+ eb_release_vmas(&eb, true);
goto err_engine;
}
@@ -3255,9 +2903,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
batch = eb.batch->vma;
- /* All GPU relocation batches must be submitted prior to the user rq */
- GEM_BUG_ON(eb.reloc_cache.rq);
-
/* Allocate a request for this batch buffer nice and early. */
eb.request = i915_request_create(eb.context);
if (IS_ERR(eb.request)) {
@@ -3265,11 +2910,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
+ if (unlikely(eb.gem_context->syncobj)) {
+ struct dma_fence *fence;
+
+ fence = drm_syncobj_fence_get(eb.gem_context->syncobj);
+ err = i915_request_await_dma_fence(eb.request, fence);
+ dma_fence_put(fence);
+ if (err)
+ goto err_ext;
+ }
+
if (in_fence) {
if (args->flags & I915_EXEC_FENCE_SUBMIT)
err = i915_request_await_execution(eb.request,
- in_fence,
- eb.engine->bond_execute);
+ in_fence);
else
err = i915_request_await_dma_fence(eb.request,
in_fence);
@@ -3322,10 +2976,16 @@ err_request:
fput(out_fence->file);
}
}
+
+ if (unlikely(eb.gem_context->syncobj)) {
+ drm_syncobj_replace_fence(eb.gem_context->syncobj,
+ &eb.request->fence);
+ }
+
i915_request_put(eb.request);
err_vma:
- eb_release_vmas(&eb, true, true);
+ eb_release_vmas(&eb, true);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
WARN_ON(err == -EDEADLK);
@@ -3333,10 +2993,6 @@ err_vma:
if (eb.batch_pool)
intel_gt_buffer_pool_put(eb.batch_pool);
- if (eb.reloc_pool)
- intel_gt_buffer_pool_put(eb.reloc_pool);
- if (eb.reloc_context)
- intel_context_put(eb.reloc_context);
err_engine:
eb_put_engine(&eb);
err_context:
@@ -3450,7 +3106,3 @@ end:;
kvfree(exec2_list);
return err;
}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/i915_gem_execbuffer.c"
-#endif