diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
39 files changed, 1544 insertions, 1179 deletions
diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile index 07e7b8b840ea..7e73aa587967 100644 --- a/drivers/gpu/drm/i915/gem/Makefile +++ b/drivers/gpu/drm/i915/gem/Makefile @@ -1 +1,5 @@ -include $(src)/Makefile.header-test # Extra header tests +# For building individual subdir files on the command line +subdir-ccflags-y += -I$(srctree)/$(src)/.. + +# Extra header tests +header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h diff --git a/drivers/gpu/drm/i915/gem/Makefile.header-test b/drivers/gpu/drm/i915/gem/Makefile.header-test deleted file mode 100644 index 61e06cbb4b32..000000000000 --- a/drivers/gpu/drm/i915/gem/Makefile.header-test +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2019 Intel Corporation - -# Test the headers are compilable as standalone units -header_test := $(notdir $(wildcard $(src)/*.h)) - -quiet_cmd_header_test = HDRTEST $@ - cmd_header_test = echo "\#include \"$(<F)\"" > $@ - -header_test_%.c: %.h - $(call cmd,header_test) - -extra-$(CONFIG_DRM_I915_WERROR) += \ - $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h))) - -clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h))) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6ad93a09968c..3d4f5775a4ba 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -82,7 +82,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct reservation_object_list *list; + struct dma_resv_list *list; unsigned int seq; int err; @@ -105,7 +105,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * Alternatively, we can trade that extra information on read/write * activity with * args->busy = - * !reservation_object_test_signaled_rcu(obj->resv, true); + * !dma_resv_test_signaled_rcu(obj->resv, true); * to report the overall busyness. This is what the wait-ioctl does. * */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 5295285d5843..b9f504ba3b32 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -8,87 +8,67 @@ #include "i915_drv.h" #include "i915_gem_clflush.h" - -static DEFINE_SPINLOCK(clflush_lock); +#include "i915_sw_fence_work.h" +#include "i915_trace.h" struct clflush { - struct dma_fence dma; /* Must be first for dma_fence_free() */ - struct i915_sw_fence wait; - struct work_struct work; + struct dma_fence_work base; struct drm_i915_gem_object *obj; }; -static const char *i915_clflush_get_driver_name(struct dma_fence *fence) -{ - return DRIVER_NAME; -} - -static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) -{ - return "clflush"; -} - -static void i915_clflush_release(struct dma_fence *fence) -{ - struct clflush *clflush = container_of(fence, typeof(*clflush), dma); - - i915_sw_fence_fini(&clflush->wait); - - BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); - dma_fence_free(&clflush->dma); -} - -static const struct dma_fence_ops i915_clflush_ops = { - .get_driver_name = i915_clflush_get_driver_name, - .get_timeline_name = i915_clflush_get_timeline_name, - .release = i915_clflush_release, -}; - -static void __i915_do_clflush(struct drm_i915_gem_object *obj) +static void __do_clflush(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!i915_gem_object_has_pages(obj)); drm_clflush_sg(obj->mm.pages); - intel_fb_obj_flush(obj, ORIGIN_CPU); + intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU); } -static void i915_clflush_work(struct work_struct *work) +static int clflush_work(struct dma_fence_work *base) { - struct clflush *clflush = container_of(work, typeof(*clflush), work); - struct drm_i915_gem_object *obj = clflush->obj; - - if (i915_gem_object_pin_pages(obj)) { - DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); - goto out; - } + struct clflush *clflush = container_of(base, typeof(*clflush), base); + struct drm_i915_gem_object *obj = fetch_and_zero(&clflush->obj); + int err; - __i915_do_clflush(obj); + err = i915_gem_object_pin_pages(obj); + if (err) + goto put; + __do_clflush(obj); i915_gem_object_unpin_pages(obj); -out: +put: i915_gem_object_put(obj); + return err; +} + +static void clflush_release(struct dma_fence_work *base) +{ + struct clflush *clflush = container_of(base, typeof(*clflush), base); - dma_fence_signal(&clflush->dma); - dma_fence_put(&clflush->dma); + if (clflush->obj) + i915_gem_object_put(clflush->obj); } -static int __i915_sw_fence_call -i915_clflush_notify(struct i915_sw_fence *fence, - enum i915_sw_fence_notify state) +static const struct dma_fence_work_ops clflush_ops = { + .name = "clflush", + .work = clflush_work, + .release = clflush_release, +}; + +static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) { - struct clflush *clflush = container_of(fence, typeof(*clflush), wait); + struct clflush *clflush; - switch (state) { - case FENCE_COMPLETE: - schedule_work(&clflush->work); - break; + GEM_BUG_ON(!obj->cache_dirty); - case FENCE_FREE: - dma_fence_put(&clflush->dma); - break; - } + clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); + if (!clflush) + return NULL; - return NOTIFY_DONE; + dma_fence_work_init(&clflush->base, &clflush_ops); + clflush->obj = i915_gem_object_get(obj); /* obj <-> clflush cycle */ + + return clflush; } bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, @@ -126,33 +106,16 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, clflush = NULL; if (!(flags & I915_CLFLUSH_SYNC)) - clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); + clflush = clflush_work_create(obj); if (clflush) { - GEM_BUG_ON(!obj->cache_dirty); - - dma_fence_init(&clflush->dma, - &i915_clflush_ops, - &clflush_lock, - to_i915(obj->base.dev)->mm.unordered_timeline, - 0); - i915_sw_fence_init(&clflush->wait, i915_clflush_notify); - - clflush->obj = i915_gem_object_get(obj); - INIT_WORK(&clflush->work, i915_clflush_work); - - dma_fence_get(&clflush->dma); - - i915_sw_fence_await_reservation(&clflush->wait, - obj->base.resv, NULL, - true, I915_FENCE_TIMEOUT, + i915_sw_fence_await_reservation(&clflush->base.chain, + obj->base.resv, NULL, true, + I915_FENCE_TIMEOUT, I915_FENCE_GFP); - - reservation_object_add_excl_fence(obj->base.resv, - &clflush->dma); - - i915_sw_fence_commit(&clflush->wait); + dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); + dma_fence_work_commit(&clflush->base); } else if (obj->mm.pages) { - __i915_do_clflush(obj); + __do_clflush(obj); } else { GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 1fdab0767a47..f99920652751 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -2,10 +2,13 @@ /* * Copyright © 2019 Intel Corporation */ -#include "i915_gem_client_blt.h" +#include "i915_drv.h" +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_pool.h" +#include "i915_gem_client_blt.h" #include "i915_gem_object_blt.h" -#include "intel_drv.h" struct i915_sleeve { struct i915_vma *vma; @@ -72,7 +75,6 @@ static struct i915_sleeve *create_sleeve(struct i915_address_space *vm, vma->ops = &proxy_vma_ops; sleeve->vma = vma; - sleeve->obj = i915_gem_object_get(obj); sleeve->pages = pages; sleeve->page_sizes = *page_sizes; @@ -85,7 +87,6 @@ err_free: static void destroy_sleeve(struct i915_sleeve *sleeve) { - i915_gem_object_put(sleeve->obj); kfree(sleeve); } @@ -154,21 +155,23 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence, static void clear_pages_worker(struct work_struct *work) { struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_private *i915 = w->ce->gem_context->i915; - struct drm_i915_gem_object *obj = w->sleeve->obj; + struct drm_i915_private *i915 = w->ce->engine->i915; + struct drm_i915_gem_object *obj = w->sleeve->vma->obj; struct i915_vma *vma = w->sleeve->vma; struct i915_request *rq; + struct i915_vma *batch; int err = w->dma.error; if (unlikely(err)) goto out_signal; if (obj->cache_dirty) { - obj->write_domain = 0; if (i915_gem_object_has_struct_page(obj)) drm_clflush_sg(w->sleeve->pages); obj->cache_dirty = false; } + obj->read_domains = I915_GEM_GPU_DOMAINS; + obj->write_domain = 0; /* XXX: we need to kill this */ mutex_lock(&i915->drm.struct_mutex); @@ -176,10 +179,16 @@ static void clear_pages_worker(struct work_struct *work) if (unlikely(err)) goto out_unlock; - rq = i915_request_create(w->ce); + batch = intel_emit_vma_fill_blt(w->ce, vma, w->value); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unpin; + } + + rq = intel_context_create_request(w->ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unpin; + goto out_batch; } /* There's no way the fence has signalled */ @@ -187,20 +196,28 @@ static void clear_pages_worker(struct work_struct *work) clear_pages_dma_fence_cb)) GEM_BUG_ON(1); + err = intel_emit_vma_mark_active(batch, rq); + if (unlikely(err)) + goto out_request; + if (w->ce->engine->emit_init_breadcrumb) { err = w->ce->engine->emit_init_breadcrumb(rq); if (unlikely(err)) goto out_request; } - /* XXX: more feverish nightmares await */ - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + /* + * w->dma is already exported via (vma|obj)->resv we need only + * keep track of the GPU activity within this vma/request, and + * propagate the signal from the request to w->dma. + */ + err = i915_active_ref(&vma->active, rq->timeline, rq); if (err) goto out_request; - err = intel_emit_vma_fill_blt(rq, vma, w->value); + err = w->ce->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); out_request: if (unlikely(err)) { i915_request_skip(rq, err); @@ -208,6 +225,8 @@ out_request: } i915_request_add(rq); +out_batch: + intel_emit_vma_release(w->ce, batch); out_unpin: i915_vma_unpin(vma); out_unlock: @@ -248,14 +267,11 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, struct i915_page_sizes *page_sizes, u32 value) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_gem_context *ctx = ce->gem_context; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct clear_pages_work *work; struct i915_sleeve *sleeve; int err; - sleeve = create_sleeve(vm, obj, pages, page_sizes); + sleeve = create_sleeve(ce->vm, obj, pages, page_sizes); if (IS_ERR(sleeve)) return PTR_ERR(sleeve); @@ -273,11 +289,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, init_irq_work(&work->irq_work, clear_pages_signal_irq_worker); - dma_fence_init(&work->dma, - &clear_pages_work_ops, - &fence_lock, - i915->mm.unordered_timeline, - 0); + dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0); i915_sw_fence_init(&work->wait, clear_pages_work_notify); i915_gem_object_lock(obj); @@ -288,7 +300,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, if (err < 0) { dma_fence_set_error(&work->dma, err); } else { - reservation_object_add_excl_fence(obj->base.resv, &work->dma); + dma_resv_add_excl_fence(obj->base.resv, &work->dma); err = 0; } i915_gem_object_unlock(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 0f2c22a3bcb6..1cdfe05514c3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -70,6 +70,7 @@ #include <drm/i915_drm.h> #include "gt/intel_lrc_reg.h" +#include "gt/intel_engine_user.h" #include "i915_gem_context.h" #include "i915_globals.h" @@ -158,7 +159,7 @@ lookup_user_engine(struct i915_gem_context *ctx, if (!engine) return ERR_PTR(-EINVAL); - idx = engine->id; + idx = engine->legacy_idx; } else { idx = ci->engine_instance; } @@ -172,7 +173,9 @@ static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) lockdep_assert_held(&i915->contexts.mutex); - if (INTEL_GEN(i915) >= 11) + if (INTEL_GEN(i915) >= 12) + max = GEN12_MAX_CONTEXT_HW_ID; + else if (INTEL_GEN(i915) >= 11) max = GEN11_MAX_CONTEXT_HW_ID; else if (USES_GUC_SUBMISSION(i915)) /* @@ -278,6 +281,7 @@ static void free_engines_rcu(struct rcu_head *rcu) static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) { + const struct intel_gt *gt = &ctx->i915->gt; struct intel_engine_cs *engine; struct i915_gem_engines *e; enum intel_engine_id id; @@ -287,7 +291,7 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) return ERR_PTR(-ENOMEM); init_rcu_head(&e->rcu); - for_each_engine(engine, ctx->i915, id) { + for_each_engine(engine, gt, id) { struct intel_context *ce; ce = intel_context_create(ctx, engine); @@ -297,8 +301,8 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) } e->engines[id] = ce; + e->num_engines = id + 1; } - e->num_engines = id; return e; } @@ -316,7 +320,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) mutex_destroy(&ctx->engines_mutex); if (ctx->timeline) - i915_timeline_put(ctx->timeline); + intel_timeline_put(ctx->timeline); kfree(ctx->name); put_pid(ctx->pid); @@ -397,30 +401,6 @@ static void context_close(struct i915_gem_context *ctx) i915_gem_context_put(ctx); } -static u32 default_desc_template(const struct drm_i915_private *i915, - const struct i915_address_space *vm) -{ - u32 address_mode; - u32 desc; - - desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; - - address_mode = INTEL_LEGACY_32B_CONTEXT; - if (vm && i915_vm_is_4lvl(vm)) - address_mode = INTEL_LEGACY_64B_CONTEXT; - desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - - if (IS_GEN(i915, 8)) - desc |= GEN8_CTX_L3LLC_COHERENT; - - /* TODO: WaDisableLiteRestore when we start using semaphore - * signalling between Command Streamers - * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; - */ - - return desc; -} - static struct i915_gem_context * __create_context(struct drm_i915_private *i915) { @@ -458,10 +438,6 @@ __create_context(struct drm_i915_private *i915) i915_gem_context_set_bannable(ctx); i915_gem_context_set_recoverable(ctx); - ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = - default_desc_template(i915, &i915->mm.aliasing_ppgtt->vm); - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; @@ -472,13 +448,34 @@ err_free: return ERR_PTR(err); } +static void +context_apply_all(struct i915_gem_context *ctx, + void (*fn)(struct intel_context *ce, void *data), + void *data) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) + fn(ce, data); + i915_gem_context_unlock_engines(ctx); +} + +static void __apply_ppgtt(struct intel_context *ce, void *vm) +{ + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(vm); +} + static struct i915_address_space * __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { struct i915_address_space *old = ctx->vm; + GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); + ctx->vm = i915_vm_get(vm); - ctx->desc_template = default_desc_template(ctx->i915, vm); + context_apply_all(ctx, __apply_ppgtt, vm); return old; } @@ -494,6 +491,29 @@ static void __assign_ppgtt(struct i915_gem_context *ctx, i915_vm_put(vm); } +static void __set_timeline(struct intel_timeline **dst, + struct intel_timeline *src) +{ + struct intel_timeline *old = *dst; + + *dst = src ? intel_timeline_get(src) : NULL; + + if (old) + intel_timeline_put(old); +} + +static void __apply_timeline(struct intel_context *ce, void *timeline) +{ + __set_timeline(&ce->timeline, timeline); +} + +static void __assign_timeline(struct i915_gem_context *ctx, + struct intel_timeline *timeline) +{ + __set_timeline(&ctx->timeline, timeline); + context_apply_all(ctx, __apply_timeline, timeline); +} + static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) { @@ -528,15 +548,16 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) } if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { - struct i915_timeline *timeline; + struct intel_timeline *timeline; - timeline = i915_timeline_create(dev_priv, NULL); + timeline = intel_timeline_create(&dev_priv->gt, NULL); if (IS_ERR(timeline)) { context_close(ctx); return ERR_CAST(timeline); } - ctx->timeline = timeline; + __assign_timeline(ctx, timeline); + intel_timeline_put(timeline); } trace_i915_context_create(ctx); @@ -544,53 +565,6 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) return ctx; } -/** - * i915_gem_context_create_gvt - create a GVT GEM context - * @dev: drm device * - * - * This function is used to create a GVT specific GEM context. - * - * Returns: - * pointer to i915_gem_context on success, error pointer if failed - * - */ -struct i915_gem_context * -i915_gem_context_create_gvt(struct drm_device *dev) -{ - struct i915_gem_context *ctx; - int ret; - - if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) - return ERR_PTR(-ENODEV); - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ERR_PTR(ret); - - ctx = i915_gem_create_context(to_i915(dev), 0); - if (IS_ERR(ctx)) - goto out; - - ret = i915_gem_context_pin_hw_id(ctx); - if (ret) { - context_close(ctx); - ctx = ERR_PTR(ret); - goto out; - } - - ctx->file_priv = ERR_PTR(-EBADF); - i915_gem_context_set_closed(ctx); /* not user accessible */ - i915_gem_context_clear_bannable(ctx); - i915_gem_context_set_force_single_submission(ctx); - if (!USES_GUC_SUBMISSION(to_i915(dev))) - ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ - - GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); -out: - mutex_unlock(&dev->struct_mutex); - return ctx; -} - static void destroy_kernel_context(struct i915_gem_context **ctxp) { @@ -622,7 +596,6 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) i915_gem_context_clear_bannable(ctx); ctx->sched.priority = I915_USER_PRIORITY(prio); - ctx->ring_size = PAGE_SIZE; GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); @@ -644,20 +617,13 @@ static void init_contexts(struct drm_i915_private *i915) init_llist_head(&i915->contexts.free_list); } -static bool needs_preempt_context(struct drm_i915_private *i915) -{ - return HAS_EXECLISTS(i915); -} - int i915_gem_contexts_init(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; /* Reassure ourselves we are only called once */ GEM_BUG_ON(dev_priv->kernel_context); - GEM_BUG_ON(dev_priv->preempt_context); - intel_engine_init_ctx_wa(dev_priv->engine[RCS0]); init_contexts(dev_priv); /* lowest priority; idle task */ @@ -677,15 +643,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count)); dev_priv->kernel_context = ctx; - /* highest priority; preempting task */ - if (needs_preempt_context(dev_priv)) { - ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); - if (!IS_ERR(ctx)) - dev_priv->preempt_context = ctx; - else - DRM_ERROR("Failed to create preempt context; disabling preemption\n"); - } - DRM_DEBUG_DRIVER("%s context support initialized\n", DRIVER_CAPS(dev_priv)->has_logical_contexts ? "logical" : "fake"); @@ -696,8 +653,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915) { lockdep_assert_held(&i915->drm.struct_mutex); - if (i915->preempt_context) - destroy_kernel_context(&i915->preempt_context); destroy_kernel_context(&i915->kernel_context); /* Must free all deferred contexts (via flush_workqueue) first */ @@ -923,8 +878,12 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (!cb) return -ENOMEM; - i915_active_init(i915, &cb->base, cb_retire); - i915_active_acquire(&cb->base); + i915_active_init(i915, &cb->base, NULL, cb_retire); + err = i915_active_acquire(&cb->base); + if (err) { + kfree(cb); + return err; + } for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct i915_request *rq; @@ -951,7 +910,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (emit) err = emit(rq, data); if (err == 0) - err = i915_active_ref(&cb->base, rq->fence.context, rq); + err = i915_active_ref(&cb->base, rq->timeline, rq); i915_request_add(rq); if (err) @@ -1019,7 +978,7 @@ static void set_ppgtt_barrier(void *data) static int emit_ppgtt_update(struct i915_request *rq, void *data) { - struct i915_address_space *vm = rq->gem_context->vm; + struct i915_address_space *vm = rq->hw_context->vm; struct intel_engine_cs *engine = rq->engine; u32 base = engine->mmio_base; u32 *cs; @@ -1128,9 +1087,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, set_ppgtt_barrier, old); if (err) { - ctx->vm = old; - ctx->desc_template = default_desc_template(ctx->i915, old); - i915_vm_put(vm); + i915_vm_put(__set_ppgtt(ctx, old)); + i915_vm_put(old); } unlock: @@ -1187,26 +1145,11 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) if (IS_ERR(rq)) return PTR_ERR(rq); - /* Queue this switch after all other activity by this context. */ - ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); - if (ret) - goto out_add; + /* Serialise with the remote context */ + ret = intel_context_prepare_remote_request(ce, rq); + if (ret == 0) + ret = gen8_emit_rpcs_config(rq, ce, sseu); - /* - * Guarantee context image and the timeline remains pinned until the - * modifying request is retired by setting the ce activity tracker. - * - * But we only need to take one pin on the account of it. Or in other - * words transfer the pinned ce object to tracked active request. - */ - GEM_BUG_ON(i915_active_is_idle(&ce->active)); - ret = i915_active_ref(&ce->active, rq->fence.context, rq); - if (ret) - goto out_add; - - ret = gen8_emit_rpcs_config(rq, ce, sseu); - -out_add: i915_request_add(rq); return ret; } @@ -1217,7 +1160,7 @@ __intel_context_reconfigure_sseu(struct intel_context *ce, { int ret; - GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8); + GEM_BUG_ON(INTEL_GEN(ce->engine->i915) < 8); ret = intel_context_lock_pinned(ce); if (ret) @@ -1239,7 +1182,7 @@ unlock: static int intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) { - struct drm_i915_private *i915 = ce->gem_context->i915; + struct drm_i915_private *i915 = ce->engine->i915; int ret; ret = mutex_lock_interruptible(&i915->drm.struct_mutex); @@ -1636,6 +1579,7 @@ set_engines(struct i915_gem_context *ctx, for (n = 0; n < num_engines; n++) { struct i915_engine_class_instance ci; struct intel_engine_cs *engine; + struct intel_context *ce; if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { __free_engines(set.engines, n); @@ -1658,11 +1602,13 @@ set_engines(struct i915_gem_context *ctx, return -ENOENT; } - set.engines->engines[n] = intel_context_create(ctx, engine); - if (!set.engines->engines[n]) { + ce = intel_context_create(ctx, engine); + if (IS_ERR(ce)) { __free_engines(set.engines, n); - return -ENOMEM; + return PTR_ERR(ce); } + + set.engines->engines[n] = ce; } set.engines->num_engines = num_engines; @@ -1776,7 +1722,7 @@ get_engines(struct i915_gem_context *ctx, if (e->engines[n]) { ci.engine_class = e->engines[n]->engine->uabi_class; - ci.engine_instance = e->engines[n]->engine->instance; + ci.engine_instance = e->engines[n]->engine->uabi_instance; } if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { @@ -2011,13 +1957,8 @@ unlock: static int clone_timeline(struct i915_gem_context *dst, struct i915_gem_context *src) { - if (src->timeline) { - GEM_BUG_ON(src->timeline == dst->timeline); - - if (dst->timeline) - i915_timeline_put(dst->timeline); - dst->timeline = i915_timeline_get(src->timeline); - } + if (src->timeline) + __assign_timeline(dst, src->timeline); return 0; } @@ -2141,7 +2082,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN) return -EINVAL; - ret = i915_terminally_wedged(i915); + ret = intel_gt_terminally_wedged(&i915->gt); if (ret) return ret; @@ -2287,8 +2228,8 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->size = 0; if (ctx->vm) args->value = ctx->vm->total; - else if (to_i915(dev)->mm.aliasing_ppgtt) - args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total; + else if (to_i915(dev)->ggtt.alias) + args->value = to_i915(dev)->ggtt.alias->vm.total; else args->value = to_i915(dev)->ggtt.vm.total; break; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 9691dd062f72..176978608b6f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -141,8 +141,6 @@ int i915_gem_context_open(struct drm_i915_private *i915, void i915_gem_context_close(struct drm_file *file); void i915_gem_context_release(struct kref *ctx_ref); -struct i915_gem_context * -i915_gem_context_create_gvt(struct drm_device *dev); int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); @@ -198,12 +196,6 @@ i915_gem_context_unlock_engines(struct i915_gem_context *ctx) } static inline struct intel_context * -i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx) -{ - return i915_gem_context_engines(ctx)->engines[idx]; -} - -static inline struct intel_context * i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx) { struct intel_context *ce = ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index cc513410eeef..260d59cc3de8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -26,7 +26,7 @@ struct pid; struct drm_i915_private; struct drm_i915_file_private; struct i915_address_space; -struct i915_timeline; +struct intel_timeline; struct intel_ring; struct i915_gem_engines { @@ -77,7 +77,7 @@ struct i915_gem_context { struct i915_gem_engines __rcu *engines; struct mutex engines_mutex; /* guards writes to engines */ - struct i915_timeline *timeline; + struct intel_timeline *timeline; /** * @vm: unique address space (GTT) @@ -169,11 +169,6 @@ struct i915_gem_context { struct i915_sched_attr sched; - /** ring_size: size for allocating the per-engine ring buffer */ - u32 ring_size; - /** desc_template: invariant fields for the HW context descriptor */ - u32 desc_template; - /** guilty_count: How many times this context has caused a GPU hang. */ atomic_t guilty_count; /** diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index cbf1701d3acc..96ce95c8ac5a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -6,7 +6,7 @@ #include <linux/dma-buf.h> #include <linux/highmem.h> -#include <linux/reservation.h> +#include <linux/dma-resv.h> #include "i915_drv.h" #include "i915_gem_object.h" @@ -204,8 +204,7 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .end_cpu_access = i915_gem_end_cpu_access, }; -struct dma_buf *i915_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gem_obj, int flags) +struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags) { struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); DEFINE_DMA_BUF_EXPORT_INFO(exp_info); @@ -222,7 +221,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, return ERR_PTR(ret); } - return drm_gem_dmabuf_export(dev, &exp_info); + return drm_gem_dmabuf_export(gem_obj->dev, &exp_info); } static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 2e3ce2a69653..9c58e8fac1d9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -221,6 +221,8 @@ restart: * state and so involves less work. */ if (atomic_read(&obj->bind_count)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + /* Before we change the PTE, the GPU must not be accessing it. * If we wait upon the object, we know that all the bound * VMA are no longer active. @@ -232,18 +234,30 @@ restart: if (ret) return ret; - if (!HAS_LLC(to_i915(obj->base.dev)) && - cache_level != I915_CACHE_NONE) { - /* Access to snoopable pages through the GTT is + if (!HAS_LLC(i915) && cache_level != I915_CACHE_NONE) { + intel_wakeref_t wakeref = + intel_runtime_pm_get(&i915->runtime_pm); + + /* + * Access to snoopable pages through the GTT is * incoherent and on some machines causes a hard * lockup. Relinquish the CPU mmaping to force * userspace to refault in the pages and we can * then double check if the GTT mapping is still * valid for that pointer access. */ - i915_gem_object_release_mmap(obj); + ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex); + if (ret) { + intel_runtime_pm_put(&i915->runtime_pm, + wakeref); + return ret; + } + + if (obj->userfault_count) + __i915_gem_object_release_mmap(obj); - /* As we no longer need a fence for GTT access, + /* + * As we no longer need a fence for GTT access, * we can relinquish it now (and so prevent having * to steal a fence from someone else on the next * fence request). Note GPU activity would have @@ -251,12 +265,17 @@ restart: * supposed to be linear. */ for_each_ggtt_vma(vma, obj) { - ret = i915_vma_put_fence(vma); + ret = i915_vma_revoke_fence(vma); if (ret) - return ret; + break; } + mutex_unlock(&i915->ggtt.vm.mutex); + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + if (ret) + return ret; } else { - /* We either have incoherent backing store and + /* + * We either have incoherent backing store and * so no GTT access or the architecture is fully * coherent. In such cases, existing GTT mmaps * ignore the cache bit in the PTE and we can @@ -551,13 +570,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) return 0; } -static inline enum fb_op_origin -fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) -{ - return (domain == I915_GEM_DOMAIN_GTT ? - obj->frontbuffer_ggtt_origin : ORIGIN_CPU); -} - /** * Called when user space prepares to use an object with the CPU, either * through the mmap ioctl's mapping or a GTT mapping. @@ -661,9 +673,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, i915_gem_object_unlock(obj); - if (write_domain != 0) - intel_fb_obj_invalidate(obj, - fb_write_origin(obj, write_domain)); + if (write_domain) + intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU); out_unpin: i915_gem_object_unpin_pages(obj); @@ -783,7 +794,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, } out: - intel_fb_obj_invalidate(obj, ORIGIN_CPU); + intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU); obj->mm.dirty = true; /* return with the pages pinned */ return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 41dab9ea33cd..b5f6937369ea 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -5,7 +5,7 @@ */ #include <linux/intel-iommu.h> -#include <linux/reservation.h> +#include <linux/dma-resv.h> #include <linux/sync_file.h> #include <linux/uaccess.h> @@ -16,13 +16,15 @@ #include "gem/i915_gem_ioctls.h" #include "gt/intel_context.h" +#include "gt/intel_engine_pool.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" -#include "i915_gem_ioctls.h" +#include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" +#include "i915_gem_ioctls.h" #include "i915_trace.h" -#include "intel_drv.h" enum { FORCE_CPU_RELOC = 1, @@ -222,7 +224,6 @@ struct i915_execbuffer { struct intel_engine_cs *engine; /** engine to queue the request to */ struct intel_context *context; /* logical state for the request */ struct i915_gem_context *gem_context; /** caller's context */ - struct i915_address_space *vm; /** GTT and vma for the request */ struct i915_request *request; /** our request to build */ struct i915_vma *batch; /** identity of the batch obj/vma */ @@ -696,7 +697,7 @@ static int eb_reserve(struct i915_execbuffer *eb) case 1: /* Too fragmented, unbind everything and retry */ - err = i915_gem_evict_vm(eb->vm); + err = i915_gem_evict_vm(eb->context->vm); if (err) return err; break; @@ -724,12 +725,8 @@ static int eb_select_context(struct i915_execbuffer *eb) return -ENOENT; eb->gem_context = ctx; - if (ctx->vm) { - eb->vm = ctx->vm; + if (ctx->vm) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; - } else { - eb->vm = &eb->i915->ggtt.vm; - } eb->context_flags = 0; if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags)) @@ -738,63 +735,6 @@ static int eb_select_context(struct i915_execbuffer *eb) return 0; } -static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) -{ - struct i915_request *rq; - - /* - * Completely unscientific finger-in-the-air estimates for suitable - * maximum user request size (to avoid blocking) and then backoff. - */ - if (intel_ring_update_space(ring) >= PAGE_SIZE) - return NULL; - - /* - * Find a request that after waiting upon, there will be at least half - * the ring available. The hysteresis allows us to compete for the - * shared ring and should mean that we sleep less often prior to - * claiming our resources, but not so long that the ring completely - * drains before we can submit our next request. - */ - list_for_each_entry(rq, &ring->request_list, ring_link) { - if (__intel_ring_space(rq->postfix, - ring->emit, ring->size) > ring->size / 2) - break; - } - if (&rq->ring_link == &ring->request_list) - return NULL; /* weird, we will check again later for real */ - - return i915_request_get(rq); -} - -static int eb_wait_for_ring(const struct i915_execbuffer *eb) -{ - struct i915_request *rq; - int ret = 0; - - /* - * Apply a light amount of backpressure to prevent excessive hogs - * from blocking waiting for space whilst holding struct_mutex and - * keeping all of their resources pinned. - */ - - rq = __eb_wait_for_ring(eb->context->ring); - if (rq) { - mutex_unlock(&eb->i915->drm.struct_mutex); - - if (i915_request_wait(rq, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT) < 0) - ret = -EINTR; - - i915_request_put(rq); - - mutex_lock(&eb->i915->drm.struct_mutex); - } - - return ret; -} - static int eb_lookup_vmas(struct i915_execbuffer *eb) { struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; @@ -831,7 +771,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) goto err_vma; } - vma = i915_vma_instance(obj, eb->vm, NULL); + vma = i915_vma_instance(obj, eb->context->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -994,7 +934,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache) __i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size); i915_gem_object_unpin_map(cache->rq->batch->obj); - i915_gem_chipset_flush(cache->rq->i915); + intel_gt_chipset_flush(cache->rq->engine->gt); i915_request_add(cache->rq); cache->rq = NULL; @@ -1018,11 +958,12 @@ static void reloc_cache_reset(struct reloc_cache *cache) kunmap_atomic(vaddr); i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); } else { - wmb(); + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + + intel_gt_flush_ggtt_writes(ggtt->vm.gt); io_mapping_unmap_atomic((void __iomem *)vaddr); - if (cache->node.allocated) { - struct i915_ggtt *ggtt = cache_to_ggtt(cache); + if (cache->node.allocated) { ggtt->vm.clear_range(&ggtt->vm, cache->node.start, cache->node.size); @@ -1077,11 +1018,15 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, void *vaddr; if (cache->vaddr) { + intel_gt_flush_ggtt_writes(ggtt->vm.gt); io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); } else { struct i915_vma *vma; int err; + if (i915_gem_object_is_tiled(obj)) + return ERR_PTR(-EINVAL); + if (use_cpu_reloc(cache, obj)) return NULL; @@ -1093,8 +1038,8 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | - PIN_NONBLOCK | - PIN_NONFAULT); + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); err = drm_mm_insert_node_in_range @@ -1105,12 +1050,6 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, if (err) /* no inactive aperture space, use cpu reloc */ return NULL; } else { - err = i915_vma_put_fence(vma); - if (err) { - i915_vma_unpin(vma); - return ERR_PTR(err); - } - cache->node.start = vma->node.start; cache->node.mm = (void *)vma; } @@ -1118,7 +1057,6 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, offset = cache->node.start; if (cache->node.allocated) { - wmb(); ggtt->vm.insert_page(&ggtt->vm, i915_gem_object_get_dma_address(obj, page), offset, I915_CACHE_NONE, 0); @@ -1201,25 +1139,26 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; - struct drm_i915_gem_object *obj; + struct intel_engine_pool_node *pool; struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; - obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); + pool = intel_engine_pool_get(&eb->engine->pool, PAGE_SIZE); + if (IS_ERR(pool)) + return PTR_ERR(pool); - cmd = i915_gem_object_pin_map(obj, + cmd = i915_gem_object_pin_map(pool->obj, cache->has_llc ? I915_MAP_FORCE_WB : I915_MAP_FORCE_WC); - i915_gem_object_unpin_pages(obj); - if (IS_ERR(cmd)) - return PTR_ERR(cmd); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_pool; + } - batch = i915_vma_instance(obj, vma->vm, NULL); + batch = i915_vma_instance(pool->obj, vma->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto err_unmap; @@ -1235,6 +1174,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_unpin; } + err = intel_engine_pool_mark_active(pool, rq); + if (err) + goto err_request; + err = reloc_move_to_gpu(rq, vma); if (err) goto err_request; @@ -1246,8 +1189,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto skip_request; i915_vma_lock(batch); - GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); - err = i915_vma_move_to_active(batch, rq, 0); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); i915_vma_unlock(batch); if (err) goto skip_request; @@ -1260,7 +1204,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, cache->rq_size = 0; /* Return with batch mapping (cmd) still pinned */ - return 0; + goto out_pool; skip_request: i915_request_skip(rq, err); @@ -1269,7 +1213,9 @@ err_request: err_unpin: i915_vma_unpin(batch); err_unmap: - i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_map(pool->obj); +out_pool: + intel_engine_pool_put(pool); return err; } @@ -1317,7 +1263,7 @@ relocate_entry(struct i915_vma *vma, if (!eb->reloc_cache.vaddr && (DBG_FORCE_RELOC == FORCE_GPU_RELOC || - !reservation_object_test_signaled_rcu(vma->resv, true))) { + !dma_resv_test_signaled_rcu(vma->resv, true))) { const unsigned int gen = eb->reloc_cache.gen; unsigned int len; u32 *batch; @@ -1952,7 +1898,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) eb->exec = NULL; /* Unconditionally flush any chipset caches (for streaming writes). */ - i915_gem_chipset_flush(eb->i915); + intel_gt_chipset_flush(eb->engine->gt); return 0; err_skip: @@ -2011,18 +1957,17 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) { - struct drm_i915_gem_object *shadow_batch_obj; + struct intel_engine_pool_node *pool; struct i915_vma *vma; int err; - shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, - PAGE_ALIGN(eb->batch_len)); - if (IS_ERR(shadow_batch_obj)) - return ERR_CAST(shadow_batch_obj); + pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len); + if (IS_ERR(pool)) + return ERR_CAST(pool); err = intel_engine_cmd_parser(eb->engine, eb->batch->obj, - shadow_batch_obj, + pool->obj, eb->batch_start_offset, eb->batch_len, is_master); @@ -2031,12 +1976,12 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) vma = NULL; else vma = ERR_PTR(err); - goto out; + goto err; } - vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); + vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0); if (IS_ERR(vma)) - goto out; + goto err; eb->vma[eb->buffer_count] = i915_vma_get(vma); eb->flags[eb->buffer_count] = @@ -2044,16 +1989,24 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) vma->exec_flags = &eb->flags[eb->buffer_count]; eb->buffer_count++; -out: - i915_gem_object_unpin_pages(shadow_batch_obj); + vma->private = pool; + return vma; + +err: + intel_engine_pool_put(pool); return vma; } static void add_to_client(struct i915_request *rq, struct drm_file *file) { - rq->file_priv = file->driver_priv; - list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); + struct drm_i915_file_private *file_priv = file->driver_priv; + + rq->file_priv = file_priv; + + spin_lock(&file_priv->mm.lock); + list_add_tail(&rq->client_link, &file_priv->mm.request_list); + spin_unlock(&file_priv->mm.lock); } static int eb_submit(struct i915_execbuffer *eb) @@ -2093,6 +2046,12 @@ static int eb_submit(struct i915_execbuffer *eb) return 0; } +static int num_vcs_engines(const struct drm_i915_private *i915) +{ + return hweight64(INTEL_INFO(i915)->engine_mask & + GENMASK_ULL(VCS0 + I915_MAX_VCS - 1, VCS0)); +} + /* * Find one BSD ring to dispatch the corresponding BSD command. * The engine index is returned. @@ -2105,8 +2064,8 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, /* Check whether the file_priv has already selected one ring. */ if ((int)file_priv->bsd_engine < 0) - file_priv->bsd_engine = atomic_fetch_xor(1, - &dev_priv->mm.bsd_engine_dispatch_index); + file_priv->bsd_engine = + get_random_int() % num_vcs_engines(dev_priv); return file_priv->bsd_engine; } @@ -2119,15 +2078,80 @@ static const enum intel_engine_id user_ring_map[] = { [I915_EXEC_VEBOX] = VECS0 }; -static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) +static struct i915_request *eb_throttle(struct intel_context *ce) +{ + struct intel_ring *ring = ce->ring; + struct intel_timeline *tl = ce->timeline; + struct i915_request *rq; + + /* + * Completely unscientific finger-in-the-air estimates for suitable + * maximum user request size (to avoid blocking) and then backoff. + */ + if (intel_ring_update_space(ring) >= PAGE_SIZE) + return NULL; + + /* + * Find a request that after waiting upon, there will be at least half + * the ring available. The hysteresis allows us to compete for the + * shared ring and should mean that we sleep less often prior to + * claiming our resources, but not so long that the ring completely + * drains before we can submit our next request. + */ + list_for_each_entry(rq, &tl->requests, link) { + if (rq->ring != ring) + continue; + + if (__intel_ring_space(rq->postfix, + ring->emit, ring->size) > ring->size / 2) + break; + } + if (&rq->link == &tl->requests) + return NULL; /* weird, we will check again later for real */ + + return i915_request_get(rq); +} + +static int +__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) { int err; + if (likely(atomic_inc_not_zero(&ce->pin_count))) + return 0; + + err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex); + if (err) + return err; + + err = __intel_context_do_pin(ce); + mutex_unlock(&eb->i915->drm.struct_mutex); + + return err; +} + +static void +__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce) +{ + if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) + return; + + mutex_lock(&eb->i915->drm.struct_mutex); + intel_context_unpin(ce); + mutex_unlock(&eb->i915->drm.struct_mutex); +} + +static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) +{ + struct intel_timeline *tl; + struct i915_request *rq; + int err; + /* * ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. */ - err = i915_terminally_wedged(eb->i915); + err = intel_gt_terminally_wedged(ce->engine->gt); if (err) return err; @@ -2136,18 +2160,64 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - err = intel_context_pin(ce); + err = __eb_pin_context(eb, ce); if (err) return err; + /* + * Take a local wakeref for preparing to dispatch the execbuf as + * we expect to access the hardware fairly frequently in the + * process, and require the engine to be kept awake between accesses. + * Upon dispatch, we acquire another prolonged wakeref that we hold + * until the timeline is idle, which in turn releases the wakeref + * taken on the engine, and the parent device. + */ + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto err_unpin; + } + + intel_context_enter(ce); + rq = eb_throttle(ce); + + intel_context_timeline_unlock(tl); + + if (rq) { + if (i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) { + i915_request_put(rq); + err = -EINTR; + goto err_exit; + } + + i915_request_put(rq); + } + eb->engine = ce->engine; eb->context = ce; return 0; + +err_exit: + mutex_lock(&tl->mutex); + intel_context_exit(ce); + intel_context_timeline_unlock(tl); +err_unpin: + __eb_unpin_context(eb, ce); + return err; } -static void eb_unpin_context(struct i915_execbuffer *eb) +static void eb_unpin_engine(struct i915_execbuffer *eb) { - intel_context_unpin(eb->context); + struct intel_context *ce = eb->context; + struct intel_timeline *tl = ce->timeline; + + mutex_lock(&tl->mutex); + intel_context_exit(ce); + mutex_unlock(&tl->mutex); + + __eb_unpin_context(eb, ce); } static unsigned int @@ -2165,7 +2235,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, return -1; } - if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) { + if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { @@ -2192,9 +2262,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, } static int -eb_select_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +eb_pin_engine(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) { struct intel_context *ce; unsigned int idx; @@ -2209,7 +2279,7 @@ eb_select_engine(struct i915_execbuffer *eb, if (IS_ERR(ce)) return PTR_ERR(ce); - err = eb_pin_context(eb, ce); + err = __eb_pin_engine(eb, ce); intel_context_put(ce); return err; @@ -2427,25 +2497,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; - /* - * Take a local wakeref for preparing to dispatch the execbuf as - * we expect to access the hardware fairly frequently in the - * process. Upon first dispatch, we acquire another prolonged - * wakeref that we hold until the GPU has been idle for at least - * 100ms. - */ - intel_gt_pm_get(eb.i915); + err = eb_pin_engine(&eb, file, args); + if (unlikely(err)) + goto err_context; err = i915_mutex_lock_interruptible(dev); if (err) - goto err_rpm; - - err = eb_select_engine(&eb, file, args); - if (unlikely(err)) - goto err_unlock; - - err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ - if (unlikely(err)) goto err_engine; err = eb_relocate(&eb); @@ -2572,6 +2629,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, * to explicitly hold another reference here. */ eb.request->batch = eb.batch; + if (eb.batch->private) + intel_engine_pool_mark_active(eb.batch->private, eb.request); trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb); @@ -2596,15 +2655,15 @@ err_request: err_batch_unpin: if (eb.batch_flags & I915_DISPATCH_SECURE) i915_vma_unpin(eb.batch); + if (eb.batch->private) + intel_engine_pool_put(eb.batch->private); err_vma: if (eb.exec) eb_release_vmas(&eb); -err_engine: - eb_unpin_context(&eb); -err_unlock: mutex_unlock(&dev->struct_mutex); -err_rpm: - intel_gt_pm_put(eb.i915); +err_engine: + eb_unpin_engine(&eb); +err_context: i915_gem_context_put(eb.gem_context); err_destroy: eb_destroy(&eb); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c index cf0439e6be83..2f6100ec2608 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c @@ -69,8 +69,7 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj) i915_sw_fence_init(&stub->chain, stub_notify); dma_fence_init(&stub->dma, &stub_fence_ops, &stub->chain.wait.lock, - to_i915(obj->base.dev)->mm.unordered_timeline, - 0); + 0, 0); if (i915_sw_fence_await_reservation(&stub->chain, obj->base.resv, NULL, @@ -78,7 +77,7 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj) I915_FENCE_GFP) < 0) goto err; - reservation_object_add_excl_fence(obj->base.resv, &stub->dma); + dma_resv_add_excl_fence(obj->base.resv, &stub->dma); return &stub->dma; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 39a661927d8e..261c9bd83f51 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -7,12 +7,14 @@ #include <linux/mman.h> #include <linux/sizes.h> +#include "gt/intel_gt.h" + #include "i915_drv.h" #include "i915_gem_gtt.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" +#include "i915_trace.h" #include "i915_vma.h" -#include "intel_drv.h" static inline bool __vma_matches(struct vm_area_struct *vma, struct file *filp, @@ -99,9 +101,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, up_write(&mm->mmap_sem); if (IS_ERR_VALUE(addr)) goto err; - - /* This may race, but that's ok, it only gets set */ - WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); } i915_gem_object_put(obj); @@ -246,7 +245,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) wakeref = intel_runtime_pm_get(rpm); - srcu = i915_reset_trylock(i915); + srcu = intel_gt_reset_trylock(ggtt->vm.gt); if (srcu < 0) { ret = srcu; goto err_rpm; @@ -265,15 +264,15 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) /* Now pin it into the GTT as needed */ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | - PIN_NONBLOCK | - PIN_NONFAULT); + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); if (IS_ERR(vma)) { /* Use a partial view if it is bigger than available space */ struct i915_ggtt_view view = compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); unsigned int flags; - flags = PIN_MAPPABLE; + flags = PIN_MAPPABLE | PIN_NOSEARCH; if (view.type == I915_GGTT_VIEW_NORMAL) flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ @@ -281,10 +280,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) * Userspace is now writing through an untracked VMA, abandon * all hope that the hardware is able to track future writes. */ - obj->frontbuffer_ggtt_origin = ORIGIN_CPU; vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); - if (IS_ERR(vma) && !view.type) { + if (IS_ERR(vma)) { flags = PIN_MAPPABLE; view.type = I915_GGTT_VIEW_PARTIAL; vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); @@ -308,14 +306,17 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) if (ret) goto err_fence; - /* Mark as being mmapped into userspace for later revocation */ assert_rpm_wakelock_held(rpm); + + /* Mark as being mmapped into userspace for later revocation */ + mutex_lock(&i915->ggtt.vm.mutex); if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) list_add(&obj->userfault_link, &i915->ggtt.userfault_list); + mutex_unlock(&i915->ggtt.vm.mutex); + if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); - GEM_BUG_ON(!obj->userfault_count); i915_vma_set_ggtt_write(vma); @@ -326,7 +327,7 @@ err_unpin: err_unlock: mutex_unlock(&dev->struct_mutex); err_reset: - i915_reset_unlock(i915, srcu); + intel_gt_reset_unlock(ggtt->vm.gt, srcu); err_rpm: intel_runtime_pm_put(rpm, wakeref); i915_gem_object_unpin_pages(obj); @@ -339,7 +340,7 @@ err: * fail). But any other -EIO isn't ours (e.g. swap in failure) * and so needs to be reported. */ - if (!i915_terminally_wedged(i915)) + if (!intel_gt_is_wedged(ggtt->vm.gt)) return VM_FAULT_SIGBUS; /* else, fall through */ case -EAGAIN: @@ -410,8 +411,8 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) * requirement that operations to the GGTT be made holding the RPM * wakeref. */ - lockdep_assert_held(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); + mutex_lock(&i915->ggtt.vm.mutex); if (!obj->userfault_count) goto out; @@ -428,6 +429,7 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) wmb(); out: + mutex_unlock(&i915->ggtt.vm.mutex); intel_runtime_pm_put(&i915->runtime_pm, wakeref); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index be6caccce0c5..d7855dc5a5c5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -23,12 +23,13 @@ */ #include "display/intel_frontbuffer.h" - +#include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_object.h" #include "i915_globals.h" +#include "i915_trace.h" static struct i915_global_object { struct i915_global base; @@ -45,16 +46,6 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) return kmem_cache_free(global.slab_objects, obj); } -static void -frontbuffer_retire(struct i915_active_request *active, - struct i915_request *request) -{ - struct drm_i915_gem_object *obj = - container_of(active, typeof(*obj), frontbuffer_write); - - intel_fb_obj_flush(obj, ORIGIN_CS); -} - void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops) { @@ -63,17 +54,14 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, spin_lock_init(&obj->vma.lock); INIT_LIST_HEAD(&obj->vma.list); + INIT_LIST_HEAD(&obj->mm.link); + INIT_LIST_HEAD(&obj->lut_list); - INIT_LIST_HEAD(&obj->batch_pool_link); init_rcu_head(&obj->rcu); obj->ops = ops; - obj->frontbuffer_ggtt_origin = ORIGIN_GTT; - i915_active_request_init(&obj->frontbuffer_write, - NULL, frontbuffer_retire); - obj->mm.madv = I915_MADV_WILLNEED; INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->mm.get_page.lock); @@ -146,6 +134,19 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) } } +static void __i915_gem_free_object_rcu(struct rcu_head *head) +{ + struct drm_i915_gem_object *obj = + container_of(head, typeof(*obj), rcu); + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + dma_resv_fini(&obj->base._resv); + i915_gem_object_free(obj); + + GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); + atomic_dec(&i915->mm.free_count); +} + static void __i915_gem_free_objects(struct drm_i915_private *i915, struct llist_node *freed) { @@ -160,7 +161,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, mutex_lock(&i915->drm.struct_mutex); - GEM_BUG_ON(i915_gem_object_is_active(obj)); list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; @@ -169,110 +169,70 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, GEM_BUG_ON(!list_empty(&obj->vma.list)); GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); - /* - * This serializes freeing with the shrinker. Since the free - * is delayed, first by RCU then by the workqueue, we want the - * shrinker to be able to free pages of unreferenced objects, - * or else we may oom whilst there are plenty of deferred - * freed objects. - */ - if (i915_gem_object_has_pages(obj) && - i915_gem_object_is_shrinkable(obj)) { - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); - list_del_init(&obj->mm.link); - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - } - mutex_unlock(&i915->drm.struct_mutex); GEM_BUG_ON(atomic_read(&obj->bind_count)); GEM_BUG_ON(obj->userfault_count); - GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); GEM_BUG_ON(!list_empty(&obj->lut_list)); - if (obj->ops->release) - obj->ops->release(obj); - atomic_set(&obj->mm.pages_pin_count, 0); __i915_gem_object_put_pages(obj, I915_MM_NORMAL); GEM_BUG_ON(i915_gem_object_has_pages(obj)); + bitmap_free(obj->bit_17); if (obj->base.import_attach) drm_prime_gem_destroy(&obj->base, NULL); - drm_gem_object_release(&obj->base); + drm_gem_free_mmap_offset(&obj->base); - bitmap_free(obj->bit_17); - i915_gem_object_free(obj); - - GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); - atomic_dec(&i915->mm.free_count); + if (obj->ops->release) + obj->ops->release(obj); - cond_resched(); + /* But keep the pointer alive for RCU-protected lookups */ + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); } intel_runtime_pm_put(&i915->runtime_pm, wakeref); } void i915_gem_flush_free_objects(struct drm_i915_private *i915) { - struct llist_node *freed; - - /* Free the oldest, most stale object to keep the free_list short */ - freed = NULL; - if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ - /* Only one consumer of llist_del_first() allowed */ - spin_lock(&i915->mm.free_lock); - freed = llist_del_first(&i915->mm.free_list); - spin_unlock(&i915->mm.free_lock); - } - if (unlikely(freed)) { - freed->next = NULL; + struct llist_node *freed = llist_del_all(&i915->mm.free_list); + + if (unlikely(freed)) __i915_gem_free_objects(i915, freed); - } } static void __i915_gem_free_work(struct work_struct *work) { struct drm_i915_private *i915 = container_of(work, struct drm_i915_private, mm.free_work); - struct llist_node *freed; - /* - * All file-owned VMA should have been released by this point through - * i915_gem_close_object(), or earlier by i915_gem_context_close(). - * However, the object may also be bound into the global GTT (e.g. - * older GPUs without per-process support, or for direct access through - * the GTT either for the user or for scanout). Those VMA still need to - * unbound now. - */ - - spin_lock(&i915->mm.free_lock); - while ((freed = llist_del_all(&i915->mm.free_list))) { - spin_unlock(&i915->mm.free_lock); - - __i915_gem_free_objects(i915, freed); - if (need_resched()) - return; - - spin_lock(&i915->mm.free_lock); - } - spin_unlock(&i915->mm.free_lock); + i915_gem_flush_free_objects(i915); } -static void __i915_gem_free_object_rcu(struct rcu_head *head) +void i915_gem_free_object(struct drm_gem_object *gem_obj) { - struct drm_i915_gem_object *obj = - container_of(head, typeof(*obj), rcu); + struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); struct drm_i915_private *i915 = to_i915(obj->base.dev); + GEM_BUG_ON(i915_gem_object_is_framebuffer(obj)); + + /* + * Before we free the object, make sure any pure RCU-only + * read-side critical sections are complete, e.g. + * i915_gem_busy_ioctl(). For the corresponding synchronized + * lookup see i915_gem_object_lookup_rcu(). + */ + atomic_inc(&i915->mm.free_count); + /* - * We reuse obj->rcu for the freed list, so we had better not treat - * it like a rcu_head from this point forwards. And we expect all - * objects to be freed via this path. + * This serializes freeing with the shrinker. Since the free + * is delayed, first by RCU then by the workqueue, we want the + * shrinker to be able to free pages of unreferenced objects, + * or else we may oom whilst there are plenty of deferred + * freed objects. */ - destroy_rcu_head(&obj->rcu); + i915_gem_object_make_unshrinkable(obj); /* * Since we require blocking on struct_mutex to unbind the freed @@ -288,27 +248,6 @@ static void __i915_gem_free_object_rcu(struct rcu_head *head) queue_work(i915->wq, &i915->mm.free_work); } -void i915_gem_free_object(struct drm_gem_object *gem_obj) -{ - struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - - /* - * Before we free the object, make sure any pure RCU-only - * read-side critical sections are complete, e.g. - * i915_gem_busy_ioctl(). For the corresponding synchronized - * lookup see i915_gem_object_lookup_rcu(). - */ - atomic_inc(&to_i915(obj->base.dev)->mm.free_count); - call_rcu(&obj->rcu, __i915_gem_free_object_rcu); -} - -static inline enum fb_op_origin -fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) -{ - return (domain == I915_GEM_DOMAIN_GTT ? - obj->frontbuffer_ggtt_origin : ORIGIN_CPU); -} - static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) { return !(obj->cache_level == I915_CACHE_NONE || @@ -319,7 +258,6 @@ void i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; assert_object_held(obj); @@ -329,10 +267,10 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, switch (obj->write_domain) { case I915_GEM_DOMAIN_GTT: - i915_gem_flush_ggtt_writes(dev_priv); + for_each_ggtt_vma(vma, obj) + intel_gt_flush_ggtt_writes(vma->vm->gt); - intel_fb_obj_flush(obj, - fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU); for_each_ggtt_vma(vma, obj) { if (vma->iomap) @@ -340,6 +278,7 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, i915_vma_unset_ggtt_write(vma); } + break; case I915_GEM_DOMAIN_WC: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index dfebd5706f16..5efb9936e05b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -81,7 +81,7 @@ i915_gem_object_lookup(struct drm_file *file, u32 handle) } __deprecated -extern struct drm_gem_object * +struct drm_gem_object * drm_gem_object_lookup(struct drm_file *file, u32 handle); __attribute__((nonnull)) @@ -99,22 +99,22 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) __drm_gem_object_put(&obj->base); } -#define assert_object_held(obj) reservation_object_assert_held((obj)->base.resv) +#define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv) static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) { - reservation_object_lock(obj->base.resv, NULL); + dma_resv_lock(obj->base.resv, NULL); } static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj) { - return reservation_object_lock_interruptible(obj->base.resv, NULL); + return dma_resv_lock_interruptible(obj->base.resv, NULL); } static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) { - reservation_object_unlock(obj->base.resv); + dma_resv_unlock(obj->base.resv); } struct dma_fence * @@ -159,15 +159,9 @@ i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) } static inline bool -i915_gem_object_is_active(const struct drm_i915_gem_object *obj) -{ - return READ_ONCE(obj->active_count); -} - -static inline bool i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) { - return READ_ONCE(obj->framebuffer_references); + return READ_ONCE(obj->frontbuffer); } static inline unsigned int @@ -373,7 +367,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) struct dma_fence *fence; rcu_read_lock(); - fence = reservation_object_get_excl_rcu(obj->base.resv); + fence = dma_resv_get_excl_rcu(obj->base.resv); rcu_read_unlock(); if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) @@ -400,6 +394,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, unsigned int flags); void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma); +void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); +void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); +void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj); + static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { if (obj->cache_dirty) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index cb42e3a312e2..6415f9a17e2d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -3,59 +3,136 @@ * Copyright © 2019 Intel Corporation */ -#include "i915_gem_object_blt.h" - +#include "i915_drv.h" +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_pool.h" +#include "gt/intel_gt.h" #include "i915_gem_clflush.h" -#include "intel_drv.h" +#include "i915_gem_object_blt.h" -int intel_emit_vma_fill_blt(struct i915_request *rq, - struct i915_vma *vma, - u32 value) +struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, + struct i915_vma *vma, + u32 value) { - u32 *cs; - - cs = intel_ring_begin(rq, 8); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - if (INTEL_GEN(rq->i915) >= 8) { - *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); - *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cs++ = 0; - *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = lower_32_bits(vma->node.start); - *cs++ = upper_32_bits(vma->node.start); - *cs++ = value; - *cs++ = MI_NOOP; - } else { - *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cs++ = 0; - *cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = vma->node.start; - *cs++ = value; - *cs++ = MI_NOOP; - *cs++ = MI_NOOP; + struct drm_i915_private *i915 = ce->vm->i915; + const u32 block_size = S16_MAX * PAGE_SIZE; + struct intel_engine_pool_node *pool; + struct i915_vma *batch; + u64 offset; + u64 count; + u64 rem; + u32 size; + u32 *cmd; + int err; + + GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); + intel_engine_pm_get(ce->engine); + + count = div_u64(vma->size, block_size); + size = (1 + 8 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + pool = intel_engine_pool_get(&ce->engine->pool, size); + if (IS_ERR(pool)) { + err = PTR_ERR(pool); + goto out_pm; + } + + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_put; + } + + rem = vma->size; + offset = vma->node.start; + + do { + u32 size = min_t(u64, rem, block_size); + + GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + + if (INTEL_GEN(i915) >= 8) { + *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); + *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = value; + } else { + *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = offset; + *cmd++ = value; + } + + /* Allow ourselves to be preempted in between blocks. */ + *cmd++ = MI_ARB_CHECK; + + offset += size; + rem -= size; + } while (rem); + + *cmd = MI_BATCH_BUFFER_END; + intel_gt_chipset_flush(ce->vm->gt); + + i915_gem_object_unpin_map(pool->obj); + + batch = i915_vma_instance(pool->obj, ce->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put; } - intel_ring_advance(rq, cs); + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_put; + + batch->private = pool; + return batch; - return 0; +out_put: + intel_engine_pool_put(pool); +out_pm: + intel_engine_pm_put(ce->engine); + return ERR_PTR(err); +} + +int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) +{ + int err; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + if (unlikely(err)) + return err; + + return intel_engine_pool_mark_active(vma->private, rq); +} + +void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma) +{ + i915_vma_unpin(vma); + intel_engine_pool_put(vma->private); + intel_engine_pm_put(ce->engine); } int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, struct intel_context *ce, u32 value) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_gem_context *ctx = ce->gem_context; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct i915_request *rq; + struct i915_vma *batch; struct i915_vma *vma; int err; - /* XXX: ce->vm please */ - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -69,12 +146,22 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, i915_gem_object_unlock(obj); } - rq = i915_request_create(ce); + batch = intel_emit_vma_fill_blt(ce, vma, value); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unpin; + } + + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unpin; + goto out_batch; } + err = intel_emit_vma_mark_active(batch, rq); + if (unlikely(err)) + goto out_request; + err = i915_request_await_object(rq, obj, true); if (unlikely(err)) goto out_request; @@ -86,22 +173,229 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, } i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (unlikely(err)) goto out_request; - err = intel_emit_vma_fill_blt(rq, vma, value); + err = ce->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); out_request: if (unlikely(err)) i915_request_skip(rq, err); i915_request_add(rq); +out_batch: + intel_emit_vma_release(ce, batch); out_unpin: i915_vma_unpin(vma); return err; } +struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, + struct i915_vma *src, + struct i915_vma *dst) +{ + struct drm_i915_private *i915 = ce->vm->i915; + const u32 block_size = S16_MAX * PAGE_SIZE; + struct intel_engine_pool_node *pool; + struct i915_vma *batch; + u64 src_offset, dst_offset; + u64 count, rem; + u32 size, *cmd; + int err; + + GEM_BUG_ON(src->size != dst->size); + + GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); + intel_engine_pm_get(ce->engine); + + count = div_u64(dst->size, block_size); + size = (1 + 11 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + pool = intel_engine_pool_get(&ce->engine->pool, size); + if (IS_ERR(pool)) { + err = PTR_ERR(pool); + goto out_pm; + } + + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_put; + } + + rem = src->size; + src_offset = src->node.start; + dst_offset = dst->node.start; + + do { + size = min_t(u64, rem, block_size); + GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + + if (INTEL_GEN(i915) >= 9) { + *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); + *cmd++ = BLT_DEPTH_32 | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = lower_32_bits(dst_offset); + *cmd++ = upper_32_bits(dst_offset); + *cmd++ = 0; + *cmd++ = PAGE_SIZE; + *cmd++ = lower_32_bits(src_offset); + *cmd++ = upper_32_bits(src_offset); + } else if (INTEL_GEN(i915) >= 8) { + *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); + *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cmd++ = 0; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cmd++ = lower_32_bits(dst_offset); + *cmd++ = upper_32_bits(dst_offset); + *cmd++ = 0; + *cmd++ = PAGE_SIZE; + *cmd++ = lower_32_bits(src_offset); + *cmd++ = upper_32_bits(src_offset); + } else { + *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; + *cmd++ = dst_offset; + *cmd++ = PAGE_SIZE; + *cmd++ = src_offset; + } + + /* Allow ourselves to be preempted in between blocks. */ + *cmd++ = MI_ARB_CHECK; + + src_offset += size; + dst_offset += size; + rem -= size; + } while (rem); + + *cmd = MI_BATCH_BUFFER_END; + intel_gt_chipset_flush(ce->vm->gt); + + i915_gem_object_unpin_map(pool->obj); + + batch = i915_vma_instance(pool->obj, ce->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_put; + + batch->private = pool; + return batch; + +out_put: + intel_engine_pool_put(pool); +out_pm: + intel_engine_pm_put(ce->engine); + return ERR_PTR(err); +} + +static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write) +{ + struct drm_i915_gem_object *obj = vma->obj; + + if (obj->cache_dirty & ~obj->cache_coherent) + i915_gem_clflush_object(obj, 0); + + return i915_request_await_object(rq, obj, write); +} + +int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct intel_context *ce) +{ + struct drm_gem_object *objs[] = { &src->base, &dst->base }; + struct i915_address_space *vm = ce->vm; + struct i915_vma *vma[2], *batch; + struct ww_acquire_ctx acquire; + struct i915_request *rq; + int err, i; + + vma[0] = i915_vma_instance(src, vm, NULL); + if (IS_ERR(vma[0])) + return PTR_ERR(vma[0]); + + err = i915_vma_pin(vma[0], 0, 0, PIN_USER); + if (unlikely(err)) + return err; + + vma[1] = i915_vma_instance(dst, vm, NULL); + if (IS_ERR(vma[1])) + goto out_unpin_src; + + err = i915_vma_pin(vma[1], 0, 0, PIN_USER); + if (unlikely(err)) + goto out_unpin_src; + + batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unpin_dst; + } + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_batch; + } + + err = intel_emit_vma_mark_active(batch, rq); + if (unlikely(err)) + goto out_request; + + err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire); + if (unlikely(err)) + goto out_request; + + for (i = 0; i < ARRAY_SIZE(vma); i++) { + err = move_to_gpu(vma[i], rq, i); + if (unlikely(err)) + goto out_unlock; + } + + for (i = 0; i < ARRAY_SIZE(vma); i++) { + unsigned int flags = i ? EXEC_OBJECT_WRITE : 0; + + err = i915_vma_move_to_active(vma[i], rq, flags); + if (unlikely(err)) + goto out_unlock; + } + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (unlikely(err)) + goto out_unlock; + } + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); +out_unlock: + drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire); +out_request: + if (unlikely(err)) + i915_request_skip(rq, err); + + i915_request_add(rq); +out_batch: + intel_emit_vma_release(ce, batch); +out_unpin_dst: + i915_vma_unpin(vma[1]); +out_unpin_src: + i915_vma_unpin(vma[0]); + return err; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_gem_object_blt.c" #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h index 7ec7de6ac0c0..243a43a87824 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h @@ -8,17 +8,30 @@ #include <linux/types.h> +#include "gt/intel_context.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_pool.h" +#include "i915_vma.h" + struct drm_i915_gem_object; -struct intel_context; -struct i915_request; -struct i915_vma; -int intel_emit_vma_fill_blt(struct i915_request *rq, - struct i915_vma *vma, - u32 value); +struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, + struct i915_vma *vma, + u32 value); + +struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, + struct i915_vma *src, + struct i915_vma *dst); + +int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq); +void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma); int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, struct intel_context *ce, u32 value); +int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct intel_context *ce); + #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 18bf4f8d6d80..ede0eb4218a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -13,6 +13,7 @@ #include "i915_selftest.h" struct drm_i915_gem_object; +struct intel_fronbuffer; /* * struct i915_lut_handle tracks the fast lookups from handle to vma used @@ -114,7 +115,6 @@ struct drm_i915_gem_object { unsigned int userfault_count; struct list_head userfault_link; - struct list_head batch_pool_link; I915_SELFTEST_DECLARE(struct list_head st_link); /* @@ -142,9 +142,7 @@ struct drm_i915_gem_object { */ u16 write_domain; - atomic_t frontbuffer_bits; - unsigned int frontbuffer_ggtt_origin; /* write once */ - struct i915_active_request frontbuffer_write; + struct intel_frontbuffer *frontbuffer; /** Current tiling stride for the object, if it's tiled. */ unsigned int tiling_and_stride; @@ -154,7 +152,6 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ atomic_t bind_count; - unsigned int active_count; /** Count of how many global VMA are currently pinned for use by HW */ unsigned int pin_global; @@ -226,9 +223,6 @@ struct drm_i915_gem_object { bool quirked:1; } mm; - /** References from framebuffers, locks out tiling changes. */ - unsigned int framebuffer_references; - /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 65eb430cedba..18f0ce0135c1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -153,24 +153,13 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) struct sg_table * __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *pages; pages = fetch_and_zero(&obj->mm.pages); if (IS_ERR_OR_NULL(pages)) return pages; - if (i915_gem_object_is_shrinkable(obj)) { - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); - - list_del(&obj->mm.link); - i915->mm.shrink_count--; - i915->mm.shrink_memory -= obj->base.size; - - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - } + i915_gem_object_make_unshrinkable(obj); if (obj->mm.mapping) { void *ptr; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 2deac933cf59..768356908160 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -13,6 +13,7 @@ #include <drm/drm_legacy.h> /* for drm_pci.h! */ #include <drm/drm_pci.h> +#include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" @@ -60,7 +61,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) vaddr += PAGE_SIZE; } - i915_gem_chipset_flush(to_i915(obj->base.dev)); + intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) { @@ -132,16 +133,16 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, drm_pci_free(obj->base.dev, obj->phys_handle); } -static void -i915_gem_object_release_phys(struct drm_i915_gem_object *obj) +static void phys_release(struct drm_i915_gem_object *obj) { - i915_gem_object_unpin_pages(obj); + fput(obj->base.filp); } static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .get_pages = i915_gem_object_get_pages_phys, .put_pages = i915_gem_object_put_pages_phys, - .release = i915_gem_object_release_phys, + + .release = phys_release, }; int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) @@ -158,7 +159,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) if (obj->ops != &i915_gem_shmem_ops) return -EINVAL; - err = i915_gem_object_unbind(obj); + err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); if (err) return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 914b5d4112bb..92e53c25424c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -5,6 +5,7 @@ */ #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "i915_drv.h" @@ -33,12 +34,9 @@ static void i915_gem_park(struct drm_i915_private *i915) lockdep_assert_held(&i915->drm.struct_mutex); - for_each_engine(engine, i915, id) { + for_each_engine(engine, i915, id) call_idle_barriers(engine); /* cleanup after wedging */ - i915_gem_batch_pool_fini(&engine->batch_pool); - } - i915_timelines_park(i915); i915_vma_parked(i915); i915_globals_park(); @@ -54,7 +52,8 @@ static void idle_work_handler(struct work_struct *work) mutex_lock(&i915->drm.struct_mutex); intel_wakeref_lock(&i915->gt.wakeref); - park = !intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work); + park = (!intel_wakeref_is_active(&i915->gt.wakeref) && + !work_pending(work)); intel_wakeref_unlock(&i915->gt.wakeref); if (park) i915_gem_park(i915); @@ -105,18 +104,18 @@ static int pm_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) +static bool switch_to_kernel_context_sync(struct intel_gt *gt) { - bool result = !i915_terminally_wedged(i915); + bool result = !intel_gt_is_wedged(gt); do { - if (i915_gem_wait_for_idle(i915, + if (i915_gem_wait_for_idle(gt->i915, I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_BOOST, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* XXX hide warning from gem_eio */ if (i915_modparams.reset) { - dev_err(i915->drm.dev, + dev_err(gt->i915->drm.dev, "Failed to idle engines, declaring wedged!\n"); GEM_TRACE_DUMP(); } @@ -125,18 +124,20 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) * Forcibly cancel outstanding work and leave * the gpu quiet. */ - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); result = false; } - } while (i915_retire_requests(i915) && result); + } while (i915_retire_requests(gt->i915) && result); + + if (intel_gt_pm_wait_for_idle(gt)) + result = false; - GEM_BUG_ON(i915->gt.awake); return result; } bool i915_gem_load_power_context(struct drm_i915_private *i915) { - return switch_to_kernel_context_sync(i915); + return switch_to_kernel_context_sync(&i915->gt); } void i915_gem_suspend(struct drm_i915_private *i915) @@ -157,22 +158,15 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - switch_to_kernel_context_sync(i915); + switch_to_kernel_context_sync(&i915->gt); mutex_unlock(&i915->drm.struct_mutex); - /* - * Assert that we successfully flushed all the work and - * reset the GPU back to its idle, low power state. - */ - GEM_BUG_ON(i915->gt.awake); - flush_work(&i915->gem.idle_work); - - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + cancel_delayed_work_sync(&i915->gt.hangcheck.work); i915_gem_drain_freed_objects(i915); - intel_uc_suspend(i915); + intel_uc_suspend(&i915->gt.uc); } static struct drm_i915_gem_object *first_mm_object(struct list_head *list) @@ -237,7 +231,6 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) } spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - intel_uc_sanitize(i915); i915_gem_sanitize(i915); } @@ -245,8 +238,6 @@ void i915_gem_resume(struct drm_i915_private *i915) { GEM_TRACE("\n"); - WARN_ON(i915->gt.awake); - mutex_lock(&i915->drm.struct_mutex); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); @@ -261,10 +252,10 @@ void i915_gem_resume(struct drm_i915_private *i915) * guarantee that the context image is complete. So let's just reset * it and start again. */ - if (intel_gt_resume(i915)) + if (intel_gt_resume(&i915->gt)) goto err_wedged; - intel_uc_resume(i915); + intel_uc_resume(&i915->gt.uc); /* Always reload a context for powersaving. */ if (!i915_gem_load_power_context(i915)) @@ -276,10 +267,10 @@ out_unlock: return; err_wedged: - if (!i915_reset_failed(i915)) { + if (!intel_gt_is_wedged(&i915->gt)) { dev_err(i915->drm.dev, "Failed to re-initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); } goto out_unlock; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 19d9ecdb2894..4c4954e8ce0a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -10,6 +10,7 @@ #include "i915_drv.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" +#include "i915_trace.h" /* * Move pages to appropriate lru and release the pagevec, decrementing the @@ -414,6 +415,11 @@ shmem_pwrite(struct drm_i915_gem_object *obj, return 0; } +static void shmem_release(struct drm_i915_gem_object *obj) +{ + fput(obj->base.filp); +} + const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE, @@ -424,6 +430,8 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .writeback = shmem_writeback, .pwrite = shmem_pwrite, + + .release = shmem_release, }; static int create_shmem(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 3a926a8755c6..edd21d14e64f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -88,10 +88,18 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) return swap_available() || obj->mm.madv == I915_MADV_DONTNEED; } -static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) +static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, + unsigned long shrink) { - if (i915_gem_object_unbind(obj) == 0) + unsigned long flags; + + flags = 0; + if (shrink & I915_SHRINK_ACTIVE) + flags = I915_GEM_OBJECT_UNBIND_ACTIVE; + + if (i915_gem_object_unbind(obj, flags) == 0) __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); + return !i915_gem_object_has_pages(obj); } @@ -169,7 +177,6 @@ i915_gem_shrink(struct drm_i915_private *i915, */ trace_i915_gem_shrink(i915, target, shrink); - i915_retire_requests(i915); /* * Unbinding of objects will require HW access; Let us not wake the @@ -230,8 +237,7 @@ i915_gem_shrink(struct drm_i915_private *i915, continue; if (!(shrink & I915_SHRINK_ACTIVE) && - (i915_gem_object_is_active(obj) || - i915_gem_object_is_framebuffer(obj))) + i915_gem_object_is_framebuffer(obj)) continue; if (!(shrink & I915_SHRINK_BOUND) && @@ -246,7 +252,7 @@ i915_gem_shrink(struct drm_i915_private *i915, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - if (unsafe_drop_pages(obj)) { + if (unsafe_drop_pages(obj, shrink)) { /* May arrive from get_pages on another bo */ mutex_lock_nested(&obj->mm.lock, I915_MM_SHRINKER); @@ -269,8 +275,6 @@ i915_gem_shrink(struct drm_i915_private *i915, if (shrink & I915_SHRINK_BOUND) intel_runtime_pm_put(&i915->runtime_pm, wakeref); - i915_retire_requests(i915); - shrinker_unlock(i915, unlock); if (nr_scanned) @@ -427,12 +431,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr if (!shrinker_lock(i915, 0, &unlock)) return NOTIFY_DONE; - /* Force everything onto the inactive lists */ - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT)) - goto out; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | @@ -455,20 +453,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr } mutex_unlock(&i915->ggtt.vm.mutex); -out: shrinker_unlock(i915, unlock); *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; } -/** - * i915_gem_shrinker_register - Register the i915 shrinker - * @i915: i915 device - * - * This function registers and sets up the i915 shrinker and OOM handler. - */ -void i915_gem_shrinker_register(struct drm_i915_private *i915) +void i915_gem_driver_register__shrinker(struct drm_i915_private *i915) { i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan; i915->mm.shrinker.count_objects = i915_gem_shrinker_count; @@ -483,13 +474,7 @@ void i915_gem_shrinker_register(struct drm_i915_private *i915) WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier)); } -/** - * i915_gem_shrinker_unregister - Unregisters the i915 shrinker - * @i915: i915 device - * - * This function unregisters the i915 shrinker and OOM handler. - */ -void i915_gem_shrinker_unregister(struct drm_i915_private *i915) +void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915) { WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier)); WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier)); @@ -533,3 +518,61 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, if (unlock) mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); } + +#define obj_to_i915(obj__) to_i915((obj__)->base.dev) + +void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) +{ + /* + * We can only be called while the pages are pinned or when + * the pages are released. If pinned, we should only be called + * from a single caller under controlled conditions; and on release + * only one caller may release us. Neither the two may cross. + */ + if (!list_empty(&obj->mm.link)) { /* pinned by caller */ + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; + + spin_lock_irqsave(&i915->mm.obj_lock, flags); + GEM_BUG_ON(list_empty(&obj->mm.link)); + + list_del_init(&obj->mm.link); + i915->mm.shrink_count--; + i915->mm.shrink_memory -= obj->base.size; + + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + } +} + +static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, + struct list_head *head) +{ + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + GEM_BUG_ON(!list_empty(&obj->mm.link)); + + if (i915_gem_object_is_shrinkable(obj)) { + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; + + spin_lock_irqsave(&i915->mm.obj_lock, flags); + GEM_BUG_ON(!kref_read(&obj->base.refcount)); + + list_add_tail(&obj->mm.link, head); + i915->mm.shrink_count++; + i915->mm.shrink_memory += obj->base.size; + + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + } +} + +void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) +{ + __i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.shrink_list); +} + +void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj) +{ + __i915_gem_object_make_shrinkable(obj, + &obj_to_i915(obj)->mm.purge_list); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h new file mode 100644 index 000000000000..b397d7785789 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_SHRINKER_H__ +#define __I915_GEM_SHRINKER_H__ + +#include <linux/bits.h> + +struct drm_i915_private; +struct mutex; + +/* i915_gem_shrinker.c */ +unsigned long i915_gem_shrink(struct drm_i915_private *i915, + unsigned long target, + unsigned long *nr_scanned, + unsigned flags); +#define I915_SHRINK_UNBOUND BIT(0) +#define I915_SHRINK_BOUND BIT(1) +#define I915_SHRINK_ACTIVE BIT(2) +#define I915_SHRINK_VMAPS BIT(3) +#define I915_SHRINK_WRITEBACK BIT(4) + +unsigned long i915_gem_shrink_all(struct drm_i915_private *i915); +void i915_gem_driver_register__shrinker(struct drm_i915_private *i915); +void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915); +void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, + struct mutex *mutex); + +#endif /* __I915_GEM_SHRINKER_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index de1fab2058ec..aa533b4ab5f5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -11,6 +11,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_stolen.h" /* * The BIOS typically reserves some of the system's memory for the exclusive @@ -362,12 +363,16 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->mm.stolen_lock); if (intel_vgpu_active(dev_priv)) { - DRM_INFO("iGVT-g active, disabling use of stolen memory\n"); + dev_notice(dev_priv->drm.dev, + "%s, disabling use of stolen memory\n", + "iGVT-g active"); return 0; } if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) { - DRM_INFO("DMAR active, disabling use of stolen memory\n"); + dev_notice(dev_priv->drm.dev, + "%s, disabling use of stolen memory\n", + "DMAR active"); return 0; } @@ -529,8 +534,6 @@ i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) GEM_BUG_ON(!stolen); - __i915_gem_object_unpin_pages(obj); - i915_gem_stolen_remove_node(dev_priv, stolen); kfree(stolen); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h new file mode 100644 index 000000000000..2289644d8604 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_STOLEN_H__ +#define __I915_GEM_STOLEN_H__ + +#include <linux/types.h> + +struct drm_i915_private; +struct drm_mm_node; +struct drm_i915_gem_object; + +int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, + struct drm_mm_node *node, u64 size, + unsigned alignment); +int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, + struct drm_mm_node *node, u64 size, + unsigned alignment, u64 start, + u64 end); +void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, + struct drm_mm_node *node); +int i915_gem_init_stolen(struct drm_i915_private *dev_priv); +void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv); +struct drm_i915_gem_object * +i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + resource_size_t size); +struct drm_i915_gem_object * +i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, + resource_size_t stolen_offset, + resource_size_t gtt_offset, + resource_size_t size); + +#endif /* __I915_GEM_STOLEN_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index adb3074d9ce2..1e372420771b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -41,7 +41,7 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, long ret; /* ABI: return -EIO if already wedged */ - ret = i915_terminally_wedged(to_i915(dev)); + ret = intel_gt_terminally_wedged(&to_i915(dev)->gt); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 2caa594322bc..11b231c187c5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -12,11 +12,10 @@ #include <drm/i915_drm.h> +#include "i915_drv.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" -#include "i915_trace.h" -#include "intel_drv.h" struct i915_mm_struct { struct mm_struct *mm; @@ -150,7 +149,8 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } } - ret = i915_gem_object_unbind(obj); + ret = i915_gem_object_unbind(obj, + I915_GEM_OBJECT_UNBIND_ACTIVE); if (ret == 0) ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); i915_gem_object_put(obj); @@ -662,17 +662,17 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); + /* + * We always mark objects as dirty when they are used by the GPU, + * just in case. However, if we set the vma as being read-only we know + * that the object will never have been written to. + */ + if (i915_gem_object_is_readonly(obj)) + obj->mm.dirty = false; + for_each_sgt_page(page, sgt_iter, pages) { if (obj->mm.dirty) - /* - * As this may not be anonymous memory (e.g. shmem) - * but exist on a real mapping, we have to lock - * the page in order to dirty it -- holding - * the page reference is not sufficient to - * prevent the inode from being truncated. - * Play safe and take the lock. - */ - set_page_dirty_lock(page); + set_page_dirty(page); mark_page_accessed(page); put_page(page); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 26ec6579b7cd..8af55cd3e690 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -31,11 +31,10 @@ i915_gem_object_wait_fence(struct dma_fence *fence, } static long -i915_gem_object_wait_reservation(struct reservation_object *resv, +i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, long timeout) { - unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; bool prune_fences = false; @@ -44,7 +43,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, unsigned int count, i; int ret; - ret = reservation_object_get_fences_rcu(resv, + ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared); if (ret) return ret; @@ -73,7 +72,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, */ prune_fences = count && timeout >= 0; } else { - excl = reservation_object_get_excl_rcu(resv); + excl = dma_resv_get_excl_rcu(resv); } if (excl && timeout >= 0) @@ -83,15 +82,12 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, /* * Opportunistically prune the fences iff we know they have *all* been - * signaled and that the reservation object has not been changed (i.e. - * no new fences have been added). + * signaled. */ - if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { - if (reservation_object_trylock(resv)) { - if (!__read_seqcount_retry(&resv->seq, seq)) - reservation_object_add_excl_fence(resv, NULL); - reservation_object_unlock(resv); - } + if (prune_fences && dma_resv_trylock(resv)) { + if (dma_resv_test_signaled_rcu(resv, true)) + dma_resv_add_excl_fence(resv, NULL); + dma_resv_unlock(resv); } return timeout; @@ -144,7 +140,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int count, i; int ret; - ret = reservation_object_get_fences_rcu(obj->base.resv, + ret = dma_resv_get_fences_rcu(obj->base.resv, &excl, &count, &shared); if (ret) return ret; @@ -156,7 +152,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, kfree(shared); } else { - excl = reservation_object_get_excl_rcu(obj->base.resv); + excl = dma_resv_get_excl_rcu(obj->base.resv); } if (excl) { diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index 099f3397aada..5e6e8c91ab38 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -20,31 +20,18 @@ int i915_gemfs_init(struct drm_i915_private *i915) if (!type) return -ENODEV; - gemfs = kern_mount(type); - if (IS_ERR(gemfs)) - return PTR_ERR(gemfs); - /* - * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most - * likely 2M. Note that within_size may overallocate huge-pages, if say - * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under - * memory pressure shmem should split any huge-pages which can be - * shrunk. + * By creating our own shmemfs mountpoint, we can pass in + * mount flags that better match our usecase. + * + * One example, although it is probably better with a per-file + * control, is selecting huge page allocations ("huge=within_size"). + * Currently unused due to bandwidth issues (slow reads) on Broadwell+. */ - if (has_transparent_hugepage()) { - struct super_block *sb = gemfs->mnt_sb; - /* FIXME: Disabled until we get W/A for read BW issue. */ - char options[] = "huge=never"; - int flags = 0; - int err; - - err = sb->s_op->remount_fs(sb, &flags, options); - if (err) { - kern_unmount(gemfs); - return err; - } - } + gemfs = kern_mount(type); + if (IS_ERR(gemfs)) + return PTR_ERR(gemfs); i915->mm.gemfs = gemfs; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index b74729b6f353..8de83c6d81f5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -10,6 +10,8 @@ #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" + #include "igt_gem_utils.h" #include "mock_context.h" @@ -877,126 +879,22 @@ out_object_put: return err; } -static struct i915_vma * -gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) -{ - struct drm_i915_private *i915 = vma->vm->i915; - const int gen = INTEL_GEN(i915); - unsigned int count = vma->size >> PAGE_SHIFT; - struct drm_i915_gem_object *obj; - struct i915_vma *batch; - unsigned int size; - u32 *cmd; - int n; - int err; - - size = (1 + 4 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - offset += vma->node.start; - - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = val; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = val; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = val; - } - - offset += PAGE_SIZE; - } - - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(obj); - - batch = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER); - if (err) - goto err; - - return batch; - -err: - i915_gem_object_put(obj); - - return ERR_PTR(err); -} - static int gpu_write(struct i915_vma *vma, struct i915_gem_context *ctx, struct intel_engine_cs *engine, - u32 dword, - u32 value) + u32 dw, + u32 val) { - struct i915_request *rq; - struct i915_vma *batch; int err; - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); - - batch = gpu_write_dw(vma, dword * sizeof(u32), value); - if (IS_ERR(batch)) - return PTR_ERR(batch); - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); + i915_gem_object_lock(vma->obj); + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + i915_gem_object_unlock(vma->obj); if (err) - goto err_request; - - i915_vma_lock(vma); - err = i915_gem_object_set_to_gtt_domain(vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - if (err) - goto err_request; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); -err_request: - if (err) - i915_request_skip(rq, err); - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); + return err; - return err; + return igt_gpu_fill_dw(vma, ctx, engine, dw * sizeof(u32), + vma->size >> PAGE_SHIFT, val); } static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) @@ -1037,8 +935,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, u64 size, u64 offset, u32 dword, u32 val) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; struct i915_vma *vma; int err; @@ -1421,6 +1318,9 @@ static int igt_ppgtt_pin_update(void *arg) struct drm_i915_gem_object *obj; struct i915_vma *vma; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int n; int first, last; int err; @@ -1518,11 +1418,20 @@ static int igt_ppgtt_pin_update(void *arg) * land in the now stale 2M page. */ - err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf); - if (err) - goto out_unpin; + n = 0; + for_each_engine(engine, dev_priv, id) { + if (!intel_engine_can_store_dword(engine)) + continue; - err = cpu_check(obj, 0, 0xdeadbeaf); + err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + if (err) + goto out_unpin; + } + while (n--) { + err = cpu_check(obj, n, 0xdeadbeaf); + if (err) + goto out_unpin; + } out_unpin: i915_vma_unpin(vma); @@ -1598,8 +1507,11 @@ static int igt_shrink_thp(void *arg) struct drm_i915_private *i915 = ctx->i915; struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; + enum intel_engine_id id; struct i915_vma *vma; unsigned int flags = PIN_USER; + unsigned int n; int err; /* @@ -1635,9 +1547,15 @@ static int igt_shrink_thp(void *arg) if (err) goto out_unpin; - err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf); - if (err) - goto out_unpin; + n = 0; + for_each_engine(engine, i915, id) { + if (!intel_engine_can_store_dword(engine)) + continue; + + err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + if (err) + goto out_unpin; + } i915_vma_unpin(vma); @@ -1662,7 +1580,12 @@ static int igt_shrink_thp(void *arg) if (err) goto out_close; - err = cpu_check(obj, 0, 0xdeadbeaf); + while (n--) { + err = cpu_check(obj, n, 0xdeadbeaf); + if (err) + goto out_unpin; + } + out_unpin: i915_vma_unpin(vma); @@ -1726,7 +1649,7 @@ out_unlock: return err; } -int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) +int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_shrink_thp), @@ -1741,22 +1664,22 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) intel_wakeref_t wakeref; int err; - if (!HAS_PPGTT(dev_priv)) { + if (!HAS_PPGTT(i915)) { pr_info("PPGTT not supported, skipping live-selftests\n"); return 0; } - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - file = mock_file(dev_priv); + file = mock_file(i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&dev_priv->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx = live_context(dev_priv, file); + ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_unlock; @@ -1768,10 +1691,10 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) err = i915_subtests(tests, ctx); out_unlock: - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(dev_priv, file); + mock_file_free(i915, file); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index f3a5eb807c1c..d8804a847945 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,14 +5,17 @@ #include "i915_selftest.h" +#include "gt/intel_gt.h" + #include "selftests/igt_flush_test.h" #include "selftests/mock_drm.h" +#include "huge_gem_object.h" #include "mock_context.h" static int igt_client_fill(void *arg) { - struct intel_context *ce = arg; - struct drm_i915_private *i915 = ce->gem_context->i915; + struct drm_i915_private *i915 = arg; + struct intel_context *ce = i915->engine[BCS0]->kernel_context; struct drm_i915_gem_object *obj; struct rnd_state prng; IGT_TIMEOUT(end); @@ -22,15 +25,19 @@ static int igt_client_fill(void *arg) prandom_seed_state(&prng, i915_selftest.random_seed); do { - u32 sz = prandom_u32_state(&prng) % SZ_32M; + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); + u32 phys_sz = sz % (max_block_size + 1); u32 val = prandom_u32_state(&prng); u32 i; sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); - pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val); + pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, + phys_sz, sz, val); - obj = i915_gem_object_create_internal(i915, sz); + obj = huge_gem_object(i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush; @@ -52,7 +59,8 @@ static int igt_client_fill(void *arg) * values after we do the set_to_cpu_domain and pick it up as a * test failure. */ - memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32)); + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(obj) / sizeof(u32)); if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true; @@ -63,24 +71,13 @@ static int igt_client_fill(void *arg) if (err) goto err_unpin; - /* - * XXX: For now do the wait without the object resv lock to - * ensure we don't deadlock. - */ - err = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - i915_gem_object_lock(obj); err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) goto err_unpin; - for (i = 0; i < obj->base.size / sizeof(u32); ++i) { + for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { if (vaddr[i] != val) { pr_err("vaddr[%u]=%x, expected=%x\n", i, vaddr[i], val); @@ -100,11 +97,6 @@ err_unpin: err_put: i915_gem_object_put(obj); err_flush: - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - if (err == -ENOMEM) err = 0; @@ -117,11 +109,11 @@ int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_client_fill), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; if (!HAS_ENGINE(i915, BCS0)) return 0; - return i915_subtests(tests, i915->engine[BCS0]->kernel_context); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 8f22d3f18422..0ff7a89aadca 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -6,6 +6,8 @@ #include <linux/prime_numbers.h> +#include "gt/intel_gt.h" + #include "i915_selftest.h" #include "selftests/i915_random.h" @@ -226,7 +228,9 @@ static int gpu_set(struct drm_i915_gem_object *obj, intel_ring_advance(rq, cs); i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); i915_vma_unpin(vma); @@ -242,12 +246,15 @@ static bool always_valid(struct drm_i915_private *i915) static bool needs_fence_registers(struct drm_i915_private *i915) { - return !i915_terminally_wedged(i915); + return !intel_gt_is_wedged(&i915->gt); } static bool needs_mi_store_dword(struct drm_i915_private *i915) { - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) + return false; + + if (!HAS_ENGINE(i915, RCS0)) return false; return intel_engine_can_store_dword(i915->engine[RCS0]); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index eaa2b16574c7..3e6f4a65d356 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -31,7 +32,6 @@ static int live_nop_switch(void *arg) struct intel_engine_cs *engine; struct i915_gem_context **ctx; enum intel_engine_id id; - intel_wakeref_t wakeref; struct igt_live_test t; struct drm_file *file; unsigned long n; @@ -53,7 +53,6 @@ static int live_nop_switch(void *arg) return PTR_ERR(file); mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); if (!ctx) { @@ -85,7 +84,7 @@ static int live_nop_switch(void *arg) } if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Failed to populated %d contexts\n", nctx); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto out_unlock; } @@ -129,7 +128,7 @@ static int live_nop_switch(void *arg) if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Switching between %ld contexts timed out\n", prime); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); break; } @@ -152,76 +151,11 @@ static int live_nop_switch(void *arg) } out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; } -static struct i915_vma * -gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) -{ - struct drm_i915_gem_object *obj; - const int gen = INTEL_GEN(vma->vm->i915); - unsigned long n, size; - u32 *cmd; - int err; - - size = (4 * count + 1) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(vma->vm->i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); - offset += vma->node.start; - - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = value; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = value; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = value; - } - offset += PAGE_SIZE; - } - *cmd = MI_BATCH_BUFFER_END; - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - static unsigned long real_page_count(struct drm_i915_gem_object *obj) { return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; @@ -237,12 +171,8 @@ static int gpu_fill(struct drm_i915_gem_object *obj, struct intel_engine_cs *engine, unsigned int dw) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; - struct i915_request *rq; + struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; struct i915_vma *vma; - struct i915_vma *batch; - unsigned int flags; int err; GEM_BUG_ON(obj->base.size > vm->total); @@ -253,7 +183,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, return PTR_ERR(vma); i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, false); + err = i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) return err; @@ -262,70 +192,23 @@ static int gpu_fill(struct drm_i915_gem_object *obj, if (err) return err; - /* Within the GTT the huge objects maps every page onto + /* + * Within the GTT the huge objects maps every page onto * its 1024 real pages (using phys_pfn = dma_pfn % 1024). * We set the nth dword within the page using the nth * mapping via the GTT - this should exercise the GTT mapping * whilst checking that each context provides a unique view * into the object. */ - batch = gpu_fill_dw(vma, - (dw * real_page_count(obj)) << PAGE_SHIFT | - (dw * sizeof(u32)), - real_page_count(obj), - dw); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_vma; - } - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - flags = 0; - if (INTEL_GEN(vm->i915) <= 5) - flags |= I915_DISPATCH_SECURE; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); - if (err) - goto err_request; - - i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); - if (err) - goto skip_request; - - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - if (err) - goto skip_request; - - i915_request_add(rq); - - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); - + err = igt_gpu_fill_dw(vma, + ctx, + engine, + (dw * real_page_count(obj)) << PAGE_SHIFT | + (dw * sizeof(u32)), + real_page_count(obj), + dw); i915_vma_unpin(vma); - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); -err_vma: - i915_vma_unpin(vma); return err; } @@ -431,6 +314,9 @@ create_test_object(struct i915_gem_context *ctx, u64 size; int err; + /* Keep in GEM's good graces */ + i915_retire_requests(ctx->i915); + size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); size = round_down(size, DW_PER_PAGE * PAGE_SIZE); @@ -507,7 +393,6 @@ static int igt_ctx_exec(void *arg) dw = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; - intel_wakeref_t wakeref; ctx = live_context(i915, file); if (IS_ERR(ctx)) { @@ -523,8 +408,7 @@ static int igt_ctx_exec(void *arg) } } - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(obj, ctx, engine, dw); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -565,6 +449,8 @@ out_unlock: mock_file_free(i915, file); if (err) return err; + + i915_gem_drain_freed_objects(i915); } return 0; @@ -623,7 +509,6 @@ static int igt_shared_ctx_exec(void *arg) ncontexts = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; - intel_wakeref_t wakeref; ctx = kernel_context(i915); if (IS_ERR(ctx)) { @@ -642,9 +527,7 @@ static int igt_shared_ctx_exec(void *arg) } } - err = 0; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(obj, ctx, engine, dw); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -678,6 +561,10 @@ static int igt_shared_ctx_exec(void *arg) dw += rem; } + + mutex_unlock(&i915->drm.struct_mutex); + i915_gem_drain_freed_objects(i915); + mutex_lock(&i915->drm.struct_mutex); } out_test: if (igt_live_test_end(&t)) @@ -746,7 +633,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - vma = i915_vma_instance(obj, ce->gem_context->vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -779,13 +666,17 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, goto err_request; i915_vma_lock(batch); - err = i915_vma_move_to_active(batch, rq, 0); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); i915_vma_unlock(batch); if (err) goto skip_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto skip_request; @@ -820,8 +711,7 @@ err_vma: #define TEST_RESET BIT(2) static int -__sseu_prepare(struct drm_i915_private *i915, - const char *name, +__sseu_prepare(const char *name, unsigned int flags, struct intel_context *ce, struct igt_spinner **spin) @@ -837,14 +727,11 @@ __sseu_prepare(struct drm_i915_private *i915, if (!*spin) return -ENOMEM; - ret = igt_spinner_init(*spin, i915); + ret = igt_spinner_init(*spin, ce->engine->gt); if (ret) goto err_free; - rq = igt_spinner_create_request(*spin, - ce->gem_context, - ce->engine, - MI_NOOP); + rq = igt_spinner_create_request(*spin, ce, MI_NOOP); if (IS_ERR(rq)) { ret = PTR_ERR(rq); goto err_fini; @@ -870,8 +757,7 @@ err_free: } static int -__read_slice_count(struct drm_i915_private *i915, - struct intel_context *ce, +__read_slice_count(struct intel_context *ce, struct drm_i915_gem_object *obj, struct igt_spinner *spin, u32 *rpcs) @@ -900,7 +786,7 @@ __read_slice_count(struct drm_i915_private *i915, return ret; } - if (INTEL_GEN(i915) >= 11) { + if (INTEL_GEN(ce->engine->i915) >= 11) { s_mask = GEN11_RPCS_S_CNT_MASK; s_shift = GEN11_RPCS_S_CNT_SHIFT; } else { @@ -943,8 +829,7 @@ __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, } static int -__sseu_finish(struct drm_i915_private *i915, - const char *name, +__sseu_finish(const char *name, unsigned int flags, struct intel_context *ce, struct drm_i915_gem_object *obj, @@ -956,19 +841,18 @@ __sseu_finish(struct drm_i915_private *i915, int ret = 0; if (flags & TEST_RESET) { - ret = i915_reset_engine(ce->engine, "sseu"); + ret = intel_engine_reset(ce->engine, "sseu"); if (ret) goto out; } - ret = __read_slice_count(i915, ce, obj, + ret = __read_slice_count(ce, obj, flags & TEST_RESET ? NULL : spin, &rpcs); ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); if (ret) goto out; - ret = __read_slice_count(i915, ce->engine->kernel_context, obj, - NULL, &rpcs); + ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); out: @@ -976,11 +860,12 @@ out: igt_spinner_end(spin); if ((flags & TEST_IDLE) && ret == 0) { - ret = i915_gem_wait_for_idle(i915, 0, MAX_SCHEDULE_TIMEOUT); + ret = i915_gem_wait_for_idle(ce->engine->i915, + 0, MAX_SCHEDULE_TIMEOUT); if (ret) return ret; - ret = __read_slice_count(i915, ce, obj, NULL, &rpcs); + ret = __read_slice_count(ce, obj, NULL, &rpcs); ret = __check_rpcs(name, rpcs, ret, expected, "Context", " after idle!"); } @@ -989,8 +874,7 @@ out: } static int -__sseu_test(struct drm_i915_private *i915, - const char *name, +__sseu_test(const char *name, unsigned int flags, struct intel_context *ce, struct drm_i915_gem_object *obj, @@ -999,7 +883,7 @@ __sseu_test(struct drm_i915_private *i915, struct igt_spinner *spin = NULL; int ret; - ret = __sseu_prepare(i915, name, flags, ce, &spin); + ret = __sseu_prepare(name, flags, ce, &spin); if (ret) return ret; @@ -1007,7 +891,7 @@ __sseu_test(struct drm_i915_private *i915, if (ret) goto out_spin; - ret = __sseu_finish(i915, name, flags, ce, obj, + ret = __sseu_finish(name, flags, ce, obj, hweight32(sseu.slice_mask), spin); out_spin: @@ -1025,35 +909,33 @@ __igt_ctx_sseu(struct drm_i915_private *i915, unsigned int flags) { struct intel_engine_cs *engine = i915->engine[RCS0]; - struct intel_sseu default_sseu = engine->sseu; struct drm_i915_gem_object *obj; struct i915_gem_context *ctx; struct intel_context *ce; struct intel_sseu pg_sseu; - intel_wakeref_t wakeref; struct drm_file *file; int ret; - if (INTEL_GEN(i915) < 9) + if (INTEL_GEN(i915) < 9 || !engine) return 0; if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) return 0; - if (hweight32(default_sseu.slice_mask) < 2) + if (hweight32(engine->sseu.slice_mask) < 2) return 0; /* * Gen11 VME friendly power-gated configuration with half enabled * sub-slices. */ - pg_sseu = default_sseu; + pg_sseu = engine->sseu; pg_sseu.slice_mask = 1; pg_sseu.subslice_mask = - ~(~0 << (hweight32(default_sseu.subslice_mask) / 2)); + ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", - name, flags, hweight32(default_sseu.slice_mask), + name, flags, hweight32(engine->sseu.slice_mask), hweight32(pg_sseu.slice_mask)); file = mock_file(i915); @@ -1061,7 +943,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, return PTR_ERR(file); if (flags & TEST_RESET) - igt_global_reset_lock(i915); + igt_global_reset_lock(&i915->gt); mutex_lock(&i915->drm.struct_mutex); @@ -1078,12 +960,10 @@ __igt_ctx_sseu(struct drm_i915_private *i915, goto out_unlock; } - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ce = i915_gem_context_get_engine(ctx, RCS0); if (IS_ERR(ce)) { ret = PTR_ERR(ce); - goto out_rpm; + goto out_put; } ret = intel_context_pin(ce); @@ -1091,22 +971,22 @@ __igt_ctx_sseu(struct drm_i915_private *i915, goto out_context; /* First set the default mask. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); + ret = __sseu_test(name, flags, ce, obj, engine->sseu); if (ret) goto out_fail; /* Then set a power-gated configuration. */ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); + ret = __sseu_test(name, flags, ce, obj, pg_sseu); if (ret) goto out_fail; /* Back to defaults. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); + ret = __sseu_test(name, flags, ce, obj, engine->sseu); if (ret) goto out_fail; /* One last power-gated configuration for the road. */ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); + ret = __sseu_test(name, flags, ce, obj, pg_sseu); if (ret) goto out_fail; @@ -1117,15 +997,14 @@ out_fail: intel_context_unpin(ce); out_context: intel_context_put(ce); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); +out_put: i915_gem_object_put(obj); out_unlock: mutex_unlock(&i915->drm.struct_mutex); if (flags & TEST_RESET) - igt_global_reset_unlock(i915); + igt_global_reset_unlock(&i915->gt); mock_file_free(i915, file); @@ -1194,7 +1073,7 @@ static int igt_ctx_readonly(void *arg) goto out_unlock; } - vm = ctx->vm ?: &i915->mm.aliasing_ppgtt->vm; + vm = ctx->vm ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { err = 0; goto out_unlock; @@ -1207,8 +1086,6 @@ static int igt_ctx_readonly(void *arg) unsigned int id; for_each_engine(engine, i915, id) { - intel_wakeref_t wakeref; - if (!intel_engine_can_store_dword(engine)) continue; @@ -1223,9 +1100,7 @@ static int igt_ctx_readonly(void *arg) i915_gem_object_set_readonly(obj); } - err = 0; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(obj, ctx, engine, dw); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -1347,7 +1222,9 @@ static int write_to_scratch(struct i915_gem_context *ctx, goto err_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, 0); + err = i915_request_await_object(rq, vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, 0); i915_vma_unlock(vma); if (err) goto skip_request; @@ -1444,7 +1321,9 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto err_request; i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unlock(vma); if (err) goto skip_request; @@ -1488,7 +1367,6 @@ static int igt_vm_isolation(void *arg) struct drm_i915_private *i915 = arg; struct i915_gem_context *ctx_a, *ctx_b; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; struct drm_file *file; I915_RND_STATE(prng); @@ -1535,8 +1413,6 @@ static int igt_vm_isolation(void *arg) GEM_BUG_ON(ctx_b->vm->total != vm_total); vm_total -= I915_GTT_PAGE_SIZE; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; for_each_engine(engine, i915, id) { IGT_TIMEOUT(end_time); @@ -1551,7 +1427,7 @@ static int igt_vm_isolation(void *arg) div64_u64_rem(i915_prandom_u64_state(&prng), vm_total, &offset); - offset &= -sizeof(u32); + offset = round_down(offset, alignof_dword); offset += I915_GTT_PAGE_SIZE; err = write_to_scratch(ctx_a, engine, @@ -1560,7 +1436,7 @@ static int igt_vm_isolation(void *arg) err = read_from_scratch(ctx_b, engine, offset, &value); if (err) - goto out_rpm; + goto out_unlock; if (value) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", @@ -1569,7 +1445,7 @@ static int igt_vm_isolation(void *arg) lower_32_bits(offset), this); err = -EINVAL; - goto out_rpm; + goto out_unlock; } this++; @@ -1579,8 +1455,6 @@ static int igt_vm_isolation(void *arg) pr_info("Checked %lu scratch offsets across %d engines\n", count, RUNTIME_INFO(i915)->num_engines); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); out_unlock: if (igt_live_test_end(&t)) err = -EIO; @@ -1736,7 +1610,7 @@ int i915_gem_context_mock_selftests(void) return err; } -int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) +int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_switch), @@ -1747,8 +1621,8 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) SUBTEST(igt_vm_isolation), }; - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_subtests(tests, dev_priv); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index e3a64edef918..d85d1ce273ca 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -20,7 +20,7 @@ static int igt_dmabuf_export(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); i915_gem_object_put(obj); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n", @@ -44,7 +44,7 @@ static int igt_dmabuf_import_self(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n", (int)PTR_ERR(dmabuf)); @@ -219,7 +219,7 @@ static int igt_dmabuf_export_vmap(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); if (IS_ERR(dmabuf)) { pr_err("i915_gem_prime_export failed with err=%d\n", (int)PTR_ERR(dmabuf)); @@ -266,7 +266,7 @@ static int igt_dmabuf_export_kmap(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + dmabuf = i915_gem_prime_export(&obj->base, 0); i915_gem_object_put(obj); if (IS_ERR(dmabuf)) { err = PTR_ERR(dmabuf); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5c81f4b4813a..1d27babff0ce 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -6,6 +6,7 @@ #include <linux/prime_numbers.h> +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "huge_gem_object.h" #include "i915_selftest.h" @@ -143,7 +144,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) continue; - i915_gem_flush_ggtt_writes(to_i915(obj->base.dev)); + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); @@ -327,7 +328,8 @@ out: static int make_obj_busy(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_request *rq; + struct intel_engine_cs *engine; + enum intel_engine_id id; struct i915_vma *vma; int err; @@ -339,17 +341,24 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) if (err) return err; - rq = i915_request_create(i915->engine[RCS0]->kernel_context); - if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); - } + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } - i915_vma_lock(vma); - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, + EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); - i915_request_add(rq); + i915_request_add(rq); + } i915_vma_unpin(vma); i915_gem_object_put(obj); /* leave it only alive via its active ref */ @@ -376,9 +385,9 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, static void disable_retire_worker(struct drm_i915_private *i915) { - i915_gem_shrinker_unregister(i915); + i915_gem_driver_unregister__shrinker(i915); - intel_gt_pm_get(i915); + intel_gt_pm_get(&i915->gt); cancel_delayed_work_sync(&i915->gem.retire_work); flush_work(&i915->gem.idle_work); @@ -386,13 +395,25 @@ static void disable_retire_worker(struct drm_i915_private *i915) static void restore_retire_worker(struct drm_i915_private *i915) { - intel_gt_pm_put(i915); + intel_gt_pm_put(&i915->gt); mutex_lock(&i915->drm.struct_mutex); igt_flush_test(i915, I915_WAIT_LOCKED); mutex_unlock(&i915->drm.struct_mutex); - i915_gem_shrinker_register(i915); + i915_gem_driver_register__shrinker(i915); +} + +static void mmap_offset_lock(struct drm_i915_private *i915) + __acquires(&i915->drm.vma_offset_manager->vm_lock) +{ + write_lock(&i915->drm.vma_offset_manager->vm_lock); +} + +static void mmap_offset_unlock(struct drm_i915_private *i915) + __releases(&i915->drm.vma_offset_manager->vm_lock) +{ + write_unlock(&i915->drm.vma_offset_manager->vm_lock); } static int igt_mmap_offset_exhaustion(void *arg) @@ -413,7 +434,9 @@ static int igt_mmap_offset_exhaustion(void *arg) drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { resv.start = hole_start; resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ + mmap_offset_lock(i915); err = drm_mm_reserve_node(mm, &resv); + mmap_offset_unlock(i915); if (err) { pr_err("Failed to trim VMA manager, err=%d\n", err); goto out_park; @@ -458,7 +481,7 @@ static int igt_mmap_offset_exhaustion(void *arg) /* Now fill with busy dead objects that we expect to reap */ for (loop = 0; loop < 3; loop++) { - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) break; obj = i915_gem_object_create_internal(i915, PAGE_SIZE); @@ -474,19 +497,12 @@ static int igt_mmap_offset_exhaustion(void *arg) pr_err("[loop %d] Failed to busy the object\n", loop); goto err_obj; } - - /* NB we rely on the _active_ reference to access obj now */ - GEM_BUG_ON(!i915_gem_object_is_active(obj)); - err = create_mmap_offset(obj); - if (err) { - pr_err("[loop %d] create_mmap_offset failed with err=%d\n", - loop, err); - goto out; - } } out: + mmap_offset_lock(i915); drm_mm_remove_node(&resv); + mmap_offset_unlock(i915); out_park: restore_retire_worker(i915); return err; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index e23d8c9e9298..c21d747e7d05 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -3,16 +3,19 @@ * Copyright © 2019 Intel Corporation */ +#include "gt/intel_gt.h" + #include "i915_selftest.h" #include "selftests/igt_flush_test.h" #include "selftests/mock_drm.h" +#include "huge_gem_object.h" #include "mock_context.h" static int igt_fill_blt(void *arg) { - struct intel_context *ce = arg; - struct drm_i915_private *i915 = ce->gem_context->i915; + struct drm_i915_private *i915 = arg; + struct intel_context *ce = i915->engine[BCS0]->kernel_context; struct drm_i915_gem_object *obj; struct rnd_state prng; IGT_TIMEOUT(end); @@ -21,16 +24,26 @@ static int igt_fill_blt(void *arg) prandom_seed_state(&prng, i915_selftest.random_seed); + /* + * XXX: needs some threads to scale all these tests, also maybe throw + * in submission from higher priority context to see if we are + * preempted for very large objects... + */ + do { - u32 sz = prandom_u32_state(&prng) % SZ_32M; + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); + u32 phys_sz = sz % (max_block_size + 1); u32 val = prandom_u32_state(&prng); u32 i; sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); - pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val); + pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, + phys_sz, sz, val); - obj = i915_gem_object_create_internal(i915, sz); + obj = huge_gem_object(i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush; @@ -46,7 +59,8 @@ static int igt_fill_blt(void *arg) * Make sure the potentially async clflush does its job, if * required. */ - memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32)); + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(obj) / sizeof(u32)); if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true; @@ -63,7 +77,7 @@ static int igt_fill_blt(void *arg) if (err) goto err_unpin; - for (i = 0; i < obj->base.size / sizeof(u32); ++i) { + for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { if (vaddr[i] != val) { pr_err("vaddr[%u]=%x, expected=%x\n", i, vaddr[i], val); @@ -83,11 +97,111 @@ err_unpin: err_put: i915_gem_object_put(obj); err_flush: - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); + if (err == -ENOMEM) + err = 0; + + return err; +} + +static int igt_copy_blt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_context *ce = i915->engine[BCS0]->kernel_context; + struct drm_i915_gem_object *src, *dst; + struct rnd_state prng; + IGT_TIMEOUT(end); + u32 *vaddr; + int err = 0; + + prandom_seed_state(&prng, i915_selftest.random_seed); + + do { + const u32 max_block_size = S16_MAX * PAGE_SIZE; + u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); + u32 phys_sz = sz % (max_block_size + 1); + u32 val = prandom_u32_state(&prng); + u32 i; + + sz = round_up(sz, PAGE_SIZE); + phys_sz = round_up(phys_sz, PAGE_SIZE); + + pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, + phys_sz, sz, val); + + src = huge_gem_object(i915, phys_sz, sz); + if (IS_ERR(src)) { + err = PTR_ERR(src); + goto err_flush; + } + + vaddr = i915_gem_object_pin_map(src, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_put_src; + } + + memset32(vaddr, val, + huge_gem_object_phys_size(src) / sizeof(u32)); + + i915_gem_object_unpin_map(src); + + if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + src->cache_dirty = true; + dst = huge_gem_object(i915, phys_sz, sz); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err_put_src; + } + + vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_put_dst; + } + + memset32(vaddr, val ^ 0xdeadbeaf, + huge_gem_object_phys_size(dst) / sizeof(u32)); + + if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + dst->cache_dirty = true; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_copy_blt(src, dst, ce); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + goto err_unpin; + + i915_gem_object_lock(dst); + err = i915_gem_object_set_to_cpu_domain(dst, false); + i915_gem_object_unlock(dst); + if (err) + goto err_unpin; + + for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) { + if (vaddr[i] != val) { + pr_err("vaddr[%u]=%x, expected=%x\n", i, + vaddr[i], val); + err = -EINVAL; + goto err_unpin; + } + } + + i915_gem_object_unpin_map(dst); + + i915_gem_object_put(src); + i915_gem_object_put(dst); + } while (!time_after(jiffies, end)); + + goto err_flush; + +err_unpin: + i915_gem_object_unpin_map(dst); +err_put_dst: + i915_gem_object_put(dst); +err_put_src: + i915_gem_object_put(src); +err_flush: if (err == -ENOMEM) err = 0; @@ -98,13 +212,14 @@ int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_fill_blt), + SUBTEST(igt_copy_blt), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; if (!HAS_ENGINE(i915, BCS0)) return 0; - return i915_subtests(tests, i915->engine[BCS0]->kernel_context); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index b232e6d2cd92..57ece53c1075 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -9,6 +9,8 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "i915_vma.h" +#include "i915_drv.h" #include "i915_request.h" @@ -23,7 +25,7 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ce = i915_gem_context_get_engine(ctx, engine->id); + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); if (IS_ERR(ce)) return ERR_CAST(ce); @@ -32,3 +34,140 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) return rq; } + +struct i915_vma * +igt_emit_store_dw(struct i915_vma *vma, + u64 offset, + unsigned long count, + u32 val) +{ + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(vma->vm->i915); + unsigned long n, size; + u32 *cmd; + int err; + + size = (4 * count + 1) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(vma->vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = val; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? MI_USE_GGTT : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = val; + } else { + *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cmd++ = offset; + *cmd++ = val; + } + offset += PAGE_SIZE; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +int igt_gpu_fill_dw(struct i915_vma *vma, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u64 offset, + unsigned long count, + u32 val) +{ + struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; + struct i915_request *rq; + struct i915_vma *batch; + unsigned int flags; + int err; + + GEM_BUG_ON(vma->size > vm->total); + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + + batch = igt_emit_store_dw(vma, offset, count, val); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto skip_request; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + if (err) + goto skip_request; + + i915_request_add(rq); + + i915_vma_unpin(batch); + i915_vma_close(batch); + i915_vma_put(batch); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); + return err; +} diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h index 0f17251cf75d..361a7ef866b0 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h @@ -7,11 +7,27 @@ #ifndef __IGT_GEM_UTILS_H__ #define __IGT_GEM_UTILS_H__ +#include <linux/types.h> + struct i915_request; struct i915_gem_context; struct intel_engine_cs; +struct i915_vma; struct i915_request * igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); +struct i915_vma * +igt_emit_store_dw(struct i915_vma *vma, + u64 offset, + unsigned long count, + u32 val); + +int igt_gpu_fill_dw(struct i915_vma *vma, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u64 offset, + unsigned long count, + u32 val); + #endif /* __IGT_GEM_UTILS_H__ */ |