diff options
author | Dave Airlie <airlied@redhat.com> | 2019-10-08 12:54:38 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2019-10-08 12:54:38 +1000 |
commit | 97ea56540ffc59dac275ccb64b8a5e457348a250 (patch) | |
tree | 84f09dc2857ecc1f0946f7b4fbea2cc3efb2ff40 /drivers/gpu/drm/i915/gem | |
parent | Linux 5.4-rc2 (diff) | |
parent | drm/i915: Update DRIVER_DATE to 20191007 (diff) | |
download | linux-dev-97ea56540ffc59dac275ccb64b8a5e457348a250.tar.xz linux-dev-97ea56540ffc59dac275ccb64b8a5e457348a250.zip |
Merge tag 'drm-intel-next-2019-10-07' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:
- Never allow userptr into the mappable GGTT (Chris)
No existing users. Avoid anyone from even trying to
spare a deadlock scenario.
Cross-subsystem Changes:
Core Changes:
Driver Changes:
- Eliminate struct_mutex use as BKL! (Chris)
Only used for execbuf serialisation.
- Initialize DDI TC and TBT ports (D-I) on Tigerlake (Lucas)
- Fix DKL link training for 2.7GHz and 1.62GHz (Jose)
- Add Tigerlake DKL PHY programming sequences (Clinton)
- Add Tigerlake Thunderbolt PLL divider values (Imre)
- drm/i915: Use helpers for drm_mm_node booleans (Chris)
- Restrict L3 remapping sysfs interface to dwords (Chris)
- Fix audio power up sequence for gen10+ display (Kai)
- Skip redundant execlist resubmission (Chris)
- Only unwedge if we can reset GPU first (Chris)
- Initialise breadcrumb lists on the virtual engine (Chris)
- Don't rely on kernel context existing during early errors (Matt A)
- Update Icelake+ MG_DP_MODE programming table (Clinton)
- Update DMC firmware for Icelake (Anusha)
- Downgrade DP MST error after unplugging TypeC cable (Srinivasan)
- Limit MST modes based on plane size too (Ville)
- Polish intel_tv_mode_valid() (Ville)
- Fix g4x sprite scaling stride check with GTT remapping (Ville)
- Don't advertize non-exisiting crtcs (Ville)
- Clean up encoder->crtc_mask setup (Ville)
- Use tc_port instead of port parameter to MG registers (Jose)
- Remove static variable for aux last status (Jani)
- Implement a better i945gm vblank irq vs. C-states workaround (Ville)
- Make the object creation interface consistent (CQ)
- Rename intel_vga_msr_write() to intel_vga_reset_io_mem() (Jani, Ville)
- Eliminate previous drm_dbg/drm_err usage (Jani)
- Move gmbus setup down to intel_modeset_init() (Jani)
- Abstract all vgaarb access to intel_vga.[ch] (Jani)
- Split out i915_switcheroo.[ch] from i915_drv.c (Jani)
- Use intel_gt in has_reset* (Chris)
- Eliminate return value for i915_gem_init_early (Matt A)
- Selftest improvements (Chris)
- Update HuC firmware header version number format (Daniele)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191007134801.GA24313@jlahtine-desk.ger.corp.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
27 files changed, 1188 insertions, 1069 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index f99920652751..81366aa4812b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -155,7 +155,6 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence, static void clear_pages_worker(struct work_struct *work) { struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_private *i915 = w->ce->engine->i915; struct drm_i915_gem_object *obj = w->sleeve->vma->obj; struct i915_vma *vma = w->sleeve->vma; struct i915_request *rq; @@ -173,11 +172,9 @@ static void clear_pages_worker(struct work_struct *work) obj->read_domains = I915_GEM_GPU_DOMAINS; obj->write_domain = 0; - /* XXX: we need to kill this */ - mutex_lock(&i915->drm.struct_mutex); err = i915_vma_pin(vma, 0, 0, PIN_USER); if (unlikely(err)) - goto out_unlock; + goto out_signal; batch = intel_emit_vma_fill_blt(w->ce, vma, w->value); if (IS_ERR(batch)) { @@ -211,7 +208,7 @@ static void clear_pages_worker(struct work_struct *work) * keep track of the GPU activity within this vma/request, and * propagate the signal from the request to w->dma. */ - err = i915_active_ref(&vma->active, rq->timeline, rq); + err = __i915_vma_move_to_active(vma, rq); if (err) goto out_request; @@ -229,8 +226,6 @@ out_batch: intel_emit_vma_release(w->ce, batch); out_unpin: i915_vma_unpin(vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); out_signal: if (unlikely(err)) { dma_fence_set_error(&w->dma, err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 1cdfe05514c3..5d8221c7ba83 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -167,97 +167,6 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } -static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) -{ - unsigned int max; - - lockdep_assert_held(&i915->contexts.mutex); - - if (INTEL_GEN(i915) >= 12) - max = GEN12_MAX_CONTEXT_HW_ID; - else if (INTEL_GEN(i915) >= 11) - max = GEN11_MAX_CONTEXT_HW_ID; - else if (USES_GUC_SUBMISSION(i915)) - /* - * When using GuC in proxy submission, GuC consumes the - * highest bit in the context id to indicate proxy submission. - */ - max = MAX_GUC_CONTEXT_HW_ID; - else - max = MAX_CONTEXT_HW_ID; - - return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp); -} - -static int steal_hw_id(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx, *cn; - LIST_HEAD(pinned); - int id = -ENOSPC; - - lockdep_assert_held(&i915->contexts.mutex); - - list_for_each_entry_safe(ctx, cn, - &i915->contexts.hw_id_list, hw_id_link) { - if (atomic_read(&ctx->hw_id_pin_count)) { - list_move_tail(&ctx->hw_id_link, &pinned); - continue; - } - - GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */ - list_del_init(&ctx->hw_id_link); - id = ctx->hw_id; - break; - } - - /* - * Remember how far we got up on the last repossesion scan, so the - * list is kept in a "least recently scanned" order. - */ - list_splice_tail(&pinned, &i915->contexts.hw_id_list); - return id; -} - -static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out) -{ - int ret; - - lockdep_assert_held(&i915->contexts.mutex); - - /* - * We prefer to steal/stall ourselves and our users over that of the - * entire system. That may be a little unfair to our users, and - * even hurt high priority clients. The choice is whether to oomkill - * something else, or steal a context id. - */ - ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); - if (unlikely(ret < 0)) { - ret = steal_hw_id(i915); - if (ret < 0) /* once again for the correct errno code */ - ret = new_hw_id(i915, GFP_KERNEL); - if (ret < 0) - return ret; - } - - *out = ret; - return 0; -} - -static void release_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - - if (list_empty(&ctx->hw_id_link)) - return; - - mutex_lock(&i915->contexts.mutex); - if (!list_empty(&ctx->hw_id_link)) { - ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id); - list_del_init(&ctx->hw_id_link); - } - mutex_unlock(&i915->contexts.mutex); -} - static void __free_engines(struct i915_gem_engines *e, unsigned int count) { while (count--) { @@ -309,12 +218,11 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) static void i915_gem_context_free(struct i915_gem_context *ctx) { - lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - release_hw_id(ctx); - if (ctx->vm) - i915_vm_put(ctx->vm); + spin_lock(&ctx->i915->gem.contexts.lock); + list_del(&ctx->link); + spin_unlock(&ctx->i915->gem.contexts.lock); free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); @@ -325,70 +233,55 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) kfree(ctx->name); put_pid(ctx->pid); - list_del(&ctx->link); mutex_destroy(&ctx->mutex); kfree_rcu(ctx, rcu); } -static void contexts_free(struct drm_i915_private *i915) +static void contexts_free_all(struct llist_node *list) { - struct llist_node *freed = llist_del_all(&i915->contexts.free_list); struct i915_gem_context *ctx, *cn; - lockdep_assert_held(&i915->drm.struct_mutex); - - llist_for_each_entry_safe(ctx, cn, freed, free_link) + llist_for_each_entry_safe(ctx, cn, list, free_link) i915_gem_context_free(ctx); } -static void contexts_free_first(struct drm_i915_private *i915) +static void contexts_flush_free(struct i915_gem_contexts *gc) { - struct i915_gem_context *ctx; - struct llist_node *freed; - - lockdep_assert_held(&i915->drm.struct_mutex); - - freed = llist_del_first(&i915->contexts.free_list); - if (!freed) - return; - - ctx = container_of(freed, typeof(*ctx), free_link); - i915_gem_context_free(ctx); + contexts_free_all(llist_del_all(&gc->free_list)); } static void contexts_free_worker(struct work_struct *work) { - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), contexts.free_work); + struct i915_gem_contexts *gc = + container_of(work, typeof(*gc), free_work); - mutex_lock(&i915->drm.struct_mutex); - contexts_free(i915); - mutex_unlock(&i915->drm.struct_mutex); + contexts_flush_free(gc); } void i915_gem_context_release(struct kref *ref) { struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); - struct drm_i915_private *i915 = ctx->i915; + struct i915_gem_contexts *gc = &ctx->i915->gem.contexts; trace_i915_context_free(ctx); - if (llist_add(&ctx->free_link, &i915->contexts.free_list)) - queue_work(i915->wq, &i915->contexts.free_work); + if (llist_add(&ctx->free_link, &gc->free_list)) + schedule_work(&gc->free_work); } static void context_close(struct i915_gem_context *ctx) { - mutex_lock(&ctx->mutex); + struct i915_address_space *vm; i915_gem_context_set_closed(ctx); - ctx->file_priv = ERR_PTR(-EBADF); - /* - * This context will never again be assinged to HW, so we can - * reuse its ID for the next context. - */ - release_hw_id(ctx); + mutex_lock(&ctx->mutex); + + vm = i915_gem_context_vm(ctx); + if (vm) + i915_vm_close(vm); + + ctx->file_priv = ERR_PTR(-EBADF); /* * The LUT uses the VMA as a backpointer to unref the object, @@ -414,7 +307,6 @@ __create_context(struct drm_i915_private *i915) return ERR_PTR(-ENOMEM); kref_init(&ctx->ref); - list_add_tail(&ctx->link, &i915->contexts.list); ctx->i915 = i915; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); mutex_init(&ctx->mutex); @@ -428,7 +320,6 @@ __create_context(struct drm_i915_private *i915) RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); /* NB: Mark all slices as needing a remap so that when the context first * loads it will restore whatever remap state already exists. If there @@ -441,6 +332,10 @@ __create_context(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + spin_lock(&i915->gem.contexts.lock); + list_add_tail(&ctx->link, &i915->gem.contexts.list); + spin_unlock(&i915->gem.contexts.lock); + return ctx; err_free: @@ -470,11 +365,11 @@ static void __apply_ppgtt(struct intel_context *ce, void *vm) static struct i915_address_space * __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - struct i915_address_space *old = ctx->vm; + struct i915_address_space *old = i915_gem_context_vm(ctx); GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - ctx->vm = i915_vm_get(vm); + rcu_assign_pointer(ctx->vm, i915_vm_open(vm)); context_apply_all(ctx, __apply_ppgtt, vm); return old; @@ -483,12 +378,12 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) static void __assign_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - if (vm == ctx->vm) + if (vm == rcu_access_pointer(ctx->vm)) return; vm = __set_ppgtt(ctx, vm); if (vm) - i915_vm_put(vm); + i915_vm_close(vm); } static void __set_timeline(struct intel_timeline **dst, @@ -515,27 +410,25 @@ static void __assign_timeline(struct i915_gem_context *ctx, } static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) +i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { struct i915_gem_context *ctx; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(dev_priv)) + !HAS_EXECLISTS(i915)) return ERR_PTR(-EINVAL); - /* Reap the most stale context */ - contexts_free_first(dev_priv); + /* Reap the stale contexts */ + contexts_flush_free(&i915->gem.contexts); - ctx = __create_context(dev_priv); + ctx = __create_context(i915); if (IS_ERR(ctx)) return ctx; - if (HAS_FULL_PPGTT(dev_priv)) { + if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; - ppgtt = i915_ppgtt_create(dev_priv); + ppgtt = i915_ppgtt_create(i915); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); @@ -543,14 +436,17 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) return ERR_CAST(ppgtt); } + mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { struct intel_timeline *timeline; - timeline = intel_timeline_create(&dev_priv->gt, NULL); + timeline = intel_timeline_create(&i915->gt, NULL); if (IS_ERR(timeline)) { context_close(ctx); return ERR_CAST(timeline); @@ -582,18 +478,11 @@ struct i915_gem_context * i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) { struct i915_gem_context *ctx; - int err; ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; - err = i915_gem_context_pin_hw_id(ctx); - if (err) { - destroy_kernel_context(&ctx); - return ERR_PTR(err); - } - i915_gem_context_clear_bannable(ctx); ctx->sched.priority = I915_USER_PRIORITY(prio); @@ -602,62 +491,41 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; } -static void init_contexts(struct drm_i915_private *i915) +static void init_contexts(struct i915_gem_contexts *gc) { - mutex_init(&i915->contexts.mutex); - INIT_LIST_HEAD(&i915->contexts.list); - - /* Using the simple ida interface, the max is limited by sizeof(int) */ - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); - ida_init(&i915->contexts.hw_ida); - INIT_LIST_HEAD(&i915->contexts.hw_id_list); + spin_lock_init(&gc->lock); + INIT_LIST_HEAD(&gc->list); - INIT_WORK(&i915->contexts.free_work, contexts_free_worker); - init_llist_head(&i915->contexts.free_list); + INIT_WORK(&gc->free_work, contexts_free_worker); + init_llist_head(&gc->free_list); } -int i915_gem_contexts_init(struct drm_i915_private *dev_priv) +int i915_gem_init_contexts(struct drm_i915_private *i915) { struct i915_gem_context *ctx; /* Reassure ourselves we are only called once */ - GEM_BUG_ON(dev_priv->kernel_context); + GEM_BUG_ON(i915->kernel_context); - init_contexts(dev_priv); + init_contexts(&i915->gem.contexts); /* lowest priority; idle task */ - ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); + ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context\n"); return PTR_ERR(ctx); } - /* - * For easy recognisablity, we want the kernel context to be 0 and then - * all user contexts will have non-zero hw_id. Kernel contexts are - * permanently pinned, so that we never suffer a stall and can - * use them from any allocation context (e.g. for evicting other - * contexts and from inside the shrinker). - */ - GEM_BUG_ON(ctx->hw_id); - GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count)); - dev_priv->kernel_context = ctx; + i915->kernel_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", - DRIVER_CAPS(dev_priv)->has_logical_contexts ? + DRIVER_CAPS(i915)->has_logical_contexts ? "logical" : "fake"); return 0; } -void i915_gem_contexts_fini(struct drm_i915_private *i915) +void i915_gem_driver_release__contexts(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->drm.struct_mutex); - destroy_kernel_context(&i915->kernel_context); - - /* Must free all deferred contexts (via flush_workqueue) first */ - GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list)); - ida_destroy(&i915->contexts.hw_ida); } static int context_idr_cleanup(int id, void *p, void *data) @@ -675,11 +543,16 @@ static int vm_idr_cleanup(int id, void *p, void *data) static int gem_context_register(struct i915_gem_context *ctx, struct drm_i915_file_private *fpriv) { + struct i915_address_space *vm; int ret; ctx->file_priv = fpriv; - if (ctx->vm) - ctx->vm->file = fpriv; + + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->file, fpriv); /* XXX */ + mutex_unlock(&ctx->mutex); ctx->pid = get_task_pid(current, PIDTYPE_PID); ctx->name = kasprintf(GFP_KERNEL, "%s[%d]", @@ -716,9 +589,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, idr_init(&file_priv->context_idr); idr_init_base(&file_priv->vm_idr, 1); - mutex_lock(&i915->drm.struct_mutex); ctx = i915_gem_create_context(i915, 0); - mutex_unlock(&i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err; @@ -746,6 +617,7 @@ err: void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_private *i915 = file_priv->dev_priv; idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); idr_destroy(&file_priv->context_idr); @@ -754,6 +626,8 @@ void i915_gem_context_close(struct drm_file *file) idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL); idr_destroy(&file_priv->vm_idr); mutex_destroy(&file_priv->vm_idr_lock); + + contexts_flush_free(&i915->gem.contexts); } int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, @@ -846,6 +720,7 @@ struct context_barrier_task { void *data; }; +__i915_active_call static void cb_retire(struct i915_active *base) { struct context_barrier_task *cb = container_of(base, typeof(*cb), base); @@ -865,20 +740,18 @@ static int context_barrier_task(struct i915_gem_context *ctx, void (*task)(void *data), void *data) { - struct drm_i915_private *i915 = ctx->i915; struct context_barrier_task *cb; struct i915_gem_engines_iter it; struct intel_context *ce; int err = 0; - lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!task); cb = kmalloc(sizeof(*cb), GFP_KERNEL); if (!cb) return -ENOMEM; - i915_active_init(i915, &cb->base, NULL, cb_retire); + i915_active_init(&cb->base, NULL, cb_retire); err = i915_active_acquire(&cb->base); if (err) { kfree(cb); @@ -910,7 +783,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (emit) err = emit(rq, data); if (err == 0) - err = i915_active_ref(&cb->base, rq->timeline, rq); + err = i915_active_add_request(&cb->base, rq); i915_request_add(rq); if (err) @@ -933,16 +806,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_address_space *vm; int ret; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; - /* XXX rcu acquire? */ - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); - if (ret) - return ret; - + rcu_read_lock(); vm = i915_vm_get(ctx->vm); - mutex_unlock(&ctx->i915->drm.struct_mutex); + rcu_read_unlock(); ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); if (ret) @@ -953,7 +822,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (ret < 0) goto err_unlock; - i915_vm_get(vm); + i915_vm_open(vm); args->size = 0; args->value = ret; @@ -973,7 +842,7 @@ static void set_ppgtt_barrier(void *data) if (INTEL_GEN(old->i915) < 8) gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old)); - i915_vm_put(old); + i915_vm_close(old); } static int emit_ppgtt_update(struct i915_request *rq, void *data) @@ -1003,12 +872,18 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) intel_ring_advance(rq, cs); } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + int err; + + /* Magic required to prevent forcewake errors! */ + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + return err; cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES); + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; for (i = GEN8_3LVL_PDPES; i--; ) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); @@ -1045,34 +920,34 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (args->size) return -EINVAL; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; if (upper_32_bits(args->value)) return -ENOENT; - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - return err; - + rcu_read_lock(); vm = idr_find(&file_priv->vm_idr, args->value); - if (vm) - i915_vm_get(vm); - mutex_unlock(&file_priv->vm_idr_lock); + if (vm && !kref_get_unless_zero(&vm->ref)) + vm = NULL; + rcu_read_unlock(); if (!vm) return -ENOENT; - err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + err = mutex_lock_interruptible(&ctx->mutex); if (err) goto out; - if (vm == ctx->vm) + if (i915_gem_context_is_closed(ctx)) { + err = -ENOENT; + goto out; + } + + if (vm == rcu_access_pointer(ctx->vm)) goto unlock; /* Teardown the existing obj:vma cache, it will have to be rebuilt. */ - mutex_lock(&ctx->mutex); lut_close(ctx); - mutex_unlock(&ctx->mutex); old = __set_ppgtt(ctx, vm); @@ -1087,13 +962,12 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, set_ppgtt_barrier, old); if (err) { - i915_vm_put(__set_ppgtt(ctx, old)); - i915_vm_put(old); + i915_vm_close(__set_ppgtt(ctx, old)); + i915_vm_close(old); } unlock: - mutex_unlock(&ctx->i915->drm.struct_mutex); - + mutex_unlock(&ctx->mutex); out: i915_vm_put(vm); return err; @@ -1112,7 +986,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE + - (CTX_R_PWR_CLK_STATE + 1) * 4; + CTX_R_PWR_CLK_STATE * 4; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); @@ -1155,8 +1029,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) } static int -__intel_context_reconfigure_sseu(struct intel_context *ce, - struct intel_sseu sseu) +intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) { int ret; @@ -1180,23 +1053,6 @@ unlock: } static int -intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) -{ - struct drm_i915_private *i915 = ce->engine->i915; - int ret; - - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - - ret = __intel_context_reconfigure_sseu(ce, sseu); - - mutex_unlock(&i915->drm.struct_mutex); - - return ret; -} - -static int user_to_context_sseu(struct drm_i915_private *i915, const struct drm_i915_gem_context_param_sseu *user, struct intel_sseu *context) @@ -1967,10 +1823,11 @@ static int clone_vm(struct i915_gem_context *dst, struct i915_gem_context *src) { struct i915_address_space *vm; + int err = 0; rcu_read_lock(); do { - vm = READ_ONCE(src->vm); + vm = rcu_dereference(src->vm); if (!vm) break; @@ -1992,7 +1849,7 @@ static int clone_vm(struct i915_gem_context *dst, * it cannot be reallocated elsewhere. */ - if (vm == READ_ONCE(src->vm)) + if (vm == rcu_access_pointer(src->vm)) break; i915_vm_put(vm); @@ -2000,11 +1857,16 @@ static int clone_vm(struct i915_gem_context *dst, rcu_read_unlock(); if (vm) { - __assign_ppgtt(dst, vm); + if (!mutex_lock_interruptible(&dst->mutex)) { + __assign_ppgtt(dst, vm); + mutex_unlock(&dst->mutex); + } else { + err = -EINTR; + } i915_vm_put(vm); } - return 0; + return err; } static int create_clone(struct i915_user_extension __user *ext, void *data) @@ -2094,12 +1956,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - ext_data.ctx = i915_gem_create_context(i915, args->flags); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(ext_data.ctx)) return PTR_ERR(ext_data.ctx); @@ -2226,12 +2083,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; - if (ctx->vm) - args->value = ctx->vm->total; - else if (to_i915(dev)->ggtt.alias) - args->value = to_i915(dev)->ggtt.alias->vm.total; + rcu_read_lock(); + if (rcu_access_pointer(ctx->vm)) + args->value = rcu_dereference(ctx->vm)->total; else args->value = to_i915(dev)->ggtt.vm.total; + rcu_read_unlock(); break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: @@ -2297,7 +2154,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_reset_stats *args = data; struct i915_gem_context *ctx; int ret; @@ -2319,7 +2176,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, */ if (capable(CAP_SYS_ADMIN)) - args->reset_count = i915_reset_count(&dev_priv->gpu_error); + args->reset_count = i915_reset_count(&i915->gpu_error); else args->reset_count = 0; @@ -2332,33 +2189,6 @@ out: return ret; } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int err = 0; - - mutex_lock(&i915->contexts.mutex); - - GEM_BUG_ON(i915_gem_context_is_closed(ctx)); - - if (list_empty(&ctx->hw_id_link)) { - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count)); - - err = assign_hw_id(i915, &ctx->hw_id); - if (err) - goto out_unlock; - - list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list); - } - - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u); - atomic_inc(&ctx->hw_id_pin_count); - -out_unlock: - mutex_unlock(&i915->contexts.mutex); - return err; -} - /* GEM context-engines iterator: for_each_gem_engine() */ struct intel_context * i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 176978608b6f..9234586830d1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -11,7 +11,9 @@ #include "gt/intel_context.h" +#include "i915_drv.h" #include "i915_gem.h" +#include "i915_gem_gtt.h" #include "i915_scheduler.h" #include "intel_device_info.h" @@ -112,29 +114,14 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); -static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - if (atomic_inc_not_zero(&ctx->hw_id_pin_count)) - return 0; - - return __i915_gem_context_pin_hw_id(ctx); -} - -static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u); - atomic_dec(&ctx->hw_id_pin_count); -} - static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) { return !ctx->file_priv; } /* i915_gem_context.c */ -int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); -void i915_gem_contexts_fini(struct drm_i915_private *dev_priv); +int __must_check i915_gem_init_contexts(struct drm_i915_private *i915); +void i915_gem_driver_release__contexts(struct drm_i915_private *i915); int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); @@ -173,6 +160,27 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +static inline struct i915_address_space * +i915_gem_context_vm(struct i915_gem_context *ctx) +{ + return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex)); +} + +static inline struct i915_address_space * +i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx) +{ + struct i915_address_space *vm; + + rcu_read_lock(); + vm = rcu_dereference(ctx->vm); + if (!vm) + vm = &ctx->i915->ggtt.vm; + vm = i915_vm_get(vm); + rcu_read_unlock(); + + return vm; +} + static inline struct i915_gem_engines * i915_gem_context_engines(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 260d59cc3de8..ab8e1367dfc8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -88,7 +88,7 @@ struct i915_gem_context { * In other modes, this is a NULL pointer with the expectation that * the caller uses the shared global GTT. */ - struct i915_address_space *vm; + struct i915_address_space __rcu *vm; /** * @pid: process id of creator @@ -147,24 +147,6 @@ struct i915_gem_context { #define CONTEXT_FORCE_SINGLE_SUBMISSION 2 #define CONTEXT_USER_ENGINES 3 - /** - * @hw_id: - unique identifier for the context - * - * The hardware needs to uniquely identify the context for a few - * functions like fault reporting, PASID, scheduling. The - * &drm_i915_private.context_hw_ida is used to assign a unqiue - * id for the lifetime of the context. - * - * @hw_id_pin_count: - number of times this context had been pinned - * for use (should be, at most, once per engine). - * - * @hw_id_link: - all contexts with an assigned id are tracked - * for possible repossession. - */ - unsigned int hw_id; - atomic_t hw_id_pin_count; - struct list_head hw_id_link; - struct mutex mutex; struct i915_sched_attr sched; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 9c58e8fac1d9..9937b4c341f1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -27,7 +27,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) { - if (!READ_ONCE(obj->pin_global)) + if (!i915_gem_object_is_framebuffer(obj)) return; i915_gem_object_lock(obj); @@ -288,14 +288,21 @@ restart: if (!drm_mm_node_allocated(&vma->node)) continue; - ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); + /* Wait for an earlier async bind, need to rewrite it */ + ret = i915_vma_sync(vma); + if (ret) + return ret; + + ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL); if (ret) return ret; } } - list_for_each_entry(vma, &obj->vma.list, obj_link) - vma->node.color = cache_level; + list_for_each_entry(vma, &obj->vma.list, obj_link) { + if (i915_vm_has_cache_coloring(vma->vm)) + vma->node.color = cache_level; + } i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; /* Always invalidate stale cachelines */ @@ -389,16 +396,11 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (ret) goto out; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - goto out; - ret = i915_gem_object_lock_interruptible(obj); if (ret == 0) { ret = i915_gem_object_set_cache_level(obj, level); i915_gem_object_unlock(obj); } - mutex_unlock(&i915->drm.struct_mutex); out: i915_gem_object_put(obj); @@ -422,12 +424,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, assert_object_held(obj); - /* Mark the global pin early so that we account for the - * display coherency whilst setting up the cache domains. - */ - obj->pin_global++; - - /* The display engine is not coherent with the LLC cache on gen6. As + /* + * The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is * done with uncached PTEs. This is lowest common denominator for all * chipsets. @@ -439,12 +437,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, ret = i915_gem_object_set_cache_level(obj, HAS_WT(to_i915(obj->base.dev)) ? I915_CACHE_WT : I915_CACHE_NONE); - if (ret) { - vma = ERR_PTR(ret); - goto err_unpin_global; - } + if (ret) + return ERR_PTR(ret); - /* As the user may map the buffer once pinned in the display plane + /* + * As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. However, * it may simply be too big to fit into mappable, in which case @@ -461,22 +458,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); if (IS_ERR(vma)) - goto err_unpin_global; + return vma; vma->display_alignment = max_t(u64, vma->display_alignment, alignment); __i915_gem_object_flush_for_display(obj); - /* It should now be out of any other write domains, and we can update + /* + * It should now be out of any other write domains, and we can update * the domain values for our changes. */ obj->read_domains |= I915_GEM_DOMAIN_GTT; return vma; - -err_unpin_global: - obj->pin_global--; - return vma; } static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) @@ -491,6 +485,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) if (!drm_mm_node_allocated(&vma->node)) continue; + GEM_BUG_ON(vma->vm != &i915->ggtt.vm); list_move_tail(&vma->vm_link, &vma->vm->bound_list); } mutex_unlock(&i915->ggtt.vm.mutex); @@ -500,7 +495,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) spin_lock_irqsave(&i915->mm.obj_lock, flags); - if (obj->mm.madv == I915_MADV_WILLNEED) + if (obj->mm.madv == I915_MADV_WILLNEED && + !atomic_read(&obj->mm.shrink_pin)) list_move_tail(&obj->mm.link, &i915->mm.shrink_list); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); @@ -514,12 +510,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) assert_object_held(obj); - if (WARN_ON(obj->pin_global == 0)) - return; - - if (--obj->pin_global == 0) - vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - /* Bump the LRU to try and avoid premature eviction whilst flipping */ i915_gem_object_bump_inactive_ggtt(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index b5f6937369ea..e8ddc2320efa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -252,6 +252,7 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; + struct intel_context *ce; struct i915_request *rq; u32 *rq_cmd; unsigned int rq_size; @@ -697,7 +698,9 @@ static int eb_reserve(struct i915_execbuffer *eb) case 1: /* Too fragmented, unbind everything and retry */ + mutex_lock(&eb->context->vm->mutex); err = i915_gem_evict_vm(eb->context->vm); + mutex_unlock(&eb->context->vm->mutex); if (err) return err; break; @@ -725,7 +728,7 @@ static int eb_select_context(struct i915_execbuffer *eb) return -ENOENT; eb->gem_context = ctx; - if (ctx->vm) + if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; eb->context_flags = 0; @@ -880,6 +883,9 @@ static void eb_destroy(const struct i915_execbuffer *eb) { GEM_BUG_ON(eb->reloc_cache.rq); + if (eb->reloc_cache.ce) + intel_context_put(eb->reloc_cache.ce); + if (eb->lut_size > 0) kfree(eb->buckets); } @@ -903,6 +909,7 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->gen < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.allocated = false; + cache->ce = NULL; cache->rq = NULL; cache->rq_size = 0; } @@ -963,11 +970,13 @@ static void reloc_cache_reset(struct reloc_cache *cache) intel_gt_flush_ggtt_writes(ggtt->vm.gt); io_mapping_unmap_atomic((void __iomem *)vaddr); - if (cache->node.allocated) { + if (drm_mm_node_allocated(&cache->node)) { ggtt->vm.clear_range(&ggtt->vm, cache->node.start, cache->node.size); + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(&cache->node); + mutex_unlock(&ggtt->vm.mutex); } else { i915_vma_unpin((struct i915_vma *)cache->node.mm); } @@ -1042,11 +1051,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, PIN_NOEVICT); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); + mutex_lock(&ggtt->vm.mutex); err = drm_mm_insert_node_in_range (&ggtt->vm.mm, &cache->node, PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); + mutex_unlock(&ggtt->vm.mutex); if (err) /* no inactive aperture space, use cpu reloc */ return NULL; } else { @@ -1056,7 +1067,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, } offset = cache->node.start; - if (cache->node.allocated) { + if (drm_mm_node_allocated(&cache->node)) { ggtt->vm.insert_page(&ggtt->vm, i915_gem_object_get_dma_address(obj, page), offset, I915_CACHE_NONE, 0); @@ -1145,7 +1156,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, u32 *cmd; int err; - pool = intel_engine_pool_get(&eb->engine->pool, PAGE_SIZE); + pool = intel_engine_get_pool(eb->engine, PAGE_SIZE); if (IS_ERR(pool)) return PTR_ERR(pool); @@ -1168,7 +1179,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_request_create(eb->context); + rq = intel_context_create_request(cache->ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -1239,6 +1250,29 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (!intel_engine_can_store_dword(eb->engine)) return ERR_PTR(-ENODEV); + if (!cache->ce) { + struct intel_context *ce; + + /* + * The CS pre-parser can pre-fetch commands across + * memory sync points and starting gen12 it is able to + * pre-fetch across BB_START and BB_END boundaries + * (within the same context). We therefore use a + * separate context gen12+ to guarantee that the reloc + * writes land before the parser gets to the target + * memory location. + */ + if (cache->gen >= 12) + ce = intel_context_create(eb->context->gem_context, + eb->engine); + else + ce = intel_context_get(eb->context); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + cache->ce = ce; + } + err = __reloc_gpu_alloc(eb, vma, len); if (unlikely(err)) return ERR_PTR(err); @@ -1388,7 +1422,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && IS_GEN(eb->i915, 6)) { err = i915_vma_bind(target, target->obj->cache_level, - PIN_GLOBAL); + PIN_GLOBAL, NULL); if (WARN_ONCE(err, "Unexpected failure to bind target VMA!")) return err; @@ -1961,7 +1995,7 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) struct i915_vma *vma; int err; - pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len); + pool = intel_engine_get_pool(eb->engine, eb->batch_len); if (IS_ERR(pool)) return ERR_CAST(pool); @@ -2112,35 +2146,6 @@ static struct i915_request *eb_throttle(struct intel_context *ce) return i915_request_get(rq); } -static int -__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) -{ - int err; - - if (likely(atomic_inc_not_zero(&ce->pin_count))) - return 0; - - err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex); - if (err) - return err; - - err = __intel_context_do_pin(ce); - mutex_unlock(&eb->i915->drm.struct_mutex); - - return err; -} - -static void -__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce) -{ - if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) - return; - - mutex_lock(&eb->i915->drm.struct_mutex); - intel_context_unpin(ce); - mutex_unlock(&eb->i915->drm.struct_mutex); -} - static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) { struct intel_timeline *tl; @@ -2160,7 +2165,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - err = __eb_pin_context(eb, ce); + err = intel_context_pin(ce); if (err) return err; @@ -2204,7 +2209,7 @@ err_exit: intel_context_exit(ce); intel_context_timeline_unlock(tl); err_unpin: - __eb_unpin_context(eb, ce); + intel_context_unpin(ce); return err; } @@ -2217,7 +2222,7 @@ static void eb_unpin_engine(struct i915_execbuffer *eb) intel_context_exit(ce); mutex_unlock(&tl->mutex); - __eb_unpin_context(eb, ce); + intel_context_unpin(ce); } static unsigned int diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 261c9bd83f51..fd4122d8c0a9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -8,6 +8,7 @@ #include <linux/sizes.h> #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_gem_gtt.h" @@ -245,21 +246,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) wakeref = intel_runtime_pm_get(rpm); - srcu = intel_gt_reset_trylock(ggtt->vm.gt); - if (srcu < 0) { - ret = srcu; - goto err_rpm; - } - - ret = i915_mutex_lock_interruptible(dev); + ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); if (ret) - goto err_reset; - - /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { - ret = -EFAULT; - goto err_unlock; - } + goto err_rpm; /* Now pin it into the GTT as needed */ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, @@ -287,10 +276,19 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) view.type = I915_GGTT_VIEW_PARTIAL; vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); } + + /* The entire mappable GGTT is pinned? Unexpected! */ + GEM_BUG_ON(vma == ERR_PTR(-ENOSPC)); } if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto err_unlock; + goto err_reset; + } + + /* Access to snoopable pages through the GTT is incoherent. */ + if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { + ret = -EFAULT; + goto err_unpin; } ret = i915_vma_pin_fence(vma); @@ -318,14 +316,16 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); - i915_vma_set_ggtt_write(vma); + if (write) { + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + i915_vma_set_ggtt_write(vma); + obj->mm.dirty = true; + } err_fence: i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); -err_unlock: - mutex_unlock(&dev->struct_mutex); err_reset: intel_gt_reset_unlock(ggtt->vm.gt, srcu); err_rpm: @@ -333,23 +333,20 @@ err_rpm: i915_gem_object_unpin_pages(obj); err: switch (ret) { - case -EIO: - /* - * We eat errors when the gpu is terminally wedged to avoid - * userspace unduly crashing (gl has no provisions for mmaps to - * fail). But any other -EIO isn't ours (e.g. swap in failure) - * and so needs to be reported. - */ - if (!intel_gt_is_wedged(ggtt->vm.gt)) - return VM_FAULT_SIGBUS; - /* else, fall through */ - case -EAGAIN: - /* - * EAGAIN means the gpu is hung and we'll wait for the error - * handler to reset everything when re-faulting in - * i915_mutex_lock_interruptible. - */ + default: + WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret); + /* fallthrough */ + case -EIO: /* shmemfs failure from swap device */ + case -EFAULT: /* purged object */ + case -ENODEV: /* bad object, how did you get here! */ + return VM_FAULT_SIGBUS; + + case -ENOSPC: /* shmemfs allocation failure */ + case -ENOMEM: /* our allocation failure */ + return VM_FAULT_OOM; + case 0: + case -EAGAIN: case -ERESTARTSYS: case -EINTR: case -EBUSY: @@ -358,14 +355,6 @@ err: * already did the job. */ return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - case -ENOSPC: - case -EFAULT: - return VM_FAULT_SIGBUS; - default: - WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret); - return VM_FAULT_SIGBUS; } } @@ -436,6 +425,7 @@ out: static int create_mmap_offset(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_gt *gt = &i915->gt; int err; err = drm_gem_create_mmap_offset(&obj->base); @@ -443,21 +433,12 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj) return 0; /* Attempt to reap some mmap space from dead objects */ - do { - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; - - i915_gem_drain_freed_objects(i915); - err = drm_gem_create_mmap_offset(&obj->base); - if (!err) - break; + err = intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT); + if (err) + return err; - } while (flush_delayed_work(&i915->gem.retire_work)); - - return err; + i915_gem_drain_freed_objects(i915); + return drm_gem_create_mmap_offset(&obj->base); } int @@ -473,10 +454,16 @@ i915_gem_mmap_gtt(struct drm_file *file, if (!obj) return -ENOENT; + if (i915_gem_object_never_bind_ggtt(obj)) { + ret = -ENODEV; + goto out; + } + ret = create_mmap_offset(obj); if (ret == 0) *offset = drm_vma_node_offset_addr(&obj->base.vma_node); +out: i915_gem_object_put(obj); return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index d7855dc5a5c5..dbf9be9a79f4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -155,21 +155,30 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, wakeref = intel_runtime_pm_get(&i915->runtime_pm); llist_for_each_entry_safe(obj, on, freed, freed) { - struct i915_vma *vma, *vn; - trace_i915_gem_object_destroy(obj); - mutex_lock(&i915->drm.struct_mutex); - - list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { - GEM_BUG_ON(i915_vma_is_active(vma)); - vma->flags &= ~I915_VMA_PIN_MASK; - i915_vma_destroy(vma); + if (!list_empty(&obj->vma.list)) { + struct i915_vma *vma; + + /* + * Note that the vma keeps an object reference while + * it is active, so it *should* not sleep while we + * destroy it. Our debug code errs insits it *might*. + * For the moment, play along. + */ + spin_lock(&obj->vma.lock); + while ((vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { + GEM_BUG_ON(vma->obj != obj); + spin_unlock(&obj->vma.lock); + + i915_vma_destroy(vma); + + spin_lock(&obj->vma.lock); + } + spin_unlock(&obj->vma.lock); } - GEM_BUG_ON(!list_empty(&obj->vma.list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); - - mutex_unlock(&i915->drm.struct_mutex); GEM_BUG_ON(atomic_read(&obj->bind_count)); GEM_BUG_ON(obj->userfault_count); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 5efb9936e05b..086a9bf5adcc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -106,6 +106,11 @@ static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) dma_resv_lock(obj->base.resv, NULL); } +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +{ + return dma_resv_trylock(obj->base.resv); +} + static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj) { @@ -135,27 +140,40 @@ i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj) } static inline bool +i915_gem_object_type_has(const struct drm_i915_gem_object *obj, + unsigned long flags) +{ + return obj->ops->flags & flags; +} + +static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE); } static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE); } static inline bool i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY); +} + +static inline bool +i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj) +{ + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_GGTT); } static inline bool i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL); } static inline bool @@ -406,7 +424,8 @@ static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) return true; - return obj->pin_global; /* currently in use by HW, keep flushed */ + /* Currently in use by HW (display engine)? Keep flushed. */ + return i915_gem_object_is_framebuffer(obj); } static inline void __start_cpu_write(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index 6415f9a17e2d..5bd8de124d74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -32,7 +32,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, count = div_u64(vma->size, block_size); size = (1 + 8 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_engine_pool_get(&ce->engine->pool, size); + pool = intel_engine_get_pool(ce->engine, size); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; @@ -216,7 +216,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, count = div_u64(dst->size, block_size); size = (1 + 11 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_engine_pool_get(&ce->engine->pool, size); + pool = intel_engine_get_pool(ce->engine, size); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index ede0eb4218a8..c00b4f077f9e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -8,6 +8,7 @@ #define __I915_GEM_OBJECT_TYPES_H__ #include <drm/drm_gem.h> +#include <uapi/drm/i915_drm.h> #include "i915_active.h" #include "i915_selftest.h" @@ -32,7 +33,8 @@ struct drm_i915_gem_object_ops { #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) #define I915_GEM_OBJECT_IS_PROXY BIT(2) -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3) +#define I915_GEM_OBJECT_NO_GGTT BIT(3) +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(4) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set @@ -152,17 +154,15 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ atomic_t bind_count; - /** Count of how many global VMA are currently pinned for use by HW */ - unsigned int pin_global; struct { struct mutex lock; /* protects the pages and their use */ atomic_t pages_pin_count; + atomic_t shrink_pin; struct sg_table *pages; void *mapping; - /* TODO: whack some of this into the error state */ struct i915_page_sizes { /** * The sg mask of the pages sg_table. i.e the mask of diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 18f0ce0135c1..2e941f093a20 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -71,6 +71,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, list = &i915->mm.shrink_list; list_add_tail(&obj->mm.link, list); + atomic_set(&obj->mm.shrink_pin, 0); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 92e53c25424c..7987b54fb1f5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -7,79 +7,9 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" -#include "i915_globals.h" - -static void call_idle_barriers(struct intel_engine_cs *engine) -{ - struct llist_node *node, *next; - - llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { - struct i915_active_request *active = - container_of((struct list_head *)node, - typeof(*active), link); - - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, NULL); - } -} - -static void i915_gem_park(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - lockdep_assert_held(&i915->drm.struct_mutex); - - for_each_engine(engine, i915, id) - call_idle_barriers(engine); /* cleanup after wedging */ - - i915_vma_parked(i915); - - i915_globals_park(); -} - -static void idle_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.idle_work); - bool park; - - cancel_delayed_work_sync(&i915->gem.retire_work); - mutex_lock(&i915->drm.struct_mutex); - - intel_wakeref_lock(&i915->gt.wakeref); - park = (!intel_wakeref_is_active(&i915->gt.wakeref) && - !work_pending(work)); - intel_wakeref_unlock(&i915->gt.wakeref); - if (park) - i915_gem_park(i915); - else - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); - - mutex_unlock(&i915->drm.struct_mutex); -} - -static void retire_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.retire_work.work); - - /* Come back later if the device is busy... */ - if (mutex_trylock(&i915->drm.struct_mutex)) { - i915_retire_requests(i915); - mutex_unlock(&i915->drm.struct_mutex); - } - - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); -} static int pm_notifier(struct notifier_block *nb, unsigned long action, @@ -90,14 +20,10 @@ static int pm_notifier(struct notifier_block *nb, switch (action) { case INTEL_GT_UNPARK: - i915_globals_unpark(); - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); break; case INTEL_GT_PARK: - queue_work(i915->wq, &i915->gem.idle_work); + i915_vma_parked(i915); break; } @@ -108,26 +34,21 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) { bool result = !intel_gt_is_wedged(gt); - do { - if (i915_gem_wait_for_idle(gt->i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { - /* XXX hide warning from gem_eio */ - if (i915_modparams.reset) { - dev_err(gt->i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* - * Forcibly cancel outstanding work and leave - * the gpu quiet. - */ - intel_gt_set_wedged(gt); - result = false; + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* XXX hide warning from gem_eio */ + if (i915_modparams.reset) { + dev_err(gt->i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); } - } while (i915_retire_requests(gt->i915) && result); + + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + intel_gt_set_wedged(gt); + result = false; + } if (intel_gt_pm_wait_for_idle(gt)) result = false; @@ -140,6 +61,24 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915) return switch_to_kernel_context_sync(&i915->gt); } +static void user_forcewake(struct intel_gt *gt, bool suspend) +{ + int count = atomic_read(>->user_wakeref); + + /* Inside suspend/resume so single threaded, no races to worry about. */ + if (likely(!count)) + return; + + intel_gt_pm_get(gt); + if (suspend) { + GEM_BUG_ON(count > atomic_read(>->wakeref.count)); + atomic_sub(count, >->wakeref.count); + } else { + atomic_add(count, >->wakeref.count); + } + intel_gt_pm_put(gt); +} + void i915_gem_suspend(struct drm_i915_private *i915) { GEM_TRACE("\n"); @@ -147,7 +86,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); - mutex_lock(&i915->drm.struct_mutex); + user_forcewake(&i915->gt, true); /* * We have to flush all the executing contexts to main memory so @@ -158,15 +97,12 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - switch_to_kernel_context_sync(&i915->gt); - - mutex_unlock(&i915->drm.struct_mutex); + intel_gt_suspend(&i915->gt); + intel_uc_suspend(&i915->gt.uc); cancel_delayed_work_sync(&i915->gt.hangcheck.work); i915_gem_drain_freed_objects(i915); - - intel_uc_suspend(&i915->gt.uc); } static struct drm_i915_gem_object *first_mm_object(struct list_head *list) @@ -238,13 +174,9 @@ void i915_gem_resume(struct drm_i915_private *i915) { GEM_TRACE("\n"); - mutex_lock(&i915->drm.struct_mutex); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - i915_gem_restore_gtt_mappings(i915); - i915_gem_restore_fences(i915); - - if (i915_gem_init_hw(i915)) + if (intel_gt_init_hw(&i915->gt)) goto err_wedged; /* @@ -261,9 +193,10 @@ void i915_gem_resume(struct drm_i915_private *i915) if (!i915_gem_load_power_context(i915)) goto err_wedged; + user_forcewake(&i915->gt, false); + out_unlock: intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - mutex_unlock(&i915->drm.struct_mutex); return; err_wedged: @@ -277,9 +210,6 @@ err_wedged: void i915_gem_init__pm(struct drm_i915_private *i915) { - INIT_WORK(&i915->gem.idle_work, idle_work_handler); - INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); - i915->gem.pm_notifier.notifier_call = pm_notifier; blocking_notifier_chain_register(&i915->gt.pm_notifications, &i915->gem.pm_notifier); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index edd21d14e64f..fd3ce6da8497 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -16,40 +16,6 @@ #include "i915_trace.h" -static bool shrinker_lock(struct drm_i915_private *i915, - unsigned int flags, - bool *unlock) -{ - struct mutex *m = &i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(m)) { - case MUTEX_TRYLOCK_RECURSIVE: - *unlock = false; - return true; - - case MUTEX_TRYLOCK_FAILED: - *unlock = false; - if (flags & I915_SHRINK_ACTIVE && - mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0) - *unlock = true; - return *unlock; - - case MUTEX_TRYLOCK_SUCCESS: - *unlock = true; - return true; - } - - BUG(); -} - -static void shrinker_unlock(struct drm_i915_private *i915, bool unlock) -{ - if (!unlock) - return; - - mutex_unlock(&i915->drm.struct_mutex); -} - static bool swap_available(void) { return get_nr_swap_pages() > 0; @@ -61,7 +27,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (!i915_gem_object_is_shrinkable(obj)) return false; - /* Only report true if by unbinding the object and putting its pages + /* + * Only report true if by unbinding the object and putting its pages * we can actually make forward progress towards freeing physical * pages. * @@ -72,16 +39,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count)) return false; - /* If any vma are "permanently" pinned, it will prevent us from - * reclaiming the obj->mm.pages. We only allow scanout objects to claim - * a permanent pin, along with a few others like the context objects. - * To simplify the scan, and to avoid walking the list of vma under the - * object, we just check the count of its permanently pinned. - */ - if (READ_ONCE(obj->pin_global)) - return false; - - /* We can only return physical pages to the system if we can either + /* + * We can only return physical pages to the system if we can either * discard the contents (because the user has marked them as being * purgeable) or if we can move their contents out to swap. */ @@ -162,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915, intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; - bool unlock; - - if (!shrinker_lock(i915, shrink, &unlock)) - return 0; /* * When shrinking the active list, we should also consider active @@ -275,8 +230,6 @@ i915_gem_shrink(struct drm_i915_private *i915, if (shrink & I915_SHRINK_BOUND) intel_runtime_pm_put(&i915->runtime_pm, wakeref); - shrinker_unlock(i915, unlock); - if (nr_scanned) *nr_scanned += scanned; return count; @@ -346,19 +299,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); unsigned long freed; - bool unlock; sc->nr_scanned = 0; - if (!shrinker_lock(i915, 0, &unlock)) - return SHRINK_STOP; - freed = i915_gem_shrink(i915, sc->nr_to_scan, &sc->nr_scanned, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); + I915_SHRINK_UNBOUND); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { intel_wakeref_t wakeref; @@ -373,8 +321,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) } } - shrinker_unlock(i915, unlock); - return sc->nr_scanned ? freed : SHRINK_STOP; } @@ -391,6 +337,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK); @@ -426,10 +373,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr struct i915_vma *vma, *next; unsigned long freed_pages = 0; intel_wakeref_t wakeref; - bool unlock; - - if (!shrinker_lock(i915, 0, &unlock)) - return NOTIFY_DONE; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, @@ -446,15 +389,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr if (!vma->iomap || i915_vma_is_active(vma)) continue; - mutex_unlock(&i915->ggtt.vm.mutex); - if (i915_vma_unbind(vma) == 0) + if (__i915_vma_unbind(vma) == 0) freed_pages += count; - mutex_lock(&i915->ggtt.vm.mutex); } mutex_unlock(&i915->ggtt.vm.mutex); - shrinker_unlock(i915, unlock); - *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; } @@ -497,22 +436,9 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_acquire(GFP_KERNEL); - /* - * As we invariably rely on the struct_mutex within the shrinker, - * but have a complicated recursion dance, taint all the mutexes used - * within the shrinker with the struct_mutex. For completeness, we - * taint with all subclass of struct_mutex, even though we should - * only need tainting by I915_MM_NORMAL to catch possible ABBA - * deadlocks from using struct_mutex inside @mutex. - */ - mutex_acquire(&i915->drm.struct_mutex.dep_map, - I915_MM_SHRINKER, 0, _RET_IP_); - mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); mutex_release(&mutex->dep_map, 0, _RET_IP_); - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); - fs_reclaim_release(GFP_KERNEL); if (unlock) @@ -523,46 +449,52 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; + /* * We can only be called while the pages are pinned or when * the pages are released. If pinned, we should only be called * from a single caller under controlled conditions; and on release * only one caller may release us. Neither the two may cross. */ - if (!list_empty(&obj->mm.link)) { /* pinned by caller */ - struct drm_i915_private *i915 = obj_to_i915(obj); - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); - GEM_BUG_ON(list_empty(&obj->mm.link)); + if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0)) + return; + spin_lock_irqsave(&i915->mm.obj_lock, flags); + if (!atomic_fetch_inc(&obj->mm.shrink_pin) && + !list_empty(&obj->mm.link)) { list_del_init(&obj->mm.link); i915->mm.shrink_count--; i915->mm.shrink_memory -= obj->base.size; - - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, struct list_head *head) { + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); - GEM_BUG_ON(!list_empty(&obj->mm.link)); + if (!i915_gem_object_is_shrinkable(obj)) + return; - if (i915_gem_object_is_shrinkable(obj)) { - struct drm_i915_private *i915 = obj_to_i915(obj); - unsigned long flags; + if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1)) + return; - spin_lock_irqsave(&i915->mm.obj_lock, flags); - GEM_BUG_ON(!kref_read(&obj->base.refcount)); + spin_lock_irqsave(&i915->mm.obj_lock, flags); + GEM_BUG_ON(!kref_read(&obj->base.refcount)); + if (atomic_dec_and_test(&obj->mm.shrink_pin)) { + GEM_BUG_ON(!list_empty(&obj->mm.link)); list_add_tail(&obj->mm.link, head); i915->mm.shrink_count++; i915->mm.shrink_memory += obj->base.size; - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index aa533b4ab5f5..c76260ce13e3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -425,8 +425,11 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) bdw_get_stolen_reserved(dev_priv, &reserved_base, &reserved_size); break; - case 11: default: + MISSING_CASE(INTEL_GEN(dev_priv)); + /* fall-through */ + case 11: + case 12: icl_get_stolen_reserved(dev_priv, &reserved_base, &reserved_size); break; @@ -550,10 +553,11 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, { struct drm_i915_gem_object *obj; unsigned int cache_level; + int err = -ENOMEM; obj = i915_gem_object_alloc(); - if (obj == NULL) - return NULL; + if (!obj) + goto err; drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); i915_gem_object_init(obj, &i915_gem_object_stolen_ops); @@ -563,14 +567,16 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); - if (i915_gem_object_pin_pages(obj)) + err = i915_gem_object_pin_pages(obj); + if (err) goto cleanup; return obj; cleanup: i915_gem_object_free(obj); - return NULL; +err: + return ERR_PTR(err); } struct drm_i915_gem_object * @@ -582,28 +588,32 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, int ret; if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; + return ERR_PTR(-ENODEV); if (size == 0) - return NULL; + return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return NULL; + return ERR_PTR(-ENOMEM); ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096); if (ret) { - kfree(stolen); - return NULL; + obj = ERR_PTR(ret); + goto err_free; } obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj) - return obj; + if (IS_ERR(obj)) + goto err_remove; + + return obj; +err_remove: i915_gem_stolen_remove_node(dev_priv, stolen); +err_free: kfree(stolen); - return NULL; + return obj; } struct drm_i915_gem_object * @@ -619,9 +629,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv int ret; if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); + return ERR_PTR(-ENODEV); DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", &stolen_offset, >t_offset, &size); @@ -630,11 +638,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (WARN_ON(size == 0) || WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) - return NULL; + return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return NULL; + return ERR_PTR(-ENOMEM); stolen->start = stolen_offset; stolen->size = size; @@ -644,15 +652,15 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen space\n"); kfree(stolen); - return NULL; + return ERR_PTR(ret); } obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj == NULL) { + if (IS_ERR(obj)) { DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); i915_gem_stolen_remove_node(dev_priv, stolen); kfree(stolen); - return NULL; + return obj; } /* Some objects just need physical mem from stolen space */ @@ -674,22 +682,26 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv * setting up the GTT space. The actual reservation will occur * later. */ + mutex_lock(&ggtt->vm.mutex); ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, size, gtt_offset, obj->cache_level, 0); if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n"); + mutex_unlock(&ggtt->vm.mutex); goto err_pages; } GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->pages); vma->pages = obj->mm.pages; - vma->flags |= I915_VMA_GLOBAL_BIND; + atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); + + set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); __i915_vma_set_map_and_fenceable(vma); - mutex_lock(&ggtt->vm.mutex); - list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); + list_add_tail(&vma->vm_link, &ggtt->vm.bound_list); mutex_unlock(&ggtt->vm.mutex); GEM_BUG_ON(i915_gem_object_is_shrinkable(obj)); @@ -701,5 +713,5 @@ err_pages: i915_gem_object_unpin_pages(obj); err: i915_gem_object_put(obj); - return NULL; + return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index 1e372420771b..540ef0551789 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -50,10 +50,8 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, if (time_after_eq(request->emitted_jiffies, recent_enough)) break; - if (target) { + if (target && xchg(&target->file_priv, NULL)) list_del(&target->client_link); - target->file_priv = NULL; - } target = request; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ca0c2f451742..dc2a83ce44d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -181,22 +181,25 @@ static int i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode, unsigned int stride) { + struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; struct i915_vma *vma; - int ret; + int ret = 0; if (tiling_mode == I915_TILING_NONE) return 0; + mutex_lock(&ggtt->vm.mutex); for_each_ggtt_vma(vma, obj) { if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); if (ret) - return ret; + break; } + mutex_unlock(&ggtt->vm.mutex); - return 0; + return ret; } int @@ -212,7 +215,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride)); GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE)); - lockdep_assert_held(&i915->drm.struct_mutex); if ((tiling | stride) == obj->tiling_and_stride) return 0; @@ -233,16 +235,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * whilst executing a fenced command for an untiled object. */ - err = i915_gem_object_fence_prepare(obj, tiling, stride); - if (err) - return err; - i915_gem_object_lock(obj); if (i915_gem_object_is_framebuffer(obj)) { i915_gem_object_unlock(obj); return -EBUSY; } + err = i915_gem_object_fence_prepare(obj, tiling, stride); + if (err) { + i915_gem_object_unlock(obj); + return err; + } + /* If the memory has unknown (i.e. varying) swizzling, we pin the * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. @@ -313,10 +317,14 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_set_tiling *args = data; struct drm_i915_gem_object *obj; int err; + if (!dev_priv->ggtt.num_fences) + return -EOPNOTSUPP; + obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; @@ -364,12 +372,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, } } - err = mutex_lock_interruptible(&dev->struct_mutex); - if (err) - goto err; - err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); - mutex_unlock(&dev->struct_mutex); /* We have to maintain this existing ABI... */ args->stride = i915_gem_object_get_stride(obj); @@ -402,6 +405,9 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int err = -ENOENT; + if (!dev_priv->ggtt.num_fences) + return -EOPNOTSUPP; + rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, args->handle); if (obj) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 11b231c187c5..4f970474013f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -92,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn); struct interval_tree_node *it; - struct mutex *unlock = NULL; unsigned long end; int ret = 0; @@ -129,33 +128,13 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); - if (!unlock) { - unlock = &mn->mm->i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(unlock)) { - default: - case MUTEX_TRYLOCK_FAILED: - if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { - i915_gem_object_put(obj); - return -EINTR; - } - /* fall through */ - case MUTEX_TRYLOCK_SUCCESS: - break; - - case MUTEX_TRYLOCK_RECURSIVE: - unlock = ERR_PTR(-EEXIST); - break; - } - } - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); if (ret == 0) ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); i915_gem_object_put(obj); if (ret) - goto unlock; + return ret; spin_lock(&mn->lock); @@ -168,10 +147,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); -unlock: - if (!IS_ERR_OR_NULL(unlock)) - mutex_unlock(unlock); - return ret; } @@ -702,6 +677,7 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_NO_GGTT | I915_GEM_OBJECT_ASYNC_CANCEL, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, @@ -782,7 +758,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, * On almost all of the older hw, we cannot tell the GPU that * a page is readonly. */ - vm = dev_priv->kernel_context->vm; + vm = rcu_dereference_protected(dev_priv->kernel_context->vm, + true); /* static vm */ if (!vm || !vm->has_read_only) return -ENODEV; } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 8de83c6d81f5..e42abddd4a36 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -333,7 +333,12 @@ static int igt_check_page_sizes(struct i915_vma *vma) struct drm_i915_private *i915 = vma->vm->i915; unsigned int supported = INTEL_INFO(i915)->page_sizes; struct drm_i915_gem_object *obj = vma->obj; - int err = 0; + int err; + + /* We have to wait for the async bind to complete before our asserts */ + err = i915_vma_sync(vma); + if (err) + return err; if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { pr_err("unsupported page_sizes.sg=%u, supported=%u\n", @@ -879,9 +884,8 @@ out_object_put: return err; } -static int gpu_write(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int gpu_write(struct intel_context *ce, + struct i915_vma *vma, u32 dw, u32 val) { @@ -893,7 +897,7 @@ static int gpu_write(struct i915_vma *vma, if (err) return err; - return igt_gpu_fill_dw(vma, ctx, engine, dw * sizeof(u32), + return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), vma->size >> PAGE_SHIFT, val); } @@ -929,18 +933,16 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) return err; } -static int __igt_write_huge(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int __igt_write_huge(struct intel_context *ce, struct drm_i915_gem_object *obj, u64 size, u64 offset, u32 dword, u32 val) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; struct i915_vma *vma; int err; - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -954,7 +956,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, * The ggtt may have some pages reserved so * refrain from erroring out. */ - if (err == -ENOSPC && i915_is_ggtt(vm)) + if (err == -ENOSPC && i915_is_ggtt(ce->vm)) err = 0; goto out_vma_close; @@ -964,7 +966,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, if (err) goto out_vma_unpin; - err = gpu_write(vma, ctx, engine, dword, val); + err = gpu_write(ce, vma, dword, val); if (err) { pr_err("gpu-write failed at offset=%llx\n", offset); goto out_vma_unpin; @@ -987,14 +989,13 @@ out_vma_close: static int igt_write_huge(struct i915_gem_context *ctx, struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; - static struct intel_engine_cs *engines[I915_NUM_ENGINES]; - struct intel_engine_cs *engine; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + struct intel_context *ce; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); unsigned int max_page_size; - unsigned int id; + unsigned int count; u64 max; u64 num; u64 size; @@ -1008,19 +1009,18 @@ static int igt_write_huge(struct i915_gem_context *ctx, if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) size = round_up(size, I915_GTT_PAGE_SIZE_2M); - max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); - max = div_u64((vm->total - size), max_page_size); - n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) { - pr_info("store-dword-imm not supported on engine=%u\n", - id); + count = 0; + max = U64_MAX; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) continue; - } - engines[n++] = engine; - } + max = min(max, ce->vm->total); + n++; + } + i915_gem_context_unlock_engines(ctx); if (!n) return 0; @@ -1029,23 +1029,30 @@ static int igt_write_huge(struct i915_gem_context *ctx, * randomized order, lets also make feeding to the same engine a few * times in succession a possibility by enlarging the permutation array. */ - order = i915_random_order(n * I915_NUM_ENGINES, &prng); + order = i915_random_order(count * count, &prng); if (!order) return -ENOMEM; + max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); + max = div_u64(max - size, max_page_size); + /* * Try various offsets in an ascending/descending fashion until we * timeout -- we want to avoid issues hidden by effectively always using * offset = 0. */ i = 0; + engines = i915_gem_context_lock_engines(ctx); for_each_prime_number_from(num, 0, max) { u64 offset_low = num * max_page_size; u64 offset_high = (max - num) * max_page_size; u32 dword = offset_in_page(num) / 4; + struct intel_context *ce; - engine = engines[order[i] % n]; - i = (i + 1) % (n * I915_NUM_ENGINES); + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; /* * In order to utilize 64K pages we need to both pad the vma @@ -1057,22 +1064,23 @@ static int igt_write_huge(struct i915_gem_context *ctx, offset_low = round_down(offset_low, I915_GTT_PAGE_SIZE_2M); - err = __igt_write_huge(ctx, engine, obj, size, offset_low, + err = __igt_write_huge(ce, obj, size, offset_low, dword, num + 1); if (err) break; - err = __igt_write_huge(ctx, engine, obj, size, offset_high, + err = __igt_write_huge(ce, obj, size, offset_high, dword, num + 1); if (err) break; if (igt_timeout(end_time, - "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n", - __func__, engine->id, offset_low, offset_high, + "%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n", + __func__, ce->engine->name, offset_low, offset_high, max_page_size)) break; } + i915_gem_context_unlock_engines(ctx); kfree(order); @@ -1314,15 +1322,15 @@ static int igt_ppgtt_pin_update(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *dev_priv = ctx->i915; unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; - struct i915_address_space *vm = ctx->vm; struct drm_i915_gem_object *obj; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; - struct intel_engine_cs *engine; - enum intel_engine_id id; unsigned int n; int first, last; - int err; + int err = 0; /* * Make sure there's no funny business when doing a PIN_UPDATE -- in the @@ -1332,9 +1340,10 @@ static int igt_ppgtt_pin_update(void *arg) * huge-gtt-pages. */ - if (!vm || !i915_vm_is_4lvl(vm)) { + vm = i915_gem_context_get_vm_rcu(ctx); + if (!i915_vm_is_4lvl(vm)) { pr_info("48b PPGTT not supported, skipping\n"); - return 0; + goto out_vm; } first = ilog2(I915_GTT_PAGE_SIZE_64K); @@ -1387,7 +1396,7 @@ static int igt_ppgtt_pin_update(void *arg) goto out_unpin; } - err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL); if (err) goto out_unpin; @@ -1419,14 +1428,18 @@ static int igt_ppgtt_pin_update(void *arg) */ n = 0; - for_each_engine(engine, dev_priv, id) { - if (!intel_engine_can_store_dword(engine)) + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; - err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + err = gpu_write(ce, vma, n++, 0xdeadbeaf); if (err) - goto out_unpin; + break; } + i915_gem_context_unlock_engines(ctx); + if (err) + goto out_unpin; + while (n--) { err = cpu_check(obj, n, 0xdeadbeaf); if (err) @@ -1439,6 +1452,8 @@ out_close: i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1448,7 +1463,7 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1498,6 +1513,7 @@ out_put: out_restore: i915->mm.gemfs = gemfs; + i915_vm_put(vm); return err; } @@ -1505,14 +1521,14 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct i915_gem_engines_iter it; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER; unsigned int n; - int err; + int err = 0; /* * Sanity check shrinking huge-paged object -- make sure nothing blows @@ -1521,12 +1537,14 @@ static int igt_shrink_thp(void *arg) if (!igt_can_allocate_thp(i915)) { pr_info("missing THP support, skipping\n"); - return 0; + goto out_vm; } obj = i915_gem_object_create_shmem(i915, SZ_2M); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { @@ -1548,16 +1566,19 @@ static int igt_shrink_thp(void *arg) goto out_unpin; n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; - err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + err = gpu_write(ce, vma, n++, 0xdeadbeaf); if (err) - goto out_unpin; + break; } - + i915_gem_context_unlock_engines(ctx); i915_vma_unpin(vma); + if (err) + goto out_close; /* * Now that the pages are *unpinned* shrink-all should invoke @@ -1583,16 +1604,17 @@ static int igt_shrink_thp(void *arg) while (n--) { err = cpu_check(obj, n, 0xdeadbeaf); if (err) - goto out_unpin; + break; } - out_unpin: i915_vma_unpin(vma); out_close: i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1617,7 +1639,6 @@ int i915_gem_huge_page_mock_selftests(void) mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; mkwrite_device_info(dev_priv)->ppgtt_size = 48; - mutex_lock(&dev_priv->drm.struct_mutex); ppgtt = i915_ppgtt_create(dev_priv); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); @@ -1643,9 +1664,7 @@ out_close: i915_vm_put(&ppgtt->vm); out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); drm_dev_put(&dev_priv->drm); - return err; } @@ -1661,7 +1680,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) }; struct drm_file *file; struct i915_gem_context *ctx; - intel_wakeref_t wakeref; + struct i915_address_space *vm; int err; if (!HAS_PPGTT(i915)) { @@ -1676,25 +1695,21 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - if (ctx->vm) - ctx->vm->scrub_64K = true; + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->scrub_64K, true); + mutex_unlock(&ctx->mutex); err = i915_subtests(tests, ctx); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - +out_file: mock_file_free(i915, file); - return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 0ff7a89aadca..549810f70aeb 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" #include "i915_selftest.h" #include "selftests/i915_random.h" @@ -78,7 +79,7 @@ static int gtt_set(struct drm_i915_gem_object *obj, { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, true); @@ -90,15 +91,21 @@ static int gtt_set(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } iowrite32(v, &map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } static int gtt_get(struct drm_i915_gem_object *obj, @@ -107,7 +114,7 @@ static int gtt_get(struct drm_i915_gem_object *obj, { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); @@ -119,15 +126,21 @@ static int gtt_get(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } *v = ioread32(&map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } static int wc_set(struct drm_i915_gem_object *obj, @@ -280,7 +293,6 @@ static int igt_gem_coherency(void *arg) struct drm_i915_private *i915 = arg; const struct igt_coherency_mode *read, *write, *over; struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; unsigned long count, n; u32 *offsets, *values; int err = 0; @@ -299,8 +311,6 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); for (over = igt_coherency_mode; over->name; over++) { if (!over->set) continue; @@ -326,7 +336,7 @@ static int igt_gem_coherency(void *arg) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto unlock; + goto free; } i915_random_reorder(offsets, ncachelines, &prng); @@ -377,15 +387,13 @@ static int igt_gem_coherency(void *arg) } } } -unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); +free: kfree(offsets); return err; put_object: i915_gem_object_put(obj); - goto unlock; + goto free; } int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 3e6f4a65d356..fb58c0919ea1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -8,6 +8,7 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -52,19 +53,17 @@ static int live_nop_switch(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); if (!ctx) { err = -ENOMEM; - goto out_unlock; + goto out_file; } for (n = 0; n < nctx; n++) { ctx[n] = live_context(i915, file); if (IS_ERR(ctx[n])) { err = PTR_ERR(ctx[n]); - goto out_unlock; + goto out_file; } } @@ -78,7 +77,7 @@ static int live_nop_switch(void *arg) rq = igt_request_alloc(ctx[n], engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unlock; + goto out_file; } i915_request_add(rq); } @@ -86,7 +85,7 @@ static int live_nop_switch(void *arg) pr_err("Failed to populated %d contexts\n", nctx); intel_gt_set_wedged(&i915->gt); err = -EIO; - goto out_unlock; + goto out_file; } times[1] = ktime_get_raw(); @@ -96,7 +95,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; end_time = jiffies + i915_selftest.timeout_jiffies; for_each_prime_number_from(prime, 2, 8192) { @@ -106,7 +105,7 @@ static int live_nop_switch(void *arg) rq = igt_request_alloc(ctx[n % nctx], engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unlock; + goto out_file; } /* @@ -142,7 +141,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_end(&t); if (err) - goto out_unlock; + goto out_file; pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -150,8 +149,212 @@ static int live_nop_switch(void *arg) prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); +out_file: + mock_file_free(i915, file); + return err; +} + +struct parallel_switch { + struct task_struct *tsk; + struct intel_context *ce[2]; +}; + +static int __live_parallel_switch1(void *data) +{ + struct parallel_switch *arg = data; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + struct i915_request *rq = NULL; + int err, n; + + for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { + i915_request_put(rq); + + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + } + + err = 0; + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + return err; + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int __live_parallel_switchN(void *data) +{ + struct parallel_switch *arg = data; + IGT_TIMEOUT(end_time); + unsigned long count; + int n; + + count = 0; + do { + for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { + struct i915_request *rq; + + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + } + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int live_parallel_switch(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + __live_parallel_switch1, + __live_parallel_switchN, + NULL, + }; + struct parallel_switch *data = NULL; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + int (* const *fn)(void *arg); + struct i915_gem_context *ctx; + struct intel_context *ce; + struct drm_file *file; + int n, m, count; + int err = 0; + + /* + * Check we can process switches on all engines simultaneously. + */ + + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } + + engines = i915_gem_context_lock_engines(ctx); + count = engines->num_engines; + + data = kcalloc(count, sizeof(*data), GFP_KERNEL); + if (!data) { + i915_gem_context_unlock_engines(ctx); + err = -ENOMEM; + goto out; + } + + m = 0; /* Use the first context as our template for the engines */ + for_each_gem_engine(ce, engines, it) { + err = intel_context_pin(ce); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out; + } + data[m++].ce[0] = intel_context_get(ce); + } + i915_gem_context_unlock_engines(ctx); + + /* Clone the same set of engines into the other contexts */ + for (n = 1; n < ARRAY_SIZE(data->ce); n++) { + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + + for (m = 0; m < count; m++) { + if (!data[m].ce[0]) + continue; + + ce = intel_context_create(ctx, data[m].ce[0]->engine); + if (IS_ERR(ce)) + goto out; + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + goto out; + } + + data[m].ce[n] = ce; + } + } + + for (fn = func; !err && *fn; fn++) { + struct igt_live_test t; + int n; + + err = igt_live_test_begin(&t, i915, __func__, ""); + if (err) + break; + + for (n = 0; n < count; n++) { + if (!data[n].ce[0]) + continue; + + data[n].tsk = kthread_run(*fn, &data[n], + "igt/parallel:%s", + data[n].ce[0]->engine->name); + if (IS_ERR(data[n].tsk)) { + err = PTR_ERR(data[n].tsk); + break; + } + get_task_struct(data[n].tsk); + } + + for (n = 0; n < count; n++) { + int status; + + if (IS_ERR_OR_NULL(data[n].tsk)) + continue; + + status = kthread_stop(data[n].tsk); + if (status && !err) + err = status; + + put_task_struct(data[n].tsk); + data[n].tsk = NULL; + } + + if (igt_live_test_end(&t)) + err = -EIO; + } + +out: + for (n = 0; n < count; n++) { + for (m = 0; m < ARRAY_SIZE(data->ce); m++) { + if (!data[n].ce[m]) + continue; + + intel_context_unpin(data[n].ce[m]); + intel_context_put(data[n].ce[m]); + } + } + kfree(data); +out_file: mock_file_free(i915, file); return err; } @@ -166,28 +369,20 @@ static unsigned long fake_page_count(struct drm_i915_gem_object *obj) return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; } -static int gpu_fill(struct drm_i915_gem_object *obj, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int gpu_fill(struct intel_context *ce, + struct drm_i915_gem_object *obj, unsigned int dw) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; struct i915_vma *vma; int err; - GEM_BUG_ON(obj->base.size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + GEM_BUG_ON(obj->base.size > ce->vm->total); + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); - if (err) - return err; - err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); if (err) return err; @@ -200,9 +395,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, * whilst checking that each context provides a unique view * into the object. */ - err = igt_gpu_fill_dw(vma, - ctx, - engine, + err = igt_gpu_fill_dw(ce, vma, (dw * real_page_count(obj)) << PAGE_SHIFT | (dw * sizeof(u32)), real_page_count(obj), @@ -305,22 +498,21 @@ static int file_add_object(struct drm_file *file, } static struct drm_i915_gem_object * -create_test_object(struct i915_gem_context *ctx, +create_test_object(struct i915_address_space *vm, struct drm_file *file, struct list_head *objects) { struct drm_i915_gem_object *obj; - struct i915_address_space *vm = ctx->vm ?: &ctx->i915->ggtt.vm; u64 size; int err; /* Keep in GEM's good graces */ - i915_retire_requests(ctx->i915); + intel_gt_retire_requests(vm->gt); size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); size = round_down(size, DW_PER_PAGE * PAGE_SIZE); - obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); if (IS_ERR(obj)) return obj; @@ -348,6 +540,45 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj) return npages / DW_PER_PAGE; } +static void throttle_release(struct i915_request **q, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (IS_ERR_OR_NULL(q[i])) + continue; + + i915_request_put(fetch_and_zero(&q[i])); + } +} + +static int throttle(struct intel_context *ce, + struct i915_request **q, int count) +{ + int i; + + if (!IS_ERR_OR_NULL(q[0])) { + if (i915_request_wait(q[0], + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) + return -EINTR; + + i915_request_put(q[0]); + } + + for (i = 0; i < count - 1; i++) + q[i] = q[i + 1]; + + q[i] = intel_context_create_request(ce); + if (IS_ERR(q[i])) + return PTR_ERR(q[i]); + + i915_request_get(q[i]); + i915_request_add(q[i]); + + return 0; +} + static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; @@ -367,6 +598,7 @@ static int igt_ctx_exec(void *arg) for_each_engine(engine, i915, id) { struct drm_i915_gem_object *obj = NULL; unsigned long ncontexts, ndwords, dw; + struct i915_request *tq[5] = {}; struct igt_live_test t; struct drm_file *file; IGT_TIMEOUT(end_time); @@ -382,39 +614,53 @@ static int igt_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; ncontexts = 0; ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; + struct intel_context *ce; - ctx = live_context(i915, file); + ctx = kernel_context(i915); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); + if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto out_unlock; + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; } } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); - goto out_unlock; + engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; } if (++dw == max_dwords(obj)) { @@ -424,6 +670,9 @@ static int igt_ctx_exec(void *arg) ndwords++; ncontexts++; + + intel_context_put(ce); + kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", @@ -441,10 +690,10 @@ static int igt_ctx_exec(void *arg) dw += rem; } -out_unlock: +out_file: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); if (err) @@ -459,6 +708,7 @@ out_unlock: static int igt_shared_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; + struct i915_request *tq[5] = {}; struct i915_gem_context *parent; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -478,22 +728,20 @@ static int igt_shared_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - parent = live_context(i915, file); if (IS_ERR(parent)) { err = PTR_ERR(parent); - goto out_unlock; + goto out_file; } if (!parent->vm) { /* not full-ppgtt; nothing to share */ err = 0; - goto out_unlock; + goto out_file; } err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; for_each_engine(engine, i915, id) { unsigned long ncontexts, ndwords, dw; @@ -509,6 +757,7 @@ static int igt_shared_ctx_exec(void *arg) ncontexts = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; + struct intel_context *ce; ctx = kernel_context(i915); if (IS_ERR(ctx)) { @@ -516,23 +765,38 @@ static int igt_shared_ctx_exec(void *arg) goto out_test; } + mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, parent->vm); + mutex_unlock(&ctx->mutex); + + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); if (!obj) { - obj = create_test_object(parent, file, &objects); + obj = create_test_object(parent->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); + intel_context_put(ce); kernel_context_close(ctx); goto out_test; } } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); + engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + intel_context_put(ce); + kernel_context_close(ctx); + goto out_test; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); kernel_context_close(ctx); goto out_test; } @@ -545,6 +809,7 @@ static int igt_shared_ctx_exec(void *arg) ndwords++; ncontexts++; + intel_context_put(ce); kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", @@ -562,16 +827,13 @@ static int igt_shared_ctx_exec(void *arg) dw += rem; } - mutex_unlock(&i915->drm.struct_mutex); i915_gem_drain_freed_objects(i915); - mutex_lock(&i915->drm.struct_mutex); } out_test: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - +out_file: mock_file_free(i915, file); return err; } @@ -604,6 +866,8 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(vma->vm->gt); + vma = i915_vma_instance(obj, vma->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -681,10 +945,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto skip_request; - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); - + i915_vma_unpin_and_release(&batch, 0); i915_vma_unpin(vma); *rq_out = i915_request_get(rq); @@ -698,8 +959,7 @@ skip_request: err_request: i915_request_add(rq); err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); err_vma: i915_vma_unpin(vma); @@ -860,8 +1120,8 @@ out: igt_spinner_end(spin); if ((flags & TEST_IDLE) && ret == 0) { - ret = i915_gem_wait_for_idle(ce->engine->i915, - 0, MAX_SCHEDULE_TIMEOUT); + ret = intel_gt_wait_for_idle(ce->engine->gt, + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; @@ -887,7 +1147,7 @@ __sseu_test(const char *name, if (ret) return ret; - ret = __intel_context_reconfigure_sseu(ce, sseu); + ret = intel_context_reconfigure_sseu(ce, sseu); if (ret) goto out_spin; @@ -945,8 +1205,6 @@ __igt_ctx_sseu(struct drm_i915_private *i915, if (flags & TEST_RESET) igt_global_reset_lock(&i915->gt); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); @@ -991,7 +1249,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, goto out_fail; out_fail: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) ret = -EIO; intel_context_unpin(ce); @@ -1001,8 +1259,6 @@ out_put: i915_gem_object_put(obj); out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - if (flags & TEST_RESET) igt_global_reset_unlock(&i915->gt); @@ -1041,6 +1297,7 @@ static int igt_ctx_readonly(void *arg) { struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj = NULL; + struct i915_request *tq[5] = {}; struct i915_address_space *vm; struct i915_gem_context *ctx; unsigned long idx, ndwords, dw; @@ -1061,52 +1318,63 @@ static int igt_ctx_readonly(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - vm = ctx->vm ?: &i915->ggtt.alias->vm; + rcu_read_lock(); + vm = rcu_dereference(ctx->vm) ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { + rcu_read_unlock(); err = 0; - goto out_unlock; + goto out_file; } + rcu_read_unlock(); ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { - struct intel_engine_cs *engine; - unsigned int id; + struct i915_gem_engines_iter it; + struct intel_context *ce; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto out_unlock; + i915_gem_context_unlock_engines(ctx); + goto out_file; } if (prandom_u32_state(&prng) & 1) i915_gem_object_set_readonly(obj); } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); - goto out_unlock; + ce->engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + i915_gem_context_unlock_engines(ctx); + goto out_file; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out_file; } if (++dw == max_dwords(obj)) { @@ -1115,6 +1383,7 @@ static int igt_ctx_readonly(void *arg) } ndwords++; } + i915_gem_context_unlock_engines(ctx); } pr_info("Submitted %lu dwords (across %u engines)\n", ndwords, RUNTIME_INFO(i915)->num_engines); @@ -1137,19 +1406,19 @@ static int igt_ctx_readonly(void *arg) dw += rem; } -out_unlock: +out_file: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; } -static int check_scratch(struct i915_gem_context *ctx, u64 offset) +static int check_scratch(struct i915_address_space *vm, u64 offset) { struct drm_mm_node *node = - __drm_mm_interval_first(&ctx->vm->mm, + __drm_mm_interval_first(&vm->mm, offset, offset + sizeof(u32) - 1); if (!node || node->start > offset) return 0; @@ -1167,6 +1436,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *vma; u32 *cmd; @@ -1197,17 +1467,20 @@ static int write_to_scratch(struct i915_gem_context *ctx, __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + intel_gt_chipset_flush(engine->gt); + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto err_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto err_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1229,12 +1502,11 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) goto skip_request; - i915_vma_unpin(vma); - i915_vma_close(vma); - i915_vma_put(vma); + i915_vma_unpin_and_release(&vma, 0); i915_request_add(rq); + i915_vm_put(vm); return 0; skip_request: @@ -1243,6 +1515,8 @@ err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); +err_vm: + i915_vm_put(vm); err: i915_gem_object_put(obj); return err; @@ -1254,6 +1528,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */ const u32 result = 0x100; struct i915_request *rq; @@ -1296,17 +1571,20 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + intel_gt_chipset_flush(engine->gt); + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto err_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto err_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1337,12 +1615,12 @@ static int read_from_scratch(struct i915_gem_context *ctx, err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) - goto err; + goto err_vm; cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto err_vm; } *value = cmd[result / sizeof(*cmd)]; @@ -1357,6 +1635,8 @@ err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); +err_vm: + i915_vm_put(vm); err: i915_gem_object_put(obj); return err; @@ -1387,27 +1667,25 @@ static int igt_vm_isolation(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx_a = live_context(i915, file); if (IS_ERR(ctx_a)) { err = PTR_ERR(ctx_a); - goto out_unlock; + goto out_file; } ctx_b = live_context(i915, file); if (IS_ERR(ctx_b)) { err = PTR_ERR(ctx_b); - goto out_unlock; + goto out_file; } /* We can only test vm isolation, if the vm are distinct */ if (ctx_a->vm == ctx_b->vm) - goto out_unlock; + goto out_file; vm_total = ctx_a->vm->total; GEM_BUG_ON(ctx_b->vm->total != vm_total); @@ -1436,7 +1714,7 @@ static int igt_vm_isolation(void *arg) err = read_from_scratch(ctx_b, engine, offset, &value); if (err) - goto out_unlock; + goto out_file; if (value) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", @@ -1445,7 +1723,7 @@ static int igt_vm_isolation(void *arg) lower_32_bits(offset), this); err = -EINVAL; - goto out_unlock; + goto out_file; } this++; @@ -1455,30 +1733,13 @@ static int igt_vm_isolation(void *arg) pr_info("Checked %lu scratch offsets across %d engines\n", count, RUNTIME_INFO(i915)->num_engines); -out_unlock: +out_file: if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); return err; } -static __maybe_unused const char * -__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (engines == ALL_ENGINES) - return "all"; - - for_each_engine_masked(engine, i915, engines, tmp) - return engine->name; - - return "none"; -} - static bool skip_unused_engines(struct intel_context *ce, void *data) { return !ce->state; @@ -1506,13 +1767,9 @@ static int mock_context_barrier(void *arg) * a request; useful for retiring old state after loading new. */ - mutex_lock(&i915->drm.struct_mutex); - ctx = mock_context(i915, "mock"); - if (!ctx) { - err = -ENOMEM; - goto unlock; - } + if (!ctx) + return -ENOMEM; counter = 0; err = context_barrier_task(ctx, 0, @@ -1585,8 +1842,6 @@ static int mock_context_barrier(void *arg) out: mock_context_close(ctx); -unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; #undef pr_fmt #define pr_fmt(x) x @@ -1614,6 +1869,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_switch), + SUBTEST(live_parallel_switch), SUBTEST(igt_ctx_exec), SUBTEST(igt_ctx_readonly), SUBTEST(igt_ctx_sseu), diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 1d27babff0ce..cfa52c525691 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -10,6 +10,7 @@ #include "gt/intel_gt_pm.h" #include "huge_gem_object.h" #include "i915_selftest.h" +#include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" struct tile { @@ -76,18 +77,103 @@ static u64 tiled_offset(const struct tile *tile, u64 v) static int check_partial_mapping(struct drm_i915_gem_object *obj, const struct tile *tile, - unsigned long end_time) + struct rnd_state *prng) { - const unsigned int nreal = obj->scratch / PAGE_SIZE; const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_ggtt_view view; struct i915_vma *vma; unsigned long page; + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; int err; - if (igt_timeout(end_time, - "%s: timed out before tiling=%d stride=%d\n", - __func__, tile->tiling, tile->stride)) - return -EINTR; + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) { + pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n", + tile->tiling, tile->stride, err); + return err; + } + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); + if (err) { + pr_err("Failed to flush to GTT write domain; err=%d\n", err); + return err; + } + + page = i915_prandom_u32_max_state(npages, prng); + view = compute_partial_view(obj, page, MIN_CHUNK_PAGES); + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(vma)); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(io)); + err = PTR_ERR(io); + goto out; + } + + iowrite32(page, io + n * PAGE_SIZE / sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + goto out; + + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile->tiling ? tile_row_pages(obj) : 0, + vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + +out: + i915_vma_destroy(vma); + return err; +} + +static int check_partial_mappings(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); if (err) { @@ -170,11 +256,42 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, return err; i915_vma_destroy(vma); + + if (igt_timeout(end_time, + "%s: timed out after tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; } return 0; } +static unsigned int +setup_tile_size(struct tile *tile, struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) <= 2) { + tile->height = 16; + tile->width = 128; + tile->size = 11; + } else if (tile->tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile->height = 32; + tile->width = 128; + tile->size = 12; + } else { + tile->height = 8; + tile->width = 512; + tile->size = 12; + } + + if (INTEL_GEN(i915) < 4) + return 8192 / tile->width; + else if (INTEL_GEN(i915) < 7) + return 128 * I965_FENCE_MAX_PITCH_VAL / tile->width; + else + return 128 * GEN7_FENCE_MAX_PITCH_VAL / tile->width; +} + static int igt_partial_tiling(void *arg) { const unsigned int nreal = 1 << 12; /* largest tile row x2 */ @@ -205,7 +322,6 @@ static int igt_partial_tiling(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); if (1) { @@ -219,7 +335,7 @@ static int igt_partial_tiling(void *arg) tile.swizzle = I915_BIT_6_SWIZZLE_NONE; tile.tiling = I915_TILING_NONE; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err && err != -EINTR) goto out_unlock; } @@ -253,31 +369,11 @@ static int igt_partial_tiling(void *arg) tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) continue; - if (INTEL_GEN(i915) <= 2) { - tile.height = 16; - tile.width = 128; - tile.size = 11; - } else if (tile.tiling == I915_TILING_Y && - HAS_128_BYTE_Y_TILING(i915)) { - tile.height = 32; - tile.width = 128; - tile.size = 12; - } else { - tile.height = 8; - tile.width = 512; - tile.size = 12; - } - - if (INTEL_GEN(i915) < 4) - max_pitch = 8192 / tile.width; - else if (INTEL_GEN(i915) < 7) - max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; - else - max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + max_pitch = setup_tile_size(&tile, i915); for (pitch = max_pitch; pitch; pitch >>= 1) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -285,7 +381,7 @@ static int igt_partial_tiling(void *arg) if (pitch > 2 && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch - 1); - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -294,7 +390,7 @@ static int igt_partial_tiling(void *arg) if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch + 1); - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -305,7 +401,7 @@ static int igt_partial_tiling(void *arg) if (INTEL_GEN(i915) >= 4) { for_each_prime_number(pitch, max_pitch) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -318,7 +414,97 @@ next_tiling: ; out_unlock: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + +static int igt_smoke_tiling(void *arg) +{ + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; + I915_RND_STATE(prng); + unsigned long count; + IGT_TIMEOUT(end); + int err; + + /* + * igt_partial_tiling() does an exhastive check of partial tiling + * chunking, but will undoubtably run out of time. Here, we do a + * randomised search and hope over many runs of 1s with different + * seeds we will do a thorough check. + * + * Remember to look at the st_seed if we see a flip-flop in BAT! + */ + + if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) + return 0; + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + do { + struct tile tile; + + tile.tiling = + i915_prandom_u32_max_state(I915_TILING_Y + 1, &prng); + switch (tile.tiling) { + case I915_TILING_NONE: + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + break; + + case I915_TILING_X: + tile.swizzle = i915->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->mm.bit_6_swizzle_y; + break; + } + + if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (tile.tiling != I915_TILING_NONE) { + unsigned int max_pitch = setup_tile_size(&tile, i915); + + tile.stride = + i915_prandom_u32_max_state(max_pitch, &prng); + tile.stride = (1 + tile.stride) * tile.width; + if (INTEL_GEN(i915) < 4) + tile.stride = rounddown_pow_of_two(tile.stride); + } + + err = check_partial_mapping(obj, &tile, &prng); + if (err) + break; + + count++; + } while (!__igt_timeout(end, NULL)); + + pr_info("%s: Completed %lu trials\n", __func__, count); + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); @@ -386,21 +572,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_driver_unregister__shrinker(i915); - intel_gt_pm_get(&i915->gt); - - cancel_delayed_work_sync(&i915->gem.retire_work); - flush_work(&i915->gem.idle_work); + cancel_delayed_work_sync(&i915->gt.requests.retire_work); } static void restore_retire_worker(struct drm_i915_private *i915) { + igt_flush_test(i915); intel_gt_pm_put(&i915->gt); - - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_driver_register__shrinker(i915); } @@ -490,9 +669,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = make_obj_busy(obj); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); goto err_obj; @@ -515,6 +692,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_partial_tiling), + SUBTEST(igt_smoke_tiling), SUBTEST(igt_mmap_offset_exhaustion), }; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index c21d747e7d05..9ec55b3a3815 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -65,9 +65,7 @@ static int igt_fill_blt(void *arg) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_fill_blt(obj, ce, val); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; @@ -166,9 +164,7 @@ static int igt_copy_blt(void *arg) if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) dst->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_copy_blt(src, dst, ce); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 94a15e3f6db8..34932871b3a5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,9 +25,7 @@ static int mock_phys_object(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); goto out_obj; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 57ece53c1075..6718da20f35d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -9,6 +9,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_gt.h" #include "i915_vma.h" #include "i915_drv.h" @@ -84,6 +85,8 @@ igt_emit_store_dw(struct i915_vma *vma, *cmd = MI_BATCH_BUFFER_END; i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(vma->vm->gt); + vma = i915_vma_instance(obj, vma->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -101,40 +104,35 @@ err: return ERR_PTR(err); } -int igt_gpu_fill_dw(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, - unsigned long count, - u32 val) +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; struct i915_request *rq; struct i915_vma *batch; unsigned int flags; int err; - GEM_BUG_ON(vma->size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); GEM_BUG_ON(!i915_vma_is_pinned(vma)); batch = igt_emit_store_dw(vma, offset, count, val); if (IS_ERR(batch)) return PTR_ERR(batch); - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_batch; } flags = 0; - if (INTEL_GEN(vm->i915) <= 5) + if (INTEL_GEN(ce->vm->i915) <= 5) flags |= I915_DISPATCH_SECURE; - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); if (err) goto err_request; @@ -156,9 +154,7 @@ int igt_gpu_fill_dw(struct i915_vma *vma, i915_request_add(rq); - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); return 0; @@ -167,7 +163,6 @@ skip_request: err_request: i915_request_add(rq); err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h index 361a7ef866b0..4221cf84d175 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h @@ -11,9 +11,11 @@ struct i915_request; struct i915_gem_context; -struct intel_engine_cs; struct i915_vma; +struct intel_context; +struct intel_engine_cs; + struct i915_request * igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -23,11 +25,8 @@ igt_emit_store_dw(struct i915_vma *vma, unsigned long count, u32 val); -int igt_gpu_fill_dw(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, - unsigned long count, - u32 val); +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val); #endif /* __IGT_GEM_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index be8974ccff24..74ddd682c9cd 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; - int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -30,13 +29,8 @@ mock_context(struct drm_i915_private *i915, RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); mutex_init(&ctx->mutex); - ret = i915_gem_context_pin_hw_id(ctx); - if (ret < 0) - goto err_engines; - if (name) { struct i915_ppgtt *ppgtt; @@ -48,14 +42,15 @@ mock_context(struct drm_i915_private *i915, if (!ppgtt) goto err_put; + mutex_lock(&ctx->mutex); __set_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } return ctx; -err_engines: - free_engines(rcu_access_pointer(ctx->engines)); err_free: kfree(ctx); return NULL; @@ -73,7 +68,7 @@ void mock_context_close(struct i915_gem_context *ctx) void mock_init_contexts(struct drm_i915_private *i915) { - init_contexts(i915); + init_contexts(&i915->gem.contexts); } struct i915_gem_context * @@ -82,8 +77,6 @@ live_context(struct drm_i915_private *i915, struct drm_file *file) struct i915_gem_context *ctx; int err; - lockdep_assert_held(&i915->drm.struct_mutex); - ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; |