From e61e0f51ba7974bb575cdc23220b573e5cd4ff2a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 21 Feb 2018 09:56:36 +0000 Subject: drm/i915: Rename drm_i915_gem_request to i915_request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to de-emphasize the link between the request (dependency, execution and fence tracking) and GEM and so rename the struct from drm_i915_gem_request to i915_request. That is, we may implement the GEM user interface on top of requests, but they are an abstraction for tracking execution rather than an implementation detail of GEM. (Since they are not tied to HW, we keep the i915 prefix as opposed to intel.) In short, the spatch: @@ @@ - struct drm_i915_gem_request + struct i915_request As a corollary to contracting the type name, we also harmonise on using 'rq' shorthand for local variables where space is of the essence and repetition makes 'request' unwieldy. For globals and struct members, 'request' is still much preferred for its clarity. 
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Michał Winiarski Cc: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20180221095636.6649-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Reviewed-by: Michał Winiarski Acked-by: Joonas Lahtinen --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 16 +- drivers/gpu/drm/i915/gvt/scheduler.h | 2 +- drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_drv.c | 6 +- drivers/gpu/drm/i915/i915_drv.h | 26 +- drivers/gpu/drm/i915/i915_gem.c | 88 +- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 18 +- drivers/gpu/drm/i915/i915_gem_context.h | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 60 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +- drivers/gpu/drm/i915/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/i915_gem_render_state.h | 4 +- drivers/gpu/drm/i915/i915_gem_request.c | 1397 ------------------- drivers/gpu/drm/i915/i915_gem_request.h | 730 ---------- drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 +- drivers/gpu/drm/i915/i915_gem_timeline.h | 4 +- drivers/gpu/drm/i915/i915_gpu_error.c | 18 +- drivers/gpu/drm/i915/i915_irq.c | 8 +- drivers/gpu/drm/i915/i915_perf.c | 28 +- drivers/gpu/drm/i915/i915_request.c | 1411 ++++++++++++++++++++ drivers/gpu/drm/i915/i915_request.h | 738 ++++++++++ drivers/gpu/drm/i915/i915_trace.h | 128 +- drivers/gpu/drm/i915/i915_vma.c | 3 +- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 31 +- drivers/gpu/drm/i915/intel_display.c | 8 +- drivers/gpu/drm/i915/intel_drv.h | 3 +- drivers/gpu/drm/i915/intel_engine_cs.c | 26 +- drivers/gpu/drm/i915/intel_guc_submission.c | 27 +- drivers/gpu/drm/i915/intel_lrc.c | 115 +- drivers/gpu/drm/i915/intel_mocs.c | 28 +- 
drivers/gpu/drm/i915/intel_mocs.h | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 82 +- drivers/gpu/drm/i915/intel_pm.c | 4 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 196 ++- drivers/gpu/drm/i915/intel_ringbuffer.h | 78 +- drivers/gpu/drm/i915/selftests/huge_pages.c | 6 +- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 8 +- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 8 +- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 6 +- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 6 +- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 868 ------------ .../gpu/drm/i915/selftests/i915_live_selftests.h | 2 +- .../gpu/drm/i915/selftests/i915_mock_selftests.h | 2 +- drivers/gpu/drm/i915/selftests/i915_request.c | 865 ++++++++++++ drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 117 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 10 +- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 +- drivers/gpu/drm/i915/selftests/mock_request.c | 10 +- drivers/gpu/drm/i915/selftests/mock_request.h | 8 +- 55 files changed, 3633 insertions(+), 3639 deletions(-) delete mode 100644 drivers/gpu/drm/i915/i915_gem_request.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_request.h create mode 100644 drivers/gpu/drm/i915/i915_request.c create mode 100644 drivers/gpu/drm/i915/i915_request.h delete mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_request.c create mode 100644 drivers/gpu/drm/i915/selftests/i915_request.c (limited to 'drivers') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 838f9b48246b..5b908c797294 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -63,13 +63,13 @@ i915-y += i915_cmd_parser.o \ i915_gem.o \ i915_gem_object.o \ i915_gem_render_state.o \ - i915_gem_request.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ + i915_request.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ diff --git 
a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 0056638b0c16..a22a686f14c2 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -126,7 +126,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return 0; } -static inline bool is_gvt_request(struct drm_i915_gem_request *req) +static inline bool is_gvt_request(struct i915_request *req) { return i915_gem_context_force_single_submission(req->ctx); } @@ -148,7 +148,7 @@ static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id) static int shadow_context_status_change(struct notifier_block *nb, unsigned long action, void *data) { - struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data; + struct i915_request *req = data; struct intel_gvt *gvt = container_of(nb, struct intel_gvt, shadow_ctx_notifier_block[req->engine->id]); struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; @@ -333,13 +333,13 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; struct i915_gem_context *shadow_ctx = s->shadow_ctx; int ret; - rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); + rq = i915_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { gvt_vgpu_err("fail to allocate gem request\n"); ret = PTR_ERR(rq); @@ -348,7 +348,7 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq); - workload->req = i915_gem_request_get(rq); + workload->req = i915_request_get(rq); ret = copy_workload_to_ring_buffer(workload); if (ret) goto err_unpin; @@ -582,7 
+582,7 @@ out: if (!IS_ERR_OR_NULL(workload->req)) { gvt_dbg_sched("ring id %d submit workload to i915 %p\n", ring_id, workload->req); - i915_add_request(workload->req); + i915_request_add(workload->req); workload->dispatched = true; } @@ -769,7 +769,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->status = 0; } - i915_gem_request_put(fetch_and_zero(&workload->req)); + i915_request_put(fetch_and_zero(&workload->req)); if (!workload->status && !(vgpu->resetting_eng & ENGINE_MASK(ring_id))) { @@ -886,7 +886,7 @@ static int workload_thread(void *priv) gvt_dbg_sched("ring id %d wait workload %p\n", workload->ring_id, workload); - i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_wait(workload->req, 0, MAX_SCHEDULE_TIMEOUT); complete: gvt_dbg_sched("will complete workload %p, status: %d\n", diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 3de77dfa7c59..899831b089d4 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -80,7 +80,7 @@ struct intel_shadow_wa_ctx { struct intel_vgpu_workload { struct intel_vgpu *vgpu; int ring_id; - struct drm_i915_gem_request *req; + struct i915_request *req; /* if this workload has been dispatched to i915? 
*/ bool dispatched; bool shadowed; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 05b41045b8f9..bad2ed7050ba 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -519,7 +519,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) list_for_each_entry_reverse(file, &dev->filelist, lhead) { struct file_stats stats; struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_request *request; + struct i915_request *request; struct task_struct *task; mutex_lock(&dev->struct_mutex); @@ -536,7 +536,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) * Therefore, we need to protect this ->comm access using RCU. */ request = list_first_entry_or_null(&file_priv->mm.request_list, - struct drm_i915_gem_request, + struct i915_request, client_link); rcu_read_lock(); task = pid_task(request && request->ctx->pid ? @@ -4060,7 +4060,7 @@ i915_drop_caches_set(void *data, u64 val) I915_WAIT_LOCKED); if (val & DROP_RETIRE) - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); mutex_unlock(&dev->struct_mutex); } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d09f8e661fbd..aaa861b51024 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -808,7 +808,7 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) /* * The i915 workqueue is primarily used for batched retirement of * requests (and thus managing bo) once the task has been completed - * by the GPU. i915_gem_retire_requests() is called directly when we + * by the GPU. i915_retire_requests() is called directly when we * need high-priority retirement, such as waiting for an explicit * bo. 
* @@ -1992,7 +1992,7 @@ taint: add_taint(TAINT_WARN, LOCKDEP_STILL_OK); error: i915_gem_set_wedged(i915); - i915_gem_retire_requests(i915); + i915_retire_requests(i915); intel_gpu_reset(i915, ALL_ENGINES); goto finish; } @@ -2019,7 +2019,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) { struct i915_gpu_error *error = &engine->i915->gpu_error; - struct drm_i915_gem_request *active_request; + struct i915_request *active_request; int ret; GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4a279be84f66..9143d0d6be5a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -71,9 +71,9 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_request.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -1231,7 +1231,7 @@ struct i915_gpu_error { * * #I915_WEDGED - If reset fails and we can no longer use the GPU, * we set the #I915_WEDGED bit. Prior to command submission, e.g. - * i915_gem_request_alloc(), this bit is checked and the sequence + * i915_request_alloc(), this bit is checked and the sequence * aborted (with -EIO reported to userspace) if set. 
*/ unsigned long flags; @@ -3329,7 +3329,7 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj) int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req, + struct i915_request *rq, unsigned int flags); int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, @@ -3344,11 +3344,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno); -struct drm_i915_gem_request * +struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine); -void i915_gem_retire_requests(struct drm_i915_private *dev_priv); - static inline bool i915_reset_backoff(struct i915_gpu_error *error) { return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags)); @@ -3380,7 +3378,7 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, return READ_ONCE(error->reset_engine_count[engine->id]); } -struct drm_i915_gem_request * +struct i915_request * i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset(struct drm_i915_private *dev_priv); @@ -3389,7 +3387,7 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request); + struct i915_request *request); void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); @@ -4007,9 +4005,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } static inline bool -__i915_request_irq_complete(const struct drm_i915_gem_request *req) +__i915_request_irq_complete(const struct i915_request *rq) { - struct 
intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 seqno; /* Note that the engine may have wrapped around the seqno, and @@ -4018,7 +4016,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * this by kicking all the waiters before resetting the seqno * in hardware, and also signal the fence. */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) return true; /* The request was dequeued before we were awoken. We check after @@ -4027,14 +4025,14 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * the request execution are sufficient to ensure that a check * after reading the value from hw matches this request. */ - seqno = i915_gem_request_global_seqno(req); + seqno = i915_request_global_seqno(rq); if (!seqno) return false; /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. */ - if (__i915_gem_request_completed(req, seqno)) + if (__i915_request_completed(rq, seqno)) return true; /* Ensure our read of the seqno is coherent so that we @@ -4083,7 +4081,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) wake_up_process(b->irq_wait->tsk); spin_unlock_irq(&b->irq_lock); - if (__i915_gem_request_completed(req, seqno)) + if (__i915_request_completed(rq, seqno)) return true; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 43afa1c1b14f..14c855b1a3a4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -353,7 +353,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence, long timeout, struct intel_rps_client *rps_client) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); @@ -366,7 +366,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence, timeout); rq = to_request(fence); - if (i915_gem_request_completed(rq)) + if 
(i915_request_completed(rq)) goto out; /* @@ -385,16 +385,16 @@ i915_gem_object_wait_fence(struct dma_fence *fence, * forcing the clocks too high for the whole system, we only allow * each client to waitboost once in a busy period. */ - if (rps_client && !i915_gem_request_started(rq)) { + if (rps_client && !i915_request_started(rq)) { if (INTEL_GEN(rq->i915) >= 6) gen6_rps_boost(rq, rps_client); } - timeout = i915_wait_request(rq, flags, timeout); + timeout = i915_request_wait(rq, flags, timeout); out: - if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) - i915_gem_request_retire_upto(rq); + if (flags & I915_WAIT_LOCKED && i915_request_completed(rq)) + i915_request_retire_upto(rq); return timeout; } @@ -463,7 +463,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, static void __fence_set_priority(struct dma_fence *fence, int prio) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct intel_engine_cs *engine; if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) @@ -2856,10 +2856,10 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) atomic_inc(&ctx->active_count); } -struct drm_i915_gem_request * +struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request, *active = NULL; + struct i915_request *request, *active = NULL; unsigned long flags; /* We are called by the error capture and reset at a random @@ -2872,8 +2872,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) */ spin_lock_irqsave(&engine->timeline->lock, flags); list_for_each_entry(request, &engine->timeline->requests, link) { - if (__i915_gem_request_completed(request, - request->global_seqno)) + if (__i915_request_completed(request, request->global_seqno)) continue; GEM_BUG_ON(request->engine != engine); @@ -2906,10 +2905,10 @@ static bool engine_stalled(struct intel_engine_cs *engine) * Ensure irq handler finishes, and not run again. 
* Also return the active request so that we only search for it once. */ -struct drm_i915_gem_request * +struct i915_request * i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request = NULL; + struct i915_request *request = NULL; /* * During the reset sequence, we must prevent the engine from @@ -2967,7 +2966,7 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - struct drm_i915_gem_request *request; + struct i915_request *request; enum intel_engine_id id; int err = 0; @@ -2986,7 +2985,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) return err; } -static void skip_request(struct drm_i915_gem_request *request) +static void skip_request(struct i915_request *request) { void *vaddr = request->ring->vaddr; u32 head; @@ -3005,7 +3004,7 @@ static void skip_request(struct drm_i915_gem_request *request) dma_fence_set_error(&request->fence, -EIO); } -static void engine_skip_context(struct drm_i915_gem_request *request) +static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_gem_context *hung_ctx = request->ctx; @@ -3029,9 +3028,9 @@ static void engine_skip_context(struct drm_i915_gem_request *request) } /* Returns the request if it was guilty of the hang */ -static struct drm_i915_gem_request * +static struct i915_request * i915_gem_reset_request(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { /* The guilty request will get skipped on a hung engine. 
* @@ -3085,7 +3084,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine, } void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { /* * Make sure this write is visible before we re-enable the interrupt @@ -3113,7 +3112,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); for_each_engine(engine, dev_priv, id) { struct i915_gem_context *ctx; @@ -3134,12 +3133,12 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) * empty request appears sufficient to paper over the glitch. */ if (intel_engine_is_idle(engine)) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; - rq = i915_gem_request_alloc(engine, - dev_priv->kernel_context); + rq = i915_request_alloc(engine, + dev_priv->kernel_context); if (!IS_ERR(rq)) - __i915_add_request(rq, false); + __i915_request_add(rq, false); } } @@ -3174,21 +3173,21 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) } } -static void nop_submit_request(struct drm_i915_gem_request *request) +static void nop_submit_request(struct i915_request *request) { dma_fence_set_error(&request->fence, -EIO); - i915_gem_request_submit(request); + i915_request_submit(request); } -static void nop_complete_submit_request(struct drm_i915_gem_request *request) +static void nop_complete_submit_request(struct i915_request *request) { unsigned long flags; dma_fence_set_error(&request->fence, -EIO); spin_lock_irqsave(&request->engine->timeline->lock, flags); - __i915_gem_request_submit(request); + __i915_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); spin_unlock_irqrestore(&request->engine->timeline->lock, flags); } @@ -3281,7 +3280,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) */ list_for_each_entry(tl, &i915->gt.timelines, link) { for (i = 0; i < 
ARRAY_SIZE(tl->engine); i++) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; rq = i915_gem_active_peek(&tl->engine[i].last_request, &i915->drm.struct_mutex); @@ -3330,7 +3329,7 @@ i915_gem_retire_work_handler(struct work_struct *work) /* Come back later if the device is busy... */ if (mutex_trylock(&dev->struct_mutex)) { - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); mutex_unlock(&dev->struct_mutex); } @@ -3684,7 +3683,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (ret) return ret; } - i915_gem_retire_requests(i915); + i915_retire_requests(i915); ret = wait_for_engines(i915); } else { @@ -4224,7 +4223,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_file_private *file_priv = file->driver_priv; unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; - struct drm_i915_gem_request *request, *target = NULL; + struct i915_request *request, *target = NULL; long ret; /* ABI: return -EIO if already wedged */ @@ -4244,16 +4243,16 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) target = request; } if (target) - i915_gem_request_get(target); + i915_request_get(target); spin_unlock(&file_priv->mm.lock); if (target == NULL) return 0; - ret = i915_wait_request(target, + ret = i915_request_wait(target, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(target); + i915_request_put(target); return ret < 0 ? ret : 0; } @@ -4367,7 +4366,7 @@ static __always_inline unsigned int __busy_set_if_active(const struct dma_fence *fence, unsigned int (*flag)(unsigned int id)) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; /* We have to check the current hw status of the fence as the uABI * guarantees forward progress. 
We could rely on the idle worker @@ -4380,8 +4379,8 @@ __busy_set_if_active(const struct dma_fence *fence, return 0; /* opencode to_request() in order to avoid const warnings */ - rq = container_of(fence, struct drm_i915_gem_request, fence); - if (i915_gem_request_completed(rq)) + rq = container_of(fence, struct i915_request, fence); + if (i915_request_completed(rq)) return 0; return flag(rq->engine->uabi_id); @@ -4526,8 +4525,7 @@ out: } static void -frontbuffer_retire(struct i915_gem_active *active, - struct drm_i915_gem_request *request) +frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) { struct drm_i915_gem_object *obj = container_of(active, typeof(*obj), frontbuffer_write); @@ -5161,9 +5159,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) return PTR_ERR(ctx); for_each_engine(engine, i915, id) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_ctx; @@ -5173,7 +5171,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (engine->init_context) err = engine->init_context(rq); - __i915_add_request(rq, true); + __i915_request_add(rq, true); if (err) goto err_active; } @@ -5479,7 +5477,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (!dev_priv->luts) goto err_vmas; - dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, + dev_priv->requests = KMEM_CACHE(i915_request, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_TYPESAFE_BY_RCU); @@ -5612,7 +5610,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) void i915_gem_release(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_request *request; + struct i915_request *request; /* Clean up our request list when the client is going away, so that * later retire_requests won't dereference our 
soon-to-be-gone diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index c93005c2e0fb..d3cbe8432f48 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -119,7 +119,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, if (!reservation_object_test_signaled_rcu(resv, true)) break; - i915_gem_retire_requests(pool->engine->i915); + i915_retire_requests(pool->engine->i915); GEM_BUG_ON(i915_gem_object_is_active(obj)); /* diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3d75f484f6e5..a73340ae9419 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -219,7 +219,7 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) * Flush any pending retires to hopefully release some * stale contexts and try again. */ - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); ret = ida_simple_get(&dev_priv->contexts.hw_ida, 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); if (ret < 0) @@ -590,28 +590,28 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); for_each_engine(engine, dev_priv, id) { - struct drm_i915_gem_request *req; + struct i915_request *rq; if (engine_has_idle_kernel_context(engine)) continue; - req = i915_gem_request_alloc(engine, dev_priv->kernel_context); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = i915_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { - struct drm_i915_gem_request *prev; + struct i915_request *prev; struct intel_timeline *tl; tl = &timeline->engine[engine->id]; prev = i915_gem_active_raw(&tl->last_request, 
&dev_priv->drm.struct_mutex); if (prev) - i915_sw_fence_await_sw_fence_gfp(&req->submit, + i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, I915_FENCE_GFP); } @@ -623,7 +623,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) * but an extra layer of paranoia before we declare the system * idle (on suspend etc) is advisable! */ - __i915_add_request(req, true); + __i915_request_add(rq, true); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index a681c5b891ff..1829dafe54b4 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -276,7 +276,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); void i915_gem_context_close(struct drm_file *file); -int i915_switch_context(struct drm_i915_gem_request *req); +int i915_switch_context(struct i915_request *rq); int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv); void i915_gem_context_release(struct kref *ctx_ref); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 60ca4f05ae94..54814a196ee4 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -168,7 +168,7 @@ i915_gem_evict_something(struct i915_address_space *vm, * retiring. */ if (!(flags & PIN_NONBLOCK)) - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); else phases[1] = NULL; @@ -293,7 +293,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * retiring. 
*/ if (!(flags & PIN_NONBLOCK)) - i915_gem_retire_requests(vm->i915); + i915_retire_requests(vm->i915); check_color = vm->mm.color_adjust; if (check_color) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4eb28e84fda4..8c170db8495d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -200,7 +200,7 @@ struct i915_execbuffer { struct i915_gem_context *ctx; /** context for building the request */ struct i915_address_space *vm; /** GTT and vma for the request */ - struct drm_i915_gem_request *request; /** our request to build */ + struct i915_request *request; /** our request to build */ struct i915_vma *batch; /** identity of the batch obj/vma */ /** actual size of execobj[] as we may extend it for the cmdparser */ @@ -227,7 +227,7 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; - struct drm_i915_gem_request *rq; + struct i915_request *rq; u32 *rq_cmd; unsigned int rq_size; } reloc_cache; @@ -886,7 +886,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache) i915_gem_object_unpin_map(cache->rq->batch->obj); i915_gem_chipset_flush(cache->rq->i915); - __i915_add_request(cache->rq, true); + __i915_request_add(cache->rq, true); cache->rq = NULL; } @@ -1070,7 +1070,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, { struct reloc_cache *cache = &eb->reloc_cache; struct drm_i915_gem_object *obj; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; @@ -1103,13 +1103,13 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_gem_request_alloc(eb->engine, eb->ctx); + rq = i915_request_alloc(eb->engine, eb->ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; } - err = i915_gem_request_await_object(rq, vma->obj, true); + err = i915_request_await_object(rq, vma->obj, true); if (err) goto err_request; @@ -1141,7 +1141,7 @@ static 
int __reloc_gpu_alloc(struct i915_execbuffer *eb, return 0; err_request: - i915_add_request(rq); + i915_request_add(rq); err_unpin: i915_vma_unpin(batch); err_unmap: @@ -1727,7 +1727,7 @@ slow: } static void eb_export_fence(struct i915_vma *vma, - struct drm_i915_gem_request *req, + struct i915_request *rq, unsigned int flags) { struct reservation_object *resv = vma->resv; @@ -1739,9 +1739,9 @@ static void eb_export_fence(struct i915_vma *vma, */ reservation_object_lock(resv, NULL); if (flags & EXEC_OBJECT_WRITE) - reservation_object_add_excl_fence(resv, &req->fence); + reservation_object_add_excl_fence(resv, &rq->fence); else if (reservation_object_reserve_shared(resv) == 0) - reservation_object_add_shared_fence(resv, &req->fence); + reservation_object_add_shared_fence(resv, &rq->fence); reservation_object_unlock(resv); } @@ -1757,7 +1757,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) struct drm_i915_gem_object *obj = vma->obj; if (flags & EXEC_OBJECT_CAPTURE) { - struct i915_gem_capture_list *capture; + struct i915_capture_list *capture; capture = kmalloc(sizeof(*capture), GFP_KERNEL); if (unlikely(!capture)) @@ -1788,7 +1788,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) if (flags & EXEC_OBJECT_ASYNC) continue; - err = i915_gem_request_await_object + err = i915_request_await_object (eb->request, obj, flags & EXEC_OBJECT_WRITE); if (err) return err; @@ -1840,13 +1840,13 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) } void i915_vma_move_to_active(struct i915_vma *vma, - struct drm_i915_gem_request *req, + struct i915_request *rq, unsigned int flags) { struct drm_i915_gem_object *obj = vma->obj; - const unsigned int idx = req->engine->id; + const unsigned int idx = rq->engine->id; - lockdep_assert_held(&req->i915->drm.struct_mutex); + lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); /* @@ -1860,7 +1860,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, if 
(!i915_vma_is_active(vma)) obj->active_count++; i915_vma_set_active(vma, idx); - i915_gem_active_set(&vma->last_read[idx], req); + i915_gem_active_set(&vma->last_read[idx], rq); list_move_tail(&vma->vm_link, &vma->vm->active_list); obj->write_domain = 0; @@ -1868,27 +1868,27 @@ void i915_vma_move_to_active(struct i915_vma *vma, obj->write_domain = I915_GEM_DOMAIN_RENDER; if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) - i915_gem_active_set(&obj->frontbuffer_write, req); + i915_gem_active_set(&obj->frontbuffer_write, rq); obj->read_domains = 0; } obj->read_domains |= I915_GEM_GPU_DOMAINS; if (flags & EXEC_OBJECT_NEEDS_FENCE) - i915_gem_active_set(&vma->last_fence, req); + i915_gem_active_set(&vma->last_fence, rq); } -static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) +static int i915_reset_gen7_sol_offsets(struct i915_request *rq) { u32 *cs; int i; - if (!IS_GEN7(req->i915) || req->engine->id != RCS) { + if (!IS_GEN7(rq->i915) || rq->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } - cs = intel_ring_begin(req, 4 * 2 + 2); + cs = intel_ring_begin(rq, 4 * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1898,7 +1898,7 @@ static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) *cs++ = 0; } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1944,10 +1944,10 @@ out: } static void -add_to_client(struct drm_i915_gem_request *req, struct drm_file *file) +add_to_client(struct i915_request *rq, struct drm_file *file) { - req->file_priv = file->driver_priv; - list_add_tail(&req->client_link, &req->file_priv->mm.request_list); + rq->file_priv = file->driver_priv; + list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); } static int eb_submit(struct i915_execbuffer *eb) @@ -2151,7 +2151,7 @@ await_fence_array(struct i915_execbuffer *eb, if (!fence) return -EINVAL; - err = i915_gem_request_await_dma_fence(eb->request, fence); + err = 
i915_request_await_dma_fence(eb->request, fence); dma_fence_put(fence); if (err < 0) return err; @@ -2365,14 +2365,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, GEM_BUG_ON(eb.reloc_cache.rq); /* Allocate a request for this batch buffer nice and early. */ - eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); + eb.request = i915_request_alloc(eb.engine, eb.ctx); if (IS_ERR(eb.request)) { err = PTR_ERR(eb.request); goto err_batch_unpin; } if (in_fence) { - err = i915_gem_request_await_dma_fence(eb.request, in_fence); + err = i915_request_await_dma_fence(eb.request, in_fence); if (err < 0) goto err_request; } @@ -2400,10 +2400,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, */ eb.request->batch = eb.batch; - trace_i915_gem_request_queue(eb.request, eb.batch_flags); + trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb); err_request: - __i915_add_request(eb.request, err == 0); + __i915_request_add(eb.request, err == 0); add_to_client(eb.request, file); if (fences) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index cd5984246bc3..21d72f695adb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -765,16 +765,16 @@ static void gen8_initialize_pml4(struct i915_address_space *vm, } /* Broadwell Page Directory Pointer Descriptors */ -static int gen8_write_pdp(struct drm_i915_gem_request *req, +static int gen8_write_pdp(struct i915_request *rq, unsigned entry, dma_addr_t addr) { - struct intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 *cs; BUG_ON(entry >= 4); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -784,20 +784,20 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry)); *cs++ = lower_32_bits(addr); - intel_ring_advance(req, cs); + intel_ring_advance(rq, 
cs); return 0; } static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { int i, ret; for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - ret = gen8_write_pdp(req, i, pd_daddr); + ret = gen8_write_pdp(rq, i, pd_daddr); if (ret) return ret; } @@ -806,9 +806,9 @@ static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, } static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); + return gen8_write_pdp(rq, 0, px_dma(&ppgtt->pml4)); } /* PDE TLBs are a pain to invalidate on GEN8+. When we modify @@ -1732,13 +1732,13 @@ static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) } static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 *cs; /* NB: TLBs must be flushed and invalidated before a switch */ - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1748,19 +1748,19 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); *cs++ = get_pd_offset(ppgtt); *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; + struct intel_engine_cs *engine = rq->engine; u32 *cs; /* NB: TLBs must be flushed and invalidated before a switch */ - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1770,16 +1770,16 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); 
*cs++ = get_pd_offset(ppgtt); *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) + struct i915_request *rq) { - struct intel_engine_cs *engine = req->engine; - struct drm_i915_private *dev_priv = req->i915; + struct intel_engine_cs *engine = rq->engine; + struct drm_i915_private *dev_priv = rq->i915; I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index a42890d9af38..6efc017e8bb3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -39,7 +39,8 @@ #include #include "i915_gem_timeline.h" -#include "i915_gem_request.h" + +#include "i915_request.h" #include "i915_selftest.h" #define I915_GTT_PAGE_SIZE_4K BIT(12) @@ -398,7 +399,7 @@ struct i915_hw_ppgtt { gen6_pte_t __iomem *pd_addr; int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req); + struct i915_request *rq); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); }; diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index ca2b3b62569d..54f00b350779 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -33,7 +33,7 @@ #include -#include "i915_gem_request.h" +#include "i915_request.h" #include "i915_selftest.h" struct drm_i915_gem_object; diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index f7fc0df251ac..1036e8686916 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -177,7 +177,7 @@ err: #undef OUT_BATCH -int i915_gem_render_state_emit(struct drm_i915_gem_request *rq) +int i915_gem_render_state_emit(struct i915_request *rq) { struct intel_engine_cs 
*engine = rq->engine; struct intel_render_state so = {}; /* keep the compiler happy */ diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 86369520482e..112cda8fa1a8 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -24,8 +24,8 @@ #ifndef _I915_GEM_RENDER_STATE_H_ #define _I915_GEM_RENDER_STATE_H_ -struct drm_i915_gem_request; +struct i915_request; -int i915_gem_render_state_emit(struct drm_i915_gem_request *rq); +int i915_gem_render_state_emit(struct i915_request *rq); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c deleted file mode 100644 index 0deca06fdf0e..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ /dev/null @@ -1,1397 +0,0 @@ -/* - * Copyright © 2008-2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include -#include -#include -#include -#include - -#include "i915_drv.h" - -static const char *i915_fence_get_driver_name(struct dma_fence *fence) -{ - return "i915"; -} - -static const char *i915_fence_get_timeline_name(struct dma_fence *fence) -{ - /* The timeline struct (as part of the ppgtt underneath a context) - * may be freed when the request is no longer in use by the GPU. - * We could extend the life of a context to beyond that of all - * fences, possibly keeping the hw resource around indefinitely, - * or we just give them a false name. Since - * dma_fence_ops.get_timeline_name is a debug feature, the occasional - * lie seems justifiable. - */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return "signaled"; - - return to_request(fence)->timeline->common->name; -} - -static bool i915_fence_signaled(struct dma_fence *fence) -{ - return i915_gem_request_completed(to_request(fence)); -} - -static bool i915_fence_enable_signaling(struct dma_fence *fence) -{ - if (i915_fence_signaled(fence)) - return false; - - intel_engine_enable_signaling(to_request(fence), true); - return !i915_fence_signaled(fence); -} - -static signed long i915_fence_wait(struct dma_fence *fence, - bool interruptible, - signed long timeout) -{ - return i915_wait_request(to_request(fence), interruptible, timeout); -} - -static void i915_fence_release(struct dma_fence *fence) -{ - struct drm_i915_gem_request *req = to_request(fence); - - /* The request is put onto a RCU freelist (i.e. the address - * is immediately reused), mark the fences as being freed now. 
- * Otherwise the debugobjects for the fences are only marked as - * freed when the slab cache itself is freed, and so we would get - * caught trying to reuse dead objects. - */ - i915_sw_fence_fini(&req->submit); - - kmem_cache_free(req->i915->requests, req); -} - -const struct dma_fence_ops i915_fence_ops = { - .get_driver_name = i915_fence_get_driver_name, - .get_timeline_name = i915_fence_get_timeline_name, - .enable_signaling = i915_fence_enable_signaling, - .signaled = i915_fence_signaled, - .wait = i915_fence_wait, - .release = i915_fence_release, -}; - -static inline void -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) -{ - struct drm_i915_file_private *file_priv; - - file_priv = request->file_priv; - if (!file_priv) - return; - - spin_lock(&file_priv->mm.lock); - if (request->file_priv) { - list_del(&request->client_link); - request->file_priv = NULL; - } - spin_unlock(&file_priv->mm.lock); -} - -static struct i915_dependency * -i915_dependency_alloc(struct drm_i915_private *i915) -{ - return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); -} - -static void -i915_dependency_free(struct drm_i915_private *i915, - struct i915_dependency *dep) -{ - kmem_cache_free(i915->dependencies, dep); -} - -static void -__i915_priotree_add_dependency(struct i915_priotree *pt, - struct i915_priotree *signal, - struct i915_dependency *dep, - unsigned long flags) -{ - INIT_LIST_HEAD(&dep->dfs_link); - list_add(&dep->wait_link, &signal->waiters_list); - list_add(&dep->signal_link, &pt->signalers_list); - dep->signaler = signal; - dep->flags = flags; -} - -static int -i915_priotree_add_dependency(struct drm_i915_private *i915, - struct i915_priotree *pt, - struct i915_priotree *signal) -{ - struct i915_dependency *dep; - - dep = i915_dependency_alloc(i915); - if (!dep) - return -ENOMEM; - - __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC); - return 0; -} - -static void -i915_priotree_fini(struct drm_i915_private *i915, struct 
i915_priotree *pt) -{ - struct i915_dependency *dep, *next; - - GEM_BUG_ON(!list_empty(&pt->link)); - - /* - * Everyone we depended upon (the fences we wait to be signaled) - * should retire before us and remove themselves from our list. - * However, retirement is run independently on each timeline and - * so we may be called out-of-order. - */ - list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { - GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); - GEM_BUG_ON(!list_empty(&dep->dfs_link)); - - list_del(&dep->wait_link); - if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(i915, dep); - } - - /* Remove ourselves from everyone who depends upon us */ - list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { - GEM_BUG_ON(dep->signaler != pt); - GEM_BUG_ON(!list_empty(&dep->dfs_link)); - - list_del(&dep->signal_link); - if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(i915, dep); - } -} - -static void -i915_priotree_init(struct i915_priotree *pt) -{ - INIT_LIST_HEAD(&pt->signalers_list); - INIT_LIST_HEAD(&pt->waiters_list); - INIT_LIST_HEAD(&pt->link); - pt->priority = I915_PRIORITY_INVALID; -} - -static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret; - - /* Carefully retire all requests without writing to the rings */ - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); - if (ret) - return ret; - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - for_each_engine(engine, i915, id) { - struct i915_gem_timeline *timeline; - struct intel_timeline *tl = engine->timeline; - - if (!i915_seqno_passed(seqno, tl->seqno)) { - /* spin until threads are complete */ - while (intel_breadcrumbs_busy(engine)) - cond_resched(); - } - - /* Check we are idle before we fiddle with hw state! 
*/ - GEM_BUG_ON(!intel_engine_is_idle(engine)); - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); - - /* Finally reset hw state */ - intel_engine_init_global_seqno(engine, seqno); - tl->seqno = seqno; - - list_for_each_entry(timeline, &i915->gt.timelines, link) - memset(timeline->engine[id].global_sync, 0, - sizeof(timeline->engine[id].global_sync)); - } - - return 0; -} - -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - return reset_all_global_seqno(dev_priv, seqno - 1); -} - -static void mark_busy(struct drm_i915_private *i915) -{ - if (i915->gt.awake) - return; - - GEM_BUG_ON(!i915->gt.active_requests); - - intel_runtime_pm_get_noresume(i915); - - /* - * It seems that the DMC likes to transition between the DC states a lot - * when there are no connected displays (no active power domains) during - * command submission. - * - * This activity has negative impact on the performance of the chip with - * huge latencies observed in the interrupt handler and elsewhere. - * - * Work around it by grabbing a GT IRQ power domain whilst there is any - * GT activity, preventing any DC state transitions. 
- */ - intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - - i915->gt.awake = true; - if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ - i915->gt.epoch = 1; - - intel_enable_gt_powersave(i915); - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - i915_pmu_gt_unparked(i915); - - intel_engines_unpark(i915); - - i915_queue_hangcheck(i915); - - queue_delayed_work(i915->wq, - &i915->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - -static int reserve_engine(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - u32 active = ++engine->timeline->inflight_seqnos; - u32 seqno = engine->timeline->seqno; - int ret; - - /* Reservation is fine until we need to wrap around */ - if (unlikely(add_overflows(seqno, active))) { - ret = reset_all_global_seqno(i915, 0); - if (ret) { - engine->timeline->inflight_seqnos--; - return ret; - } - } - - if (!i915->gt.active_requests++) - mark_busy(i915); - - return 0; -} - -static void unreserve_engine(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - - if (!--i915->gt.active_requests) { - /* Cancel the mark_busy() from our reserve_engine() */ - GEM_BUG_ON(!i915->gt.awake); - mod_delayed_work(i915->wq, - &i915->gt.idle_work, - msecs_to_jiffies(100)); - } - - GEM_BUG_ON(!engine->timeline->inflight_seqnos); - engine->timeline->inflight_seqnos--; -} - -void i915_gem_retire_noop(struct i915_gem_active *active, - struct drm_i915_gem_request *request) -{ - /* Space left intentionally blank */ -} - -static void advance_ring(struct drm_i915_gem_request *request) -{ - unsigned int tail; - - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. 
- */ - if (list_is_last(&request->ring_link, &request->ring->request_list)) { - /* We may race here with execlists resubmitting this request - * as we retire it. The resubmission will move the ring->tail - * forwards (to request->wa_tail). We either read the - * current value that was written to hw, or the value that - * is just about to be. Either works, if we miss the last two - * noops - they are safe to be replayed on a reset. - */ - tail = READ_ONCE(request->ring->tail); - } else { - tail = request->postfix; - } - list_del(&request->ring_link); - - request->ring->head = tail; -} - -static void free_capture_list(struct drm_i915_gem_request *request) -{ - struct i915_gem_capture_list *capture; - - capture = request->capture_list; - while (capture) { - struct i915_gem_capture_list *next = capture->next; - - kfree(capture); - capture = next; - } -} - -static void i915_gem_request_retire(struct drm_i915_gem_request *request) -{ - struct intel_engine_cs *engine = request->engine; - struct i915_gem_active *active, *next; - - lockdep_assert_held(&request->i915->drm.struct_mutex); - GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); - GEM_BUG_ON(!i915_gem_request_completed(request)); - GEM_BUG_ON(!request->i915->gt.active_requests); - - trace_i915_gem_request_retire(request); - - spin_lock_irq(&engine->timeline->lock); - list_del_init(&request->link); - spin_unlock_irq(&engine->timeline->lock); - - unreserve_engine(request->engine); - advance_ring(request); - - free_capture_list(request); - - /* Walk through the active list, calling retire on each. This allows - * objects to track their GPU activity and mark themselves as idle - * when their *last* active request is completed (updating state - * tracking lists for eviction, active references for GEM, etc). - * - * As the ->retire() may free the node, we decouple it first and - * pass along the auxiliary information (to avoid dereferencing - * the node after the callback). 
- */ - list_for_each_entry_safe(active, next, &request->active_list, link) { - /* In microbenchmarks or focusing upon time inside the kernel, - * we may spend an inordinate amount of time simply handling - * the retirement of requests and processing their callbacks. - * Of which, this loop itself is particularly hot due to the - * cache misses when jumping around the list of i915_gem_active. - * So we try to keep this loop as streamlined as possible and - * also prefetch the next i915_gem_active to try and hide - * the likely cache miss. - */ - prefetchw(next); - - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, request); - } - - i915_gem_request_remove_from_client(request); - - /* Retirement decays the ban score as it is a sign of ctx progress */ - atomic_dec_if_positive(&request->ctx->ban_score); - - /* The backing object for the context is done after switching to the - * *next* context. Therefore we cannot retire the previous context until - * the next context has already started running. However, since we - * cannot take the required locks at i915_gem_request_submit() we - * defer the unpinning of the active context to now, retirement of - * the subsequent request. 
- */ - if (engine->last_retired_context) - engine->context_unpin(engine, engine->last_retired_context); - engine->last_retired_context = request->ctx; - - spin_lock_irq(&request->lock); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) - dma_fence_signal_locked(&request->fence); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); - if (request->waitboost) { - GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters)); - atomic_dec(&request->i915->gt_pm.rps.num_waiters); - } - spin_unlock_irq(&request->lock); - - i915_priotree_fini(request->i915, &request->priotree); - i915_gem_request_put(request); -} - -void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - struct drm_i915_gem_request *tmp; - - lockdep_assert_held(&req->i915->drm.struct_mutex); - GEM_BUG_ON(!i915_gem_request_completed(req)); - - if (list_empty(&req->link)) - return; - - do { - tmp = list_first_entry(&engine->timeline->requests, - typeof(*tmp), link); - - i915_gem_request_retire(tmp); - } while (tmp != req); -} - -static u32 timeline_get_seqno(struct intel_timeline *tl) -{ - return ++tl->seqno; -} - -void __i915_gem_request_submit(struct drm_i915_gem_request *request) -{ - struct intel_engine_cs *engine = request->engine; - struct intel_timeline *timeline; - u32 seqno; - - GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); - - /* Transfer from per-context onto the global per-engine timeline */ - timeline = engine->timeline; - GEM_BUG_ON(timeline == request->timeline); - GEM_BUG_ON(request->global_seqno); - - seqno = timeline_get_seqno(timeline); - GEM_BUG_ON(!seqno); - GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); - - /* We may be recursing from the signal callback of another i915 fence */ - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - request->global_seqno = seqno; - if 
(test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_enable_signaling(request, false); - spin_unlock(&request->lock); - - engine->emit_breadcrumb(request, - request->ring->vaddr + request->postfix); - - spin_lock(&request->timeline->lock); - list_move_tail(&request->link, &timeline->requests); - spin_unlock(&request->timeline->lock); - - trace_i915_gem_request_execute(request); - - wake_up_all(&request->execute); -} - -void i915_gem_request_submit(struct drm_i915_gem_request *request) -{ - struct intel_engine_cs *engine = request->engine; - unsigned long flags; - - /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); - - __i915_gem_request_submit(request); - - spin_unlock_irqrestore(&engine->timeline->lock, flags); -} - -void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) -{ - struct intel_engine_cs *engine = request->engine; - struct intel_timeline *timeline; - - GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); - - /* Only unwind in reverse order, required so that the per-context list - * is kept in seqno/ring order. 
- */ - GEM_BUG_ON(!request->global_seqno); - GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); - GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), - request->global_seqno)); - engine->timeline->seqno--; - - /* We may be recursing from the signal callback of another i915 fence */ - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - request->global_seqno = 0; - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); - spin_unlock(&request->lock); - - /* Transfer back from the global per-engine timeline to per-context */ - timeline = request->timeline; - GEM_BUG_ON(timeline == engine->timeline); - - spin_lock(&timeline->lock); - list_move(&request->link, &timeline->requests); - spin_unlock(&timeline->lock); - - /* We don't need to wake_up any waiters on request->execute, they - * will get woken by any other event or us re-adding this request - * to the engine timeline (__i915_gem_request_submit()). The waiters - * should be quite adapt at finding that the request now has a new - * global_seqno to the one they went to sleep on. - */ -} - -void i915_gem_request_unsubmit(struct drm_i915_gem_request *request) -{ - struct intel_engine_cs *engine = request->engine; - unsigned long flags; - - /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); - - __i915_gem_request_unsubmit(request); - - spin_unlock_irqrestore(&engine->timeline->lock, flags); -} - -static int __i915_sw_fence_call -submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) -{ - struct drm_i915_gem_request *request = - container_of(fence, typeof(*request), submit); - - switch (state) { - case FENCE_COMPLETE: - trace_i915_gem_request_submit(request); - /* - * We need to serialize use of the submit_request() callback with its - * hotplugging performed during an emergency i915_gem_set_wedged(). 
- * We use the RCU mechanism to mark the critical section in order to - * force i915_gem_set_wedged() to wait until the submit_request() is - * completed before proceeding. - */ - rcu_read_lock(); - request->engine->submit_request(request); - rcu_read_unlock(); - break; - - case FENCE_FREE: - i915_gem_request_put(request); - break; - } - - return NOTIFY_DONE; -} - -/** - * i915_gem_request_alloc - allocate a request structure - * - * @engine: engine that we wish to issue the request on. - * @ctx: context that the request will be associated with. - * - * Returns a pointer to the allocated request if successful, - * or an error code if not. - */ -struct drm_i915_gem_request * -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct drm_i915_gem_request *req; - struct intel_ring *ring; - int ret; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - /* - * Preempt contexts are reserved for exclusive use to inject a - * preemption context switch. They are never to be used for any trivial - * request! - */ - GEM_BUG_ON(ctx == dev_priv->preempt_context); - - /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged. - */ - if (i915_terminally_wedged(&dev_priv->gpu_error)) - return ERR_PTR(-EIO); - - /* Pinning the contexts may generate requests in order to acquire - * GGTT space, so do this first before we reserve a seqno for - * ourselves. - */ - ring = engine->context_pin(engine, ctx); - if (IS_ERR(ring)) - return ERR_CAST(ring); - GEM_BUG_ON(!ring); - - ret = reserve_engine(engine); - if (ret) - goto err_unpin; - - ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST); - if (ret) - goto err_unreserve; - - /* Move the oldest request to the slab-cache (if not in use!) 
*/ - req = list_first_entry_or_null(&engine->timeline->requests, - typeof(*req), link); - if (req && i915_gem_request_completed(req)) - i915_gem_request_retire(req); - - /* Beware: Dragons be flying overhead. - * - * We use RCU to look up requests in flight. The lookups may - * race with the request being allocated from the slab freelist. - * That is the request we are writing to here, may be in the process - * of being read by __i915_gem_active_get_rcu(). As such, - * we have to be very careful when overwriting the contents. During - * the RCU lookup, we change chase the request->engine pointer, - * read the request->global_seqno and increment the reference count. - * - * The reference count is incremented atomically. If it is zero, - * the lookup knows the request is unallocated and complete. Otherwise, - * it is either still in use, or has been reallocated and reset - * with dma_fence_init(). This increment is safe for release as we - * check that the request we have a reference to and matches the active - * request. - * - * Before we increment the refcount, we chase the request->engine - * pointer. We must not call kmem_cache_zalloc() or else we set - * that pointer to NULL and cause a crash during the lookup. If - * we see the request is completed (based on the value of the - * old engine and seqno), the lookup is complete and reports NULL. - * If we decide the request is not completed (new engine or seqno), - * then we grab a reference and double check that it is still the - * active request - which it won't be and restart the lookup. - * - * Do not use kmem_cache_zalloc() here! 
- */ - req = kmem_cache_alloc(dev_priv->requests, - GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); - if (unlikely(!req)) { - /* Ratelimit ourselves to prevent oom from malicious clients */ - ret = i915_gem_wait_for_idle(dev_priv, - I915_WAIT_LOCKED | - I915_WAIT_INTERRUPTIBLE); - if (ret) - goto err_unreserve; - - /* - * We've forced the client to stall and catch up with whatever - * backlog there might have been. As we are assuming that we - * caused the mempressure, now is an opportune time to - * recover as much memory from the request pool as is possible. - * Having already penalized the client to stall, we spend - * a little extra time to re-optimise page allocation. - */ - kmem_cache_shrink(dev_priv->requests); - rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */ - - req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); - if (!req) { - ret = -ENOMEM; - goto err_unreserve; - } - } - - req->timeline = i915_gem_context_lookup_timeline(ctx, engine); - GEM_BUG_ON(req->timeline == engine->timeline); - - spin_lock_init(&req->lock); - dma_fence_init(&req->fence, - &i915_fence_ops, - &req->lock, - req->timeline->fence_context, - timeline_get_seqno(req->timeline)); - - /* We bump the ref for the fence chain */ - i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); - init_waitqueue_head(&req->execute); - - i915_priotree_init(&req->priotree); - - INIT_LIST_HEAD(&req->active_list); - req->i915 = dev_priv; - req->engine = engine; - req->ctx = ctx; - req->ring = ring; - - /* No zalloc, must clear what we need by hand */ - req->global_seqno = 0; - req->signaling.wait.seqno = 0; - req->file_priv = NULL; - req->batch = NULL; - req->capture_list = NULL; - req->waitboost = false; - - /* - * Reserve space in the ring buffer for all the commands required to - * eventually emit this request. This is to guarantee that the - * i915_add_request() call can't fail. 
Note that the reserve may need - * to be redone if the request is not actually submitted straight - * away, e.g. because a GPU scheduler has deferred it. - */ - req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; - GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz); - - /* - * Record the position of the start of the request so that - * should we detect the updated seqno part-way through the - * GPU processing the request, we never over-estimate the - * position of the head. - */ - req->head = req->ring->emit; - - /* Unconditionally invalidate GPU caches and TLBs. */ - ret = engine->emit_flush(req, EMIT_INVALIDATE); - if (ret) - goto err_unwind; - - ret = engine->request_alloc(req); - if (ret) - goto err_unwind; - - /* Check that we didn't interrupt ourselves with a new request */ - GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); - return req; - -err_unwind: - req->ring->emit = req->head; - - /* Make sure we didn't add ourselves to external state before freeing */ - GEM_BUG_ON(!list_empty(&req->active_list)); - GEM_BUG_ON(!list_empty(&req->priotree.signalers_list)); - GEM_BUG_ON(!list_empty(&req->priotree.waiters_list)); - - kmem_cache_free(dev_priv->requests, req); -err_unreserve: - unreserve_engine(engine); -err_unpin: - engine->context_unpin(engine, ctx); - return ERR_PTR(ret); -} - -static int -i915_gem_request_await_request(struct drm_i915_gem_request *to, - struct drm_i915_gem_request *from) -{ - int ret; - - GEM_BUG_ON(to == from); - GEM_BUG_ON(to->timeline == from->timeline); - - if (i915_gem_request_completed(from)) - return 0; - - if (to->engine->schedule) { - ret = i915_priotree_add_dependency(to->i915, - &to->priotree, - &from->priotree); - if (ret < 0) - return ret; - } - - if (to->engine == from->engine) { - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, - &from->submit, - I915_FENCE_GFP); - return ret < 0 ? 
ret : 0; - } - - if (to->engine->semaphore.sync_to) { - u32 seqno; - - GEM_BUG_ON(!from->engine->semaphore.signal); - - seqno = i915_gem_request_global_seqno(from); - if (!seqno) - goto await_dma_fence; - - if (seqno <= to->timeline->global_sync[from->engine->id]) - return 0; - - trace_i915_gem_ring_sync_to(to, from); - ret = to->engine->semaphore.sync_to(to, from); - if (ret) - return ret; - - to->timeline->global_sync[from->engine->id] = seqno; - return 0; - } - -await_dma_fence: - ret = i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - I915_FENCE_GFP); - return ret < 0 ? ret : 0; -} - -int -i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, - struct dma_fence *fence) -{ - struct dma_fence **child = &fence; - unsigned int nchild = 1; - int ret; - - /* Note that if the fence-array was created in signal-on-any mode, - * we should *not* decompose it into its individual fences. However, - * we don't currently store which mode the fence-array is operating - * in. Fortunately, the only user of signal-on-any is private to - * amdgpu and we should not see any incoming fence-array from - * sync-file being in signal-on-any mode. - */ - if (dma_fence_is_array(fence)) { - struct dma_fence_array *array = to_dma_fence_array(fence); - - child = array->fences; - nchild = array->num_fences; - GEM_BUG_ON(!nchild); - } - - do { - fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - continue; - - /* - * Requests on the same timeline are explicitly ordered, along - * with their dependencies, by i915_add_request() which ensures - * that requests are submitted in-order through each ring. 
- */ - if (fence->context == req->fence.context) - continue; - - /* Squash repeated waits to the same timelines */ - if (fence->context != req->i915->mm.unordered_timeline && - intel_timeline_sync_is_later(req->timeline, fence)) - continue; - - if (dma_fence_is_i915(fence)) - ret = i915_gem_request_await_request(req, - to_request(fence)); - else - ret = i915_sw_fence_await_dma_fence(&req->submit, fence, - I915_FENCE_TIMEOUT, - I915_FENCE_GFP); - if (ret < 0) - return ret; - - /* Record the latest fence used against each timeline */ - if (fence->context != req->i915->mm.unordered_timeline) - intel_timeline_sync_set(req->timeline, fence); - } while (--nchild); - - return 0; -} - -/** - * i915_gem_request_await_object - set this request to (async) wait upon a bo - * @to: request we are wishing to use - * @obj: object which may be in use on another ring. - * @write: whether the wait is on behalf of a writer - * - * This code is meant to abstract object synchronization with the GPU. - * Conceptually we serialise writes between engines inside the GPU. - * We only allow one engine to write into a buffer at any time, but - * multiple readers. To ensure each has a coherent view of memory, we must: - * - * - If there is an outstanding write request to the object, the new - * request must wait for it to complete (either CPU or in hw, requests - * on the same ring will be naturally ordered). - * - * - If we are a write request (pending_write_domain is set), the new - * request must wait for outstanding read requests to complete. - * - * Returns 0 if successful, else propagates up the lower layer error. 
- */ -int -i915_gem_request_await_object(struct drm_i915_gem_request *to, - struct drm_i915_gem_object *obj, - bool write) -{ - struct dma_fence *excl; - int ret = 0; - - if (write) { - struct dma_fence **shared; - unsigned int count, i; - - ret = reservation_object_get_fences_rcu(obj->resv, - &excl, &count, &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - ret = i915_gem_request_await_dma_fence(to, shared[i]); - if (ret) - break; - - dma_fence_put(shared[i]); - } - - for (; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - } else { - excl = reservation_object_get_excl_rcu(obj->resv); - } - - if (excl) { - if (ret == 0) - ret = i915_gem_request_await_dma_fence(to, excl); - - dma_fence_put(excl); - } - - return ret; -} - -/* - * NB: This function is not allowed to fail. Doing so would mean the the - * request is not being tracked for completion but the work itself is - * going to happen on the hardware. This would be a Bad Thing(tm). - */ -void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) -{ - struct intel_engine_cs *engine = request->engine; - struct intel_ring *ring = request->ring; - struct intel_timeline *timeline = request->timeline; - struct drm_i915_gem_request *prev; - u32 *cs; - int err; - - lockdep_assert_held(&request->i915->drm.struct_mutex); - trace_i915_gem_request_add(request); - - /* - * Make sure that no request gazumped us - if it was allocated after - * our i915_gem_request_alloc() and called __i915_add_request() before - * us, the timeline will hold its seqno which is later than ours. - */ - GEM_BUG_ON(timeline->seqno != request->fence.seqno); - - /* - * To ensure that this call will not fail, space for its emissions - * should already have been reserved in the ring buffer. Let the ring - * know that it is time to use that space up. 
- */ - request->reserved_space = 0; - - /* - * Emit any outstanding flushes - execbuf can fail to emit the flush - * after having emitted the batchbuffer command. Hence we need to fix - * things up similar to emitting the lazy request. The difference here - * is that the flush _must_ happen before the next request, no matter - * what. - */ - if (flush_caches) { - err = engine->emit_flush(request, EMIT_FLUSH); - - /* Not allowed to fail! */ - WARN(err, "engine->emit_flush() failed: %d!\n", err); - } - - /* - * Record the position of the start of the breadcrumb so that - * should we detect the updated seqno part-way through the - * GPU processing the request, we never over-estimate the - * position of the ring's HEAD. - */ - cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); - GEM_BUG_ON(IS_ERR(cs)); - request->postfix = intel_ring_offset(request, cs); - - /* - * Seal the request and mark it as pending execution. Note that - * we may inspect this state, without holding any locks, during - * hangcheck. Hence we apply the barrier to ensure that we do not - * see a more recent value in the hws than we are tracking. - */ - - prev = i915_gem_active_raw(&timeline->last_request, - &request->i915->drm.struct_mutex); - if (prev && !i915_gem_request_completed(prev)) { - i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, - &request->submitq); - if (engine->schedule) - __i915_priotree_add_dependency(&request->priotree, - &prev->priotree, - &request->dep, - 0); - } - - spin_lock_irq(&timeline->lock); - list_add_tail(&request->link, &timeline->requests); - spin_unlock_irq(&timeline->lock); - - GEM_BUG_ON(timeline->seqno != request->fence.seqno); - i915_gem_active_set(&timeline->last_request, request); - - list_add_tail(&request->ring_link, &ring->request_list); - request->emitted_jiffies = jiffies; - - /* - * Let the backend know a new request has arrived that may need - * to adjust the existing execution schedule due to a high priority - * request - i.e. 
we may want to preempt the current request in order - * to run a high priority dependency chain *before* we can execute this - * request. - * - * This is called before the request is ready to run so that we can - * decide whether to preempt the entire chain so that it is ready to - * run at the earliest possible convenience. - */ - if (engine->schedule) - engine->schedule(request, request->ctx->priority); - - local_bh_disable(); - i915_sw_fence_commit(&request->submit); - local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ - - /* - * In typical scenarios, we do not expect the previous request on - * the timeline to be still tracked by timeline->last_request if it - * has been completed. If the completed request is still here, that - * implies that request retirement is a long way behind submission, - * suggesting that we haven't been retiring frequently enough from - * the combination of retire-before-alloc, waiters and the background - * retirement worker. So if the last request on this timeline was - * already completed, do a catch up pass, flushing the retirement queue - * up to this client. Since we have now moved the heaviest operations - * during retirement onto secondary workers, such as freeing objects - * or contexts, retiring a bunch of requests is mostly list management - * (and cache misses), and so we should not be overly penalizing this - * client by performing excess work, though we may still performing - * work on behalf of others -- but instead we should benefit from - * improved resource management. (Well, that's the theory at least.) - */ - if (prev && i915_gem_request_completed(prev)) - i915_gem_request_retire_upto(prev); -} - -static unsigned long local_clock_us(unsigned int *cpu) -{ - unsigned long t; - - /* Cheaply and approximately convert from nanoseconds to microseconds. - * The result and subsequent calculations are also defined in the same - * approximate microseconds units. 
The principal source of timing - * error here is from the simple truncation. - * - * Note that local_clock() is only defined wrt to the current CPU; - * the comparisons are no longer valid if we switch CPUs. Instead of - * blocking preemption for the entire busywait, we can detect the CPU - * switch and use that as indicator of system load and a reason to - * stop busywaiting, see busywait_stop(). - */ - *cpu = get_cpu(); - t = local_clock() >> 10; - put_cpu(); - - return t; -} - -static bool busywait_stop(unsigned long timeout, unsigned int cpu) -{ - unsigned int this_cpu; - - if (time_after(local_clock_us(&this_cpu), timeout)) - return true; - - return this_cpu != cpu; -} - -static bool __i915_spin_request(const struct drm_i915_gem_request *req, - u32 seqno, int state, unsigned long timeout_us) -{ - struct intel_engine_cs *engine = req->engine; - unsigned int irq, cpu; - - GEM_BUG_ON(!seqno); - - /* - * Only wait for the request if we know it is likely to complete. - * - * We don't track the timestamps around requests, nor the average - * request length, so we do not have a good indicator that this - * request will complete within the timeout. What we do know is the - * order in which requests are executed by the engine and so we can - * tell if the request has started. If the request hasn't started yet, - * it is a fair assumption that it will not complete within our - * relatively short timeout. - */ - if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1)) - return false; - - /* When waiting for high frequency requests, e.g. during synchronous - * rendering split between the CPU and GPU, the finite amount of time - * required to set up the irq and wait upon it limits the response - * rate. By busywaiting on the request completion for a short while we - * can service the high frequency waits as quick as possible. However, - * if it is a slow request, we want to sleep as quickly as possible. 
- * The tradeoff between waiting and sleeping is roughly the time it - * takes to sleep on a request, on the order of a microsecond. - */ - - irq = atomic_read(&engine->irq_count); - timeout_us += local_clock_us(&cpu); - do { - if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno)) - return seqno == i915_gem_request_global_seqno(req); - - /* Seqno are meant to be ordered *before* the interrupt. If - * we see an interrupt without a corresponding seqno advance, - * assume we won't see one in the near future but require - * the engine->seqno_barrier() to fixup coherency. - */ - if (atomic_read(&engine->irq_count) != irq) - break; - - if (signal_pending_state(state, current)) - break; - - if (busywait_stop(timeout_us, cpu)) - break; - - cpu_relax(); - } while (!need_resched()); - - return false; -} - -static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *request) -{ - if (likely(!i915_reset_handoff(&request->i915->gpu_error))) - return false; - - __set_current_state(TASK_RUNNING); - i915_reset(request->i915, 0); - return true; -} - -/** - * i915_wait_request - wait until execution of request has finished - * @req: the request to wait upon - * @flags: how to wait - * @timeout: how long to wait in jiffies - * - * i915_wait_request() waits for the request to be completed, for a - * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an - * unbounded wait). - * - * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED - * in via the flags, and vice versa if the struct_mutex is not held, the caller - * must not specify that the wait is locked. - * - * Returns the remaining time (in jiffies) if the request completed, which may - * be zero or -ETIME if the request is unfinished after the timeout expires. - * May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is - * pending before the request completes. 
- */ -long i915_wait_request(struct drm_i915_gem_request *req, - unsigned int flags, - long timeout) -{ - const int state = flags & I915_WAIT_INTERRUPTIBLE ? - TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue; - DEFINE_WAIT_FUNC(reset, default_wake_function); - DEFINE_WAIT_FUNC(exec, default_wake_function); - struct intel_wait wait; - - might_sleep(); -#if IS_ENABLED(CONFIG_LOCKDEP) - GEM_BUG_ON(debug_locks && - !!lockdep_is_held(&req->i915->drm.struct_mutex) != - !!(flags & I915_WAIT_LOCKED)); -#endif - GEM_BUG_ON(timeout < 0); - - if (i915_gem_request_completed(req)) - return timeout; - - if (!timeout) - return -ETIME; - - trace_i915_gem_request_wait_begin(req, flags); - - add_wait_queue(&req->execute, &exec); - if (flags & I915_WAIT_LOCKED) - add_wait_queue(errq, &reset); - - intel_wait_init(&wait, req); - -restart: - do { - set_current_state(state); - if (intel_wait_update_request(&wait, req)) - break; - - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(req)) - continue; - - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - goto complete; - } - - if (!timeout) { - timeout = -ETIME; - goto complete; - } - - timeout = io_schedule_timeout(timeout); - } while (1); - - GEM_BUG_ON(!intel_wait_has_seqno(&wait)); - GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); - - /* Optimistic short spin before touching IRQs */ - if (__i915_spin_request(req, wait.seqno, state, 5)) - goto complete; - - set_current_state(state); - if (intel_engine_add_wait(req->engine, &wait)) - /* In order to check that we haven't missed the interrupt - * as we enabled it, we need to kick ourselves to do a - * coherent check on the seqno before we sleep. 
- */ - goto wakeup; - - if (flags & I915_WAIT_LOCKED) - __i915_wait_request_check_and_reset(req); - - for (;;) { - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - break; - } - - if (!timeout) { - timeout = -ETIME; - break; - } - - timeout = io_schedule_timeout(timeout); - - if (intel_wait_complete(&wait) && - intel_wait_check_request(&wait, req)) - break; - - set_current_state(state); - -wakeup: - /* Carefully check if the request is complete, giving time - * for the seqno to be visible following the interrupt. - * We also have to check in case we are kicked by the GPU - * reset in order to drop the struct_mutex. - */ - if (__i915_request_irq_complete(req)) - break; - - /* If the GPU is hung, and we hold the lock, reset the GPU - * and then check for completion. On a full reset, the engine's - * HW seqno will be advanced passed us and we are complete. - * If we do a partial reset, we have to wait for the GPU to - * resume and update the breadcrumb. - * - * If we don't hold the mutex, we can just wait for the worker - * to come along and update the breadcrumb (either directly - * itself, or indirectly by recovering the GPU). 
- */ - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(req)) - continue; - - /* Only spin if we know the GPU is processing this request */ - if (__i915_spin_request(req, wait.seqno, state, 2)) - break; - - if (!intel_wait_check_request(&wait, req)) { - intel_engine_remove_wait(req->engine, &wait); - goto restart; - } - } - - intel_engine_remove_wait(req->engine, &wait); -complete: - __set_current_state(TASK_RUNNING); - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(errq, &reset); - remove_wait_queue(&req->execute, &exec); - trace_i915_gem_request_wait_end(req); - - return timeout; -} - -static void engine_retire_requests(struct intel_engine_cs *engine) -{ - struct drm_i915_gem_request *request, *next; - u32 seqno = intel_engine_get_seqno(engine); - LIST_HEAD(retire); - - spin_lock_irq(&engine->timeline->lock); - list_for_each_entry_safe(request, next, - &engine->timeline->requests, link) { - if (!i915_seqno_passed(seqno, request->global_seqno)) - break; - - list_move_tail(&request->link, &retire); - } - spin_unlock_irq(&engine->timeline->lock); - - list_for_each_entry_safe(request, next, &retire, link) - i915_gem_request_retire(request); -} - -void i915_gem_retire_requests(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (!dev_priv->gt.active_requests) - return; - - for_each_engine(engine, dev_priv, id) - engine_retire_requests(engine); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_request.c" -#include "selftests/i915_gem_request.c" -#endif diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h deleted file mode 100644 index 2236e9188c5c..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ /dev/null @@ -1,730 +0,0 @@ -/* - * Copyright © 2008-2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of 
this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef I915_GEM_REQUEST_H -#define I915_GEM_REQUEST_H - -#include - -#include "i915_gem.h" -#include "i915_sw_fence.h" - -#include - -struct drm_file; -struct drm_i915_gem_object; -struct drm_i915_gem_request; - -struct intel_wait { - struct rb_node node; - struct task_struct *tsk; - struct drm_i915_gem_request *request; - u32 seqno; -}; - -struct intel_signal_node { - struct rb_node node; - struct intel_wait wait; -}; - -struct i915_dependency { - struct i915_priotree *signaler; - struct list_head signal_link; - struct list_head wait_link; - struct list_head dfs_link; - unsigned long flags; -#define I915_DEPENDENCY_ALLOC BIT(0) -}; - -/* Requests exist in a complex web of interdependencies. Each request - * has to wait for some other request to complete before it is ready to be run - * (e.g. we have to wait until the pixels have been rendering into a texture - * before we can copy from it). 
We track the readiness of a request in terms - * of fences, but we also need to keep the dependency tree for the lifetime - * of the request (beyond the life of an individual fence). We use the tree - * at various points to reorder the requests whilst keeping the requests - * in order with respect to their various dependencies. - */ -struct i915_priotree { - struct list_head signalers_list; /* those before us, we depend upon */ - struct list_head waiters_list; /* those after us, they depend upon us */ - struct list_head link; - int priority; -}; - -enum { - I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, - I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, - I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, - - I915_PRIORITY_INVALID = INT_MIN -}; - -struct i915_gem_capture_list { - struct i915_gem_capture_list *next; - struct i915_vma *vma; -}; - -/** - * Request queue structure. - * - * The request queue allows us to note sequence numbers that have been emitted - * and may be associated with active buffers to be retired. - * - * By keeping this list, we can avoid having to do questionable sequence - * number comparisons on buffer last_read|write_seqno. It also allows an - * emission time to be associated with the request for tracking how far ahead - * of the GPU the submission is. - * - * When modifying this structure be very aware that we perform a lockless - * RCU lookup of it that may race against reallocation of the struct - * from the slab freelist. We intentionally do not zero the structure on - * allocation so that the lookup can use the dangling pointers (and is - * cogniscent that those pointers may be wrong). Instead, everything that - * needs to be initialised must be done so explicitly. - * - * The requests are reference counted. 
- */ -struct drm_i915_gem_request { - struct dma_fence fence; - spinlock_t lock; - - /** On Which ring this request was generated */ - struct drm_i915_private *i915; - - /** - * Context and ring buffer related to this request - * Contexts are refcounted, so when this request is associated with a - * context, we must increment the context's refcount, to guarantee that - * it persists while any request is linked to it. Requests themselves - * are also refcounted, so the request will only be freed when the last - * reference to it is dismissed, and the code in - * i915_gem_request_free() will then decrement the refcount on the - * context. - */ - struct i915_gem_context *ctx; - struct intel_engine_cs *engine; - struct intel_ring *ring; - struct intel_timeline *timeline; - struct intel_signal_node signaling; - - /* Fences for the various phases in the request's lifetime. - * - * The submit fence is used to await upon all of the request's - * dependencies. When it is signaled, the request is ready to run. - * It is used by the driver to then queue the request for execution. - */ - struct i915_sw_fence submit; - wait_queue_entry_t submitq; - wait_queue_head_t execute; - - /* A list of everyone we wait upon, and everyone who waits upon us. - * Even though we will not be submitted to the hardware before the - * submit fence is signaled (it waits for all external events as well - * as our own requests), the scheduler still needs to know the - * dependency tree for the lifetime of the request (from execbuf - * to retirement), i.e. bidirectional dependency information for the - * request not tied to individual fences. - */ - struct i915_priotree priotree; - struct i915_dependency dep; - - /** GEM sequence number associated with this request on the - * global execution timeline. It is zero when the request is not - * on the HW queue (i.e. not on the engine timeline list). - * Its value is guarded by the timeline spinlock. 
- */ - u32 global_seqno; - - /** Position in the ring of the start of the request */ - u32 head; - - /** - * Position in the ring of the start of the postfix. - * This is required to calculate the maximum available ring space - * without overwriting the postfix. - */ - u32 postfix; - - /** Position in the ring of the end of the whole request */ - u32 tail; - - /** Position in the ring of the end of any workarounds after the tail */ - u32 wa_tail; - - /** Preallocate space in the ring for the emitting the request */ - u32 reserved_space; - - /** Batch buffer related to this request if any (used for - * error state dump only). - */ - struct i915_vma *batch; - /** Additional buffers requested by userspace to be captured upon - * a GPU hang. The vma/obj on this list are protected by their - * active reference - all objects on this list must also be - * on the active_list (of their final request). - */ - struct i915_gem_capture_list *capture_list; - struct list_head active_list; - - /** Time at which this request was emitted, in jiffies. 
*/ - unsigned long emitted_jiffies; - - bool waitboost; - - /** engine->request_list entry for this request */ - struct list_head link; - - /** ring->request_list entry for this request */ - struct list_head ring_link; - - struct drm_i915_file_private *file_priv; - /** file_priv list entry for this request */ - struct list_head client_link; -}; - -#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) - -extern const struct dma_fence_ops i915_fence_ops; - -static inline bool dma_fence_is_i915(const struct dma_fence *fence) -{ - return fence->ops == &i915_fence_ops; -} - -struct drm_i915_gem_request * __must_check -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); -void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); - -static inline struct drm_i915_gem_request * -to_request(struct dma_fence *fence) -{ - /* We assume that NULL fence/request are interoperable */ - BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0); - GEM_BUG_ON(fence && !dma_fence_is_i915(fence)); - return container_of(fence, struct drm_i915_gem_request, fence); -} - -static inline struct drm_i915_gem_request * -i915_gem_request_get(struct drm_i915_gem_request *req) -{ - return to_request(dma_fence_get(&req->fence)); -} - -static inline struct drm_i915_gem_request * -i915_gem_request_get_rcu(struct drm_i915_gem_request *req) -{ - return to_request(dma_fence_get_rcu(&req->fence)); -} - -static inline void -i915_gem_request_put(struct drm_i915_gem_request *req) -{ - dma_fence_put(&req->fence); -} - -/** - * i915_gem_request_global_seqno - report the current global seqno - * @request - the request - * - * A request is assigned a global seqno only when it is on the hardware - * execution queue. The global seqno can be used to maintain a list of - * requests on the same engine in retirement order, for example for - * constructing a priority queue for waiting. 
Prior to its execution, or - * if it is subsequently removed in the event of preemption, its global - * seqno is zero. As both insertion and removal from the execution queue - * may operate in IRQ context, it is not guarded by the usual struct_mutex - * BKL. Instead those relying on the global seqno must be prepared for its - * value to change between reads. Only when the request is complete can - * the global seqno be stable (due to the memory barriers on submitting - * the commands to the hardware to write the breadcrumb, if the HWS shows - * that it has passed the global seqno and the global seqno is unchanged - * after the read, it is indeed complete). - */ -static u32 -i915_gem_request_global_seqno(const struct drm_i915_gem_request *request) -{ - return READ_ONCE(request->global_seqno); -} - -int -i915_gem_request_await_object(struct drm_i915_gem_request *to, - struct drm_i915_gem_object *obj, - bool write); -int i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, - struct dma_fence *fence); - -void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); -#define i915_add_request(req) \ - __i915_add_request(req, false) - -void __i915_gem_request_submit(struct drm_i915_gem_request *request); -void i915_gem_request_submit(struct drm_i915_gem_request *request); - -void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request); -void i915_gem_request_unsubmit(struct drm_i915_gem_request *request); - -struct intel_rps_client; -#define NO_WAITBOOST ERR_PTR(-1) -#define IS_RPS_CLIENT(p) (!IS_ERR(p)) -#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p)) - -long i915_wait_request(struct drm_i915_gem_request *req, - unsigned int flags, - long timeout) - __attribute__((nonnull(1))); -#define I915_WAIT_INTERRUPTIBLE BIT(0) -#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */ -#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ - -static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); - 
-/** - * Returns true if seq1 is later than seq2. - */ -static inline bool i915_seqno_passed(u32 seq1, u32 seq2) -{ - return (s32)(seq1 - seq2) >= 0; -} - -static inline bool -__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno) -{ - GEM_BUG_ON(!seqno); - return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) && - seqno == i915_gem_request_global_seqno(req); -} - -static inline bool -i915_gem_request_completed(const struct drm_i915_gem_request *req) -{ - u32 seqno; - - seqno = i915_gem_request_global_seqno(req); - if (!seqno) - return false; - - return __i915_gem_request_completed(req, seqno); -} - -static inline bool -i915_gem_request_started(const struct drm_i915_gem_request *req) -{ - u32 seqno; - - seqno = i915_gem_request_global_seqno(req); - if (!seqno) - return false; - - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - seqno - 1); -} - -static inline bool i915_priotree_signaled(const struct i915_priotree *pt) -{ - const struct drm_i915_gem_request *rq = - container_of(pt, const struct drm_i915_gem_request, priotree); - - return i915_gem_request_completed(rq); -} - -/* We treat requests as fences. This is not be to confused with our - * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. - * We use the fences to synchronize access from the CPU with activity on the - * GPU, for example, we should not rewrite an object's PTE whilst the GPU - * is reading them. We also track fences at a higher level to provide - * implicit synchronisation around GEM objects, e.g. set-domain will wait - * for outstanding GPU rendering before marking the object ready for CPU - * access, or a pageflip will wait until the GPU is complete before showing - * the frame on the scanout. - * - * In order to use a fence, the object must track the fence it needs to - * serialise with. 
For example, GEM objects want to track both read and - * write access so that we can perform concurrent read operations between - * the CPU and GPU engines, as well as waiting for all rendering to - * complete, or waiting for the last GPU user of a "fence register". The - * object then embeds a #i915_gem_active to track the most recent (in - * retirement order) request relevant for the desired mode of access. - * The #i915_gem_active is updated with i915_gem_active_set() to track the - * most recent fence request, typically this is done as part of - * i915_vma_move_to_active(). - * - * When the #i915_gem_active completes (is retired), it will - * signal its completion to the owner through a callback as well as mark - * itself as idle (i915_gem_active.request == NULL). The owner - * can then perform any action, such as delayed freeing of an active - * resource including itself. - */ -struct i915_gem_active; - -typedef void (*i915_gem_retire_fn)(struct i915_gem_active *, - struct drm_i915_gem_request *); - -struct i915_gem_active { - struct drm_i915_gem_request __rcu *request; - struct list_head link; - i915_gem_retire_fn retire; -}; - -void i915_gem_retire_noop(struct i915_gem_active *, - struct drm_i915_gem_request *request); - -/** - * init_request_active - prepares the activity tracker for use - * @active - the active tracker - * @func - a callback when then the tracker is retired (becomes idle), - * can be NULL - * - * init_request_active() prepares the embedded @active struct for use as - * an activity tracker, that is for tracking the last known active request - * associated with it. When the last request becomes idle, when it is retired - * after completion, the optional callback @func is invoked. 
- */ -static inline void -init_request_active(struct i915_gem_active *active, - i915_gem_retire_fn retire) -{ - INIT_LIST_HEAD(&active->link); - active->retire = retire ?: i915_gem_retire_noop; -} - -/** - * i915_gem_active_set - updates the tracker to watch the current request - * @active - the active tracker - * @request - the request to watch - * - * i915_gem_active_set() watches the given @request for completion. Whilst - * that @request is busy, the @active reports busy. When that @request is - * retired, the @active tracker is updated to report idle. - */ -static inline void -i915_gem_active_set(struct i915_gem_active *active, - struct drm_i915_gem_request *request) -{ - list_move(&active->link, &request->active_list); - rcu_assign_pointer(active->request, request); -} - -/** - * i915_gem_active_set_retire_fn - updates the retirement callback - * @active - the active tracker - * @fn - the routine called when the request is retired - * @mutex - struct_mutex used to guard retirements - * - * i915_gem_active_set_retire_fn() updates the function pointer that - * is called when the final request associated with the @active tracker - * is retired. - */ -static inline void -i915_gem_active_set_retire_fn(struct i915_gem_active *active, - i915_gem_retire_fn fn, - struct mutex *mutex) -{ - lockdep_assert_held(mutex); - active->retire = fn ?: i915_gem_retire_noop; -} - -static inline struct drm_i915_gem_request * -__i915_gem_active_peek(const struct i915_gem_active *active) -{ - /* Inside the error capture (running with the driver in an unknown - * state), we want to bend the rules slightly (a lot). - * - * Work is in progress to make it safer, in the meantime this keeps - * the known issue from spamming the logs. - */ - return rcu_dereference_protected(active->request, 1); -} - -/** - * i915_gem_active_raw - return the active request - * @active - the active tracker - * - * i915_gem_active_raw() returns the current request being tracked, or NULL. 
- * It does not obtain a reference on the request for the caller, so the caller - * must hold struct_mutex. - */ -static inline struct drm_i915_gem_request * -i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) -{ - return rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); -} - -/** - * i915_gem_active_peek - report the active request being monitored - * @active - the active tracker - * - * i915_gem_active_peek() returns the current request being tracked if - * still active, or NULL. It does not obtain a reference on the request - * for the caller, so the caller must hold struct_mutex. - */ -static inline struct drm_i915_gem_request * -i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) -{ - struct drm_i915_gem_request *request; - - request = i915_gem_active_raw(active, mutex); - if (!request || i915_gem_request_completed(request)) - return NULL; - - return request; -} - -/** - * i915_gem_active_get - return a reference to the active request - * @active - the active tracker - * - * i915_gem_active_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold struct_mutex. - */ -static inline struct drm_i915_gem_request * -i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) -{ - return i915_gem_request_get(i915_gem_active_peek(active, mutex)); -} - -/** - * __i915_gem_active_get_rcu - return a reference to the active request - * @active - the active tracker - * - * __i915_gem_active_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold the RCU read lock, but - * the returned pointer is safe to use outside of RCU. - */ -static inline struct drm_i915_gem_request * -__i915_gem_active_get_rcu(const struct i915_gem_active *active) -{ - /* Performing a lockless retrieval of the active request is super - * tricky. 
SLAB_TYPESAFE_BY_RCU merely guarantees that the backing - * slab of request objects will not be freed whilst we hold the - * RCU read lock. It does not guarantee that the request itself - * will not be freed and then *reused*. Viz, - * - * Thread A Thread B - * - * req = active.request - * retire(req) -> free(req); - * (req is now first on the slab freelist) - * active.request = NULL - * - * req = new submission on a new object - * ref(req) - * - * To prevent the request from being reused whilst the caller - * uses it, we take a reference like normal. Whilst acquiring - * the reference we check that it is not in a destroyed state - * (refcnt == 0). That prevents the request being reallocated - * whilst the caller holds on to it. To check that the request - * was not reallocated as we acquired the reference we have to - * check that our request remains the active request across - * the lookup, in the same manner as a seqlock. The visibility - * of the pointer versus the reference counting is controlled - * by using RCU barriers (rcu_dereference and rcu_assign_pointer). - * - * In the middle of all that, we inspect whether the request is - * complete. Retiring is lazy so the request may be completed long - * before the active tracker is updated. Querying whether the - * request is complete is far cheaper (as it involves no locked - * instructions setting cachelines to exclusive) than acquiring - * the reference, so we do it first. The RCU read lock ensures the - * pointer dereference is valid, but does not ensure that the - * seqno nor HWS is the right one! However, if the request was - * reallocated, that means the active tracker's request was complete. - * If the new request is also complete, then both are and we can - * just report the active tracker is idle. If the new request is - * incomplete, then we acquire a reference on it and check that - * it remained the active request. 
- * - * It is then imperative that we do not zero the request on - * reallocation, so that we can chase the dangling pointers! - * See i915_gem_request_alloc(). - */ - do { - struct drm_i915_gem_request *request; - - request = rcu_dereference(active->request); - if (!request || i915_gem_request_completed(request)) - return NULL; - - /* An especially silly compiler could decide to recompute the - * result of i915_gem_request_completed, more specifically - * re-emit the load for request->fence.seqno. A race would catch - * a later seqno value, which could flip the result from true to - * false. Which means part of the instructions below might not - * be executed, while later on instructions are executed. Due to - * barriers within the refcounting the inconsistency can't reach - * past the call to i915_gem_request_get_rcu, but not executing - * that while still executing i915_gem_request_put() creates - * havoc enough. Prevent this with a compiler barrier. - */ - barrier(); - - request = i915_gem_request_get_rcu(request); - - /* What stops the following rcu_access_pointer() from occurring - * before the above i915_gem_request_get_rcu()? If we were - * to read the value before pausing to get the reference to - * the request, we may not notice a change in the active - * tracker. - * - * The rcu_access_pointer() is a mere compiler barrier, which - * means both the CPU and compiler are free to perform the - * memory read without constraint. The compiler only has to - * ensure that any operations after the rcu_access_pointer() - * occur afterwards in program order. This means the read may - * be performed earlier by an out-of-order CPU, or adventurous - * compiler. - * - * The atomic operation at the heart of - * i915_gem_request_get_rcu(), see dma_fence_get_rcu(), is - * atomic_inc_not_zero() which is only a full memory barrier - * when successful. 
That is, if i915_gem_request_get_rcu() - * returns the request (and so with the reference counted - * incremented) then the following read for rcu_access_pointer() - * must occur after the atomic operation and so confirm - * that this request is the one currently being tracked. - * - * The corresponding write barrier is part of - * rcu_assign_pointer(). - */ - if (!request || request == rcu_access_pointer(active->request)) - return rcu_pointer_handoff(request); - - i915_gem_request_put(request); - } while (1); -} - -/** - * i915_gem_active_get_unlocked - return a reference to the active request - * @active - the active tracker - * - * i915_gem_active_get_unlocked() returns a reference to the active request, - * or NULL if the active tracker is idle. The reference is obtained under RCU, - * so no locking is required by the caller. - * - * The reference should be freed with i915_gem_request_put(). - */ -static inline struct drm_i915_gem_request * -i915_gem_active_get_unlocked(const struct i915_gem_active *active) -{ - struct drm_i915_gem_request *request; - - rcu_read_lock(); - request = __i915_gem_active_get_rcu(active); - rcu_read_unlock(); - - return request; -} - -/** - * i915_gem_active_isset - report whether the active tracker is assigned - * @active - the active tracker - * - * i915_gem_active_isset() returns true if the active tracker is currently - * assigned to a request. Due to the lazy retiring, that request may be idle - * and this may report stale information. 
- */ -static inline bool -i915_gem_active_isset(const struct i915_gem_active *active) -{ - return rcu_access_pointer(active->request); -} - -/** - * i915_gem_active_wait - waits until the request is completed - * @active - the active request on which to wait - * @flags - how to wait - * @timeout - how long to wait at most - * @rps - userspace client to charge for a waitboost - * - * i915_gem_active_wait() waits until the request is completed before - * returning, without requiring any locks to be held. Note that it does not - * retire any requests before returning. - * - * This function relies on RCU in order to acquire the reference to the active - * request without holding any locks. See __i915_gem_active_get_rcu() for the - * glory details on how that is managed. Once the reference is acquired, we - * can then wait upon the request, and afterwards release our reference, - * free of any locking. - * - * This function wraps i915_wait_request(), see it for the full details on - * the arguments. - * - * Returns 0 if successful, or a negative error code. - */ -static inline int -i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags) -{ - struct drm_i915_gem_request *request; - long ret = 0; - - request = i915_gem_active_get_unlocked(active); - if (request) { - ret = i915_wait_request(request, flags, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(request); - } - - return ret < 0 ? ret : 0; -} - -/** - * i915_gem_active_retire - waits until the request is retired - * @active - the active request on which to wait - * - * i915_gem_active_retire() waits until the request is completed, - * and then ensures that at least the retirement handler for this - * @active tracker is called before returning. If the @active - * tracker is idle, the function returns immediately. 
- */ -static inline int __must_check -i915_gem_active_retire(struct i915_gem_active *active, - struct mutex *mutex) -{ - struct drm_i915_gem_request *request; - long ret; - - request = i915_gem_active_raw(active, mutex); - if (!request) - return 0; - - ret = i915_wait_request(request, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (ret < 0) - return ret; - - list_del_init(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, request); - - return 0; -} - -#define for_each_active(mask, idx) \ - for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) - -#endif /* I915_GEM_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 0e158f9287c4..5757fb7c4b5a 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -175,7 +175,7 @@ i915_gem_shrink(struct drm_i915_private *i915, i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); trace_i915_gem_shrink(i915, target, flags); - i915_gem_retire_requests(i915); + i915_retire_requests(i915); /* * Unbinding of objects will require HW access; Let us not wake the @@ -267,7 +267,7 @@ i915_gem_shrink(struct drm_i915_private *i915, if (flags & I915_SHRINK_BOUND) intel_runtime_pm_put(i915); - i915_gem_retire_requests(i915); + i915_retire_requests(i915); shrinker_unlock(i915, unlock); diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index b5a22400a01f..33e01bf6aa36 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -27,9 +27,9 @@ #include -#include "i915_utils.h" -#include "i915_gem_request.h" +#include "i915_request.h" #include "i915_syncmap.h" +#include "i915_utils.h" struct i915_gem_timeline; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 65c0bef73ee5..a7933c9b5562 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ 
b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -991,7 +991,7 @@ out: static inline uint32_t __active_get_seqno(struct i915_gem_active *active) { - struct drm_i915_gem_request *request; + struct i915_request *request; request = __i915_gem_active_peek(active); return request ? request->global_seqno : 0; @@ -1000,7 +1000,7 @@ __active_get_seqno(struct i915_gem_active *active) static inline int __active_get_engine_id(struct i915_gem_active *active) { - struct drm_i915_gem_request *request; + struct i915_request *request; request = __i915_gem_active_peek(active); return request ? request->engine->id : -1; @@ -1293,7 +1293,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error, } } -static void record_request(struct drm_i915_gem_request *request, +static void record_request(struct i915_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; @@ -1310,10 +1310,10 @@ static void record_request(struct drm_i915_gem_request *request, } static void engine_record_requests(struct intel_engine_cs *engine, - struct drm_i915_gem_request *first, + struct i915_request *first, struct drm_i915_error_engine *ee) { - struct drm_i915_gem_request *request; + struct i915_request *request; int count; count = 0; @@ -1363,7 +1363,7 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine, unsigned int n; for (n = 0; n < execlists_num_ports(execlists); n++) { - struct drm_i915_gem_request *rq = port_request(&execlists->port[n]); + struct i915_request *rq = port_request(&execlists->port[n]); if (!rq) break; @@ -1398,10 +1398,10 @@ static void record_context(struct drm_i915_error_context *e, e->active = atomic_read(&ctx->active_count); } -static void request_record_user_bo(struct drm_i915_gem_request *request, +static void request_record_user_bo(struct i915_request *request, struct drm_i915_error_engine *ee) { - struct i915_gem_capture_list *c; + struct i915_capture_list *c; struct drm_i915_error_object **bo; long count; @@ 
-1454,7 +1454,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, for (i = 0; i < I915_NUM_ENGINES; i++) { struct intel_engine_cs *engine = dev_priv->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; - struct drm_i915_gem_request *request; + struct i915_request *request; ee->engine_id = -1; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 17de6cef2a30..0a7ed990a8d1 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1071,7 +1071,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *rq = NULL; + struct i915_request *rq = NULL; struct intel_wait *wait; if (!engine->breadcrumbs.irq_armed) @@ -1098,13 +1098,13 @@ static void notify_ring(struct intel_engine_cs *engine) */ if (i915_seqno_passed(intel_engine_get_seqno(engine), wait->seqno)) { - struct drm_i915_gem_request *waiter = wait->request; + struct i915_request *waiter = wait->request; wakeup = true; if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &waiter->fence.flags) && intel_wait_check_request(wait, waiter)) - rq = i915_gem_request_get(waiter); + rq = i915_request_get(waiter); } if (wakeup) @@ -1117,7 +1117,7 @@ static void notify_ring(struct intel_engine_cs *engine) if (rq) { dma_fence_signal(&rq->fence); - i915_gem_request_put(rq); + i915_request_put(rq); } trace_intel_engine_notify(engine, wait); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index f0cfdece14ae..f464c3737228 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1630,10 +1630,10 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx, * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This * is only used by the kernel context. 
*/ -static int gen8_emit_oa_config(struct drm_i915_gem_request *req, +static int gen8_emit_oa_config(struct i915_request *rq, const struct i915_oa_config *oa_config) { - struct drm_i915_private *dev_priv = req->i915; + struct drm_i915_private *dev_priv = rq->i915; /* The MMIO offsets for Flex EU registers aren't contiguous */ u32 flex_mmio[] = { i915_mmio_reg_offset(EU_PERF_CNTL0), @@ -1647,7 +1647,7 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req, u32 *cs; int i; - cs = intel_ring_begin(req, ARRAY_SIZE(flex_mmio) * 2 + 4); + cs = intel_ring_begin(rq, ARRAY_SIZE(flex_mmio) * 2 + 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1685,7 +1685,7 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req, } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1695,38 +1695,38 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr { struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct i915_gem_timeline *timeline; - struct drm_i915_gem_request *req; + struct i915_request *rq; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); - i915_gem_retire_requests(dev_priv); + i915_retire_requests(dev_priv); - req = i915_gem_request_alloc(engine, dev_priv->kernel_context); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = i915_request_alloc(engine, dev_priv->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); - ret = gen8_emit_oa_config(req, oa_config); + ret = gen8_emit_oa_config(rq, oa_config); if (ret) { - i915_add_request(req); + i915_request_add(rq); return ret; } /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { - struct drm_i915_gem_request *prev; + struct i915_request *prev; struct intel_timeline *tl; tl = &timeline->engine[engine->id]; prev = i915_gem_active_raw(&tl->last_request, &dev_priv->drm.struct_mutex); if (prev) - i915_sw_fence_await_sw_fence_gfp(&req->submit, + 
i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, GFP_KERNEL); } - i915_add_request(req); + i915_request_add(rq); return 0; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c new file mode 100644 index 000000000000..9b25270f2491 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_request.c @@ -0,0 +1,1411 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/prefetch.h> +#include <linux/dma-fence-array.h> +#include <linux/sched.h> +#include <linux/sched/clock.h> +#include <linux/sched/signal.h> + +#include "i915_drv.h" + +static const char *i915_fence_get_driver_name(struct dma_fence *fence) +{ + return "i915"; +} + +static const char *i915_fence_get_timeline_name(struct dma_fence *fence) +{ + /* + * The timeline struct (as part of the ppgtt underneath a context) + * may be freed when the request is no longer in use by the GPU.
+ * We could extend the life of a context to beyond that of all + * fences, possibly keeping the hw resource around indefinitely, + * or we just give them a false name. Since + * dma_fence_ops.get_timeline_name is a debug feature, the occasional + * lie seems justifiable. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return "signaled"; + + return to_request(fence)->timeline->common->name; +} + +static bool i915_fence_signaled(struct dma_fence *fence) +{ + return i915_request_completed(to_request(fence)); +} + +static bool i915_fence_enable_signaling(struct dma_fence *fence) +{ + if (i915_fence_signaled(fence)) + return false; + + intel_engine_enable_signaling(to_request(fence), true); + return !i915_fence_signaled(fence); +} + +static signed long i915_fence_wait(struct dma_fence *fence, + bool interruptible, + signed long timeout) +{ + return i915_request_wait(to_request(fence), interruptible, timeout); +} + +static void i915_fence_release(struct dma_fence *fence) +{ + struct i915_request *rq = to_request(fence); + + /* + * The request is put onto a RCU freelist (i.e. the address + * is immediately reused), mark the fences as being freed now. + * Otherwise the debugobjects for the fences are only marked as + * freed when the slab cache itself is freed, and so we would get + * caught trying to reuse dead objects. 
+ */ + i915_sw_fence_fini(&rq->submit); + + kmem_cache_free(rq->i915->requests, rq); +} + +const struct dma_fence_ops i915_fence_ops = { + .get_driver_name = i915_fence_get_driver_name, + .get_timeline_name = i915_fence_get_timeline_name, + .enable_signaling = i915_fence_enable_signaling, + .signaled = i915_fence_signaled, + .wait = i915_fence_wait, + .release = i915_fence_release, +}; + +static inline void +i915_request_remove_from_client(struct i915_request *request) +{ + struct drm_i915_file_private *file_priv; + + file_priv = request->file_priv; + if (!file_priv) + return; + + spin_lock(&file_priv->mm.lock); + if (request->file_priv) { + list_del(&request->client_link); + request->file_priv = NULL; + } + spin_unlock(&file_priv->mm.lock); +} + +static struct i915_dependency * +i915_dependency_alloc(struct drm_i915_private *i915) +{ + return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); +} + +static void +i915_dependency_free(struct drm_i915_private *i915, + struct i915_dependency *dep) +{ + kmem_cache_free(i915->dependencies, dep); +} + +static void +__i915_priotree_add_dependency(struct i915_priotree *pt, + struct i915_priotree *signal, + struct i915_dependency *dep, + unsigned long flags) +{ + INIT_LIST_HEAD(&dep->dfs_link); + list_add(&dep->wait_link, &signal->waiters_list); + list_add(&dep->signal_link, &pt->signalers_list); + dep->signaler = signal; + dep->flags = flags; +} + +static int +i915_priotree_add_dependency(struct drm_i915_private *i915, + struct i915_priotree *pt, + struct i915_priotree *signal) +{ + struct i915_dependency *dep; + + dep = i915_dependency_alloc(i915); + if (!dep) + return -ENOMEM; + + __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC); + return 0; +} + +static void +i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) +{ + struct i915_dependency *dep, *next; + + GEM_BUG_ON(!list_empty(&pt->link)); + + /* + * Everyone we depended upon (the fences we wait to be signaled) + * should 
retire before us and remove themselves from our list. + * However, retirement is run independently on each timeline and + * so we may be called out-of-order. + */ + list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { + GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + + list_del(&dep->wait_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(i915, dep); + } + + /* Remove ourselves from everyone who depends upon us */ + list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != pt); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + + list_del(&dep->signal_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(i915, dep); + } +} + +static void +i915_priotree_init(struct i915_priotree *pt) +{ + INIT_LIST_HEAD(&pt->signalers_list); + INIT_LIST_HEAD(&pt->waiters_list); + INIT_LIST_HEAD(&pt->link); + pt->priority = I915_PRIORITY_INVALID; +} + +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + /* Carefully retire all requests without writing to the rings */ + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (ret) + return ret; + + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ + for_each_engine(engine, i915, id) { + struct i915_gem_timeline *timeline; + struct intel_timeline *tl = engine->timeline; + + if (!i915_seqno_passed(seqno, tl->seqno)) { + /* spin until threads are complete */ + while (intel_breadcrumbs_busy(engine)) + cond_resched(); + } + + /* Check we are idle before we fiddle with hw state! 
*/ + GEM_BUG_ON(!intel_engine_is_idle(engine)); + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); + + /* Finally reset hw state */ + intel_engine_init_global_seqno(engine, seqno); + tl->seqno = seqno; + + list_for_each_entry(timeline, &i915->gt.timelines, link) + memset(timeline->engine[id].global_sync, 0, + sizeof(timeline->engine[id].global_sync)); + } + + return 0; +} + +int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *i915 = to_i915(dev); + + lockdep_assert_held(&i915->drm.struct_mutex); + + if (seqno == 0) + return -EINVAL; + + /* HWS page needs to be set less than what we will inject to ring */ + return reset_all_global_seqno(i915, seqno - 1); +} + +static void mark_busy(struct drm_i915_private *i915) +{ + if (i915->gt.awake) + return; + + GEM_BUG_ON(!i915->gt.active_requests); + + intel_runtime_pm_get_noresume(i915); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. 
+ */ + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + + i915->gt.awake = true; + if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ + i915->gt.epoch = 1; + + intel_enable_gt_powersave(i915); + i915_update_gfx_val(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_busy(i915); + i915_pmu_gt_unparked(i915); + + intel_engines_unpark(i915); + + i915_queue_hangcheck(i915); + + queue_delayed_work(i915->wq, + &i915->gt.retire_work, + round_jiffies_up_relative(HZ)); +} + +static int reserve_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + u32 active = ++engine->timeline->inflight_seqnos; + u32 seqno = engine->timeline->seqno; + int ret; + + /* Reservation is fine until we need to wrap around */ + if (unlikely(add_overflows(seqno, active))) { + ret = reset_all_global_seqno(i915, 0); + if (ret) { + engine->timeline->inflight_seqnos--; + return ret; + } + } + + if (!i915->gt.active_requests++) + mark_busy(i915); + + return 0; +} + +static void unreserve_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + if (!--i915->gt.active_requests) { + /* Cancel the mark_busy() from our reserve_engine() */ + GEM_BUG_ON(!i915->gt.awake); + mod_delayed_work(i915->wq, + &i915->gt.idle_work, + msecs_to_jiffies(100)); + } + + GEM_BUG_ON(!engine->timeline->inflight_seqnos); + engine->timeline->inflight_seqnos--; +} + +void i915_gem_retire_noop(struct i915_gem_active *active, + struct i915_request *request) +{ + /* Space left intentionally blank */ +} + +static void advance_ring(struct i915_request *request) +{ + unsigned int tail; + + /* + * We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. 
+ */ + if (list_is_last(&request->ring_link, &request->ring->request_list)) { + /* + * We may race here with execlists resubmitting this request + * as we retire it. The resubmission will move the ring->tail + * forwards (to request->wa_tail). We either read the + * current value that was written to hw, or the value that + * is just about to be. Either works, if we miss the last two + * noops - they are safe to be replayed on a reset. + */ + tail = READ_ONCE(request->ring->tail); + } else { + tail = request->postfix; + } + list_del(&request->ring_link); + + request->ring->head = tail; +} + +static void free_capture_list(struct i915_request *request) +{ + struct i915_capture_list *capture; + + capture = request->capture_list; + while (capture) { + struct i915_capture_list *next = capture->next; + + kfree(capture); + capture = next; + } +} + +static void i915_request_retire(struct i915_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct i915_gem_active *active, *next; + + lockdep_assert_held(&request->i915->drm.struct_mutex); + GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); + GEM_BUG_ON(!i915_request_completed(request)); + GEM_BUG_ON(!request->i915->gt.active_requests); + + trace_i915_request_retire(request); + + spin_lock_irq(&engine->timeline->lock); + list_del_init(&request->link); + spin_unlock_irq(&engine->timeline->lock); + + unreserve_engine(request->engine); + advance_ring(request); + + free_capture_list(request); + + /* + * Walk through the active list, calling retire on each. This allows + * objects to track their GPU activity and mark themselves as idle + * when their *last* active request is completed (updating state + * tracking lists for eviction, active references for GEM, etc). + * + * As the ->retire() may free the node, we decouple it first and + * pass along the auxiliary information (to avoid dereferencing + * the node after the callback). 
+ */ + list_for_each_entry_safe(active, next, &request->active_list, link) { + /* + * In microbenchmarks or focusing upon time inside the kernel, + * we may spend an inordinate amount of time simply handling + * the retirement of requests and processing their callbacks. + * Of which, this loop itself is particularly hot due to the + * cache misses when jumping around the list of i915_gem_active. + * So we try to keep this loop as streamlined as possible and + * also prefetch the next i915_gem_active to try and hide + * the likely cache miss. + */ + prefetchw(next); + + INIT_LIST_HEAD(&active->link); + RCU_INIT_POINTER(active->request, NULL); + + active->retire(active, request); + } + + i915_request_remove_from_client(request); + + /* Retirement decays the ban score as it is a sign of ctx progress */ + atomic_dec_if_positive(&request->ctx->ban_score); + + /* + * The backing object for the context is done after switching to the + * *next* context. Therefore we cannot retire the previous context until + * the next context has already started running. However, since we + * cannot take the required locks at i915_request_submit() we + * defer the unpinning of the active context to now, retirement of + * the subsequent request. 
+ */ + if (engine->last_retired_context) + engine->context_unpin(engine, engine->last_retired_context); + engine->last_retired_context = request->ctx; + + spin_lock_irq(&request->lock); + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) + dma_fence_signal_locked(&request->fence); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_cancel_signaling(request); + if (request->waitboost) { + GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters)); + atomic_dec(&request->i915->gt_pm.rps.num_waiters); + } + spin_unlock_irq(&request->lock); + + i915_priotree_fini(request->i915, &request->priotree); + i915_request_put(request); +} + +void i915_request_retire_upto(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_request *tmp; + + lockdep_assert_held(&rq->i915->drm.struct_mutex); + GEM_BUG_ON(!i915_request_completed(rq)); + + if (list_empty(&rq->link)) + return; + + do { + tmp = list_first_entry(&engine->timeline->requests, + typeof(*tmp), link); + + i915_request_retire(tmp); + } while (tmp != rq); +} + +static u32 timeline_get_seqno(struct intel_timeline *tl) +{ + return ++tl->seqno; +} + +void __i915_request_submit(struct i915_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_timeline *timeline; + u32 seqno; + + GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->timeline->lock); + + /* Transfer from per-context onto the global per-engine timeline */ + timeline = engine->timeline; + GEM_BUG_ON(timeline == request->timeline); + GEM_BUG_ON(request->global_seqno); + + seqno = timeline_get_seqno(timeline); + GEM_BUG_ON(!seqno); + GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); + + /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = seqno; + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 
&request->fence.flags)) + intel_engine_enable_signaling(request, false); + spin_unlock(&request->lock); + + engine->emit_breadcrumb(request, + request->ring->vaddr + request->postfix); + + spin_lock(&request->timeline->lock); + list_move_tail(&request->link, &timeline->requests); + spin_unlock(&request->timeline->lock); + + trace_i915_request_execute(request); + + wake_up_all(&request->execute); +} + +void i915_request_submit(struct i915_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->timeline->lock, flags); + + __i915_request_submit(request); + + spin_unlock_irqrestore(&engine->timeline->lock, flags); +} + +void __i915_request_unsubmit(struct i915_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_timeline *timeline; + + GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->timeline->lock); + + /* + * Only unwind in reverse order, required so that the per-context list + * is kept in seqno/ring order. 
+ */ + GEM_BUG_ON(!request->global_seqno); + GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), + request->global_seqno)); + engine->timeline->seqno--; + + /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = 0; + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_cancel_signaling(request); + spin_unlock(&request->lock); + + /* Transfer back from the global per-engine timeline to per-context */ + timeline = request->timeline; + GEM_BUG_ON(timeline == engine->timeline); + + spin_lock(&timeline->lock); + list_move(&request->link, &timeline->requests); + spin_unlock(&timeline->lock); + + /* + * We don't need to wake_up any waiters on request->execute, they + * will get woken by any other event or us re-adding this request + * to the engine timeline (__i915_request_submit()). The waiters + * should be quite adept at finding that the request now has a new + * global_seqno to the one they went to sleep on. + */ +} + +void i915_request_unsubmit(struct i915_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->timeline->lock, flags); + + __i915_request_unsubmit(request); + + spin_unlock_irqrestore(&engine->timeline->lock, flags); +} + +static int __i915_sw_fence_call +submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + struct i915_request *request = + container_of(fence, typeof(*request), submit); + + switch (state) { + case FENCE_COMPLETE: + trace_i915_request_submit(request); + /* + * We need to serialize use of the submit_request() callback + * with its hotplugging performed during an emergency + * i915_gem_set_wedged(). 
We use the RCU mechanism to mark the + * critical section in order to force i915_gem_set_wedged() to + * wait until the submit_request() is completed before + * proceeding. + */ + rcu_read_lock(); + request->engine->submit_request(request); + rcu_read_unlock(); + break; + + case FENCE_FREE: + i915_request_put(request); + break; + } + + return NOTIFY_DONE; +} + +/** + * i915_request_alloc - allocate a request structure + * + * @engine: engine that we wish to issue the request on. + * @ctx: context that the request will be associated with. + * + * Returns a pointer to the allocated request if successful, + * or an error code if not. + */ +struct i915_request * +i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) +{ + struct drm_i915_private *i915 = engine->i915; + struct i915_request *rq; + struct intel_ring *ring; + int ret; + + lockdep_assert_held(&i915->drm.struct_mutex); + + /* + * Preempt contexts are reserved for exclusive use to inject a + * preemption context switch. They are never to be used for any trivial + * request! + */ + GEM_BUG_ON(ctx == i915->preempt_context); + + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged. + */ + if (i915_terminally_wedged(&i915->gpu_error)) + return ERR_PTR(-EIO); + + /* + * Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + ring = engine->context_pin(engine, ctx); + if (IS_ERR(ring)) + return ERR_CAST(ring); + GEM_BUG_ON(!ring); + + ret = reserve_engine(engine); + if (ret) + goto err_unpin; + + ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST); + if (ret) + goto err_unreserve; + + /* Move the oldest request to the slab-cache (if not in use!) 
*/ + rq = list_first_entry_or_null(&engine->timeline->requests, + typeof(*rq), link); + if (rq && i915_request_completed(rq)) + i915_request_retire(rq); + + /* + * Beware: Dragons be flying overhead. + * + * We use RCU to look up requests in flight. The lookups may + * race with the request being allocated from the slab freelist. + * That is the request we are writing to here, may be in the process + * of being read by __i915_gem_active_get_rcu(). As such, + * we have to be very careful when overwriting the contents. During + * the RCU lookup, we chase the request->engine pointer, + * read the request->global_seqno and increment the reference count. + * + * The reference count is incremented atomically. If it is zero, + * the lookup knows the request is unallocated and complete. Otherwise, + * it is either still in use, or has been reallocated and reset + * with dma_fence_init(). This increment is safe for release as we + * check that the request we have a reference to matches the active + * request. + * + * Before we increment the refcount, we chase the request->engine + * pointer. We must not call kmem_cache_zalloc() or else we set + * that pointer to NULL and cause a crash during the lookup. If + * we see the request is completed (based on the value of the + * old engine and seqno), the lookup is complete and reports NULL. + * If we decide the request is not completed (new engine or seqno), + * then we grab a reference and double check that it is still the + * active request - which it won't be and restart the lookup. + * + * Do not use kmem_cache_zalloc() here! 
+ */ + rq = kmem_cache_alloc(i915->requests, + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (unlikely(!rq)) { + /* Ratelimit ourselves to prevent oom from malicious clients */ + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_INTERRUPTIBLE); + if (ret) + goto err_unreserve; + + /* + * We've forced the client to stall and catch up with whatever + * backlog there might have been. As we are assuming that we + * caused the mempressure, now is an opportune time to + * recover as much memory from the request pool as is possible. + * Having already penalized the client to stall, we spend + * a little extra time to re-optimise page allocation. + */ + kmem_cache_shrink(i915->requests); + rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */ + + rq = kmem_cache_alloc(i915->requests, GFP_KERNEL); + if (!rq) { + ret = -ENOMEM; + goto err_unreserve; + } + } + + rq->timeline = i915_gem_context_lookup_timeline(ctx, engine); + GEM_BUG_ON(rq->timeline == engine->timeline); + + spin_lock_init(&rq->lock); + dma_fence_init(&rq->fence, + &i915_fence_ops, + &rq->lock, + rq->timeline->fence_context, + timeline_get_seqno(rq->timeline)); + + /* We bump the ref for the fence chain */ + i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); + init_waitqueue_head(&rq->execute); + + i915_priotree_init(&rq->priotree); + + INIT_LIST_HEAD(&rq->active_list); + rq->i915 = i915; + rq->engine = engine; + rq->ctx = ctx; + rq->ring = ring; + + /* No zalloc, must clear what we need by hand */ + rq->global_seqno = 0; + rq->signaling.wait.seqno = 0; + rq->file_priv = NULL; + rq->batch = NULL; + rq->capture_list = NULL; + rq->waitboost = false; + + /* + * Reserve space in the ring buffer for all the commands required to + * eventually emit this request. This is to guarantee that the + * i915_request_add() call can't fail. Note that the reserve may need + * to be redone if the request is not actually submitted straight + * away, e.g. 
because a GPU scheduler has deferred it. + */ + rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; + GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz); + + /* + * Record the position of the start of the request so that + * should we detect the updated seqno part-way through the + * GPU processing the request, we never over-estimate the + * position of the head. + */ + rq->head = rq->ring->emit; + + /* Unconditionally invalidate GPU caches and TLBs. */ + ret = engine->emit_flush(rq, EMIT_INVALIDATE); + if (ret) + goto err_unwind; + + ret = engine->request_alloc(rq); + if (ret) + goto err_unwind; + + /* Check that we didn't interrupt ourselves with a new request */ + GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); + return rq; + +err_unwind: + rq->ring->emit = rq->head; + + /* Make sure we didn't add ourselves to external state before freeing */ + GEM_BUG_ON(!list_empty(&rq->active_list)); + GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list)); + GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list)); + + kmem_cache_free(i915->requests, rq); +err_unreserve: + unreserve_engine(engine); +err_unpin: + engine->context_unpin(engine, ctx); + return ERR_PTR(ret); +} + +static int +i915_request_await_request(struct i915_request *to, struct i915_request *from) +{ + int ret; + + GEM_BUG_ON(to == from); + GEM_BUG_ON(to->timeline == from->timeline); + + if (i915_request_completed(from)) + return 0; + + if (to->engine->schedule) { + ret = i915_priotree_add_dependency(to->i915, + &to->priotree, + &from->priotree); + if (ret < 0) + return ret; + } + + if (to->engine == from->engine) { + ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, + &from->submit, + I915_FENCE_GFP); + return ret < 0 ? 
ret : 0; + } + + if (to->engine->semaphore.sync_to) { + u32 seqno; + + GEM_BUG_ON(!from->engine->semaphore.signal); + + seqno = i915_request_global_seqno(from); + if (!seqno) + goto await_dma_fence; + + if (seqno <= to->timeline->global_sync[from->engine->id]) + return 0; + + trace_i915_gem_ring_sync_to(to, from); + ret = to->engine->semaphore.sync_to(to, from); + if (ret) + return ret; + + to->timeline->global_sync[from->engine->id] = seqno; + return 0; + } + +await_dma_fence: + ret = i915_sw_fence_await_dma_fence(&to->submit, + &from->fence, 0, + I915_FENCE_GFP); + return ret < 0 ? ret : 0; +} + +int +i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) +{ + struct dma_fence **child = &fence; + unsigned int nchild = 1; + int ret; + + /* + * Note that if the fence-array was created in signal-on-any mode, + * we should *not* decompose it into its individual fences. However, + * we don't currently store which mode the fence-array is operating + * in. Fortunately, the only user of signal-on-any is private to + * amdgpu and we should not see any incoming fence-array from + * sync-file being in signal-on-any mode. + */ + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + + child = array->fences; + nchild = array->num_fences; + GEM_BUG_ON(!nchild); + } + + do { + fence = *child++; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + continue; + + /* + * Requests on the same timeline are explicitly ordered, along + * with their dependencies, by i915_request_add() which ensures + * that requests are submitted in-order through each ring. 
+ */ + if (fence->context == rq->fence.context) + continue; + + /* Squash repeated waits to the same timelines */ + if (fence->context != rq->i915->mm.unordered_timeline && + intel_timeline_sync_is_later(rq->timeline, fence)) + continue; + + if (dma_fence_is_i915(fence)) + ret = i915_request_await_request(rq, to_request(fence)); + else + ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, + I915_FENCE_TIMEOUT, + I915_FENCE_GFP); + if (ret < 0) + return ret; + + /* Record the latest fence used against each timeline */ + if (fence->context != rq->i915->mm.unordered_timeline) + intel_timeline_sync_set(rq->timeline, fence); + } while (--nchild); + + return 0; +} + +/** + * i915_request_await_object - set this request to (async) wait upon a bo + * @to: request we are wishing to use + * @obj: object which may be in use on another ring. + * @write: whether the wait is on behalf of a writer + * + * This code is meant to abstract object synchronization with the GPU. + * Conceptually we serialise writes between engines inside the GPU. + * We only allow one engine to write into a buffer at any time, but + * multiple readers. To ensure each has a coherent view of memory, we must: + * + * - If there is an outstanding write request to the object, the new + * request must wait for it to complete (either CPU or in hw, requests + * on the same ring will be naturally ordered). + * + * - If we are a write request (pending_write_domain is set), the new + * request must wait for outstanding read requests to complete. + * + * Returns 0 if successful, else propagates up the lower layer error. 
+ */ +int +i915_request_await_object(struct i915_request *to, + struct drm_i915_gem_object *obj, + bool write) +{ + struct dma_fence *excl; + int ret = 0; + + if (write) { + struct dma_fence **shared; + unsigned int count, i; + + ret = reservation_object_get_fences_rcu(obj->resv, + &excl, &count, &shared); + if (ret) + return ret; + + for (i = 0; i < count; i++) { + ret = i915_request_await_dma_fence(to, shared[i]); + if (ret) + break; + + dma_fence_put(shared[i]); + } + + for (; i < count; i++) + dma_fence_put(shared[i]); + kfree(shared); + } else { + excl = reservation_object_get_excl_rcu(obj->resv); + } + + if (excl) { + if (ret == 0) + ret = i915_request_await_dma_fence(to, excl); + + dma_fence_put(excl); + } + + return ret; +} + +/* + * NB: This function is not allowed to fail. Doing so would mean the + * request is not being tracked for completion but the work itself is + * going to happen on the hardware. This would be a Bad Thing(tm). + */ +void __i915_request_add(struct i915_request *request, bool flush_caches) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_ring *ring = request->ring; + struct intel_timeline *timeline = request->timeline; + struct i915_request *prev; + u32 *cs; + int err; + + lockdep_assert_held(&request->i915->drm.struct_mutex); + trace_i915_request_add(request); + + /* + * Make sure that no request gazumped us - if it was allocated after + * our i915_request_alloc() and called __i915_request_add() before + * us, the timeline will hold its seqno which is later than ours. + */ + GEM_BUG_ON(timeline->seqno != request->fence.seqno); + + /* + * To ensure that this call will not fail, space for its emissions + * should already have been reserved in the ring buffer. Let the ring + * know that it is time to use that space up. + */ + request->reserved_space = 0; + + /* + * Emit any outstanding flushes - execbuf can fail to emit the flush + * after having emitted the batchbuffer command. 
Hence we need to fix + * things up similar to emitting the lazy request. The difference here + * is that the flush _must_ happen before the next request, no matter + * what. + */ + if (flush_caches) { + err = engine->emit_flush(request, EMIT_FLUSH); + + /* Not allowed to fail! */ + WARN(err, "engine->emit_flush() failed: %d!\n", err); + } + + /* + * Record the position of the start of the breadcrumb so that + * should we detect the updated seqno part-way through the + * GPU processing the request, we never over-estimate the + * position of the ring's HEAD. + */ + cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); + GEM_BUG_ON(IS_ERR(cs)); + request->postfix = intel_ring_offset(request, cs); + + /* + * Seal the request and mark it as pending execution. Note that + * we may inspect this state, without holding any locks, during + * hangcheck. Hence we apply the barrier to ensure that we do not + * see a more recent value in the hws than we are tracking. + */ + + prev = i915_gem_active_raw(&timeline->last_request, + &request->i915->drm.struct_mutex); + if (prev && !i915_request_completed(prev)) { + i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, + &request->submitq); + if (engine->schedule) + __i915_priotree_add_dependency(&request->priotree, + &prev->priotree, + &request->dep, + 0); + } + + spin_lock_irq(&timeline->lock); + list_add_tail(&request->link, &timeline->requests); + spin_unlock_irq(&timeline->lock); + + GEM_BUG_ON(timeline->seqno != request->fence.seqno); + i915_gem_active_set(&timeline->last_request, request); + + list_add_tail(&request->ring_link, &ring->request_list); + request->emitted_jiffies = jiffies; + + /* + * Let the backend know a new request has arrived that may need + * to adjust the existing execution schedule due to a high priority + * request - i.e. we may want to preempt the current request in order + * to run a high priority dependency chain *before* we can execute this + * request. 
+ * + * This is called before the request is ready to run so that we can + * decide whether to preempt the entire chain so that it is ready to + * run at the earliest possible convenience. + */ + if (engine->schedule) + engine->schedule(request, request->ctx->priority); + + local_bh_disable(); + i915_sw_fence_commit(&request->submit); + local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ + + /* + * In typical scenarios, we do not expect the previous request on + * the timeline to be still tracked by timeline->last_request if it + * has been completed. If the completed request is still here, that + * implies that request retirement is a long way behind submission, + * suggesting that we haven't been retiring frequently enough from + * the combination of retire-before-alloc, waiters and the background + * retirement worker. So if the last request on this timeline was + * already completed, do a catch up pass, flushing the retirement queue + * up to this client. Since we have now moved the heaviest operations + * during retirement onto secondary workers, such as freeing objects + * or contexts, retiring a bunch of requests is mostly list management + * (and cache misses), and so we should not be overly penalizing this + * client by performing excess work, though we may still be performing + * work on behalf of others -- but instead we should benefit from + * improved resource management. (Well, that's the theory at least.) + */ + if (prev && i915_request_completed(prev)) + i915_request_retire_upto(prev); +} + +static unsigned long local_clock_us(unsigned int *cpu) +{ + unsigned long t; + + /* + * Cheaply and approximately convert from nanoseconds to microseconds. + * The result and subsequent calculations are also defined in the same + * approximate microseconds units. The principal source of timing + * error here is from the simple truncation. 
+ * + * Note that local_clock() is only defined wrt to the current CPU; + * the comparisons are no longer valid if we switch CPUs. Instead of + * blocking preemption for the entire busywait, we can detect the CPU + * switch and use that as indicator of system load and a reason to + * stop busywaiting, see busywait_stop(). + */ + *cpu = get_cpu(); + t = local_clock() >> 10; + put_cpu(); + + return t; +} + +static bool busywait_stop(unsigned long timeout, unsigned int cpu) +{ + unsigned int this_cpu; + + if (time_after(local_clock_us(&this_cpu), timeout)) + return true; + + return this_cpu != cpu; +} + +static bool __i915_spin_request(const struct i915_request *rq, + u32 seqno, int state, unsigned long timeout_us) +{ + struct intel_engine_cs *engine = rq->engine; + unsigned int irq, cpu; + + GEM_BUG_ON(!seqno); + + /* + * Only wait for the request if we know it is likely to complete. + * + * We don't track the timestamps around requests, nor the average + * request length, so we do not have a good indicator that this + * request will complete within the timeout. What we do know is the + * order in which requests are executed by the engine and so we can + * tell if the request has started. If the request hasn't started yet, + * it is a fair assumption that it will not complete within our + * relatively short timeout. + */ + if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1)) + return false; + + /* + * When waiting for high frequency requests, e.g. during synchronous + * rendering split between the CPU and GPU, the finite amount of time + * required to set up the irq and wait upon it limits the response + * rate. By busywaiting on the request completion for a short while we + * can service the high frequency waits as quick as possible. However, + * if it is a slow request, we want to sleep as quickly as possible. + * The tradeoff between waiting and sleeping is roughly the time it + * takes to sleep on a request, on the order of a microsecond. 
+ */ + + irq = atomic_read(&engine->irq_count); + timeout_us += local_clock_us(&cpu); + do { + if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno)) + return seqno == i915_request_global_seqno(rq); + + /* + * Seqno are meant to be ordered *before* the interrupt. If + * we see an interrupt without a corresponding seqno advance, + * assume we won't see one in the near future but require + * the engine->seqno_barrier() to fixup coherency. + */ + if (atomic_read(&engine->irq_count) != irq) + break; + + if (signal_pending_state(state, current)) + break; + + if (busywait_stop(timeout_us, cpu)) + break; + + cpu_relax(); + } while (!need_resched()); + + return false; +} + +static bool __i915_wait_request_check_and_reset(struct i915_request *request) +{ + if (likely(!i915_reset_handoff(&request->i915->gpu_error))) + return false; + + __set_current_state(TASK_RUNNING); + i915_reset(request->i915, 0); + return true; +} + +/** + * i915_request_wait - wait until execution of request has finished + * @rq: the request to wait upon + * @flags: how to wait + * @timeout: how long to wait in jiffies + * + * i915_request_wait() waits for the request to be completed, for a + * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an + * unbounded wait). + * + * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED + * in via the flags, and vice versa if the struct_mutex is not held, the caller + * must not specify that the wait is locked. + * + * Returns the remaining time (in jiffies) if the request completed, which may + * be zero or -ETIME if the request is unfinished after the timeout expires. + * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is + * pending before the request completes. + */ +long i915_request_wait(struct i915_request *rq, + unsigned int flags, + long timeout) +{ + const int state = flags & I915_WAIT_INTERRUPTIBLE ? 
+ TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue; + DEFINE_WAIT_FUNC(reset, default_wake_function); + DEFINE_WAIT_FUNC(exec, default_wake_function); + struct intel_wait wait; + + might_sleep(); +#if IS_ENABLED(CONFIG_LOCKDEP) + GEM_BUG_ON(debug_locks && + !!lockdep_is_held(&rq->i915->drm.struct_mutex) != + !!(flags & I915_WAIT_LOCKED)); +#endif + GEM_BUG_ON(timeout < 0); + + if (i915_request_completed(rq)) + return timeout; + + if (!timeout) + return -ETIME; + + trace_i915_request_wait_begin(rq, flags); + + add_wait_queue(&rq->execute, &exec); + if (flags & I915_WAIT_LOCKED) + add_wait_queue(errq, &reset); + + intel_wait_init(&wait, rq); + +restart: + do { + set_current_state(state); + if (intel_wait_update_request(&wait, rq)) + break; + + if (flags & I915_WAIT_LOCKED && + __i915_wait_request_check_and_reset(rq)) + continue; + + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; + goto complete; + } + + if (!timeout) { + timeout = -ETIME; + goto complete; + } + + timeout = io_schedule_timeout(timeout); + } while (1); + + GEM_BUG_ON(!intel_wait_has_seqno(&wait)); + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); + + /* Optimistic short spin before touching IRQs */ + if (__i915_spin_request(rq, wait.seqno, state, 5)) + goto complete; + + set_current_state(state); + if (intel_engine_add_wait(rq->engine, &wait)) + /* + * In order to check that we haven't missed the interrupt + * as we enabled it, we need to kick ourselves to do a + * coherent check on the seqno before we sleep. 
+ */ + goto wakeup; + + if (flags & I915_WAIT_LOCKED) + __i915_wait_request_check_and_reset(rq); + + for (;;) { + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; + break; + } + + if (!timeout) { + timeout = -ETIME; + break; + } + + timeout = io_schedule_timeout(timeout); + + if (intel_wait_complete(&wait) && + intel_wait_check_request(&wait, rq)) + break; + + set_current_state(state); + +wakeup: + /* + * Carefully check if the request is complete, giving time + * for the seqno to be visible following the interrupt. + * We also have to check in case we are kicked by the GPU + * reset in order to drop the struct_mutex. + */ + if (__i915_request_irq_complete(rq)) + break; + + /* + * If the GPU is hung, and we hold the lock, reset the GPU + * and then check for completion. On a full reset, the engine's + * HW seqno will be advanced passed us and we are complete. + * If we do a partial reset, we have to wait for the GPU to + * resume and update the breadcrumb. + * + * If we don't hold the mutex, we can just wait for the worker + * to come along and update the breadcrumb (either directly + * itself, or indirectly by recovering the GPU). 
+ */ + if (flags & I915_WAIT_LOCKED && + __i915_wait_request_check_and_reset(rq)) + continue; + + /* Only spin if we know the GPU is processing this request */ + if (__i915_spin_request(rq, wait.seqno, state, 2)) + break; + + if (!intel_wait_check_request(&wait, rq)) { + intel_engine_remove_wait(rq->engine, &wait); + goto restart; + } + } + + intel_engine_remove_wait(rq->engine, &wait); +complete: + __set_current_state(TASK_RUNNING); + if (flags & I915_WAIT_LOCKED) + remove_wait_queue(errq, &reset); + remove_wait_queue(&rq->execute, &exec); + trace_i915_request_wait_end(rq); + + return timeout; +} + +static void engine_retire_requests(struct intel_engine_cs *engine) +{ + struct i915_request *request, *next; + u32 seqno = intel_engine_get_seqno(engine); + LIST_HEAD(retire); + + spin_lock_irq(&engine->timeline->lock); + list_for_each_entry_safe(request, next, + &engine->timeline->requests, link) { + if (!i915_seqno_passed(seqno, request->global_seqno)) + break; + + list_move_tail(&request->link, &retire); + } + spin_unlock_irq(&engine->timeline->lock); + + list_for_each_entry_safe(request, next, &retire, link) + i915_request_retire(request); +} + +void i915_retire_requests(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + if (!i915->gt.active_requests) + return; + + for_each_engine(engine, i915, id) + engine_retire_requests(engine); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_request.c" +#include "selftests/i915_request.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h new file mode 100644 index 000000000000..74311fc53e2f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_request.h @@ -0,0 +1,738 @@ +/* + * Copyright © 2008-2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef I915_REQUEST_H +#define I915_REQUEST_H + +#include <linux/dma-fence.h> + +#include "i915_gem.h" +#include "i915_sw_fence.h" + +#include <uapi/drm/i915_drm.h> + +struct drm_file; +struct drm_i915_gem_object; +struct i915_request; + +struct intel_wait { + struct rb_node node; + struct task_struct *tsk; + struct i915_request *request; + u32 seqno; +}; + +struct intel_signal_node { + struct rb_node node; + struct intel_wait wait; +}; + +struct i915_dependency { + struct i915_priotree *signaler; + struct list_head signal_link; + struct list_head wait_link; + struct list_head dfs_link; + unsigned long flags; +#define I915_DEPENDENCY_ALLOC BIT(0) +}; + +/* + * "People assume that time is a strict progression of cause to effect, but + * actually, from a nonlinear, non-subjective viewpoint, it's more like a big + * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 + * + * Requests exist in a complex web of interdependencies.
Each request + * has to wait for some other request to complete before it is ready to be run + * (e.g. we have to wait until the pixels have been rendered into a texture + * before we can copy from it). We track the readiness of a request in terms + * of fences, but we also need to keep the dependency tree for the lifetime + * of the request (beyond the life of an individual fence). We use the tree + * at various points to reorder the requests whilst keeping the requests + * in order with respect to their various dependencies. + */ +struct i915_priotree { + struct list_head signalers_list; /* those before us, we depend upon */ + struct list_head waiters_list; /* those after us, they depend upon us */ + struct list_head link; + int priority; +}; + +enum { + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + + I915_PRIORITY_INVALID = INT_MIN +}; + +struct i915_capture_list { + struct i915_capture_list *next; + struct i915_vma *vma; +}; + +/** + * Request queue structure. + * + * The request queue allows us to note sequence numbers that have been emitted + * and may be associated with active buffers to be retired. + * + * By keeping this list, we can avoid having to do questionable sequence + * number comparisons on buffer last_read|write_seqno. It also allows an + * emission time to be associated with the request for tracking how far ahead + * of the GPU the submission is. + * + * When modifying this structure be very aware that we perform a lockless + * RCU lookup of it that may race against reallocation of the struct + * from the slab freelist. We intentionally do not zero the structure on + * allocation so that the lookup can use the dangling pointers (and is + * cognisant that those pointers may be wrong). Instead, everything that + * needs to be initialised must be done so explicitly. + * + * The requests are reference counted.
+ */ +struct i915_request { + struct dma_fence fence; + spinlock_t lock; + + /** On Which ring this request was generated */ + struct drm_i915_private *i915; + + /** + * Context and ring buffer related to this request + * Contexts are refcounted, so when this request is associated with a + * context, we must increment the context's refcount, to guarantee that + * it persists while any request is linked to it. Requests themselves + * are also refcounted, so the request will only be freed when the last + * reference to it is dismissed, and the code in + * i915_request_free() will then decrement the refcount on the + * context. + */ + struct i915_gem_context *ctx; + struct intel_engine_cs *engine; + struct intel_ring *ring; + struct intel_timeline *timeline; + struct intel_signal_node signaling; + + /* + * Fences for the various phases in the request's lifetime. + * + * The submit fence is used to await upon all of the request's + * dependencies. When it is signaled, the request is ready to run. + * It is used by the driver to then queue the request for execution. + */ + struct i915_sw_fence submit; + wait_queue_entry_t submitq; + wait_queue_head_t execute; + + /* + * A list of everyone we wait upon, and everyone who waits upon us. + * Even though we will not be submitted to the hardware before the + * submit fence is signaled (it waits for all external events as well + * as our own requests), the scheduler still needs to know the + * dependency tree for the lifetime of the request (from execbuf + * to retirement), i.e. bidirectional dependency information for the + * request not tied to individual fences. + */ + struct i915_priotree priotree; + struct i915_dependency dep; + + /** + * GEM sequence number associated with this request on the + * global execution timeline. It is zero when the request is not + * on the HW queue (i.e. not on the engine timeline list). + * Its value is guarded by the timeline spinlock. 
+ */ + u32 global_seqno; + + /** Position in the ring of the start of the request */ + u32 head; + + /** + * Position in the ring of the start of the postfix. + * This is required to calculate the maximum available ring space + * without overwriting the postfix. + */ + u32 postfix; + + /** Position in the ring of the end of the whole request */ + u32 tail; + + /** Position in the ring of the end of any workarounds after the tail */ + u32 wa_tail; + + /** Preallocated space in the ring for emitting the request */ + u32 reserved_space; + + /** Batch buffer related to this request if any (used for + * error state dump only). + */ + struct i915_vma *batch; + /** + * Additional buffers requested by userspace to be captured upon + * a GPU hang. The vma/obj on this list are protected by their + * active reference - all objects on this list must also be + * on the active_list (of their final request). + */ + struct i915_capture_list *capture_list; + struct list_head active_list; + + /** Time at which this request was emitted, in jiffies.
*/ + unsigned long emitted_jiffies; + + bool waitboost; + + /** engine->request_list entry for this request */ + struct list_head link; + + /** ring->request_list entry for this request */ + struct list_head ring_link; + + struct drm_i915_file_private *file_priv; + /** file_priv list entry for this request */ + struct list_head client_link; +}; + +#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) + +extern const struct dma_fence_ops i915_fence_ops; + +static inline bool dma_fence_is_i915(const struct dma_fence *fence) +{ + return fence->ops == &i915_fence_ops; +} + +struct i915_request * __must_check +i915_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); +void i915_request_retire_upto(struct i915_request *rq); + +static inline struct i915_request * +to_request(struct dma_fence *fence) +{ + /* We assume that NULL fence/request are interoperable */ + BUILD_BUG_ON(offsetof(struct i915_request, fence) != 0); + GEM_BUG_ON(fence && !dma_fence_is_i915(fence)); + return container_of(fence, struct i915_request, fence); +} + +static inline struct i915_request * +i915_request_get(struct i915_request *rq) +{ + return to_request(dma_fence_get(&rq->fence)); +} + +static inline struct i915_request * +i915_request_get_rcu(struct i915_request *rq) +{ + return to_request(dma_fence_get_rcu(&rq->fence)); +} + +static inline void +i915_request_put(struct i915_request *rq) +{ + dma_fence_put(&rq->fence); +} + +/** + * i915_request_global_seqno - report the current global seqno + * @request - the request + * + * A request is assigned a global seqno only when it is on the hardware + * execution queue. The global seqno can be used to maintain a list of + * requests on the same engine in retirement order, for example for + * constructing a priority queue for waiting. Prior to its execution, or + * if it is subsequently removed in the event of preemption, its global + * seqno is zero. 
As both insertion and removal from the execution queue + * may operate in IRQ context, it is not guarded by the usual struct_mutex + * BKL. Instead those relying on the global seqno must be prepared for its + * value to change between reads. Only when the request is complete can + * the global seqno be stable (due to the memory barriers on submitting + * the commands to the hardware to write the breadcrumb, if the HWS shows + * that it has passed the global seqno and the global seqno is unchanged + * after the read, it is indeed complete). + */ +static inline u32 +i915_request_global_seqno(const struct i915_request *request) +{ + return READ_ONCE(request->global_seqno); +} + +int i915_request_await_object(struct i915_request *to, + struct drm_i915_gem_object *obj, + bool write); +int i915_request_await_dma_fence(struct i915_request *rq, + struct dma_fence *fence); + +void __i915_request_add(struct i915_request *rq, bool flush_caches); +#define i915_request_add(rq) \ + __i915_request_add(rq, false) + +void __i915_request_submit(struct i915_request *request); +void i915_request_submit(struct i915_request *request); + +void __i915_request_unsubmit(struct i915_request *request); +void i915_request_unsubmit(struct i915_request *request); + +long i915_request_wait(struct i915_request *rq, + unsigned int flags, + long timeout) + __attribute__((nonnull(1))); +#define I915_WAIT_INTERRUPTIBLE BIT(0) +#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */ +#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ + +static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); + +/** + * Returns true if seq1 is at or after seq2.
+ */ +static inline bool i915_seqno_passed(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + +static inline bool +__i915_request_completed(const struct i915_request *rq, u32 seqno) +{ + GEM_BUG_ON(!seqno); + return i915_seqno_passed(intel_engine_get_seqno(rq->engine), seqno) && + seqno == i915_request_global_seqno(rq); +} + +static inline bool i915_request_completed(const struct i915_request *rq) +{ + u32 seqno; + + seqno = i915_request_global_seqno(rq); + if (!seqno) + return false; + + return __i915_request_completed(rq, seqno); +} + +static inline bool i915_request_started(const struct i915_request *rq) +{ + u32 seqno; + + seqno = i915_request_global_seqno(rq); + if (!seqno) + return false; + + return i915_seqno_passed(intel_engine_get_seqno(rq->engine), + seqno - 1); +} + +static inline bool i915_priotree_signaled(const struct i915_priotree *pt) +{ + const struct i915_request *rq = + container_of(pt, const struct i915_request, priotree); + + return i915_request_completed(rq); +} + +void i915_retire_requests(struct drm_i915_private *i915); + +/* + * We treat requests as fences. This is not to be confused with our + * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. + * We use the fences to synchronize access from the CPU with activity on the + * GPU, for example, we should not rewrite an object's PTE whilst the GPU + * is reading them. We also track fences at a higher level to provide + * implicit synchronisation around GEM objects, e.g. set-domain will wait + * for outstanding GPU rendering before marking the object ready for CPU + * access, or a pageflip will wait until the GPU is complete before showing + * the frame on the scanout. + * + * In order to use a fence, the object must track the fence it needs to + * serialise with.
For example, GEM objects want to track both read and + * write access so that we can perform concurrent read operations between + * the CPU and GPU engines, as well as waiting for all rendering to + * complete, or waiting for the last GPU user of a "fence register". The + * object then embeds a #i915_gem_active to track the most recent (in + * retirement order) request relevant for the desired mode of access. + * The #i915_gem_active is updated with i915_gem_active_set() to track the + * most recent fence request, typically this is done as part of + * i915_vma_move_to_active(). + * + * When the #i915_gem_active completes (is retired), it will + * signal its completion to the owner through a callback as well as mark + * itself as idle (i915_gem_active.request == NULL). The owner + * can then perform any action, such as delayed freeing of an active + * resource including itself. + */ +struct i915_gem_active; + +typedef void (*i915_gem_retire_fn)(struct i915_gem_active *, + struct i915_request *); + +struct i915_gem_active { + struct i915_request __rcu *request; + struct list_head link; + i915_gem_retire_fn retire; +}; + +void i915_gem_retire_noop(struct i915_gem_active *, + struct i915_request *request); + +/** + * init_request_active - prepares the activity tracker for use + * @active - the active tracker + * @retire - a callback when the tracker is retired (becomes idle), + * can be NULL + * + * init_request_active() prepares the embedded @active struct for use as + * an activity tracker, that is for tracking the last known active request + * associated with it. When the last request becomes idle, when it is retired + * after completion, the optional callback @retire is invoked.
+ */ +static inline void +init_request_active(struct i915_gem_active *active, + i915_gem_retire_fn retire) +{ + INIT_LIST_HEAD(&active->link); + active->retire = retire ?: i915_gem_retire_noop; +} + +/** + * i915_gem_active_set - updates the tracker to watch the current request + * @active - the active tracker + * @request - the request to watch + * + * i915_gem_active_set() watches the given @request for completion. Whilst + * that @request is busy, the @active reports busy. When that @request is + * retired, the @active tracker is updated to report idle. + */ +static inline void +i915_gem_active_set(struct i915_gem_active *active, + struct i915_request *request) +{ + list_move(&active->link, &request->active_list); + rcu_assign_pointer(active->request, request); +} + +/** + * i915_gem_active_set_retire_fn - updates the retirement callback + * @active - the active tracker + * @fn - the routine called when the request is retired + * @mutex - struct_mutex used to guard retirements + * + * i915_gem_active_set_retire_fn() updates the function pointer that + * is called when the final request associated with the @active tracker + * is retired. + */ +static inline void +i915_gem_active_set_retire_fn(struct i915_gem_active *active, + i915_gem_retire_fn fn, + struct mutex *mutex) +{ + lockdep_assert_held(mutex); + active->retire = fn ?: i915_gem_retire_noop; +} + +static inline struct i915_request * +__i915_gem_active_peek(const struct i915_gem_active *active) +{ + /* + * Inside the error capture (running with the driver in an unknown + * state), we want to bend the rules slightly (a lot). + * + * Work is in progress to make it safer, in the meantime this keeps + * the known issue from spamming the logs. + */ + return rcu_dereference_protected(active->request, 1); +} + +/** + * i915_gem_active_raw - return the active request + * @active - the active tracker + * + * i915_gem_active_raw() returns the current request being tracked, or NULL. 
+ * It does not obtain a reference on the request for the caller, so the caller + * must hold struct_mutex. + */ +static inline struct i915_request * +i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) +{ + return rcu_dereference_protected(active->request, + lockdep_is_held(mutex)); +} + +/** + * i915_gem_active_peek - report the active request being monitored + * @active - the active tracker + * + * i915_gem_active_peek() returns the current request being tracked if + * still active, or NULL. It does not obtain a reference on the request + * for the caller, so the caller must hold struct_mutex. + */ +static inline struct i915_request * +i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) +{ + struct i915_request *request; + + request = i915_gem_active_raw(active, mutex); + if (!request || i915_request_completed(request)) + return NULL; + + return request; +} + +/** + * i915_gem_active_get - return a reference to the active request + * @active - the active tracker + * + * i915_gem_active_get() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold struct_mutex. + */ +static inline struct i915_request * +i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex) +{ + return i915_request_get(i915_gem_active_peek(active, mutex)); +} + +/** + * __i915_gem_active_get_rcu - return a reference to the active request + * @active - the active tracker + * + * __i915_gem_active_get_rcu() returns a reference to the active request, or NULL + * if the active tracker is idle. The caller must hold the RCU read lock, but + * the returned pointer is safe to use outside of RCU. + */ +static inline struct i915_request * +__i915_gem_active_get_rcu(const struct i915_gem_active *active) +{ + /* + * Performing a lockless retrieval of the active request is super + * tricky.
SLAB_TYPESAFE_BY_RCU merely guarantees that the backing + * slab of request objects will not be freed whilst we hold the + * RCU read lock. It does not guarantee that the request itself + * will not be freed and then *reused*. Viz, + * + * Thread A Thread B + * + * rq = active.request + * retire(rq) -> free(rq); + * (rq is now first on the slab freelist) + * active.request = NULL + * + * rq = new submission on a new object + * ref(rq) + * + * To prevent the request from being reused whilst the caller + * uses it, we take a reference like normal. Whilst acquiring + * the reference we check that it is not in a destroyed state + * (refcnt == 0). That prevents the request being reallocated + * whilst the caller holds on to it. To check that the request + * was not reallocated as we acquired the reference we have to + * check that our request remains the active request across + * the lookup, in the same manner as a seqlock. The visibility + * of the pointer versus the reference counting is controlled + * by using RCU barriers (rcu_dereference and rcu_assign_pointer). + * + * In the middle of all that, we inspect whether the request is + * complete. Retiring is lazy so the request may be completed long + * before the active tracker is updated. Querying whether the + * request is complete is far cheaper (as it involves no locked + * instructions setting cachelines to exclusive) than acquiring + * the reference, so we do it first. The RCU read lock ensures the + * pointer dereference is valid, but does not ensure that the + * seqno nor HWS is the right one! However, if the request was + * reallocated, that means the active tracker's request was complete. + * If the new request is also complete, then both are and we can + * just report the active tracker is idle. If the new request is + * incomplete, then we acquire a reference on it and check that + * it remained the active request. 
+ * + * It is then imperative that we do not zero the request on + * reallocation, so that we can chase the dangling pointers! + * See i915_request_alloc(). + */ + do { + struct i915_request *request; + + request = rcu_dereference(active->request); + if (!request || i915_request_completed(request)) + return NULL; + + /* + * An especially silly compiler could decide to recompute the + * result of i915_request_completed, more specifically + * re-emit the load for request->fence.seqno. A race would catch + * a later seqno value, which could flip the result from true to + * false. Which means part of the instructions below might not + * be executed, while later on instructions are executed. Due to + * barriers within the refcounting the inconsistency can't reach + * past the call to i915_request_get_rcu, but not executing + * that while still executing i915_request_put() creates + * havoc enough. Prevent this with a compiler barrier. + */ + barrier(); + + request = i915_request_get_rcu(request); + + /* + * What stops the following rcu_access_pointer() from occurring + * before the above i915_request_get_rcu()? If we were + * to read the value before pausing to get the reference to + * the request, we may not notice a change in the active + * tracker. + * + * The rcu_access_pointer() is a mere compiler barrier, which + * means both the CPU and compiler are free to perform the + * memory read without constraint. The compiler only has to + * ensure that any operations after the rcu_access_pointer() + * occur afterwards in program order. This means the read may + * be performed earlier by an out-of-order CPU, or adventurous + * compiler. + * + * The atomic operation at the heart of + * i915_request_get_rcu(), see dma_fence_get_rcu(), is + * atomic_inc_not_zero() which is only a full memory barrier + * when successful. 
That is, if i915_request_get_rcu() + * returns the request (and so with the reference counted + * incremented) then the following read for rcu_access_pointer() + * must occur after the atomic operation and so confirm + * that this request is the one currently being tracked. + * + * The corresponding write barrier is part of + * rcu_assign_pointer(). + */ + if (!request || request == rcu_access_pointer(active->request)) + return rcu_pointer_handoff(request); + + i915_request_put(request); + } while (1); +} + +/** + * i915_gem_active_get_unlocked - return a reference to the active request + * @active - the active tracker + * + * i915_gem_active_get_unlocked() returns a reference to the active request, + * or NULL if the active tracker is idle. The reference is obtained under RCU, + * so no locking is required by the caller. + * + * The reference should be freed with i915_request_put(). + */ +static inline struct i915_request * +i915_gem_active_get_unlocked(const struct i915_gem_active *active) +{ + struct i915_request *request; + + rcu_read_lock(); + request = __i915_gem_active_get_rcu(active); + rcu_read_unlock(); + + return request; +} + +/** + * i915_gem_active_isset - report whether the active tracker is assigned + * @active - the active tracker + * + * i915_gem_active_isset() returns true if the active tracker is currently + * assigned to a request. Due to the lazy retiring, that request may be idle + * and this may report stale information. + */ +static inline bool +i915_gem_active_isset(const struct i915_gem_active *active) +{ + return rcu_access_pointer(active->request); +} + +/** + * i915_gem_active_wait - waits until the request is completed + * @active - the active request on which to wait + * @flags - how to wait + * @timeout - how long to wait at most + * @rps - userspace client to charge for a waitboost + * + * i915_gem_active_wait() waits until the request is completed before + * returning, without requiring any locks to be held. 
Note that it does not + * retire any requests before returning. + * + * This function relies on RCU in order to acquire the reference to the active + * request without holding any locks. See __i915_gem_active_get_rcu() for the + * glory details on how that is managed. Once the reference is acquired, we + * can then wait upon the request, and afterwards release our reference, + * free of any locking. + * + * This function wraps i915_request_wait(), see it for the full details on + * the arguments. + * + * Returns 0 if successful, or a negative error code. + */ +static inline int +i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags) +{ + struct i915_request *request; + long ret = 0; + + request = i915_gem_active_get_unlocked(active); + if (request) { + ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT); + i915_request_put(request); + } + + return ret < 0 ? ret : 0; +} + +/** + * i915_gem_active_retire - waits until the request is retired + * @active - the active request on which to wait + * + * i915_gem_active_retire() waits until the request is completed, + * and then ensures that at least the retirement handler for this + * @active tracker is called before returning. If the @active + * tracker is idle, the function returns immediately. + */ +static inline int __must_check +i915_gem_active_retire(struct i915_gem_active *active, + struct mutex *mutex) +{ + struct i915_request *request; + long ret; + + request = i915_gem_active_raw(active, mutex); + if (!request) + return 0; + + ret = i915_request_wait(request, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (ret < 0) + return ret; + + list_del_init(&active->link); + RCU_INIT_POINTER(active->request, NULL); + + active->retire(active, request); + + return 0; +} + +#define for_each_active(mask, idx) \ + for (; mask ? 
idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) + +#endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index e1169c02eb2b..408827bf5d96 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -586,8 +586,7 @@ TRACE_EVENT(i915_gem_evict_vm, ); TRACE_EVENT(i915_gem_ring_sync_to, - TP_PROTO(struct drm_i915_gem_request *to, - struct drm_i915_gem_request *from), + TP_PROTO(struct i915_request *to, struct i915_request *from), TP_ARGS(to, from), TP_STRUCT__entry( @@ -610,9 +609,9 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry->seqno) ); -TRACE_EVENT(i915_gem_request_queue, - TP_PROTO(struct drm_i915_gem_request *req, u32 flags), - TP_ARGS(req, flags), +TRACE_EVENT(i915_request_queue, + TP_PROTO(struct i915_request *rq, u32 flags), + TP_ARGS(rq, flags), TP_STRUCT__entry( __field(u32, dev) @@ -624,11 +623,11 @@ TRACE_EVENT(i915_gem_request_queue, ), TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - __entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; __entry->flags = flags; ), @@ -637,9 +636,9 @@ TRACE_EVENT(i915_gem_request_queue, __entry->seqno, __entry->flags) ); -DECLARE_EVENT_CLASS(i915_gem_request, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req), +DECLARE_EVENT_CLASS(i915_request, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq), TP_STRUCT__entry( __field(u32, dev) @@ -651,12 +650,12 @@ DECLARE_EVENT_CLASS(i915_gem_request, ), TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - __entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; - __entry->global = req->global_seqno; + 
__entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global = rq->global_seqno; ), TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u", @@ -664,26 +663,25 @@ DECLARE_EVENT_CLASS(i915_gem_request, __entry->seqno, __entry->global) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_add, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_add, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); #if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) -DEFINE_EVENT(i915_gem_request, i915_gem_request_submit, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_submit, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_execute, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_execute, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); -DECLARE_EVENT_CLASS(i915_gem_request_hw, - TP_PROTO(struct drm_i915_gem_request *req, - unsigned int port), - TP_ARGS(req, port), +DECLARE_EVENT_CLASS(i915_request_hw, + TP_PROTO(struct i915_request *rq, unsigned int port), + TP_ARGS(rq, port), TP_STRUCT__entry( __field(u32, dev) @@ -696,14 +694,14 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, ), TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - __entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; - __entry->global_seqno = req->global_seqno; - __entry->port = port; - ), + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global_seqno = rq->global_seqno; + __entry->port = port; + ), 
TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", __entry->dev, __entry->hw_id, __entry->ring, @@ -711,34 +709,34 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, __entry->global_seqno, __entry->port) ); -DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, - TP_PROTO(struct drm_i915_gem_request *req, unsigned int port), - TP_ARGS(req, port) +DEFINE_EVENT(i915_request_hw, i915_request_in, + TP_PROTO(struct i915_request *rq, unsigned int port), + TP_ARGS(rq, port) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_out, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_out, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); #else #if !defined(TRACE_HEADER_MULTI_READ) static inline void -trace_i915_gem_request_submit(struct drm_i915_gem_request *req) +trace_i915_request_submit(struct i915_request *rq) { } static inline void -trace_i915_gem_request_execute(struct drm_i915_gem_request *req) +trace_i915_request_execute(struct i915_request *rq) { } static inline void -trace_i915_gem_request_in(struct drm_i915_gem_request *req, unsigned int port) +trace_i915_request_in(struct i915_request *rq, unsigned int port) { } static inline void -trace_i915_gem_request_out(struct drm_i915_gem_request *req) +trace_i915_request_out(struct i915_request *rq) { } #endif @@ -767,14 +765,14 @@ TRACE_EVENT(intel_engine_notify, __entry->waiters) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_retire, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); -TRACE_EVENT(i915_gem_request_wait_begin, - TP_PROTO(struct drm_i915_gem_request *req, unsigned int flags), - TP_ARGS(req, flags), +TRACE_EVENT(i915_request_wait_begin, + TP_PROTO(struct i915_request *rq, unsigned int flags), + TP_ARGS(rq, flags), TP_STRUCT__entry( __field(u32, dev) @@ -793,12 +791,12 @@ TRACE_EVENT(i915_gem_request_wait_begin, * less 
desirable. */ TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->hw_id = req->ctx->hw_id; - __entry->ring = req->engine->id; - __entry->ctx = req->fence.context; - __entry->seqno = req->fence.seqno; - __entry->global = req->global_seqno; + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global = rq->global_seqno; __entry->flags = flags; ), @@ -808,9 +806,9 @@ TRACE_EVENT(i915_gem_request_wait_begin, !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) +DEFINE_EVENT(i915_request, i915_request_wait_end, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); TRACE_EVENT(i915_flip_request, diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e0e7c48f45dc..4bda3bd29bf5 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -31,8 +31,7 @@ #include static void -i915_vma_retire(struct i915_gem_active *active, - struct drm_i915_gem_request *rq) +i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq) { const unsigned int idx = rq->engine->id; struct i915_vma *vma = diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index fd5b84904f7c..8c5022095418 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -32,8 +32,8 @@ #include "i915_gem_gtt.h" #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" -#include "i915_gem_request.h" +#include "i915_request.h" enum i915_cache_level; diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b955f7d7bd0f..a83690642aab 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -588,7 +588,7 @@ void 
intel_engine_remove_wait(struct intel_engine_cs *engine, spin_unlock_irq(&b->rb_lock); } -static bool signal_complete(const struct drm_i915_gem_request *request) +static bool signal_complete(const struct i915_request *request) { if (!request) return false; @@ -600,9 +600,9 @@ static bool signal_complete(const struct drm_i915_gem_request *request) return __i915_request_irq_complete(request); } -static struct drm_i915_gem_request *to_signaler(struct rb_node *rb) +static struct i915_request *to_signaler(struct rb_node *rb) { - return rb_entry(rb, struct drm_i915_gem_request, signaling.node); + return rb_entry(rb, struct i915_request, signaling.node); } static void signaler_set_rtpriority(void) @@ -613,7 +613,7 @@ static void signaler_set_rtpriority(void) } static void __intel_engine_remove_signal(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -644,7 +644,7 @@ static void __intel_engine_remove_signal(struct intel_engine_cs *engine, } } -static struct drm_i915_gem_request * +static struct i915_request * get_first_signal_rcu(struct intel_breadcrumbs *b) { /* @@ -654,18 +654,18 @@ get_first_signal_rcu(struct intel_breadcrumbs *b) * the required memory barriers. 
*/ do { - struct drm_i915_gem_request *request; + struct i915_request *request; request = rcu_dereference(b->first_signal); if (request) - request = i915_gem_request_get_rcu(request); + request = i915_request_get_rcu(request); barrier(); if (!request || request == rcu_access_pointer(b->first_signal)) return rcu_pointer_handoff(request); - i915_gem_request_put(request); + i915_request_put(request); } while (1); } @@ -673,7 +673,7 @@ static int intel_breadcrumbs_signaler(void *arg) { struct intel_engine_cs *engine = arg; struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct drm_i915_gem_request *request; + struct i915_request *request; /* Install ourselves with high priority to reduce signalling latency */ signaler_set_rtpriority(); @@ -699,7 +699,7 @@ static int intel_breadcrumbs_signaler(void *arg) &request->fence.flags)) { local_bh_disable(); dma_fence_signal(&request->fence); - GEM_BUG_ON(!i915_gem_request_completed(request)); + GEM_BUG_ON(!i915_request_completed(request)); local_bh_enable(); /* kick start the tasklets */ } @@ -718,7 +718,7 @@ static int intel_breadcrumbs_signaler(void *arg) */ do_schedule = need_resched(); } - i915_gem_request_put(request); + i915_request_put(request); if (unlikely(do_schedule)) { if (kthread_should_park()) @@ -735,8 +735,7 @@ static int intel_breadcrumbs_signaler(void *arg) return 0; } -void intel_engine_enable_signaling(struct drm_i915_gem_request *request, - bool wakeup) +void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -753,7 +752,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request, GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&request->lock); - seqno = i915_gem_request_global_seqno(request); + seqno = i915_request_global_seqno(request); if (!seqno) return; @@ -774,7 +773,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request, */ 
wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); - if (!__i915_gem_request_completed(request, seqno)) { + if (!__i915_request_completed(request, seqno)) { struct rb_node *parent, **p; bool first; @@ -811,7 +810,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request, wake_up_process(b->signaler); } -void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) +void intel_engine_cancel_signaling(struct i915_request *request) { GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&request->lock); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 75baa5dab877..c14d2a25408d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12584,23 +12584,23 @@ struct wait_rps_boost { struct wait_queue_entry wait; struct drm_crtc *crtc; - struct drm_i915_gem_request *request; + struct i915_request *request; }; static int do_rps_boost(struct wait_queue_entry *_wait, unsigned mode, int sync, void *key) { struct wait_rps_boost *wait = container_of(_wait, typeof(*wait), wait); - struct drm_i915_gem_request *rq = wait->request; + struct i915_request *rq = wait->request; /* * If we missed the vblank, but the request is already running it * is reasonable to assume that it will complete before the next * vblank without our intervention, so leave RPS alone. 
*/ - if (!i915_gem_request_started(rq)) + if (!i915_request_started(rq)) gen6_rps_boost(rq, NULL); - i915_gem_request_put(rq); + i915_request_put(rq); drm_crtc_vblank_put(wait->crtc); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c81be2c7b582..04fc4bd12329 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1894,8 +1894,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); -void gen6_rps_boost(struct drm_i915_gem_request *rq, - struct intel_rps_client *rps); +void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps); void g4x_wm_get_hw_state(struct drm_device *dev); void vlv_wm_get_hw_state(struct drm_device *dev); void ilk_wm_get_hw_state(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f3c5100d629e..c31544406974 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1426,20 +1426,20 @@ int init_workarounds_ring(struct intel_engine_cs *engine) return 0; } -int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) +int intel_ring_workarounds_emit(struct i915_request *rq) { - struct i915_workarounds *w = &req->i915->workarounds; + struct i915_workarounds *w = &rq->i915->workarounds; u32 *cs; int ret, i; if (w->count == 0) return 0; - ret = req->engine->emit_flush(req, EMIT_BARRIER); + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); if (ret) return ret; - cs = intel_ring_begin(req, (w->count * 2 + 2)); + cs = intel_ring_begin(rq, w->count * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1450,9 +1450,9 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - ret = 
req->engine->emit_flush(req, EMIT_BARRIER); + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); if (ret) return ret; @@ -1552,7 +1552,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) { const struct i915_gem_context * const kernel_context = engine->i915->kernel_context; - struct drm_i915_gem_request *rq; + struct i915_request *rq; lockdep_assert_held(&engine->i915->drm.struct_mutex); @@ -1664,12 +1664,12 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) } static void print_request(struct drm_printer *m, - struct drm_i915_gem_request *rq, + struct i915_request *rq, const char *prefix) { drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %dms: %s\n", prefix, rq->global_seqno, - i915_gem_request_completed(rq) ? "!" : "", + i915_request_completed(rq) ? "!" : "", rq->ctx->hw_id, rq->fence.seqno, rq->priotree.priority, jiffies_to_msecs(jiffies - rq->emitted_jiffies), @@ -1803,7 +1803,7 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, rcu_read_lock(); for (idx = 0; idx < execlists_num_ports(execlists); idx++) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int count; rq = port_unpack(&execlists->port[idx], &count); @@ -1837,7 +1837,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct intel_breadcrumbs * const b = &engine->breadcrumbs; const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct rb_node *rb; if (header) { @@ -1866,12 +1866,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); rq = list_first_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); + struct i915_request, link); if (&rq->link != &engine->timeline->requests) print_request(m, rq, "\t\tfirst "); rq = list_last_entry(&engine->timeline->requests, - struct drm_i915_gem_request, 
link); + struct i915_request, link); if (&rq->link != &engine->timeline->requests) print_request(m, rq, "\t\tlast "); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 946766b62459..649113c7a3c2 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -496,8 +496,7 @@ static void guc_ring_doorbell(struct intel_guc_client *client) GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED); } -static void guc_add_request(struct intel_guc *guc, - struct drm_i915_gem_request *rq) +static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; @@ -648,7 +647,7 @@ static void guc_submit(struct intel_engine_cs *engine) unsigned int n; for (n = 0; n < execlists_num_ports(execlists); n++) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int count; rq = port_unpack(&port[n], &count); @@ -662,19 +661,18 @@ static void guc_submit(struct intel_engine_cs *engine) } } -static void port_assign(struct execlist_port *port, - struct drm_i915_gem_request *rq) +static void port_assign(struct execlist_port *port, struct i915_request *rq) { GEM_BUG_ON(port_isset(port)); - port_set(port, i915_gem_request_get(rq)); + port_set(port, i915_request_get(rq)); } static void guc_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; - struct drm_i915_gem_request *last = NULL; + struct i915_request *last = NULL; const struct execlist_port * const last_port = &execlists->port[execlists->port_mask]; bool submit = false; @@ -710,7 +708,7 @@ static void guc_dequeue(struct intel_engine_cs *engine) do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, 
&p->requests, priotree.link) { if (last && rq->ctx != last->ctx) { @@ -727,9 +725,8 @@ static void guc_dequeue(struct intel_engine_cs *engine) INIT_LIST_HEAD(&rq->priotree.link); - __i915_gem_request_submit(rq); - trace_i915_gem_request_in(rq, - port_index(port, execlists)); + __i915_request_submit(rq); + trace_i915_request_in(rq, port_index(port, execlists)); last = rq; submit = true; } @@ -762,12 +759,12 @@ static void guc_submission_tasklet(unsigned long data) struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; - struct drm_i915_gem_request *rq; + struct i915_request *rq; rq = port_request(&port[0]); - while (rq && i915_gem_request_completed(rq)) { - trace_i915_gem_request_out(rq); - i915_gem_request_put(rq); + while (rq && i915_request_completed(rq)) { + trace_i915_request_out(rq); + i915_request_put(rq); execlists_port_complete(execlists, port); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9b6d781b22ec..d8bca8ba2efc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -267,7 +267,7 @@ find_priolist: return ptr_pack_bits(p, first, 1); } -static void unwind_wa_tail(struct drm_i915_gem_request *rq) +static void unwind_wa_tail(struct i915_request *rq) { rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); assert_ring_tail_valid(rq->ring, rq->tail); @@ -275,7 +275,7 @@ static void unwind_wa_tail(struct drm_i915_gem_request *rq) static void __unwind_incomplete_requests(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; struct i915_priolist *uninitialized_var(p); int last_prio = I915_PRIORITY_INVALID; @@ -284,10 +284,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->timeline->requests, link) { - if 
(i915_gem_request_completed(rq)) + if (i915_request_completed(rq)) return; - __i915_gem_request_unsubmit(rq); + __i915_request_unsubmit(rq); unwind_wa_tail(rq); GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); @@ -316,8 +316,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) } static inline void -execlists_context_status_change(struct drm_i915_gem_request *rq, - unsigned long status) +execlists_context_status_change(struct i915_request *rq, unsigned long status) { /* * Only used when GVT-g is enabled now. When GVT-g is disabled, @@ -331,14 +330,14 @@ execlists_context_status_change(struct drm_i915_gem_request *rq, } static inline void -execlists_context_schedule_in(struct drm_i915_gem_request *rq) +execlists_context_schedule_in(struct i915_request *rq) { execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(rq->engine); } static inline void -execlists_context_schedule_out(struct drm_i915_gem_request *rq) +execlists_context_schedule_out(struct i915_request *rq) { intel_engine_context_out(rq->engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); @@ -353,7 +352,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) ASSIGN_CTX_PDP(ppgtt, reg_state, 0); } -static u64 execlists_update_context(struct drm_i915_gem_request *rq) +static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; struct i915_hw_ppgtt *ppgtt = @@ -385,7 +384,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) unsigned int n; for (n = execlists_num_ports(&engine->execlists); n--; ) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int count; u64 desc; @@ -430,15 +429,14 @@ static bool can_merge_ctx(const struct i915_gem_context *prev, return true; } -static void port_assign(struct execlist_port *port, - struct drm_i915_gem_request *rq) +static void port_assign(struct 
execlist_port *port, struct i915_request *rq) { GEM_BUG_ON(rq == port_request(port)); if (port_isset(port)) - i915_gem_request_put(port_request(port)); + i915_request_put(port_request(port)); - port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); + port_set(port, port_pack(i915_request_get(rq), port_count(port))); } static void inject_preempt_context(struct intel_engine_cs *engine) @@ -476,7 +474,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct execlist_port *port = execlists->port; const struct execlist_port * const last_port = &execlists->port[execlists->port_mask]; - struct drm_i915_gem_request *last = port_request(port); + struct i915_request *last = port_request(port); struct rb_node *rb; bool submit = false; @@ -565,7 +563,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) /* WaIdleLiteRestore:bdw,skl * Apply the wa NOOPs to prevent - * ring:HEAD == req:TAIL as we resubmit the + * ring:HEAD == rq:TAIL as we resubmit the * request. See gen8_emit_breadcrumb() for * where we prepare the padding after the * end of the request. 
@@ -576,7 +574,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { /* @@ -626,8 +624,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&rq->priotree.link); - __i915_gem_request_submit(rq); - trace_i915_gem_request_in(rq, port_index(port, execlists)); + __i915_request_submit(rq); + trace_i915_request_in(rq, port_index(port, execlists)); last = rq; submit = true; } @@ -665,12 +663,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) unsigned int num_ports = execlists_num_ports(execlists); while (num_ports-- && port_isset(port)) { - struct drm_i915_gem_request *rq = port_request(port); + struct i915_request *rq = port_request(port); GEM_BUG_ON(!execlists->active); intel_engine_context_out(rq->engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); - i915_gem_request_put(rq); + i915_request_put(rq); memset(port, 0, sizeof(*port)); port++; @@ -680,7 +678,7 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) static void execlists_cancel_requests(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct drm_i915_gem_request *rq, *rn; + struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; @@ -692,7 +690,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Mark all executing requests as skipped. 
*/ list_for_each_entry(rq, &engine->timeline->requests, link) { GEM_BUG_ON(!rq->global_seqno); - if (!i915_gem_request_completed(rq)) + if (!i915_request_completed(rq)) dma_fence_set_error(&rq->fence, -EIO); } @@ -705,7 +703,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) INIT_LIST_HEAD(&rq->priotree.link); dma_fence_set_error(&rq->fence, -EIO); - __i915_gem_request_submit(rq); + __i915_request_submit(rq); } rb = rb_next(rb); @@ -806,7 +804,7 @@ static void execlists_submission_tasklet(unsigned long data) tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); while (head != tail) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int status; unsigned int count; @@ -885,10 +883,10 @@ static void execlists_submission_tasklet(unsigned long data) GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); GEM_BUG_ON(port_isset(&port[1]) && !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); - GEM_BUG_ON(!i915_gem_request_completed(rq)); + GEM_BUG_ON(!i915_request_completed(rq)); execlists_context_schedule_out(rq); - trace_i915_gem_request_out(rq); - i915_gem_request_put(rq); + trace_i915_request_out(rq); + i915_request_put(rq); execlists_port_complete(execlists, port); } else { @@ -928,7 +926,7 @@ static void insert_request(struct intel_engine_cs *engine, tasklet_hi_schedule(&engine->execlists.tasklet); } -static void execlists_submit_request(struct drm_i915_gem_request *request) +static void execlists_submit_request(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; unsigned long flags; @@ -944,9 +942,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt) +static struct i915_request *pt_to_request(struct i915_priotree *pt) { - return container_of(pt, struct drm_i915_gem_request, 
priotree); + return container_of(pt, struct i915_request, priotree); } static struct intel_engine_cs * @@ -964,7 +962,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) return engine; } -static void execlists_schedule(struct drm_i915_gem_request *request, int prio) +static void execlists_schedule(struct i915_request *request, int prio) { struct intel_engine_cs *engine; struct i915_dependency *dep, *p; @@ -973,7 +971,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) GEM_BUG_ON(prio == I915_PRIORITY_INVALID); - if (i915_gem_request_completed(request)) + if (i915_request_completed(request)) return; if (prio <= READ_ONCE(request->priotree.priority)) @@ -1158,7 +1156,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, i915_gem_context_put(ctx); } -static int execlists_request_alloc(struct drm_i915_gem_request *request) +static int execlists_request_alloc(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; @@ -1590,7 +1588,7 @@ static void reset_irq(struct intel_engine_cs *engine) } static void reset_common_ring(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_context *ce; @@ -1658,15 +1656,15 @@ static void reset_common_ring(struct intel_engine_cs *engine, unwind_wa_tail(request); } -static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) +static int intel_logical_ring_emit_pdps(struct i915_request *rq) { - struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; - struct intel_engine_cs *engine = req->engine; + struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; + struct intel_engine_cs *engine = rq->engine; const int num_lri_cmds = GEN8_3LVL_PDPES * 2; u32 *cs; int i; - cs = intel_ring_begin(req, num_lri_cmds * 2 + 2); + cs = intel_ring_begin(rq, 
num_lri_cmds * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1681,12 +1679,12 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } -static int gen8_emit_bb_start(struct drm_i915_gem_request *req, +static int gen8_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, const unsigned int flags) { @@ -1699,18 +1697,18 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * it is unsafe in case of lite-restore (because the ctx is * not idle). PML4 is allocated during ppgtt init so this is * not needed in 48-bit.*/ - if (req->ctx->ppgtt && - (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings) && - !i915_vm_is_48bit(&req->ctx->ppgtt->base) && - !intel_vgpu_active(req->i915)) { - ret = intel_logical_ring_emit_pdps(req); + if (rq->ctx->ppgtt && + (intel_engine_flag(rq->engine) & rq->ctx->ppgtt->pd_dirty_rings) && + !i915_vm_is_48bit(&rq->ctx->ppgtt->base) && + !intel_vgpu_active(rq->i915)) { + ret = intel_logical_ring_emit_pdps(rq); if (ret) return ret; - req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); + rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); } - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1739,7 +1737,7 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, (flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1758,7 +1756,7 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) I915_WRITE_IMR(engine, ~engine->irq_keep_mask); } -static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) +static int gen8_emit_flush(struct i915_request *request, u32 mode) { u32 cmd, *cs; @@ -1790,7 +1788,7 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) return 0; } -static int gen8_emit_flush_render(struct drm_i915_gem_request *request, +static int gen8_emit_flush_render(struct i915_request *request, u32 mode) { struct intel_engine_cs *engine = request->engine; @@ -1865,7 +1863,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) +static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) { /* Ensure there's always at least one preemption point per-request. */ *cs++ = MI_ARB_CHECK; @@ -1873,7 +1871,7 @@ static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) request->wa_tail = intel_ring_offset(request, cs); } -static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) +static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. 
*/ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); @@ -1889,8 +1887,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, - u32 *cs) +static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -1906,15 +1903,15 @@ static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, } static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; -static int gen8_init_rcs_context(struct drm_i915_gem_request *req) +static int gen8_init_rcs_context(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(rq); if (ret) return ret; - ret = intel_rcs_context_init_mocs(req); + ret = intel_rcs_context_init_mocs(rq); /* * Failing to program the MOCS is non-fatal.The system will not * run at peak performance. So generate an error and carry on. @@ -1922,7 +1919,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) if (ret) DRM_ERROR("MOCS failed to program: expect performance issues.\n"); - return i915_gem_render_state_emit(req); + return i915_gem_render_state_emit(rq); } /** diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index abb7a8c1e340..c0b34b7943b9 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -265,7 +265,7 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) /** * emit_mocs_control_table() - emit the mocs control table - * @req: Request to set up the MOCS table for. + * @rq: Request to set up the MOCS table for. * @table: The values to program into the control regs. 
* * This function simply emits a MI_LOAD_REGISTER_IMM command for the @@ -273,17 +273,17 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) * * Return: 0 on success, otherwise the error status. */ -static int emit_mocs_control_table(struct drm_i915_gem_request *req, +static int emit_mocs_control_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { - enum intel_engine_id engine = req->engine->id; + enum intel_engine_id engine = rq->engine->id; unsigned int index; u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - cs = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -308,7 +308,7 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -323,7 +323,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, /** * emit_mocs_l3cc_table() - emit the mocs control table - * @req: Request to set up the MOCS table for. + * @rq: Request to set up the MOCS table for. * @table: The values to program into the control regs. * * This function simply emits a MI_LOAD_REGISTER_IMM command for the @@ -332,7 +332,7 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, * * Return: 0 on success, otherwise the error status. 
*/ -static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, +static int emit_mocs_l3cc_table(struct i915_request *rq, const struct drm_i915_mocs_table *table) { unsigned int i; @@ -341,7 +341,7 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - cs = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); + cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -370,7 +370,7 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, } *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -417,7 +417,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) /** * intel_rcs_context_init_mocs() - program the MOCS register. - * @req: Request to set up the MOCS tables for. + * @rq: Request to set up the MOCS tables for. * * This function will emit a batch buffer with the values required for * programming the MOCS register values for all the currently supported @@ -431,19 +431,19 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) * * Return: 0 on success, otherwise the error status. 
*/ -int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req) +int intel_rcs_context_init_mocs(struct i915_request *rq) { struct drm_i915_mocs_table t; int ret; - if (get_mocs_settings(req->i915, &t)) { + if (get_mocs_settings(rq->i915, &t)) { /* Program the RCS control registers */ - ret = emit_mocs_control_table(req, &t); + ret = emit_mocs_control_table(rq, &t); if (ret) return ret; /* Now program the l3cc registers */ - ret = emit_mocs_l3cc_table(req, &t); + ret = emit_mocs_l3cc_table(rq, &t); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/intel_mocs.h index ce4a5dfa5f94..d1751f91c1a4 100644 --- a/drivers/gpu/drm/i915/intel_mocs.h +++ b/drivers/gpu/drm/i915/intel_mocs.h @@ -52,7 +52,7 @@ #include #include "i915_drv.h" -int intel_rcs_context_init_mocs(struct drm_i915_gem_request *req); +int intel_rcs_context_init_mocs(struct i915_request *rq); void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv); int intel_mocs_init_engine(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 89f568e739ee..36671a937fa4 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -234,50 +234,50 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay, } static void intel_overlay_submit_request(struct intel_overlay *overlay, - struct drm_i915_gem_request *req, + struct i915_request *rq, i915_gem_retire_fn retire) { GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, &overlay->i915->drm.struct_mutex)); i915_gem_active_set_retire_fn(&overlay->last_flip, retire, &overlay->i915->drm.struct_mutex); - i915_gem_active_set(&overlay->last_flip, req); - i915_add_request(req); + i915_gem_active_set(&overlay->last_flip, rq); + i915_request_add(rq); } static int intel_overlay_do_wait_request(struct intel_overlay *overlay, - struct drm_i915_gem_request *req, + struct i915_request *rq, i915_gem_retire_fn 
retire) { - intel_overlay_submit_request(overlay, req, retire); + intel_overlay_submit_request(overlay, rq, retire); return i915_gem_active_retire(&overlay->last_flip, &overlay->i915->drm.struct_mutex); } -static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay) +static struct i915_request *alloc_request(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; struct intel_engine_cs *engine = dev_priv->engine[RCS]; - return i915_gem_request_alloc(engine, dev_priv->kernel_context); + return i915_request_alloc(engine, dev_priv->kernel_context); } /* overlay needs to be disable in OCMD reg */ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; - struct drm_i915_gem_request *req; + struct i915_request *rq; u32 *cs; WARN_ON(overlay->active); - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } @@ -290,9 +290,9 @@ static int intel_overlay_on(struct intel_overlay *overlay) *cs++ = overlay->flip_addr | OFC_UPDATE; *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - return intel_overlay_do_wait_request(overlay, req, NULL); + return intel_overlay_do_wait_request(overlay, rq, NULL); } static void intel_overlay_flip_prepare(struct intel_overlay *overlay, @@ -322,7 +322,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, bool load_polyphase_filter) { struct drm_i915_private *dev_priv = overlay->i915; - struct drm_i915_gem_request *req; + struct i915_request *rq; u32 flip_addr = overlay->flip_addr; u32 tmp, *cs; @@ -336,23 +336,23 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (tmp & (1 << 17)) 
DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; *cs++ = flip_addr; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); intel_overlay_flip_prepare(overlay, vma); - intel_overlay_submit_request(overlay, req, NULL); + intel_overlay_submit_request(overlay, rq, NULL); return 0; } @@ -373,7 +373,7 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) } static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, - struct drm_i915_gem_request *req) + struct i915_request *rq) { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); @@ -382,7 +382,7 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, } static void intel_overlay_off_tail(struct i915_gem_active *active, - struct drm_i915_gem_request *req) + struct i915_request *rq) { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); @@ -401,7 +401,7 @@ static void intel_overlay_off_tail(struct i915_gem_active *active, /* overlay needs to be disabled in OCMD reg */ static int intel_overlay_off(struct intel_overlay *overlay) { - struct drm_i915_gem_request *req; + struct i915_request *rq; u32 *cs, flip_addr = overlay->flip_addr; WARN_ON(!overlay->active); @@ -412,13 +412,13 @@ static int intel_overlay_off(struct intel_overlay *overlay) * of the hw. 
Do it in both cases */ flip_addr |= OFC_UPDATE; - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } @@ -432,11 +432,11 @@ static int intel_overlay_off(struct intel_overlay *overlay) *cs++ = flip_addr; *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); intel_overlay_flip_prepare(overlay, NULL); - return intel_overlay_do_wait_request(overlay, req, + return intel_overlay_do_wait_request(overlay, rq, intel_overlay_off_tail); } @@ -468,23 +468,23 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ - struct drm_i915_gem_request *req; + struct i915_request *rq; - req = alloc_request(overlay); - if (IS_ERR(req)) - return PTR_ERR(req); + rq = alloc_request(overlay); + if (IS_ERR(rq)) + return PTR_ERR(rq); - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) { - i915_add_request(req); + i915_request_add(rq); return PTR_ERR(cs); } *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - ret = intel_overlay_do_wait_request(overlay, req, + ret = intel_overlay_do_wait_request(overlay, rq, intel_overlay_release_old_vid_tail); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index abf80e462833..21dac6ebc202 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6360,7 +6360,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->pcu_lock); } -void gen6_rps_boost(struct drm_i915_gem_request *rq, +void gen6_rps_boost(struct i915_request *rq, struct 
intel_rps_client *rps_client) { struct intel_rps *rps = &rq->i915->gt_pm.rps; @@ -6376,7 +6376,7 @@ void gen6_rps_boost(struct drm_i915_gem_request *rq, if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) return; - /* Serializes with i915_gem_request_retire() */ + /* Serializes with i915_request_retire() */ boost = false; spin_lock_irqsave(&rq->lock, flags); if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5718f37160c5..1d599524a759 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -66,7 +66,7 @@ unsigned int intel_ring_update_space(struct intel_ring *ring) } static int -gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen2_render_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; @@ -75,19 +75,19 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = cmd; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen4_render_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; @@ -122,17 +122,17 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) { cmd |= MI_EXE_FLUSH; - if (IS_G4X(req->i915) || IS_GEN5(req->i915)) + if (IS_G4X(rq->i915) || IS_GEN5(rq->i915)) cmd |= MI_INVALIDATE_ISP; } - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = cmd; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -175,13 +175,13 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) * really our business. 
That leaves only stall at scoreboard. */ static int -intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) +intel_emit_post_sync_nonzero_flush(struct i915_request *rq) { u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; + i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; u32 *cs; - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -191,9 +191,9 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) *cs++ = 0; /* low dword */ *cs++ = 0; /* high dword */ *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -203,21 +203,21 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) *cs++ = 0; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen6_render_ring_flush(struct i915_request *rq, u32 mode) { u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; + i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; u32 *cs, flags = 0; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ - ret = intel_emit_post_sync_nonzero_flush(req); + ret = intel_emit_post_sync_nonzero_flush(rq); if (ret) return ret; @@ -247,7 +247,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; } - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -255,17 +255,17 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = flags; *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen7_render_ring_cs_stall_wa(struct 
drm_i915_gem_request *req) +gen7_render_ring_cs_stall_wa(struct i915_request *rq) { u32 *cs; - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -273,16 +273,16 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; *cs++ = 0; *cs++ = 0; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) +gen7_render_ring_flush(struct i915_request *rq, u32 mode) { u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; + i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES; u32 *cs, flags = 0; /* @@ -324,10 +324,10 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache * invalidate bit set. */ - gen7_render_ring_cs_stall_wa(req); + gen7_render_ring_cs_stall_wa(rq); } - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -335,7 +335,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = flags; *cs++ = scratch_addr; *cs++ = 0; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -531,7 +531,7 @@ out: } static void reset_ring_common(struct intel_engine_cs *engine, - struct drm_i915_gem_request *request) + struct i915_request *request) { /* * RC6 must be prevented until the reset is complete and the engine @@ -595,15 +595,15 @@ static void reset_ring_common(struct intel_engine_cs *engine, } } -static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) +static int intel_rcs_ctx_init(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(rq); if (ret != 0) return ret; - ret = i915_gem_render_state_emit(req); + ret = 
i915_gem_render_state_emit(rq); if (ret) return ret; @@ -661,9 +661,9 @@ static int init_render_ring(struct intel_engine_cs *engine) return init_workarounds_ring(engine); } -static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) +static u32 *gen6_signal(struct i915_request *rq, u32 *cs) { - struct drm_i915_private *dev_priv = req->i915; + struct drm_i915_private *dev_priv = rq->i915; struct intel_engine_cs *engine; enum intel_engine_id id; int num_rings = 0; @@ -674,11 +674,11 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK)) continue; - mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; + mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(mbox_reg); - *cs++ = req->global_seqno; + *cs++ = rq->global_seqno; num_rings++; } } @@ -690,7 +690,7 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) static void cancel_requests(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; + struct i915_request *request; unsigned long flags; spin_lock_irqsave(&engine->timeline->lock, flags); @@ -698,7 +698,7 @@ static void cancel_requests(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. 
*/ list_for_each_entry(request, &engine->timeline->requests, link) { GEM_BUG_ON(!request->global_seqno); - if (!i915_gem_request_completed(request)) + if (!i915_request_completed(request)) dma_fence_set_error(&request->fence, -EIO); } /* Remaining _unready_ requests will be nop'ed when submitted */ @@ -706,48 +706,46 @@ static void cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->timeline->lock, flags); } -static void i9xx_submit_request(struct drm_i915_gem_request *request) +static void i9xx_submit_request(struct i915_request *request) { struct drm_i915_private *dev_priv = request->i915; - i915_gem_request_submit(request); + i915_request_submit(request); I915_WRITE_TAIL(request->engine, intel_ring_set_tail(request->ring, request->tail)); } -static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) +static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; - *cs++ = req->global_seqno; + *cs++ = rq->global_seqno; *cs++ = MI_USER_INTERRUPT; - req->tail = intel_ring_offset(req, cs); - assert_ring_tail_valid(req->ring, req->tail); + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); } static const int i9xx_emit_breadcrumb_sz = 4; -static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) +static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - return i9xx_emit_breadcrumb(req, - req->engine->semaphore.signal(req, cs)); + return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs)); } static int -gen6_ring_sync_to(struct drm_i915_gem_request *req, - struct drm_i915_gem_request *signal) +gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal) { u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; + u32 wait_mbox = 
signal->engine->semaphore.mbox.wait[rq->engine->hw_id]; u32 *cs; WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -759,7 +757,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req, *cs++ = signal->global_seqno - 1; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -858,17 +856,17 @@ i8xx_irq_disable(struct intel_engine_cs *engine) } static int -bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) +bsd_ring_flush(struct i915_request *rq, u32 mode) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_FLUSH; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -911,20 +909,20 @@ hsw_vebox_irq_disable(struct intel_engine_cs *engine) } static int -i965_emit_bb_start(struct drm_i915_gem_request *req, +i965_emit_bb_start(struct i915_request *rq, u64 offset, u32 length, unsigned int dispatch_flags) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE_I965); *cs++ = offset; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -934,13 +932,13 @@ i965_emit_bb_start(struct drm_i915_gem_request *req, #define I830_TLB_ENTRIES (2) #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) static int -i830_emit_bb_start(struct drm_i915_gem_request *req, +i830_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { - u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch); + u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch); - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -951,13 +949,13 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, *cs++ = cs_offset; *cs++ = 0xdeadbeef; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) return -ENOSPC; - cs = intel_ring_begin(req, 6 + 2); + cs = intel_ring_begin(rq, 6 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -974,39 +972,39 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, *cs++ = MI_FLUSH; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); /* ... and execute it. */ offset = cs_offset; } - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -i915_emit_bb_start(struct drm_i915_gem_request *req, +i915_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE); - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } @@ -1377,7 +1375,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) intel_ring_reset(engine->buffer, 0); } -static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags) +static inline int mi_set_context(struct i915_request *rq, u32 flags) { struct drm_i915_private *i915 = rq->i915; struct intel_engine_cs *engine = rq->engine; @@ -1463,7 +1461,7 @@ static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags) return 0; } -static int remap_l3(struct drm_i915_gem_request *rq, int slice) +static int remap_l3(struct i915_request *rq, int slice) { u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; int i; @@ -1491,7 +1489,7 @@ static int remap_l3(struct drm_i915_gem_request *rq, int slice) return 0; } -static int switch_context(struct drm_i915_gem_request *rq) +static int switch_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *to_ctx = rq->ctx; @@ -1561,7 +1559,7 @@ err: return ret; } -static int ring_request_alloc(struct drm_i915_gem_request *request) +static int ring_request_alloc(struct i915_request *request) { int ret; @@ -1587,7 +1585,7 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) { - struct drm_i915_gem_request *target; + struct i915_request *target; long timeout; lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); @@ -1605,13 +1603,13 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) if (WARN_ON(&target->ring_link == &ring->request_list)) return -ENOSPC; - timeout = i915_wait_request(target, + timeout = i915_request_wait(target, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) return timeout; - i915_gem_request_retire_upto(target); + i915_request_retire_upto(target); 
intel_ring_update_space(ring); GEM_BUG_ON(ring->space < bytes); @@ -1634,10 +1632,9 @@ int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes) return 0; } -u32 *intel_ring_begin(struct drm_i915_gem_request *req, - unsigned int num_dwords) +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) { - struct intel_ring *ring = req->ring; + struct intel_ring *ring = rq->ring; const unsigned int remain_usable = ring->effective_size - ring->emit; const unsigned int bytes = num_dwords * sizeof(u32); unsigned int need_wrap = 0; @@ -1647,7 +1644,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, /* Packets must be qword aligned. */ GEM_BUG_ON(num_dwords & 1); - total_bytes = bytes + req->reserved_space; + total_bytes = bytes + rq->reserved_space; GEM_BUG_ON(total_bytes > ring->effective_size); if (unlikely(total_bytes > remain_usable)) { @@ -1668,7 +1665,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, * wrap and only need to effectively wait for the * reserved size from the start of ringbuffer. */ - total_bytes = req->reserved_space + remain_actual; + total_bytes = rq->reserved_space + remain_actual; } } @@ -1682,9 +1679,9 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, * overallocation and the assumption is that then we never need * to wait (which has the risk of failing with EINTR). * - * See also i915_gem_request_alloc() and i915_add_request(). + * See also i915_request_alloc() and i915_request_add(). 
*/ - GEM_BUG_ON(!req->reserved_space); + GEM_BUG_ON(!rq->reserved_space); ret = wait_for_space(ring, total_bytes); if (unlikely(ret)) @@ -1713,29 +1710,28 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, } /* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct drm_i915_gem_request *req) +int intel_ring_cacheline_align(struct i915_request *rq) { - int num_dwords = - (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); u32 *cs; if (num_dwords == 0) return 0; - num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; - cs = intel_ring_begin(req, num_dwords); + num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords; + cs = intel_ring_begin(rq, num_dwords); if (IS_ERR(cs)) return PTR_ERR(cs); while (num_dwords--) *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } -static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) +static void gen6_bsd_submit_request(struct i915_request *request) { struct drm_i915_private *dev_priv = request->i915; @@ -1772,11 +1768,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) +static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1802,18 +1798,18 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -hsw_emit_bb_start(struct drm_i915_gem_request *req, +hsw_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { u32 *cs; - cs = 
intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1823,19 +1819,19 @@ hsw_emit_bb_start(struct drm_i915_gem_request *req, MI_BATCH_RESOURCE_STREAMER : 0); /* bit0-7 is the length on GEN6+ */ *cs++ = offset; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } static int -gen6_emit_bb_start(struct drm_i915_gem_request *req, +gen6_emit_bb_start(struct i915_request *rq, u64 offset, u32 len, unsigned int dispatch_flags) { u32 *cs; - cs = intel_ring_begin(req, 2); + cs = intel_ring_begin(rq, 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1843,18 +1839,18 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, 0 : MI_BATCH_NON_SECURE_I965); /* bit0-7 is the length on GEN6+ */ *cs++ = offset; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } /* Blitter support (SandyBridge+) */ -static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) +static int gen6_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; - cs = intel_ring_begin(req, 4); + cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1879,7 +1875,7 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = 0; *cs++ = MI_NOOP; - intel_ring_advance(req, cs); + intel_ring_advance(rq, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 51523ad049de..a9b83bf7e837 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -3,10 +3,12 @@ #define _INTEL_RINGBUFFER_H_ #include + #include "i915_gem_batch_pool.h" -#include "i915_gem_request.h" #include "i915_gem_timeline.h" + #include "i915_pmu.h" +#include "i915_request.h" #include "i915_selftest.h" struct drm_printer; @@ -115,7 +117,7 @@ struct intel_engine_hangcheck { unsigned long action_timestamp; int deadlock; struct intel_instdone instdone; - struct 
drm_i915_gem_request *active_request; + struct i915_request *active_request; bool stalled; }; @@ -156,7 +158,7 @@ struct i915_ctx_workarounds { struct i915_vma *vma; }; -struct drm_i915_gem_request; +struct i915_request; /* * Engine IDs definitions. @@ -218,7 +220,7 @@ struct intel_engine_execlists { /** * @request_count: combined request and submission count */ - struct drm_i915_gem_request *request_count; + struct i915_request *request_count; #define EXECLIST_COUNT_BITS 2 #define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) #define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) @@ -339,7 +341,7 @@ struct intel_engine_cs { struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ struct task_struct *signaler; /* used for fence signalling */ - struct drm_i915_gem_request __rcu *first_signal; + struct i915_request __rcu *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ @@ -391,7 +393,7 @@ struct intel_engine_cs { int (*init_hw)(struct intel_engine_cs *engine); void (*reset_hw)(struct intel_engine_cs *engine, - struct drm_i915_gem_request *req); + struct i915_request *rq); void (*park)(struct intel_engine_cs *engine); void (*unpark)(struct intel_engine_cs *engine); @@ -402,22 +404,20 @@ struct intel_engine_cs { struct i915_gem_context *ctx); void (*context_unpin)(struct intel_engine_cs *engine, struct i915_gem_context *ctx); - int (*request_alloc)(struct drm_i915_gem_request *req); - int (*init_context)(struct drm_i915_gem_request *req); + int (*request_alloc)(struct i915_request *rq); + int (*init_context)(struct i915_request *rq); - int (*emit_flush)(struct drm_i915_gem_request *request, - u32 mode); + int (*emit_flush)(struct i915_request *request, u32 mode); #define EMIT_INVALIDATE BIT(0) #define EMIT_FLUSH BIT(1) #define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) - int 
(*emit_bb_start)(struct drm_i915_gem_request *req, + int (*emit_bb_start)(struct i915_request *rq, u64 offset, u32 length, unsigned int dispatch_flags); #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) - void (*emit_breadcrumb)(struct drm_i915_gem_request *req, - u32 *cs); + void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into @@ -426,7 +426,7 @@ struct intel_engine_cs { * This is called from an atomic context with irqs disabled; must * be irq safe. */ - void (*submit_request)(struct drm_i915_gem_request *req); + void (*submit_request)(struct i915_request *rq); /* Call when the priority on a request has changed and it and its * dependencies may need rescheduling. Note the request itself may @@ -434,8 +434,7 @@ struct intel_engine_cs { * * Called under the struct_mutex. */ - void (*schedule)(struct drm_i915_gem_request *request, - int priority); + void (*schedule)(struct i915_request *request, int priority); /* * Cancel all requests on the hardware, or queued for execution. 
@@ -503,9 +502,9 @@ struct intel_engine_cs { } mbox; /* AKA wait() */ - int (*sync_to)(struct drm_i915_gem_request *req, - struct drm_i915_gem_request *signal); - u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); + int (*sync_to)(struct i915_request *rq, + struct i915_request *signal); + u32 *(*signal)(struct i915_request *rq, u32 *cs); } semaphore; struct intel_engine_execlists execlists; @@ -726,14 +725,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); +int __must_check intel_ring_cacheline_align(struct i915_request *rq); int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes); -u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, - unsigned int n); +u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); -static inline void -intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) { /* Dummy function. * @@ -743,22 +740,20 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ - GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs); + GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); } -static inline u32 -intel_ring_wrap(const struct intel_ring *ring, u32 pos) +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) { return pos & (ring->size - 1); } -static inline u32 -intel_ring_offset(const struct drm_i915_gem_request *req, void *addr) +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. 
*/ - u32 offset = addr - req->ring->vaddr; - GEM_BUG_ON(offset > req->ring->size); - return intel_ring_wrap(req->ring, offset); + u32 offset = addr - rq->ring->vaddr; + GEM_BUG_ON(offset > rq->ring->size); + return intel_ring_wrap(rq->ring, offset); } static inline void @@ -796,7 +791,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) { /* Whilst writes to the tail are strictly order, there is no * serialisation between readers and the writers. The tail may be - * read by i915_gem_request_retire() just as it is being updated + * read by i915_request_retire() just as it is being updated * by execlists, as although the breadcrumb is complete, the context * switch hasn't been seen. */ @@ -838,7 +833,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) } int init_workarounds_ring(struct intel_engine_cs *engine); -int intel_ring_workarounds_emit(struct drm_i915_gem_request *req); +int intel_ring_workarounds_emit(struct i915_request *rq); void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); @@ -866,7 +861,7 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); static inline void intel_wait_init(struct intel_wait *wait, - struct drm_i915_gem_request *rq) + struct i915_request *rq) { wait->tsk = current; wait->request = rq; @@ -892,9 +887,9 @@ intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) static inline bool intel_wait_update_request(struct intel_wait *wait, - const struct drm_i915_gem_request *rq) + const struct i915_request *rq) { - return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq)); + return intel_wait_update_seqno(wait, i915_request_global_seqno(rq)); } static inline bool @@ -905,9 +900,9 @@ intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) static inline bool intel_wait_check_request(const struct intel_wait *wait, - const struct 
drm_i915_gem_request *rq) + const struct i915_request *rq) { - return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq)); + return intel_wait_check_seqno(wait, i915_request_global_seqno(rq)); } static inline bool intel_wait_complete(const struct intel_wait *wait) @@ -919,9 +914,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); -void intel_engine_enable_signaling(struct drm_i915_gem_request *request, - bool wakeup); -void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); +void intel_engine_enable_signaling(struct i915_request *request, bool wakeup); +void intel_engine_cancel_signaling(struct i915_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 52b1bd17bf46..05bbef363fff 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -964,7 +964,7 @@ static int gpu_write(struct i915_vma *vma, u32 dword, u32 value) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *batch; int flags = 0; int err; @@ -975,7 +975,7 @@ static int gpu_write(struct i915_vma *vma, if (err) return err; - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -1003,7 +1003,7 @@ static int gpu_write(struct i915_vma *vma, reservation_object_unlock(vma->resv); err_request: - __i915_add_request(rq, err == 0); + __i915_request_add(rq, err == 0); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 7a0d1e17c1ad..340a98c0c804 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -178,7 
+178,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, u32 v) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *vma; u32 *cs; int err; @@ -191,7 +191,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -199,7 +199,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) { - __i915_add_request(rq, false); + __i915_request_add(rq, false); i915_vma_unpin(vma); return PTR_ERR(cs); } @@ -229,7 +229,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, reservation_object_add_excl_fence(obj->resv, &rq->fence); reservation_object_unlock(obj->resv); - __i915_add_request(rq, true); + __i915_request_add(rq, true); return 0; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 6da2a2f29c54..7ecaed50d0b9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -114,7 +114,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_address_space *vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &i915->ggtt.base; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *vma; struct i915_vma *batch; unsigned int flags; @@ -152,7 +152,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, goto err_vma; } - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_batch; @@ -180,12 +180,12 @@ static int gpu_fill(struct drm_i915_gem_object *obj, reservation_object_add_excl_fence(obj->resv, &rq->fence); reservation_object_unlock(obj->resv); - __i915_add_request(rq, true); + __i915_request_add(rq, true); return 0; err_request: - __i915_add_request(rq, false); + __i915_request_add(rq, false); err_batch: i915_vma_unpin(batch); err_vma: diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index e1ddad635d73..ab9d7bee0aae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -407,7 +407,7 @@ static int igt_evict_contexts(void *arg) mutex_lock(&i915->drm.struct_mutex); onstack_fence_init(&fence); do { - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_gem_context *ctx; ctx = live_context(i915, file); @@ -416,7 +416,7 @@ static int igt_evict_contexts(void *arg) /* We will need some GGTT space for the rq's context */ igt_evict_ctl.fail_if_busy = true; - rq = i915_gem_request_alloc(engine, ctx); + rq = i915_request_alloc(engine, ctx); igt_evict_ctl.fail_if_busy = false; if (IS_ERR(rq)) { @@ -437,7 +437,7 @@ static int igt_evict_contexts(void *arg) if (err < 0) break; - i915_add_request(rq); + i915_request_add(rq); count++; err = 0; } while(1); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 3c64815e910b..fbdb2419d418 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -436,7 
+436,7 @@ out: static int make_obj_busy(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_vma *vma; int err; @@ -448,14 +448,14 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) if (err) return err; - rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); } i915_vma_move_to_active(vma, rq, 0); - i915_add_request(rq); + i915_request_add(rq); i915_gem_object_set_active_reference(obj); i915_vma_unpin(vma); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c deleted file mode 100644 index 647bf2bbd799..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ /dev/null @@ -1,868 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include <linux/prime_numbers.h> - -#include "../i915_selftest.h" - -#include "mock_context.h" -#include "mock_gem_device.h" - -static int igt_add_request(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request; - int err = -ENOMEM; - - /* Basic preliminary test to create a request and let it loose! */ - - mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], - i915->kernel_context, - HZ / 10); - if (!request) - goto out_unlock; - - i915_add_request(request); - - err = 0; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int igt_wait_request(void *arg) -{ - const long T = HZ / 4; - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request; - int err = -EINVAL; - - /* Submit a request, then wait upon it */ - - mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], i915->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_unlock; - } - - if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { - pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); - goto out_unlock; - } - - if (i915_wait_request(request, I915_WAIT_LOCKED, T) != -ETIME) { - pr_err("request wait succeeded (expected timeout before submit!)\n"); - goto out_unlock; - } - - if (i915_gem_request_completed(request)) { - pr_err("request completed before submit!!\n"); - goto out_unlock; - } - - i915_add_request(request); - - if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { - pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); - goto out_unlock; - } - - if (i915_gem_request_completed(request)) { - pr_err("request
completed immediately!\n"); - goto out_unlock; - } - - if (i915_wait_request(request, I915_WAIT_LOCKED, T / 2) != -ETIME) { - pr_err("request wait succeeded (expected timeout!)\n"); - goto out_unlock; - } - - if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { - pr_err("request wait timed out!\n"); - goto out_unlock; - } - - if (!i915_gem_request_completed(request)) { - pr_err("request not complete after waiting!\n"); - goto out_unlock; - } - - if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { - pr_err("request wait timed out when already complete!\n"); - goto out_unlock; - } - - err = 0; -out_unlock: - mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int igt_fence_wait(void *arg) -{ - const long T = HZ / 4; - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request; - int err = -EINVAL; - - /* Submit a request, treat it as a fence and wait upon it */ - - mutex_lock(&i915->drm.struct_mutex); - request = mock_request(i915->engine[RCS], i915->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_locked; - } - mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */ - - if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { - pr_err("fence wait success before submit (expected timeout)!\n"); - goto out_device; - } - - mutex_lock(&i915->drm.struct_mutex); - i915_add_request(request); - mutex_unlock(&i915->drm.struct_mutex); - - if (dma_fence_is_signaled(&request->fence)) { - pr_err("fence signaled immediately!\n"); - goto out_device; - } - - if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { - pr_err("fence wait success after submit (expected timeout)!\n"); - goto out_device; - } - - if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { - pr_err("fence wait timed out (expected success)!\n"); - goto out_device; - } - - if (!dma_fence_is_signaled(&request->fence)) { - pr_err("fence unsignaled after waiting!\n"); - 
goto out_device; - } - - if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { - pr_err("fence wait timed out when complete (expected success)!\n"); - goto out_device; - } - - err = 0; -out_device: - mutex_lock(&i915->drm.struct_mutex); -out_locked: - mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int igt_request_rewind(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request, *vip; - struct i915_gem_context *ctx[2]; - int err = -EINVAL; - - mutex_lock(&i915->drm.struct_mutex); - ctx[0] = mock_context(i915, "A"); - request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ); - if (!request) { - err = -ENOMEM; - goto err_context_0; - } - - i915_gem_request_get(request); - i915_add_request(request); - - ctx[1] = mock_context(i915, "B"); - vip = mock_request(i915->engine[RCS], ctx[1], 0); - if (!vip) { - err = -ENOMEM; - goto err_context_1; - } - - /* Simulate preemption by manual reordering */ - if (!mock_cancel_request(request)) { - pr_err("failed to cancel request (already executed)!\n"); - i915_add_request(vip); - goto err_context_1; - } - i915_gem_request_get(vip); - i915_add_request(vip); - rcu_read_lock(); - request->engine->submit_request(request); - rcu_read_unlock(); - - mutex_unlock(&i915->drm.struct_mutex); - - if (i915_wait_request(vip, 0, HZ) == -ETIME) { - pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", - vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); - goto err; - } - - if (i915_gem_request_completed(request)) { - pr_err("low priority request already completed\n"); - goto err; - } - - err = 0; -err: - i915_gem_request_put(vip); - mutex_lock(&i915->drm.struct_mutex); -err_context_1: - mock_context_close(ctx[1]); - i915_gem_request_put(request); -err_context_0: - mock_context_close(ctx[0]); - mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -int 
i915_gem_request_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_add_request), - SUBTEST(igt_wait_request), - SUBTEST(igt_fence_wait), - SUBTEST(igt_request_rewind), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - drm_dev_unref(&i915->drm); - - return err; -} - -struct live_test { - struct drm_i915_private *i915; - const char *func; - const char *name; - - unsigned int reset_count; -}; - -static int begin_live_test(struct live_test *t, - struct drm_i915_private *i915, - const char *func, - const char *name) -{ - int err; - - t->i915 = i915; - t->func = func; - t->name = name; - - err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); - if (err) { - pr_err("%s(%s): failed to idle before, with err=%d!", - func, name, err); - return err; - } - - i915->gpu_error.missed_irq_rings = 0; - t->reset_count = i915_reset_count(&i915->gpu_error); - - return 0; -} - -static int end_live_test(struct live_test *t) -{ - struct drm_i915_private *i915 = t->i915; - - i915_gem_retire_requests(i915); - - if (wait_for(intel_engines_are_idle(i915), 10)) { - pr_err("%s(%s): GPU not idle\n", t->func, t->name); - return -EIO; - } - - if (t->reset_count != i915_reset_count(&i915->gpu_error)) { - pr_err("%s(%s): GPU was reset %d times!\n", - t->func, t->name, - i915_reset_count(&i915->gpu_error) - t->reset_count); - return -EIO; - } - - if (i915->gpu_error.missed_irq_rings) { - pr_err("%s(%s): Missed interrupts on engines %lx\n", - t->func, t->name, i915->gpu_error.missed_irq_rings); - return -EIO; - } - - return 0; -} - -static int live_nop_request(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct live_test t; - unsigned int id; - int err = -ENODEV; - - /* Submit various sized batches of empty requests, to each engine - * (individually), and wait for the batch to complete. 
We can check - * the overhead of submitting requests to the hardware. - */ - - mutex_lock(&i915->drm.struct_mutex); - - for_each_engine(engine, i915, id) { - IGT_TIMEOUT(end_time); - struct drm_i915_gem_request *request; - unsigned long n, prime; - ktime_t times[2] = {}; - - err = begin_live_test(&t, i915, __func__, engine->name); - if (err) - goto out_unlock; - - for_each_prime_number_from(prime, 1, 8192) { - times[1] = ktime_get_raw(); - - for (n = 0; n < prime; n++) { - request = i915_gem_request_alloc(engine, - i915->kernel_context); - if (IS_ERR(request)) { - err = PTR_ERR(request); - goto out_unlock; - } - - /* This space is left intentionally blank. - * - * We do not actually want to perform any - * action with this request, we just want - * to measure the latency in allocation - * and submission of our breadcrumbs - - * ensuring that the bare request is sufficient - * for the system to work (i.e. proper HEAD - * tracking of the rings, interrupt handling, - * etc). It also gives us the lowest bounds - * for latency. 
- */ - - i915_add_request(request); - } - i915_wait_request(request, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - times[1] = ktime_sub(ktime_get_raw(), times[1]); - if (prime == 1) - times[0] = times[1]; - - if (__igt_timeout(end_time, NULL)) - break; - } - - err = end_live_test(&t); - if (err) - goto out_unlock; - - pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", - engine->name, - ktime_to_ns(times[0]), - prime, div64_u64(ktime_to_ns(times[1]), prime)); - } - -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static struct i915_vma *empty_batch(struct drm_i915_private *i915) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - u32 *cmd; - int err; - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(obj); - - err = i915_gem_object_set_to_gtt_domain(obj, false); - if (err) - goto err; - - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); - if (err) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - -static struct drm_i915_gem_request * -empty_request(struct intel_engine_cs *engine, - struct i915_vma *batch) -{ - struct drm_i915_gem_request *request; - int err; - - request = i915_gem_request_alloc(engine, - engine->i915->kernel_context); - if (IS_ERR(request)) - return request; - - err = engine->emit_bb_start(request, - batch->node.start, - batch->node.size, - I915_DISPATCH_SECURE); - if (err) - goto out_request; - -out_request: - __i915_add_request(request, err == 0); - return err ? 
ERR_PTR(err) : request; -} - -static int live_empty_request(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct live_test t; - struct i915_vma *batch; - unsigned int id; - int err = 0; - - /* Submit various sized batches of empty requests, to each engine - * (individually), and wait for the batch to complete. We can check - * the overhead of submitting requests to the hardware. - */ - - mutex_lock(&i915->drm.struct_mutex); - - batch = empty_batch(i915); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_unlock; - } - - for_each_engine(engine, i915, id) { - IGT_TIMEOUT(end_time); - struct drm_i915_gem_request *request; - unsigned long n, prime; - ktime_t times[2] = {}; - - err = begin_live_test(&t, i915, __func__, engine->name); - if (err) - goto out_batch; - - /* Warmup / preload */ - request = empty_request(engine, batch); - if (IS_ERR(request)) { - err = PTR_ERR(request); - goto out_batch; - } - i915_wait_request(request, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - for_each_prime_number_from(prime, 1, 8192) { - times[1] = ktime_get_raw(); - - for (n = 0; n < prime; n++) { - request = empty_request(engine, batch); - if (IS_ERR(request)) { - err = PTR_ERR(request); - goto out_batch; - } - } - i915_wait_request(request, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - times[1] = ktime_sub(ktime_get_raw(), times[1]); - if (prime == 1) - times[0] = times[1]; - - if (__igt_timeout(end_time, NULL)) - break; - } - - err = end_live_test(&t); - if (err) - goto out_batch; - - pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", - engine->name, - ktime_to_ns(times[0]), - prime, div64_u64(ktime_to_ns(times[1]), prime)); - } - -out_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static struct i915_vma *recursive_batch(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx = i915->kernel_context; - struct i915_address_space 
*vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; - struct drm_i915_gem_object *obj; - const int gen = INTEL_GEN(i915); - struct i915_vma *vma; - u32 *cmd; - int err; - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; - - err = i915_gem_object_set_to_wc_domain(obj, true); - if (err) - goto err; - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - if (gen >= 8) { - *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; - *cmd++ = lower_32_bits(vma->node.start); - *cmd++ = upper_32_bits(vma->node.start); - } else if (gen >= 6) { - *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; - *cmd++ = lower_32_bits(vma->node.start); - } else if (gen >= 4) { - *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cmd++ = lower_32_bits(vma->node.start); - } else { - *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | 1; - *cmd++ = lower_32_bits(vma->node.start); - } - *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(obj); - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - -static int recursive_batch_resolve(struct i915_vma *batch) -{ - u32 *cmd; - - cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); - if (IS_ERR(cmd)) - return PTR_ERR(cmd); - - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(batch->vm->i915); - - i915_gem_object_unpin_map(batch->obj); - - return 0; -} - -static int live_all_engines(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct drm_i915_gem_request *request[I915_NUM_ENGINES]; - struct i915_vma *batch; - struct live_test t; - unsigned int id; - int err; - - /* Check we can submit requests to all engines simultaneously. 
We - * send a recursive batch to each engine - checking that we don't - * block doing so, and that they don't complete too soon. - */ - - mutex_lock(&i915->drm.struct_mutex); - - err = begin_live_test(&t, i915, __func__, ""); - if (err) - goto out_unlock; - - batch = recursive_batch(i915); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - pr_err("%s: Unable to create batch, err=%d\n", __func__, err); - goto out_unlock; - } - - for_each_engine(engine, i915, id) { - request[id] = i915_gem_request_alloc(engine, - i915->kernel_context); - if (IS_ERR(request[id])) { - err = PTR_ERR(request[id]); - pr_err("%s: Request allocation failed with err=%d\n", - __func__, err); - goto out_request; - } - - err = engine->emit_bb_start(request[id], - batch->node.start, - batch->node.size, - 0); - GEM_BUG_ON(err); - request[id]->batch = batch; - - if (!i915_gem_object_has_active_reference(batch->obj)) { - i915_gem_object_get(batch->obj); - i915_gem_object_set_active_reference(batch->obj); - } - - i915_vma_move_to_active(batch, request[id], 0); - i915_gem_request_get(request[id]); - i915_add_request(request[id]); - } - - for_each_engine(engine, i915, id) { - if (i915_gem_request_completed(request[id])) { - pr_err("%s(%s): request completed too early!\n", - __func__, engine->name); - err = -EINVAL; - goto out_request; - } - } - - err = recursive_batch_resolve(batch); - if (err) { - pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); - goto out_request; - } - - for_each_engine(engine, i915, id) { - long timeout; - - timeout = i915_wait_request(request[id], - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) { - err = timeout; - pr_err("%s: error waiting for request on %s, err=%d\n", - __func__, engine->name, err); - goto out_request; - } - - GEM_BUG_ON(!i915_gem_request_completed(request[id])); - i915_gem_request_put(request[id]); - request[id] = NULL; - } - - err = end_live_test(&t); - -out_request: - for_each_engine(engine, i915, id) - if (request[id]) - 
i915_gem_request_put(request[id]); - i915_vma_unpin(batch); - i915_vma_put(batch); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -static int live_sequential_engines(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *request[I915_NUM_ENGINES] = {}; - struct drm_i915_gem_request *prev = NULL; - struct intel_engine_cs *engine; - struct live_test t; - unsigned int id; - int err; - - /* Check we can submit requests to all engines sequentially, such - * that each successive request waits for the earlier ones. This - * tests that we don't execute requests out of order, even though - * they are running on independent engines. - */ - - mutex_lock(&i915->drm.struct_mutex); - - err = begin_live_test(&t, i915, __func__, ""); - if (err) - goto out_unlock; - - for_each_engine(engine, i915, id) { - struct i915_vma *batch; - - batch = recursive_batch(i915); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - pr_err("%s: Unable to create batch for %s, err=%d\n", - __func__, engine->name, err); - goto out_unlock; - } - - request[id] = i915_gem_request_alloc(engine, - i915->kernel_context); - if (IS_ERR(request[id])) { - err = PTR_ERR(request[id]); - pr_err("%s: Request allocation failed for %s with err=%d\n", - __func__, engine->name, err); - goto out_request; - } - - if (prev) { - err = i915_gem_request_await_dma_fence(request[id], - &prev->fence); - if (err) { - i915_add_request(request[id]); - pr_err("%s: Request await failed for %s with err=%d\n", - __func__, engine->name, err); - goto out_request; - } - } - - err = engine->emit_bb_start(request[id], - batch->node.start, - batch->node.size, - 0); - GEM_BUG_ON(err); - request[id]->batch = batch; - - i915_vma_move_to_active(batch, request[id], 0); - i915_gem_object_set_active_reference(batch->obj); - i915_vma_get(batch); - - i915_gem_request_get(request[id]); - i915_add_request(request[id]); - - prev = request[id]; - } - - for_each_engine(engine, i915, id) { - long timeout; 
- - if (i915_gem_request_completed(request[id])) { - pr_err("%s(%s): request completed too early!\n", - __func__, engine->name); - err = -EINVAL; - goto out_request; - } - - err = recursive_batch_resolve(request[id]->batch); - if (err) { - pr_err("%s: failed to resolve batch, err=%d\n", - __func__, err); - goto out_request; - } - - timeout = i915_wait_request(request[id], - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) { - err = timeout; - pr_err("%s: error waiting for request on %s, err=%d\n", - __func__, engine->name, err); - goto out_request; - } - - GEM_BUG_ON(!i915_gem_request_completed(request[id])); - } - - err = end_live_test(&t); - -out_request: - for_each_engine(engine, i915, id) { - u32 *cmd; - - if (!request[id]) - break; - - cmd = i915_gem_object_pin_map(request[id]->batch->obj, - I915_MAP_WC); - if (!IS_ERR(cmd)) { - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(request[id]->batch->obj); - } - - i915_vma_put(request[id]->batch); - i915_gem_request_put(request[id]); - } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -} - -int i915_gem_request_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(live_nop_request), - SUBTEST(live_all_engines), - SUBTEST(live_sequential_engines), - SUBTEST(live_empty_request), - }; - return i915_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 088f45bc6199..9c76f0305b6a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -11,7 +11,7 @@ */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) -selftest(requests, i915_gem_request_live_selftests) +selftest(requests, i915_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) 
selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 19c6fce837df..9a48aa441743 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -16,7 +16,7 @@ selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(timelines, i915_gem_timeline_mock_selftests) -selftest(requests, i915_gem_request_mock_selftests) +selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(vma, i915_vma_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c new file mode 100644 index 000000000000..94bc2e1898a4 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -0,0 +1,865 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "../i915_selftest.h" + +#include "mock_context.h" +#include "mock_gem_device.h" + +static int igt_add_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_request *request; + int err = -ENOMEM; + + /* Basic preliminary test to create a request and let it loose! */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], + i915->kernel_context, + HZ / 10); + if (!request) + goto out_unlock; + + i915_request_add(request); + + err = 0; +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_wait_request(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct i915_request *request; + int err = -EINVAL; + + /* Submit a request, then wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_unlock; + } + + if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) { + pr_err("request wait succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_request_completed(request)) { + pr_err("request completed before submit!!\n"); + goto out_unlock; + } + + i915_request_add(request); + + if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); + goto out_unlock; + } + + if (i915_request_completed(request)) { + pr_err("request completed immediately!\n"); + 
goto out_unlock; + } + + if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) { + pr_err("request wait succeeded (expected timeout!)\n"); + goto out_unlock; + } + + if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out!\n"); + goto out_unlock; + } + + if (!i915_request_completed(request)) { + pr_err("request not complete after waiting!\n"); + goto out_unlock; + } + + if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out when already complete!\n"); + goto out_unlock; + } + + err = 0; +out_unlock: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_fence_wait(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct i915_request *request; + int err = -EINVAL; + + /* Submit a request, treat it as a fence and wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_locked; + } + mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */ + + if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { + pr_err("fence wait success before submit (expected timeout)!\n"); + goto out_device; + } + + mutex_lock(&i915->drm.struct_mutex); + i915_request_add(request); + mutex_unlock(&i915->drm.struct_mutex); + + if (dma_fence_is_signaled(&request->fence)) { + pr_err("fence signaled immediately!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { + pr_err("fence wait success after submit (expected timeout)!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out (expected success)!\n"); + goto out_device; + } + + if (!dma_fence_is_signaled(&request->fence)) { + pr_err("fence unsignaled after waiting!\n"); + goto out_device; + } + + if 
(dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out when complete (expected success)!\n"); + goto out_device; + } + + err = 0; +out_device: + mutex_lock(&i915->drm.struct_mutex); +out_locked: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_request_rewind(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_request *request, *vip; + struct i915_gem_context *ctx[2]; + int err = -EINVAL; + + mutex_lock(&i915->drm.struct_mutex); + ctx[0] = mock_context(i915, "A"); + request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ); + if (!request) { + err = -ENOMEM; + goto err_context_0; + } + + i915_request_get(request); + i915_request_add(request); + + ctx[1] = mock_context(i915, "B"); + vip = mock_request(i915->engine[RCS], ctx[1], 0); + if (!vip) { + err = -ENOMEM; + goto err_context_1; + } + + /* Simulate preemption by manual reordering */ + if (!mock_cancel_request(request)) { + pr_err("failed to cancel request (already executed)!\n"); + i915_request_add(vip); + goto err_context_1; + } + i915_request_get(vip); + i915_request_add(vip); + rcu_read_lock(); + request->engine->submit_request(request); + rcu_read_unlock(); + + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_request_wait(vip, 0, HZ) == -ETIME) { + pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", + vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); + goto err; + } + + if (i915_request_completed(request)) { + pr_err("low priority request already completed\n"); + goto err; + } + + err = 0; +err: + i915_request_put(vip); + mutex_lock(&i915->drm.struct_mutex); +err_context_1: + mock_context_close(ctx[1]); + i915_request_put(request); +err_context_0: + mock_context_close(ctx[0]); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_request_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + 
SUBTEST(igt_add_request), + SUBTEST(igt_wait_request), + SUBTEST(igt_fence_wait), + SUBTEST(igt_request_rewind), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + drm_dev_unref(&i915->drm); + + return err; +} + +struct live_test { + struct drm_i915_private *i915; + const char *func; + const char *name; + + unsigned int reset_count; +}; + +static int begin_live_test(struct live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name) +{ + int err; + + t->i915 = i915; + t->func = func; + t->name = name; + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } + + i915->gpu_error.missed_irq_rings = 0; + t->reset_count = i915_reset_count(&i915->gpu_error); + + return 0; +} + +static int end_live_test(struct live_test *t) +{ + struct drm_i915_private *i915 = t->i915; + + i915_retire_requests(i915); + + if (wait_for(intel_engines_are_idle(i915), 10)) { + pr_err("%s(%s): GPU not idle\n", t->func, t->name); + return -EIO; + } + + if (t->reset_count != i915_reset_count(&i915->gpu_error)) { + pr_err("%s(%s): GPU was reset %d times!\n", + t->func, t->name, + i915_reset_count(&i915->gpu_error) - t->reset_count); + return -EIO; + } + + if (i915->gpu_error.missed_irq_rings) { + pr_err("%s(%s): Missed interrupts on engines %lx\n", + t->func, t->name, i915->gpu_error.missed_irq_rings); + return -EIO; + } + + return 0; +} + +static int live_nop_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err = -ENODEV; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. 
+ */ + + mutex_lock(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct i915_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_unlock; + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = i915_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_unlock; + } + + /* This space is left intentionally blank. + * + * We do not actually want to perform any + * action with this request, we just want + * to measure the latency in allocation + * and submission of our breadcrumbs - + * ensuring that the bare request is sufficient + * for the system to work (i.e. proper HEAD + * tracking of the rings, interrupt handling, + * etc). It also gives us the lowest bounds + * for latency. + */ + + i915_request_add(request); + } + i915_request_wait(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_unlock; + + pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_vma *empty_batch(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + + i915_gem_object_unpin_map(obj); + + err = 
i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static struct i915_request * +empty_request(struct intel_engine_cs *engine, + struct i915_vma *batch) +{ + struct i915_request *request; + int err; + + request = i915_request_alloc(engine, engine->i915->kernel_context); + if (IS_ERR(request)) + return request; + + err = engine->emit_bb_start(request, + batch->node.start, + batch->node.size, + I915_DISPATCH_SECURE); + if (err) + goto out_request; + +out_request: + __i915_request_add(request, err == 0); + return err ? ERR_PTR(err) : request; +} + +static int live_empty_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + struct i915_vma *batch; + unsigned int id; + int err = 0; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. 
+ */ + + mutex_lock(&i915->drm.struct_mutex); + + batch = empty_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct i915_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_batch; + + /* Warmup / preload */ + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + i915_request_wait(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + } + i915_request_wait(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_batch; + + pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_vma *recursive_batch(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx = i915->kernel_context; + struct i915_address_space *vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(i915); + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + err = i915_gem_object_set_to_wc_domain(obj, true); + if (err) + goto err; + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + if (gen >= 8) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = upper_32_bits(vma->node.start); + } else if (gen >= 6) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; + *cmd++ = lower_32_bits(vma->node.start); + } else if (gen >= 4) { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cmd++ = lower_32_bits(vma->node.start); + } else { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | 1; + *cmd++ = lower_32_bits(vma->node.start); + } + *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ + i915_gem_chipset_flush(i915); + + i915_gem_object_unpin_map(obj); + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int recursive_batch_resolve(struct i915_vma *batch) +{ + u32 *cmd; + + cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(batch->vm->i915); + + i915_gem_object_unpin_map(batch->obj); + + return 0; +} + +static int live_all_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_request *request[I915_NUM_ENGINES]; + struct i915_vma *batch; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines simultaneously. 
We + * send a recursive batch to each engine - checking that we don't + * block doing so, and that they don't complete too soon. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch, err=%d\n", __func__, err); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + request[id] = i915_request_alloc(engine, i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed with err=%d\n", + __func__, err); + goto out_request; + } + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + if (!i915_gem_object_has_active_reference(batch->obj)) { + i915_gem_object_get(batch->obj); + i915_gem_object_set_active_reference(batch->obj); + } + + i915_vma_move_to_active(batch, request[id], 0); + i915_request_get(request[id]); + i915_request_add(request[id]); + } + + for_each_engine(engine, i915, id) { + if (i915_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + } + + err = recursive_batch_resolve(batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); + goto out_request; + } + + for_each_engine(engine, i915, id) { + long timeout; + + timeout = i915_request_wait(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_request_completed(request[id])); + i915_request_put(request[id]); + request[id] = NULL; + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) + if (request[id]) + 
i915_request_put(request[id]); + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_sequential_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_request *request[I915_NUM_ENGINES] = {}; + struct i915_request *prev = NULL; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines sequentially, such + * that each successive request waits for the earlier ones. This + * tests that we don't execute requests out of order, even though + * they are running on independent engines. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + for_each_engine(engine, i915, id) { + struct i915_vma *batch; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch for %s, err=%d\n", + __func__, engine->name, err); + goto out_unlock; + } + + request[id] = i915_request_alloc(engine, i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + if (prev) { + err = i915_request_await_dma_fence(request[id], + &prev->fence); + if (err) { + i915_request_add(request[id]); + pr_err("%s: Request await failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + } + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_get(batch); + + i915_request_get(request[id]); + i915_request_add(request[id]); + + prev = request[id]; + } + + for_each_engine(engine, i915, id) { + long timeout; + + if 
(i915_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + + err = recursive_batch_resolve(request[id]->batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", + __func__, err); + goto out_request; + } + + timeout = i915_request_wait(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_request_completed(request[id])); + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) { + u32 *cmd; + + if (!request[id]) + break; + + cmd = i915_gem_object_pin_map(request[id]->batch->obj, + I915_MAP_WC); + if (!IS_ERR(cmd)) { + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + + i915_gem_object_unpin_map(request[id]->batch->obj); + } + + i915_vma_put(request[id]->batch); + i915_request_put(request[id]); + } +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_request_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_nop_request), + SUBTEST(live_all_engines), + SUBTEST(live_sequential_engines), + SUBTEST(live_empty_request), + }; + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index d1d2c2456f69..3edbb3e8c0e1 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -92,13 +92,13 @@ err_ctx: } static u64 hws_address(const struct i915_vma *hws, - const struct drm_i915_gem_request *rq) + const struct i915_request *rq) { return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); } static int emit_recurse_batch(struct hang *h, - struct drm_i915_gem_request *rq) + struct i915_request *rq) 
{ struct drm_i915_private *i915 = h->i915; struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base; @@ -204,10 +204,10 @@ unpin_vma: return err; } -static struct drm_i915_gem_request * +static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; int err; if (i915_gem_object_is_active(h->obj)) { @@ -232,21 +232,20 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) h->batch = vaddr; } - rq = i915_gem_request_alloc(engine, h->ctx); + rq = i915_request_alloc(engine, h->ctx); if (IS_ERR(rq)) return rq; err = emit_recurse_batch(h, rq); if (err) { - __i915_add_request(rq, false); + __i915_request_add(rq, false); return ERR_PTR(err); } return rq; } -static u32 hws_seqno(const struct hang *h, - const struct drm_i915_gem_request *rq) +static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) { return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); } @@ -319,7 +318,7 @@ static void hang_fini(struct hang *h) flush_test(h->i915, I915_WAIT_LOCKED); } -static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +static bool wait_for_hang(struct hang *h, struct i915_request *rq) { return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), rq->fence.seqno), @@ -332,7 +331,7 @@ static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) static int igt_hang_sanitycheck(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct intel_engine_cs *engine; enum intel_engine_id id; struct hang h; @@ -359,17 +358,17 @@ static int igt_hang_sanitycheck(void *arg) goto fini; } - i915_gem_request_get(rq); + i915_request_get(rq); *h.batch = MI_BATCH_BUFFER_END; i915_gem_chipset_flush(i915); - __i915_add_request(rq, true); + __i915_request_add(rq, true); - timeout = i915_wait_request(rq, + timeout = i915_request_wait(rq, 
I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(rq); + i915_request_put(rq); if (timeout < 0) { err = timeout; @@ -485,7 +484,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { if (active) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; mutex_lock(&i915->drm.struct_mutex); rq = hang_create_request(&h, engine); @@ -495,8 +494,8 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); if (!wait_for_hang(&h, rq)) { @@ -507,12 +506,12 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) intel_engine_dump(engine, &p, "%s\n", engine->name); - i915_gem_request_put(rq); + i915_request_put(rq); err = -EIO; break; } - i915_gem_request_put(rq); + i915_request_put(rq); } engine->hangcheck.stalled = true; @@ -577,7 +576,7 @@ static int igt_reset_active_engine(void *arg) static int active_engine(void *data) { struct intel_engine_cs *engine = data; - struct drm_i915_gem_request *rq[2] = {}; + struct i915_request *rq[2] = {}; struct i915_gem_context *ctx[2]; struct drm_file *file; unsigned long count = 0; @@ -606,29 +605,29 @@ static int active_engine(void *data) while (!kthread_should_stop()) { unsigned int idx = count++ & 1; - struct drm_i915_gem_request *old = rq[idx]; - struct drm_i915_gem_request *new; + struct i915_request *old = rq[idx]; + struct i915_request *new; mutex_lock(&engine->i915->drm.struct_mutex); - new = i915_gem_request_alloc(engine, ctx[idx]); + new = i915_request_alloc(engine, ctx[idx]); if (IS_ERR(new)) { mutex_unlock(&engine->i915->drm.struct_mutex); err = PTR_ERR(new); break; } - rq[idx] = i915_gem_request_get(new); - i915_add_request(new); + rq[idx] = i915_request_get(new); + i915_request_add(new); 
mutex_unlock(&engine->i915->drm.struct_mutex); if (old) { - i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT); - i915_gem_request_put(old); + i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(old); } } for (count = 0; count < ARRAY_SIZE(rq); count++) - i915_gem_request_put(rq[count]); + i915_request_put(rq[count]); err_file: mock_file_free(engine->i915, file); @@ -692,7 +691,7 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { if (active) { - struct drm_i915_gem_request *rq; + struct i915_request *rq; mutex_lock(&i915->drm.struct_mutex); rq = hang_create_request(&h, engine); @@ -702,8 +701,8 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, break; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); if (!wait_for_hang(&h, rq)) { @@ -714,12 +713,12 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, intel_engine_dump(engine, &p, "%s\n", engine->name); - i915_gem_request_put(rq); + i915_request_put(rq); err = -EIO; break; } - i915_gem_request_put(rq); + i915_request_put(rq); } engine->hangcheck.stalled = true; @@ -814,7 +813,7 @@ static int igt_reset_active_engine_others(void *arg) return __igt_reset_engine_others(arg, true); } -static u32 fake_hangcheck(struct drm_i915_gem_request *rq) +static u32 fake_hangcheck(struct i915_request *rq) { u32 reset_count; @@ -832,7 +831,7 @@ static u32 fake_hangcheck(struct drm_i915_gem_request *rq) static int igt_wait_reset(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int reset_count; struct hang h; long timeout; @@ -856,8 +855,8 @@ static int igt_wait_reset(void *arg) goto fini; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); if (!wait_for_hang(&h, 
rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -875,7 +874,7 @@ static int igt_wait_reset(void *arg) reset_count = fake_hangcheck(rq); - timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10); + timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { pr_err("i915_wait_request failed on a stuck request: err=%ld\n", timeout); @@ -891,7 +890,7 @@ static int igt_wait_reset(void *arg) } out_rq: - i915_gem_request_put(rq); + i915_request_put(rq); fini: hang_fini(&h); unlock: @@ -922,7 +921,7 @@ static int igt_reset_queue(void *arg) goto unlock; for_each_engine(engine, i915, id) { - struct drm_i915_gem_request *prev; + struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; @@ -935,12 +934,12 @@ static int igt_reset_queue(void *arg) goto fini; } - i915_gem_request_get(prev); - __i915_add_request(prev, true); + i915_request_get(prev); + __i915_request_add(prev, true); count = 0; do { - struct drm_i915_gem_request *rq; + struct i915_request *rq; unsigned int reset_count; rq = hang_create_request(&h, engine); @@ -949,8 +948,8 @@ static int igt_reset_queue(void *arg) goto fini; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); if (!wait_for_hang(&h, prev)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -960,8 +959,8 @@ static int igt_reset_queue(void *arg) intel_engine_dump(prev->engine, &p, "%s\n", prev->engine->name); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); i915_reset(i915, 0); i915_gem_set_wedged(i915); @@ -980,8 +979,8 @@ static int igt_reset_queue(void *arg) if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", prev->fence.error); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); err = -EINVAL; goto fini; } @@ -989,21 +988,21 @@ static int igt_reset_queue(void *arg) 
if (rq->fence.error) { pr_err("Fence error status not zero [%d] after unrelated reset\n", rq->fence.error); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); err = -EINVAL; goto fini; } if (i915_reset_count(&i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); - i915_gem_request_put(rq); - i915_gem_request_put(prev); + i915_request_put(rq); + i915_request_put(prev); err = -EINVAL; goto fini; } - i915_gem_request_put(prev); + i915_request_put(prev); prev = rq; count++; } while (time_before(jiffies, end_time)); @@ -1012,7 +1011,7 @@ static int igt_reset_queue(void *arg) *h.batch = MI_BATCH_BUFFER_END; i915_gem_chipset_flush(i915); - i915_gem_request_put(prev); + i915_request_put(prev); err = flush_test(i915, I915_WAIT_LOCKED); if (err) @@ -1036,7 +1035,7 @@ static int igt_handle_error(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine = i915->engine[RCS]; struct hang h; - struct drm_i915_gem_request *rq; + struct i915_request *rq; struct i915_gpu_state *error; int err; @@ -1060,8 +1059,8 @@ static int igt_handle_error(void *arg) goto err_fini; } - i915_gem_request_get(rq); - __i915_add_request(rq, true); + i915_request_get(rq); + __i915_request_add(rq, true); if (!wait_for_hang(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); @@ -1098,7 +1097,7 @@ static int igt_handle_error(void *arg) } err_request: - i915_gem_request_put(rq); + i915_request_put(rq); err_fini: hang_fini(&h); err_unlock: diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 55c0e2c15782..78a89efa1119 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -81,7 +81,7 @@ static void mock_context_unpin(struct intel_engine_cs *engine, i915_gem_context_put(ctx); } -static int mock_request_alloc(struct drm_i915_gem_request *request) +static int mock_request_alloc(struct 
i915_request *request) { struct mock_request *mock = container_of(request, typeof(*mock), base); @@ -91,24 +91,24 @@ static int mock_request_alloc(struct drm_i915_gem_request *request) return 0; } -static int mock_emit_flush(struct drm_i915_gem_request *request, +static int mock_emit_flush(struct i915_request *request, unsigned int flags) { return 0; } -static void mock_emit_breadcrumb(struct drm_i915_gem_request *request, +static void mock_emit_breadcrumb(struct i915_request *request, u32 *flags) { } -static void mock_submit_request(struct drm_i915_gem_request *request) +static void mock_submit_request(struct i915_request *request) { struct mock_request *mock = container_of(request, typeof(*mock), base); struct mock_engine *engine = container_of(request->engine, typeof(*engine), base); - i915_gem_request_submit(request); + i915_request_submit(request); GEM_BUG_ON(!request->global_seqno); spin_lock_irq(&engine->hw_lock); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 3175db70cc6e..e6d4b882599a 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -43,7 +43,7 @@ void mock_device_flush(struct drm_i915_private *i915) for_each_engine(engine, i915, id) mock_engine_flush(engine); - i915_gem_retire_requests(i915); + i915_retire_requests(i915); } static void mock_device_release(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index 8097e3693ec4..0dc29e242597 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -25,16 +25,16 @@ #include "mock_engine.h" #include "mock_request.h" -struct drm_i915_gem_request * +struct i915_request * mock_request(struct intel_engine_cs *engine, struct i915_gem_context *context, unsigned long delay) { - struct drm_i915_gem_request *request; + struct i915_request 
*request; struct mock_request *mock; /* NB the i915->requests slab cache is enlarged to fit mock_request */ - request = i915_gem_request_alloc(engine, context); + request = i915_request_alloc(engine, context); if (IS_ERR(request)) return NULL; @@ -44,7 +44,7 @@ mock_request(struct intel_engine_cs *engine, return &mock->base; } -bool mock_cancel_request(struct drm_i915_gem_request *request) +bool mock_cancel_request(struct i915_request *request) { struct mock_request *mock = container_of(request, typeof(*mock), base); struct mock_engine *engine = @@ -57,7 +57,7 @@ bool mock_cancel_request(struct drm_i915_gem_request *request) spin_unlock_irq(&engine->hw_lock); if (was_queued) - i915_gem_request_unsubmit(request); + i915_request_unsubmit(request); return was_queued; } diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h index 4dea74c8e96d..995fb728380c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.h +++ b/drivers/gpu/drm/i915/selftests/mock_request.h @@ -27,20 +27,20 @@ #include -#include "../i915_gem_request.h" +#include "../i915_request.h" struct mock_request { - struct drm_i915_gem_request base; + struct i915_request base; struct list_head link; unsigned long delay; }; -struct drm_i915_gem_request * +struct i915_request * mock_request(struct intel_engine_cs *engine, struct i915_gem_context *context, unsigned long delay); -bool mock_cancel_request(struct drm_i915_gem_request *request); +bool mock_cancel_request(struct i915_request *request); #endif /* !__MOCK_REQUEST__ */ -- cgit v1.2.3-59-g8ed1b