Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h')
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.h | 421
1 file changed, 370 insertions, 51 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6b2067f10824..a0e7a6c2a57c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -6,8 +6,11 @@ #include "i915_gem_batch_pool.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_pmu.h" #include "i915_selftest.h" +struct drm_printer; + #define I915_CMD_HASH_ORDER 9 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, @@ -45,16 +48,6 @@ struct intel_hw_status_page { /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. */ -#define gen8_semaphore_seqno_size sizeof(uint64_t) -#define GEN8_SEMAPHORE_OFFSET(__from, __to) \ - (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size) -#define GEN8_SIGNAL_OFFSET(__ring, to) \ - (dev_priv->semaphore->node.start + \ - GEN8_SEMAPHORE_OFFSET((__ring)->id, (to))) -#define GEN8_WAIT_OFFSET(__ring, from) \ - (dev_priv->semaphore->node.start + \ - GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) - enum intel_engine_hangcheck_action { ENGINE_IDLE = 0, ENGINE_WAIT, @@ -164,7 +157,6 @@ struct i915_ctx_workarounds { }; struct drm_i915_gem_request; -struct intel_render_state; /* * Engine IDs definitions. @@ -185,16 +177,123 @@ struct i915_priolist { int priority; }; +/** + * struct intel_engine_execlists - execlist submission queue and port state + * + * The struct intel_engine_execlists represents the combined logical state of + * driver and the hardware state for execlist mode of submission. + */ +struct intel_engine_execlists { + /** + * @tasklet: softirq tasklet for bottom handler + */ + struct tasklet_struct tasklet; + + /** + * @default_priolist: priority list for I915_PRIORITY_NORMAL + */ + struct i915_priolist default_priolist; + + /** + * @no_priolist: priority lists disabled + */ + bool no_priolist; + + /** + * @elsp: the ExecList Submission Port register + */ + u32 __iomem *elsp; + + /** + * @port: execlist port states + * + * For each hardware ELSP (ExecList Submission Port) we keep + * track of the last request and the number of times we submitted + * that port to hw. We then count the number of times the hw reports + * a context completion or preemption. As only one context can + * be active on hw, we limit resubmission of context to port[0]. This + * is called Lite Restore, of the context. + */ + struct execlist_port { + /** + * @request_count: combined request and submission count + */ + struct drm_i915_gem_request *request_count; +#define EXECLIST_COUNT_BITS 2 +#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) +#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) +#define port_set(p, packed) ((p)->request_count = (packed)) +#define port_isset(p) ((p)->request_count) +#define port_index(p, execlists) ((p) - (execlists)->port) + + /** + * @context_id: context ID for port + */ + GEM_DEBUG_DECL(u32 context_id); + +#define EXECLIST_MAX_PORTS 2 + } port[EXECLIST_MAX_PORTS]; + + /** + * @active: is the HW active? We consider the HW as active after + * submitting any context for execution and until we have seen the + * last context completion event. 
After that, we do not expect any + * more events until we submit, and so can park the HW. + * + * As we have a small number of different sources from which we feed + * the HW, we track the state of each inside a single bitfield. + */ + unsigned int active; +#define EXECLISTS_ACTIVE_USER 0 +#define EXECLISTS_ACTIVE_PREEMPT 1 +#define EXECLISTS_ACTIVE_HWACK 2 + + /** + * @port_mask: number of execlist ports - 1 + */ + unsigned int port_mask; + + /** + * @queue: queue of requests, in priority lists + */ + struct rb_root queue; + + /** + * @first: leftmost level in priority @queue + */ + struct rb_node *first; + + /** + * @fw_domains: forcewake domains for irq tasklet + */ + unsigned int fw_domains; + + /** + * @csb_head: context status buffer head + */ + unsigned int csb_head; + + /** + * @csb_use_mmio: access csb through mmio, instead of hwsp + */ + bool csb_use_mmio; +}; + #define INTEL_ENGINE_CS_MAX_NAME 8 struct intel_engine_cs { struct drm_i915_private *i915; char name[INTEL_ENGINE_CS_MAX_NAME]; + enum intel_engine_id id; - unsigned int uabi_id; unsigned int hw_id; unsigned int guc_id; + u8 uabi_id; + u8 uabi_class; + u8 class; u8 instance; u32 context_size; @@ -204,7 +303,7 @@ struct intel_engine_cs { struct intel_ring *buffer; struct intel_timeline *timeline; - struct intel_render_state *render_state; + struct drm_i915_gem_object *default_state; atomic_t irq_count; unsigned long irq_posted; @@ -240,12 +339,35 @@ struct intel_engine_cs { struct timer_list hangcheck; /* detect missed interrupts */ unsigned int hangcheck_interrupts; + unsigned int irq_enabled; bool irq_armed : 1; - bool irq_enabled : 1; I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; + struct { + /** + * @enable: Bitmask of enable sample events on this engine. + * + * Bits correspond to sample event types, for instance + * I915_SAMPLE_QUEUED is bit 0 etc. + */ + u32 enable; + /** + * @enable_count: Reference count for the enabled samplers. + * + * Index number corresponds to the bit number from @enable. + */ + unsigned int enable_count[I915_PMU_SAMPLE_BITS]; + /** + * @sample: Counter values for sampling events. + * + * Our internal timer stores the current counters in this field. + */ +#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1) + struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX]; + } pmu; + /* * A pool of objects to use as shadow copies of client batch buffers * when the command parser is enabled. Prevents the client from @@ -266,6 +388,9 @@ struct intel_engine_cs { void (*reset_hw)(struct intel_engine_cs *engine, struct drm_i915_gem_request *req); + void (*park)(struct intel_engine_cs *engine); + void (*unpark)(struct intel_engine_cs *engine); + void (*set_default_submission)(struct intel_engine_cs *engine); struct intel_ring *(*context_pin)(struct intel_engine_cs *engine, @@ -307,6 +432,14 @@ struct intel_engine_cs { void (*schedule)(struct drm_i915_gem_request *request, int priority); + /* + * Cancel all requests on the hardware, or queued for execution. + * This should only cancel the ready requests that have been + * submitted to the engine (via the engine->submit_request callback). + * This is called when marking the device as wedged. + */ + void (*cancel_requests)(struct intel_engine_cs *engine); + /* Some chipsets are not quite as coherent as advertised and need * an expensive kick to force a true read of the up-to-date seqno. * However, the up-to-date seqno is not always required and the last @@ -354,18 +487,15 @@ struct intel_engine_cs { * ie. 
transpose of f(x, y) */ struct { - union { #define GEN6_SEMAPHORE_LAST VECS_HW #define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) #define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0) - struct { - /* our mbox written by others */ - u32 wait[GEN6_NUM_SEMAPHORES]; - /* mboxes this ring signals to */ - i915_reg_t signal[GEN6_NUM_SEMAPHORES]; - } mbox; - u64 signal_ggtt[I915_NUM_ENGINES]; - }; + struct { + /* our mbox written by others */ + u32 wait[GEN6_NUM_SEMAPHORES]; + /* mboxes this ring signals to */ + i915_reg_t signal[GEN6_NUM_SEMAPHORES]; + } mbox; /* AKA wait() */ int (*sync_to)(struct drm_i915_gem_request *req, @@ -373,25 +503,7 @@ struct intel_engine_cs { u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); } semaphore; - /* Execlists */ - struct tasklet_struct irq_tasklet; - struct i915_priolist default_priolist; - bool no_priolist; - struct execlist_port { - struct drm_i915_gem_request *request_count; -#define EXECLIST_COUNT_BITS 2 -#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) -#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) -#define port_set(p, packed) ((p)->request_count = (packed)) -#define port_isset(p) ((p)->request_count) -#define port_index(p, e) ((p) - (e)->execlist_port) - GEM_DEBUG_DECL(u32 context_id); - } execlist_port[2]; - struct rb_root execlist_queue; - struct rb_node *execlist_first; - unsigned int fw_domains; + struct intel_engine_execlists execlists; /* Contexts are pinned whilst they are active on the GPU. The last * context executed remains active whilst the GPU is idle - the @@ -411,13 +523,16 @@ struct intel_engine_cs { * stream (ring). */ struct i915_gem_context *legacy_active_context; + struct i915_hw_ppgtt *legacy_active_ppgtt; /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; struct intel_engine_hangcheck hangcheck; - bool needs_cmd_parser; +#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_SUPPORTS_STATS BIT(1) + unsigned int flags; /* * Table of commands the command parser needs to know about @@ -442,8 +557,96 @@ struct intel_engine_cs { * certain bits to encode the command length in the header). */ u32 (*get_cmd_length_mask)(u32 cmd_header); + + struct { + /** + * @lock: Lock protecting the below fields. + */ + spinlock_t lock; + /** + * @enabled: Reference count indicating number of listeners. + */ + unsigned int enabled; + /** + * @active: Number of contexts currently scheduled in. + */ + unsigned int active; + /** + * @enabled_at: Timestamp when busy stats were enabled. + */ + ktime_t enabled_at; + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. + */ + ktime_t start; + /** + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases + * where engine is currently busy (active > 0). 
+ */ + ktime_t total; + } stats; }; +static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; +} + +static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_SUPPORTS_STATS; +} + +static inline void +execlists_set_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __set_bit(bit, (unsigned long *)&execlists->active); +} + +static inline void +execlists_clear_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __clear_bit(bit, (unsigned long *)&execlists->active); +} + +static inline bool +execlists_is_active(const struct intel_engine_execlists *execlists, + unsigned int bit) +{ + return test_bit(bit, (unsigned long *)&execlists->active); +} + +void +execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); + +void +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); + +static inline unsigned int +execlists_num_ports(const struct intel_engine_execlists * const execlists) +{ + return execlists->port_mask + 1; +} + +static inline void +execlists_port_complete(struct intel_engine_execlists * const execlists, + struct execlist_port * const port) +{ + const unsigned int m = execlists->port_mask; + + GEM_BUG_ON(port_index(port, execlists) != 0); + GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); + + memmove(port, port + 1, m * sizeof(struct execlist_port)); + memset(port + m, 0, sizeof(struct execlist_port)); +} + static inline unsigned int intel_engine_flag(const struct intel_engine_cs *engine) { @@ -494,9 +697,15 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) */ #define I915_GEM_HWS_INDEX 0x30 #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_GEM_HWS_PREEMPT_INDEX 0x32 +#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) #define I915_GEM_HWS_SCRATCH_INDEX 0x40 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_HWS_CSB_BUF0_INDEX 0x10 +#define I915_HWS_CSB_WRITE_INDEX 0x1f +#define CNL_HWS_CSB_WRITE_INDEX 0x2f + struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); int intel_ring_pin(struct intel_ring *ring, @@ -514,6 +723,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); +int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes); u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, unsigned int n); @@ -642,6 +852,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; } +static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) +{ + return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR; +} + /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); @@ -712,6 +927,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); #define ENGINE_WAKEUP_WAITER BIT(0) #define ENGINE_WAKEUP_ASLEEP BIT(1) +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); + void __intel_engine_disarm_breadcrumbs(struct 
intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); @@ -730,22 +948,123 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) return batch + 6; } +static inline u32 * +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset) +{ + /* We're using qword write, offset should be aligned to 8 bytes. */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + /* w/a for post sync ops following a GPGPU operation we + * need a prior CS_STALL, which is emitted by the flush + * following the batch. + */ + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = gtt_offset; + *cs++ = 0; + *cs++ = value; + /* We're thrashing one dword of HWS. */ + *cs++ = 0; + + return cs; +} + +static inline u32 * +gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) +{ + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ + GEM_BUG_ON(gtt_offset & (1 << 5)); + /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = value; + + return cs; +} + bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); -void intel_engines_mark_idle(struct drm_i915_private *i915); +bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine); + +void intel_engines_park(struct drm_i915_private *i915); +void intel_engines_unpark(struct drm_i915_private *i915); + void intel_engines_reset_default_submission(struct drm_i915_private *i915); +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); -static inline bool -__intel_engine_can_store_dword(unsigned int gen, unsigned int class) +bool intel_engine_can_store_dword(struct intel_engine_cs *engine); + +__printf(3, 4) +void intel_engine_dump(struct intel_engine_cs *engine, + struct drm_printer *m, + const char *header, ...); + +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); + +static inline void intel_engine_context_in(struct intel_engine_cs *engine) { - if (gen <= 2) - return false; /* uses physical not virtual addresses */ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + spin_lock_irqsave(&engine->stats.lock, flags); - if (gen == 6 && class == VIDEO_DECODE_CLASS) - return false; /* b0rked */ + if (engine->stats.enabled > 0) { + if (engine->stats.active++ == 0) + engine->stats.start = ktime_get(); + GEM_BUG_ON(engine->stats.active == 0); + } + + spin_unlock_irqrestore(&engine->stats.lock, flags); +} + +static inline void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + spin_lock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + ktime_t last; + + if (engine->stats.active && --engine->stats.active == 0) { + /* + * Decrement the active context count and in case GPU + * is now idle add up to the running total. + */ + last = ktime_sub(ktime_get(), engine->stats.start); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } else if (engine->stats.active == 0) { + /* + * After turning on engine stats, context out might be + * the first event in which case we account from the + * time stats gathering was turned on. 
+ */ + last = ktime_sub(ktime_get(), engine->stats.enabled_at); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } + } - return true; + spin_unlock_irqrestore(&engine->stats.lock, flags); } +int intel_enable_engine_stats(struct intel_engine_cs *engine); +void intel_disable_engine_stats(struct intel_engine_cs *engine); + +ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); + #endif /* _INTEL_RINGBUFFER_H_ */
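A note on the request_count packing used by the new struct execlist_port above: request pointers are at least 4-byte aligned, so their two low bits (EXECLIST_COUNT_BITS) are free, and port_pack()/port_unpack() keep the submission count there, alongside the pointer, via the kernel's ptr_pack_bits()/ptr_unpack_bits() helpers. Below is a minimal userspace sketch of that idiom; the helper names and layout here are illustrative only, not the kernel's.

/*
 * Sketch: pack a small counter into the low bits of an aligned pointer.
 * Mirrors the port_pack()/port_unpack() idea from the diff above.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define COUNT_BITS 2			/* cf. EXECLIST_COUNT_BITS */
#define COUNT_MASK ((1ul << COUNT_BITS) - 1)

struct request { int dummy; };

static uintptr_t pack(struct request *rq, unsigned int count)
{
	/* alignment guarantees the low bits are zero and thus usable */
	assert(((uintptr_t)rq & COUNT_MASK) == 0);
	assert(count <= COUNT_MASK);
	return (uintptr_t)rq | count;
}

static struct request *unpack_ptr(uintptr_t packed)
{
	return (struct request *)(packed & ~COUNT_MASK);
}

static unsigned int unpack_count(uintptr_t packed)
{
	return packed & COUNT_MASK;
}

int main(void)
{
	static struct request rq;
	uintptr_t port = pack(&rq, 1);		/* first submission to the port */

	/* resubmitting the same context bumps the count ("lite restore") */
	port = pack(unpack_ptr(port), unpack_count(port) + 1);

	printf("request=%p submissions=%u\n",
	       (void *)unpack_ptr(port), unpack_count(port));
	return 0;
}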
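execlists_port_complete() above treats the ports as a short in-order queue: retiring port[0] shifts the remaining ports down with memmove() and zeroes the vacated tail slot. A simplified standalone model of that behaviour follows; the struct layout and names are assumptions for illustration, not the kernel code.

/* Sketch: completing port[0] shifts the later port(s) down by one. */
#include <stdio.h>
#include <string.h>

#define MAX_PORTS 2			/* cf. EXECLIST_MAX_PORTS */

struct port { const char *request; unsigned int count; };

static void port_complete(struct port *port, unsigned int port_mask)
{
	const unsigned int m = port_mask;	/* number of ports - 1 */

	memmove(port, port + 1, m * sizeof(*port));
	memset(port + m, 0, sizeof(*port));
}

int main(void)
{
	struct port ports[MAX_PORTS] = {
		{ "rq-A", 1 }, { "rq-B", 1 },
	};

	port_complete(ports, MAX_PORTS - 1);
	/* rq-B has moved into port[0]; the tail port is now empty */
	printf("port0=%s port1=%s\n",
	       ports[0].request, ports[1].request ? ports[1].request : "(idle)");
	return 0;
}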
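The engine busy-stats added above accumulate time into stats.total only on a busy-to-idle transition (active hitting 0), with stats.start recording the last idle-to-busy transition. intel_engine_get_busy_time() is only declared in this header, but a plausible reading is stats.total plus the currently open busy period. The toy C model below uses a fake clock and ignores the enable/disable reference counting and enabled_at edge case handled in the real intel_engine_context_out().

/* Sketch: busy-time accounting across context_in/context_out events. */
#include <stdio.h>

static long long now_ns;	/* fake monotonic clock, cf. ktime_get() */

struct stats { unsigned int active; long long start, total; };

static void context_in(struct stats *s)
{
	if (s->active++ == 0)
		s->start = now_ns;		/* idle -> busy transition */
}

static void context_out(struct stats *s)
{
	if (--s->active == 0)
		s->total += now_ns - s->start;	/* busy -> idle: accumulate */
}

static long long busy_time(const struct stats *s)
{
	/* include the still-open period while contexts are running */
	return s->total + (s->active ? now_ns - s->start : 0);
}

int main(void)
{
	struct stats s = { 0 };

	now_ns = 100; context_in(&s);	/* engine goes busy at t=100 */
	now_ns = 160; context_in(&s);	/* second context; still busy */
	now_ns = 250; context_out(&s);
	now_ns = 300; context_out(&s);	/* idle again: 200ns accumulated */
	printf("busy=%lldns\n", busy_time(&s));	/* prints busy=200ns */
	return 0;
}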