Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h')
 drivers/gpu/drm/i915/intel_ringbuffer.h | 421 ++++++++++++++++++++++++------
 1 file changed, 370 insertions(+), 51 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 6b2067f10824..a0e7a6c2a57c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -6,8 +6,11 @@
#include "i915_gem_batch_pool.h"
#include "i915_gem_request.h"
#include "i915_gem_timeline.h"
+#include "i915_pmu.h"
#include "i915_selftest.h"
+struct drm_printer;
+
#define I915_CMD_HASH_ORDER 9
/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
@@ -45,16 +48,6 @@ struct intel_hw_status_page {
/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
* do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
*/
-#define gen8_semaphore_seqno_size sizeof(uint64_t)
-#define GEN8_SEMAPHORE_OFFSET(__from, __to) \
- (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size)
-#define GEN8_SIGNAL_OFFSET(__ring, to) \
- (dev_priv->semaphore->node.start + \
- GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
-#define GEN8_WAIT_OFFSET(__ring, from) \
- (dev_priv->semaphore->node.start + \
- GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
-
enum intel_engine_hangcheck_action {
ENGINE_IDLE = 0,
ENGINE_WAIT,
@@ -164,7 +157,6 @@ struct i915_ctx_workarounds {
};
struct drm_i915_gem_request;
-struct intel_render_state;
/*
* Engine IDs definitions.
@@ -185,16 +177,123 @@ struct i915_priolist {
int priority;
};
+/**
+ * struct intel_engine_execlists - execlist submission queue and port state
+ *
+ * The struct intel_engine_execlists represents the combined logical state
+ * of the driver and the hardware for the execlist mode of submission.
+ */
+struct intel_engine_execlists {
+ /**
+ * @tasklet: softirq tasklet for the bottom-half handler
+ */
+ struct tasklet_struct tasklet;
+
+ /**
+ * @default_priolist: priority list for I915_PRIORITY_NORMAL
+ */
+ struct i915_priolist default_priolist;
+
+ /**
+ * @no_priolist: priority lists disabled
+ */
+ bool no_priolist;
+
+ /**
+ * @elsp: the ExecList Submission Port register
+ */
+ u32 __iomem *elsp;
+
+ /**
+ * @port: execlist port states
+ *
+ * For each hardware ELSP (ExecList Submission Port) we keep
+ * track of the last request and the number of times we submitted
+ * that port to hw. We then count the number of times the hw reports
+ * a context completion or preemption. As only one context can
+ * be active on hw, we limit resubmission of a context to port[0]. This
+ * is called a Lite Restore of the context.
+ */
+ struct execlist_port {
+ /**
+ * @request_count: combined request and submission count
+ */
+ struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
+#define port_index(p, execlists) ((p) - (execlists)->port)
+
+ /**
+ * @context_id: context ID for port
+ */
+ GEM_DEBUG_DECL(u32 context_id);
+
+#define EXECLIST_MAX_PORTS 2
+ } port[EXECLIST_MAX_PORTS];
+
+ /**
+ * @active: is the HW active? We consider the HW as active after
+ * submitting any context for execution and until we have seen the
+ * last context completion event. After that, we do not expect any
+ * more events until we submit, and so can park the HW.
+ *
+ * As we have a small number of different sources from which we feed
+ * the HW, we track the state of each inside a single bitfield.
+ */
+ unsigned int active;
+#define EXECLISTS_ACTIVE_USER 0
+#define EXECLISTS_ACTIVE_PREEMPT 1
+#define EXECLISTS_ACTIVE_HWACK 2
+
+ /**
+ * @port_mask: number of execlist ports - 1
+ */
+ unsigned int port_mask;
+
+ /**
+ * @queue: queue of requests, in priority lists
+ */
+ struct rb_root queue;
+
+ /**
+ * @first: leftmost (highest-priority) level in @queue
+ */
+ struct rb_node *first;
+
+ /**
+ * @fw_domains: forcewake domains for irq tasklet
+ */
+ unsigned int fw_domains;
+
+ /**
+ * @csb_head: context status buffer head
+ */
+ unsigned int csb_head;
+
+ /**
+ * @csb_use_mmio: access csb through mmio, instead of hwsp
+ */
+ bool csb_use_mmio;
+};
+
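The port_pack()/port_unpack() macros above exploit the alignment of request pointers: a pointer to a naturally aligned object has zero low bits, so a small submission count can ride along in the pointer itself. A minimal standalone sketch of the technique (using hypothetical pack()/unpack() helpers, not the driver's ptr_pack_bits() family) looks like this:

    #include <assert.h>
    #include <stdint.h>

    #define COUNT_BITS 2                        /* cf. EXECLIST_COUNT_BITS */
    #define COUNT_MASK ((uintptr_t)(1 << COUNT_BITS) - 1)

    /* Stash a small count in the low bits of an aligned pointer. */
    static void *pack(void *ptr, unsigned int count)
    {
            assert(((uintptr_t)ptr & COUNT_MASK) == 0); /* alignment frees the bits */
            assert(count <= COUNT_MASK);
            return (void *)((uintptr_t)ptr | count);
    }

    /* Recover the original pointer and the stashed count. */
    static void *unpack(void *packed, unsigned int *count)
    {
            *count = (uintptr_t)packed & COUNT_MASK;
            return (void *)((uintptr_t)packed & ~COUNT_MASK);
    }

In the same way, port_request() recovers the clean request pointer, while port_count() reads how many times that request has been submitted to the port (used for coalescing and Lite Restore).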
#define INTEL_ENGINE_CS_MAX_NAME 8
struct intel_engine_cs {
struct drm_i915_private *i915;
char name[INTEL_ENGINE_CS_MAX_NAME];
+
enum intel_engine_id id;
- unsigned int uabi_id;
unsigned int hw_id;
unsigned int guc_id;
+ u8 uabi_id;
+ u8 uabi_class;
+
u8 class;
u8 instance;
u32 context_size;
@@ -204,7 +303,7 @@ struct intel_engine_cs {
struct intel_ring *buffer;
struct intel_timeline *timeline;
- struct intel_render_state *render_state;
+ struct drm_i915_gem_object *default_state;
atomic_t irq_count;
unsigned long irq_posted;
@@ -240,12 +339,35 @@ struct intel_engine_cs {
struct timer_list hangcheck; /* detect missed interrupts */
unsigned int hangcheck_interrupts;
+ unsigned int irq_enabled;
bool irq_armed : 1;
- bool irq_enabled : 1;
I915_SELFTEST_DECLARE(bool mock : 1);
} breadcrumbs;
+ struct {
+ /**
+ * @enable: Bitmask of enabled sample events on this engine.
+ *
+ * Bits correspond to sample event types; for instance,
+ * I915_SAMPLE_QUEUED is bit 0, and so on.
+ */
+ u32 enable;
+ /**
+ * @enable_count: Reference count for the enabled samplers.
+ *
+ * Index number corresponds to the bit number from @enable.
+ */
+ unsigned int enable_count[I915_PMU_SAMPLE_BITS];
+ /**
+ * @sample: Counter values for sampling events.
+ *
+ * Our internal timer stores the current counters in this field.
+ */
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
+ struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
+ } pmu;
+
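The @enable bitmask and @enable_count refcounts fit together as follows: enabling a sampler sets its bit and takes a reference, and the bit is cleared only when the last listener goes away, so the sampling timer can cheaply test a single mask. A hedged sketch of that bookkeeping (hypothetical helper names, not the driver's actual PMU entry points):

    /* Illustration only; presumably the real bookkeeping lives in i915_pmu.c. */
    static void sampler_get(struct intel_engine_cs *engine, unsigned int bit)
    {
            engine->pmu.enable |= BIT(bit);
            engine->pmu.enable_count[bit]++;
    }

    static void sampler_put(struct intel_engine_cs *engine, unsigned int bit)
    {
            GEM_BUG_ON(engine->pmu.enable_count[bit] == 0);
            if (--engine->pmu.enable_count[bit] == 0)
                    engine->pmu.enable &= ~BIT(bit);
    }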
/*
* A pool of objects to use as shadow copies of client batch buffers
* when the command parser is enabled. Prevents the client from
@@ -266,6 +388,9 @@ struct intel_engine_cs {
void (*reset_hw)(struct intel_engine_cs *engine,
struct drm_i915_gem_request *req);
+ void (*park)(struct intel_engine_cs *engine);
+ void (*unpark)(struct intel_engine_cs *engine);
+
void (*set_default_submission)(struct intel_engine_cs *engine);
struct intel_ring *(*context_pin)(struct intel_engine_cs *engine,
@@ -307,6 +432,14 @@ struct intel_engine_cs {
void (*schedule)(struct drm_i915_gem_request *request,
int priority);
+ /*
+ * Cancel all requests on the hardware or queued for execution.
+ * This should only cancel the ready requests that have been
+ * submitted to the engine (via the engine->submit_request callback).
+ * This is called when marking the device as wedged.
+ */
+ void (*cancel_requests)(struct intel_engine_cs *engine);
+
/* Some chipsets are not quite as coherent as advertised and need
* an expensive kick to force a true read of the up-to-date seqno.
* However, the up-to-date seqno is not always required and the last
@@ -354,18 +487,15 @@ struct intel_engine_cs {
* ie. transpose of f(x, y)
*/
struct {
- union {
#define GEN6_SEMAPHORE_LAST VECS_HW
#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0)
- struct {
- /* our mbox written by others */
- u32 wait[GEN6_NUM_SEMAPHORES];
- /* mboxes this ring signals to */
- i915_reg_t signal[GEN6_NUM_SEMAPHORES];
- } mbox;
- u64 signal_ggtt[I915_NUM_ENGINES];
- };
+ struct {
+ /* our mbox written by others */
+ u32 wait[GEN6_NUM_SEMAPHORES];
+ /* mboxes this ring signals to */
+ i915_reg_t signal[GEN6_NUM_SEMAPHORES];
+ } mbox;
/* AKA wait() */
int (*sync_to)(struct drm_i915_gem_request *req,
@@ -373,25 +503,7 @@ struct intel_engine_cs {
u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs);
} semaphore;
- /* Execlists */
- struct tasklet_struct irq_tasklet;
- struct i915_priolist default_priolist;
- bool no_priolist;
- struct execlist_port {
- struct drm_i915_gem_request *request_count;
-#define EXECLIST_COUNT_BITS 2
-#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
-#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
-#define port_set(p, packed) ((p)->request_count = (packed))
-#define port_isset(p) ((p)->request_count)
-#define port_index(p, e) ((p) - (e)->execlist_port)
- GEM_DEBUG_DECL(u32 context_id);
- } execlist_port[2];
- struct rb_root execlist_queue;
- struct rb_node *execlist_first;
- unsigned int fw_domains;
+ struct intel_engine_execlists execlists;
/* Contexts are pinned whilst they are active on the GPU. The last
* context executed remains active whilst the GPU is idle - the
@@ -411,13 +523,16 @@ struct intel_engine_cs {
* stream (ring).
*/
struct i915_gem_context *legacy_active_context;
+ struct i915_hw_ppgtt *legacy_active_ppgtt;
/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;
struct intel_engine_hangcheck hangcheck;
- bool needs_cmd_parser;
+#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_SUPPORTS_STATS BIT(1)
+ unsigned int flags;
/*
* Table of commands the command parser needs to know about
@@ -442,8 +557,96 @@ struct intel_engine_cs {
* certain bits to encode the command length in the header).
*/
u32 (*get_cmd_length_mask)(u32 cmd_header);
+
+ struct {
+ /**
+ * @lock: Lock protecting the below fields.
+ */
+ spinlock_t lock;
+ /**
+ * @enabled: Reference count indicating number of listeners.
+ */
+ unsigned int enabled;
+ /**
+ * @active: Number of contexts currently scheduled in.
+ */
+ unsigned int active;
+ /**
+ * @enabled_at: Timestamp when busy stats were enabled.
+ */
+ ktime_t enabled_at;
+ /**
+ * @start: Timestamp of the last idle to active transition.
+ *
+ * Idle is defined as active == 0, and busy as active > 0.
+ */
+ ktime_t start;
+ /**
+ * @total: Total time this engine was busy.
+ *
+ * Accumulated time, not counting the most recent block in cases
+ * where the engine is currently busy (active > 0).
+ */
+ ktime_t total;
+ } stats;
};
+static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+}
+
+static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_SUPPORTS_STATS;
+}
+
+static inline void
+execlists_set_active(struct intel_engine_execlists *execlists,
+ unsigned int bit)
+{
+ __set_bit(bit, (unsigned long *)&execlists->active);
+}
+
+static inline void
+execlists_clear_active(struct intel_engine_execlists *execlists,
+ unsigned int bit)
+{
+ __clear_bit(bit, (unsigned long *)&execlists->active);
+}
+
+static inline bool
+execlists_is_active(const struct intel_engine_execlists *execlists,
+ unsigned int bit)
+{
+ return test_bit(bit, (unsigned long *)&execlists->active);
+}
+
+void
+execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
+
+void
+execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
+
+static inline unsigned int
+execlists_num_ports(const struct intel_engine_execlists * const execlists)
+{
+ return execlists->port_mask + 1;
+}
+
+static inline void
+execlists_port_complete(struct intel_engine_execlists * const execlists,
+ struct execlist_port * const port)
+{
+ const unsigned int m = execlists->port_mask;
+
+ GEM_BUG_ON(port_index(port, execlists) != 0);
+ GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
+
+ memmove(port, port + 1, m * sizeof(struct execlist_port));
+ memset(port + m, 0, sizeof(struct execlist_port));
+}
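In effect, completing the request in port[0] treats the port array as a tiny FIFO: the memmove promotes whatever was submitted on the later port(s) into port[0], and the vacated last slot is zeroed so that port_isset() reports it empty.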
+
static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
@@ -494,9 +697,15 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
*/
#define I915_GEM_HWS_INDEX 0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
+#define I915_GEM_HWS_PREEMPT_INDEX 0x32
+#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX 0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
+#define I915_HWS_CSB_BUF0_INDEX 0x10
+#define I915_HWS_CSB_WRITE_INDEX 0x1f
+#define CNL_HWS_CSB_WRITE_INDEX 0x2f
+
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
int intel_ring_pin(struct intel_ring *ring,
@@ -514,6 +723,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
+int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
unsigned int n);
@@ -642,6 +852,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}
+static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
+{
+ return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
+}
+
/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
@@ -712,6 +927,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)
+void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
+void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
+
void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
@@ -730,22 +948,123 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
return batch + 6;
}
+static inline u32 *
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
+{
+ /* We're using a qword write; the offset should be aligned to 8 bytes. */
+ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+ /* w/a: for post-sync ops following a GPGPU operation we
+ * need a prior CS_STALL, which is emitted by the flush
+ * following the batch.
+ */
+ *cs++ = GFX_OP_PIPE_CONTROL(6);
+ *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE;
+ *cs++ = gtt_offset;
+ *cs++ = 0;
+ *cs++ = value;
+ /* We're trashing one dword of HWS. */
+ *cs++ = 0;
+
+ return cs;
+}
+
+static inline u32 *
+gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
+{
+ /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
+ GEM_BUG_ON(gtt_offset & (1 << 5));
+ /* Offset should be aligned to 8 bytes for both (QW/DW) write types */
+ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+ *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+ *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
+ *cs++ = 0;
+ *cs++ = value;
+
+ return cs;
+}
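A typical caller of these emitters reserves ring space first and then advances past whatever was written. A sketch of that pattern (assuming req, seqno and engine are in scope, and noting that gen8_emit_ggtt_write() emits exactly 4 dwords):

    u32 *cs;

    cs = intel_ring_begin(req, 4);          /* reserve the 4 dwords emitted below */
    if (IS_ERR(cs))
            return PTR_ERR(cs);

    cs = gen8_emit_ggtt_write(cs, seqno, intel_hws_seqno_address(engine));
    intel_ring_advance(req, cs);            /* commit the emitted dwords */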
+
bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
-void intel_engines_mark_idle(struct drm_i915_private *i915);
+bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
+
+void intel_engines_park(struct drm_i915_private *i915);
+void intel_engines_unpark(struct drm_i915_private *i915);
+
void intel_engines_reset_default_submission(struct drm_i915_private *i915);
+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
-static inline bool
-__intel_engine_can_store_dword(unsigned int gen, unsigned int class)
+bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
+
+__printf(3, 4)
+void intel_engine_dump(struct intel_engine_cs *engine,
+ struct drm_printer *m,
+ const char *header, ...);
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+
+static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
- if (gen <= 2)
- return false; /* uses physical not virtual addresses */
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ spin_lock_irqsave(&engine->stats.lock, flags);
- if (gen == 6 && class == VIDEO_DECODE_CLASS)
- return false; /* b0rked */
+ if (engine->stats.enabled > 0) {
+ if (engine->stats.active++ == 0)
+ engine->stats.start = ktime_get();
+ GEM_BUG_ON(engine->stats.active == 0);
+ }
+
+ spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static inline void intel_engine_context_out(struct intel_engine_cs *engine)
+{
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ spin_lock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ ktime_t last;
+
+ if (engine->stats.active && --engine->stats.active == 0) {
+ /*
+ * Decrement the active context count and in case GPU
+ * is now idle add up to the running total.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.start);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ } else if (engine->stats.active == 0) {
+ /*
+ * After turning on engine stats, context out might be
+ * the first event in which case we account from the
+ * time stats gathering was turned on.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ }
+ }
- return true;
+ spin_unlock_irqrestore(&engine->stats.lock, flags);
}
+int intel_enable_engine_stats(struct intel_engine_cs *engine);
+void intel_disable_engine_stats(struct intel_engine_cs *engine);
+
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
+
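Given the stats block above, reading the busy time has to fold in the interval accumulated since the last idle-to-active transition whenever the engine is still busy. A sketch of what intel_engine_get_busy_time() plausibly does, consistent with intel_engine_context_in/out above (not necessarily the exact implementation):

    ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
    {
            unsigned long flags;
            ktime_t total;

            spin_lock_irqsave(&engine->stats.lock, flags);

            total = engine->stats.total;
            if (engine->stats.active)       /* include the in-progress busy block */
                    total = ktime_add(total,
                                      ktime_sub(ktime_get(), engine->stats.start));

            spin_unlock_irqrestore(&engine->stats.lock, flags);

            return total;
    }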
#endif /* _INTEL_RINGBUFFER_H_ */