diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.h')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.h | 90 |
1 files changed, 18 insertions, 72 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index ff2652bbb0b0..5dc761e85d9d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -45,6 +45,7 @@ struct i915_gpu_state { u32 reset_count; u32 suspend_count; struct intel_device_info device_info; + struct intel_runtime_info runtime_info; struct intel_driver_caps driver_caps; struct i915_params params; @@ -81,11 +82,7 @@ struct i915_gpu_state { int engine_id; /* Software tracked state */ bool idle; - bool waiting; - int num_waiters; unsigned long hangcheck_timestamp; - bool hangcheck_stalled; - enum intel_engine_hangcheck_action hangcheck_action; struct i915_address_space *vm; int num_requests; u32 reset_count; @@ -97,8 +94,6 @@ struct i915_gpu_state { u32 cpu_ring_head; u32 cpu_ring_tail; - u32 last_seqno; - /* Register state */ u32 start; u32 tail; @@ -111,24 +106,19 @@ struct i915_gpu_state { u32 bbstate; u32 instpm; u32 instps; - u32 seqno; u64 bbaddr; u64 acthd; u32 fault_reg; u64 faddr; u32 rc_psmi; /* sleep state */ - u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; struct intel_instdone instdone; struct drm_i915_error_context { char comm[TASK_COMM_LEN]; pid_t pid; - u32 handle; u32 hw_id; - int ban_score; int active; int guilty; - bool bannable; struct i915_sched_attr sched_attr; } context; @@ -148,10 +138,10 @@ struct i915_gpu_state { struct drm_i915_error_object *default_state; struct drm_i915_error_request { + unsigned long flags; long jiffies; pid_t pid; u32 context; - int ban_score; u32 seqno; u32 start; u32 head; @@ -160,12 +150,6 @@ struct i915_gpu_state { } *requests, execlist[EXECLIST_MAX_PORTS]; unsigned int num_ports; - struct drm_i915_error_waiter { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 seqno; - } *waiters; - struct { u32 gfx_mode; union { @@ -178,7 +162,6 @@ struct i915_gpu_state { struct drm_i915_error_buffer { u32 size; u32 name; - u32 wseqno; u64 gtt_offset; u32 read_domains; u32 write_domain; @@ -187,7 +170,6 @@ struct i915_gpu_state { u32 dirty:1; u32 purgeable:1; u32 userptr:1; - s32 engine:4; u32 cache_level:3; } *active_bo[I915_NUM_ENGINES], *pinned_bo; u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; @@ -196,6 +178,8 @@ struct i915_gpu_state { struct scatterlist *sgl, *fit; }; +struct i915_gpu_restart; + struct i915_gpu_error { /* For hangcheck timer */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ @@ -210,50 +194,13 @@ struct i915_gpu_error { atomic_t pending_fb_pin; - unsigned long missed_irq_rings; - - /** - * State variable controlling the reset flow and count - * - * This is a counter which gets incremented when reset is triggered, - * - * Before the reset commences, the I915_RESET_BACKOFF bit is set - * meaning that any waiters holding onto the struct_mutex should - * relinquish the lock immediately in order for the reset to start. - * - * If reset is not completed successfully, the I915_WEDGE bit is - * set meaning that hardware is terminally sour and there is no - * recovery. All waiters on the reset_queue will be woken when - * that happens. - * - * This counter is used by the wait_seqno code to notice that reset - * event happened and it needs to restart the entire ioctl (since most - * likely the seqno it waited for won't ever signal anytime soon). - * - * This is important for lock-free wait paths, where no contended lock - * naturally enforces the correct ordering between the bail-out of the - * waiter and the gpu reset work code. - */ - unsigned long reset_count; - /** * flags: Control various stages of the GPU reset * - * #I915_RESET_BACKOFF - When we start a reset, we want to stop any - * other users acquiring the struct_mutex. To do this we set the - * #I915_RESET_BACKOFF bit in the error flags when we detect a reset - * and then check for that bit before acquiring the struct_mutex (in - * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a - * secondary role in preventing two concurrent global reset attempts. - * - * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the - * struct_mutex. We try to acquire the struct_mutex in the reset worker, - * but it may be held by some long running waiter (that we cannot - * interrupt without causing trouble). Once we are ready to do the GPU - * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If - * they already hold the struct_mutex and want to participate they can - * inspect the bit and do the reset directly, otherwise the worker - * waits for the struct_mutex. + * #I915_RESET_BACKOFF - When we start a global reset, we need to + * serialise with any other users attempting to do the same, and + * any global resources that may be clobber by the reset (such as + * FENCE registers). * * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to * acquire the struct_mutex to reset an engine, we need an explicit @@ -268,19 +215,17 @@ struct i915_gpu_error { */ unsigned long flags; #define I915_RESET_BACKOFF 0 -#define I915_RESET_HANDOFF 1 -#define I915_RESET_MODESET 2 +#define I915_RESET_MODESET 1 +#define I915_RESET_ENGINE 2 #define I915_WEDGED (BITS_PER_LONG - 1) -#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES) + + /** Number of times the device has been reset (global) */ + u32 reset_count; /** Number of times an engine has been reset */ u32 reset_engine_count[I915_NUM_ENGINES]; - /** Set of stalled engines with guilty requests, in the current reset */ - u32 stalled_mask; - - /** Reason for the current *global* reset */ - const char *reason; + struct mutex wedge_mutex; /* serialises wedging/unwedging */ /** * Waitqueue to signal when a hang is detected. Used to for waiters @@ -294,8 +239,9 @@ struct i915_gpu_error { */ wait_queue_head_t reset_queue; - /* For missed irq/seqno simulation. */ - unsigned long test_irq_rings; + struct srcu_struct reset_backoff_srcu; + + struct i915_gpu_restart *restart; }; struct drm_i915_error_state_buf { @@ -317,7 +263,7 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, + intel_engine_mask_t engine_mask, const char *error_msg); static inline struct i915_gpu_state * |