aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderclear.c402
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderclear.h15
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c27
-rw-r--r--drivers/gpu/drm/i915/gt/hsw_clear_kernel.c61
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h25
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_param.c63
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_param.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_sseu.c98
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h25
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c174
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_user.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c106
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c68
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c117
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c65
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_llc.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c386
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc_reg.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c76
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c109
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c236
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c78
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c244
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds_types.h4
-rw-r--r--drivers/gpu/drm/i915/gt/ivb_clear_kernel.c61
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c7
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c30
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_llc.c11
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c1842
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_mocs.c24
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c28
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_reset.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_ring_submission.c296
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c188
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c9
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.c445
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.h13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c30
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h23
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c255
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h19
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c69
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.h62
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c11
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h18
64 files changed, 5009 insertions, 1032 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
new file mode 100644
index 000000000000..de595b66a746
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gen7_renderclear.h"
+#include "i915_drv.h"
+#include "intel_gpu_commands.h"
+
+#define MAX_URB_ENTRIES 64
+#define STATE_SIZE (4 * 1024)
+#define GT3_INLINE_DATA_DELAYS 0x1E00
+#define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS))
+
+struct cb_kernel {
+ const void *data;
+ u32 size;
+};
+
+#define CB_KERNEL(name) { .data = (name), .size = sizeof(name) }
+
+#include "ivb_clear_kernel.c"
+static const struct cb_kernel cb_kernel_ivb = CB_KERNEL(ivb_clear_kernel);
+
+#include "hsw_clear_kernel.c"
+static const struct cb_kernel cb_kernel_hsw = CB_KERNEL(hsw_clear_kernel);
+
+struct batch_chunk {
+ struct i915_vma *vma;
+ u32 offset;
+ u32 *start;
+ u32 *end;
+ u32 max_items;
+};
+
+struct batch_vals {
+ u32 max_primitives;
+ u32 max_urb_entries;
+ u32 cmd_size;
+ u32 state_size;
+ u32 state_start;
+ u32 batch_size;
+ u32 surface_height;
+ u32 surface_width;
+ u32 scratch_size;
+ u32 max_size;
+};
+
+static void
+batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv)
+{
+ if (IS_HASWELL(i915)) {
+ bv->max_primitives = 280;
+ bv->max_urb_entries = MAX_URB_ENTRIES;
+ bv->surface_height = 16 * 16;
+ bv->surface_width = 32 * 2 * 16;
+ } else {
+ bv->max_primitives = 128;
+ bv->max_urb_entries = MAX_URB_ENTRIES / 2;
+ bv->surface_height = 16 * 8;
+ bv->surface_width = 32 * 16;
+ }
+ bv->cmd_size = bv->max_primitives * 4096;
+ bv->state_size = STATE_SIZE;
+ bv->state_start = bv->cmd_size;
+ bv->batch_size = bv->cmd_size + bv->state_size;
+ bv->scratch_size = bv->surface_height * bv->surface_width;
+ bv->max_size = bv->batch_size + bv->scratch_size;
+}
+
+static void batch_init(struct batch_chunk *bc,
+ struct i915_vma *vma,
+ u32 *start, u32 offset, u32 max_bytes)
+{
+ bc->vma = vma;
+ bc->offset = offset;
+ bc->start = start + bc->offset / sizeof(*bc->start);
+ bc->end = bc->start;
+ bc->max_items = max_bytes / sizeof(*bc->start);
+}
+
+static u32 batch_offset(const struct batch_chunk *bc, u32 *cs)
+{
+ return (cs - bc->start) * sizeof(*bc->start) + bc->offset;
+}
+
+static u32 batch_addr(const struct batch_chunk *bc)
+{
+ return bc->vma->node.start;
+}
+
+static void batch_add(struct batch_chunk *bc, const u32 d)
+{
+ GEM_BUG_ON((bc->end - bc->start) >= bc->max_items);
+ *bc->end++ = d;
+}
+
+static u32 *batch_alloc_items(struct batch_chunk *bc, u32 align, u32 items)
+{
+ u32 *map;
+
+ if (align) {
+ u32 *end = PTR_ALIGN(bc->end, align);
+
+ memset32(bc->end, 0, end - bc->end);
+ bc->end = end;
+ }
+
+ map = bc->end;
+ bc->end += items;
+
+ return map;
+}
+
+static u32 *batch_alloc_bytes(struct batch_chunk *bc, u32 align, u32 bytes)
+{
+ GEM_BUG_ON(!IS_ALIGNED(bytes, sizeof(*bc->start)));
+ return batch_alloc_items(bc, align, bytes / sizeof(*bc->start));
+}
+
+static u32
+gen7_fill_surface_state(struct batch_chunk *state,
+ const u32 dst_offset,
+ const struct batch_vals *bv)
+{
+ u32 surface_h = bv->surface_height;
+ u32 surface_w = bv->surface_width;
+ u32 *cs = batch_alloc_items(state, 32, 8);
+ u32 offset = batch_offset(state, cs);
+
+#define SURFACE_2D 1
+#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define RENDER_CACHE_READ_WRITE 1
+
+ *cs++ = SURFACE_2D << 29 |
+ (SURFACEFORMAT_B8G8R8A8_UNORM << 18) |
+ (RENDER_CACHE_READ_WRITE << 8);
+
+ *cs++ = batch_addr(state) + dst_offset;
+
+ *cs++ = ((surface_h / 4 - 1) << 16) | (surface_w / 4 - 1);
+ *cs++ = surface_w;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+#define SHADER_CHANNELS(r, g, b, a) \
+ (((r) << 25) | ((g) << 22) | ((b) << 19) | ((a) << 16))
+ *cs++ = SHADER_CHANNELS(4, 5, 6, 7);
+ batch_advance(state, cs);
+
+ return offset;
+}
+
+static u32
+gen7_fill_binding_table(struct batch_chunk *state,
+ const struct batch_vals *bv)
+{
+ u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv);
+ u32 *cs = batch_alloc_items(state, 32, 8);
+ u32 offset = batch_offset(state, cs);
+
+ *cs++ = surface_start - state->offset;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ batch_advance(state, cs);
+
+ return offset;
+}
+
+static u32
+gen7_fill_kernel_data(struct batch_chunk *state,
+ const u32 *data,
+ const u32 size)
+{
+ return batch_offset(state,
+ memcpy(batch_alloc_bytes(state, 64, size),
+ data, size));
+}
+
+static u32
+gen7_fill_interface_descriptor(struct batch_chunk *state,
+ const struct batch_vals *bv,
+ const struct cb_kernel *kernel,
+ unsigned int count)
+{
+ u32 kernel_offset =
+ gen7_fill_kernel_data(state, kernel->data, kernel->size);
+ u32 binding_table = gen7_fill_binding_table(state, bv);
+ u32 *cs = batch_alloc_items(state, 32, 8 * count);
+ u32 offset = batch_offset(state, cs);
+
+ *cs++ = kernel_offset;
+ *cs++ = (1 << 7) | (1 << 13);
+ *cs++ = 0;
+ *cs++ = (binding_table - state->offset) | 1;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ /* 1 - 63dummy idds */
+ memset32(cs, 0x00, (count - 1) * 8);
+ batch_advance(state, cs + (count - 1) * 8);
+
+ return offset;
+}
+
+static void
+gen7_emit_state_base_address(struct batch_chunk *batch,
+ u32 surface_state_base)
+{
+ u32 *cs = batch_alloc_items(batch, 0, 12);
+
+ *cs++ = STATE_BASE_ADDRESS | (12 - 2);
+ /* general */
+ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
+ /* surface */
+ *cs++ = batch_addr(batch) | surface_state_base | BASE_ADDRESS_MODIFY;
+ /* dynamic */
+ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
+ /* indirect */
+ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
+ /* instruction */
+ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
+
+ /* general/dynamic/indirect/instruction access Bound */
+ *cs++ = 0;
+ *cs++ = BASE_ADDRESS_MODIFY;
+ *cs++ = 0;
+ *cs++ = BASE_ADDRESS_MODIFY;
+ *cs++ = 0;
+ *cs++ = 0;
+ batch_advance(batch, cs);
+}
+
+static void
+gen7_emit_vfe_state(struct batch_chunk *batch,
+ const struct batch_vals *bv,
+ u32 urb_size, u32 curbe_size,
+ u32 mode)
+{
+ u32 urb_entries = bv->max_urb_entries;
+ u32 threads = bv->max_primitives - 1;
+ u32 *cs = batch_alloc_items(batch, 32, 8);
+
+ *cs++ = MEDIA_VFE_STATE | (8 - 2);
+
+ /* scratch buffer */
+ *cs++ = 0;
+
+ /* number of threads & urb entries for GPGPU vs Media Mode */
+ *cs++ = threads << 16 | urb_entries << 8 | mode << 2;
+
+ *cs++ = 0;
+
+ /* urb entry size & curbe size in 256 bits unit */
+ *cs++ = urb_size << 16 | curbe_size;
+
+ /* scoreboard */
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ batch_advance(batch, cs);
+}
+
+static void
+gen7_emit_interface_descriptor_load(struct batch_chunk *batch,
+ const u32 interface_descriptor,
+ unsigned int count)
+{
+ u32 *cs = batch_alloc_items(batch, 8, 4);
+
+ *cs++ = MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2);
+ *cs++ = 0;
+ *cs++ = count * 8 * sizeof(*cs);
+
+ /*
+ * interface descriptor address - it is relative to the dynamics base
+ * address
+ */
+ *cs++ = interface_descriptor;
+ batch_advance(batch, cs);
+}
+
+static void
+gen7_emit_media_object(struct batch_chunk *batch,
+ unsigned int media_object_index)
+{
+ unsigned int x_offset = (media_object_index % 16) * 64;
+ unsigned int y_offset = (media_object_index / 16) * 16;
+ unsigned int inline_data_size;
+ unsigned int media_batch_size;
+ unsigned int i;
+ u32 *cs;
+
+ inline_data_size = 112 * 8;
+ media_batch_size = inline_data_size + 6;
+
+ cs = batch_alloc_items(batch, 8, media_batch_size);
+
+ *cs++ = MEDIA_OBJECT | (media_batch_size - 2);
+
+ /* interface descriptor offset */
+ *cs++ = 0;
+
+ /* without indirect data */
+ *cs++ = 0;
+ *cs++ = 0;
+
+ /* scoreboard */
+ *cs++ = 0;
+ *cs++ = 0;
+
+ /* inline */
+ *cs++ = (y_offset << 16) | (x_offset);
+ *cs++ = 0;
+ *cs++ = GT3_INLINE_DATA_DELAYS;
+ for (i = 3; i < inline_data_size; i++)
+ *cs++ = 0;
+
+ batch_advance(batch, cs);
+}
+
+static void gen7_emit_pipeline_flush(struct batch_chunk *batch)
+{
+ u32 *cs = batch_alloc_items(batch, 0, 5);
+
+ *cs++ = GFX_OP_PIPE_CONTROL(5);
+ *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+ PIPE_CONTROL_GLOBAL_GTT_IVB;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ batch_advance(batch, cs);
+}
+
+static void emit_batch(struct i915_vma * const vma,
+ u32 *start,
+ const struct batch_vals *bv)
+{
+ struct drm_i915_private *i915 = vma->vm->i915;
+ unsigned int desc_count = 64;
+ const u32 urb_size = 112;
+ struct batch_chunk cmds, state;
+ u32 interface_descriptor;
+ unsigned int i;
+
+ batch_init(&cmds, vma, start, 0, bv->cmd_size);
+ batch_init(&state, vma, start, bv->state_start, bv->state_size);
+
+ interface_descriptor =
+ gen7_fill_interface_descriptor(&state, bv,
+ IS_HASWELL(i915) ?
+ &cb_kernel_hsw :
+ &cb_kernel_ivb,
+ desc_count);
+ gen7_emit_pipeline_flush(&cmds);
+ batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
+ batch_add(&cmds, MI_NOOP);
+ gen7_emit_state_base_address(&cmds, interface_descriptor);
+ gen7_emit_pipeline_flush(&cmds);
+
+ gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0);
+
+ gen7_emit_interface_descriptor_load(&cmds,
+ interface_descriptor,
+ desc_count);
+
+ for (i = 0; i < bv->max_primitives; i++)
+ gen7_emit_media_object(&cmds, i);
+
+ batch_add(&cmds, MI_BATCH_BUFFER_END);
+}
+
+int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine,
+ struct i915_vma * const vma)
+{
+ struct batch_vals bv;
+ u32 *batch;
+
+ batch_get_defaults(engine->i915, &bv);
+ if (!vma)
+ return bv.max_size;
+
+ GEM_BUG_ON(vma->obj->base.size < bv.max_size);
+
+ batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ emit_batch(vma, memset(batch, 0, bv.max_size), &bv);
+
+ i915_gem_object_flush_map(vma->obj);
+ i915_gem_object_unpin_map(vma->obj);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.h b/drivers/gpu/drm/i915/gt/gen7_renderclear.h
new file mode 100644
index 000000000000..bb100748e2c6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __GEN7_RENDERCLEAR_H__
+#define __GEN7_RENDERCLEAR_H__
+
+struct intel_engine_cs;
+struct i915_vma;
+
+int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine,
+ struct i915_vma * const vma);
+
+#endif /* __GEN7_RENDERCLEAR_H__ */
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4d1de2d97d5c..94e746af8926 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -8,6 +8,7 @@
#include "gen8_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
+#include "i915_pvinfo.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gtt.h"
@@ -25,6 +26,30 @@ static u64 gen8_pde_encode(const dma_addr_t addr,
return pde;
}
+static u64 gen8_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
+
+ if (unlikely(flags & PTE_READ_ONLY))
+ pte &= ~_PAGE_RW;
+
+ switch (level) {
+ case I915_CACHE_NONE:
+ pte |= PPAT_UNCACHED;
+ break;
+ case I915_CACHE_WT:
+ pte |= PPAT_DISPLAY_ELLC;
+ break;
+ default:
+ pte |= PPAT_CACHED;
+ break;
+ }
+
+ return pte;
+}
+
static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -706,6 +731,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
ppgtt->vm.clear_range = gen8_ppgtt_clear;
+ ppgtt->vm.pte_encode = gen8_pte_encode;
+
if (intel_vgpu_active(gt->i915))
gen8_ppgtt_notify_vgt(ppgtt, true);
diff --git a/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c b/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c
new file mode 100644
index 000000000000..b47f9d4a0848
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:30:13 AM UTC
+ */
+
+static const u32 hsw_clear_kernel[] = {
+ 0x00000001, 0x26020128, 0x00000024, 0x00000000,
+ 0x00000040, 0x20280c21, 0x00000028, 0x00000001,
+ 0x01000010, 0x20000c20, 0x0000002c, 0x00000000,
+ 0x00010220, 0x34001c00, 0x00001400, 0x00000160,
+ 0x00600001, 0x20600061, 0x00000000, 0x00000000,
+ 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c,
+ 0x00000005, 0x20601ca5, 0x00000060, 0x00000001,
+ 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d,
+ 0x00000005, 0x20641ca5, 0x00000064, 0x00000003,
+ 0x00000041, 0x207424a5, 0x00000064, 0x00000034,
+ 0x00000040, 0x206014a5, 0x00000060, 0x00000074,
+ 0x00000008, 0x20681c85, 0x00000e00, 0x00000008,
+ 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f,
+ 0x00000041, 0x20701ca5, 0x00000060, 0x00000010,
+ 0x00000040, 0x206814a5, 0x00000068, 0x00000070,
+ 0x00600001, 0x20a00061, 0x00000000, 0x00000000,
+ 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007,
+ 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004,
+ 0x00600001, 0x20800021, 0x008d0000, 0x00000000,
+ 0x00000001, 0x20800021, 0x0000006c, 0x00000000,
+ 0x00000001, 0x20840021, 0x00000068, 0x00000000,
+ 0x00000001, 0x20880061, 0x00000000, 0x00000003,
+ 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff,
+ 0x05600032, 0x20a00fa1, 0x008d0080, 0x02190001,
+ 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001,
+ 0x05600032, 0x20a00fa1, 0x008d0080, 0x040a8001,
+ 0x02000040, 0x20281c21, 0x00000028, 0xffffffff,
+ 0x00010220, 0x34001c00, 0x00001400, 0xffffffe0,
+ 0x00000001, 0x26020128, 0x00000024, 0x00000000,
+ 0x00000001, 0x220010e4, 0x00000000, 0x00000000,
+ 0x00000001, 0x220831ec, 0x00000000, 0x007f007f,
+ 0x00600001, 0x20400021, 0x008d0000, 0x00000000,
+ 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000,
+ 0x00200001, 0x20400121, 0x00450020, 0x00000000,
+ 0x00000001, 0x20480061, 0x00000000, 0x000f000f,
+ 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef,
+ 0x00800001, 0x20600061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20800061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20a00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20c00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20e00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21000061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21200061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21400061, 0x00000000, 0x00000000,
+ 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+ 0x00000040, 0x20402d21, 0x00000020, 0x00100010,
+ 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+ 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff,
+ 0x00800001, 0xa0000109, 0x00000602, 0x00000000,
+ 0x00000040, 0x22001c84, 0x00000200, 0x00000020,
+ 0x00010220, 0x34001c00, 0x00001400, 0xffffffc0,
+ 0x07600032, 0x20000fa0, 0x008d0fe0, 0x82000010,
+};
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 57e8a051ddc2..aea992e46c42 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -51,6 +51,11 @@ int intel_context_alloc_state(struct intel_context *ce)
return -EINTR;
if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ if (intel_context_is_banned(ce)) {
+ err = -EIO;
+ goto unlock;
+ }
+
err = ce->ops->alloc(ce);
if (unlikely(err))
goto unlock;
@@ -92,6 +97,8 @@ int __intel_context_do_pin(struct intel_context *ce)
{
int err;
+ GEM_BUG_ON(intel_context_is_closed(ce));
+
if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
err = intel_context_alloc_state(ce);
if (err)
@@ -116,7 +123,8 @@ int __intel_context_do_pin(struct intel_context *ce)
if (unlikely(err))
goto err_active;
- CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n",
+ CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
+ i915_ggtt_offset(ce->ring->vma),
ce->ring->head, ce->ring->tail);
smp_mb__before_atomic(); /* flush pin before it is visible */
@@ -219,7 +227,9 @@ static void __intel_context_retire(struct i915_active *active)
{
struct intel_context *ce = container_of(active, typeof(*ce), active);
- CE_TRACE(ce, "retire\n");
+ CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
+ intel_context_get_total_runtime_ns(ce),
+ intel_context_get_avg_runtime_ns(ce));
set_bit(CONTEXT_VALID_BIT, &ce->flags);
if (ce->state)
@@ -280,6 +290,8 @@ intel_context_init(struct intel_context *ce,
ce->sseu = engine->sseu;
ce->ring = __intel_context_ring_size(SZ_4K);
+ ewma_runtime_init(&ce->runtime.avg);
+
ce->vm = i915_vm_get(engine->gt->vm);
INIT_LIST_HEAD(&ce->signal_link);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 30bd248827d8..07be021882cc 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include "i915_active.h"
+#include "i915_drv.h"
#include "intel_context_types.h"
#include "intel_engine_types.h"
#include "intel_ring_types.h"
@@ -35,6 +36,9 @@ int intel_context_alloc_state(struct intel_context *ce);
void intel_context_free(struct intel_context *ce);
+int intel_context_reconfigure_sseu(struct intel_context *ce,
+ const struct intel_sseu sseu);
+
/**
* intel_context_lock_pinned - Stablises the 'pinned' status of the HW context
* @ce - the context
@@ -169,6 +173,11 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce)
return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
}
+static inline bool intel_context_is_closed(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+}
+
static inline bool intel_context_use_semaphores(const struct intel_context *ce)
{
return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
@@ -224,4 +233,20 @@ intel_context_clear_nopreempt(struct intel_context *ce)
clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
}
+static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
+{
+ const u32 period =
+ RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
+
+ return READ_ONCE(ce->runtime.total) * period;
+}
+
+static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
+{
+ const u32 period =
+ RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
+
+ return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
+}
+
#endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c
new file mode 100644
index 000000000000..65dcd090245d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_active.h"
+#include "intel_context.h"
+#include "intel_context_param.h"
+#include "intel_ring.h"
+
+int intel_context_set_ring_size(struct intel_context *ce, long sz)
+{
+ int err;
+
+ if (intel_context_lock_pinned(ce))
+ return -EINTR;
+
+ err = i915_active_wait(&ce->active);
+ if (err < 0)
+ goto unlock;
+
+ if (intel_context_is_pinned(ce)) {
+ err = -EBUSY; /* In active use, come back later! */
+ goto unlock;
+ }
+
+ if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ struct intel_ring *ring;
+
+ /* Replace the existing ringbuffer */
+ ring = intel_engine_create_ring(ce->engine, sz);
+ if (IS_ERR(ring)) {
+ err = PTR_ERR(ring);
+ goto unlock;
+ }
+
+ intel_ring_put(ce->ring);
+ ce->ring = ring;
+
+ /* Context image will be updated on next pin */
+ } else {
+ ce->ring = __intel_context_ring_size(sz);
+ }
+
+unlock:
+ intel_context_unlock_pinned(ce);
+ return err;
+}
+
+long intel_context_get_ring_size(struct intel_context *ce)
+{
+ long sz = (unsigned long)READ_ONCE(ce->ring);
+
+ if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ if (intel_context_lock_pinned(ce))
+ return -EINTR;
+
+ sz = ce->ring->size;
+ intel_context_unlock_pinned(ce);
+ }
+
+ return sz;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h
new file mode 100644
index 000000000000..f053d8633fe2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_CONTEXT_PARAM_H
+#define INTEL_CONTEXT_PARAM_H
+
+struct intel_context;
+
+int intel_context_set_ring_size(struct intel_context *ce, long sz);
+long intel_context_get_ring_size(struct intel_context *ce);
+
+#endif /* INTEL_CONTEXT_PARAM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
new file mode 100644
index 000000000000..57a30956c922
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_context.h"
+#include "intel_engine_pm.h"
+#include "intel_gpu_commands.h"
+#include "intel_lrc.h"
+#include "intel_lrc_reg.h"
+#include "intel_ring.h"
+#include "intel_sseu.h"
+
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+ const struct intel_context *ce,
+ const struct intel_sseu sseu)
+{
+ u64 offset;
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ offset = i915_ggtt_offset(ce->state) +
+ LRC_STATE_PN * PAGE_SIZE +
+ CTX_R_PWR_CLK_STATE * 4;
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+ *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu);
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int
+gen8_modify_rpcs(struct intel_context *ce, const struct intel_sseu sseu)
+{
+ struct i915_request *rq;
+ int ret;
+
+ lockdep_assert_held(&ce->pin_mutex);
+
+ /*
+ * If the context is not idle, we have to submit an ordered request to
+ * modify its context image via the kernel context (writing to our own
+ * image, or into the registers directory, does not stick). Pristine
+ * and idle contexts will be configured on pinning.
+ */
+ if (!intel_context_pin_if_active(ce))
+ return 0;
+
+ rq = intel_engine_create_kernel_request(ce->engine);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ goto out_unpin;
+ }
+
+ /* Serialise with the remote context */
+ ret = intel_context_prepare_remote_request(ce, rq);
+ if (ret == 0)
+ ret = gen8_emit_rpcs_config(rq, ce, sseu);
+
+ i915_request_add(rq);
+out_unpin:
+ intel_context_unpin(ce);
+ return ret;
+}
+
+int
+intel_context_reconfigure_sseu(struct intel_context *ce,
+ const struct intel_sseu sseu)
+{
+ int ret;
+
+ GEM_BUG_ON(INTEL_GEN(ce->engine->i915) < 8);
+
+ ret = intel_context_lock_pinned(ce);
+ if (ret)
+ return ret;
+
+ /* Nothing to do if unmodified. */
+ if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
+ goto unlock;
+
+ ret = gen8_modify_rpcs(ce, sseu);
+ if (!ret)
+ ce->sseu = sseu;
+
+unlock:
+ intel_context_unlock_pinned(ce);
+ return ret;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ca1420fb8b53..07cb83a0d017 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -7,6 +7,7 @@
#ifndef __INTEL_CONTEXT_TYPES__
#define __INTEL_CONTEXT_TYPES__
+#include <linux/average.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -19,6 +20,8 @@
#define CONTEXT_REDZONE POISON_INUSE
+DECLARE_EWMA(runtime, 3, 8);
+
struct i915_gem_context;
struct i915_vma;
struct intel_context;
@@ -42,8 +45,8 @@ struct intel_context {
struct intel_engine_cs *engine;
struct intel_engine_cs *inflight;
-#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
-#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
+#define intel_context_inflight(ce) ptr_mask_bits(READ_ONCE((ce)->inflight), 2)
+#define intel_context_inflight_count(ce) ptr_unmask_bits(READ_ONCE((ce)->inflight), 2)
struct i915_address_space *vm;
struct i915_gem_context __rcu *gem_context;
@@ -59,15 +62,25 @@ struct intel_context {
#define CONTEXT_BARRIER_BIT 0
#define CONTEXT_ALLOC_BIT 1
#define CONTEXT_VALID_BIT 2
-#define CONTEXT_USE_SEMAPHORES 3
-#define CONTEXT_BANNED 4
-#define CONTEXT_FORCE_SINGLE_SUBMISSION 5
-#define CONTEXT_NOPREEMPT 6
+#define CONTEXT_CLOSED_BIT 3
+#define CONTEXT_USE_SEMAPHORES 4
+#define CONTEXT_BANNED 5
+#define CONTEXT_FORCE_SINGLE_SUBMISSION 6
+#define CONTEXT_NOPREEMPT 7
u32 *lrc_reg_state;
u64 lrc_desc;
u32 tag; /* cookie passed to HW to track this context on submission */
+ /* Time on GPU as tracked by the hw. */
+ struct {
+ struct ewma_runtime avg;
+ u64 total;
+ u32 last;
+ I915_SELFTEST_DECLARE(u32 num_underflow);
+ I915_SELFTEST_DECLARE(u32 max_underflow);
+ } runtime;
+
unsigned int active_count; /* protected by timeline->mutex */
atomic_t pin_count;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 5df003061e44..b469de0dd9b6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -107,7 +107,20 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
static inline struct i915_request *
execlists_active(const struct intel_engine_execlists *execlists)
{
- return *READ_ONCE(execlists->active);
+ struct i915_request * const *cur, * const *old, *active;
+
+ cur = READ_ONCE(execlists->active);
+ smp_rmb(); /* pairs with overwrite protection in process_csb() */
+ do {
+ old = cur;
+
+ active = READ_ONCE(*cur);
+ cur = READ_ONCE(execlists->active);
+
+ smp_rmb(); /* and complete the seqlock retry */
+ } while (unlikely(cur != old));
+
+ return active;
}
static inline void
@@ -192,6 +205,8 @@ void intel_engines_free(struct intel_gt *gt);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);
+int intel_engine_resume(struct intel_engine_cs *engine);
+
int intel_ring_submission_setup(struct intel_engine_cs *engine);
int intel_engine_stop_cs(struct intel_engine_cs *engine);
@@ -303,26 +318,6 @@ intel_engine_find_active_request(struct intel_engine_cs *engine);
u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-
-static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
-{
- if (!execlists->preempt_hang.inject_hang)
- return false;
-
- complete(&execlists->preempt_hang.completion);
- return true;
-}
-
-#else
-
-static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
-{
- return false;
-}
-
-#endif
-
void intel_engine_init_active(struct intel_engine_cs *engine,
unsigned int subclass);
#define ENGINE_PHYSICAL 0
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 06ff7695fa29..3aa8a652c16d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -35,6 +35,7 @@
#include "intel_engine_user.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
+#include "intel_gt_pm.h"
#include "intel_lrc.h"
#include "intel_reset.h"
#include "intel_ring.h"
@@ -199,10 +200,10 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
* out in the wash.
*/
cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
- DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
- INTEL_GEN(gt->i915),
- cxt_size * 64,
- cxt_size - 1);
+ drm_dbg(&gt->i915->drm,
+ "gen%d CXT_SIZE = %d bytes [0x%08x]\n",
+ INTEL_GEN(gt->i915), cxt_size * 64,
+ cxt_size - 1);
return round_up(cxt_size * 64, PAGE_SIZE);
case 3:
case 2:
@@ -274,6 +275,7 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
{
const struct engine_info *info = &intel_engines[id];
+ struct drm_i915_private *i915 = gt->i915;
struct intel_engine_cs *engine;
BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
@@ -300,11 +302,11 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->id = id;
engine->legacy_idx = INVALID_ENGINE;
engine->mask = BIT(id);
- engine->i915 = gt->i915;
+ engine->i915 = i915;
engine->gt = gt;
engine->uncore = gt->uncore;
engine->hw_id = engine->guc_id = info->hw_id;
- engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases);
+ engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
engine->class = info->class;
engine->instance = info->instance;
@@ -312,6 +314,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->props.heartbeat_interval_ms =
CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
+ engine->props.max_busywait_duration_ns =
+ CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
engine->props.preempt_timeout_ms =
CONFIG_DRM_I915_PREEMPT_TIMEOUT;
engine->props.stop_timeout_ms =
@@ -319,11 +323,15 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->props.timeslice_duration_ms =
CONFIG_DRM_I915_TIMESLICE_DURATION;
+ /* Override to uninterruptible for OpenCL workloads. */
+ if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
+ engine->props.preempt_timeout_ms = 0;
+
engine->context_size = intel_engine_context_size(gt, engine->class);
if (WARN_ON(engine->context_size > BIT(20)))
engine->context_size = 0;
if (engine->context_size)
- DRIVER_CAPS(gt->i915)->has_logical_contexts = true;
+ DRIVER_CAPS(i915)->has_logical_contexts = true;
/* Nothing to do here, execute in order of dependencies */
engine->schedule = NULL;
@@ -339,7 +347,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
gt->engine_class[info->class][info->instance] = engine;
gt->engine[id] = engine;
- gt->i915->engine[id] = engine;
+ i915->engine[id] = engine;
return 0;
}
@@ -392,8 +400,24 @@ void intel_engines_release(struct intel_gt *gt)
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ /*
+ * Before we release the resources held by engine, we must be certain
+ * that the HW is no longer accessing them -- having the GPU scribble
+ * to or read from a page being used for something else causes no end
+ * of fun.
+ *
+ * The GPU should be reset by this point, but assume the worst just
+ * in case we aborted before completely initialising the engines.
+ */
+ GEM_BUG_ON(intel_gt_pm_is_awake(gt));
+ if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ __intel_gt_reset(gt, ALL_ENGINES);
+
/* Decouple the backend; but keep the layout for late GPU resets */
for_each_engine(engine, gt, id) {
+ intel_wakeref_wait_for_idle(&engine->wakeref);
+ GEM_BUG_ON(intel_engine_pm_is_awake(engine));
+
if (!engine->release)
continue;
@@ -432,9 +456,9 @@ int intel_engines_init_mmio(struct intel_gt *gt)
unsigned int i;
int err;
- WARN_ON(engine_mask == 0);
- WARN_ON(engine_mask &
- GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
+ drm_WARN_ON(&i915->drm, engine_mask == 0);
+ drm_WARN_ON(&i915->drm, engine_mask &
+ GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
if (i915_inject_probe_failure(i915))
return -ENODEV;
@@ -455,7 +479,7 @@ int intel_engines_init_mmio(struct intel_gt *gt)
* are added to the driver by a warning and disabling the forgotten
* engines.
*/
- if (WARN_ON(mask != engine_mask))
+ if (drm_WARN_ON(&i915->drm, mask != engine_mask))
device_info->engine_mask = mask;
RUNTIME_INFO(i915)->num_engines = hweight32(mask);
@@ -510,7 +534,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
{
unsigned int flags;
- flags = PIN_GLOBAL;
if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
/*
* On g33, we cannot place HWS above 256MiB, so
@@ -523,11 +546,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
* above the mappable region (even though we never
* actually map it).
*/
- flags |= PIN_MAPPABLE;
+ flags = PIN_MAPPABLE;
else
- flags |= PIN_HIGH;
+ flags = PIN_HIGH;
- return i915_vma_pin(vma, 0, 0, flags);
+ return i915_ggtt_pin(vma, 0, flags);
}
static int init_status_page(struct intel_engine_cs *engine)
@@ -546,7 +569,8 @@ static int init_status_page(struct intel_engine_cs *engine)
*/
obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
- DRM_ERROR("Failed to allocate status page\n");
+ drm_err(&engine->i915->drm,
+ "Failed to allocate status page\n");
return PTR_ERR(obj);
}
@@ -614,15 +638,15 @@ static int engine_setup_common(struct intel_engine_cs *engine)
struct measure_breadcrumb {
struct i915_request rq;
- struct intel_timeline timeline;
struct intel_ring ring;
u32 cs[1024];
};
-static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
+static int measure_breadcrumb_dw(struct intel_context *ce)
{
+ struct intel_engine_cs *engine = ce->engine;
struct measure_breadcrumb *frame;
- int dw = -ENOMEM;
+ int dw;
GEM_BUG_ON(!engine->gt->scratch);
@@ -630,39 +654,27 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
if (!frame)
return -ENOMEM;
- if (intel_timeline_init(&frame->timeline,
- engine->gt,
- engine->status_page.vma))
- goto out_frame;
-
- mutex_lock(&frame->timeline.mutex);
+ frame->rq.i915 = engine->i915;
+ frame->rq.engine = engine;
+ frame->rq.context = ce;
+ rcu_assign_pointer(frame->rq.timeline, ce->timeline);
frame->ring.vaddr = frame->cs;
frame->ring.size = sizeof(frame->cs);
frame->ring.effective_size = frame->ring.size;
intel_ring_update_space(&frame->ring);
-
- frame->rq.i915 = engine->i915;
- frame->rq.engine = engine;
frame->rq.ring = &frame->ring;
- rcu_assign_pointer(frame->rq.timeline, &frame->timeline);
-
- dw = intel_timeline_pin(&frame->timeline);
- if (dw < 0)
- goto out_timeline;
+ mutex_lock(&ce->timeline->mutex);
spin_lock_irq(&engine->active.lock);
+
dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
+
spin_unlock_irq(&engine->active.lock);
+ mutex_unlock(&ce->timeline->mutex);
GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
- intel_timeline_unpin(&frame->timeline);
-
-out_timeline:
- mutex_unlock(&frame->timeline.mutex);
- intel_timeline_fini(&frame->timeline);
-out_frame:
kfree(frame);
return dw;
}
@@ -737,12 +749,6 @@ static int engine_init_common(struct intel_engine_cs *engine)
engine->set_default_submission(engine);
- ret = measure_breadcrumb_dw(engine);
- if (ret < 0)
- return ret;
-
- engine->emit_fini_breadcrumb_dw = ret;
-
/*
* We may need to do things with the shrinker which
* require us to immediately switch back to the default
@@ -755,9 +761,18 @@ static int engine_init_common(struct intel_engine_cs *engine)
if (IS_ERR(ce))
return PTR_ERR(ce);
+ ret = measure_breadcrumb_dw(ce);
+ if (ret < 0)
+ goto err_context;
+
+ engine->emit_fini_breadcrumb_dw = ret;
engine->kernel_context = ce;
return 0;
+
+err_context:
+ intel_context_put(ce);
+ return ret;
}
int intel_engines_init(struct intel_gt *gt)
@@ -824,6 +839,20 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
intel_wa_list_free(&engine->whitelist);
}
+/**
+ * intel_engine_resume - re-initializes the HW state of the engine
+ * @engine: Engine to resume.
+ *
+ * Returns zero on success or an error code on failure.
+ */
+int intel_engine_resume(struct intel_engine_cs *engine)
+{
+ intel_engine_apply_workarounds(engine);
+ intel_engine_apply_whitelist(engine);
+
+ return engine->resume(engine);
+}
+
u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
@@ -982,6 +1011,12 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
instdone->slice_common =
intel_uncore_read(uncore, GEN7_SC_INSTDONE);
+ if (INTEL_GEN(i915) >= 12) {
+ instdone->slice_common_extra[0] =
+ intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA);
+ instdone->slice_common_extra[1] =
+ intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2);
+ }
for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
instdone->sampler[slice][subslice] =
read_subslice_reg(engine, slice, subslice,
@@ -1276,8 +1311,14 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
}
if (INTEL_GEN(dev_priv) >= 6) {
- drm_printf(m, "\tRING_IMR: %08x\n",
+ drm_printf(m, "\tRING_IMR: 0x%08x\n",
ENGINE_READ(engine, RING_IMR));
+ drm_printf(m, "\tRING_ESR: 0x%08x\n",
+ ENGINE_READ(engine, RING_ESR));
+ drm_printf(m, "\tRING_EMR: 0x%08x\n",
+ ENGINE_READ(engine, RING_EMR));
+ drm_printf(m, "\tRING_EIR: 0x%08x\n",
+ ENGINE_READ(engine, RING_EIR));
}
addr = intel_engine_get_active_head(engine);
@@ -1342,25 +1383,27 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
execlists_active_lock_bh(execlists);
rcu_read_lock();
for (port = execlists->active; (rq = *port); port++) {
- char hdr[80];
+ char hdr[160];
int len;
- len = snprintf(hdr, sizeof(hdr),
- "\t\tActive[%d]: ",
- (int)(port - execlists->active));
+ len = scnprintf(hdr, sizeof(hdr),
+ "\t\tActive[%d]: ",
+ (int)(port - execlists->active));
if (!i915_request_signaled(rq)) {
struct intel_timeline *tl = get_timeline(rq);
- len += snprintf(hdr + len, sizeof(hdr) - len,
- "ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
- i915_ggtt_offset(rq->ring->vma),
- tl ? tl->hwsp_offset : 0,
- hwsp_seqno(rq));
+ len += scnprintf(hdr + len, sizeof(hdr) - len,
+ "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
+ i915_ggtt_offset(rq->ring->vma),
+ tl ? tl->hwsp_offset : 0,
+ hwsp_seqno(rq),
+ DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
+ 1000 * 1000));
if (tl)
intel_timeline_put(tl);
}
- snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
+ scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr);
}
for (port = execlists->pending; (rq = *port); port++) {
@@ -1657,6 +1700,23 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
* we only care about the snapshot of this moment.
*/
lockdep_assert_held(&engine->active.lock);
+
+ rcu_read_lock();
+ request = execlists_active(&engine->execlists);
+ if (request) {
+ struct intel_timeline *tl = request->context->timeline;
+
+ list_for_each_entry_from_reverse(request, &tl->requests, link) {
+ if (i915_request_completed(request))
+ break;
+
+ active = request;
+ }
+ }
+ rcu_read_unlock();
+ if (active)
+ return active;
+
list_for_each_entry(request, &engine->active.requests, sched.link) {
if (i915_request_completed(request))
continue;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 6c6fd185457c..dd825718e4e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -180,7 +180,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
- int err = 0;
+ int err;
if (!intel_engine_has_preemption(engine))
return -ENODEV;
@@ -188,8 +188,10 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
if (!intel_engine_pm_get_if_awake(engine))
return 0;
- if (mutex_lock_interruptible(&ce->timeline->mutex))
+ if (mutex_lock_interruptible(&ce->timeline->mutex)) {
+ err = -EINTR;
goto out_rpm;
+ }
intel_context_enter(ce);
rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
@@ -204,6 +206,8 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
__i915_request_commit(rq);
__i915_request_queue(rq, &attr);
+ GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
+ err = 0;
out_unlock:
mutex_unlock(&ce->timeline->mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index ea90ab3e396e..b6cf284e3a2d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -112,7 +112,7 @@ __queue_and_release_pm(struct i915_request *rq,
{
struct intel_gt_timelines *timelines = &engine->gt->timelines;
- ENGINE_TRACE(engine, "\n");
+ ENGINE_TRACE(engine, "parking\n");
/*
* We have to serialise all potential retirement paths with our
@@ -249,7 +249,7 @@ static int __engine_park(struct intel_wakeref *wf)
if (!switch_to_kernel_context(engine))
return -EBUSY;
- ENGINE_TRACE(engine, "\n");
+ ENGINE_TRACE(engine, "parked\n");
call_idle_barriers(engine); /* cleanup after wedging */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 92be41a6903c..80cdde712842 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -75,6 +75,7 @@ struct intel_instdone {
u32 instdone;
/* The following exist only in the RCS engine */
u32 slice_common;
+ u32 slice_common_extra[2];
u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};
@@ -126,7 +127,6 @@ DECLARE_EWMA(_engine_latency, 6, 4)
struct st_preempt_hang {
struct completion completion;
unsigned int count;
- bool inject_hang;
};
/**
@@ -157,6 +157,16 @@ struct intel_engine_execlists {
struct i915_priolist default_priolist;
/**
+ * @error_interrupt: CS Master EIR
+ *
+ * The CS generates an interrupt when it detects an error. We capture
+ * the first error interrupt, record the EIR and schedule the tasklet.
+ * In the tasklet, we process the pending CS events to ensure we have
+ * the guilty request, and then reset the engine.
+ */
+ u32 error_interrupt;
+
+ /**
* @no_priolist: priority lists disabled
*/
bool no_priolist;
@@ -537,6 +547,7 @@ struct intel_engine_cs {
struct {
unsigned long heartbeat_interval_ms;
+ unsigned long max_busywait_duration_ns;
unsigned long preempt_timeout_ms;
unsigned long stop_timeout_ms;
unsigned long timeslice_duration_ms;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 9e7f12bef828..848decee9066 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -278,7 +278,8 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
}
}
- if (WARN(errors, "Invalid UABI engine mapping found"))
+ if (drm_WARN(&i915->drm, errors,
+ "Invalid UABI engine mapping found"))
i915->uabi_engines = RB_ROOT;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 531d501be01f..aed498a0d032 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -8,6 +8,8 @@
#include <asm/set_memory.h>
#include <asm/smp.h>
+#include <drm/i915_drm.h>
+
#include "intel_gt.h"
#include "i915_drv.h"
#include "i915_scatterlist.h"
@@ -104,27 +106,17 @@ static bool needs_idle_maps(struct drm_i915_private *i915)
return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
}
-static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
+void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
- struct drm_i915_private *i915 = ggtt->vm.i915;
-
- /*
- * Don't bother messing with faults pre GEN6 as we have little
- * documentation supporting that it's a good idea.
- */
- if (INTEL_GEN(i915) < 6)
- return;
+ struct i915_vma *vma;
- intel_gt_check_and_clear_faults(ggtt->vm.gt);
+ list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
+ i915_vma_wait_for_bind(vma);
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-
ggtt->invalidate(ggtt);
-}
-void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
-{
- ggtt_suspend_mappings(&i915->ggtt);
+ intel_gt_check_and_clear_faults(ggtt->vm.gt);
}
void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
@@ -167,6 +159,13 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
intel_gtt_chipset_flush();
}
+static u64 gen8_ggtt_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ return addr | _PAGE_PRESENT;
+}
+
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
writeq(pte, addr);
@@ -182,7 +181,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
gen8_pte_t __iomem *pte =
(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
- gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
+ gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));
ggtt->invalidate(ggtt);
}
@@ -195,7 +194,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
struct sgt_iter sgt_iter;
gen8_pte_t __iomem *gtt_entries;
- const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
+ const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
dma_addr_t addr;
/*
@@ -350,31 +349,6 @@ static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}
-struct clear_range {
- struct i915_address_space *vm;
- u64 start;
- u64 length;
-};
-
-static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
-{
- struct clear_range *arg = _arg;
-
- gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
- bxt_vtd_ggtt_wa(arg->vm);
-
- return 0;
-}
-
-static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
- u64 start,
- u64 length)
-{
- struct clear_range arg = { vm, start, length };
-
- stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
-}
-
static void gen6_ggtt_clear_range(struct i915_address_space *vm,
u64 start, u64 length)
{
@@ -462,7 +436,7 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
u64 size;
int ret;
- if (!USES_GUC(ggtt->vm.i915))
+ if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
return 0;
GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
@@ -472,7 +446,8 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
PIN_NOEVICT);
if (ret)
- DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
+ drm_dbg(&ggtt->vm.i915->drm,
+ "Failed to reserve top of GGTT for GuC\n");
return ret;
}
@@ -544,8 +519,9 @@ static int init_ggtt(struct i915_ggtt *ggtt)
/* Clear any non-preallocated blocks */
drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
- DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
- hole_start, hole_end);
+ drm_dbg_kms(&ggtt->vm.i915->drm,
+ "clearing unused GTT space: [%lx, %lx]\n",
+ hole_start, hole_end);
ggtt->vm.clear_range(&ggtt->vm, hole_start,
hole_end - hole_start);
}
@@ -879,8 +855,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
- if (ggtt->vm.clear_range != nop_clear_range)
- ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+ ggtt->vm.bind_async_flags =
+ I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
}
ggtt->invalidate = gen8_ggtt_invalidate;
@@ -890,7 +866,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
ggtt->vm.vma_ops.clear_pages = clear_pages;
- ggtt->vm.pte_encode = gen8_pte_encode;
+ ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
setup_private_pat(ggtt->vm.gt->uncore);
@@ -1180,7 +1156,7 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
ggtt->invalidate(ggtt);
}
-static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
+void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
struct i915_vma *vma;
bool flush = false;
@@ -1188,8 +1164,6 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
intel_gt_check_and_clear_faults(ggtt->vm.gt);
- mutex_lock(&ggtt->vm.mutex);
-
/* First fill our portion of the GTT with scratch pages */
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
@@ -1216,19 +1190,10 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
atomic_set(&ggtt->vm.open, open);
ggtt->invalidate(ggtt);
- mutex_unlock(&ggtt->vm.mutex);
-
if (flush)
wbinvd_on_all_cpus();
-}
-
-void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
-{
- struct i915_ggtt *ggtt = &i915->ggtt;
-
- ggtt_restore_mappings(ggtt);
- if (INTEL_GEN(i915) >= 8)
+ if (INTEL_GEN(ggtt->vm.i915) >= 8)
setup_private_pat(ggtt->vm.gt->uncore);
}
@@ -1267,6 +1232,7 @@ intel_rotate_pages(struct intel_rotation_info *rot_info,
struct drm_i915_gem_object *obj)
{
unsigned int size = intel_rotation_info_size(rot_info);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *st;
struct scatterlist *sg;
int ret = -ENOMEM;
@@ -1296,8 +1262,9 @@ err_sg_alloc:
kfree(st);
err_st_alloc:
- DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
- obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
+ drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+ obj->base.size, rot_info->plane[0].width,
+ rot_info->plane[0].height, size);
return ERR_PTR(ret);
}
@@ -1349,6 +1316,7 @@ intel_remap_pages(struct intel_remapped_info *rem_info,
struct drm_i915_gem_object *obj)
{
unsigned int size = intel_remapped_info_size(rem_info);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *st;
struct scatterlist *sg;
int ret = -ENOMEM;
@@ -1380,8 +1348,9 @@ err_sg_alloc:
kfree(st);
err_st_alloc:
- DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
- obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
+ drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+ obj->base.size, rem_info->plane[0].width,
+ rem_info->plane[0].height, size);
return ERR_PTR(ret);
}
@@ -1479,8 +1448,9 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
if (IS_ERR(vma->pages)) {
ret = PTR_ERR(vma->pages);
vma->pages = NULL;
- DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
- vma->ggtt_view.type, ret);
+ drm_err(&vma->vm->i915->drm,
+ "Failed to get pages for VMA view type %u (%d)!\n",
+ vma->ggtt_view.type, ret);
}
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 51b8718513bc..f04214a54f75 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -292,10 +292,21 @@
#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0)
#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0)
-#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16))
-#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16))
-#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16))
+#define STATE_BASE_ADDRESS \
+ ((0x3 << 29) | (0x0 << 27) | (0x1 << 24) | (0x1 << 16))
+#define BASE_ADDRESS_MODIFY REG_BIT(0)
+#define PIPELINE_SELECT \
+ ((0x3 << 29) | (0x1 << 27) | (0x1 << 24) | (0x4 << 16))
+#define PIPELINE_SELECT_MEDIA REG_BIT(0)
+#define GFX_OP_3DSTATE_VF_STATISTICS \
+ ((0x3 << 29) | (0x1 << 27) | (0x0 << 24) | (0xB << 16))
+#define MEDIA_VFE_STATE \
+ ((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x0 << 16))
#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18)
+#define MEDIA_INTERFACE_DESCRIPTOR_LOAD \
+ ((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x2 << 16))
+#define MEDIA_OBJECT \
+ ((0x3 << 29) | (0x2 << 27) | (0x1 << 24) | (0x0 << 16))
#define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16))
#define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16))
#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index da2b6e2ae692..d09f7596cb98 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -198,16 +198,16 @@ static void gen6_check_faults(struct intel_gt *gt)
for_each_engine(engine, gt, id) {
fault = GEN6_RING_FAULT_REG_READ(engine);
if (fault & RING_FAULT_VALID) {
- DRM_DEBUG_DRIVER("Unexpected fault\n"
- "\tAddr: 0x%08lx\n"
- "\tAddress space: %s\n"
- "\tSource ID: %d\n"
- "\tType: %d\n",
- fault & PAGE_MASK,
- fault & RING_FAULT_GTTSEL_MASK ?
- "GGTT" : "PPGTT",
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
+ drm_dbg(&engine->i915->drm, "Unexpected fault\n"
+ "\tAddr: 0x%08lx\n"
+ "\tAddress space: %s\n"
+ "\tSource ID: %d\n"
+ "\tType: %d\n",
+ fault & PAGE_MASK,
+ fault & RING_FAULT_GTTSEL_MASK ?
+ "GGTT" : "PPGTT",
+ RING_FAULT_SRCID(fault),
+ RING_FAULT_FAULT_TYPE(fault));
}
}
}
@@ -239,18 +239,17 @@ static void gen8_check_faults(struct intel_gt *gt)
fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
((u64)fault_data0 << 12);
- DRM_DEBUG_DRIVER("Unexpected fault\n"
- "\tAddr: 0x%08x_%08x\n"
- "\tAddress space: %s\n"
- "\tEngine ID: %d\n"
- "\tSource ID: %d\n"
- "\tType: %d\n",
- upper_32_bits(fault_addr),
- lower_32_bits(fault_addr),
- fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
- GEN8_RING_FAULT_ENGINE_ID(fault),
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
+ drm_dbg(&uncore->i915->drm, "Unexpected fault\n"
+ "\tAddr: 0x%08x_%08x\n"
+ "\tAddress space: %s\n"
+ "\tEngine ID: %d\n"
+ "\tSource ID: %d\n"
+ "\tType: %d\n",
+ upper_32_bits(fault_addr), lower_32_bits(fault_addr),
+ fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
+ GEN8_RING_FAULT_ENGINE_ID(fault),
+ RING_FAULT_SRCID(fault),
+ RING_FAULT_FAULT_TYPE(fault));
}
}
@@ -345,7 +344,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
goto err_unref;
}
- ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (ret)
goto err_unref;
@@ -455,6 +454,11 @@ err_rq:
if (!rq)
continue;
+ if (rq->fence.error) {
+ err = -EIO;
+ goto out;
+ }
+
GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
state = rq->context->state;
if (!state)
@@ -538,6 +542,10 @@ static int __engines_verify_workarounds(struct intel_gt *gt)
err = -EIO;
}
+ /* Flush and restore the kernel context for safety */
+ if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME)
+ err = -EIO;
+
return err;
}
@@ -584,7 +592,9 @@ int intel_gt_init(struct intel_gt *gt)
if (err)
goto err_engines;
- intel_uc_init(&gt->uc);
+ err = intel_uc_init(&gt->uc);
+ if (err)
+ goto err_engines;
err = intel_gt_resume(gt);
if (err)
@@ -634,6 +644,13 @@ void intel_gt_driver_remove(struct intel_gt *gt)
void intel_gt_driver_unregister(struct intel_gt *gt)
{
intel_rps_driver_unregister(&gt->rps);
+
+ /*
+ * Upon unregistering the device to prevent any new users, cancel
+ * all in-flight requests so that we can quickly unbind the active
+ * resources.
+ */
+ intel_gt_set_wedged(gt);
}
void intel_gt_driver_release(struct intel_gt *gt)
@@ -650,6 +667,9 @@ void intel_gt_driver_release(struct intel_gt *gt)
void intel_gt_driver_late_release(struct intel_gt *gt)
{
+ /* We need to wait for inflight RCU frees to release their grip */
+ rcu_barrier();
+
intel_uc_driver_late_release(&gt->uc);
intel_gt_fini_requests(gt);
intel_gt_fini_reset(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 1dac441cb8f4..4fac043750aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -14,7 +14,7 @@ struct drm_i915_private;
#define GT_TRACE(gt, fmt, ...) do { \
const struct intel_gt *gt__ __maybe_unused = (gt); \
- GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \
+ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \
##__VA_ARGS__); \
} while (0)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f796bdf1ed30..f0e7fd95165a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -24,6 +24,21 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
{
bool tasklet = false;
+ if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) {
+ u32 eir;
+
+ eir = ENGINE_READ(engine, RING_EIR);
+ ENGINE_TRACE(engine, "CS error: %x\n", eir);
+
+ /* Disable the error interrupt until after the reset */
+ if (likely(eir)) {
+ ENGINE_WRITE(engine, RING_EMR, ~0u);
+ ENGINE_WRITE(engine, RING_EIR, eir);
+ WRITE_ONCE(engine->execlists.error_interrupt, eir);
+ tasklet = true;
+ }
+ }
+
if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
tasklet = true;
@@ -210,7 +225,10 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
void gen11_gt_irq_postinstall(struct intel_gt *gt)
{
- const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT;
+ const u32 irqs =
+ GT_CS_MASTER_ERROR_INTERRUPT |
+ GT_RENDER_USER_INTERRUPT |
+ GT_CONTEXT_SWITCH_INTERRUPT;
struct intel_uncore *uncore = gt->uncore;
const u32 dmask = irqs << 16 | irqs;
const u32 smask = irqs << 16;
@@ -279,66 +297,56 @@ void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
GT_BSD_CS_ERROR_INTERRUPT |
- GT_RENDER_CS_MASTER_ERROR_INTERRUPT))
+ GT_CS_MASTER_ERROR_INTERRUPT))
DRM_DEBUG("Command parser error, gt_iir 0x%08x\n", gt_iir);
if (gt_iir & GT_PARITY_ERROR(gt->i915))
gen7_parity_error_irq_handler(gt, gt_iir);
}
-void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4])
+void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
{
void __iomem * const regs = gt->uncore->regs;
+ u32 iir;
if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
- gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0));
- if (likely(gt_iir[0]))
- raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]);
- }
-
- if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
- gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1));
- if (likely(gt_iir[1]))
- raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]);
- }
-
- if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
- gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2));
- if (likely(gt_iir[2]))
- raw_reg_write(regs, GEN8_GT_IIR(2), gt_iir[2]);
- }
-
- if (master_ctl & GEN8_GT_VECS_IRQ) {
- gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3));
- if (likely(gt_iir[3]))
- raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]);
- }
-}
-
-void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4])
-{
- if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
- cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
- gt_iir[0] >> GEN8_RCS_IRQ_SHIFT);
- cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
- gt_iir[0] >> GEN8_BCS_IRQ_SHIFT);
+ iir = raw_reg_read(regs, GEN8_GT_IIR(0));
+ if (likely(iir)) {
+ cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
+ iir >> GEN8_RCS_IRQ_SHIFT);
+ cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
+ iir >> GEN8_BCS_IRQ_SHIFT);
+ raw_reg_write(regs, GEN8_GT_IIR(0), iir);
+ }
}
if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
- cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
- gt_iir[1] >> GEN8_VCS0_IRQ_SHIFT);
- cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
- gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT);
+ iir = raw_reg_read(regs, GEN8_GT_IIR(1));
+ if (likely(iir)) {
+ cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
+ iir >> GEN8_VCS0_IRQ_SHIFT);
+ cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
+ iir >> GEN8_VCS1_IRQ_SHIFT);
+ raw_reg_write(regs, GEN8_GT_IIR(1), iir);
+ }
}
if (master_ctl & GEN8_GT_VECS_IRQ) {
- cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
- gt_iir[3] >> GEN8_VECS_IRQ_SHIFT);
+ iir = raw_reg_read(regs, GEN8_GT_IIR(3));
+ if (likely(iir)) {
+ cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
+ iir >> GEN8_VECS_IRQ_SHIFT);
+ raw_reg_write(regs, GEN8_GT_IIR(3), iir);
+ }
}
if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
- gen6_rps_irq_handler(&gt->rps, gt_iir[2]);
- guc_irq_handler(&gt->uc.guc, gt_iir[2] >> 16);
+ iir = raw_reg_read(regs, GEN8_GT_IIR(2));
+ if (likely(iir)) {
+ gen6_rps_irq_handler(&gt->rps, iir);
+ guc_irq_handler(&gt->uc.guc, iir >> 16);
+ raw_reg_write(regs, GEN8_GT_IIR(2), iir);
+ }
}
}
@@ -354,25 +362,18 @@ void gen8_gt_irq_reset(struct intel_gt *gt)
void gen8_gt_irq_postinstall(struct intel_gt *gt)
{
- struct intel_uncore *uncore = gt->uncore;
-
/* These are interrupts we'll toggle with the ring mask register */
- u32 gt_interrupts[] = {
- (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
- GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
- GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT |
- GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT),
-
- (GT_RENDER_USER_INTERRUPT << GEN8_VCS0_IRQ_SHIFT |
- GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS0_IRQ_SHIFT |
- GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT |
- GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT),
-
+ const u32 irqs =
+ GT_CS_MASTER_ERROR_INTERRUPT |
+ GT_RENDER_USER_INTERRUPT |
+ GT_CONTEXT_SWITCH_INTERRUPT;
+ const u32 gt_interrupts[] = {
+ irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
+ irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
0,
-
- (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT |
- GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT)
+ irqs << GEN8_VECS_IRQ_SHIFT,
};
+ struct intel_uncore *uncore = gt->uncore;
gt->pm_ier = 0x0;
gt->pm_imr = ~gt->pm_ier;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
index 8f37593712c9..886c5cf408a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
@@ -36,9 +36,8 @@ void gen5_gt_enable_irq(struct intel_gt *gt, u32 mask);
void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir);
-void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]);
+void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl);
void gen8_gt_irq_reset(struct intel_gt *gt);
-void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]);
void gen8_gt_irq_postinstall(struct intel_gt *gt);
#endif /* INTEL_GT_IRQ_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index d1c2f034296a..8b653c0f5e5f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -216,7 +216,7 @@ int intel_gt_resume(struct intel_gt *gt)
intel_engine_pm_get(engine);
engine->serial++; /* kernel context lost */
- err = engine->resume(engine);
+ err = intel_engine_resume(engine);
intel_engine_pm_put(engine);
if (err) {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 16acdc5d6734..2a72cce63fd9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -171,7 +171,9 @@ void __i915_vm_close(struct i915_address_space *vm)
{
struct i915_vma *vma, *vn;
- mutex_lock(&vm->mutex);
+ if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
+ return;
+
list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
@@ -186,6 +188,7 @@ void __i915_vm_close(struct i915_address_space *vm)
i915_gem_object_put(obj);
}
GEM_BUG_ON(!list_empty(&vm->bound_list));
+
mutex_unlock(&vm->mutex);
}
@@ -299,6 +302,25 @@ fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
}
+static void poison_scratch_page(struct page *page, unsigned long size)
+{
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return;
+
+ GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
+ do {
+ void *vaddr;
+
+ vaddr = kmap(page);
+ memset(vaddr, POISON_FREE, PAGE_SIZE);
+ kunmap(page);
+
+ page = pfn_to_page(page_to_pfn(page) + 1);
+ size -= PAGE_SIZE;
+ } while (size);
+}
+
int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
{
unsigned long size;
@@ -331,6 +353,17 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
if (unlikely(!page))
goto skip;
+ /*
+ * Use a non-zero scratch page for debugging.
+ *
+ * We want a value that should be reasonably obvious
+ * to spot in the error state, while also causing a GPU hang
+ * if executed. We prefer using a clear page in production, so
+ * should it ever be accidentally used, the effect should be
+ * fairly benign.
+ */
+ poison_scratch_page(page, size);
+
addr = dma_map_page_attrs(vm->dma,
page, 0, size,
PCI_DMA_BIDIRECTIONAL,
@@ -448,36 +481,12 @@ void gtt_write_workarounds(struct intel_gt *gt)
intel_uncore_write(uncore,
HSW_GTT_CACHE_EN,
can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
- WARN_ON_ONCE(can_use_gtt_cache &&
- intel_uncore_read(uncore,
- HSW_GTT_CACHE_EN) == 0);
+ drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
+ intel_uncore_read(uncore,
+ HSW_GTT_CACHE_EN) == 0);
}
}
-u64 gen8_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
- u32 flags)
-{
- gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
-
- if (unlikely(flags & PTE_READ_ONLY))
- pte &= ~_PAGE_RW;
-
- switch (level) {
- case I915_CACHE_NONE:
- pte |= PPAT_UNCACHED;
- break;
- case I915_CACHE_WT:
- pte |= PPAT_DISPLAY_ELLC;
- break;
- default:
- pte |= PPAT_CACHED;
- break;
- }
-
- return pte;
-}
-
static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
/* TGL doesn't support LLC or AGE settings */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 7da7681c20b1..b3116fe8d180 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -429,8 +429,7 @@ static inline void
i915_vm_close(struct i915_address_space *vm)
{
GEM_BUG_ON(!atomic_read(&vm->open));
- if (atomic_dec_and_test(&vm->open))
- __i915_vm_close(vm);
+ __i915_vm_close(vm);
i915_vm_put(vm);
}
@@ -512,12 +511,8 @@ int i915_ppgtt_init_hw(struct intel_gt *gt);
struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
-void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915);
-void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915);
-
-u64 gen8_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
- u32 flags);
+void i915_ggtt_suspend(struct i915_ggtt *gtt);
+void i915_ggtt_resume(struct i915_ggtt *ggtt);
int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c
index ceb785b75c25..e3f637b3650e 100644
--- a/drivers/gpu/drm/i915/gt/intel_llc.c
+++ b/drivers/gpu/drm/i915/gt/intel_llc.c
@@ -50,6 +50,9 @@ static bool get_ia_constants(struct intel_llc *llc,
struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
struct intel_rps *rps = &llc_to_gt(llc)->rps;
+ if (!HAS_LLC(i915) || IS_DGFX(i915))
+ return false;
+
if (rps->max_freq <= rps->min_freq)
return false;
@@ -147,8 +150,7 @@ static void gen6_update_ring_freq(struct intel_llc *llc)
void intel_llc_enable(struct intel_llc *llc)
{
- if (HAS_LLC(llc_to_gt(llc)->i915))
- gen6_update_ring_freq(llc);
+ gen6_update_ring_freq(llc);
}
void intel_llc_disable(struct intel_llc *llc)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 31455eceeb0c..683014e7bc51 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -176,8 +176,6 @@
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
-#define WA_TAIL_DWORDS 2
-#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
struct virtual_engine {
struct intel_engine_cs base;
@@ -247,7 +245,7 @@ static void mark_eio(struct i915_request *rq)
GEM_BUG_ON(i915_request_signaled(rq));
- dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_set_error_once(rq, -EIO);
i915_request_mark_complete(rq);
}
@@ -295,7 +293,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
static inline int rq_prio(const struct i915_request *rq)
{
- return rq->sched.attr.priority;
+ return READ_ONCE(rq->sched.attr.priority);
}
static int effective_prio(const struct i915_request *rq)
@@ -1006,7 +1004,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
i915_request_cancel_breadcrumb(rq);
spin_unlock(&rq->lock);
}
- rq->engine = owner;
+ WRITE_ONCE(rq->engine, owner);
owner->submit_request(rq);
active = NULL;
}
@@ -1197,6 +1195,48 @@ static void reset_active(struct i915_request *rq,
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
}
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+ /*
+ * We can use either ppHWSP[16] which is recorded before the context
+ * switch (and so excludes the cost of context switches) or use the
+ * value from the context image itself, which is saved/restored earlier
+ * and so includes the cost of the save.
+ */
+ return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
+static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
+{
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+ ce->runtime.num_underflow += dt < 0;
+ ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
+#endif
+}
+
+static void intel_context_update_runtime(struct intel_context *ce)
+{
+ u32 old;
+ s32 dt;
+
+ if (intel_context_is_barrier(ce))
+ return;
+
+ old = ce->runtime.last;
+ ce->runtime.last = intel_context_get_runtime(ce);
+ dt = ce->runtime.last - old;
+
+ if (unlikely(dt <= 0)) {
+ CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
+ old, ce->runtime.last, dt);
+ st_update_runtime_underflow(ce, dt);
+ return;
+ }
+
+ ewma_runtime_add(&ce->runtime.avg, dt);
+ ce->runtime.total += dt;
+}
+
static inline struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
@@ -1211,12 +1251,12 @@ __execlists_schedule_in(struct i915_request *rq)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
execlists_check_context(ce, engine);
+ ce->lrc_desc &= ~GENMASK_ULL(47, 37);
if (ce->tag) {
/* Use a fixed tag for OA and friends */
ce->lrc_desc |= (u64)ce->tag << 32;
} else {
/* We don't need a strict matching tag, just different values */
- ce->lrc_desc &= ~GENMASK_ULL(47, 37);
ce->lrc_desc |=
(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
GEN11_SW_CTX_ID_SHIFT;
@@ -1276,10 +1316,11 @@ __execlists_schedule_out(struct i915_request *rq,
* If we have just completed this context, the engine may now be
* idle and we want to re-enter powersaving.
*/
- if (list_is_last(&rq->link, &ce->timeline->requests) &&
+ if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
i915_request_completed(rq))
intel_engine_add_retire(engine, ce->timeline);
+ intel_context_update_runtime(ce);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
intel_gt_pm_put_async(engine->gt);
@@ -1395,15 +1436,26 @@ trace_ports(const struct intel_engine_execlists *execlists,
ports[1] ? ports[1]->fence.seqno : 0);
}
+static inline bool
+reset_in_progress(const struct intel_engine_execlists *execlists)
+{
+ return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
+}
+
static __maybe_unused bool
assert_pending_valid(const struct intel_engine_execlists *execlists,
const char *msg)
{
struct i915_request * const *port, *rq;
struct intel_context *ce = NULL;
+ bool sentinel = false;
trace_ports(execlists, msg, execlists->pending);
+ /* We may be messing around with the lists during reset, lalala */
+ if (reset_in_progress(execlists))
+ return true;
+
if (!execlists->pending[0]) {
GEM_TRACE_ERR("Nothing pending for promotion!\n");
return false;
@@ -1430,6 +1482,26 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
ce = rq->context;
+ /*
+ * Sentinels are supposed to be lonely so they flush the
+ * current exection off the HW. Check that they are the
+ * only request in the pending submission.
+ */
+ if (sentinel) {
+ GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n",
+ ce->timeline->fence_context,
+ port - execlists->pending);
+ return false;
+ }
+
+ sentinel = i915_request_has_sentinel(rq);
+ if (sentinel && port != execlists->pending) {
+ GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n",
+ ce->timeline->fence_context,
+ port - execlists->pending);
+ return false;
+ }
+
/* Hold tightly onto the lock to prevent concurrent retires! */
if (!spin_trylock_irqsave(&rq->lock, flags))
continue;
@@ -1525,6 +1597,11 @@ static bool can_merge_ctx(const struct intel_context *prev,
return true;
}
+static unsigned long i915_request_flags(const struct i915_request *rq)
+{
+ return READ_ONCE(rq->fence.flags);
+}
+
static bool can_merge_rq(const struct i915_request *prev,
const struct i915_request *next)
{
@@ -1542,7 +1619,7 @@ static bool can_merge_rq(const struct i915_request *prev,
if (i915_request_completed(next))
return true;
- if (unlikely((prev->fence.flags ^ next->fence.flags) &
+ if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
(BIT(I915_FENCE_FLAG_NOPREEMPT) |
BIT(I915_FENCE_FLAG_SENTINEL))))
return false;
@@ -1550,6 +1627,7 @@ static bool can_merge_rq(const struct i915_request *prev,
if (!can_merge_ctx(prev->context, next->context))
return false;
+ GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
return true;
}
@@ -1585,7 +1663,7 @@ static bool virtual_matches(const struct virtual_engine *ve,
}
static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
- struct intel_engine_cs *engine)
+ struct i915_request *rq)
{
struct intel_engine_cs *old = ve->siblings[0];
@@ -1593,9 +1671,19 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
spin_lock(&old->breadcrumbs.irq_lock);
if (!list_empty(&ve->context.signal_link)) {
- list_move_tail(&ve->context.signal_link,
- &engine->breadcrumbs.signalers);
- intel_engine_signal_breadcrumbs(engine);
+ list_del_init(&ve->context.signal_link);
+
+ /*
+ * We cannot acquire the new engine->breadcrumbs.irq_lock
+ * (as we are holding a breadcrumbs.irq_lock already),
+ * so attach this request to the signaler on submission.
+ * The queued irq_work will occur when we finally drop
+ * the engine->active.lock after dequeue.
+ */
+ set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags);
+
+ /* Also transfer the pending irq_work for the old breadcrumb. */
+ intel_engine_signal_breadcrumbs(rq->engine);
}
spin_unlock(&old->breadcrumbs.irq_lock);
}
@@ -1605,6 +1693,11 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
&(rq__)->sched.waiters_list, \
wait_link)
+#define for_each_signaler(p__, rq__) \
+ list_for_each_entry_rcu(p__, \
+ &(rq__)->sched.signalers_list, \
+ signal_link)
+
static void defer_request(struct i915_request *rq, struct list_head * const pl)
{
LIST_HEAD(list);
@@ -1693,12 +1786,13 @@ timeslice(const struct intel_engine_cs *engine)
static unsigned long
active_timeslice(const struct intel_engine_cs *engine)
{
- const struct i915_request *rq = *engine->execlists.active;
+ const struct intel_engine_execlists *execlists = &engine->execlists;
+ const struct i915_request *rq = *execlists->active;
if (!rq || i915_request_completed(rq))
return 0;
- if (engine->execlists.switch_priority_hint < effective_prio(rq))
+ if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
return 0;
return timeslice(engine);
@@ -1715,8 +1809,11 @@ static void set_timeslice(struct intel_engine_cs *engine)
static void start_timeslice(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
+ int prio = queue_prio(execlists);
- execlists->switch_priority_hint = execlists->queue_priority_hint;
+ WRITE_ONCE(execlists->switch_priority_hint, prio);
+ if (prio == INT_MIN)
+ return;
if (timer_pending(&execlists->timer))
return;
@@ -1938,13 +2035,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
"",
yesno(engine != ve->siblings[0]));
- ve->request = NULL;
- ve->base.execlists.queue_priority_hint = INT_MIN;
+ WRITE_ONCE(ve->request, NULL);
+ WRITE_ONCE(ve->base.execlists.queue_priority_hint,
+ INT_MIN);
rb_erase_cached(rb, &execlists->virtual);
RB_CLEAR_NODE(rb);
GEM_BUG_ON(!(rq->execution_mask & engine->mask));
- rq->engine = engine;
+ WRITE_ONCE(rq->engine, engine);
if (engine != ve->siblings[0]) {
u32 *regs = ve->context.lrc_reg_state;
@@ -1957,7 +2055,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
engine);
if (!list_empty(&ve->context.signals))
- virtual_xfer_breadcrumbs(ve, engine);
+ virtual_xfer_breadcrumbs(ve, rq);
/*
* Move the bound engine to the top of the list
@@ -2064,6 +2162,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(last &&
!can_merge_ctx(last->context,
rq->context));
+ GEM_BUG_ON(last &&
+ i915_seqno_passed(last->fence.seqno,
+ rq->fence.seqno));
submit = true;
last = rq;
@@ -2134,6 +2235,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
execlists_schedule_out(*port);
clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
+ smp_wmb(); /* complete the seqlock for execlists_active() */
WRITE_ONCE(execlists->active, execlists->inflight);
}
@@ -2144,12 +2246,6 @@ invalidate_csb_entries(const u32 *first, const u32 *last)
clflush((void *)last);
}
-static inline bool
-reset_in_progress(const struct intel_engine_execlists *execlists)
-{
- return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
-}
-
/*
* Starting with Gen12, the status has a new format:
*
@@ -2240,7 +2336,6 @@ static void process_csb(struct intel_engine_cs *engine)
*/
head = execlists->csb_head;
tail = READ_ONCE(*execlists->csb_write);
- ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
if (unlikely(head == tail))
return;
@@ -2254,6 +2349,7 @@ static void process_csb(struct intel_engine_cs *engine)
*/
rmb();
+ ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
do {
bool promote;
@@ -2288,11 +2384,13 @@ static void process_csb(struct intel_engine_cs *engine)
if (promote) {
struct i915_request * const *old = execlists->active;
+ GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+
+ ring_set_paused(engine, 0);
+
/* Point active to the new ELSP; prevent overwriting */
WRITE_ONCE(execlists->active, execlists->pending);
-
- if (!inject_preempt_hang(execlists))
- ring_set_paused(engine, 0);
+ smp_wmb(); /* notify execlists_active() */
/* cancel old inflight, prepare for switch */
trace_ports(execlists, "preempted", old);
@@ -2300,12 +2398,12 @@ static void process_csb(struct intel_engine_cs *engine)
execlists_schedule_out(*old++);
/* switch pending to inflight */
- GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
- WRITE_ONCE(execlists->active,
- memcpy(execlists->inflight,
- execlists->pending,
- execlists_num_ports(execlists) *
- sizeof(*execlists->pending)));
+ memcpy(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists) *
+ sizeof(*execlists->pending));
+ smp_wmb(); /* complete the seqlock */
+ WRITE_ONCE(execlists->active, execlists->inflight);
WRITE_ONCE(execlists->pending[0], NULL);
} else {
@@ -2320,8 +2418,37 @@ static void process_csb(struct intel_engine_cs *engine)
* coherent (visible from the CPU) before the
* user interrupt and CSB is processed.
*/
- GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
- !reset_in_progress(execlists));
+ if (GEM_SHOW_DEBUG() &&
+ !i915_request_completed(*execlists->active) &&
+ !reset_in_progress(execlists)) {
+ struct i915_request *rq __maybe_unused =
+ *execlists->active;
+ const u32 *regs __maybe_unused =
+ rq->context->lrc_reg_state;
+
+ ENGINE_TRACE(engine,
+ "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
+ ENGINE_READ(engine, RING_START),
+ ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
+ ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
+ ENGINE_READ(engine, RING_CTL),
+ ENGINE_READ(engine, RING_MI_MODE));
+ ENGINE_TRACE(engine,
+ "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
+ i915_ggtt_offset(rq->ring->vma),
+ rq->head, rq->tail,
+ rq->fence.context,
+ lower_32_bits(rq->fence.seqno),
+ hwsp_seqno(rq));
+ ENGINE_TRACE(engine,
+ "ctx:{start:%08x, head:%04x, tail:%04x}, ",
+ regs[CTX_RING_START],
+ regs[CTX_RING_HEAD],
+ regs[CTX_RING_TAIL]);
+
+ GEM_BUG_ON("context completed before request");
+ }
+
execlists_schedule_out(*execlists->active++);
GEM_BUG_ON(execlists->active - execlists->inflight >
@@ -2349,7 +2476,7 @@ static void process_csb(struct intel_engine_cs *engine)
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
lockdep_assert_held(&engine->active.lock);
- if (!engine->execlists.pending[0]) {
+ if (!READ_ONCE(engine->execlists.pending[0])) {
rcu_read_lock(); /* protect peeking at execlists->active */
execlists_dequeue(engine);
rcu_read_unlock();
@@ -2366,12 +2493,12 @@ static void __execlists_hold(struct i915_request *rq)
if (i915_request_is_active(rq))
__i915_request_unsubmit(rq);
- RQ_TRACE(rq, "on hold\n");
clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
list_move_tail(&rq->sched.link, &rq->engine->active.hold);
i915_request_set_hold(rq);
+ RQ_TRACE(rq, "on hold\n");
- list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ for_each_waiter(p, rq) {
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
@@ -2385,7 +2512,7 @@ static void __execlists_hold(struct i915_request *rq)
if (i915_request_completed(w))
continue;
- if (i915_request_on_hold(rq))
+ if (i915_request_on_hold(w))
continue;
list_move_tail(&w->sched.link, &list);
@@ -2443,6 +2570,7 @@ static bool execlists_hold(struct intel_engine_cs *engine,
GEM_BUG_ON(i915_request_on_hold(rq));
GEM_BUG_ON(rq->engine != engine);
__execlists_hold(rq);
+ GEM_BUG_ON(list_empty(&engine->active.hold));
unlock:
spin_unlock_irq(&engine->active.lock);
@@ -2452,23 +2580,27 @@ unlock:
static bool hold_request(const struct i915_request *rq)
{
struct i915_dependency *p;
+ bool result = false;
/*
* If one of our ancestors is on hold, we must also be on hold,
* otherwise we will bypass it and execute before it.
*/
- list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
+ rcu_read_lock();
+ for_each_signaler(p, rq) {
const struct i915_request *s =
container_of(p->signaler, typeof(*s), sched);
if (s->engine != rq->engine)
continue;
- if (i915_request_on_hold(s))
- return true;
+ result = i915_request_on_hold(s);
+ if (result)
+ break;
}
+ rcu_read_unlock();
- return false;
+ return result;
}
static void __execlists_unhold(struct i915_request *rq)
@@ -2478,6 +2610,8 @@ static void __execlists_unhold(struct i915_request *rq)
do {
struct i915_dependency *p;
+ RQ_TRACE(rq, "hold release\n");
+
GEM_BUG_ON(!i915_request_on_hold(rq));
GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
@@ -2486,21 +2620,24 @@ static void __execlists_unhold(struct i915_request *rq)
i915_sched_lookup_priolist(rq->engine,
rq_prio(rq)));
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
- RQ_TRACE(rq, "hold release\n");
/* Also release any children on this engine that are ready */
- list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ for_each_waiter(p, rq) {
struct i915_request *w =
container_of(p->waiter, typeof(*w), sched);
+ /* Propagate any change in error status */
+ if (rq->fence.error)
+ i915_request_set_error_once(w, rq->fence.error);
+
if (w->engine != rq->engine)
continue;
- if (!i915_request_on_hold(rq))
+ if (!i915_request_on_hold(w))
continue;
/* Check that no other parents are also on hold */
- if (hold_request(rq))
+ if (hold_request(w))
continue;
list_move_tail(&w->sched.link, &list);
@@ -2615,13 +2752,13 @@ static bool execlists_capture(struct intel_engine_cs *engine)
if (!cap)
return true;
+ spin_lock_irq(&engine->active.lock);
cap->rq = execlists_active(&engine->execlists);
- GEM_BUG_ON(!cap->rq);
-
- rcu_read_lock();
- cap->rq = active_request(cap->rq->context->timeline, cap->rq);
- cap->rq = i915_request_get_rcu(cap->rq);
- rcu_read_unlock();
+ if (cap->rq) {
+ cap->rq = active_request(cap->rq->context->timeline, cap->rq);
+ cap->rq = i915_request_get_rcu(cap->rq);
+ }
+ spin_unlock_irq(&engine->active.lock);
if (!cap->rq)
goto err_free;
@@ -2660,27 +2797,25 @@ err_free:
return false;
}
-static noinline void preempt_reset(struct intel_engine_cs *engine)
+static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
{
const unsigned int bit = I915_RESET_ENGINE + engine->id;
unsigned long *lock = &engine->gt->reset.flags;
- if (i915_modparams.reset < 3)
+ if (!intel_has_reset_engine(engine->gt))
return;
if (test_and_set_bit(bit, lock))
return;
+ ENGINE_TRACE(engine, "reset for %s\n", msg);
+
/* Mark this tasklet as disabled to avoid waiting for it to complete */
tasklet_disable_nosync(&engine->execlists.tasklet);
- ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
- READ_ONCE(engine->props.preempt_timeout_ms),
- jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
-
ring_set_paused(engine, 1); /* Freeze the current request in place */
if (execlists_capture(engine))
- intel_engine_reset(engine, "preemption time out");
+ intel_engine_reset(engine, msg);
else
ring_set_paused(engine, 0);
@@ -2711,6 +2846,13 @@ static void execlists_submission_tasklet(unsigned long data)
bool timeout = preempt_timeout(engine);
process_csb(engine);
+
+ if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
+ engine->execlists.error_interrupt = 0;
+ if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */
+ execlists_reset(engine, "CS error");
+ }
+
if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
unsigned long flags;
@@ -2719,8 +2861,8 @@ static void execlists_submission_tasklet(unsigned long data)
spin_unlock_irqrestore(&engine->active.lock, flags);
/* Recheck after serialising with direct-submission */
- if (timeout && preempt_timeout(engine))
- preempt_reset(engine);
+ if (unlikely(timeout && preempt_timeout(engine)))
+ execlists_reset(engine, "preemption time out");
}
}
@@ -2793,6 +2935,7 @@ static void execlists_submit_request(struct i915_request *request)
spin_lock_irqsave(&engine->active.lock, flags);
if (unlikely(ancestor_on_hold(engine, request))) {
+ RQ_TRACE(request, "ancestor on hold\n");
list_add_tail(&request->sched.link, &engine->active.hold);
i915_request_set_hold(request);
} else {
@@ -2874,6 +3017,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
regs[CTX_RING_HEAD] = head;
regs[CTX_RING_TAIL] = ring->tail;
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
/* RPCS */
if (engine->class == RENDER_CLASS) {
@@ -2921,22 +3065,6 @@ static void execlists_context_reset(struct intel_context *ce)
CE_TRACE(ce, "reset\n");
GEM_BUG_ON(!intel_context_is_pinned(ce));
- /*
- * Because we emit WA_TAIL_DWORDS there may be a disparity
- * between our bookkeeping in ce->ring->head and ce->ring->tail and
- * that stored in context. As we only write new commands from
- * ce->ring->tail onwards, everything before that is junk. If the GPU
- * starts reading from its RING_HEAD from the context, it may try to
- * execute that junk and die.
- *
- * The contexts that are stilled pinned on resume belong to the
- * kernel, and are local to each engine. All other contexts will
- * have their head/tail sanitized upon pinning before use, so they
- * will never see garbage,
- *
- * So to avoid that we reset the context images upon resume. For
- * simplicity, we just zero everything out.
- */
intel_ring_reset(ce->ring, ce->ring->emit);
/* Scrub away the garbage */
@@ -2964,7 +3092,8 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
- GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
+ if (!i915_request_timeline(rq)->has_initial_breadcrumb)
+ return 0;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
@@ -3257,7 +3386,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
goto err;
}
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (err)
goto err;
@@ -3346,6 +3475,49 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return ret;
}
+static void enable_error_interrupt(struct intel_engine_cs *engine)
+{
+ u32 status;
+
+ engine->execlists.error_interrupt = 0;
+ ENGINE_WRITE(engine, RING_EMR, ~0u);
+ ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
+
+ status = ENGINE_READ(engine, RING_ESR);
+ if (unlikely(status)) {
+ dev_err(engine->i915->drm.dev,
+ "engine '%s' resumed still in error: %08x\n",
+ engine->name, status);
+ __intel_gt_reset(engine->gt, engine->mask);
+ }
+
+ /*
+ * On current gen8+, we have 2 signals to play with
+ *
+ * - I915_ERROR_INSTUCTION (bit 0)
+ *
+ * Generate an error if the command parser encounters an invalid
+ * instruction
+ *
+ * This is a fatal error.
+ *
+ * - CP_PRIV (bit 2)
+ *
+ * Generate an error on privilege violation (where the CP replaces
+ * the instruction with a no-op). This also fires for writes into
+ * read-only scratch pages.
+ *
+ * This is a non-fatal error, parsing continues.
+ *
+ * * there are a few others defined for odd HW that we do not use
+ *
+ * Since CP_PRIV fires for cases where we have chosen to ignore the
+ * error (as the HW is validating and suppressing the mistakes), we
+ * only unmask the instruction error bit.
+ */
+ ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
+}
+
static void enable_execlists(struct intel_engine_cs *engine)
{
u32 mode;
@@ -3367,6 +3539,8 @@ static void enable_execlists(struct intel_engine_cs *engine)
i915_ggtt_offset(engine->status_page.vma));
ENGINE_POSTING_READ(engine, RING_HWS_PGA);
+ enable_error_interrupt(engine);
+
engine->context_tag = 0;
}
@@ -3384,9 +3558,6 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
static int execlists_resume(struct intel_engine_cs *engine)
{
- intel_engine_apply_workarounds(engine);
- intel_engine_apply_whitelist(engine);
-
intel_mocs_init_engine(engine);
intel_engine_reset_breadcrumbs(engine);
@@ -3517,9 +3688,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
if (!rq)
goto unwind;
- /* We still have requests in-flight; the engine should be active */
- GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
-
ce = rq->context;
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
@@ -3529,8 +3697,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
goto out_replay;
}
+ /* We still have requests in-flight; the engine should be active */
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
/* Context has requests still in-flight; it should not be idle! */
GEM_BUG_ON(i915_active_is_idle(&ce->active));
+
rq = active_request(ce->timeline, rq);
head = intel_ring_wrap(ce->ring, rq->head);
GEM_BUG_ON(head == ce->ring->tail);
@@ -3604,7 +3776,10 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
static void nop_submission_tasklet(unsigned long data)
{
+ struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+
/* The driver is wedged; don't process any more events. */
+ WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
}
static void execlists_reset_cancel(struct intel_engine_cs *engine)
@@ -4273,6 +4448,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
+ engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -4514,8 +4690,13 @@ populate_lr_context(struct intel_context *ce,
inhibit = false;
}
- /* The second page of the context object contains some fields which must
- * be set up prior to the first execution. */
+ /* Clear the ppHWSP (inc. per-context counters) */
+ memset(vaddr, 0, PAGE_SIZE);
+
+ /*
+ * The second page of the context object contains some registers which
+ * must be set up prior to the first execution.
+ */
execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
ce, engine, ring, inhibit);
@@ -4553,8 +4734,17 @@ static int __execlists_context_alloc(struct intel_context *ce,
if (!ce->timeline) {
struct intel_timeline *tl;
+ struct i915_vma *hwsp;
- tl = intel_timeline_create(engine->gt, NULL);
+ /*
+ * Use the static global HWSP for the kernel context, and
+ * a dynamically allocated cacheline for everyone else.
+ */
+ hwsp = NULL;
+ if (unlikely(intel_context_is_barrier(ce)))
+ hwsp = engine->status_page.vma;
+
+ tl = intel_timeline_create(engine->gt, hwsp);
if (IS_ERR(tl)) {
ret = PTR_ERR(tl);
goto error_deref_obj;
@@ -4723,7 +4913,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
mask = rq->execution_mask;
if (unlikely(!mask)) {
/* Invalid selection, submit to a random engine in error */
- i915_request_skip(rq, -ENODEV);
+ i915_request_set_error_once(rq, -ENODEV);
mask = ve->siblings[0]->mask;
}
@@ -4737,7 +4927,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
static void virtual_submission_tasklet(unsigned long data)
{
struct virtual_engine * const ve = (struct virtual_engine *)data;
- const int prio = ve->base.execlists.queue_priority_hint;
+ const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
intel_engine_mask_t mask;
unsigned int n;
@@ -5133,11 +5323,15 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
show_request(m, last, "\t\tE ");
}
- last = NULL;
- count = 0;
+ if (execlists->switch_priority_hint != INT_MIN)
+ drm_printf(m, "\t\tSwitch priority hint: %d\n",
+ READ_ONCE(execlists->switch_priority_hint));
if (execlists->queue_priority_hint != INT_MIN)
drm_printf(m, "\t\tQueue priority hint: %d\n",
- execlists->queue_priority_hint);
+ READ_ONCE(execlists->queue_priority_hint));
+
+ last = NULL;
+ count = 0;
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
int i;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 08a3be65f700..d39b72590e40 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -17,6 +17,7 @@
#define CTX_RING_CTL (0x0a + 1)
#define CTX_BB_STATE (0x10 + 1)
#define CTX_BB_PER_CTX_PTR (0x18 + 1)
+#define CTX_TIMESTAMP (0x22 + 1)
#define CTX_PDP3_UDW (0x24 + 1)
#define CTX_PDP3_LDW (0x26 + 1)
#define CTX_PDP2_UDW (0x28 + 1)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index eeef90b55c64..632e08a4592b 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -280,9 +280,32 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = {
GEN11_MOCS_ENTRIES
};
-static bool get_mocs_settings(const struct drm_i915_private *i915,
- struct drm_i915_mocs_table *table)
+enum {
+ HAS_GLOBAL_MOCS = BIT(0),
+ HAS_ENGINE_MOCS = BIT(1),
+ HAS_RENDER_L3CC = BIT(2),
+};
+
+static bool has_l3cc(const struct drm_i915_private *i915)
{
+ return true;
+}
+
+static bool has_global_mocs(const struct drm_i915_private *i915)
+{
+ return HAS_GLOBAL_MOCS_REGISTERS(i915);
+}
+
+static bool has_mocs(const struct drm_i915_private *i915)
+{
+ return !IS_DGFX(i915);
+}
+
+static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
+ struct drm_i915_mocs_table *table)
+{
+ unsigned int flags;
+
if (INTEL_GEN(i915) >= 12) {
table->size = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
@@ -300,13 +323,13 @@ static bool get_mocs_settings(const struct drm_i915_private *i915,
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->table = broxton_mocs_table;
} else {
- WARN_ONCE(INTEL_GEN(i915) >= 9,
- "Platform that should have a MOCS table does not.\n");
- return false;
+ drm_WARN_ONCE(&i915->drm, INTEL_GEN(i915) >= 9,
+ "Platform that should have a MOCS table does not.\n");
+ return 0;
}
if (GEM_DEBUG_WARN_ON(table->size > table->n_entries))
- return false;
+ return 0;
/* WaDisableSkipCaching:skl,bxt,kbl,glk */
if (IS_GEN(i915, 9)) {
@@ -315,10 +338,20 @@ static bool get_mocs_settings(const struct drm_i915_private *i915,
for (i = 0; i < table->size; i++)
if (GEM_DEBUG_WARN_ON(table->table[i].l3cc_value &
(L3_ESC(1) | L3_SCC(0x7))))
- return false;
+ return 0;
}
- return true;
+ flags = 0;
+ if (has_mocs(i915)) {
+ if (has_global_mocs(i915))
+ flags |= HAS_GLOBAL_MOCS;
+ else
+ flags |= HAS_ENGINE_MOCS;
+ }
+ if (has_l3cc(i915))
+ flags |= HAS_RENDER_L3CC;
+
+ return flags;
}
/*
@@ -411,18 +444,20 @@ static void init_l3cc_table(struct intel_engine_cs *engine,
void intel_mocs_init_engine(struct intel_engine_cs *engine)
{
struct drm_i915_mocs_table table;
+ unsigned int flags;
/* Called under a blanket forcewake */
assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
- if (!get_mocs_settings(engine->i915, &table))
+ flags = get_mocs_settings(engine->i915, &table);
+ if (!flags)
return;
/* Platforms with global MOCS do not need per-engine initialization. */
- if (!HAS_GLOBAL_MOCS_REGISTERS(engine->i915))
+ if (flags & HAS_ENGINE_MOCS)
init_mocs_table(engine, &table);
- if (engine->class == RENDER_CLASS)
+ if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
init_l3cc_table(engine, &table);
}
@@ -431,26 +466,17 @@ static u32 global_mocs_offset(void)
return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
}
-static void init_global_mocs(struct intel_gt *gt)
+void intel_mocs_init(struct intel_gt *gt)
{
struct drm_i915_mocs_table table;
+ unsigned int flags;
/*
* LLC and eDRAM control values are not applicable to dgfx
*/
- if (IS_DGFX(gt->i915))
- return;
-
- if (!get_mocs_settings(gt->i915, &table))
- return;
-
- __init_mocs_table(gt->uncore, &table, global_mocs_offset());
-}
-
-void intel_mocs_init(struct intel_gt *gt)
-{
- if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
- init_global_mocs(gt);
+ flags = get_mocs_settings(gt->i915, &table);
+ if (flags & HAS_GLOBAL_MOCS)
+ __init_mocs_table(gt->uncore, &table, global_mocs_offset());
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 9e303c29d6e3..3847ee44b181 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -7,6 +7,7 @@
#include <linux/pm_runtime.h>
#include "i915_drv.h"
+#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
@@ -226,10 +227,7 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
set(uncore, GEN6_RC_SLEEP, 0);
set(uncore, GEN6_RC1e_THRESHOLD, 1000);
- if (IS_IVYBRIDGE(i915))
- set(uncore, GEN6_RC6_THRESHOLD, 125000);
- else
- set(uncore, GEN6_RC6_THRESHOLD, 50000);
+ set(uncore, GEN6_RC6_THRESHOLD, 50000);
set(uncore, GEN6_RC6p_THRESHOLD, 150000);
set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */
@@ -299,7 +297,6 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
pctx = i915_gem_object_create_stolen_for_preallocated(i915,
pcbr_offset,
- I915_GTT_OFFSET_NONE,
pctx_size);
if (IS_ERR(pctx))
return PTR_ERR(pctx);
@@ -323,10 +320,10 @@ static int vlv_rc6_init(struct intel_rc6 *rc6)
return PTR_ERR(pctx);
}
- GEM_BUG_ON(range_overflows_t(u64,
- i915->dsm.start,
- pctx->stolen->start,
- U32_MAX));
+ GEM_BUG_ON(range_overflows_end_t(u64,
+ i915->dsm.start,
+ pctx->stolen->start,
+ U32_MAX));
pctx_paddr = i915->dsm.start + pctx->stolen->start;
intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
@@ -542,6 +539,8 @@ void intel_rc6_init(struct intel_rc6 *rc6)
void intel_rc6_sanitize(struct intel_rc6 *rc6)
{
+ memset(rc6->prev_hw_residency, 0, sizeof(rc6->prev_hw_residency));
+
if (rc6->enabled) { /* unbalanced suspend/resume */
rpm_get(rc6);
rc6->enabled = false;
@@ -604,6 +603,7 @@ void intel_rc6_unpark(struct intel_rc6 *rc6)
void intel_rc6_park(struct intel_rc6 *rc6)
{
struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ unsigned int target;
if (!rc6->enabled)
return;
@@ -618,7 +618,14 @@ void intel_rc6_park(struct intel_rc6 *rc6)
/* Turn off the HW timers and go directly to rc6 */
set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
- set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT);
+
+ if (HAS_RC6pp(rc6_to_i915(rc6)))
+ target = 0x6; /* deepest rc6 */
+ else if (HAS_RC6p(rc6_to_i915(rc6)))
+ target = 0x5; /* deep rc6 */
+ else
+ target = 0x4; /* normal rc6 */
+ set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
}
void intel_rc6_disable(struct intel_rc6 *rc6)
@@ -713,7 +720,7 @@ u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg)
*/
i = (i915_mmio_reg_offset(reg) -
i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
- if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency)))
+ if (drm_WARN_ON_ONCE(&i915->drm, i >= ARRAY_SIZE(rc6->cur_residency)))
return 0;
fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index beee0cf89bce..80db3c9d785e 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -48,8 +48,10 @@ static void engine_skip_context(struct i915_request *rq)
lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
- if (rq->context == hung_ctx)
- i915_request_skip(rq, -EIO);
+ if (rq->context == hung_ctx) {
+ i915_request_set_error_once(rq, -EIO);
+ __i915_request_skip(rq);
+ }
}
static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
@@ -72,9 +74,10 @@ static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
if (score) {
atomic_add(score, &file_priv->ban_score);
- DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
- ctx->name, score,
- atomic_read(&file_priv->ban_score));
+ drm_dbg(&ctx->i915->drm,
+ "client %s: gained %u ban score, now %u\n",
+ ctx->name, score,
+ atomic_read(&file_priv->ban_score));
}
}
@@ -85,19 +88,18 @@ static bool mark_guilty(struct i915_request *rq)
bool banned;
int i;
+ if (intel_context_is_closed(rq->context)) {
+ intel_context_set_banned(rq->context);
+ return true;
+ }
+
rcu_read_lock();
ctx = rcu_dereference(rq->context->gem_context);
if (ctx && !kref_get_unless_zero(&ctx->ref))
ctx = NULL;
rcu_read_unlock();
if (!ctx)
- return false;
-
- if (i915_gem_context_is_closed(ctx)) {
- intel_context_set_banned(rq->context);
- banned = true;
- goto out;
- }
+ return intel_context_is_banned(rq->context);
atomic_inc(&ctx->guilty_count);
@@ -122,8 +124,8 @@ static bool mark_guilty(struct i915_request *rq)
if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
banned = true;
if (banned) {
- DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
- ctx->name, atomic_read(&ctx->guilty_count));
+ drm_dbg(&ctx->i915->drm, "context %s: guilty %d, banned\n",
+ ctx->name, atomic_read(&ctx->guilty_count));
intel_context_set_banned(rq->context);
}
@@ -153,11 +155,12 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
rcu_read_lock(); /* protect the GEM context */
if (guilty) {
- i915_request_skip(rq, -EIO);
+ i915_request_set_error_once(rq, -EIO);
+ __i915_request_skip(rq);
if (mark_guilty(rq))
engine_skip_context(rq);
} else {
- dma_fence_set_error(&rq->fence, -EAGAIN);
+ i915_request_set_error_once(rq, -EAGAIN);
mark_innocent(rq);
}
rcu_read_unlock();
@@ -226,7 +229,7 @@ static int g4x_do_reset(struct intel_gt *gt,
GRDOM_MEDIA | GRDOM_RESET_ENABLE);
ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
if (ret) {
- DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+ drm_dbg(&gt->i915->drm, "Wait for media reset failed\n");
goto out;
}
@@ -234,7 +237,7 @@ static int g4x_do_reset(struct intel_gt *gt,
GRDOM_RENDER | GRDOM_RESET_ENABLE);
ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
if (ret) {
- DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+ drm_dbg(&gt->i915->drm, "Wait for render reset failed\n");
goto out;
}
@@ -260,7 +263,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
5000, 0,
NULL);
if (ret) {
- DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+ drm_dbg(&gt->i915->drm, "Wait for render reset failed\n");
goto out;
}
@@ -271,7 +274,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
5000, 0,
NULL);
if (ret) {
- DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+ drm_dbg(&gt->i915->drm, "Wait for media reset failed\n");
goto out;
}
@@ -300,8 +303,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
500, 0,
NULL);
if (err)
- DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
- hw_domain_mask);
+ drm_dbg(&gt->i915->drm,
+ "Wait for 0x%08x engines reset failed\n",
+ hw_domain_mask);
return err;
}
@@ -401,7 +405,8 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
return 0;
if (ret) {
- DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+ drm_dbg(&engine->i915->drm,
+ "Wait for SFC forced lock ack failed\n");
return ret;
}
@@ -515,9 +520,10 @@ static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
ret = __intel_wait_for_register_fw(uncore, reg, mask, ack,
700, 0, NULL);
if (ret)
- DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
- engine->name, request,
- intel_uncore_read_fw(uncore, reg));
+ drm_err(&engine->i915->drm,
+ "%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
+ engine->name, request,
+ intel_uncore_read_fw(uncore, reg));
return ret;
}
@@ -781,7 +787,7 @@ static void nop_submit_request(struct i915_request *request)
unsigned long flags;
RQ_TRACE(request, "-EIO\n");
- dma_fence_set_error(&request->fence, -EIO);
+ i915_request_set_error_once(request, -EIO);
spin_lock_irqsave(&engine->active.lock, flags);
__i915_request_submit(request);
@@ -800,13 +806,6 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
if (test_bit(I915_WEDGED, &gt->reset.flags))
return;
- if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) {
- struct drm_printer p = drm_debug_printer(__func__);
-
- for_each_engine(engine, gt, id)
- intel_engine_dump(engine, &p, "%s\n", engine->name);
- }
-
GT_TRACE(gt, "start\n");
/*
@@ -845,10 +844,30 @@ void intel_gt_set_wedged(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
+ if (test_bit(I915_WEDGED, &gt->reset.flags))
+ return;
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
mutex_lock(&gt->reset.mutex);
- with_intel_runtime_pm(gt->uncore->rpm, wakeref)
- __intel_gt_set_wedged(gt);
+
+ if (GEM_SHOW_DEBUG()) {
+ struct drm_printer p = drm_debug_printer(__func__);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ drm_printf(&p, "called from %pS\n", (void *)_RET_IP_);
+ for_each_engine(engine, gt, id) {
+ if (intel_engine_is_idle(engine))
+ continue;
+
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+ }
+ }
+
+ __intel_gt_set_wedged(gt);
+
mutex_unlock(&gt->reset.mutex);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
static bool __intel_gt_unset_wedged(struct intel_gt *gt)
@@ -969,7 +988,7 @@ static int resume(struct intel_gt *gt)
int ret;
for_each_engine(engine, gt, id) {
- ret = engine->resume(engine);
+ ret = intel_engine_resume(engine);
if (ret)
return ret;
}
@@ -1022,7 +1041,7 @@ void intel_gt_reset(struct intel_gt *gt,
if (i915_modparams.reset)
dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
else
- DRM_DEBUG_DRIVER("GPU reset disabled\n");
+ drm_dbg(&gt->i915->drm, "GPU reset disabled\n");
goto error;
}
@@ -1049,8 +1068,9 @@ void intel_gt_reset(struct intel_gt *gt,
*/
ret = intel_gt_init_hw(gt);
if (ret) {
- DRM_ERROR("Failed to initialise HW following reset (%d)\n",
- ret);
+ drm_err(&gt->i915->drm,
+ "Failed to initialise HW following reset (%d)\n",
+ ret);
goto taint;
}
@@ -1126,9 +1146,8 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
if (ret) {
/* If we fail here, we expect to fallback to a global reset */
- DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
- uses_guc ? "GuC " : "",
- engine->name, ret);
+ drm_dbg(&gt->i915->drm, "%sFailed to reset %s, ret=%d\n",
+ uses_guc ? "GuC " : "", engine->name, ret);
goto out;
}
@@ -1144,7 +1163,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
* have been reset to their default values. Follow the init_ring
* process to program RING_MODE, HWSP and re-enable submission.
*/
- ret = engine->resume(engine);
+ ret = intel_engine_resume(engine);
out:
intel_engine_cancel_stop_cs(engine);
@@ -1165,7 +1184,7 @@ static void intel_gt_reset_global(struct intel_gt *gt,
kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
- DRM_DEBUG_DRIVER("resetting chip\n");
+ drm_dbg(&gt->i915->drm, "resetting chip, engines=%x\n", engine_mask);
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
/* Use a watchdog to ensure that our reset completes */
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 6ff803f397c4..8cda1b7e17ba 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring)
if (atomic_fetch_inc(&ring->pin_count))
return 0;
- flags = PIN_GLOBAL;
-
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
if (vma->obj->stolen)
flags |= PIN_MAPPABLE;
else
flags |= PIN_HIGH;
- ret = i915_vma_pin(vma, 0, 0, flags);
+ ret = i915_ggtt_pin(vma, 0, flags);
if (unlikely(ret))
goto err_unpin;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index bc44fe8e5ffa..fdc3f10e12aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -29,11 +29,10 @@
#include <linux/log2.h>
-#include <drm/i915_drm.h>
-
#include "gem/i915_gem_context.h"
#include "gen6_ppgtt.h"
+#include "gen7_renderclear.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_context.h"
@@ -568,7 +567,8 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
return;
/* ring should be idle before issuing a sync flush*/
- WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+ drm_WARN_ON(&dev_priv->drm,
+ (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
ENGINE_WRITE(engine, RING_INSTPM,
_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
@@ -626,6 +626,27 @@ static bool stop_ring(struct intel_engine_cs *engine)
return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
}
+static struct i915_address_space *vm_alias(struct i915_address_space *vm)
+{
+ if (i915_is_ggtt(vm))
+ vm = &i915_vm_to_ggtt(vm)->alias->vm;
+
+ return vm;
+}
+
+static void set_pp_dir(struct intel_engine_cs *engine)
+{
+ struct i915_address_space *vm = vm_alias(engine->gt->vm);
+
+ if (vm) {
+ struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
+ ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
+ ENGINE_WRITE(engine, RING_PP_DIR_BASE,
+ px_base(ppgtt->pd)->ggtt_offset << 10);
+ }
+}
+
static int xcs_resume(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -684,6 +705,8 @@ static int xcs_resume(struct intel_engine_cs *engine)
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
intel_ring_update_space(ring);
+ set_pp_dir(engine);
+
/* First wake the ring up to an empty/idle ring */
ENGINE_WRITE(engine, RING_HEAD, ring->head);
ENGINE_WRITE(engine, RING_TAIL, ring->head);
@@ -857,43 +880,6 @@ static int rcs_resume(struct intel_engine_cs *engine)
intel_uncore_write(uncore, ECOSKPD,
_MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE));
- /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
- if (IS_GEN_RANGE(i915, 4, 6))
- intel_uncore_write(uncore, MI_MODE,
- _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
-
- /* We need to disable the AsyncFlip performance optimisations in order
- * to use MI_WAIT_FOR_EVENT within the CS. It should already be
- * programmed to '1' on all products.
- *
- * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
- */
- if (IS_GEN_RANGE(i915, 6, 7))
- intel_uncore_write(uncore, MI_MODE,
- _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
-
- /* Required for the hardware to program scanline values for waiting */
- /* WaEnableFlushTlbInvalidationMode:snb */
- if (IS_GEN(i915, 6))
- intel_uncore_write(uncore, GFX_MODE,
- _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
-
- /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
- if (IS_GEN(i915, 7))
- intel_uncore_write(uncore, GFX_MODE_GEN7,
- _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
- _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
-
- if (IS_GEN(i915, 6)) {
- /* From the Sandybridge PRM, volume 1 part 3, page 24:
- * "If this bit is set, STCunit will have LRA as replacement
- * policy. [...] This bit must be reset. LRA replacement
- * policy is not supported."
- */
- intel_uncore_write(uncore, CACHE_MODE_0,
- _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
- }
-
if (IS_GEN_RANGE(i915, 6, 7))
intel_uncore_write(uncore, INSTPM,
_MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
@@ -910,9 +896,7 @@ static void reset_cancel(struct intel_engine_cs *engine)
/* Mark all submitted requests as skipped. */
list_for_each_entry(request, &engine->active.requests, sched.link) {
- if (!i915_request_signaled(request))
- dma_fence_set_error(&request->fence, -EIO);
-
+ i915_request_set_error_once(request, -EIO);
i915_request_mark_complete(request);
}
@@ -1197,23 +1181,12 @@ static void ring_context_destroy(struct kref *ref)
intel_context_free(ce);
}
-static struct i915_address_space *vm_alias(struct intel_context *ce)
-{
- struct i915_address_space *vm;
-
- vm = ce->vm;
- if (i915_is_ggtt(vm))
- vm = &i915_vm_to_ggtt(vm)->alias->vm;
-
- return vm;
-}
-
static int __context_pin_ppgtt(struct intel_context *ce)
{
struct i915_address_space *vm;
int err = 0;
- vm = vm_alias(ce);
+ vm = vm_alias(ce->vm);
if (vm)
err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
@@ -1224,7 +1197,7 @@ static void __context_unpin_ppgtt(struct intel_context *ce)
{
struct i915_address_space *vm;
- vm = vm_alias(ce);
+ vm = vm_alias(ce->vm);
if (vm)
gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
}
@@ -1384,7 +1357,9 @@ static int load_pd_dir(struct i915_request *rq,
return rq->engine->emit_flush(rq, EMIT_FLUSH);
}
-static inline int mi_set_context(struct i915_request *rq, u32 flags)
+static inline int mi_set_context(struct i915_request *rq,
+ struct intel_context *ce,
+ u32 flags)
{
struct drm_i915_private *i915 = rq->i915;
struct intel_engine_cs *engine = rq->engine;
@@ -1459,7 +1434,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
*cs++ = MI_NOOP;
*cs++ = MI_SET_CONTEXT;
- *cs++ = i915_ggtt_offset(rq->context->state) | flags;
+ *cs++ = i915_ggtt_offset(ce->state) | flags;
/*
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
* WaMiSetContext_Hang:snb,ivb,vlv
@@ -1574,21 +1549,64 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
}
+static int clear_residuals(struct i915_request *rq)
+{
+ struct intel_engine_cs *engine = rq->engine;
+ int ret;
+
+ ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
+ if (ret)
+ return ret;
+
+ if (engine->kernel_context->state) {
+ ret = mi_set_context(rq,
+ engine->kernel_context,
+ MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
+ if (ret)
+ return ret;
+ }
+
+ ret = engine->emit_bb_start(rq,
+ engine->wa_ctx.vma->node.start, 0,
+ 0);
+ if (ret)
+ return ret;
+
+ ret = engine->emit_flush(rq, EMIT_FLUSH);
+ if (ret)
+ return ret;
+
+ /* Always invalidate before the next switch_mm() */
+ return engine->emit_flush(rq, EMIT_INVALIDATE);
+}
+
static int switch_context(struct i915_request *rq)
{
+ struct intel_engine_cs *engine = rq->engine;
struct intel_context *ce = rq->context;
+ void **residuals = NULL;
int ret;
GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
- ret = switch_mm(rq, vm_alias(ce));
+ if (engine->wa_ctx.vma && ce != engine->kernel_context) {
+ if (engine->wa_ctx.vma->private != ce) {
+ ret = clear_residuals(rq);
+ if (ret)
+ return ret;
+
+ residuals = &engine->wa_ctx.vma->private;
+ }
+ }
+
+ ret = switch_mm(rq, vm_alias(ce->vm));
if (ret)
return ret;
if (ce->state) {
u32 flags;
- GEM_BUG_ON(rq->engine->id != RCS0);
+ GEM_BUG_ON(engine->id != RCS0);
/* For resource streamer on HSW+ and power context elsewhere */
BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
@@ -1600,7 +1618,7 @@ static int switch_context(struct i915_request *rq)
else
flags |= MI_RESTORE_INHIBIT;
- ret = mi_set_context(rq, flags);
+ ret = mi_set_context(rq, ce, flags);
if (ret)
return ret;
}
@@ -1609,6 +1627,20 @@ static int switch_context(struct i915_request *rq)
if (ret)
return ret;
+ /*
+ * Now past the point of no return, this request _will_ be emitted.
+ *
+ * Or at least this preamble will be emitted, the request may be
+ * interrupted prior to submitting the user payload. If so, we
+ * still submit the "empty" request in order to preserve global
+ * state tracking such as this, our tracking of the current
+ * dirty context.
+ */
+ if (residuals) {
+ intel_context_put(*residuals);
+ *residuals = intel_context_get(ce);
+ }
+
return 0;
}
@@ -1662,7 +1694,8 @@ static void gen6_bsd_submit_request(struct i915_request *request)
GEN6_BSD_SLEEP_INDICATOR,
0,
1000, 0, NULL))
- DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
+ drm_err(&uncore->i915->drm,
+ "timed out waiting for the BSD ring to wake up\n");
/* Now that the ring is fully powered up, update the tail */
i9xx_submit_request(request);
@@ -1787,11 +1820,16 @@ static void ring_release(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- WARN_ON(INTEL_GEN(dev_priv) > 2 &&
- (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+ drm_WARN_ON(&dev_priv->drm, INTEL_GEN(dev_priv) > 2 &&
+ (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
intel_engine_cleanup_common(engine);
+ if (engine->wa_ctx.vma) {
+ intel_context_put(engine->wa_ctx.vma->private);
+ i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
+ }
+
intel_ring_unpin(engine->legacy.ring);
intel_ring_put(engine->legacy.ring);
@@ -1939,6 +1977,64 @@ static void setup_vecs(struct intel_engine_cs *engine)
engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
}
+static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
+ struct i915_vma * const vma)
+{
+ return gen7_setup_clear_gpr_bb(engine, vma);
+}
+
+static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int size;
+ int err;
+
+ size = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
+ if (size <= 0)
+ return size;
+
+ size = ALIGN(size, PAGE_SIZE);
+ obj = i915_gem_object_create_internal(engine->i915, size);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, engine->gt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
+
+ vma->private = intel_context_create(engine); /* dummy residuals */
+ if (IS_ERR(vma->private)) {
+ err = PTR_ERR(vma->private);
+ goto err_obj;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ if (err)
+ goto err_private;
+
+ err = i915_vma_sync(vma);
+ if (err)
+ goto err_unpin;
+
+ err = gen7_ctx_switch_bb_setup(engine, vma);
+ if (err)
+ goto err_unpin;
+
+ engine->wa_ctx.vma = vma;
+ return 0;
+
+err_unpin:
+ i915_vma_unpin(vma);
+err_private:
+ intel_context_put(vma->private);
+err_obj:
+ i915_gem_object_put(obj);
+ return err;
+}
+
int intel_ring_submission_setup(struct intel_engine_cs *engine)
{
struct intel_timeline *timeline;
@@ -1992,11 +2088,19 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
+ if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) {
+ err = gen7_ctx_switch_bb_init(engine);
+ if (err)
+ goto err_ring_unpin;
+ }
+
/* Finally, take ownership and responsibility for cleanup! */
engine->release = ring_release;
return 0;
+err_ring_unpin:
+ intel_ring_unpin(ring);
err_ring:
intel_ring_put(ring);
err_timeline_unpin:
@@ -2007,3 +2111,7 @@ err:
intel_engine_cleanup_common(engine);
return err;
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_ring_submission.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index d2a3d935d186..cfaf141bac4d 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -4,6 +4,8 @@
* Copyright © 2019 Intel Corporation
*/
+#include <drm/i915_drm.h>
+
#include "i915_drv.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
@@ -55,7 +57,7 @@ static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
if (val < rps->max_freq_softlimit)
mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
- mask &= rps->pm_events;
+ mask &= READ_ONCE(rps->pm_events);
return rps_pm_sanitize_mask(rps, ~mask);
}
@@ -68,17 +70,19 @@ static void rps_reset_ei(struct intel_rps *rps)
static void rps_enable_interrupts(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
+ u32 events;
rps_reset_ei(rps);
if (IS_VALLEYVIEW(gt->i915))
/* WaGsvRC0ResidencyMethod:vlv */
- rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+ events = GEN6_PM_RP_UP_EI_EXPIRED;
else
- rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
- GEN6_PM_RP_DOWN_THRESHOLD |
- GEN6_PM_RP_DOWN_TIMEOUT);
+ events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+ WRITE_ONCE(rps->pm_events, events);
spin_lock_irq(&gt->irq_lock);
gen6_gt_pm_enable_irq(gt, rps->pm_events);
spin_unlock_irq(&gt->irq_lock);
@@ -115,8 +119,7 @@ static void rps_disable_interrupts(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- rps->pm_events = 0;
-
+ WRITE_ONCE(rps->pm_events, 0);
set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
spin_lock_irq(&gt->irq_lock);
@@ -642,7 +645,7 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
{
mutex_lock(&rps->power.mutex);
if (interactive) {
- if (!rps->power.interactive++ && rps->active)
+ if (!rps->power.interactive++ && READ_ONCE(rps->active))
rps_set_power(rps, HIGH_POWER);
} else {
GEM_BUG_ON(!rps->power.interactive);
@@ -719,11 +722,15 @@ void intel_rps_unpark(struct intel_rps *rps)
* performance, jump directly to RPe as our starting frequency.
*/
mutex_lock(&rps->lock);
- rps->active = true;
+
+ WRITE_ONCE(rps->active, true);
+
freq = max(rps->cur_freq, rps->efficient_freq),
freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
intel_rps_set(rps, freq);
+
rps->last_adj = 0;
+
mutex_unlock(&rps->lock);
if (INTEL_GEN(rps_to_i915(rps)) >= 6)
@@ -743,7 +750,7 @@ void intel_rps_park(struct intel_rps *rps)
if (INTEL_GEN(i915) >= 6)
rps_disable_interrupts(rps);
- rps->active = false;
+ WRITE_ONCE(rps->active, false);
if (rps->last_freq <= rps->idle_freq)
return;
@@ -763,14 +770,27 @@ void intel_rps_park(struct intel_rps *rps)
intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
rps_set(rps, rps->idle_freq, false);
intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
+
+ /*
+ * Since we will try and restart from the previously requested
+ * frequency on unparking, treat this idle point as a downclock
+ * interrupt and reduce the frequency for resume. If we park/unpark
+ * more frequently than the rps worker can run, we will not respond
+ * to any EI and never see a change in frequency.
+ *
+ * (Note we accommodate Cherryview's limitation of only using an
+ * even bin by applying it to all.)
+ */
+ rps->cur_freq =
+ max_t(int, round_down(rps->cur_freq - 1, 2), rps->min_freq);
}
void intel_rps_boost(struct i915_request *rq)
{
- struct intel_rps *rps = &rq->engine->gt->rps;
+ struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
unsigned long flags;
- if (i915_request_signaled(rq) || !rps->active)
+ if (i915_request_signaled(rq) || !READ_ONCE(rps->active))
return;
/* Serializes with i915_request_retire() */
@@ -1026,7 +1046,8 @@ static bool chv_rps_enable(struct intel_rps *rps)
vlv_punit_put(i915);
/* RPS code assumes GPLL is used */
- WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+ drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
+ "GPLL not enabled\n");
DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
@@ -1123,7 +1144,8 @@ static bool vlv_rps_enable(struct intel_rps *rps)
vlv_punit_put(i915);
/* RPS code assumes GPLL is used */
- WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+ drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
+ "GPLL not enabled\n");
DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
@@ -1191,11 +1213,11 @@ void intel_rps_enable(struct intel_rps *rps)
if (!rps->enabled)
return;
- WARN_ON(rps->max_freq < rps->min_freq);
- WARN_ON(rps->idle_freq > rps->max_freq);
+ drm_WARN_ON(&i915->drm, rps->max_freq < rps->min_freq);
+ drm_WARN_ON(&i915->drm, rps->idle_freq > rps->max_freq);
- WARN_ON(rps->efficient_freq < rps->min_freq);
- WARN_ON(rps->efficient_freq > rps->max_freq);
+ drm_WARN_ON(&i915->drm, rps->efficient_freq < rps->min_freq);
+ drm_WARN_ON(&i915->drm, rps->efficient_freq > rps->max_freq);
}
static void gen6_rps_disable(struct intel_rps *rps)
@@ -1390,9 +1412,9 @@ static void chv_rps_init(struct intel_rps *rps)
BIT(VLV_IOSF_SB_NC) |
BIT(VLV_IOSF_SB_CCK));
- WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
- rps->min_freq) & 1,
- "Odd GPU freq values\n");
+ drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq |
+ rps->rp1_freq | rps->min_freq) & 1,
+ "Odd GPU freq values\n");
}
static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
@@ -1451,12 +1473,12 @@ static void rps_work(struct work_struct *work)
u32 pm_iir = 0;
spin_lock_irq(&gt->irq_lock);
- pm_iir = fetch_and_zero(&rps->pm_iir);
+ pm_iir = fetch_and_zero(&rps->pm_iir) & READ_ONCE(rps->pm_events);
client_boost = atomic_read(&rps->num_waiters);
spin_unlock_irq(&gt->irq_lock);
/* Make sure we didn't queue anything we're not going to process. */
- if ((pm_iir & rps->pm_events) == 0 && !client_boost)
+ if (!pm_iir && !client_boost)
goto out;
mutex_lock(&rps->lock);
@@ -1552,11 +1574,15 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
{
struct intel_gt *gt = rps_to_gt(rps);
+ u32 events;
- if (pm_iir & rps->pm_events) {
+ events = pm_iir & READ_ONCE(rps->pm_events);
+ if (events) {
spin_lock(&gt->irq_lock);
- gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events);
- rps->pm_iir |= pm_iir & rps->pm_events;
+
+ gen6_gt_pm_mask_irq(gt, events);
+ rps->pm_iir |= events;
+
schedule_work(&rps->work);
spin_unlock(&gt->irq_lock);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index d8d9f1179c2b..91debbc97c9a 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -312,7 +312,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
if (atomic_add_unless(&tl->pin_count, 1, 0))
return 0;
- err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
if (err)
return err;
@@ -410,6 +410,8 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
void *vaddr;
int err;
+ might_lock(&tl->gt->ggtt->vm.mutex);
+
/*
* If there is an outstanding GPU reference to this cacheline,
* such as it being sampled by a HW semaphore on another timeline,
@@ -435,7 +437,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
goto err_rollback;
}
- err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_ggtt_pin(vma, 0, PIN_HIGH);
if (err) {
__idle_hwsp_free(vma->private, cacheline);
goto err_rollback;
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 6c2f8462e0f3..5176ad1a3976 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -116,17 +116,17 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
} else {
wa_ = &wal->list[mid];
- if ((wa->mask & ~wa_->mask) == 0) {
- DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
+ if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
+ DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
i915_mmio_reg_offset(wa_->reg),
- wa_->mask, wa_->val);
+ wa_->clr, wa_->set);
- wa_->val &= ~wa->mask;
+ wa_->set &= ~wa->clr;
}
wal->wa_count++;
- wa_->val |= wa->val;
- wa_->mask |= wa->mask;
+ wa_->set |= wa->set;
+ wa_->clr |= wa->clr;
wa_->read |= wa->read;
return;
}
@@ -147,13 +147,13 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
}
}
-static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
- u32 val, u32 read_mask)
+static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
+ u32 clear, u32 set, u32 read_mask)
{
struct i915_wa wa = {
.reg = reg,
- .mask = mask,
- .val = val,
+ .clr = clear,
+ .set = set,
.read = read_mask,
};
@@ -161,38 +161,43 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
}
static void
-wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
- u32 val)
+wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
- wa_add(wal, reg, mask, val, mask);
+ wa_add(wal, reg, clear, set, clear);
}
static void
-wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
- wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
+ wa_write_masked_or(wal, reg, ~0, set);
}
static void
-wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
- wa_write_masked_or(wal, reg, ~0, val);
+ wa_write_masked_or(wal, reg, set, set);
}
static void
-wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
- wa_write_masked_or(wal, reg, val, val);
+ wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
+}
+
+static void
+wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+ wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
}
#define WA_SET_BIT_MASKED(addr, mask) \
- wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
+ wa_masked_en(wal, (addr), (mask))
#define WA_CLR_BIT_MASKED(addr, mask) \
- wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
+ wa_masked_dis(wal, (addr), (mask))
#define WA_SET_FIELD_MASKED(addr, mask, value) \
- wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
+ wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value)))
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
@@ -570,12 +575,29 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
/* allow headerless messages for preemptible GPGPU context */
WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
+
+ /* Wa_1604278689:icl,ehl */
+ wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
+ wa_write_masked_or(wal, IVB_FBC_RT_BASE_UPPER,
+ 0, /* write-only register; skip validation */
+ 0xFFFFFFFF);
+
+ /* Wa_1406306137:icl,ehl */
+ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
}
static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- /* Wa_1409142259:tgl */
+ /*
+ * Wa_1409142259:tgl
+ * Wa_1409347922:tgl
+ * Wa_1409252684:tgl
+ * Wa_1409217633:tgl
+ * Wa_1409207793:tgl
+ * Wa_1409178076:tgl
+ * Wa_1408979724:tgl
+ */
WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
@@ -588,6 +610,11 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
*/
wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
FF_MODE2_TDS_TIMER_128, 0);
+
+ /* WaDisableGPGPUMidThreadPreemption:tgl */
+ WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+ GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
}
static void
@@ -657,7 +684,7 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
*cs++ = i915_mmio_reg_offset(wa->reg);
- *cs++ = wa->val;
+ *cs++ = wa->set;
}
*cs++ = MI_NOOP;
@@ -822,7 +849,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
intel_sseu_get_subslices(sseu, slice), l3_en);
subslice = fls(l3_en);
- WARN_ON(!subslice);
+ drm_WARN_ON(&i915->drm, !subslice);
}
subslice--;
@@ -893,11 +920,6 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
SLICE_UNIT_LEVEL_CLKGATE,
MSCUNIT_CLKGATE_DIS);
- /* Wa_1406680159:icl */
- wa_write_or(wal,
- SUBSLICE_UNIT_LEVEL_CLKGATE,
- GWUNIT_CLKGATE_DIS);
-
/* Wa_1406838659:icl (pre-prod) */
if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
wa_write_or(wal,
@@ -926,7 +948,7 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
SUBSLICE_UNIT_LEVEL_CLKGATE2,
CPSSUNIT_CLKGATE_DIS);
- /* Wa_1409180338:tgl */
+ /* Wa_1607087056:tgl also know as BUG:1409180338 */
if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
wa_write_or(wal,
SLICE_UNIT_LEVEL_CLKGATE,
@@ -986,11 +1008,10 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
- if ((cur ^ wa->val) & wa->read) {
- DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
+ if ((cur ^ wa->set) & wa->read) {
+ DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x)\n",
name, from, i915_mmio_reg_offset(wa->reg),
- cur, cur & wa->read,
- wa->val, wa->mask);
+ cur, cur & wa->read, wa->set);
return false;
}
@@ -1015,7 +1036,10 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
intel_uncore_forcewake_get__locked(uncore, fw);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
+ if (wa->clr)
+ intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set);
+ else
+ intel_uncore_write_fw(uncore, wa->reg, wa->set);
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
wa_verify(wa,
intel_uncore_read_fw(uncore, wa->reg),
@@ -1239,6 +1263,7 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
case RENDER_CLASS:
/*
* WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
+ * Wa_1408556865:tgl
*
* This covers 4 registers which are next to one another :
* - PS_INVOCATION_COUNT
@@ -1249,6 +1274,12 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
whitelist_reg_ext(w, PS_INVOCATION_COUNT,
RING_FORCE_TO_NONPRIV_ACCESS_RD |
RING_FORCE_TO_NONPRIV_RANGE_4);
+
+ /* Wa_1808121037:tgl */
+ whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
+
+ /* Wa_1806527549:tgl */
+ whitelist_reg(w, HIZ_CHICKEN);
break;
default:
break;
@@ -1315,19 +1346,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
struct drm_i915_private *i915 = engine->i915;
if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
- /* Wa_1606700617:tgl */
- wa_masked_en(wal,
- GEN9_CS_DEBUG_MODE1,
- FF_DOP_CLOCK_GATE_DISABLE);
-
- /* Wa_1607138336:tgl */
+ /*
+ * Wa_1607138336:tgl
+ * Wa_1607063988:tgl
+ */
wa_write_or(wal,
GEN9_CTX_PREEMPT_REG,
GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
- /* Wa_1607030317:tgl */
- /* Wa_1607186500:tgl */
- /* Wa_1607297627:tgl */
+ /*
+ * Wa_1607030317:tgl
+ * Wa_1607186500:tgl
+ * Wa_1607297627:tgl there is 3 entries for this WA on BSpec, 2
+ * of then says it is fixed on B0 the other one says it is
+ * permanent
+ */
wa_masked_en(wal,
GEN6_RC_SLEEP_PSMI_CONTROL,
GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
@@ -1340,6 +1373,35 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_write_or(wal,
GEN7_SARCHKMD,
GEN7_DISABLE_SAMPLER_PREFETCH);
+
+ /* Wa_1407928979:tgl */
+ wa_write_or(wal,
+ GEN7_FF_THREAD_MODE,
+ GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
+
+ /*
+ * Wa_1409085225:tgl
+ * Wa_14010229206:tgl
+ */
+ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
+
+ /* Wa_1408615072:tgl */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
+ VSUNIT_CLKGATE_DIS_TGL);
+ }
+
+ if (IS_TIGERLAKE(i915)) {
+ /* Wa_1606931601:tgl */
+ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
+
+ /* Wa_1409804808:tgl */
+ wa_masked_en(wal, GEN7_ROW_CHICKEN2,
+ GEN12_PUSH_CONST_DEREF_HOLD_DIS);
+
+ /* Wa_1606700617:tgl */
+ wa_masked_en(wal,
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
}
if (IS_GEN(i915, 11)) {
@@ -1405,10 +1467,38 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN11_SCRATCH2,
GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
0);
+
+ /* WaEnable32PlaneMode:icl */
+ wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
+ GEN11_ENABLE_32_PLANE_MODE);
+
+ /*
+ * Wa_1408615072:icl,ehl (vsunit)
+ * Wa_1407596294:icl,ehl (hsunit)
+ */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+ VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
+
+ /* Wa_1407352427:icl,ehl */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
+ PSDUNIT_CLKGATE_DIS);
+
+ /* Wa_1406680159:icl,ehl */
+ wa_write_or(wal,
+ SUBSLICE_UNIT_LEVEL_CLKGATE,
+ GWUNIT_CLKGATE_DIS);
+
+ /*
+ * Wa_1408767742:icl[a2..forever],ehl[all]
+ * Wa_1605460711:icl[a0..c0]
+ */
+ wa_write_or(wal,
+ GEN7_FF_THREAD_MODE,
+ GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
}
- if (IS_GEN_RANGE(i915, 9, 11)) {
- /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
+ if (IS_GEN_RANGE(i915, 9, 12)) {
+ /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
wa_masked_en(wal,
GEN7_FF_SLICE_CS_CHICKEN1,
GEN9_FFSC_PERCTX_PREEMPT_CTRL);
@@ -1452,6 +1542,52 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN8_L3SQCREG4,
GEN8_LQSC_FLUSH_COHERENT_LINES);
}
+
+ if (IS_GEN(i915, 7))
+ /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
+ wa_masked_en(wal,
+ GFX_MODE_GEN7,
+ GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
+
+ if (IS_GEN_RANGE(i915, 6, 7))
+ /*
+ * We need to disable the AsyncFlip performance optimisations in
+ * order to use MI_WAIT_FOR_EVENT within the CS. It should
+ * already be programmed to '1' on all products.
+ *
+ * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
+ */
+ wa_masked_en(wal,
+ MI_MODE,
+ ASYNC_FLIP_PERF_DISABLE);
+
+ if (IS_GEN(i915, 6)) {
+ /*
+ * Required for the hardware to program scanline values for
+ * waiting
+ * WaEnableFlushTlbInvalidationMode:snb
+ */
+ wa_masked_en(wal,
+ GFX_MODE,
+ GFX_TLB_INVALIDATE_EXPLICIT);
+
+ /*
+ * From the Sandybridge PRM, volume 1 part 3, page 24:
+ * "If this bit is set, STCunit will have LRA as replacement
+ * policy. [...] This bit must be reset. LRA replacement
+ * policy is not supported."
+ */
+ wa_masked_dis(wal,
+ CACHE_MODE_0,
+ CM0_STC_EVICT_DISABLE_LRA_SNB);
+ }
+
+ if (IS_GEN_RANGE(i915, 4, 6))
+ /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
+ wa_add(wal, MI_MODE,
+ 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
+ /* XXX bit doesn't stick on Broadwater */
+ IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);
}
static void
@@ -1470,7 +1606,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
- if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
+ if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4))
return;
if (engine->class == RENDER_CLASS)
@@ -1483,7 +1619,7 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
struct i915_wa_list *wal = &engine->wa_list;
- if (INTEL_GEN(engine->i915) < 8)
+ if (INTEL_GEN(engine->i915) < 4)
return;
wa_init_start(wal, "engine", engine->name);
@@ -1626,6 +1762,16 @@ static int engine_wa_list_verify(struct intel_context *ce,
goto err_vma;
}
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ i915_vma_unlock(vma);
+ if (err) {
+ i915_request_add(rq);
+ goto err_vma;
+ }
+
err = wa_list_srm(rq, wal, vma);
if (err)
goto err_vma;
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
index e27ab1b710b3..d166a7145720 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
@@ -13,8 +13,8 @@
struct i915_wa {
i915_reg_t reg;
- u32 mask;
- u32 val;
+ u32 clr;
+ u32 set;
u32 read;
};
diff --git a/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c b/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c
new file mode 100644
index 000000000000..610ca7687735
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:29:32 AM UTC
+ */
+
+static const u32 ivb_clear_kernel[] = {
+ 0x00000001, 0x26020128, 0x00000024, 0x00000000,
+ 0x00000040, 0x20280c21, 0x00000028, 0x00000001,
+ 0x01000010, 0x20000c20, 0x0000002c, 0x00000000,
+ 0x00010220, 0x34001c00, 0x00001400, 0x0000002c,
+ 0x00600001, 0x20600061, 0x00000000, 0x00000000,
+ 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c,
+ 0x00000005, 0x20601ca5, 0x00000060, 0x00000001,
+ 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d,
+ 0x00000005, 0x20641ca5, 0x00000064, 0x00000003,
+ 0x00000041, 0x207424a5, 0x00000064, 0x00000034,
+ 0x00000040, 0x206014a5, 0x00000060, 0x00000074,
+ 0x00000008, 0x20681c85, 0x00000e00, 0x00000008,
+ 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f,
+ 0x00000041, 0x20701ca5, 0x00000060, 0x00000010,
+ 0x00000040, 0x206814a5, 0x00000068, 0x00000070,
+ 0x00600001, 0x20a00061, 0x00000000, 0x00000000,
+ 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007,
+ 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004,
+ 0x00600001, 0x20800021, 0x008d0000, 0x00000000,
+ 0x00000001, 0x20800021, 0x0000006c, 0x00000000,
+ 0x00000001, 0x20840021, 0x00000068, 0x00000000,
+ 0x00000001, 0x20880061, 0x00000000, 0x00000003,
+ 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff,
+ 0x05600032, 0x20a00fa1, 0x008d0080, 0x02190001,
+ 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001,
+ 0x05600032, 0x20a00fa1, 0x008d0080, 0x040a8001,
+ 0x02000040, 0x20281c21, 0x00000028, 0xffffffff,
+ 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc,
+ 0x00000001, 0x26020128, 0x00000024, 0x00000000,
+ 0x00000001, 0x220010e4, 0x00000000, 0x00000000,
+ 0x00000001, 0x220831ec, 0x00000000, 0x007f007f,
+ 0x00600001, 0x20400021, 0x008d0000, 0x00000000,
+ 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000,
+ 0x00200001, 0x20400121, 0x00450020, 0x00000000,
+ 0x00000001, 0x20480061, 0x00000000, 0x000f000f,
+ 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef,
+ 0x00800001, 0x20600061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20800061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20a00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20c00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x20e00061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21000061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21200061, 0x00000000, 0x00000000,
+ 0x00800001, 0x21400061, 0x00000000, 0x00000000,
+ 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+ 0x00000040, 0x20402d21, 0x00000020, 0x00100010,
+ 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000,
+ 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff,
+ 0x00800001, 0xa0000109, 0x00000602, 0x00000000,
+ 0x00000040, 0x22001c84, 0x00000200, 0x00000020,
+ 0x00010220, 0x34001c00, 0x00001400, 0xfffffff8,
+ 0x07600032, 0x20000fa0, 0x008d0fe0, 0x82000010,
+};
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index f2806381733f..4a53ded7c2dd 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -65,6 +65,9 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
return NULL;
}
i915_active_init(&ring->vma->active, NULL, NULL);
+ __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma));
+ __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags);
+ ring->vma->node.size = sz;
intel_ring_update_space(ring);
@@ -241,9 +244,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine)
/* Mark all submitted requests as skipped. */
list_for_each_entry(request, &engine->active.requests, sched.link) {
- if (!i915_request_signaled(request))
- dma_fence_set_error(&request->fence, -EIO);
-
+ i915_request_set_error_once(request, -EIO);
i915_request_mark_complete(request);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 43d4d589749f..697114dd1f47 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -142,6 +142,24 @@ out:
return err;
}
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+ unsigned long *saved)
+{
+ *saved = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
static int live_idle_flush(void *arg)
{
struct intel_gt *gt = arg;
@@ -152,9 +170,11 @@ static int live_idle_flush(void *arg)
/* Check that we can flush the idle barriers */
for_each_engine(engine, gt, id) {
- intel_engine_pm_get(engine);
+ unsigned long heartbeat;
+
+ engine_heartbeat_disable(engine, &heartbeat);
err = __live_idle_pulse(engine, intel_engine_flush_barriers);
- intel_engine_pm_put(engine);
+ engine_heartbeat_enable(engine, heartbeat);
if (err)
break;
}
@@ -172,9 +192,11 @@ static int live_idle_pulse(void *arg)
/* Check that heartbeat pulses flush the idle barriers */
for_each_engine(engine, gt, id) {
- intel_engine_pm_get(engine);
+ unsigned long heartbeat;
+
+ engine_heartbeat_disable(engine, &heartbeat);
err = __live_idle_pulse(engine, intel_engine_pulse);
- intel_engine_pm_put(engine);
+ engine_heartbeat_enable(engine, heartbeat);
if (err && err != -ENODEV)
break;
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 3e5e6c86e843..2b2efff6e19d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -268,7 +268,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
cancel_rq:
if (err) {
- i915_request_skip(rq, err);
+ i915_request_set_error_once(rq, err);
i915_request_add(rq);
}
unpin_hws:
@@ -1640,7 +1640,7 @@ static int igt_reset_engines_atomic(void *arg)
if (!intel_has_reset_engine(gt))
return 0;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
igt_global_reset_lock(gt);
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c
index fd3770e48ac7..a912159693fd 100644
--- a/drivers/gpu/drm/i915/gt/selftest_llc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.c
@@ -18,10 +18,8 @@ static int gen6_verify_ring_freq(struct intel_llc *llc)
wakeref = intel_runtime_pm_get(llc_to_gt(llc)->uncore->rpm);
- if (!get_ia_constants(llc, &consts)) {
- err = -ENODEV;
+ if (!get_ia_constants(llc, &consts))
goto out_rpm;
- }
for (gpu_freq = consts.min_gpu_freq;
gpu_freq <= consts.max_gpu_freq;
@@ -71,10 +69,5 @@ out_rpm:
int st_llc_verify(struct intel_llc *llc)
{
- int err = 0;
-
- if (HAS_LLC(llc_to_gt(llc)->i915))
- err = gen6_verify_ring_freq(llc);
-
- return err;
+ return gen6_verify_ring_freq(llc);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index b292f8cbd0bf..6f06ba750a0a 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -68,6 +68,71 @@ static void engine_heartbeat_enable(struct intel_engine_cs *engine,
engine->props.heartbeat_interval_ms = saved;
}
+static int wait_for_submit(struct intel_engine_cs *engine,
+ struct i915_request *rq,
+ unsigned long timeout)
+{
+ timeout += jiffies;
+ do {
+ cond_resched();
+ intel_engine_flush_submission(engine);
+
+ if (READ_ONCE(engine->execlists.pending[0]))
+ continue;
+
+ if (i915_request_is_active(rq))
+ return 0;
+
+ if (i915_request_started(rq)) /* that was quick! */
+ return 0;
+ } while (time_before(jiffies, timeout));
+
+ return -ETIME;
+}
+
+static int wait_for_reset(struct intel_engine_cs *engine,
+ struct i915_request *rq,
+ unsigned long timeout)
+{
+ timeout += jiffies;
+
+ do {
+ cond_resched();
+ intel_engine_flush_submission(engine);
+
+ if (READ_ONCE(engine->execlists.pending[0]))
+ continue;
+
+ if (i915_request_completed(rq))
+ break;
+
+ if (READ_ONCE(rq->fence.error))
+ break;
+ } while (time_before(jiffies, timeout));
+
+ flush_scheduled_work();
+
+ if (rq->fence.error != -EIO) {
+ pr_err("%s: hanging request %llx:%lld not reset\n",
+ engine->name,
+ rq->fence.context,
+ rq->fence.seqno);
+ return -EINVAL;
+ }
+
+ /* Give the request a jiffie to complete after flushing the worker */
+ if (i915_request_wait(rq, 0,
+ max(0l, (long)(timeout - jiffies)) + 1) < 0) {
+ pr_err("%s: hanging request %llx:%lld did not complete\n",
+ engine->name,
+ rq->fence.context,
+ rq->fence.seqno);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
static int live_sanitycheck(void *arg)
{
struct intel_gt *gt = arg;
@@ -285,6 +350,84 @@ static int live_unlite_preempt(void *arg)
return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
}
+static int live_pin_rewind(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * We have to be careful not to trust intel_ring too much, for example
+ * ring->head is updated upon retire which is out of sync with pinning
+ * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
+ * or else we risk writing an older, stale value.
+ *
+ * To simulate this, let's apply a bit of deliberate sabotague.
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+ struct i915_request *rq;
+ struct intel_ring *ring;
+ struct igt_live_test t;
+
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
+ err = -EIO;
+ break;
+ }
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ err = intel_context_pin(ce);
+ if (err) {
+ intel_context_put(ce);
+ break;
+ }
+
+ /* Keep the context awake while we play games */
+ err = i915_active_acquire(&ce->active);
+ if (err) {
+ intel_context_unpin(ce);
+ intel_context_put(ce);
+ break;
+ }
+ ring = ce->ring;
+
+ /* Poison the ring, and offset the next request from HEAD */
+ memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
+ ring->emit = ring->size / 2;
+ ring->tail = ring->emit;
+ GEM_BUG_ON(ring->head);
+
+ intel_context_unpin(ce);
+
+ /* Submit a simple nop request */
+ GEM_BUG_ON(intel_context_is_pinned(ce));
+ rq = intel_context_create_request(ce);
+ i915_active_release(&ce->active); /* e.g. async retire */
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+ GEM_BUG_ON(!rq->head);
+ i915_request_add(rq);
+
+ /* Expect not to hang! */
+ if (igt_live_test_end(&t)) {
+ err = -EIO;
+ break;
+ }
+ }
+
+ return err;
+}
+
static int live_hold_reset(void *arg)
{
struct intel_gt *gt = arg;
@@ -386,6 +529,152 @@ out:
return err;
}
+static const char *error_repr(int err)
+{
+ return err ? "bad" : "good";
+}
+
+static int live_error_interrupt(void *arg)
+{
+ static const struct error_phase {
+ enum { GOOD = 0, BAD = -EIO } error[2];
+ } phases[] = {
+ { { BAD, GOOD } },
+ { { BAD, BAD } },
+ { { BAD, GOOD } },
+ { { GOOD, GOOD } }, /* sentinel */
+ };
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /*
+ * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
+ * of invalid commands in user batches that will cause a GPU hang.
+ * This is a faster mechanism than using hangcheck/heartbeats, but
+ * only detects problems the HW knows about -- it will not warn when
+ * we kill the HW!
+ *
+ * To verify our detection and reset, we throw some invalid commands
+ * at the HW and wait for the interrupt.
+ */
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ const struct error_phase *p;
+ unsigned long heartbeat;
+ int err = 0;
+
+ engine_heartbeat_disable(engine, &heartbeat);
+
+ for (p = phases; p->error[0] != GOOD; p++) {
+ struct i915_request *client[ARRAY_SIZE(phases->error)];
+ u32 *cs;
+ int i;
+
+ memset(client, 0, sizeof(*client));
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
+ struct intel_context *ce;
+ struct i915_request *rq;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ rq = intel_context_create_request(ce);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ }
+
+ cs = intel_ring_begin(rq, 2);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ err = PTR_ERR(cs);
+ goto out;
+ }
+
+ if (p->error[i]) {
+ *cs++ = 0xdeadbeef;
+ *cs++ = 0xdeadbeef;
+ } else {
+ *cs++ = MI_NOOP;
+ *cs++ = MI_NOOP;
+ }
+
+ client[i] = i915_request_get(rq);
+ i915_request_add(rq);
+ }
+
+ err = wait_for_submit(engine, client[0], HZ / 2);
+ if (err) {
+ pr_err("%s: first request did not start within time!\n",
+ engine->name);
+ err = -ETIME;
+ goto out;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
+ if (i915_request_wait(client[i], 0, HZ / 5) < 0)
+ pr_debug("%s: %s request incomplete!\n",
+ engine->name,
+ error_repr(p->error[i]));
+
+ if (!i915_request_started(client[i])) {
+ pr_debug("%s: %s request not stated!\n",
+ engine->name,
+ error_repr(p->error[i]));
+ err = -ETIME;
+ goto out;
+ }
+
+ /* Kick the tasklet to process the error */
+ intel_engine_flush_submission(engine);
+ if (client[i]->fence.error != p->error[i]) {
+ pr_err("%s: %s request completed with wrong error code: %d\n",
+ engine->name,
+ error_repr(p->error[i]),
+ client[i]->fence.error);
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+out:
+ for (i = 0; i < ARRAY_SIZE(client); i++)
+ if (client[i])
+ i915_request_put(client[i]);
+ if (err) {
+ pr_err("%s: failed at phase[%zd] { %d, %d }\n",
+ engine->name, p - phases,
+ p->error[0], p->error[1]);
+ break;
+ }
+ }
+
+ engine_heartbeat_enable(engine, heartbeat);
+ if (err) {
+ intel_gt_set_wedged(gt);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int
emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
{
@@ -580,6 +869,10 @@ static int live_timeslice_preempt(void *arg)
if (err)
goto err_map;
+ err = i915_vma_sync(vma);
+ if (err)
+ goto err_pin;
+
for_each_prime_number_from(count, 1, 16) {
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -614,33 +907,227 @@ err_obj:
return err;
}
-static struct i915_request *nop_request(struct intel_engine_cs *engine)
+static struct i915_request *
+create_rewinder(struct intel_context *ce,
+ struct i915_request *wait,
+ void *slot, int idx)
{
+ const u32 offset =
+ i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(slot);
struct i915_request *rq;
+ u32 *cs;
+ int err;
- rq = intel_engine_create_kernel_request(engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq))
return rq;
+ if (wait) {
+ err = i915_request_await_dma_fence(rq, &wait->fence);
+ if (err)
+ goto err;
+ }
+
+ cs = intel_ring_begin(rq, 10);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err;
+ }
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_NEQ_SDD;
+ *cs++ = 0;
+ *cs++ = offset;
+ *cs++ = 0;
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
+ *cs++ = offset + idx * sizeof(u32);
+ *cs++ = 0;
+
+ intel_ring_advance(rq, cs);
+
+ rq->sched.attr.priority = I915_PRIORITY_MASK;
+ err = 0;
+err:
i915_request_get(rq);
i915_request_add(rq);
+ if (err) {
+ i915_request_put(rq);
+ return ERR_PTR(err);
+ }
return rq;
}
-static int wait_for_submit(struct intel_engine_cs *engine,
- struct i915_request *rq,
- unsigned long timeout)
+static int live_timeslice_rewind(void *arg)
{
- timeout += jiffies;
- do {
- cond_resched();
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /*
+ * The usual presumption on timeslice expiration is that we replace
+ * the active context with another. However, given a chain of
+ * dependencies we may end up with replacing the context with itself,
+ * but only a few of those requests, forcing us to rewind the
+ * RING_TAIL of the original request.
+ */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ enum { A1, A2, B1 };
+ enum { X = 1, Y, Z };
+ struct i915_request *rq[3] = {};
+ struct intel_context *ce;
+ unsigned long heartbeat;
+ unsigned long timeslice;
+ int i, err = 0;
+ u32 *slot;
+
+ if (!intel_engine_has_timeslices(engine))
+ continue;
+
+ /*
+ * A:rq1 -- semaphore wait, timestamp X
+ * A:rq2 -- write timestamp Y
+ *
+ * B:rq1 [await A:rq1] -- write timestamp Z
+ *
+ * Force timeslice, release semaphore.
+ *
+ * Expect execution/evaluation order XZY
+ */
+
+ engine_heartbeat_disable(engine, &heartbeat);
+ timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
+
+ slot = memset32(engine->status_page.addr + 1000, 0, 4);
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto err;
+ }
+
+ rq[0] = create_rewinder(ce, NULL, slot, 1);
+ if (IS_ERR(rq[0])) {
+ intel_context_put(ce);
+ goto err;
+ }
+
+ rq[1] = create_rewinder(ce, NULL, slot, 2);
+ intel_context_put(ce);
+ if (IS_ERR(rq[1]))
+ goto err;
+
+ err = wait_for_submit(engine, rq[1], HZ / 2);
+ if (err) {
+ pr_err("%s: failed to submit first context\n",
+ engine->name);
+ goto err;
+ }
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto err;
+ }
+
+ rq[2] = create_rewinder(ce, rq[0], slot, 3);
+ intel_context_put(ce);
+ if (IS_ERR(rq[2]))
+ goto err;
+
+ err = wait_for_submit(engine, rq[2], HZ / 2);
+ if (err) {
+ pr_err("%s: failed to submit second context\n",
+ engine->name);
+ goto err;
+ }
+ GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
+
+ /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
+ GEM_BUG_ON(!i915_request_is_active(rq[A1]));
+ GEM_BUG_ON(!i915_request_is_active(rq[A2]));
+ GEM_BUG_ON(!i915_request_is_active(rq[B1]));
+
+ /* Wait for the timeslice to kick in */
+ del_timer(&engine->execlists.timer);
+ tasklet_hi_schedule(&engine->execlists.tasklet);
intel_engine_flush_submission(engine);
- if (i915_request_is_active(rq))
- return 0;
- } while (time_before(jiffies, timeout));
- return -ETIME;
+ /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
+ GEM_BUG_ON(!i915_request_is_active(rq[A1]));
+ GEM_BUG_ON(!i915_request_is_active(rq[B1]));
+ GEM_BUG_ON(i915_request_is_active(rq[A2]));
+
+ /* Release the hounds! */
+ slot[0] = 1;
+ wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
+
+ for (i = 1; i <= 3; i++) {
+ unsigned long timeout = jiffies + HZ / 2;
+
+ while (!READ_ONCE(slot[i]) &&
+ time_before(jiffies, timeout))
+ ;
+
+ if (!time_before(jiffies, timeout)) {
+ pr_err("%s: rq[%d] timed out\n",
+ engine->name, i - 1);
+ err = -ETIME;
+ goto err;
+ }
+
+ pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
+ }
+
+ /* XZY: XZ < XY */
+ if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
+ pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
+ engine->name,
+ slot[Z] - slot[X],
+ slot[Y] - slot[X]);
+ err = -EINVAL;
+ }
+
+err:
+ memset32(&slot[0], -1, 4);
+ wmb();
+
+ engine->props.timeslice_duration_ms = timeslice;
+ engine_heartbeat_enable(engine, heartbeat);
+ for (i = 0; i < 3; i++)
+ i915_request_put(rq[i]);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static struct i915_request *nop_request(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq))
+ return rq;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ return rq;
}
static long timeslice_threshold(const struct intel_engine_cs *engine)
@@ -688,6 +1175,10 @@ static int live_timeslice_queue(void *arg)
if (err)
goto err_map;
+ err = i915_vma_sync(vma);
+ if (err)
+ goto err_pin;
+
for_each_engine(engine, gt, id) {
struct i915_sched_attr attr = {
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
@@ -774,6 +1265,7 @@ err_heartbeat:
break;
}
+err_pin:
i915_vma_unpin(vma);
err_map:
i915_gem_object_unpin_map(obj);
@@ -832,6 +1324,10 @@ static int live_busywait_preempt(void *arg)
if (err)
goto err_map;
+ err = i915_vma_sync(vma);
+ if (err)
+ goto err_vma;
+
for_each_engine(engine, gt, id) {
struct i915_request *lo, *hi;
struct igt_live_test t;
@@ -1352,14 +1848,9 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
if (err)
goto out;
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
- err = -EIO;
- goto out;
- }
-
- if (rq->fence.error != -EIO) {
- pr_err("Cancelled inflight0 request did not report -EIO\n");
- err = -EINVAL;
+ err = wait_for_reset(arg->engine, rq, HZ / 2);
+ if (err) {
+ pr_err("Cancelled inflight0 request did not reset\n");
goto out;
}
@@ -1417,10 +1908,9 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
goto out;
igt_spinner_end(&arg->a.spin);
- if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
- err = -EIO;
+ err = wait_for_reset(arg->engine, rq[1], HZ / 2);
+ if (err)
goto out;
- }
if (rq[0]->fence.error != 0) {
pr_err("Normal inflight0 request did not complete\n");
@@ -1500,10 +1990,9 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
if (err)
goto out;
- if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
- err = -EIO;
+ err = wait_for_reset(arg->engine, rq[2], HZ / 2);
+ if (err)
goto out;
- }
if (rq[0]->fence.error != -EIO) {
pr_err("Cancelled inflight0 request did not report -EIO\n");
@@ -1561,14 +2050,9 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
if (err)
goto out;
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
- err = -EIO;
- goto out;
- }
-
- if (rq->fence.error != -EIO) {
- pr_err("Cancelled inflight0 request did not report -EIO\n");
- err = -EINVAL;
+ err = wait_for_reset(arg->engine, rq, HZ / 2);
+ if (err) {
+ pr_err("Cancelled inflight0 request did not reset\n");
goto out;
}
@@ -1656,7 +2140,7 @@ static int live_suppress_self_preempt(void *arg)
if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0; /* presume black blox */
if (intel_vgpu_active(gt->i915))
@@ -2279,117 +2763,6 @@ static int live_preempt_gang(void *arg)
return 0;
}
-static int live_preempt_hang(void *arg)
-{
- struct intel_gt *gt = arg;
- struct i915_gem_context *ctx_hi, *ctx_lo;
- struct igt_spinner spin_hi, spin_lo;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err = -ENOMEM;
-
- if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
- return 0;
-
- if (!intel_has_reset_engine(gt))
- return 0;
-
- if (igt_spinner_init(&spin_hi, gt))
- return -ENOMEM;
-
- if (igt_spinner_init(&spin_lo, gt))
- goto err_spin_hi;
-
- ctx_hi = kernel_context(gt->i915);
- if (!ctx_hi)
- goto err_spin_lo;
- ctx_hi->sched.priority =
- I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
-
- ctx_lo = kernel_context(gt->i915);
- if (!ctx_lo)
- goto err_ctx_hi;
- ctx_lo->sched.priority =
- I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
-
- for_each_engine(engine, gt, id) {
- struct i915_request *rq;
-
- if (!intel_engine_has_preemption(engine))
- continue;
-
- rq = spinner_create_request(&spin_lo, ctx_lo, engine,
- MI_ARB_CHECK);
- if (IS_ERR(rq)) {
- err = PTR_ERR(rq);
- goto err_ctx_lo;
- }
-
- i915_request_add(rq);
- if (!igt_wait_for_spinner(&spin_lo, rq)) {
- GEM_TRACE("lo spinner failed to start\n");
- GEM_TRACE_DUMP();
- intel_gt_set_wedged(gt);
- err = -EIO;
- goto err_ctx_lo;
- }
-
- rq = spinner_create_request(&spin_hi, ctx_hi, engine,
- MI_ARB_CHECK);
- if (IS_ERR(rq)) {
- igt_spinner_end(&spin_lo);
- err = PTR_ERR(rq);
- goto err_ctx_lo;
- }
-
- init_completion(&engine->execlists.preempt_hang.completion);
- engine->execlists.preempt_hang.inject_hang = true;
-
- i915_request_add(rq);
-
- if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion,
- HZ / 10)) {
- pr_err("Preemption did not occur within timeout!");
- GEM_TRACE_DUMP();
- intel_gt_set_wedged(gt);
- err = -EIO;
- goto err_ctx_lo;
- }
-
- set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- intel_engine_reset(engine, NULL);
- clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
-
- engine->execlists.preempt_hang.inject_hang = false;
-
- if (!igt_wait_for_spinner(&spin_hi, rq)) {
- GEM_TRACE("hi spinner failed to start\n");
- GEM_TRACE_DUMP();
- intel_gt_set_wedged(gt);
- err = -EIO;
- goto err_ctx_lo;
- }
-
- igt_spinner_end(&spin_hi);
- igt_spinner_end(&spin_lo);
- if (igt_flush_test(gt->i915)) {
- err = -EIO;
- goto err_ctx_lo;
- }
- }
-
- err = 0;
-err_ctx_lo:
- kernel_context_close(ctx_lo);
-err_ctx_hi:
- kernel_context_close(ctx_hi);
-err_spin_lo:
- igt_spinner_fini(&spin_lo);
-err_spin_hi:
- igt_spinner_fini(&spin_hi);
- return err;
-}
-
static int live_preempt_timeout(void *arg)
{
struct intel_gt *gt = arg;
@@ -2882,7 +3255,7 @@ static int live_virtual_engine(void *arg)
unsigned int class, inst;
int err;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
for_each_engine(engine, gt, id) {
@@ -3015,7 +3388,7 @@ static int live_virtual_mask(void *arg)
unsigned int class, inst;
int err;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
@@ -3055,6 +3428,10 @@ static int preserved_virtual_engine(struct intel_gt *gt,
if (IS_ERR(scratch))
return PTR_ERR(scratch);
+ err = i915_vma_sync(scratch);
+ if (err)
+ goto out_scratch;
+
ve = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
@@ -3153,7 +3530,7 @@ static int live_virtual_preserved(void *arg)
* are preserved.
*/
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
/* As we use CS_GPR we cannot run before they existed on all engines. */
@@ -3243,15 +3620,21 @@ static int bond_virtual_engine(struct intel_gt *gt,
rq[0] = ERR_PTR(-ENOMEM);
for_each_engine(master, gt, id) {
struct i915_sw_fence fence = {};
+ struct intel_context *ce;
if (master->class == class)
continue;
+ ce = intel_context_create(master);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
- rq[0] = igt_spinner_create_request(&spin,
- master->kernel_context,
- MI_NOOP);
+ rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
+ intel_context_put(ce);
if (IS_ERR(rq[0])) {
err = PTR_ERR(rq[0]);
goto out;
@@ -3377,7 +3760,7 @@ static int live_virtual_bond(void *arg)
unsigned int class, inst;
int err;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
@@ -3538,7 +3921,7 @@ static int live_virtual_reset(void *arg)
* forgotten.
*/
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
if (!intel_has_reset_engine(gt))
@@ -3571,8 +3954,11 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_sanitycheck),
SUBTEST(live_unlite_switch),
SUBTEST(live_unlite_preempt),
+ SUBTEST(live_pin_rewind),
SUBTEST(live_hold_reset),
+ SUBTEST(live_error_interrupt),
SUBTEST(live_timeslice_preempt),
+ SUBTEST(live_timeslice_rewind),
SUBTEST(live_timeslice_queue),
SUBTEST(live_busywait_preempt),
SUBTEST(live_preempt),
@@ -3583,7 +3969,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_suppress_wait_preempt),
SUBTEST(live_chain_preempt),
SUBTEST(live_preempt_gang),
- SUBTEST(live_preempt_hang),
SUBTEST(live_preempt_timeout),
SUBTEST(live_preempt_smoke),
SUBTEST(live_virtual_engine),
@@ -3631,6 +4016,62 @@ static void hexdump(const void *buf, size_t len)
}
}
+static int emit_semaphore_signal(struct intel_context *ce, void *slot)
+{
+ const u32 offset =
+ i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(slot);
+ struct i915_request *rq;
+ u32 *cs;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return PTR_ERR(cs);
+ }
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = offset;
+ *cs++ = 0;
+ *cs++ = 1;
+
+ intel_ring_advance(rq, cs);
+
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ i915_request_add(rq);
+ return 0;
+}
+
+static int context_flush(struct intel_context *ce, long timeout)
+{
+ struct i915_request *rq;
+ struct dma_fence *fence;
+ int err = 0;
+
+ rq = intel_engine_create_kernel_request(ce->engine);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ fence = i915_active_fence_get(&ce->timeline->last_request);
+ if (fence) {
+ i915_request_await_dma_fence(rq, fence);
+ dma_fence_put(fence);
+ }
+
+ rq = i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, timeout) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ rmb(); /* We know the request is written, make sure all state is too! */
+ return err;
+}
+
static int live_lrc_layout(void *arg)
{
struct intel_gt *gt = arg;
@@ -3797,6 +4238,11 @@ static int live_lrc_fixed(void *arg)
CTX_BB_STATE - 1,
"BB_STATE"
},
+ {
+ i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
+ CTX_TIMESTAMP - 1,
+ "RING_CTX_TIMESTAMP"
+ },
{ },
}, *t;
u32 *hw;
@@ -3880,8 +4326,16 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
*cs++ = 0;
+ i915_vma_lock(scratch);
+ err = i915_request_await_object(rq, scratch->obj, true);
+ if (!err)
+ err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
+ i915_vma_unlock(scratch);
+
i915_request_get(rq);
i915_request_add(rq);
+ if (err)
+ goto err_rq;
intel_engine_flush_submission(engine);
expected[RING_TAIL_IDX] = ce->ring->tail;
@@ -3947,13 +4401,13 @@ static int live_lrc_state(void *arg)
return err;
}
-static int gpr_make_dirty(struct intel_engine_cs *engine)
+static int gpr_make_dirty(struct intel_context *ce)
{
struct i915_request *rq;
u32 *cs;
int n;
- rq = intel_engine_create_kernel_request(engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq))
return PTR_ERR(rq);
@@ -3965,20 +4419,79 @@ static int gpr_make_dirty(struct intel_engine_cs *engine)
*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
for (n = 0; n < NUM_GPR_DW; n++) {
- *cs++ = CS_GPR(engine, n);
+ *cs++ = CS_GPR(ce->engine, n);
*cs++ = STACK_MAGIC;
}
*cs++ = MI_NOOP;
intel_ring_advance(rq, cs);
+
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
i915_request_add(rq);
return 0;
}
-static int __live_gpr_clear(struct intel_engine_cs *engine,
- struct i915_vma *scratch)
+static struct i915_request *
+__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
{
+ const u32 offset =
+ i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(slot);
+ struct i915_request *rq;
+ u32 *cs;
+ int err;
+ int n;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return ERR_CAST(cs);
+ }
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_NEQ_SDD;
+ *cs++ = 0;
+ *cs++ = offset;
+ *cs++ = 0;
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = CS_GPR(ce->engine, n);
+ *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+ *cs++ = 0;
+ }
+
+ i915_vma_lock(scratch);
+ err = i915_request_await_object(rq, scratch->obj, true);
+ if (!err)
+ err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
+ i915_vma_unlock(scratch);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (err) {
+ i915_request_put(rq);
+ rq = ERR_PTR(err);
+ }
+
+ return rq;
+}
+
+static int __live_lrc_gpr(struct intel_engine_cs *engine,
+ struct i915_vma *scratch,
+ bool preempt)
+{
+ u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
struct intel_context *ce;
struct i915_request *rq;
u32 *cs;
@@ -3988,7 +4501,7 @@ static int __live_gpr_clear(struct intel_engine_cs *engine,
if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
return 0; /* GPR only on rcs0 for gen8 */
- err = gpr_make_dirty(engine);
+ err = gpr_make_dirty(engine->kernel_context);
if (err)
return err;
@@ -3996,28 +4509,28 @@ static int __live_gpr_clear(struct intel_engine_cs *engine,
if (IS_ERR(ce))
return PTR_ERR(ce);
- rq = intel_context_create_request(ce);
+ rq = __gpr_read(ce, scratch, slot);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_put;
}
- cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- i915_request_add(rq);
- goto err_put;
- }
+ err = wait_for_submit(engine, rq, HZ / 2);
+ if (err)
+ goto err_rq;
- for (n = 0; n < NUM_GPR_DW; n++) {
- *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
- *cs++ = CS_GPR(engine, n);
- *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
- *cs++ = 0;
- }
+ if (preempt) {
+ err = gpr_make_dirty(engine->kernel_context);
+ if (err)
+ goto err_rq;
- i915_request_get(rq);
- i915_request_add(rq);
+ err = emit_semaphore_signal(engine->kernel_context, slot);
+ if (err)
+ goto err_rq;
+ } else {
+ slot[0] = 1;
+ wmb();
+ }
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
err = -ETIME;
@@ -4044,13 +4557,15 @@ static int __live_gpr_clear(struct intel_engine_cs *engine,
i915_gem_object_unpin_map(scratch->obj);
err_rq:
+ memset32(&slot[0], -1, 4);
+ wmb();
i915_request_put(rq);
err_put:
intel_context_put(ce);
return err;
}
-static int live_gpr_clear(void *arg)
+static int live_lrc_gpr(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
@@ -4068,7 +4583,971 @@ static int live_gpr_clear(void *arg)
return PTR_ERR(scratch);
for_each_engine(engine, gt, id) {
- err = __live_gpr_clear(engine, scratch);
+ unsigned long heartbeat;
+
+ engine_heartbeat_disable(engine, &heartbeat);
+
+ err = __live_lrc_gpr(engine, scratch, false);
+ if (err)
+ goto err;
+
+ err = __live_lrc_gpr(engine, scratch, true);
+ if (err)
+ goto err;
+
+err:
+ engine_heartbeat_enable(engine, heartbeat);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ i915_vma_unpin_and_release(&scratch, 0);
+ return err;
+}
+
+static struct i915_request *
+create_timestamp(struct intel_context *ce, void *slot, int idx)
+{
+ const u32 offset =
+ i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(slot);
+ struct i915_request *rq;
+ u32 *cs;
+ int err;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ cs = intel_ring_begin(rq, 10);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err;
+ }
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_NEQ_SDD;
+ *cs++ = 0;
+ *cs++ = offset;
+ *cs++ = 0;
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
+ *cs++ = offset + idx * sizeof(u32);
+ *cs++ = 0;
+
+ intel_ring_advance(rq, cs);
+
+ rq->sched.attr.priority = I915_PRIORITY_MASK;
+ err = 0;
+err:
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (err) {
+ i915_request_put(rq);
+ return ERR_PTR(err);
+ }
+
+ return rq;
+}
+
+struct lrc_timestamp {
+ struct intel_engine_cs *engine;
+ struct intel_context *ce[2];
+ u32 poison;
+};
+
+static bool timestamp_advanced(u32 start, u32 end)
+{
+ return (s32)(end - start) > 0;
+}
+
+static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
+{
+ u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
+ struct i915_request *rq;
+ u32 timestamp;
+ int err = 0;
+
+ arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
+ rq = create_timestamp(arg->ce[0], slot, 1);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ err = wait_for_submit(rq->engine, rq, HZ / 2);
+ if (err)
+ goto err;
+
+ if (preempt) {
+ arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
+ err = emit_semaphore_signal(arg->ce[1], slot);
+ if (err)
+ goto err;
+ } else {
+ slot[0] = 1;
+ wmb();
+ }
+
+ /* And wait for switch to kernel (to save our context to memory) */
+ err = context_flush(arg->ce[0], HZ / 2);
+ if (err)
+ goto err;
+
+ if (!timestamp_advanced(arg->poison, slot[1])) {
+ pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
+ arg->engine->name, preempt ? "preempt" : "simple",
+ arg->poison, slot[1]);
+ err = -EINVAL;
+ }
+
+ timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
+ if (!timestamp_advanced(slot[1], timestamp)) {
+ pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
+ arg->engine->name, preempt ? "preempt" : "simple",
+ slot[1], timestamp);
+ err = -EINVAL;
+ }
+
+err:
+ memset32(slot, -1, 4);
+ i915_request_put(rq);
+ return err;
+}
+
+static int live_lrc_timestamp(void *arg)
+{
+ struct lrc_timestamp data = {};
+ struct intel_gt *gt = arg;
+ enum intel_engine_id id;
+ const u32 poison[] = {
+ 0,
+ S32_MAX,
+ (u32)S32_MAX + 1,
+ U32_MAX,
+ };
+
+ /*
+ * We want to verify that the timestamp is saved and restore across
+ * context switches and is monotonic.
+ *
+ * So we do this with a little bit of LRC poisoning to check various
+ * boundary conditions, and see what happens if we preempt the context
+ * with a second request (carrying more poison into the timestamp).
+ */
+
+ for_each_engine(data.engine, gt, id) {
+ unsigned long heartbeat;
+ int i, err = 0;
+
+ engine_heartbeat_disable(data.engine, &heartbeat);
+
+ for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
+ struct intel_context *tmp;
+
+ tmp = intel_context_create(data.engine);
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ goto err;
+ }
+
+ err = intel_context_pin(tmp);
+ if (err) {
+ intel_context_put(tmp);
+ goto err;
+ }
+
+ data.ce[i] = tmp;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(poison); i++) {
+ data.poison = poison[i];
+
+ err = __lrc_timestamp(&data, false);
+ if (err)
+ break;
+
+ err = __lrc_timestamp(&data, true);
+ if (err)
+ break;
+ }
+
+err:
+ engine_heartbeat_enable(data.engine, heartbeat);
+ for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
+ if (!data.ce[i])
+ break;
+
+ intel_context_unpin(data.ce[i]);
+ intel_context_put(data.ce[i]);
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static struct i915_vma *
+create_user_vma(struct i915_address_space *vm, unsigned long size)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(vm->i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
+static struct i915_vma *
+store_context(struct intel_context *ce, struct i915_vma *scratch)
+{
+ struct i915_vma *batch;
+ u32 dw, x, *cs, *hw;
+
+ batch = create_user_vma(ce->vm, SZ_64K);
+ if (IS_ERR(batch))
+ return batch;
+
+ cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_vma_put(batch);
+ return ERR_CAST(cs);
+ }
+
+ x = 0;
+ dw = 0;
+ hw = ce->engine->pinned_default_state;
+ hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ do {
+ u32 len = hw[dw] & 0x7f;
+
+ if (hw[dw] == 0) {
+ dw++;
+ continue;
+ }
+
+ if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ dw += len + 2;
+ continue;
+ }
+
+ dw++;
+ len = (len + 1) / 2;
+ while (len--) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cs++ = hw[dw];
+ *cs++ = lower_32_bits(scratch->node.start + x);
+ *cs++ = upper_32_bits(scratch->node.start + x);
+
+ dw += 2;
+ x += 4;
+ }
+ } while (dw < PAGE_SIZE / sizeof(u32) &&
+ (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(batch->obj);
+ i915_gem_object_unpin_map(batch->obj);
+
+ return batch;
+}
+
+static int move_to_active(struct i915_request *rq,
+ struct i915_vma *vma,
+ unsigned int flags)
+{
+ int err;
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, flags);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, flags);
+ i915_vma_unlock(vma);
+
+ return err;
+}
+
+static struct i915_request *
+record_registers(struct intel_context *ce,
+ struct i915_vma *before,
+ struct i915_vma *after,
+ u32 *sema)
+{
+ struct i915_vma *b_before, *b_after;
+ struct i915_request *rq;
+ u32 *cs;
+ int err;
+
+ b_before = store_context(ce, before);
+ if (IS_ERR(b_before))
+ return ERR_CAST(b_before);
+
+ b_after = store_context(ce, after);
+ if (IS_ERR(b_after)) {
+ rq = ERR_CAST(b_after);
+ goto err_before;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ goto err_after;
+
+ err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
+ if (err)
+ goto err_rq;
+
+ err = move_to_active(rq, b_before, 0);
+ if (err)
+ goto err_rq;
+
+ err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
+ if (err)
+ goto err_rq;
+
+ err = move_to_active(rq, b_after, 0);
+ if (err)
+ goto err_rq;
+
+ cs = intel_ring_begin(rq, 14);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+ *cs++ = lower_32_bits(b_before->node.start);
+ *cs++ = upper_32_bits(b_before->node.start);
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_NEQ_SDD;
+ *cs++ = 0;
+ *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(sema);
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+ *cs++ = lower_32_bits(b_after->node.start);
+ *cs++ = upper_32_bits(b_after->node.start);
+
+ intel_ring_advance(rq, cs);
+
+ WRITE_ONCE(*sema, 0);
+ i915_request_get(rq);
+ i915_request_add(rq);
+err_after:
+ i915_vma_put(b_after);
+err_before:
+ i915_vma_put(b_before);
+ return rq;
+
+err_rq:
+ i915_request_add(rq);
+ rq = ERR_PTR(err);
+ goto err_after;
+}
+
+static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
+{
+ struct i915_vma *batch;
+ u32 dw, *cs, *hw;
+
+ batch = create_user_vma(ce->vm, SZ_64K);
+ if (IS_ERR(batch))
+ return batch;
+
+ cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_vma_put(batch);
+ return ERR_CAST(cs);
+ }
+
+ dw = 0;
+ hw = ce->engine->pinned_default_state;
+ hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ do {
+ u32 len = hw[dw] & 0x7f;
+
+ if (hw[dw] == 0) {
+ dw++;
+ continue;
+ }
+
+ if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ dw += len + 2;
+ continue;
+ }
+
+ dw++;
+ len = (len + 1) / 2;
+ *cs++ = MI_LOAD_REGISTER_IMM(len);
+ while (len--) {
+ *cs++ = hw[dw];
+ *cs++ = poison;
+ dw += 2;
+ }
+ } while (dw < PAGE_SIZE / sizeof(u32) &&
+ (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(batch->obj);
+ i915_gem_object_unpin_map(batch->obj);
+
+ return batch;
+}
+
+static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
+{
+ struct i915_request *rq;
+ struct i915_vma *batch;
+ u32 *cs;
+ int err;
+
+ batch = load_context(ce, poison);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_batch;
+ }
+
+ err = move_to_active(rq, batch, 0);
+ if (err)
+ goto err_rq;
+
+ cs = intel_ring_begin(rq, 8);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
+ *cs++ = lower_32_bits(batch->node.start);
+ *cs++ = upper_32_bits(batch->node.start);
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
+ offset_in_page(sema);
+ *cs++ = 0;
+ *cs++ = 1;
+
+ intel_ring_advance(rq, cs);
+
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+err_rq:
+ i915_request_add(rq);
+err_batch:
+ i915_vma_put(batch);
+ return err;
+}
+
+static bool is_moving(u32 a, u32 b)
+{
+ return a != b;
+}
+
+static int compare_isolation(struct intel_engine_cs *engine,
+ struct i915_vma *ref[2],
+ struct i915_vma *result[2],
+ struct intel_context *ce,
+ u32 poison)
+{
+ u32 x, dw, *hw, *lrc;
+ u32 *A[2], *B[2];
+ int err = 0;
+
+ A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
+ if (IS_ERR(A[0]))
+ return PTR_ERR(A[0]);
+
+ A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
+ if (IS_ERR(A[1])) {
+ err = PTR_ERR(A[1]);
+ goto err_A0;
+ }
+
+ B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
+ if (IS_ERR(B[0])) {
+ err = PTR_ERR(B[0]);
+ goto err_A1;
+ }
+
+ B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
+ if (IS_ERR(B[1])) {
+ err = PTR_ERR(B[1]);
+ goto err_B0;
+ }
+
+ lrc = i915_gem_object_pin_map(ce->state->obj,
+ i915_coherent_map_type(engine->i915));
+ if (IS_ERR(lrc)) {
+ err = PTR_ERR(lrc);
+ goto err_B1;
+ }
+ lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+ x = 0;
+ dw = 0;
+ hw = engine->pinned_default_state;
+ hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+ do {
+ u32 len = hw[dw] & 0x7f;
+
+ if (hw[dw] == 0) {
+ dw++;
+ continue;
+ }
+
+ if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ dw += len + 2;
+ continue;
+ }
+
+ dw++;
+ len = (len + 1) / 2;
+ while (len--) {
+ if (!is_moving(A[0][x], A[1][x]) &&
+ (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
+ switch (hw[dw] & 4095) {
+ case 0x30: /* RING_HEAD */
+ case 0x34: /* RING_TAIL */
+ break;
+
+ default:
+ pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
+ engine->name, dw,
+ hw[dw], hw[dw + 1],
+ A[0][x], B[0][x], B[1][x],
+ poison, lrc[dw + 1]);
+ err = -EINVAL;
+ break;
+ }
+ }
+ dw += 2;
+ x++;
+ }
+ } while (dw < PAGE_SIZE / sizeof(u32) &&
+ (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ i915_gem_object_unpin_map(ce->state->obj);
+err_B1:
+ i915_gem_object_unpin_map(result[1]->obj);
+err_B0:
+ i915_gem_object_unpin_map(result[0]->obj);
+err_A1:
+ i915_gem_object_unpin_map(ref[1]->obj);
+err_A0:
+ i915_gem_object_unpin_map(ref[0]->obj);
+ return err;
+}
+
+static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
+{
+ u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
+ struct i915_vma *ref[2], *result[2];
+ struct intel_context *A, *B;
+ struct i915_request *rq;
+ int err;
+
+ A = intel_context_create(engine);
+ if (IS_ERR(A))
+ return PTR_ERR(A);
+
+ B = intel_context_create(engine);
+ if (IS_ERR(B)) {
+ err = PTR_ERR(B);
+ goto err_A;
+ }
+
+ ref[0] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(ref[0])) {
+ err = PTR_ERR(ref[0]);
+ goto err_B;
+ }
+
+ ref[1] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(ref[1])) {
+ err = PTR_ERR(ref[1]);
+ goto err_ref0;
+ }
+
+ rq = record_registers(A, ref[0], ref[1], sema);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ref1;
+ }
+
+ WRITE_ONCE(*sema, 1);
+ wmb();
+
+ if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_ref1;
+ }
+ i915_request_put(rq);
+
+ result[0] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(result[0])) {
+ err = PTR_ERR(result[0]);
+ goto err_ref1;
+ }
+
+ result[1] = create_user_vma(A->vm, SZ_64K);
+ if (IS_ERR(result[1])) {
+ err = PTR_ERR(result[1]);
+ goto err_result0;
+ }
+
+ rq = record_registers(A, result[0], result[1], sema);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_result1;
+ }
+
+ err = poison_registers(B, poison, sema);
+ if (err) {
+ WRITE_ONCE(*sema, -1);
+ i915_request_put(rq);
+ goto err_result1;
+ }
+
+ if (i915_request_wait(rq, 0, HZ / 2) < 0) {
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_result1;
+ }
+ i915_request_put(rq);
+
+ err = compare_isolation(engine, ref, result, A, poison);
+
+err_result1:
+ i915_vma_put(result[1]);
+err_result0:
+ i915_vma_put(result[0]);
+err_ref1:
+ i915_vma_put(ref[1]);
+err_ref0:
+ i915_vma_put(ref[0]);
+err_B:
+ intel_context_put(B);
+err_A:
+ intel_context_put(A);
+ return err;
+}
+
+static bool skip_isolation(const struct intel_engine_cs *engine)
+{
+ if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
+ return true;
+
+ if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
+ return true;
+
+ return false;
+}
+
+static int live_lrc_isolation(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ const u32 poison[] = {
+ STACK_MAGIC,
+ 0x3a3a3a3a,
+ 0x5c5c5c5c,
+ 0xffffffff,
+ 0xffff0000,
+ };
+
+ /*
+ * Our goal is try and verify that per-context state cannot be
+ * tampered with by another non-privileged client.
+ *
+ * We take the list of context registers from the LRI in the default
+ * context image and attempt to modify that list from a remote context.
+ */
+
+ for_each_engine(engine, gt, id) {
+ int err = 0;
+ int i;
+
+ /* Just don't even ask */
+ if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
+ skip_isolation(engine))
+ continue;
+
+ intel_engine_pm_get(engine);
+ if (engine->pinned_default_state) {
+ for (i = 0; i < ARRAY_SIZE(poison); i++) {
+ err = __lrc_isolation(engine, poison[i]);
+ if (err)
+ break;
+
+ err = __lrc_isolation(engine, ~poison[i]);
+ if (err)
+ break;
+ }
+ }
+ intel_engine_pm_put(engine);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void garbage_reset(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ const unsigned int bit = I915_RESET_ENGINE + engine->id;
+ unsigned long *lock = &engine->gt->reset.flags;
+
+ if (test_and_set_bit(bit, lock))
+ return;
+
+ tasklet_disable(&engine->execlists.tasklet);
+
+ if (!rq->fence.error)
+ intel_engine_reset(engine, NULL);
+
+ tasklet_enable(&engine->execlists.tasklet);
+ clear_and_wake_up_bit(bit, lock);
+}
+
+static struct i915_request *garbage(struct intel_context *ce,
+ struct rnd_state *prng)
+{
+ struct i915_request *rq;
+ int err;
+
+ err = intel_context_pin(ce);
+ if (err)
+ return ERR_PTR(err);
+
+ prandom_bytes_state(prng,
+ ce->lrc_reg_state,
+ ce->engine->context_size -
+ LRC_STATE_PN * PAGE_SIZE);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ return rq;
+
+err_unpin:
+ intel_context_unpin(ce);
+ return ERR_PTR(err);
+}
+
+static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
+{
+ struct intel_context *ce;
+ struct i915_request *hang;
+ int err = 0;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ hang = garbage(ce, prng);
+ if (IS_ERR(hang)) {
+ err = PTR_ERR(hang);
+ goto err_ce;
+ }
+
+ if (wait_for_submit(engine, hang, HZ / 2)) {
+ i915_request_put(hang);
+ err = -ETIME;
+ goto err_ce;
+ }
+
+ intel_context_set_banned(ce);
+ garbage_reset(engine, hang);
+
+ intel_engine_flush_submission(engine);
+ if (!hang->fence.error) {
+ i915_request_put(hang);
+ pr_err("%s: corrupted context was not reset\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_ce;
+ }
+
+ if (i915_request_wait(hang, 0, HZ / 2) < 0) {
+ pr_err("%s: corrupted context did not recover\n",
+ engine->name);
+ i915_request_put(hang);
+ err = -EIO;
+ goto err_ce;
+ }
+ i915_request_put(hang);
+
+err_ce:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_lrc_garbage(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /*
+ * Verify that we can recover if one context state is completely
+ * corrupted.
+ */
+
+ if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ I915_RND_STATE(prng);
+ int err = 0, i;
+
+ if (!intel_has_reset_engine(engine->gt))
+ continue;
+
+ intel_engine_pm_get(engine);
+ for (i = 0; i < 3; i++) {
+ err = __lrc_garbage(engine, &prng);
+ if (err)
+ break;
+ }
+ intel_engine_pm_put(engine);
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ IGT_TIMEOUT(end_time);
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ ce->runtime.num_underflow = 0;
+ ce->runtime.max_underflow = 0;
+
+ do {
+ unsigned int loop = 1024;
+
+ while (loop) {
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_rq;
+ }
+
+ if (--loop == 0)
+ i915_request_get(rq);
+
+ i915_request_add(rq);
+ }
+
+ if (__igt_timeout(end_time, NULL))
+ break;
+
+ i915_request_put(rq);
+ } while (1);
+
+ err = i915_request_wait(rq, 0, HZ / 5);
+ if (err < 0) {
+ pr_err("%s: request not completed!\n", engine->name);
+ goto err_wait;
+ }
+
+ igt_flush_test(engine->i915);
+
+ pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
+ engine->name,
+ intel_context_get_total_runtime_ns(ce),
+ intel_context_get_avg_runtime_ns(ce));
+
+ err = 0;
+ if (ce->runtime.num_underflow) {
+ pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
+ engine->name,
+ ce->runtime.num_underflow,
+ ce->runtime.max_underflow);
+ GEM_TRACE_DUMP();
+ err = -EOVERFLOW;
+ }
+
+err_wait:
+ i915_request_put(rq);
+err_rq:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_pphwsp_runtime(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check that cumulative context runtime as stored in the pphwsp[16]
+ * is monotonic.
+ */
+
+ for_each_engine(engine, gt, id) {
+ err = __live_pphwsp_runtime(engine);
if (err)
break;
}
@@ -4076,7 +5555,6 @@ static int live_gpr_clear(void *arg)
if (igt_flush_test(gt->i915))
err = -EIO;
- i915_vma_unpin_and_release(&scratch, 0);
return err;
}
@@ -4086,7 +5564,11 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_lrc_layout),
SUBTEST(live_lrc_fixed),
SUBTEST(live_lrc_state),
- SUBTEST(live_gpr_clear),
+ SUBTEST(live_lrc_gpr),
+ SUBTEST(live_lrc_isolation),
+ SUBTEST(live_lrc_timestamp),
+ SUBTEST(live_lrc_garbage),
+ SUBTEST(live_pphwsp_runtime),
};
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index de1f83100fb6..8831ffee2061 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -12,7 +12,8 @@
#include "selftests/igt_spinner.h"
struct live_mocs {
- struct drm_i915_mocs_table table;
+ struct drm_i915_mocs_table mocs;
+ struct drm_i915_mocs_table l3cc;
struct i915_vma *scratch;
void *vaddr;
};
@@ -70,11 +71,22 @@ static struct i915_vma *create_scratch(struct intel_gt *gt)
static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt)
{
+ struct drm_i915_mocs_table table;
+ unsigned int flags;
int err;
- if (!get_mocs_settings(gt->i915, &arg->table))
+ memset(arg, 0, sizeof(*arg));
+
+ flags = get_mocs_settings(gt->i915, &table);
+ if (!flags)
return -EINVAL;
+ if (flags & HAS_RENDER_L3CC)
+ arg->l3cc = table;
+
+ if (flags & (HAS_GLOBAL_MOCS | HAS_ENGINE_MOCS))
+ arg->mocs = table;
+
arg->scratch = create_scratch(gt);
if (IS_ERR(arg->scratch))
return PTR_ERR(arg->scratch);
@@ -223,9 +235,9 @@ static int check_mocs_engine(struct live_mocs *arg,
/* Read the mocs tables back using SRM */
offset = i915_ggtt_offset(vma);
if (!err)
- err = read_mocs_table(rq, &arg->table, &offset);
+ err = read_mocs_table(rq, &arg->mocs, &offset);
if (!err && ce->engine->class == RENDER_CLASS)
- err = read_l3cc_table(rq, &arg->table, &offset);
+ err = read_l3cc_table(rq, &arg->l3cc, &offset);
offset -= i915_ggtt_offset(vma);
GEM_BUG_ON(offset > PAGE_SIZE);
@@ -236,9 +248,9 @@ static int check_mocs_engine(struct live_mocs *arg,
/* Compare the results against the expected tables */
vaddr = arg->vaddr;
if (!err)
- err = check_mocs_table(ce->engine, &arg->table, &vaddr);
+ err = check_mocs_table(ce->engine, &arg->mocs, &vaddr);
if (!err && ce->engine->class == RENDER_CLASS)
- err = check_l3cc_table(ce->engine, &arg->table, &vaddr);
+ err = check_l3cc_table(ce->engine, &arg->l3cc, &vaddr);
if (err)
return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
index 8cc55a0e9e06..95b165faeba7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -12,6 +12,21 @@
#include "selftests/i915_random.h"
+static u64 rc6_residency(struct intel_rc6 *rc6)
+{
+ u64 result;
+
+ /* XXX VLV_GT_MEDIA_RC6? */
+
+ result = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ if (HAS_RC6p(rc6_to_i915(rc6)))
+ result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6p);
+ if (HAS_RC6pp(rc6_to_i915(rc6)))
+ result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6pp);
+
+ return result;
+}
+
int live_rc6_manual(void *arg)
{
struct intel_gt *gt = arg;
@@ -38,9 +53,9 @@ int live_rc6_manual(void *arg)
__intel_rc6_disable(rc6);
msleep(1); /* wakeup is not immediate, takes about 100us on icl */
- res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ res[0] = rc6_residency(rc6);
msleep(250);
- res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ res[1] = rc6_residency(rc6);
if ((res[1] - res[0]) >> 10) {
pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
(res[1] - res[0]) >> 10);
@@ -51,14 +66,15 @@ int live_rc6_manual(void *arg)
/* Manually enter RC6 */
intel_rc6_park(rc6);
- res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ res[0] = rc6_residency(rc6);
msleep(100);
- res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ res[1] = rc6_residency(rc6);
if (res[1] == res[0]) {
- pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x\n",
+ pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x, residency=%lld\n",
intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE),
- intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL));
+ intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL),
+ res[0]);
err = -EINVAL;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 6ad6aca315f6..35406ecdf0b2 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -115,7 +115,7 @@ static int igt_atomic_engine_reset(void *arg)
if (!intel_has_reset_engine(gt))
return 0;
- if (USES_GUC_SUBMISSION(gt->i915))
+ if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
intel_gt_pm_get(gt);
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
new file mode 100644
index 000000000000..9995faadd7e8
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "intel_engine_pm.h"
+#include "selftests/igt_flush_test.h"
+
+static struct i915_vma *create_wally(struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ obj = i915_gem_object_create_internal(engine->i915, 4096);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, engine->gt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ err = i915_vma_sync(vma);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(cs)) {
+ i915_gem_object_put(obj);
+ return ERR_CAST(cs);
+ }
+
+ if (INTEL_GEN(engine->i915) >= 6) {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4;
+ *cs++ = 0;
+ } else if (INTEL_GEN(engine->i915) >= 4) {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = 0;
+ } else {
+ *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ }
+ *cs++ = vma->node.start + 4000;
+ *cs++ = STACK_MAGIC;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+ i915_gem_object_unpin_map(obj);
+
+ vma->private = intel_context_create(engine); /* dummy residuals */
+ if (IS_ERR(vma->private)) {
+ vma = ERR_CAST(vma->private);
+ i915_gem_object_put(obj);
+ }
+
+ return vma;
+}
+
+static int context_sync(struct intel_context *ce)
+{
+ struct i915_request *rq;
+ int err = 0;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ return err;
+}
+
+static int new_context_sync(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = context_sync(ce);
+ intel_context_put(ce);
+
+ return err;
+}
+
+static int mixed_contexts_sync(struct intel_engine_cs *engine, u32 *result)
+{
+ int pass;
+ int err;
+
+ for (pass = 0; pass < 2; pass++) {
+ WRITE_ONCE(*result, 0);
+ err = context_sync(engine->kernel_context);
+ if (err || READ_ONCE(*result)) {
+ if (!err) {
+ pr_err("pass[%d] wa_bb emitted for the kernel context\n",
+ pass);
+ err = -EINVAL;
+ }
+ return err;
+ }
+
+ WRITE_ONCE(*result, 0);
+ err = new_context_sync(engine);
+ if (READ_ONCE(*result) != STACK_MAGIC) {
+ if (!err) {
+ pr_err("pass[%d] wa_bb *NOT* emitted after the kernel context\n",
+ pass);
+ err = -EINVAL;
+ }
+ return err;
+ }
+
+ WRITE_ONCE(*result, 0);
+ err = new_context_sync(engine);
+ if (READ_ONCE(*result) != STACK_MAGIC) {
+ if (!err) {
+ pr_err("pass[%d] wa_bb *NOT* emitted for the user context switch\n",
+ pass);
+ err = -EINVAL;
+ }
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int double_context_sync_00(struct intel_engine_cs *engine, u32 *result)
+{
+ struct intel_context *ce;
+ int err, i;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ for (i = 0; i < 2; i++) {
+ WRITE_ONCE(*result, 0);
+ err = context_sync(ce);
+ if (err)
+ break;
+ }
+ intel_context_put(ce);
+ if (err)
+ return err;
+
+ if (READ_ONCE(*result)) {
+ pr_err("wa_bb emitted between the same user context\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int kernel_context_sync_00(struct intel_engine_cs *engine, u32 *result)
+{
+ struct intel_context *ce;
+ int err, i;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ for (i = 0; i < 2; i++) {
+ WRITE_ONCE(*result, 0);
+ err = context_sync(ce);
+ if (err)
+ break;
+
+ err = context_sync(engine->kernel_context);
+ if (err)
+ break;
+ }
+ intel_context_put(ce);
+ if (err)
+ return err;
+
+ if (READ_ONCE(*result)) {
+ pr_err("wa_bb emitted between the same user context [with intervening kernel]\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __live_ctx_switch_wa(struct intel_engine_cs *engine)
+{
+ struct i915_vma *bb;
+ u32 *result;
+ int err;
+
+ bb = create_wally(engine);
+ if (IS_ERR(bb))
+ return PTR_ERR(bb);
+
+ result = i915_gem_object_pin_map(bb->obj, I915_MAP_WC);
+ if (IS_ERR(result)) {
+ intel_context_put(bb->private);
+ i915_vma_unpin_and_release(&bb, 0);
+ return PTR_ERR(result);
+ }
+ result += 1000;
+
+ engine->wa_ctx.vma = bb;
+
+ err = mixed_contexts_sync(engine, result);
+ if (err)
+ goto out;
+
+ err = double_context_sync_00(engine, result);
+ if (err)
+ goto out;
+
+ err = kernel_context_sync_00(engine, result);
+ if (err)
+ goto out;
+
+out:
+ intel_context_put(engine->wa_ctx.vma->private);
+ i915_vma_unpin_and_release(&engine->wa_ctx.vma, I915_VMA_RELEASE_MAP);
+ return err;
+}
+
+static int live_ctx_switch_wa(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /*
+ * Exercise the inter-context wa batch.
+ *
+ * Between each user context we run a wa batch, and since it may
+ * have implications for user visible state, we have to check that
+ * we do actually execute it.
+ *
+ * The trick we use is to replace the normal wa batch with a custom
+ * one that writes to a marker within it, and we can then look for
+ * that marker to confirm if the batch was run when we expect it,
+ * and equally important it was wasn't run when we don't!
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct i915_vma *saved_wa;
+ int err;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ if (IS_GEN_RANGE(gt->i915, 4, 5))
+ continue; /* MI_STORE_DWORD is privileged! */
+
+ saved_wa = fetch_and_zero(&engine->wa_ctx.vma);
+
+ intel_engine_pm_get(engine);
+ err = __live_ctx_switch_wa(engine);
+ intel_engine_pm_put(engine);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ engine->wa_ctx.vma = saved_wa;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int intel_ring_submission_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_ctx_switch_wa),
+ };
+
+ if (HAS_EXECLISTS(i915))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index e2d78cc22fb4..c2578a0f2f14 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -6,6 +6,8 @@
#include <linux/prime_numbers.h>
+#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
@@ -604,7 +606,6 @@ static int live_hwsp_alternate(void *arg)
tl = checked_intel_timeline_create(gt);
if (IS_ERR(tl)) {
- intel_engine_pm_put(engine);
err = PTR_ERR(tl);
goto out;
}
@@ -750,6 +751,189 @@ out_free:
return err;
}
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+ unsigned long *saved)
+{
+ *saved = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
+static int live_hwsp_rollover_kernel(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Run the host for long enough, and even the kernel context will
+ * see a seqno rollover.
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce = engine->kernel_context;
+ struct intel_timeline *tl = ce->timeline;
+ struct i915_request *rq[3] = {};
+ unsigned long heartbeat;
+ int i;
+
+ engine_heartbeat_disable(engine, &heartbeat);
+ if (intel_gt_wait_for_idle(gt, HZ / 2)) {
+ err = -EIO;
+ goto out;
+ }
+
+ GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
+ tl->seqno = 0;
+ timeline_rollback(tl);
+ timeline_rollback(tl);
+ WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
+
+ for (i = 0; i < ARRAY_SIZE(rq); i++) {
+ struct i915_request *this;
+
+ this = i915_request_create(ce);
+ if (IS_ERR(this)) {
+ err = PTR_ERR(this);
+ goto out;
+ }
+
+ pr_debug("%s: create fence.seqnp:%d\n",
+ engine->name,
+ lower_32_bits(this->fence.seqno));
+
+ GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
+
+ rq[i] = i915_request_get(this);
+ i915_request_add(this);
+ }
+
+ /* We expected a wrap! */
+ GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
+
+ if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
+ pr_err("Wait for timeline wrap timed out!\n");
+ err = -EIO;
+ goto out;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(rq); i++) {
+ if (!i915_request_completed(rq[i])) {
+ pr_err("Pre-wrap request not completed!\n");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+out:
+ for (i = 0; i < ARRAY_SIZE(rq); i++)
+ i915_request_put(rq[i]);
+ engine_heartbeat_enable(engine, heartbeat);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ return err;
+}
+
+static int live_hwsp_rollover_user(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Simulate a long running user context, and force the seqno wrap
+ * on the user's timeline.
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct i915_request *rq[3] = {};
+ struct intel_timeline *tl;
+ struct intel_context *ce;
+ int i;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = intel_context_alloc_state(ce);
+ if (err)
+ goto out;
+
+ tl = ce->timeline;
+ if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
+ goto out;
+
+ timeline_rollback(tl);
+ timeline_rollback(tl);
+ WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
+
+ for (i = 0; i < ARRAY_SIZE(rq); i++) {
+ struct i915_request *this;
+
+ this = intel_context_create_request(ce);
+ if (IS_ERR(this)) {
+ err = PTR_ERR(this);
+ goto out;
+ }
+
+ pr_debug("%s: create fence.seqnp:%d\n",
+ engine->name,
+ lower_32_bits(this->fence.seqno));
+
+ GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
+
+ rq[i] = i915_request_get(this);
+ i915_request_add(this);
+ }
+
+ /* We expected a wrap! */
+ GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
+
+ if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
+ pr_err("Wait for timeline wrap timed out!\n");
+ err = -EIO;
+ goto out;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(rq); i++) {
+ if (!i915_request_completed(rq[i])) {
+ pr_err("Pre-wrap request not completed!\n");
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+out:
+ for (i = 0; i < ARRAY_SIZE(rq); i++)
+ i915_request_put(rq[i]);
+ intel_context_put(ce);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ return err;
+}
+
static int live_hwsp_recycle(void *arg)
{
struct intel_gt *gt = arg;
@@ -827,6 +1011,8 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_hwsp_engine),
SUBTEST(live_hwsp_alternate),
SUBTEST(live_hwsp_wrap),
+ SUBTEST(live_hwsp_rollover_kernel),
+ SUBTEST(live_hwsp_rollover_user),
};
if (intel_gt_is_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index ac1921854cbf..5ed323254ee1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -583,6 +583,15 @@ static int check_dirty_whitelist(struct intel_context *ce)
if (err)
goto err_request;
+ i915_vma_lock(scratch);
+ err = i915_request_await_object(rq, scratch->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(scratch, rq,
+ EXEC_OBJECT_WRITE);
+ i915_vma_unlock(scratch);
+ if (err)
+ goto err_request;
+
err = engine->emit_bb_start(rq,
batch->node.start, PAGE_SIZE,
0);
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
new file mode 100644
index 000000000000..8f9b2f33dbaf
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include "i915_drv.h"
+#include "intel_engine.h"
+#include "intel_engine_heartbeat.h"
+#include "sysfs_engines.h"
+
+struct kobj_engine {
+ struct kobject base;
+ struct intel_engine_cs *engine;
+};
+
+static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj)
+{
+ return container_of(kobj, struct kobj_engine, base)->engine;
+}
+
+static ssize_t
+name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s\n", kobj_to_engine(kobj)->name);
+}
+
+static struct kobj_attribute name_attr =
+__ATTR(name, 0444, name_show, NULL);
+
+static ssize_t
+class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_class);
+}
+
+static struct kobj_attribute class_attr =
+__ATTR(class, 0444, class_show, NULL);
+
+static ssize_t
+inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance);
+}
+
+static struct kobj_attribute inst_attr =
+__ATTR(instance, 0444, inst_show, NULL);
+
+static ssize_t
+mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base);
+}
+
+static struct kobj_attribute mmio_attr =
+__ATTR(mmio_base, 0444, mmio_show, NULL);
+
+static const char * const vcs_caps[] = {
+ [ilog2(I915_VIDEO_CLASS_CAPABILITY_HEVC)] = "hevc",
+ [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc",
+};
+
+static const char * const vecs_caps[] = {
+ [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc",
+};
+
+static ssize_t repr_trim(char *buf, ssize_t len)
+{
+ /* Trim off the trailing space and replace with a newline */
+ if (len > PAGE_SIZE)
+ len = PAGE_SIZE;
+ if (len > 0)
+ buf[len - 1] = '\n';
+
+ return len;
+}
+
+static ssize_t
+__caps_show(struct intel_engine_cs *engine,
+ u32 caps, char *buf, bool show_unknown)
+{
+ const char * const *repr;
+ int count, n;
+ ssize_t len;
+
+ BUILD_BUG_ON(!typecheck(typeof(caps), engine->uabi_capabilities));
+
+ switch (engine->class) {
+ case VIDEO_DECODE_CLASS:
+ repr = vcs_caps;
+ count = ARRAY_SIZE(vcs_caps);
+ break;
+
+ case VIDEO_ENHANCEMENT_CLASS:
+ repr = vecs_caps;
+ count = ARRAY_SIZE(vecs_caps);
+ break;
+
+ default:
+ repr = NULL;
+ count = 0;
+ break;
+ }
+ GEM_BUG_ON(count > BITS_PER_TYPE(typeof(caps)));
+
+ len = 0;
+ for_each_set_bit(n,
+ (unsigned long *)&caps,
+ show_unknown ? BITS_PER_TYPE(typeof(caps)) : count) {
+ if (n >= count || !repr[n]) {
+ if (GEM_WARN_ON(show_unknown))
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "[%x] ", n);
+ } else {
+ len += snprintf(buf + len, PAGE_SIZE - len,
+ "%s ", repr[n]);
+ }
+ if (GEM_WARN_ON(len >= PAGE_SIZE))
+ break;
+ }
+ return repr_trim(buf, len);
+}
+
+static ssize_t
+caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return __caps_show(engine, engine->uabi_capabilities, buf, true);
+}
+
+static struct kobj_attribute caps_attr =
+__ATTR(capabilities, 0444, caps_show, NULL);
+
+static ssize_t
+all_caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ return __caps_show(kobj_to_engine(kobj), -1, buf, false);
+}
+
+static struct kobj_attribute all_caps_attr =
+__ATTR(known_capabilities, 0444, all_caps_show, NULL);
+
+static ssize_t
+max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+ unsigned long long duration;
+ int err;
+
+ /*
+ * When waiting for a request, if is it currently being executed
+ * on the GPU, we busywait for a short while before sleeping. The
+ * premise is that most requests are short, and if it is already
+ * executing then there is a good chance that it will complete
+ * before we can setup the interrupt handler and go to sleep.
+ * We try to offset the cost of going to sleep, by first spinning
+ * on the request -- if it completed in less time than it would take
+ * to go sleep, process the interrupt and return back to the client,
+ * then we have saved the client some latency, albeit at the cost
+ * of spinning on an expensive CPU core.
+ *
+ * While we try to avoid waiting at all for a request that is unlikely
+ * to complete, deciding how long it is worth spinning is for is an
+ * arbitrary decision: trading off power vs latency.
+ */
+
+ err = kstrtoull(buf, 0, &duration);
+ if (err)
+ return err;
+
+ if (duration > jiffies_to_nsecs(2))
+ return -EINVAL;
+
+ WRITE_ONCE(engine->props.max_busywait_duration_ns, duration);
+
+ return count;
+}
+
+static ssize_t
+max_spin_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->props.max_busywait_duration_ns);
+}
+
+static struct kobj_attribute max_spin_attr =
+__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store);
+
+static ssize_t
+timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+ unsigned long long duration;
+ int err;
+
+ /*
+ * Execlists uses a scheduling quantum (a timeslice) to alternate
+ * execution between ready-to-run contexts of equal priority. This
+ * ensures that all users (though only if they of equal importance)
+ * have the opportunity to run and prevents livelocks where contexts
+ * may have implicit ordering due to userspace semaphores.
+ */
+
+ err = kstrtoull(buf, 0, &duration);
+ if (err)
+ return err;
+
+ if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+ return -EINVAL;
+
+ WRITE_ONCE(engine->props.timeslice_duration_ms, duration);
+
+ if (execlists_active(&engine->execlists))
+ set_timer_ms(&engine->execlists.timer, duration);
+
+ return count;
+}
+
+static ssize_t
+timeslice_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->props.timeslice_duration_ms);
+}
+
+static struct kobj_attribute timeslice_duration_attr =
+__ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store);
+
+static ssize_t
+stop_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+ unsigned long long duration;
+ int err;
+
+ /*
+ * When we allow ourselves to sleep before a GPU reset after disabling
+ * submission, even for a few milliseconds, gives an innocent context
+ * the opportunity to clear the GPU before the reset occurs. However,
+ * how long to sleep depends on the typical non-preemptible duration
+ * (a similar problem to determining the ideal preempt-reset timeout
+ * or even the heartbeat interval).
+ */
+
+ err = kstrtoull(buf, 0, &duration);
+ if (err)
+ return err;
+
+ if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+ return -EINVAL;
+
+ WRITE_ONCE(engine->props.stop_timeout_ms, duration);
+ return count;
+}
+
+static ssize_t
+stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->props.stop_timeout_ms);
+}
+
+static struct kobj_attribute stop_timeout_attr =
+__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
+
+static ssize_t
+preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+ unsigned long long timeout;
+ int err;
+
+ /*
+ * After initialising a preemption request, we give the current
+ * resident a small amount of time to vacate the GPU. The preemption
+ * request is for a higher priority context and should be immediate to
+ * maintain high quality of service (and avoid priority inversion).
+ * However, the preemption granularity of the GPU can be quite coarse
+ * and so we need a compromise.
+ */
+
+ err = kstrtoull(buf, 0, &timeout);
+ if (err)
+ return err;
+
+ if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+ return -EINVAL;
+
+ WRITE_ONCE(engine->props.preempt_timeout_ms, timeout);
+
+ if (READ_ONCE(engine->execlists.pending[0]))
+ set_timer_ms(&engine->execlists.preempt, timeout);
+
+ return count;
+}
+
+static ssize_t
+preempt_timeout_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->props.preempt_timeout_ms);
+}
+
+static struct kobj_attribute preempt_timeout_attr =
+__ATTR(preempt_timeout_ms, 0644, preempt_timeout_show, preempt_timeout_store);
+
+static ssize_t
+heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+ unsigned long long delay;
+ int err;
+
+ /*
+ * We monitor the health of the system via periodic heartbeat pulses.
+ * The pulses also provide the opportunity to perform garbage
+ * collection. However, we interpret an incomplete pulse (a missed
+ * heartbeat) as an indication that the system is no longer responsive,
+ * i.e. hung, and perform an engine or full GPU reset. Given that the
+ * preemption granularity can be very coarse on a system, the optimal
+ * value for any workload is unknowable!
+ */
+
+ err = kstrtoull(buf, 0, &delay);
+ if (err)
+ return err;
+
+ if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+ return -EINVAL;
+
+ err = intel_engine_set_heartbeat(engine, delay);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static ssize_t
+heartbeat_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->props.heartbeat_interval_ms);
+}
+
+static struct kobj_attribute heartbeat_interval_attr =
+__ATTR(heartbeat_interval_ms, 0644, heartbeat_show, heartbeat_store);
+
+static void kobj_engine_release(struct kobject *kobj)
+{
+ kfree(kobj);
+}
+
+static struct kobj_type kobj_engine_type = {
+ .release = kobj_engine_release,
+ .sysfs_ops = &kobj_sysfs_ops
+};
+
+static struct kobject *
+kobj_engine(struct kobject *dir, struct intel_engine_cs *engine)
+{
+ struct kobj_engine *ke;
+
+ ke = kzalloc(sizeof(*ke), GFP_KERNEL);
+ if (!ke)
+ return NULL;
+
+ kobject_init(&ke->base, &kobj_engine_type);
+ ke->engine = engine;
+
+ if (kobject_add(&ke->base, dir, "%s", engine->name)) {
+ kobject_put(&ke->base);
+ return NULL;
+ }
+
+ /* xfer ownership to sysfs tree */
+ return &ke->base;
+}
+
+void intel_engines_add_sysfs(struct drm_i915_private *i915)
+{
+ static const struct attribute *files[] = {
+ &name_attr.attr,
+ &class_attr.attr,
+ &inst_attr.attr,
+ &mmio_attr.attr,
+ &caps_attr.attr,
+ &all_caps_attr.attr,
+ &max_spin_attr.attr,
+ &stop_timeout_attr.attr,
+#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
+ &heartbeat_interval_attr.attr,
+#endif
+ NULL
+ };
+
+ struct device *kdev = i915->drm.primary->kdev;
+ struct intel_engine_cs *engine;
+ struct kobject *dir;
+
+ dir = kobject_create_and_add("engine", &kdev->kobj);
+ if (!dir)
+ return;
+
+ for_each_uabi_engine(engine, i915) {
+ struct kobject *kobj;
+
+ kobj = kobj_engine(dir, engine);
+ if (!kobj)
+ goto err_engine;
+
+ if (sysfs_create_files(kobj, files))
+ goto err_object;
+
+ if (intel_engine_has_timeslices(engine) &&
+ sysfs_create_file(kobj, &timeslice_duration_attr.attr))
+ goto err_engine;
+
+ if (intel_engine_has_preempt_reset(engine) &&
+ sysfs_create_file(kobj, &preempt_timeout_attr.attr))
+ goto err_engine;
+
+ if (0) {
+err_object:
+ kobject_put(kobj);
+err_engine:
+ dev_err(kdev, "Failed to add sysfs engine '%s'\n",
+ engine->name);
+ break;
+ }
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.h b/drivers/gpu/drm/i915/gt/sysfs_engines.h
new file mode 100644
index 000000000000..9546fffe03a7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_SYSFS_H
+#define INTEL_ENGINE_SYSFS_H
+
+struct drm_i915_private;
+
+void intel_engines_add_sysfs(struct drm_i915_private *i915);
+
+#endif /* INTEL_ENGINE_SYSFS_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 5d00a3b2d914..819f09ef51fc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -207,7 +207,7 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc)
{
u32 flags = 0;
- if (!intel_guc_is_submission_supported(guc))
+ if (!intel_guc_submission_is_used(guc))
flags |= GUC_CTL_DISABLE_SCHEDULER;
return flags;
@@ -217,7 +217,7 @@ static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc)
{
u32 flags = 0;
- if (intel_guc_is_submission_supported(guc)) {
+ if (intel_guc_submission_is_used(guc)) {
u32 ctxnum, base;
base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool);
@@ -333,7 +333,7 @@ int intel_guc_init(struct intel_guc *guc)
ret = intel_uc_fw_init(&guc->fw);
if (ret)
- goto err_fetch;
+ goto out;
ret = intel_guc_log_create(&guc->log);
if (ret)
@@ -348,7 +348,7 @@ int intel_guc_init(struct intel_guc *guc)
if (ret)
goto err_ads;
- if (intel_guc_is_submission_supported(guc)) {
+ if (intel_guc_submission_is_used(guc)) {
/*
* This is stuff we need to have available at fw load time
* if we are planning to enable submission later
@@ -364,6 +364,8 @@ int intel_guc_init(struct intel_guc *guc)
/* We need to notify the guc whenever we change the GGTT */
i915_ggtt_enable_guc(gt->ggtt);
+ intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOADABLE);
+
return 0;
err_ct:
@@ -374,9 +376,8 @@ err_log:
intel_guc_log_destroy(&guc->log);
err_fw:
intel_uc_fw_fini(&guc->fw);
-err_fetch:
- intel_uc_fw_cleanup_fetch(&guc->fw);
- DRM_DEV_DEBUG_DRIVER(gt->i915->drm.dev, "failed with %d\n", ret);
+out:
+ i915_probe_error(gt->i915, "failed with %d\n", ret);
return ret;
}
@@ -384,12 +385,12 @@ void intel_guc_fini(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
- if (!intel_uc_fw_is_available(&guc->fw))
+ if (!intel_uc_fw_is_loadable(&guc->fw))
return;
i915_ggtt_disable_guc(gt->ggtt);
- if (intel_guc_is_submission_supported(guc))
+ if (intel_guc_submission_is_used(guc))
intel_guc_submission_fini(guc);
intel_guc_ct_fini(&guc->ct);
@@ -397,9 +398,6 @@ void intel_guc_fini(struct intel_guc *guc)
intel_guc_ads_destroy(guc);
intel_guc_log_destroy(&guc->log);
intel_uc_fw_fini(&guc->fw);
- intel_uc_fw_cleanup_fetch(&guc->fw);
-
- intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED);
}
/*
@@ -544,7 +542,7 @@ int intel_guc_suspend(struct intel_guc *guc)
* If GuC communication is enabled but submission is not supported,
* we do not need to suspend the GuC.
*/
- if (!intel_guc_submission_is_enabled(guc))
+ if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc))
return 0;
/*
@@ -609,7 +607,7 @@ int intel_guc_resume(struct intel_guc *guc)
* we do not need to resume the GuC but we do need to enable the
* GuC communication on resume (above).
*/
- if (!intel_guc_submission_is_enabled(guc))
+ if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc))
return 0;
return intel_guc_send(guc, action, ARRAY_SIZE(action));
@@ -678,8 +676,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
if (IS_ERR(vma))
goto err;
- flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
- ret = i915_vma_pin(vma, 0, 0, flags);
+ flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+ ret = i915_ggtt_pin(vma, 0, flags);
if (ret) {
vma = ERR_PTR(ret);
goto err;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 910d49590068..4594ccbeaa34 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -39,7 +39,7 @@ struct intel_guc {
void (*disable)(struct intel_guc *guc);
} interrupts;
- bool submission_supported;
+ bool submission_selected;
struct i915_vma *ads_vma;
struct __guc_ads_blob *ads_blob;
@@ -143,29 +143,36 @@ static inline bool intel_guc_is_supported(struct intel_guc *guc)
return intel_uc_fw_is_supported(&guc->fw);
}
-static inline bool intel_guc_is_enabled(struct intel_guc *guc)
+static inline bool intel_guc_is_wanted(struct intel_guc *guc)
{
return intel_uc_fw_is_enabled(&guc->fw);
}
-static inline bool intel_guc_is_running(struct intel_guc *guc)
+static inline bool intel_guc_is_used(struct intel_guc *guc)
+{
+ GEM_BUG_ON(__intel_uc_fw_status(&guc->fw) == INTEL_UC_FIRMWARE_SELECTED);
+ return intel_uc_fw_is_available(&guc->fw);
+}
+
+static inline bool intel_guc_is_fw_running(struct intel_guc *guc)
{
return intel_uc_fw_is_running(&guc->fw);
}
+static inline bool intel_guc_is_ready(struct intel_guc *guc)
+{
+ return intel_guc_is_fw_running(guc) && intel_guc_ct_enabled(&guc->ct);
+}
+
static inline int intel_guc_sanitize(struct intel_guc *guc)
{
intel_uc_fw_sanitize(&guc->fw);
+ intel_guc_ct_sanitize(&guc->ct);
guc->mmio_msg = 0;
return 0;
}
-static inline bool intel_guc_is_submission_supported(struct intel_guc *guc)
-{
- return guc->submission_supported;
-}
-
static inline void intel_guc_enable_msg(struct intel_guc *guc, u32 mask)
{
spin_lock_irq(&guc->irq_lock);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index c6f971a049f9..11742fca0e9e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -5,11 +5,15 @@
#include "i915_drv.h"
#include "intel_guc_ct.h"
+#include "gt/intel_gt.h"
+#define CT_ERROR(_ct, _fmt, ...) \
+ DRM_DEV_ERROR(ct_to_dev(_ct), "CT: " _fmt, ##__VA_ARGS__)
#ifdef CONFIG_DRM_I915_DEBUG_GUC
-#define CT_DEBUG_DRIVER(...) DRM_DEBUG_DRIVER(__VA_ARGS__)
+#define CT_DEBUG(_ct, _fmt, ...) \
+ DRM_DEV_DEBUG_DRIVER(ct_to_dev(_ct), "CT: " _fmt, ##__VA_ARGS__)
#else
-#define CT_DEBUG_DRIVER(...) do { } while (0)
+#define CT_DEBUG(...) do { } while (0)
#endif
struct ct_request {
@@ -48,6 +52,21 @@ static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
return container_of(ct, struct intel_guc, ct);
}
+static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct)
+{
+ return guc_to_gt(ct_to_guc(ct));
+}
+
+static inline struct drm_i915_private *ct_to_i915(struct intel_guc_ct *ct)
+{
+ return ct_to_gt(ct)->i915;
+}
+
+static inline struct device *ct_to_dev(struct intel_guc_ct *ct)
+{
+ return ct_to_i915(ct)->drm.dev;
+}
+
static inline const char *guc_ct_buffer_type_to_str(u32 type)
{
switch (type) {
@@ -63,7 +82,6 @@ static inline const char *guc_ct_buffer_type_to_str(u32 type)
static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
u32 cmds_addr, u32 size)
{
- CT_DEBUG_DRIVER("CT: init addr=%#x size=%u\n", cmds_addr, size);
memset(desc, 0, sizeof(*desc));
desc->addr = cmds_addr;
desc->size = size;
@@ -72,8 +90,6 @@ static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
{
- CT_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
- desc, desc->head, desc->tail);
desc->head = 0;
desc->tail = 0;
desc->is_in_error = 0;
@@ -89,31 +105,40 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc,
sizeof(struct guc_ct_buffer_desc),
type
};
- int err;
/* Can't use generic send(), CT registration must go over MMIO */
- err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
- if (err)
- DRM_ERROR("CT: register %s buffer failed; err=%d\n",
- guc_ct_buffer_type_to_str(type), err);
+ return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
+}
+
+static int ct_register_buffer(struct intel_guc_ct *ct, u32 desc_addr, u32 type)
+{
+ int err = guc_action_register_ct_buffer(ct_to_guc(ct), desc_addr, type);
+
+ if (unlikely(err))
+ CT_ERROR(ct, "Failed to register %s buffer (err=%d)\n",
+ guc_ct_buffer_type_to_str(type), err);
return err;
}
-static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
- u32 type)
+static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type)
{
u32 action[] = {
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
CTB_OWNER_HOST,
type
};
- int err;
/* Can't use generic send(), CT deregistration must go over MMIO */
- err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
- if (err)
- DRM_ERROR("CT: deregister %s buffer failed; err=%d\n",
- guc_ct_buffer_type_to_str(type), err);
+ return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
+}
+
+static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type)
+{
+ int err = guc_action_deregister_ct_buffer(ct_to_guc(ct), type);
+
+ if (unlikely(err))
+ CT_ERROR(ct, "Failed to deregister %s buffer (err=%d)\n",
+ guc_ct_buffer_type_to_str(type), err);
return err;
}
@@ -157,13 +182,12 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
*/
err = intel_guc_allocate_and_map_vma(guc, PAGE_SIZE, &ct->vma, &blob);
- if (err) {
- DRM_ERROR("CT: channel allocation failed; err=%d\n", err);
+ if (unlikely(err)) {
+ CT_ERROR(ct, "Failed to allocate CT channel (err=%d)\n", err);
return err;
}
- CT_DEBUG_DRIVER("CT: vma base=%#x\n",
- intel_guc_ggtt_offset(guc, ct->vma));
+ CT_DEBUG(ct, "vma base=%#x\n", intel_guc_ggtt_offset(guc, ct->vma));
/* store pointers to desc and cmds */
for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
@@ -197,7 +221,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct)
int intel_guc_ct_enable(struct intel_guc_ct *ct)
{
struct intel_guc *guc = ct_to_guc(ct);
- u32 base;
+ u32 base, cmds, size;
int err;
int i;
@@ -212,23 +236,23 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
*/
for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
- guc_ct_buffer_desc_init(ct->ctbs[i].desc,
- base + PAGE_SIZE/4 * i + PAGE_SIZE/2,
- PAGE_SIZE/4);
+ cmds = base + PAGE_SIZE / 4 * i + PAGE_SIZE / 2;
+ size = PAGE_SIZE / 4;
+ CT_DEBUG(ct, "%d: addr=%#x size=%u\n", i, cmds, size);
+ guc_ct_buffer_desc_init(ct->ctbs[i].desc, cmds, size);
}
- /* register buffers, starting wirh RECV buffer
- * descriptors are in first half of the blob
+ /*
+ * Register both CT buffers starting with RECV buffer.
+ * Descriptors are in first half of the blob.
*/
- err = guc_action_register_ct_buffer(guc,
- base + PAGE_SIZE/4 * CTB_RECV,
- INTEL_GUC_CT_BUFFER_TYPE_RECV);
+ err = ct_register_buffer(ct, base + PAGE_SIZE / 4 * CTB_RECV,
+ INTEL_GUC_CT_BUFFER_TYPE_RECV);
if (unlikely(err))
goto err_out;
- err = guc_action_register_ct_buffer(guc,
- base + PAGE_SIZE/4 * CTB_SEND,
- INTEL_GUC_CT_BUFFER_TYPE_SEND);
+ err = ct_register_buffer(ct, base + PAGE_SIZE / 4 * CTB_SEND,
+ INTEL_GUC_CT_BUFFER_TYPE_SEND);
if (unlikely(err))
goto err_deregister;
@@ -237,10 +261,9 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
return 0;
err_deregister:
- guc_action_deregister_ct_buffer(guc,
- INTEL_GUC_CT_BUFFER_TYPE_RECV);
+ ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV);
err_out:
- DRM_ERROR("CT: can't open channel; err=%d\n", err);
+ CT_ERROR(ct, "Failed to open open CT channel (err=%d)\n", err);
return err;
}
@@ -256,18 +279,16 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
ct->enabled = false;
- if (intel_guc_is_running(guc)) {
- guc_action_deregister_ct_buffer(guc,
- INTEL_GUC_CT_BUFFER_TYPE_SEND);
- guc_action_deregister_ct_buffer(guc,
- INTEL_GUC_CT_BUFFER_TYPE_RECV);
+ if (intel_guc_is_fw_running(guc)) {
+ ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_SEND);
+ ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV);
}
}
static u32 ct_get_next_fence(struct intel_guc_ct *ct)
{
/* For now it's trivial */
- return ++ct->requests.next_fence;
+ return ++ct->requests.last_fence;
}
/**
@@ -288,25 +309,33 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct)
* ^-----------------len-------------------^
*/
-static int ctb_write(struct intel_guc_ct_buffer *ctb,
- const u32 *action,
- u32 len /* in dwords */,
- u32 fence,
- bool want_response)
+static int ct_write(struct intel_guc_ct *ct,
+ const u32 *action,
+ u32 len /* in dwords */,
+ u32 fence,
+ bool want_response)
{
+ struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND];
struct guc_ct_buffer_desc *desc = ctb->desc;
- u32 head = desc->head / 4; /* in dwords */
- u32 tail = desc->tail / 4; /* in dwords */
- u32 size = desc->size / 4; /* in dwords */
- u32 used; /* in dwords */
+ u32 head = desc->head;
+ u32 tail = desc->tail;
+ u32 size = desc->size;
+ u32 used;
u32 header;
u32 *cmds = ctb->cmds;
unsigned int i;
- GEM_BUG_ON(desc->size % 4);
- GEM_BUG_ON(desc->head % 4);
- GEM_BUG_ON(desc->tail % 4);
- GEM_BUG_ON(tail >= size);
+ if (unlikely(desc->is_in_error))
+ return -EPIPE;
+
+ if (unlikely(!IS_ALIGNED(head | tail | size, 4) ||
+ (tail | head) >= size))
+ goto corrupted;
+
+ /* later calculations will be done in dwords */
+ head /= 4;
+ tail /= 4;
+ size /= 4;
/*
* tail == head condition indicates empty. GuC FW does not support
@@ -332,9 +361,8 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb,
(want_response ? GUC_CT_MSG_SEND_STATUS : 0) |
(action[0] << GUC_CT_MSG_ACTION_SHIFT);
- CT_DEBUG_DRIVER("CT: writing %*ph %*ph %*ph\n",
- 4, &header, 4, &fence,
- 4 * (len - 1), &action[1]);
+ CT_DEBUG(ct, "writing %*ph %*ph %*ph\n",
+ 4, &header, 4, &fence, 4 * (len - 1), &action[1]);
cmds[tail] = header;
tail = (tail + 1) % size;
@@ -346,12 +374,17 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb,
cmds[tail] = action[i];
tail = (tail + 1) % size;
}
+ GEM_BUG_ON(tail > size);
/* now update desc tail (back in bytes) */
desc->tail = tail * 4;
- GEM_BUG_ON(desc->tail > desc->size);
-
return 0;
+
+corrupted:
+ CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n",
+ desc->addr, desc->head, desc->tail, desc->size);
+ desc->is_in_error = 1;
+ return -EPIPE;
}
/**
@@ -469,7 +502,7 @@ static int ct_send(struct intel_guc_ct *ct,
list_add_tail(&request.link, &ct->requests.pending);
spin_unlock_irqrestore(&ct->requests.lock, flags);
- err = ctb_write(ctb, action, len, fence, !!response_buf);
+ err = ct_write(ct, action, len, fence, !!response_buf);
if (unlikely(err))
goto unlink;
@@ -526,11 +559,11 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
if (unlikely(ret < 0)) {
- DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
- action[0], ret, status);
+ CT_ERROR(ct, "Sending action %#x failed (err=%d status=%#X)\n",
+ action[0], ret, status);
} else if (unlikely(ret)) {
- CT_DEBUG_DRIVER("CT: send action %#x returned %d (%#x)\n",
- action[0], ret, ret);
+ CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
+ action[0], ret, ret);
}
mutex_unlock(&guc->send_mutex);
@@ -552,22 +585,29 @@ static inline bool ct_header_is_response(u32 header)
return !!(header & GUC_CT_MSG_IS_RESPONSE);
}
-static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data)
+static int ct_read(struct intel_guc_ct *ct, u32 *data)
{
+ struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV];
struct guc_ct_buffer_desc *desc = ctb->desc;
- u32 head = desc->head / 4; /* in dwords */
- u32 tail = desc->tail / 4; /* in dwords */
- u32 size = desc->size / 4; /* in dwords */
+ u32 head = desc->head;
+ u32 tail = desc->tail;
+ u32 size = desc->size;
u32 *cmds = ctb->cmds;
- s32 available; /* in dwords */
+ s32 available;
unsigned int len;
unsigned int i;
- GEM_BUG_ON(desc->size % 4);
- GEM_BUG_ON(desc->head % 4);
- GEM_BUG_ON(desc->tail % 4);
- GEM_BUG_ON(tail >= size);
- GEM_BUG_ON(head >= size);
+ if (unlikely(desc->is_in_error))
+ return -EPIPE;
+
+ if (unlikely(!IS_ALIGNED(head | tail | size, 4) ||
+ (tail | head) >= size))
+ goto corrupted;
+
+ /* later calculations will be done in dwords */
+ head /= 4;
+ tail /= 4;
+ size /= 4;
/* tail == head condition indicates empty */
available = tail - head;
@@ -577,7 +617,7 @@ static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data)
/* beware of buffer wrap case */
if (unlikely(available < 0))
available += size;
- CT_DEBUG_DRIVER("CT: available %d (%u:%u)\n", available, head, tail);
+ CT_DEBUG(ct, "available %d (%u:%u)\n", available, head, tail);
GEM_BUG_ON(available < 0);
data[0] = cmds[head];
@@ -586,23 +626,29 @@ static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data)
/* message len with header */
len = ct_header_get_len(data[0]) + 1;
if (unlikely(len > (u32)available)) {
- DRM_ERROR("CT: incomplete message %*ph %*ph %*ph\n",
- 4, data,
- 4 * (head + available - 1 > size ?
- size - head : available - 1), &cmds[head],
- 4 * (head + available - 1 > size ?
- available - 1 - size + head : 0), &cmds[0]);
- return -EPROTO;
+ CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n",
+ 4, data,
+ 4 * (head + available - 1 > size ?
+ size - head : available - 1), &cmds[head],
+ 4 * (head + available - 1 > size ?
+ available - 1 - size + head : 0), &cmds[0]);
+ goto corrupted;
}
for (i = 1; i < len; i++) {
data[i] = cmds[head];
head = (head + 1) % size;
}
- CT_DEBUG_DRIVER("CT: received %*ph\n", 4 * len, data);
+ CT_DEBUG(ct, "received %*ph\n", 4 * len, data);
desc->head = head * 4;
return 0;
+
+corrupted:
+ CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n",
+ desc->addr, desc->head, desc->tail, desc->size);
+ desc->is_in_error = 1;
+ return -EPIPE;
}
/**
@@ -627,7 +673,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
{
u32 header = msg[0];
u32 len = ct_header_get_len(header);
- u32 msglen = len + 1; /* total message length including header */
+ u32 msgsize = (len + 1) * sizeof(u32); /* msg size in bytes w/header */
u32 fence;
u32 status;
u32 datalen;
@@ -639,7 +685,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
/* Response payload shall at least include fence and status */
if (unlikely(len < 2)) {
- DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg);
+ CT_ERROR(ct, "Corrupted response %*ph\n", msgsize, msg);
return -EPROTO;
}
@@ -649,22 +695,22 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
/* Format of the status follows RESPONSE message */
if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) {
- DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg);
+ CT_ERROR(ct, "Corrupted response %*ph\n", msgsize, msg);
return -EPROTO;
}
- CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status);
+ CT_DEBUG(ct, "response fence %u status %#x\n", fence, status);
spin_lock(&ct->requests.lock);
list_for_each_entry(req, &ct->requests.pending, link) {
if (unlikely(fence != req->fence)) {
- CT_DEBUG_DRIVER("CT: request %u awaits response\n",
- req->fence);
+ CT_DEBUG(ct, "request %u awaits response\n",
+ req->fence);
continue;
}
if (unlikely(datalen > req->response_len)) {
- DRM_ERROR("CT: response %u too long %*ph\n",
- req->fence, 4 * msglen, msg);
+ CT_ERROR(ct, "Response for %u is too long %*ph\n",
+ req->fence, msgsize, msg);
datalen = 0;
}
if (datalen)
@@ -677,7 +723,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
spin_unlock(&ct->requests.lock);
if (!found)
- DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg);
+ CT_ERROR(ct, "Unsolicited response %*ph\n", msgsize, msg);
return 0;
}
@@ -687,7 +733,7 @@ static void ct_process_request(struct intel_guc_ct *ct,
struct intel_guc *guc = ct_to_guc(ct);
int ret;
- CT_DEBUG_DRIVER("CT: request %x %*ph\n", action, 4 * len, payload);
+ CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload);
switch (action) {
case INTEL_GUC_ACTION_DEFAULT:
@@ -698,8 +744,8 @@ static void ct_process_request(struct intel_guc_ct *ct,
default:
fail_unexpected:
- DRM_ERROR("CT: unexpected request %x %*ph\n",
- action, 4 * len, payload);
+ CT_ERROR(ct, "Unexpected request %x %*ph\n",
+ action, 4 * len, payload);
break;
}
}
@@ -767,18 +813,18 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
{
u32 header = msg[0];
u32 len = ct_header_get_len(header);
- u32 msglen = len + 1; /* total message length including header */
+ u32 msgsize = (len + 1) * sizeof(u32); /* msg size in bytes w/header */
struct ct_incoming_request *request;
unsigned long flags;
GEM_BUG_ON(ct_header_is_response(header));
- request = kmalloc(sizeof(*request) + 4 * msglen, GFP_ATOMIC);
+ request = kmalloc(sizeof(*request) + msgsize, GFP_ATOMIC);
if (unlikely(!request)) {
- DRM_ERROR("CT: dropping request %*ph\n", 4 * msglen, msg);
+ CT_ERROR(ct, "Dropping request %*ph\n", msgsize, msg);
return 0; /* XXX: -ENOMEM ? */
}
- memcpy(request->msg, msg, 4 * msglen);
+ memcpy(request->msg, msg, msgsize);
spin_lock_irqsave(&ct->requests.lock, flags);
list_add_tail(&request->link, &ct->requests.incoming);
@@ -794,7 +840,6 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
*/
void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
{
- struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV];
u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */
int err = 0;
@@ -804,7 +849,7 @@ void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
}
do {
- err = ctb_read(ctb, msg);
+ err = ct_read(ct, msg);
if (err)
break;
@@ -813,10 +858,4 @@ void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
else
err = ct_handle_request(ct, msg);
} while (!err);
-
- if (GEM_WARN_ON(err == -EPROTO)) {
- DRM_ERROR("CT: corrupted message detected!\n");
- ctb->desc->is_in_error = 1;
- }
}
-
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 3e7fe237cfa5..494a51a5200f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -49,7 +49,7 @@ struct intel_guc_ct {
struct intel_guc_ct_buffer ctbs[2];
struct {
- u32 next_fence; /* fence to be used with next request to send */
+ u32 last_fence; /* last fence used to send request */
spinlock_t lock; /* protects pending requests list */
struct list_head pending; /* requests waiting for response */
@@ -65,6 +65,11 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct);
int intel_guc_ct_enable(struct intel_guc_ct *ct);
void intel_guc_ct_disable(struct intel_guc_ct *ct);
+static inline void intel_guc_ct_sanitize(struct intel_guc_ct *ct)
+{
+ ct->enabled = false;
+}
+
static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct)
{
return ct->enabled;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 9e42324fdecd..fe7778c28d2d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -456,9 +456,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
/* Mark all executing requests as skipped. */
list_for_each_entry(rq, &engine->active.requests, sched.link) {
- if (!i915_request_signaled(rq))
- dma_fence_set_error(&rq->fence, -EIO);
-
+ i915_request_set_error_once(rq, -EIO);
i915_request_mark_complete(rq);
}
@@ -660,12 +658,9 @@ void intel_guc_submission_disable(struct intel_guc *guc)
guc_proc_desc_fini(guc);
}
-static bool __guc_submission_support(struct intel_guc *guc)
+static bool __guc_submission_selected(struct intel_guc *guc)
{
- /* XXX: GuC submission is unavailable for now */
- return false;
-
- if (!intel_guc_is_supported(guc))
+ if (!intel_guc_submission_is_supported(guc))
return false;
return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION;
@@ -673,7 +668,7 @@ static bool __guc_submission_support(struct intel_guc *guc)
void intel_guc_submission_init_early(struct intel_guc *guc)
{
- guc->submission_supported = __guc_submission_support(guc);
+ guc->submission_selected = __guc_submission_selected(guc);
}
bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index e402a2932592..4cf9d3e50263 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -8,7 +8,8 @@
#include <linux/types.h>
-struct intel_guc;
+#include "intel_guc.h"
+
struct intel_engine_cs;
void intel_guc_submission_init_early(struct intel_guc *guc);
@@ -20,4 +21,20 @@ int intel_guc_preempt_work_create(struct intel_guc *guc);
void intel_guc_preempt_work_destroy(struct intel_guc *guc);
bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine);
+static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
+{
+ /* XXX: GuC submission is unavailable for now */
+ return false;
+}
+
+static inline bool intel_guc_submission_is_wanted(struct intel_guc *guc)
+{
+ return guc->submission_selected;
+}
+
+static inline bool intel_guc_submission_is_used(struct intel_guc *guc)
+{
+ return intel_guc_is_used(guc) && intel_guc_submission_is_wanted(guc);
+}
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 32a069841c14..a74b65694512 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -121,19 +121,20 @@ int intel_huc_init(struct intel_huc *huc)
if (err)
goto out_fini;
+ intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE);
+
return 0;
out_fini:
intel_uc_fw_fini(&huc->fw);
out:
- intel_uc_fw_cleanup_fetch(&huc->fw);
- DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "failed with %d\n", err);
+ i915_probe_error(i915, "failed with %d\n", err);
return err;
}
void intel_huc_fini(struct intel_huc *huc)
{
- if (!intel_uc_fw_is_available(&huc->fw))
+ if (!intel_uc_fw_is_loadable(&huc->fw))
return;
intel_huc_rsa_data_destroy(huc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index 644c059fe01d..a40b9cfc6c22 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -41,11 +41,17 @@ static inline bool intel_huc_is_supported(struct intel_huc *huc)
return intel_uc_fw_is_supported(&huc->fw);
}
-static inline bool intel_huc_is_enabled(struct intel_huc *huc)
+static inline bool intel_huc_is_wanted(struct intel_huc *huc)
{
return intel_uc_fw_is_enabled(&huc->fw);
}
+static inline bool intel_huc_is_used(struct intel_huc *huc)
+{
+ GEM_BUG_ON(__intel_uc_fw_status(&huc->fw) == INTEL_UC_FIRMWARE_SELECTED);
+ return intel_uc_fw_is_available(&huc->fw);
+}
+
static inline bool intel_huc_is_authenticated(struct intel_huc *huc)
{
return intel_uc_fw_is_running(&huc->fw);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index eee193bf2cc4..9cdf4cbe691c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -20,7 +20,7 @@ void intel_huc_fw_init_early(struct intel_huc *huc)
struct drm_i915_private *i915 = gt->i915;
intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC,
- intel_uc_uses_guc(uc),
+ intel_uc_wants_guc(uc),
INTEL_INFO(i915)->platform, INTEL_REVID(i915));
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 64934a876a50..a4cbe06e06bd 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -48,17 +48,17 @@ static void __confirm_options(struct intel_uc *uc)
DRM_DEV_DEBUG_DRIVER(i915->drm.dev,
"enable_guc=%d (guc:%s submission:%s huc:%s)\n",
i915_modparams.enable_guc,
- yesno(intel_uc_uses_guc(uc)),
- yesno(intel_uc_uses_guc_submission(uc)),
- yesno(intel_uc_uses_huc(uc)));
+ yesno(intel_uc_wants_guc(uc)),
+ yesno(intel_uc_wants_guc_submission(uc)),
+ yesno(intel_uc_wants_huc(uc)));
if (i915_modparams.enable_guc == -1)
return;
if (i915_modparams.enable_guc == 0) {
- GEM_BUG_ON(intel_uc_uses_guc(uc));
- GEM_BUG_ON(intel_uc_uses_guc_submission(uc));
- GEM_BUG_ON(intel_uc_uses_huc(uc));
+ GEM_BUG_ON(intel_uc_wants_guc(uc));
+ GEM_BUG_ON(intel_uc_wants_guc_submission(uc));
+ GEM_BUG_ON(intel_uc_wants_huc(uc));
return;
}
@@ -93,7 +93,7 @@ void intel_uc_init_early(struct intel_uc *uc)
__confirm_options(uc);
- if (intel_uc_uses_guc(uc))
+ if (intel_uc_wants_guc(uc))
uc->ops = &uc_ops_on;
else
uc->ops = &uc_ops_off;
@@ -257,13 +257,13 @@ static void __uc_fetch_firmwares(struct intel_uc *uc)
{
int err;
- GEM_BUG_ON(!intel_uc_uses_guc(uc));
+ GEM_BUG_ON(!intel_uc_wants_guc(uc));
err = intel_uc_fw_fetch(&uc->guc.fw);
if (err)
return;
- if (intel_uc_uses_huc(uc))
+ if (intel_uc_wants_huc(uc))
intel_uc_fw_fetch(&uc->huc.fw);
}
@@ -273,25 +273,38 @@ static void __uc_cleanup_firmwares(struct intel_uc *uc)
intel_uc_fw_cleanup_fetch(&uc->guc.fw);
}
-static void __uc_init(struct intel_uc *uc)
+static int __uc_init(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret;
- GEM_BUG_ON(!intel_uc_uses_guc(uc));
+ GEM_BUG_ON(!intel_uc_wants_guc(uc));
+
+ if (!intel_uc_uses_guc(uc))
+ return 0;
+
+ if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
+ return -ENOMEM;
/* XXX: GuC submission is unavailable for now */
- GEM_BUG_ON(intel_uc_supports_guc_submission(uc));
+ GEM_BUG_ON(intel_uc_uses_guc_submission(uc));
ret = intel_guc_init(guc);
- if (ret) {
- intel_uc_fw_cleanup_fetch(&huc->fw);
- return;
+ if (ret)
+ return ret;
+
+ if (intel_uc_uses_huc(uc)) {
+ ret = intel_huc_init(huc);
+ if (ret)
+ goto out_guc;
}
- if (intel_uc_uses_huc(uc))
- intel_huc_init(huc);
+ return 0;
+
+out_guc:
+ intel_guc_fini(guc);
+ return ret;
}
static void __uc_fini(struct intel_uc *uc)
@@ -402,12 +415,12 @@ static int __uc_init_hw(struct intel_uc *uc)
int ret, attempts;
GEM_BUG_ON(!intel_uc_supports_guc(uc));
- GEM_BUG_ON(!intel_uc_uses_guc(uc));
+ GEM_BUG_ON(!intel_uc_wants_guc(uc));
- if (!intel_uc_fw_is_available(&guc->fw)) {
+ if (!intel_uc_fw_is_loadable(&guc->fw)) {
ret = __uc_check_hw(uc) ||
intel_uc_fw_is_overridden(&guc->fw) ||
- intel_uc_supports_guc_submission(uc) ?
+ intel_uc_wants_guc_submission(uc) ?
intel_uc_fw_status_to_error(guc->fw.status) : 0;
goto err_out;
}
@@ -459,14 +472,14 @@ static int __uc_init_hw(struct intel_uc *uc)
if (ret)
goto err_communication;
- if (intel_uc_supports_guc_submission(uc))
+ if (intel_uc_uses_guc_submission(uc))
intel_guc_submission_enable(guc);
dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
guc->fw.major_ver_found, guc->fw.minor_ver_found,
"submission",
- enableddisabled(intel_uc_supports_guc_submission(uc)));
+ enableddisabled(intel_uc_uses_guc_submission(uc)));
if (intel_uc_uses_huc(uc)) {
dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
@@ -505,10 +518,10 @@ static void __uc_fini_hw(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
- if (!intel_guc_is_running(guc))
+ if (!intel_guc_is_fw_running(guc))
return;
- if (intel_uc_supports_guc_submission(uc))
+ if (intel_uc_uses_guc_submission(uc))
intel_guc_submission_disable(guc);
if (guc_communication_enabled(guc))
@@ -527,7 +540,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
- if (!intel_guc_is_running(guc))
+ if (!intel_guc_is_ready(guc))
return;
guc_disable_communication(guc);
@@ -539,7 +552,7 @@ void intel_uc_runtime_suspend(struct intel_uc *uc)
struct intel_guc *guc = &uc->guc;
int err;
- if (!intel_guc_is_running(guc))
+ if (!intel_guc_is_ready(guc))
return;
err = intel_guc_suspend(guc);
@@ -554,7 +567,7 @@ void intel_uc_suspend(struct intel_uc *uc)
struct intel_guc *guc = &uc->guc;
intel_wakeref_t wakeref;
- if (!intel_guc_is_running(guc))
+ if (!intel_guc_is_ready(guc))
return;
with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref)
@@ -566,7 +579,7 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication)
struct intel_guc *guc = &uc->guc;
int err;
- if (!intel_guc_is_running(guc))
+ if (!intel_guc_is_fw_running(guc))
return 0;
/* Make sure we enable communication if and only if it's disabled */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 49c913524686..5ae7b50b7dc1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -7,6 +7,7 @@
#define _INTEL_UC_H_
#include "intel_guc.h"
+#include "intel_guc_submission.h"
#include "intel_huc.h"
#include "i915_params.h"
@@ -16,7 +17,7 @@ struct intel_uc_ops {
int (*sanitize)(struct intel_uc *uc);
void (*init_fw)(struct intel_uc *uc);
void (*fini_fw)(struct intel_uc *uc);
- void (*init)(struct intel_uc *uc);
+ int (*init)(struct intel_uc *uc);
void (*fini)(struct intel_uc *uc);
int (*init_hw)(struct intel_uc *uc);
void (*fini_hw)(struct intel_uc *uc);
@@ -40,35 +41,44 @@ void intel_uc_runtime_suspend(struct intel_uc *uc);
int intel_uc_resume(struct intel_uc *uc);
int intel_uc_runtime_resume(struct intel_uc *uc);
-static inline bool intel_uc_supports_guc(struct intel_uc *uc)
-{
- return intel_guc_is_supported(&uc->guc);
-}
-
-static inline bool intel_uc_uses_guc(struct intel_uc *uc)
-{
- return intel_guc_is_enabled(&uc->guc);
-}
+/*
+ * We need to know as early as possible if we're going to use GuC or not to
+ * take the correct setup paths. Additionally, once we've started loading the
+ * GuC, it is unsafe to keep executing without it because some parts of the HW,
+ * a subset of which is not cleaned on GT reset, will start expecting the GuC FW
+ * to be running.
+ * To solve both these requirements, we commit to using the microcontrollers if
+ * the relevant modparam is set and the blobs are found on the system. At this
+ * stage, the only thing that can stop us from attempting to load the blobs on
+ * the HW and use them is a fundamental issue (e.g. no memory for our
+ * structures); if we hit such a problem during driver load we're broken even
+ * without GuC, so there is no point in trying to fall back.
+ *
+ * Given the above, we can be in one of 4 states, with the last one implying
+ * we're committed to using the microcontroller:
+ * - Not supported: not available in HW and/or firmware not defined.
+ * - Supported: available in HW and firmware defined.
+ * - Wanted: supported + enabled in modparam.
+ * - In use: wanted + firmware found on the system and successfully fetched.
+ */
-static inline bool intel_uc_supports_guc_submission(struct intel_uc *uc)
-{
- return intel_guc_is_submission_supported(&uc->guc);
+#define __uc_state_checker(x, func, state, required) \
+static inline bool intel_uc_##state##_##func(struct intel_uc *uc) \
+{ \
+ return intel_##func##_is_##required(&uc->x); \
}
-static inline bool intel_uc_uses_guc_submission(struct intel_uc *uc)
-{
- return intel_guc_is_submission_supported(&uc->guc);
-}
+#define uc_state_checkers(x, func) \
+__uc_state_checker(x, func, supports, supported) \
+__uc_state_checker(x, func, wants, wanted) \
+__uc_state_checker(x, func, uses, used)
-static inline bool intel_uc_supports_huc(struct intel_uc *uc)
-{
- return intel_uc_supports_guc(uc);
-}
+uc_state_checkers(guc, guc);
+uc_state_checkers(huc, huc);
+uc_state_checkers(guc, guc_submission);
-static inline bool intel_uc_uses_huc(struct intel_uc *uc)
-{
- return intel_huc_is_enabled(&uc->huc);
-}
+#undef uc_state_checkers
+#undef __uc_state_checker
#define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \
static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
@@ -80,7 +90,7 @@ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
intel_uc_ops_function(sanitize, sanitize, int, 0);
intel_uc_ops_function(fetch_firmwares, init_fw, void, );
intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
-intel_uc_ops_function(init, init, void, );
+intel_uc_ops_function(init, init, int, 0);
intel_uc_ops_function(fini, fini, void, );
intel_uc_ops_function(init_hw, init_hw, int, 0);
intel_uc_ops_function(fini_hw, fini_hw, void, );
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 8ee0a0c7f447..18c755203688 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -43,7 +43,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* features.
*/
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
- fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \
fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \
fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \
fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \
@@ -279,7 +279,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
err = i915_inject_probe_error(i915, -ENXIO);
if (err)
- return err;
+ goto fail;
__force_fw_fetch_failures(uc_fw, -EINVAL);
__force_fw_fetch_failures(uc_fw, -ESTALE);
@@ -501,7 +501,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
if (err)
return err;
- if (!intel_uc_fw_is_available(uc_fw))
+ if (!intel_uc_fw_is_loadable(uc_fw))
return -ENOEXEC;
/* Call custom loader */
@@ -544,7 +544,10 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
{
- intel_uc_fw_cleanup_fetch(uc_fw);
+ if (i915_gem_object_has_pinned_pages(uc_fw->obj))
+ i915_gem_object_unpin_pages(uc_fw->obj);
+
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE);
}
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 1f30543d0d2d..888ff0de0244 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -29,8 +29,11 @@ struct intel_gt;
* | | SELECTED |
* +------------+- / | \ -+
* | | MISSING <--/ | \--> ERROR |
- * | fetch | | |
- * | | /------> AVAILABLE <---<-----------\ |
+ * | fetch | V |
+ * | | AVAILABLE |
+ * +------------+- | -+
+ * | init | V |
+ * | | /------> LOADABLE <----<-----------\ |
* +------------+- \ / \ \ \ -+
* | | FAIL <--< \--> TRANSFERRED \ |
* | upload | \ / \ / |
@@ -46,6 +49,7 @@ enum intel_uc_fw_status {
INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */
INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */
INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+ INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */
INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
INTEL_UC_FIRMWARE_RUNNING /* init/auth done */
@@ -115,6 +119,8 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
return "ERROR";
case INTEL_UC_FIRMWARE_AVAILABLE:
return "AVAILABLE";
+ case INTEL_UC_FIRMWARE_LOADABLE:
+ return "LOADABLE";
case INTEL_UC_FIRMWARE_FAIL:
return "FAIL";
case INTEL_UC_FIRMWARE_TRANSFERRED:
@@ -143,6 +149,7 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status)
case INTEL_UC_FIRMWARE_SELECTED:
return -ESTALE;
case INTEL_UC_FIRMWARE_AVAILABLE:
+ case INTEL_UC_FIRMWARE_LOADABLE:
case INTEL_UC_FIRMWARE_TRANSFERRED:
case INTEL_UC_FIRMWARE_RUNNING:
return 0;
@@ -184,6 +191,11 @@ static inline bool intel_uc_fw_is_available(struct intel_uc_fw *uc_fw)
return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_AVAILABLE;
}
+static inline bool intel_uc_fw_is_loadable(struct intel_uc_fw *uc_fw)
+{
+ return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_LOADABLE;
+}
+
static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw)
{
return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_TRANSFERRED;
@@ -202,7 +214,7 @@ static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw)
static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw)
{
if (intel_uc_fw_is_loaded(uc_fw))
- intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE);
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOADABLE);
}
static inline u32 __intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)