diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
64 files changed, 5009 insertions, 1032 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c new file mode 100644 index 000000000000..de595b66a746 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "gen7_renderclear.h" +#include "i915_drv.h" +#include "intel_gpu_commands.h" + +#define MAX_URB_ENTRIES 64 +#define STATE_SIZE (4 * 1024) +#define GT3_INLINE_DATA_DELAYS 0x1E00 +#define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS)) + +struct cb_kernel { + const void *data; + u32 size; +}; + +#define CB_KERNEL(name) { .data = (name), .size = sizeof(name) } + +#include "ivb_clear_kernel.c" +static const struct cb_kernel cb_kernel_ivb = CB_KERNEL(ivb_clear_kernel); + +#include "hsw_clear_kernel.c" +static const struct cb_kernel cb_kernel_hsw = CB_KERNEL(hsw_clear_kernel); + +struct batch_chunk { + struct i915_vma *vma; + u32 offset; + u32 *start; + u32 *end; + u32 max_items; +}; + +struct batch_vals { + u32 max_primitives; + u32 max_urb_entries; + u32 cmd_size; + u32 state_size; + u32 state_start; + u32 batch_size; + u32 surface_height; + u32 surface_width; + u32 scratch_size; + u32 max_size; +}; + +static void +batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv) +{ + if (IS_HASWELL(i915)) { + bv->max_primitives = 280; + bv->max_urb_entries = MAX_URB_ENTRIES; + bv->surface_height = 16 * 16; + bv->surface_width = 32 * 2 * 16; + } else { + bv->max_primitives = 128; + bv->max_urb_entries = MAX_URB_ENTRIES / 2; + bv->surface_height = 16 * 8; + bv->surface_width = 32 * 16; + } + bv->cmd_size = bv->max_primitives * 4096; + bv->state_size = STATE_SIZE; + bv->state_start = bv->cmd_size; + bv->batch_size = bv->cmd_size + bv->state_size; + bv->scratch_size = bv->surface_height * bv->surface_width; + bv->max_size = bv->batch_size + bv->scratch_size; +} + +static void batch_init(struct batch_chunk *bc, + struct i915_vma *vma, + u32 
*start, u32 offset, u32 max_bytes) +{ + bc->vma = vma; + bc->offset = offset; + bc->start = start + bc->offset / sizeof(*bc->start); + bc->end = bc->start; + bc->max_items = max_bytes / sizeof(*bc->start); +} + +static u32 batch_offset(const struct batch_chunk *bc, u32 *cs) +{ + return (cs - bc->start) * sizeof(*bc->start) + bc->offset; +} + +static u32 batch_addr(const struct batch_chunk *bc) +{ + return bc->vma->node.start; +} + +static void batch_add(struct batch_chunk *bc, const u32 d) +{ + GEM_BUG_ON((bc->end - bc->start) >= bc->max_items); + *bc->end++ = d; +} + +static u32 *batch_alloc_items(struct batch_chunk *bc, u32 align, u32 items) +{ + u32 *map; + + if (align) { + u32 *end = PTR_ALIGN(bc->end, align); + + memset32(bc->end, 0, end - bc->end); + bc->end = end; + } + + map = bc->end; + bc->end += items; + + return map; +} + +static u32 *batch_alloc_bytes(struct batch_chunk *bc, u32 align, u32 bytes) +{ + GEM_BUG_ON(!IS_ALIGNED(bytes, sizeof(*bc->start))); + return batch_alloc_items(bc, align, bytes / sizeof(*bc->start)); +} + +static u32 +gen7_fill_surface_state(struct batch_chunk *state, + const u32 dst_offset, + const struct batch_vals *bv) +{ + u32 surface_h = bv->surface_height; + u32 surface_w = bv->surface_width; + u32 *cs = batch_alloc_items(state, 32, 8); + u32 offset = batch_offset(state, cs); + +#define SURFACE_2D 1 +#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define RENDER_CACHE_READ_WRITE 1 + + *cs++ = SURFACE_2D << 29 | + (SURFACEFORMAT_B8G8R8A8_UNORM << 18) | + (RENDER_CACHE_READ_WRITE << 8); + + *cs++ = batch_addr(state) + dst_offset; + + *cs++ = ((surface_h / 4 - 1) << 16) | (surface_w / 4 - 1); + *cs++ = surface_w; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; +#define SHADER_CHANNELS(r, g, b, a) \ + (((r) << 25) | ((g) << 22) | ((b) << 19) | ((a) << 16)) + *cs++ = SHADER_CHANNELS(4, 5, 6, 7); + batch_advance(state, cs); + + return offset; +} + +static u32 +gen7_fill_binding_table(struct batch_chunk *state, + const struct batch_vals *bv) +{ + 
u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv); + u32 *cs = batch_alloc_items(state, 32, 8); + u32 offset = batch_offset(state, cs); + + *cs++ = surface_start - state->offset; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + batch_advance(state, cs); + + return offset; +} + +static u32 +gen7_fill_kernel_data(struct batch_chunk *state, + const u32 *data, + const u32 size) +{ + return batch_offset(state, + memcpy(batch_alloc_bytes(state, 64, size), + data, size)); +} + +static u32 +gen7_fill_interface_descriptor(struct batch_chunk *state, + const struct batch_vals *bv, + const struct cb_kernel *kernel, + unsigned int count) +{ + u32 kernel_offset = + gen7_fill_kernel_data(state, kernel->data, kernel->size); + u32 binding_table = gen7_fill_binding_table(state, bv); + u32 *cs = batch_alloc_items(state, 32, 8 * count); + u32 offset = batch_offset(state, cs); + + *cs++ = kernel_offset; + *cs++ = (1 << 7) | (1 << 13); + *cs++ = 0; + *cs++ = (binding_table - state->offset) | 1; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + + /* 1 - 63dummy idds */ + memset32(cs, 0x00, (count - 1) * 8); + batch_advance(state, cs + (count - 1) * 8); + + return offset; +} + +static void +gen7_emit_state_base_address(struct batch_chunk *batch, + u32 surface_state_base) +{ + u32 *cs = batch_alloc_items(batch, 0, 12); + + *cs++ = STATE_BASE_ADDRESS | (12 - 2); + /* general */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + /* surface */ + *cs++ = batch_addr(batch) | surface_state_base | BASE_ADDRESS_MODIFY; + /* dynamic */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + /* indirect */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + /* instruction */ + *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; + + /* general/dynamic/indirect/instruction access Bound */ + *cs++ = 0; + *cs++ = BASE_ADDRESS_MODIFY; + *cs++ = 0; + *cs++ = BASE_ADDRESS_MODIFY; + *cs++ = 0; + *cs++ = 0; + batch_advance(batch, cs); +} + 
+static void +gen7_emit_vfe_state(struct batch_chunk *batch, + const struct batch_vals *bv, + u32 urb_size, u32 curbe_size, + u32 mode) +{ + u32 urb_entries = bv->max_urb_entries; + u32 threads = bv->max_primitives - 1; + u32 *cs = batch_alloc_items(batch, 32, 8); + + *cs++ = MEDIA_VFE_STATE | (8 - 2); + + /* scratch buffer */ + *cs++ = 0; + + /* number of threads & urb entries for GPGPU vs Media Mode */ + *cs++ = threads << 16 | urb_entries << 8 | mode << 2; + + *cs++ = 0; + + /* urb entry size & curbe size in 256 bits unit */ + *cs++ = urb_size << 16 | curbe_size; + + /* scoreboard */ + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + batch_advance(batch, cs); +} + +static void +gen7_emit_interface_descriptor_load(struct batch_chunk *batch, + const u32 interface_descriptor, + unsigned int count) +{ + u32 *cs = batch_alloc_items(batch, 8, 4); + + *cs++ = MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2); + *cs++ = 0; + *cs++ = count * 8 * sizeof(*cs); + + /* + * interface descriptor address - it is relative to the dynamics base + * address + */ + *cs++ = interface_descriptor; + batch_advance(batch, cs); +} + +static void +gen7_emit_media_object(struct batch_chunk *batch, + unsigned int media_object_index) +{ + unsigned int x_offset = (media_object_index % 16) * 64; + unsigned int y_offset = (media_object_index / 16) * 16; + unsigned int inline_data_size; + unsigned int media_batch_size; + unsigned int i; + u32 *cs; + + inline_data_size = 112 * 8; + media_batch_size = inline_data_size + 6; + + cs = batch_alloc_items(batch, 8, media_batch_size); + + *cs++ = MEDIA_OBJECT | (media_batch_size - 2); + + /* interface descriptor offset */ + *cs++ = 0; + + /* without indirect data */ + *cs++ = 0; + *cs++ = 0; + + /* scoreboard */ + *cs++ = 0; + *cs++ = 0; + + /* inline */ + *cs++ = (y_offset << 16) | (x_offset); + *cs++ = 0; + *cs++ = GT3_INLINE_DATA_DELAYS; + for (i = 3; i < inline_data_size; i++) + *cs++ = 0; + + batch_advance(batch, cs); +} + +static void gen7_emit_pipeline_flush(struct 
batch_chunk *batch) +{ + u32 *cs = batch_alloc_items(batch, 0, 5); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE | + PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + batch_advance(batch, cs); +} + +static void emit_batch(struct i915_vma * const vma, + u32 *start, + const struct batch_vals *bv) +{ + struct drm_i915_private *i915 = vma->vm->i915; + unsigned int desc_count = 64; + const u32 urb_size = 112; + struct batch_chunk cmds, state; + u32 interface_descriptor; + unsigned int i; + + batch_init(&cmds, vma, start, 0, bv->cmd_size); + batch_init(&state, vma, start, bv->state_start, bv->state_size); + + interface_descriptor = + gen7_fill_interface_descriptor(&state, bv, + IS_HASWELL(i915) ? + &cb_kernel_hsw : + &cb_kernel_ivb, + desc_count); + gen7_emit_pipeline_flush(&cmds); + batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); + batch_add(&cmds, MI_NOOP); + gen7_emit_state_base_address(&cmds, interface_descriptor); + gen7_emit_pipeline_flush(&cmds); + + gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); + + gen7_emit_interface_descriptor_load(&cmds, + interface_descriptor, + desc_count); + + for (i = 0; i < bv->max_primitives; i++) + gen7_emit_media_object(&cmds, i); + + batch_add(&cmds, MI_BATCH_BUFFER_END); +} + +int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, + struct i915_vma * const vma) +{ + struct batch_vals bv; + u32 *batch; + + batch_get_defaults(engine->i915, &bv); + if (!vma) + return bv.max_size; + + GEM_BUG_ON(vma->obj->base.size < bv.max_size); + + batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + emit_batch(vma, memset(batch, 0, bv.max_size), &bv); + + i915_gem_object_flush_map(vma->obj); + i915_gem_object_unpin_map(vma->obj); + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.h b/drivers/gpu/drm/i915/gt/gen7_renderclear.h new file mode 100644 index 000000000000..bb100748e2c6 --- /dev/null 
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __GEN7_RENDERCLEAR_H__ +#define __GEN7_RENDERCLEAR_H__ + +struct intel_engine_cs; +struct i915_vma; + +int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, + struct i915_vma * const vma); + +#endif /* __GEN7_RENDERCLEAR_H__ */ diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 4d1de2d97d5c..94e746af8926 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -8,6 +8,7 @@ #include "gen8_ppgtt.h" #include "i915_scatterlist.h" #include "i915_trace.h" +#include "i915_pvinfo.h" #include "i915_vgpu.h" #include "intel_gt.h" #include "intel_gtt.h" @@ -25,6 +26,30 @@ static u64 gen8_pde_encode(const dma_addr_t addr, return pde; } +static u64 gen8_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~_PAGE_RW; + + switch (level) { + case I915_CACHE_NONE: + pte |= PPAT_UNCACHED; + break; + case I915_CACHE_WT: + pte |= PPAT_DISPLAY_ELLC; + break; + default: + pte |= PPAT_CACHED; + break; + } + + return pte; +} + static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { struct drm_i915_private *i915 = ppgtt->vm.i915; @@ -706,6 +731,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; + ppgtt->vm.pte_encode = gen8_pte_encode; + if (intel_vgpu_active(gt->i915)) gen8_ppgtt_notify_vgt(ppgtt, true); diff --git a/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c b/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c new file mode 100644 index 000000000000..b47f9d4a0848 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/hsw_clear_kernel.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel 
Corporation + * + * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:30:13 AM UTC + */ + +static const u32 hsw_clear_kernel[] = { + 0x00000001, 0x26020128, 0x00000024, 0x00000000, + 0x00000040, 0x20280c21, 0x00000028, 0x00000001, + 0x01000010, 0x20000c20, 0x0000002c, 0x00000000, + 0x00010220, 0x34001c00, 0x00001400, 0x00000160, + 0x00600001, 0x20600061, 0x00000000, 0x00000000, + 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c, + 0x00000005, 0x20601ca5, 0x00000060, 0x00000001, + 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d, + 0x00000005, 0x20641ca5, 0x00000064, 0x00000003, + 0x00000041, 0x207424a5, 0x00000064, 0x00000034, + 0x00000040, 0x206014a5, 0x00000060, 0x00000074, + 0x00000008, 0x20681c85, 0x00000e00, 0x00000008, + 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f, + 0x00000041, 0x20701ca5, 0x00000060, 0x00000010, + 0x00000040, 0x206814a5, 0x00000068, 0x00000070, + 0x00600001, 0x20a00061, 0x00000000, 0x00000000, + 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007, + 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004, + 0x00600001, 0x20800021, 0x008d0000, 0x00000000, + 0x00000001, 0x20800021, 0x0000006c, 0x00000000, + 0x00000001, 0x20840021, 0x00000068, 0x00000000, + 0x00000001, 0x20880061, 0x00000000, 0x00000003, + 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff, + 0x05600032, 0x20a00fa1, 0x008d0080, 0x02190001, + 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001, + 0x05600032, 0x20a00fa1, 0x008d0080, 0x040a8001, + 0x02000040, 0x20281c21, 0x00000028, 0xffffffff, + 0x00010220, 0x34001c00, 0x00001400, 0xffffffe0, + 0x00000001, 0x26020128, 0x00000024, 0x00000000, + 0x00000001, 0x220010e4, 0x00000000, 0x00000000, + 0x00000001, 0x220831ec, 0x00000000, 0x007f007f, + 0x00600001, 0x20400021, 0x008d0000, 0x00000000, + 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000, + 0x00200001, 0x20400121, 0x00450020, 0x00000000, + 0x00000001, 0x20480061, 0x00000000, 0x000f000f, + 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef, + 0x00800001, 0x20600061, 0x00000000, 0x00000000, + 0x00800001, 
0x20800061, 0x00000000, 0x00000000, + 0x00800001, 0x20a00061, 0x00000000, 0x00000000, + 0x00800001, 0x20c00061, 0x00000000, 0x00000000, + 0x00800001, 0x20e00061, 0x00000000, 0x00000000, + 0x00800001, 0x21000061, 0x00000000, 0x00000000, + 0x00800001, 0x21200061, 0x00000000, 0x00000000, + 0x00800001, 0x21400061, 0x00000000, 0x00000000, + 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000, + 0x00000040, 0x20402d21, 0x00000020, 0x00100010, + 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000, + 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff, + 0x00800001, 0xa0000109, 0x00000602, 0x00000000, + 0x00000040, 0x22001c84, 0x00000200, 0x00000020, + 0x00010220, 0x34001c00, 0x00001400, 0xffffffc0, + 0x07600032, 0x20000fa0, 0x008d0fe0, 0x82000010, +}; diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 57e8a051ddc2..aea992e46c42 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -51,6 +51,11 @@ int intel_context_alloc_state(struct intel_context *ce) return -EINTR; if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + if (intel_context_is_banned(ce)) { + err = -EIO; + goto unlock; + } + err = ce->ops->alloc(ce); if (unlikely(err)) goto unlock; @@ -92,6 +97,8 @@ int __intel_context_do_pin(struct intel_context *ce) { int err; + GEM_BUG_ON(intel_context_is_closed(ce)); + if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { err = intel_context_alloc_state(ce); if (err) @@ -116,7 +123,8 @@ int __intel_context_do_pin(struct intel_context *ce) if (unlikely(err)) goto err_active; - CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n", + CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n", + i915_ggtt_offset(ce->ring->vma), ce->ring->head, ce->ring->tail); smp_mb__before_atomic(); /* flush pin before it is visible */ @@ -219,7 +227,9 @@ static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); - CE_TRACE(ce, 
"retire\n"); + CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n", + intel_context_get_total_runtime_ns(ce), + intel_context_get_avg_runtime_ns(ce)); set_bit(CONTEXT_VALID_BIT, &ce->flags); if (ce->state) @@ -280,6 +290,8 @@ intel_context_init(struct intel_context *ce, ce->sseu = engine->sseu; ce->ring = __intel_context_ring_size(SZ_4K); + ewma_runtime_init(&ce->runtime.avg); + ce->vm = i915_vm_get(engine->gt->vm); INIT_LIST_HEAD(&ce->signal_link); diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 30bd248827d8..07be021882cc 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -12,6 +12,7 @@ #include <linux/types.h> #include "i915_active.h" +#include "i915_drv.h" #include "intel_context_types.h" #include "intel_engine_types.h" #include "intel_ring_types.h" @@ -35,6 +36,9 @@ int intel_context_alloc_state(struct intel_context *ce); void intel_context_free(struct intel_context *ce); +int intel_context_reconfigure_sseu(struct intel_context *ce, + const struct intel_sseu sseu); + /** * intel_context_lock_pinned - Stablises the 'pinned' status of the HW context * @ce - the context @@ -169,6 +173,11 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce) return test_bit(CONTEXT_BARRIER_BIT, &ce->flags); } +static inline bool intel_context_is_closed(const struct intel_context *ce) +{ + return test_bit(CONTEXT_CLOSED_BIT, &ce->flags); +} + static inline bool intel_context_use_semaphores(const struct intel_context *ce) { return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); @@ -224,4 +233,20 @@ intel_context_clear_nopreempt(struct intel_context *ce) clear_bit(CONTEXT_NOPREEMPT, &ce->flags); } +static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce) +{ + const u32 period = + RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns; + + return READ_ONCE(ce->runtime.total) * period; +} + +static inline u64 
intel_context_get_avg_runtime_ns(struct intel_context *ce) +{ + const u32 period = + RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns; + + return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period); +} + #endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c new file mode 100644 index 000000000000..65dcd090245d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_context_param.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_active.h" +#include "intel_context.h" +#include "intel_context_param.h" +#include "intel_ring.h" + +int intel_context_set_ring_size(struct intel_context *ce, long sz) +{ + int err; + + if (intel_context_lock_pinned(ce)) + return -EINTR; + + err = i915_active_wait(&ce->active); + if (err < 0) + goto unlock; + + if (intel_context_is_pinned(ce)) { + err = -EBUSY; /* In active use, come back later! */ + goto unlock; + } + + if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + struct intel_ring *ring; + + /* Replace the existing ringbuffer */ + ring = intel_engine_create_ring(ce->engine, sz); + if (IS_ERR(ring)) { + err = PTR_ERR(ring); + goto unlock; + } + + intel_ring_put(ce->ring); + ce->ring = ring; + + /* Context image will be updated on next pin */ + } else { + ce->ring = __intel_context_ring_size(sz); + } + +unlock: + intel_context_unlock_pinned(ce); + return err; +} + +long intel_context_get_ring_size(struct intel_context *ce) +{ + long sz = (unsigned long)READ_ONCE(ce->ring); + + if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + if (intel_context_lock_pinned(ce)) + return -EINTR; + + sz = ce->ring->size; + intel_context_unlock_pinned(ce); + } + + return sz; +} diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h new file mode 100644 index 000000000000..f053d8633fe2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_context_param.h @@ 
-0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_CONTEXT_PARAM_H +#define INTEL_CONTEXT_PARAM_H + +struct intel_context; + +int intel_context_set_ring_size(struct intel_context *ce, long sz); +long intel_context_get_ring_size(struct intel_context *ce); + +#endif /* INTEL_CONTEXT_PARAM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c new file mode 100644 index 000000000000..57a30956c922 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_vma.h" +#include "intel_context.h" +#include "intel_engine_pm.h" +#include "intel_gpu_commands.h" +#include "intel_lrc.h" +#include "intel_lrc_reg.h" +#include "intel_ring.h" +#include "intel_sseu.h" + +static int gen8_emit_rpcs_config(struct i915_request *rq, + const struct intel_context *ce, + const struct intel_sseu sseu) +{ + u64 offset; + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + offset = i915_ggtt_offset(ce->state) + + LRC_STATE_PN * PAGE_SIZE + + CTX_R_PWR_CLK_STATE * 4; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); + + intel_ring_advance(rq, cs); + + return 0; +} + +static int +gen8_modify_rpcs(struct intel_context *ce, const struct intel_sseu sseu) +{ + struct i915_request *rq; + int ret; + + lockdep_assert_held(&ce->pin_mutex); + + /* + * If the context is not idle, we have to submit an ordered request to + * modify its context image via the kernel context (writing to our own + * image, or into the registers directory, does not stick). Pristine + * and idle contexts will be configured on pinning. 
+ */ + if (!intel_context_pin_if_active(ce)) + return 0; + + rq = intel_engine_create_kernel_request(ce->engine); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto out_unpin; + } + + /* Serialise with the remote context */ + ret = intel_context_prepare_remote_request(ce, rq); + if (ret == 0) + ret = gen8_emit_rpcs_config(rq, ce, sseu); + + i915_request_add(rq); +out_unpin: + intel_context_unpin(ce); + return ret; +} + +int +intel_context_reconfigure_sseu(struct intel_context *ce, + const struct intel_sseu sseu) +{ + int ret; + + GEM_BUG_ON(INTEL_GEN(ce->engine->i915) < 8); + + ret = intel_context_lock_pinned(ce); + if (ret) + return ret; + + /* Nothing to do if unmodified. */ + if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) + goto unlock; + + ret = gen8_modify_rpcs(ce, sseu); + if (!ret) + ce->sseu = sseu; + +unlock: + intel_context_unlock_pinned(ce); + return ret; +} diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index ca1420fb8b53..07cb83a0d017 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -7,6 +7,7 @@ #ifndef __INTEL_CONTEXT_TYPES__ #define __INTEL_CONTEXT_TYPES__ +#include <linux/average.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/mutex.h> @@ -19,6 +20,8 @@ #define CONTEXT_REDZONE POISON_INUSE +DECLARE_EWMA(runtime, 3, 8); + struct i915_gem_context; struct i915_vma; struct intel_context; @@ -42,8 +45,8 @@ struct intel_context { struct intel_engine_cs *engine; struct intel_engine_cs *inflight; -#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2) -#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2) +#define intel_context_inflight(ce) ptr_mask_bits(READ_ONCE((ce)->inflight), 2) +#define intel_context_inflight_count(ce) ptr_unmask_bits(READ_ONCE((ce)->inflight), 2) struct i915_address_space *vm; struct i915_gem_context __rcu *gem_context; @@ -59,15 +62,25 @@ struct intel_context { 
#define CONTEXT_BARRIER_BIT 0 #define CONTEXT_ALLOC_BIT 1 #define CONTEXT_VALID_BIT 2 -#define CONTEXT_USE_SEMAPHORES 3 -#define CONTEXT_BANNED 4 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 5 -#define CONTEXT_NOPREEMPT 6 +#define CONTEXT_CLOSED_BIT 3 +#define CONTEXT_USE_SEMAPHORES 4 +#define CONTEXT_BANNED 5 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 6 +#define CONTEXT_NOPREEMPT 7 u32 *lrc_reg_state; u64 lrc_desc; u32 tag; /* cookie passed to HW to track this context on submission */ + /* Time on GPU as tracked by the hw. */ + struct { + struct ewma_runtime avg; + u64 total; + u32 last; + I915_SELFTEST_DECLARE(u32 num_underflow); + I915_SELFTEST_DECLARE(u32 max_underflow); + } runtime; + unsigned int active_count; /* protected by timeline->mutex */ atomic_t pin_count; diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 5df003061e44..b469de0dd9b6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -107,7 +107,20 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists) static inline struct i915_request * execlists_active(const struct intel_engine_execlists *execlists) { - return *READ_ONCE(execlists->active); + struct i915_request * const *cur, * const *old, *active; + + cur = READ_ONCE(execlists->active); + smp_rmb(); /* pairs with overwrite protection in process_csb() */ + do { + old = cur; + + active = READ_ONCE(*cur); + cur = READ_ONCE(execlists->active); + + smp_rmb(); /* and complete the seqlock retry */ + } while (unlikely(cur != old)); + + return active; } static inline void @@ -192,6 +205,8 @@ void intel_engines_free(struct intel_gt *gt); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); +int intel_engine_resume(struct intel_engine_cs *engine); + int intel_ring_submission_setup(struct intel_engine_cs *engine); int intel_engine_stop_cs(struct intel_engine_cs *engine); @@ 
-303,26 +318,6 @@ intel_engine_find_active_request(struct intel_engine_cs *engine); u32 intel_engine_context_size(struct intel_gt *gt, u8 class); -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) - -static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) -{ - if (!execlists->preempt_hang.inject_hang) - return false; - - complete(&execlists->preempt_hang.completion); - return true; -} - -#else - -static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) -{ - return false; -} - -#endif - void intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass); #define ENGINE_PHYSICAL 0 diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 06ff7695fa29..3aa8a652c16d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -35,6 +35,7 @@ #include "intel_engine_user.h" #include "intel_gt.h" #include "intel_gt_requests.h" +#include "intel_gt_pm.h" #include "intel_lrc.h" #include "intel_reset.h" #include "intel_ring.h" @@ -199,10 +200,10 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) * out in the wash. 
*/ cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1; - DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n", - INTEL_GEN(gt->i915), - cxt_size * 64, - cxt_size - 1); + drm_dbg(&gt->i915->drm, + "gen%d CXT_SIZE = %d bytes [0x%08x]\n", + INTEL_GEN(gt->i915), cxt_size * 64, + cxt_size - 1); return round_up(cxt_size * 64, PAGE_SIZE); case 3: case 2: @@ -274,6 +275,7 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine) static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) { const struct engine_info *info = &intel_engines[id]; + struct drm_i915_private *i915 = gt->i915; struct intel_engine_cs *engine; BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); @@ -300,11 +302,11 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->id = id; engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); - engine->i915 = gt->i915; + engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; engine->hw_id = engine->guc_id = info->hw_id; - engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases); + engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases); engine->class = info->class; engine->instance = info->instance; @@ -312,6 +314,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->props.heartbeat_interval_ms = CONFIG_DRM_I915_HEARTBEAT_INTERVAL; + engine->props.max_busywait_duration_ns = + CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT; engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT; engine->props.stop_timeout_ms = @@ -319,11 +323,15 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->props.timeslice_duration_ms = CONFIG_DRM_I915_TIMESLICE_DURATION; + /* Override to uninterruptible for OpenCL workloads. 
*/ + if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS) + engine->props.preempt_timeout_ms = 0; + engine->context_size = intel_engine_context_size(gt, engine->class); if (WARN_ON(engine->context_size > BIT(20))) engine->context_size = 0; if (engine->context_size) - DRIVER_CAPS(gt->i915)->has_logical_contexts = true; + DRIVER_CAPS(i915)->has_logical_contexts = true; /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; @@ -339,7 +347,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) gt->engine_class[info->class][info->instance] = engine; gt->engine[id] = engine; - gt->i915->engine[id] = engine; + i915->engine[id] = engine; return 0; } @@ -392,8 +400,24 @@ void intel_engines_release(struct intel_gt *gt) struct intel_engine_cs *engine; enum intel_engine_id id; + /* + * Before we release the resources held by engine, we must be certain + * that the HW is no longer accessing them -- having the GPU scribble + * to or read from a page being used for something else causes no end + * of fun. + * + * The GPU should be reset by this point, but assume the worst just + * in case we aborted before completely initialising the engines. 
+ */ + GEM_BUG_ON(intel_gt_pm_is_awake(gt)); + if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + __intel_gt_reset(gt, ALL_ENGINES); + /* Decouple the backend; but keep the layout for late GPU resets */ for_each_engine(engine, gt, id) { + intel_wakeref_wait_for_idle(&engine->wakeref); + GEM_BUG_ON(intel_engine_pm_is_awake(engine)); + if (!engine->release) continue; @@ -432,9 +456,9 @@ int intel_engines_init_mmio(struct intel_gt *gt) unsigned int i; int err; - WARN_ON(engine_mask == 0); - WARN_ON(engine_mask & - GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES)); + drm_WARN_ON(&i915->drm, engine_mask == 0); + drm_WARN_ON(&i915->drm, engine_mask & + GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES)); if (i915_inject_probe_failure(i915)) return -ENODEV; @@ -455,7 +479,7 @@ int intel_engines_init_mmio(struct intel_gt *gt) * are added to the driver by a warning and disabling the forgotten * engines. */ - if (WARN_ON(mask != engine_mask)) + if (drm_WARN_ON(&i915->drm, mask != engine_mask)) device_info->engine_mask = mask; RUNTIME_INFO(i915)->num_engines = hweight32(mask); @@ -510,7 +534,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, { unsigned int flags; - flags = PIN_GLOBAL; if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) /* * On g33, we cannot place HWS above 256MiB, so @@ -523,11 +546,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, * above the mappable region (even though we never * actually map it). 
*/ - flags |= PIN_MAPPABLE; + flags = PIN_MAPPABLE; else - flags |= PIN_HIGH; + flags = PIN_HIGH; - return i915_vma_pin(vma, 0, 0, flags); + return i915_ggtt_pin(vma, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) @@ -546,7 +569,8 @@ static int init_status_page(struct intel_engine_cs *engine) */ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate status page\n"); + drm_err(&engine->i915->drm, + "Failed to allocate status page\n"); return PTR_ERR(obj); } @@ -614,15 +638,15 @@ static int engine_setup_common(struct intel_engine_cs *engine) struct measure_breadcrumb { struct i915_request rq; - struct intel_timeline timeline; struct intel_ring ring; u32 cs[1024]; }; -static int measure_breadcrumb_dw(struct intel_engine_cs *engine) +static int measure_breadcrumb_dw(struct intel_context *ce) { + struct intel_engine_cs *engine = ce->engine; struct measure_breadcrumb *frame; - int dw = -ENOMEM; + int dw; GEM_BUG_ON(!engine->gt->scratch); @@ -630,39 +654,27 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) if (!frame) return -ENOMEM; - if (intel_timeline_init(&frame->timeline, - engine->gt, - engine->status_page.vma)) - goto out_frame; - - mutex_lock(&frame->timeline.mutex); + frame->rq.i915 = engine->i915; + frame->rq.engine = engine; + frame->rq.context = ce; + rcu_assign_pointer(frame->rq.timeline, ce->timeline); frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); frame->ring.effective_size = frame->ring.size; intel_ring_update_space(&frame->ring); - - frame->rq.i915 = engine->i915; - frame->rq.engine = engine; frame->rq.ring = &frame->ring; - rcu_assign_pointer(frame->rq.timeline, &frame->timeline); - - dw = intel_timeline_pin(&frame->timeline); - if (dw < 0) - goto out_timeline; + mutex_lock(&ce->timeline->mutex); spin_lock_irq(&engine->active.lock); + dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; + 
spin_unlock_irq(&engine->active.lock); + mutex_unlock(&ce->timeline->mutex); GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ - intel_timeline_unpin(&frame->timeline); - -out_timeline: - mutex_unlock(&frame->timeline.mutex); - intel_timeline_fini(&frame->timeline); -out_frame: kfree(frame); return dw; } @@ -737,12 +749,6 @@ static int engine_init_common(struct intel_engine_cs *engine) engine->set_default_submission(engine); - ret = measure_breadcrumb_dw(engine); - if (ret < 0) - return ret; - - engine->emit_fini_breadcrumb_dw = ret; - /* * We may need to do things with the shrinker which * require us to immediately switch back to the default @@ -755,9 +761,18 @@ static int engine_init_common(struct intel_engine_cs *engine) if (IS_ERR(ce)) return PTR_ERR(ce); + ret = measure_breadcrumb_dw(ce); + if (ret < 0) + goto err_context; + + engine->emit_fini_breadcrumb_dw = ret; engine->kernel_context = ce; return 0; + +err_context: + intel_context_put(ce); + return ret; } int intel_engines_init(struct intel_gt *gt) @@ -824,6 +839,20 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_wa_list_free(&engine->whitelist); } +/** + * intel_engine_resume - re-initializes the HW state of the engine + * @engine: Engine to resume. + * + * Returns zero on success or an error code on failure. 
+ */ +int intel_engine_resume(struct intel_engine_cs *engine) +{ + intel_engine_apply_workarounds(engine); + intel_engine_apply_whitelist(engine); + + return engine->resume(engine); +} + u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -982,6 +1011,12 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); + if (INTEL_GEN(i915) >= 12) { + instdone->slice_common_extra[0] = + intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA); + instdone->slice_common_extra[1] = + intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); + } for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { instdone->sampler[slice][subslice] = read_subslice_reg(engine, slice, subslice, @@ -1276,8 +1311,14 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } if (INTEL_GEN(dev_priv) >= 6) { - drm_printf(m, "\tRING_IMR: %08x\n", + drm_printf(m, "\tRING_IMR: 0x%08x\n", ENGINE_READ(engine, RING_IMR)); + drm_printf(m, "\tRING_ESR: 0x%08x\n", + ENGINE_READ(engine, RING_ESR)); + drm_printf(m, "\tRING_EMR: 0x%08x\n", + ENGINE_READ(engine, RING_EMR)); + drm_printf(m, "\tRING_EIR: 0x%08x\n", + ENGINE_READ(engine, RING_EIR)); } addr = intel_engine_get_active_head(engine); @@ -1342,25 +1383,27 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, execlists_active_lock_bh(execlists); rcu_read_lock(); for (port = execlists->active; (rq = *port); port++) { - char hdr[80]; + char hdr[160]; int len; - len = snprintf(hdr, sizeof(hdr), - "\t\tActive[%d]: ", - (int)(port - execlists->active)); + len = scnprintf(hdr, sizeof(hdr), + "\t\tActive[%d]: ", + (int)(port - execlists->active)); if (!i915_request_signaled(rq)) { struct intel_timeline *tl = get_timeline(rq); - len += snprintf(hdr + len, sizeof(hdr) - len, - "ring:{start:%08x, hwsp:%08x, seqno:%08x}, ", - i915_ggtt_offset(rq->ring->vma), - tl ? 
tl->hwsp_offset : 0, - hwsp_seqno(rq)); + len += scnprintf(hdr + len, sizeof(hdr) - len, + "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ", + i915_ggtt_offset(rq->ring->vma), + tl ? tl->hwsp_offset : 0, + hwsp_seqno(rq), + DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context), + 1000 * 1000)); if (tl) intel_timeline_put(tl); } - snprintf(hdr + len, sizeof(hdr) - len, "rq: "); + scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); } for (port = execlists->pending; (rq = *port); port++) { @@ -1657,6 +1700,23 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) * we only care about the snapshot of this moment. */ lockdep_assert_held(&engine->active.lock); + + rcu_read_lock(); + request = execlists_active(&engine->execlists); + if (request) { + struct intel_timeline *tl = request->context->timeline; + + list_for_each_entry_from_reverse(request, &tl->requests, link) { + if (i915_request_completed(request)) + break; + + active = request; + } + } + rcu_read_unlock(); + if (active) + return active; + list_for_each_entry(request, &engine->active.requests, sched.link) { if (i915_request_completed(request)) continue; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 6c6fd185457c..dd825718e4e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -180,7 +180,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine) struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; struct intel_context *ce = engine->kernel_context; struct i915_request *rq; - int err = 0; + int err; if (!intel_engine_has_preemption(engine)) return -ENODEV; @@ -188,8 +188,10 @@ int intel_engine_pulse(struct intel_engine_cs *engine) if (!intel_engine_pm_get_if_awake(engine)) return 0; - if (mutex_lock_interruptible(&ce->timeline->mutex)) + if (mutex_lock_interruptible(&ce->timeline->mutex)) { + 
err = -EINTR; goto out_rpm; + } intel_context_enter(ce); rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); @@ -204,6 +206,8 @@ int intel_engine_pulse(struct intel_engine_cs *engine) __i915_request_commit(rq); __i915_request_queue(rq, &attr); + GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER); + err = 0; out_unlock: mutex_unlock(&ce->timeline->mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index ea90ab3e396e..b6cf284e3a2d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -112,7 +112,7 @@ __queue_and_release_pm(struct i915_request *rq, { struct intel_gt_timelines *timelines = &engine->gt->timelines; - ENGINE_TRACE(engine, "\n"); + ENGINE_TRACE(engine, "parking\n"); /* * We have to serialise all potential retirement paths with our @@ -249,7 +249,7 @@ static int __engine_park(struct intel_wakeref *wf) if (!switch_to_kernel_context(engine)) return -EBUSY; - ENGINE_TRACE(engine, "\n"); + ENGINE_TRACE(engine, "parked\n"); call_idle_barriers(engine); /* cleanup after wedging */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 92be41a6903c..80cdde712842 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -75,6 +75,7 @@ struct intel_instdone { u32 instdone; /* The following exist only in the RCS engine */ u32 slice_common; + u32 slice_common_extra[2]; u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; }; @@ -126,7 +127,6 @@ DECLARE_EWMA(_engine_latency, 6, 4) struct st_preempt_hang { struct completion completion; unsigned int count; - bool inject_hang; }; /** @@ -157,6 +157,16 @@ struct intel_engine_execlists { struct i915_priolist default_priolist; /** + * @error_interrupt: CS Master EIR + * + * The CS generates an interrupt when it detects an error. 
We capture + * the first error interrupt, record the EIR and schedule the tasklet. + * In the tasklet, we process the pending CS events to ensure we have + * the guilty request, and then reset the engine. + */ + u32 error_interrupt; + + /** * @no_priolist: priority lists disabled */ bool no_priolist; @@ -537,6 +547,7 @@ struct intel_engine_cs { struct { unsigned long heartbeat_interval_ms; + unsigned long max_busywait_duration_ns; unsigned long preempt_timeout_ms; unsigned long stop_timeout_ms; unsigned long timeslice_duration_ms; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 9e7f12bef828..848decee9066 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -278,7 +278,8 @@ void intel_engines_driver_register(struct drm_i915_private *i915) } } - if (WARN(errors, "Invalid UABI engine mapping found")) + if (drm_WARN(&i915->drm, errors, + "Invalid UABI engine mapping found")) i915->uabi_engines = RB_ROOT; } diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 531d501be01f..aed498a0d032 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -8,6 +8,8 @@ #include <asm/set_memory.h> #include <asm/smp.h> +#include <drm/i915_drm.h> + #include "intel_gt.h" #include "i915_drv.h" #include "i915_scatterlist.h" @@ -104,27 +106,17 @@ static bool needs_idle_maps(struct drm_i915_private *i915) return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active(); } -static void ggtt_suspend_mappings(struct i915_ggtt *ggtt) +void i915_ggtt_suspend(struct i915_ggtt *ggtt) { - struct drm_i915_private *i915 = ggtt->vm.i915; - - /* - * Don't bother messing with faults pre GEN6 as we have little - * documentation supporting that it's a good idea. 
- */ - if (INTEL_GEN(i915) < 6) - return; + struct i915_vma *vma; - intel_gt_check_and_clear_faults(ggtt->vm.gt); + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) + i915_vma_wait_for_bind(vma); ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - ggtt->invalidate(ggtt); -} -void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915) -{ - ggtt_suspend_mappings(&i915->ggtt); + intel_gt_check_and_clear_faults(ggtt->vm.gt); } void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) @@ -167,6 +159,13 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) intel_gtt_chipset_flush(); } +static u64 gen8_ggtt_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + return addr | _PAGE_PRESENT; +} + static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) { writeq(pte, addr); @@ -182,7 +181,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, gen8_pte_t __iomem *pte = (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - gen8_set_pte(pte, gen8_pte_encode(addr, level, 0)); + gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0)); ggtt->invalidate(ggtt); } @@ -195,7 +194,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; gen8_pte_t __iomem *gtt_entries; - const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0); + const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0); dma_addr_t addr; /* @@ -350,31 +349,6 @@ static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); } -struct clear_range { - struct i915_address_space *vm; - u64 start; - u64 length; -}; - -static int bxt_vtd_ggtt_clear_range__cb(void *_arg) -{ - struct clear_range *arg = _arg; - - gen8_ggtt_clear_range(arg->vm, arg->start, arg->length); - bxt_vtd_ggtt_wa(arg->vm); - - return 0; -} - -static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm, - u64 start, 
- u64 length) -{ - struct clear_range arg = { vm, start, length }; - - stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL); -} - static void gen6_ggtt_clear_range(struct i915_address_space *vm, u64 start, u64 length) { @@ -462,7 +436,7 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) u64 size; int ret; - if (!USES_GUC(ggtt->vm.i915)) + if (!intel_uc_uses_guc(&ggtt->vm.gt->uc)) return 0; GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); @@ -472,7 +446,8 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, PIN_NOEVICT); if (ret) - DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n"); + drm_dbg(&ggtt->vm.i915->drm, + "Failed to reserve top of GGTT for GuC\n"); return ret; } @@ -544,8 +519,9 @@ static int init_ggtt(struct i915_ggtt *ggtt) /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { - DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", - hole_start, hole_end); + drm_dbg_kms(&ggtt->vm.i915->drm, + "clearing unused GTT space: [%lx, %lx]\n", + hole_start, hole_end); ggtt->vm.clear_range(&ggtt->vm, hole_start, hole_end - hole_start); } @@ -879,8 +855,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) { ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; - if (ggtt->vm.clear_range != nop_clear_range) - ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; + ggtt->vm.bind_async_flags = + I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; } ggtt->invalidate = gen8_ggtt_invalidate; @@ -890,7 +866,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.vma_ops.set_pages = ggtt_set_pages; ggtt->vm.vma_ops.clear_pages = clear_pages; - ggtt->vm.pte_encode = gen8_pte_encode; + ggtt->vm.pte_encode = gen8_ggtt_pte_encode; setup_private_pat(ggtt->vm.gt->uncore); @@ -1180,7 +1156,7 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt) 
ggtt->invalidate(ggtt); } -static void ggtt_restore_mappings(struct i915_ggtt *ggtt) +void i915_ggtt_resume(struct i915_ggtt *ggtt) { struct i915_vma *vma; bool flush = false; @@ -1188,8 +1164,6 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) intel_gt_check_and_clear_faults(ggtt->vm.gt); - mutex_lock(&ggtt->vm.mutex); - /* First fill our portion of the GTT with scratch pages */ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); @@ -1216,19 +1190,10 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) atomic_set(&ggtt->vm.open, open); ggtt->invalidate(ggtt); - mutex_unlock(&ggtt->vm.mutex); - if (flush) wbinvd_on_all_cpus(); -} - -void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915) -{ - struct i915_ggtt *ggtt = &i915->ggtt; - - ggtt_restore_mappings(ggtt); - if (INTEL_GEN(i915) >= 8) + if (INTEL_GEN(ggtt->vm.i915) >= 8) setup_private_pat(ggtt->vm.gt->uncore); } @@ -1267,6 +1232,7 @@ intel_rotate_pages(struct intel_rotation_info *rot_info, struct drm_i915_gem_object *obj) { unsigned int size = intel_rotation_info_size(rot_info); + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; struct scatterlist *sg; int ret = -ENOMEM; @@ -1296,8 +1262,9 @@ err_sg_alloc: kfree(st); err_st_alloc: - DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); + drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! 
(%ux%u tiles, %u pages)\n", + obj->base.size, rot_info->plane[0].width, + rot_info->plane[0].height, size); return ERR_PTR(ret); } @@ -1349,6 +1316,7 @@ intel_remap_pages(struct intel_remapped_info *rem_info, struct drm_i915_gem_object *obj) { unsigned int size = intel_remapped_info_size(rem_info); + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; struct scatterlist *sg; int ret = -ENOMEM; @@ -1380,8 +1348,9 @@ err_sg_alloc: kfree(st); err_st_alloc: - DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size); + drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rem_info->plane[0].width, + rem_info->plane[0].height, size); return ERR_PTR(ret); } @@ -1479,8 +1448,9 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) if (IS_ERR(vma->pages)) { ret = PTR_ERR(vma->pages); vma->pages = NULL; - DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", - vma->ggtt_view.type, ret); + drm_err(&vma->vm->i915->drm, + "Failed to get pages for VMA view type %u (%d)!\n", + vma->ggtt_view.type, ret); } return ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 51b8718513bc..f04214a54f75 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -292,10 +292,21 @@ #define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) #define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0) -#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16)) -#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16)) -#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16)) +#define STATE_BASE_ADDRESS \ + ((0x3 << 29) | (0x0 << 27) | (0x1 << 24) | (0x1 << 16)) +#define BASE_ADDRESS_MODIFY REG_BIT(0) +#define PIPELINE_SELECT \ + ((0x3 << 29) | (0x1 << 
27) | (0x1 << 24) | (0x4 << 16)) +#define PIPELINE_SELECT_MEDIA REG_BIT(0) +#define GFX_OP_3DSTATE_VF_STATISTICS \ + ((0x3 << 29) | (0x1 << 27) | (0x0 << 24) | (0xB << 16)) +#define MEDIA_VFE_STATE \ + ((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x0 << 16)) #define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18) +#define MEDIA_INTERFACE_DESCRIPTOR_LOAD \ + ((0x3 << 29) | (0x2 << 27) | (0x0 << 24) | (0x2 << 16)) +#define MEDIA_OBJECT \ + ((0x3 << 29) | (0x2 << 27) | (0x1 << 24) | (0x0 << 16)) #define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16)) #define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16)) #define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index da2b6e2ae692..d09f7596cb98 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -198,16 +198,16 @@ static void gen6_check_faults(struct intel_gt *gt) for_each_engine(engine, gt, id) { fault = GEN6_RING_FAULT_REG_READ(engine); if (fault & RING_FAULT_VALID) { - DRM_DEBUG_DRIVER("Unexpected fault\n" - "\tAddr: 0x%08lx\n" - "\tAddress space: %s\n" - "\tSource ID: %d\n" - "\tType: %d\n", - fault & PAGE_MASK, - fault & RING_FAULT_GTTSEL_MASK ? - "GGTT" : "PPGTT", - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); + drm_dbg(&engine->i915->drm, "Unexpected fault\n" + "\tAddr: 0x%08lx\n" + "\tAddress space: %s\n" + "\tSource ID: %d\n" + "\tType: %d\n", + fault & PAGE_MASK, + fault & RING_FAULT_GTTSEL_MASK ? + "GGTT" : "PPGTT", + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); } } } @@ -239,18 +239,17 @@ static void gen8_check_faults(struct intel_gt *gt) fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | ((u64)fault_data0 << 12); - DRM_DEBUG_DRIVER("Unexpected fault\n" - "\tAddr: 0x%08x_%08x\n" - "\tAddress space: %s\n" - "\tEngine ID: %d\n" - "\tSource ID: %d\n" - "\tType: %d\n", - upper_32_bits(fault_addr), - lower_32_bits(fault_addr), - fault_data1 & FAULT_GTT_SEL ? 
"GGTT" : "PPGTT", - GEN8_RING_FAULT_ENGINE_ID(fault), - RING_FAULT_SRCID(fault), - RING_FAULT_FAULT_TYPE(fault)); + drm_dbg(&uncore->i915->drm, "Unexpected fault\n" + "\tAddr: 0x%08x_%08x\n" + "\tAddress space: %s\n" + "\tEngine ID: %d\n" + "\tSource ID: %d\n" + "\tType: %d\n", + upper_32_bits(fault_addr), lower_32_bits(fault_addr), + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", + GEN8_RING_FAULT_ENGINE_ID(fault), + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); } } @@ -345,7 +344,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) goto err_unref; } - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + ret = i915_ggtt_pin(vma, 0, PIN_HIGH); if (ret) goto err_unref; @@ -455,6 +454,11 @@ err_rq: if (!rq) continue; + if (rq->fence.error) { + err = -EIO; + goto out; + } + GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); state = rq->context->state; if (!state) @@ -538,6 +542,10 @@ static int __engines_verify_workarounds(struct intel_gt *gt) err = -EIO; } + /* Flush and restore the kernel context for safety */ + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) + err = -EIO; + return err; } @@ -584,7 +592,9 @@ int intel_gt_init(struct intel_gt *gt) if (err) goto err_engines; - intel_uc_init(&gt->uc); + err = intel_uc_init(&gt->uc); + if (err) + goto err_engines; err = intel_gt_resume(gt); if (err) @@ -634,6 +644,13 @@ void intel_gt_driver_remove(struct intel_gt *gt) void intel_gt_driver_unregister(struct intel_gt *gt) { intel_rps_driver_unregister(&gt->rps); + + /* + * Upon unregistering the device to prevent any new users, cancel + * all in-flight requests so that we can quickly unbind the active + * resources. 
+ */ + intel_gt_set_wedged(gt); } void intel_gt_driver_release(struct intel_gt *gt) @@ -650,6 +667,9 @@ void intel_gt_driver_release(struct intel_gt *gt) void intel_gt_driver_late_release(struct intel_gt *gt) { + /* We need to wait for inflight RCU frees to release their grip */ + rcu_barrier(); + intel_uc_driver_late_release(&gt->uc); intel_gt_fini_requests(gt); intel_gt_fini_reset(gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 1dac441cb8f4..4fac043750aa 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -14,7 +14,7 @@ struct drm_i915_private; #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ - GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ + GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ ##__VA_ARGS__); \ } while (0) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index f796bdf1ed30..f0e7fd95165a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -24,6 +24,21 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir) { bool tasklet = false; + if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)) { + u32 eir; + + eir = ENGINE_READ(engine, RING_EIR); + ENGINE_TRACE(engine, "CS error: %x\n", eir); + + /* Disable the error interrupt until after the reset */ + if (likely(eir)) { + ENGINE_WRITE(engine, RING_EMR, ~0u); + ENGINE_WRITE(engine, RING_EIR, eir); + WRITE_ONCE(engine->execlists.error_interrupt, eir); + tasklet = true; + } + } + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) tasklet = true; @@ -210,7 +225,10 @@ void gen11_gt_irq_reset(struct intel_gt *gt) void gen11_gt_irq_postinstall(struct intel_gt *gt) { - const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT; + const u32 irqs = + GT_CS_MASTER_ERROR_INTERRUPT | + GT_RENDER_USER_INTERRUPT | + GT_CONTEXT_SWITCH_INTERRUPT; struct intel_uncore *uncore = gt->uncore; const 
u32 dmask = irqs << 16 | irqs; const u32 smask = irqs << 16; @@ -279,66 +297,56 @@ void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | - GT_RENDER_CS_MASTER_ERROR_INTERRUPT)) + GT_CS_MASTER_ERROR_INTERRUPT)) DRM_DEBUG("Command parser error, gt_iir 0x%08x\n", gt_iir); if (gt_iir & GT_PARITY_ERROR(gt->i915)) gen7_parity_error_irq_handler(gt, gt_iir); } -void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]) +void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl) { void __iomem * const regs = gt->uncore->regs; + u32 iir; if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { - gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0)); - if (likely(gt_iir[0])) - raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]); - } - - if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { - gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1)); - if (likely(gt_iir[1])) - raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]); - } - - if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { - gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2)); - if (likely(gt_iir[2])) - raw_reg_write(regs, GEN8_GT_IIR(2), gt_iir[2]); - } - - if (master_ctl & GEN8_GT_VECS_IRQ) { - gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3)); - if (likely(gt_iir[3])) - raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]); - } -} - -void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]) -{ - if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { - cs_irq_handler(gt->engine_class[RENDER_CLASS][0], - gt_iir[0] >> GEN8_RCS_IRQ_SHIFT); - cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0], - gt_iir[0] >> GEN8_BCS_IRQ_SHIFT); + iir = raw_reg_read(regs, GEN8_GT_IIR(0)); + if (likely(iir)) { + cs_irq_handler(gt->engine_class[RENDER_CLASS][0], + iir >> GEN8_RCS_IRQ_SHIFT); + cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0], + iir >> GEN8_BCS_IRQ_SHIFT); + raw_reg_write(regs, GEN8_GT_IIR(0), iir); + } } if (master_ctl & 
(GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) { - cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0], - gt_iir[1] >> GEN8_VCS0_IRQ_SHIFT); - cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1], - gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT); + iir = raw_reg_read(regs, GEN8_GT_IIR(1)); + if (likely(iir)) { + cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0], + iir >> GEN8_VCS0_IRQ_SHIFT); + cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1], + iir >> GEN8_VCS1_IRQ_SHIFT); + raw_reg_write(regs, GEN8_GT_IIR(1), iir); + } } if (master_ctl & GEN8_GT_VECS_IRQ) { - cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0], - gt_iir[3] >> GEN8_VECS_IRQ_SHIFT); + iir = raw_reg_read(regs, GEN8_GT_IIR(3)); + if (likely(iir)) { + cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0], + iir >> GEN8_VECS_IRQ_SHIFT); + raw_reg_write(regs, GEN8_GT_IIR(3), iir); + } } if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { - gen6_rps_irq_handler(&gt->rps, gt_iir[2]); - guc_irq_handler(&gt->uc.guc, gt_iir[2] >> 16); + iir = raw_reg_read(regs, GEN8_GT_IIR(2)); + if (likely(iir)) { + gen6_rps_irq_handler(&gt->rps, iir); + guc_irq_handler(&gt->uc.guc, iir >> 16); + raw_reg_write(regs, GEN8_GT_IIR(2), iir); + } } } @@ -354,25 +362,18 @@ void gen8_gt_irq_reset(struct intel_gt *gt) void gen8_gt_irq_postinstall(struct intel_gt *gt) { - struct intel_uncore *uncore = gt->uncore; - /* These are interrupts we'll toggle with the ring mask register */ - u32 gt_interrupts[] = { - (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT), - - (GT_RENDER_USER_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT), - + const u32 irqs = + GT_CS_MASTER_ERROR_INTERRUPT | + GT_RENDER_USER_INTERRUPT | + 
GT_CONTEXT_SWITCH_INTERRUPT; + const u32 gt_interrupts[] = { + irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT, + irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT, 0, - - (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT) + irqs << GEN8_VECS_IRQ_SHIFT, }; + struct intel_uncore *uncore = gt->uncore; gt->pm_ier = 0x0; gt->pm_imr = ~gt->pm_ier; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h index 8f37593712c9..886c5cf408a2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h @@ -36,9 +36,8 @@ void gen5_gt_enable_irq(struct intel_gt *gt, u32 mask); void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir); -void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]); +void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl); void gen8_gt_irq_reset(struct intel_gt *gt); -void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]); void gen8_gt_irq_postinstall(struct intel_gt *gt); #endif /* INTEL_GT_IRQ_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index d1c2f034296a..8b653c0f5e5f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -216,7 +216,7 @@ int intel_gt_resume(struct intel_gt *gt) intel_engine_pm_get(engine); engine->serial++; /* kernel context lost */ - err = engine->resume(engine); + err = intel_engine_resume(engine); intel_engine_pm_put(engine); if (err) { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 16acdc5d6734..2a72cce63fd9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -171,7 +171,9 @@ void __i915_vm_close(struct i915_address_space *vm) { struct i915_vma *vma, *vn; - mutex_lock(&vm->mutex); + if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex)) + return; + 
list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; @@ -186,6 +188,7 @@ void __i915_vm_close(struct i915_address_space *vm) i915_gem_object_put(obj); } GEM_BUG_ON(!list_empty(&vm->bound_list)); + mutex_unlock(&vm->mutex); } @@ -299,6 +302,25 @@ fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count) kunmap_atomic(memset64(kmap_atomic(p->page), val, count)); } +static void poison_scratch_page(struct page *page, unsigned long size) +{ + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + + do { + void *vaddr; + + vaddr = kmap(page); + memset(vaddr, POISON_FREE, PAGE_SIZE); + kunmap(page); + + page = pfn_to_page(page_to_pfn(page) + 1); + size -= PAGE_SIZE; + } while (size); +} + int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) { unsigned long size; @@ -331,6 +353,17 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) if (unlikely(!page)) goto skip; + /* + * Use a non-zero scratch page for debugging. + * + * We want a value that should be reasonably obvious + * to spot in the error state, while also causing a GPU hang + * if executed. We prefer using a clear page in production, so + * should it ever be accidentally used, the effect should be + * fairly benign. + */ + poison_scratch_page(page, size); + addr = dma_map_page_attrs(vm->dma, page, 0, size, PCI_DMA_BIDIRECTIONAL, @@ -448,36 +481,12 @@ void gtt_write_workarounds(struct intel_gt *gt) intel_uncore_write(uncore, HSW_GTT_CACHE_EN, can_use_gtt_cache ? 
GTT_CACHE_EN_ALL : 0); - WARN_ON_ONCE(can_use_gtt_cache && - intel_uncore_read(uncore, - HSW_GTT_CACHE_EN) == 0); + drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache && + intel_uncore_read(uncore, + HSW_GTT_CACHE_EN) == 0); } } -u64 gen8_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; - - if (unlikely(flags & PTE_READ_ONLY)) - pte &= ~_PAGE_RW; - - switch (level) { - case I915_CACHE_NONE: - pte |= PPAT_UNCACHED; - break; - case I915_CACHE_WT: - pte |= PPAT_DISPLAY_ELLC; - break; - default: - pte |= PPAT_CACHED; - break; - } - - return pte; -} - static void tgl_setup_private_ppat(struct intel_uncore *uncore) { /* TGL doesn't support LLC or AGE settings */ diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 7da7681c20b1..b3116fe8d180 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -429,8 +429,7 @@ static inline void i915_vm_close(struct i915_address_space *vm) { GEM_BUG_ON(!atomic_read(&vm->open)); - if (atomic_dec_and_test(&vm->open)) - __i915_vm_close(vm); + __i915_vm_close(vm); i915_vm_put(vm); } @@ -512,12 +511,8 @@ int i915_ppgtt_init_hw(struct intel_gt *gt); struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt); -void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915); -void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915); - -u64 gen8_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags); +void i915_ggtt_suspend(struct i915_ggtt *gtt); +void i915_ggtt_resume(struct i915_ggtt *ggtt); int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c index ceb785b75c25..e3f637b3650e 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc.c +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -50,6 +50,9 
@@ static bool get_ia_constants(struct intel_llc *llc, struct drm_i915_private *i915 = llc_to_gt(llc)->i915; struct intel_rps *rps = &llc_to_gt(llc)->rps; + if (!HAS_LLC(i915) || IS_DGFX(i915)) + return false; + if (rps->max_freq <= rps->min_freq) return false; @@ -147,8 +150,7 @@ static void gen6_update_ring_freq(struct intel_llc *llc) void intel_llc_enable(struct intel_llc *llc) { - if (HAS_LLC(llc_to_gt(llc)->i915)) - gen6_update_ring_freq(llc); + gen6_update_ring_freq(llc); } void intel_llc_disable(struct intel_llc *llc) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 31455eceeb0c..683014e7bc51 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -176,8 +176,6 @@ /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ -#define WA_TAIL_DWORDS 2 -#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) struct virtual_engine { struct intel_engine_cs base; @@ -247,7 +245,7 @@ static void mark_eio(struct i915_request *rq) GEM_BUG_ON(i915_request_signaled(rq)); - dma_fence_set_error(&rq->fence, -EIO); + i915_request_set_error_once(rq, -EIO); i915_request_mark_complete(rq); } @@ -295,7 +293,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline int rq_prio(const struct i915_request *rq) { - return rq->sched.attr.priority; + return READ_ONCE(rq->sched.attr.priority); } static int effective_prio(const struct i915_request *rq) @@ -1006,7 +1004,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) i915_request_cancel_breadcrumb(rq); spin_unlock(&rq->lock); } - rq->engine = owner; + WRITE_ONCE(rq->engine, owner); owner->submit_request(rq); active = NULL; } @@ -1197,6 +1195,48 @@ static void reset_active(struct i915_request *rq, ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; } +static u32 intel_context_get_runtime(const struct intel_context *ce) +{ + /* + * We can use either ppHWSP[16] which is 
recorded before the context + * switch (and so excludes the cost of context switches) or use the + * value from the context image itself, which is saved/restored earlier + * and so includes the cost of the save. + */ + return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); +} + +static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) +{ +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) + ce->runtime.num_underflow += dt < 0; + ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); +#endif +} + +static void intel_context_update_runtime(struct intel_context *ce) +{ + u32 old; + s32 dt; + + if (intel_context_is_barrier(ce)) + return; + + old = ce->runtime.last; + ce->runtime.last = intel_context_get_runtime(ce); + dt = ce->runtime.last - old; + + if (unlikely(dt <= 0)) { + CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", + old, ce->runtime.last, dt); + st_update_runtime_underflow(ce, dt); + return; + } + + ewma_runtime_add(&ce->runtime.avg, dt); + ce->runtime.total += dt; +} + static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { @@ -1211,12 +1251,12 @@ __execlists_schedule_in(struct i915_request *rq) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) execlists_check_context(ce, engine); + ce->lrc_desc &= ~GENMASK_ULL(47, 37); if (ce->tag) { /* Use a fixed tag for OA and friends */ ce->lrc_desc |= (u64)ce->tag << 32; } else { /* We don't need a strict matching tag, just different values */ - ce->lrc_desc &= ~GENMASK_ULL(47, 37); ce->lrc_desc |= (u64)(++engine->context_tag % NUM_CONTEXT_TAG) << GEN11_SW_CTX_ID_SHIFT; @@ -1276,10 +1316,11 @@ __execlists_schedule_out(struct i915_request *rq, * If we have just completed this context, the engine may now be * idle and we want to re-enter powersaving. 
*/ - if (list_is_last(&rq->link, &ce->timeline->requests) && + if (list_is_last_rcu(&rq->link, &ce->timeline->requests) && i915_request_completed(rq)) intel_engine_add_retire(engine, ce->timeline); + intel_context_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put_async(engine->gt); @@ -1395,15 +1436,26 @@ trace_ports(const struct intel_engine_execlists *execlists, ports[1] ? ports[1]->fence.seqno : 0); } +static inline bool +reset_in_progress(const struct intel_engine_execlists *execlists) +{ + return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); +} + static __maybe_unused bool assert_pending_valid(const struct intel_engine_execlists *execlists, const char *msg) { struct i915_request * const *port, *rq; struct intel_context *ce = NULL; + bool sentinel = false; trace_ports(execlists, msg, execlists->pending); + /* We may be messing around with the lists during reset, lalala */ + if (reset_in_progress(execlists)) + return true; + if (!execlists->pending[0]) { GEM_TRACE_ERR("Nothing pending for promotion!\n"); return false; @@ -1430,6 +1482,26 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, } ce = rq->context; + /* + * Sentinels are supposed to be lonely so they flush the + * current execution off the HW. Check that they are the + * only request in the pending submission. + */ + if (sentinel) { + GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n", + ce->timeline->fence_context, + port - execlists->pending); + return false; + } + + sentinel = i915_request_has_sentinel(rq); + if (sentinel && port != execlists->pending) { + GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n", + ce->timeline->fence_context, + port - execlists->pending); + return false; + } + /* Hold tightly onto the lock to prevent concurrent retires!
*/ if (!spin_trylock_irqsave(&rq->lock, flags)) continue; @@ -1525,6 +1597,11 @@ static bool can_merge_ctx(const struct intel_context *prev, return true; } +static unsigned long i915_request_flags(const struct i915_request *rq) +{ + return READ_ONCE(rq->fence.flags); +} + static bool can_merge_rq(const struct i915_request *prev, const struct i915_request *next) { @@ -1542,7 +1619,7 @@ static bool can_merge_rq(const struct i915_request *prev, if (i915_request_completed(next)) return true; - if (unlikely((prev->fence.flags ^ next->fence.flags) & + if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) & (BIT(I915_FENCE_FLAG_NOPREEMPT) | BIT(I915_FENCE_FLAG_SENTINEL)))) return false; @@ -1550,6 +1627,7 @@ static bool can_merge_rq(const struct i915_request *prev, if (!can_merge_ctx(prev->context, next->context)) return false; + GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno)); return true; } @@ -1585,7 +1663,7 @@ static bool virtual_matches(const struct virtual_engine *ve, } static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, - struct intel_engine_cs *engine) + struct i915_request *rq) { struct intel_engine_cs *old = ve->siblings[0]; @@ -1593,9 +1671,19 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, spin_lock(&old->breadcrumbs.irq_lock); if (!list_empty(&ve->context.signal_link)) { - list_move_tail(&ve->context.signal_link, - &engine->breadcrumbs.signalers); - intel_engine_signal_breadcrumbs(engine); + list_del_init(&ve->context.signal_link); + + /* + * We cannot acquire the new engine->breadcrumbs.irq_lock + * (as we are holding a breadcrumbs.irq_lock already), + * so attach this request to the signaler on submission. + * The queued irq_work will occur when we finally drop + * the engine->active.lock after dequeue. + */ + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags); + + /* Also transfer the pending irq_work for the old breadcrumb. 
*/ + intel_engine_signal_breadcrumbs(rq->engine); } spin_unlock(&old->breadcrumbs.irq_lock); } @@ -1605,6 +1693,11 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, &(rq__)->sched.waiters_list, \ wait_link) +#define for_each_signaler(p__, rq__) \ + list_for_each_entry_rcu(p__, \ + &(rq__)->sched.signalers_list, \ + signal_link) + static void defer_request(struct i915_request *rq, struct list_head * const pl) { LIST_HEAD(list); @@ -1693,12 +1786,13 @@ timeslice(const struct intel_engine_cs *engine) static unsigned long active_timeslice(const struct intel_engine_cs *engine) { - const struct i915_request *rq = *engine->execlists.active; + const struct intel_engine_execlists *execlists = &engine->execlists; + const struct i915_request *rq = *execlists->active; if (!rq || i915_request_completed(rq)) return 0; - if (engine->execlists.switch_priority_hint < effective_prio(rq)) + if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq)) return 0; return timeslice(engine); @@ -1715,8 +1809,11 @@ static void set_timeslice(struct intel_engine_cs *engine) static void start_timeslice(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; + int prio = queue_prio(execlists); - execlists->switch_priority_hint = execlists->queue_priority_hint; + WRITE_ONCE(execlists->switch_priority_hint, prio); + if (prio == INT_MIN) + return; if (timer_pending(&execlists->timer)) return; @@ -1938,13 +2035,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) "", yesno(engine != ve->siblings[0])); - ve->request = NULL; - ve->base.execlists.queue_priority_hint = INT_MIN; + WRITE_ONCE(ve->request, NULL); + WRITE_ONCE(ve->base.execlists.queue_priority_hint, + INT_MIN); rb_erase_cached(rb, &execlists->virtual); RB_CLEAR_NODE(rb); GEM_BUG_ON(!(rq->execution_mask & engine->mask)); - rq->engine = engine; + WRITE_ONCE(rq->engine, engine); if (engine != ve->siblings[0]) { u32 *regs = ve->context.lrc_reg_state; @@ -1957,7 
+2055,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) engine); if (!list_empty(&ve->context.signals)) - virtual_xfer_breadcrumbs(ve, engine); + virtual_xfer_breadcrumbs(ve, rq); /* * Move the bound engine to the top of the list @@ -2064,6 +2162,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(last && !can_merge_ctx(last->context, rq->context)); + GEM_BUG_ON(last && + i915_seqno_passed(last->fence.seqno, + rq->fence.seqno)); submit = true; last = rq; @@ -2134,6 +2235,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) execlists_schedule_out(*port); clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); + smp_wmb(); /* complete the seqlock for execlists_active() */ WRITE_ONCE(execlists->active, execlists->inflight); } @@ -2144,12 +2246,6 @@ invalidate_csb_entries(const u32 *first, const u32 *last) clflush((void *)last); } -static inline bool -reset_in_progress(const struct intel_engine_execlists *execlists) -{ - return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); -} - /* * Starting with Gen12, the status has a new format: * @@ -2240,7 +2336,6 @@ static void process_csb(struct intel_engine_cs *engine) */ head = execlists->csb_head; tail = READ_ONCE(*execlists->csb_write); - ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); if (unlikely(head == tail)) return; @@ -2254,6 +2349,7 @@ static void process_csb(struct intel_engine_cs *engine) */ rmb(); + ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); do { bool promote; @@ -2288,11 +2384,13 @@ static void process_csb(struct intel_engine_cs *engine) if (promote) { struct i915_request * const *old = execlists->active; + GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); + + ring_set_paused(engine, 0); + /* Point active to the new ELSP; prevent overwriting */ WRITE_ONCE(execlists->active, execlists->pending); - - if (!inject_preempt_hang(execlists)) - ring_set_paused(engine, 0); + smp_wmb(); /* notify 
execlists_active() */ /* cancel old inflight, prepare for switch */ trace_ports(execlists, "preempted", old); @@ -2300,12 +2398,12 @@ static void process_csb(struct intel_engine_cs *engine) execlists_schedule_out(*old++); /* switch pending to inflight */ - GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - WRITE_ONCE(execlists->active, - memcpy(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists) * - sizeof(*execlists->pending))); + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending)); + smp_wmb(); /* complete the seqlock */ + WRITE_ONCE(execlists->active, execlists->inflight); WRITE_ONCE(execlists->pending[0], NULL); } else { @@ -2320,8 +2418,37 @@ static void process_csb(struct intel_engine_cs *engine) * coherent (visible from the CPU) before the * user interrupt and CSB is processed. */ - GEM_BUG_ON(!i915_request_completed(*execlists->active) && - !reset_in_progress(execlists)); + if (GEM_SHOW_DEBUG() && + !i915_request_completed(*execlists->active) && + !reset_in_progress(execlists)) { + struct i915_request *rq __maybe_unused = + *execlists->active; + const u32 *regs __maybe_unused = + rq->context->lrc_reg_state; + + ENGINE_TRACE(engine, + "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n", + ENGINE_READ(engine, RING_START), + ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR, + ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR, + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_MI_MODE)); + ENGINE_TRACE(engine, + "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ", + i915_ggtt_offset(rq->ring->vma), + rq->head, rq->tail, + rq->fence.context, + lower_32_bits(rq->fence.seqno), + hwsp_seqno(rq)); + ENGINE_TRACE(engine, + "ctx:{start:%08x, head:%04x, tail:%04x}, ", + regs[CTX_RING_START], + regs[CTX_RING_HEAD], + regs[CTX_RING_TAIL]); + + GEM_BUG_ON("context completed before request"); + } + execlists_schedule_out(*execlists->active++); 
GEM_BUG_ON(execlists->active - execlists->inflight > @@ -2349,7 +2476,7 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { lockdep_assert_held(&engine->active.lock); - if (!engine->execlists.pending[0]) { + if (!READ_ONCE(engine->execlists.pending[0])) { rcu_read_lock(); /* protect peeking at execlists->active */ execlists_dequeue(engine); rcu_read_unlock(); @@ -2366,12 +2493,12 @@ static void __execlists_hold(struct i915_request *rq) if (i915_request_is_active(rq)) __i915_request_unsubmit(rq); - RQ_TRACE(rq, "on hold\n"); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); list_move_tail(&rq->sched.link, &rq->engine->active.hold); i915_request_set_hold(rq); + RQ_TRACE(rq, "on hold\n"); - list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + for_each_waiter(p, rq) { struct i915_request *w = container_of(p->waiter, typeof(*w), sched); @@ -2385,7 +2512,7 @@ static void __execlists_hold(struct i915_request *rq) if (i915_request_completed(w)) continue; - if (i915_request_on_hold(rq)) + if (i915_request_on_hold(w)) continue; list_move_tail(&w->sched.link, &list); @@ -2443,6 +2570,7 @@ static bool execlists_hold(struct intel_engine_cs *engine, GEM_BUG_ON(i915_request_on_hold(rq)); GEM_BUG_ON(rq->engine != engine); __execlists_hold(rq); + GEM_BUG_ON(list_empty(&engine->active.hold)); unlock: spin_unlock_irq(&engine->active.lock); @@ -2452,23 +2580,27 @@ unlock: static bool hold_request(const struct i915_request *rq) { struct i915_dependency *p; + bool result = false; /* * If one of our ancestors is on hold, we must also be on hold, * otherwise we will bypass it and execute before it. 
*/ - list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { + rcu_read_lock(); + for_each_signaler(p, rq) { const struct i915_request *s = container_of(p->signaler, typeof(*s), sched); if (s->engine != rq->engine) continue; - if (i915_request_on_hold(s)) - return true; + result = i915_request_on_hold(s); + if (result) + break; } + rcu_read_unlock(); - return false; + return result; } static void __execlists_unhold(struct i915_request *rq) @@ -2478,6 +2610,8 @@ static void __execlists_unhold(struct i915_request *rq) do { struct i915_dependency *p; + RQ_TRACE(rq, "hold release\n"); + GEM_BUG_ON(!i915_request_on_hold(rq)); GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); @@ -2486,21 +2620,24 @@ static void __execlists_unhold(struct i915_request *rq) i915_sched_lookup_priolist(rq->engine, rq_prio(rq))); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - RQ_TRACE(rq, "hold release\n"); /* Also release any children on this engine that are ready */ - list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + for_each_waiter(p, rq) { struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + /* Propagate any change in error status */ + if (rq->fence.error) + i915_request_set_error_once(w, rq->fence.error); + if (w->engine != rq->engine) continue; - if (!i915_request_on_hold(rq)) + if (!i915_request_on_hold(w)) continue; /* Check that no other parents are also on hold */ - if (hold_request(rq)) + if (hold_request(w)) continue; list_move_tail(&w->sched.link, &list); @@ -2615,13 +2752,13 @@ static bool execlists_capture(struct intel_engine_cs *engine) if (!cap) return true; + spin_lock_irq(&engine->active.lock); cap->rq = execlists_active(&engine->execlists); - GEM_BUG_ON(!cap->rq); - - rcu_read_lock(); - cap->rq = active_request(cap->rq->context->timeline, cap->rq); - cap->rq = i915_request_get_rcu(cap->rq); - rcu_read_unlock(); + if (cap->rq) { + cap->rq = active_request(cap->rq->context->timeline, cap->rq); + cap->rq = 
i915_request_get_rcu(cap->rq); + } + spin_unlock_irq(&engine->active.lock); if (!cap->rq) goto err_free; @@ -2660,27 +2797,25 @@ err_free: return false; } -static noinline void preempt_reset(struct intel_engine_cs *engine) +static void execlists_reset(struct intel_engine_cs *engine, const char *msg) { const unsigned int bit = I915_RESET_ENGINE + engine->id; unsigned long *lock = &engine->gt->reset.flags; - if (i915_modparams.reset < 3) + if (!intel_has_reset_engine(engine->gt)) return; if (test_and_set_bit(bit, lock)) return; + ENGINE_TRACE(engine, "reset for %s\n", msg); + /* Mark this tasklet as disabled to avoid waiting for it to complete */ tasklet_disable_nosync(&engine->execlists.tasklet); - ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n", - READ_ONCE(engine->props.preempt_timeout_ms), - jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); - ring_set_paused(engine, 1); /* Freeze the current request in place */ if (execlists_capture(engine)) - intel_engine_reset(engine, "preemption time out"); + intel_engine_reset(engine, msg); else ring_set_paused(engine, 0); @@ -2711,6 +2846,13 @@ static void execlists_submission_tasklet(unsigned long data) bool timeout = preempt_timeout(engine); process_csb(engine); + + if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { + engine->execlists.error_interrupt = 0; + if (ENGINE_READ(engine, RING_ESR)) /* confirm the error */ + execlists_reset(engine, "CS error"); + } + if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { unsigned long flags; @@ -2719,8 +2861,8 @@ static void execlists_submission_tasklet(unsigned long data) spin_unlock_irqrestore(&engine->active.lock, flags); /* Recheck after serialising with direct-submission */ - if (timeout && preempt_timeout(engine)) - preempt_reset(engine); + if (unlikely(timeout && preempt_timeout(engine))) + execlists_reset(engine, "preemption time out"); } } @@ -2793,6 +2935,7 @@ static void execlists_submit_request(struct i915_request *request) 
spin_lock_irqsave(&engine->active.lock, flags); if (unlikely(ancestor_on_hold(engine, request))) { + RQ_TRACE(request, "ancestor on hold\n"); list_add_tail(&request->sched.link, &engine->active.hold); i915_request_set_hold(request); } else { @@ -2874,6 +3017,7 @@ __execlists_update_reg_state(const struct intel_context *ce, regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); regs[CTX_RING_HEAD] = head; regs[CTX_RING_TAIL] = ring->tail; + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; /* RPCS */ if (engine->class == RENDER_CLASS) { @@ -2921,22 +3065,6 @@ static void execlists_context_reset(struct intel_context *ce) CE_TRACE(ce, "reset\n"); GEM_BUG_ON(!intel_context_is_pinned(ce)); - /* - * Because we emit WA_TAIL_DWORDS there may be a disparity - * between our bookkeeping in ce->ring->head and ce->ring->tail and - * that stored in context. As we only write new commands from - * ce->ring->tail onwards, everything before that is junk. If the GPU - * starts reading from its RING_HEAD from the context, it may try to - * execute that junk and die. - * - * The contexts that are stilled pinned on resume belong to the - * kernel, and are local to each engine. All other contexts will - * have their head/tail sanitized upon pinning before use, so they - * will never see garbage, - * - * So to avoid that we reset the context images upon resume. For - * simplicity, we just zero everything out. 
- */ intel_ring_reset(ce->ring, ce->ring->emit); /* Scrub away the garbage */ @@ -2964,7 +3092,8 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; - GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb); + if (!i915_request_timeline(rq)->has_initial_breadcrumb) + return 0; cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -3257,7 +3386,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) goto err; } - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_ggtt_pin(vma, 0, PIN_HIGH); if (err) goto err; @@ -3346,6 +3475,49 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } +static void enable_error_interrupt(struct intel_engine_cs *engine) +{ + u32 status; + + engine->execlists.error_interrupt = 0; + ENGINE_WRITE(engine, RING_EMR, ~0u); + ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */ + + status = ENGINE_READ(engine, RING_ESR); + if (unlikely(status)) { + dev_err(engine->i915->drm.dev, + "engine '%s' resumed still in error: %08x\n", + engine->name, status); + __intel_gt_reset(engine->gt, engine->mask); + } + + /* + * On current gen8+, we have 2 signals to play with + * + * - I915_ERROR_INSTRUCTION (bit 0) + * + * Generate an error if the command parser encounters an invalid + * instruction + * + * This is a fatal error. + * + * - CP_PRIV (bit 2) + * + * Generate an error on privilege violation (where the CP replaces + * the instruction with a no-op). This also fires for writes into + * read-only scratch pages. + * + * This is a non-fatal error, parsing continues. + * + * * there are a few others defined for odd HW that we do not use + * + * Since CP_PRIV fires for cases where we have chosen to ignore the + * error (as the HW is validating and suppressing the mistakes), we + * only unmask the instruction error bit.
+ */ + ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION); +} + static void enable_execlists(struct intel_engine_cs *engine) { u32 mode; @@ -3367,6 +3539,8 @@ static void enable_execlists(struct intel_engine_cs *engine) i915_ggtt_offset(engine->status_page.vma)); ENGINE_POSTING_READ(engine, RING_HWS_PGA); + enable_error_interrupt(engine); + engine->context_tag = 0; } @@ -3384,9 +3558,6 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine) static int execlists_resume(struct intel_engine_cs *engine) { - intel_engine_apply_workarounds(engine); - intel_engine_apply_whitelist(engine); - intel_mocs_init_engine(engine); intel_engine_reset_breadcrumbs(engine); @@ -3517,9 +3688,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) if (!rq) goto unwind; - /* We still have requests in-flight; the engine should be active */ - GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); - ce = rq->context; GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); @@ -3529,8 +3697,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) goto out_replay; } + /* We still have requests in-flight; the engine should be active */ + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + /* Context has requests still in-flight; it should not be idle! */ GEM_BUG_ON(i915_active_is_idle(&ce->active)); + rq = active_request(ce->timeline, rq); head = intel_ring_wrap(ce->ring, rq->head); GEM_BUG_ON(head == ce->ring->tail); @@ -3604,7 +3776,10 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) static void nop_submission_tasklet(unsigned long data) { + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + /* The driver is wedged; don't process any more events. 
*/ + WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); } static void execlists_reset_cancel(struct intel_engine_cs *engine) @@ -4273,6 +4448,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; + engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; } static void rcs_submission_override(struct intel_engine_cs *engine) @@ -4514,8 +4690,13 @@ populate_lr_context(struct intel_context *ce, inhibit = false; } - /* The second page of the context object contains some fields which must - * be set up prior to the first execution. */ + /* Clear the ppHWSP (inc. per-context counters) */ + memset(vaddr, 0, PAGE_SIZE); + + /* + * The second page of the context object contains some registers which + * must be set up prior to the first execution. + */ execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, ce, engine, ring, inhibit); @@ -4553,8 +4734,17 @@ static int __execlists_context_alloc(struct intel_context *ce, if (!ce->timeline) { struct intel_timeline *tl; + struct i915_vma *hwsp; - tl = intel_timeline_create(engine->gt, NULL); + /* + * Use the static global HWSP for the kernel context, and + * a dynamically allocated cacheline for everyone else. 
+ */ + hwsp = NULL; + if (unlikely(intel_context_is_barrier(ce))) + hwsp = engine->status_page.vma; + + tl = intel_timeline_create(engine->gt, hwsp); if (IS_ERR(tl)) { ret = PTR_ERR(tl); goto error_deref_obj; @@ -4723,7 +4913,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) mask = rq->execution_mask; if (unlikely(!mask)) { /* Invalid selection, submit to a random engine in error */ - i915_request_skip(rq, -ENODEV); + i915_request_set_error_once(rq, -ENODEV); mask = ve->siblings[0]->mask; } @@ -4737,7 +4927,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) static void virtual_submission_tasklet(unsigned long data) { struct virtual_engine * const ve = (struct virtual_engine *)data; - const int prio = ve->base.execlists.queue_priority_hint; + const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); intel_engine_mask_t mask; unsigned int n; @@ -5133,11 +5323,15 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\tE "); } - last = NULL; - count = 0; + if (execlists->switch_priority_hint != INT_MIN) + drm_printf(m, "\t\tSwitch priority hint: %d\n", + READ_ONCE(execlists->switch_priority_hint)); if (execlists->queue_priority_hint != INT_MIN) drm_printf(m, "\t\tQueue priority hint: %d\n", - execlists->queue_priority_hint); + READ_ONCE(execlists->queue_priority_hint)); + + last = NULL; + count = 0; for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); int i; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 08a3be65f700..d39b72590e40 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -17,6 +17,7 @@ #define CTX_RING_CTL (0x0a + 1) #define CTX_BB_STATE (0x10 + 1) #define CTX_BB_PER_CTX_PTR (0x18 + 1) +#define CTX_TIMESTAMP (0x22 + 1) #define CTX_PDP3_UDW (0x24 + 1) #define 
CTX_PDP3_LDW (0x26 + 1) #define CTX_PDP2_UDW (0x28 + 1) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index eeef90b55c64..632e08a4592b 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -280,9 +280,32 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { GEN11_MOCS_ENTRIES }; -static bool get_mocs_settings(const struct drm_i915_private *i915, - struct drm_i915_mocs_table *table) +enum { + HAS_GLOBAL_MOCS = BIT(0), + HAS_ENGINE_MOCS = BIT(1), + HAS_RENDER_L3CC = BIT(2), +}; + +static bool has_l3cc(const struct drm_i915_private *i915) { + return true; +} + +static bool has_global_mocs(const struct drm_i915_private *i915) +{ + return HAS_GLOBAL_MOCS_REGISTERS(i915); +} + +static bool has_mocs(const struct drm_i915_private *i915) +{ + return !IS_DGFX(i915); +} + +static unsigned int get_mocs_settings(const struct drm_i915_private *i915, + struct drm_i915_mocs_table *table) +{ + unsigned int flags; + if (INTEL_GEN(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; @@ -300,13 +323,13 @@ static bool get_mocs_settings(const struct drm_i915_private *i915, table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = broxton_mocs_table; } else { - WARN_ONCE(INTEL_GEN(i915) >= 9, - "Platform that should have a MOCS table does not.\n"); - return false; + drm_WARN_ONCE(&i915->drm, INTEL_GEN(i915) >= 9, + "Platform that should have a MOCS table does not.\n"); + return 0; } if (GEM_DEBUG_WARN_ON(table->size > table->n_entries)) - return false; + return 0; /* WaDisableSkipCaching:skl,bxt,kbl,glk */ if (IS_GEN(i915, 9)) { @@ -315,10 +338,20 @@ static bool get_mocs_settings(const struct drm_i915_private *i915, for (i = 0; i < table->size; i++) if (GEM_DEBUG_WARN_ON(table->table[i].l3cc_value & (L3_ESC(1) | L3_SCC(0x7)))) - return false; + return 0; } - return true; + flags = 0; + if (has_mocs(i915)) { + if (has_global_mocs(i915)) + flags |= 
HAS_GLOBAL_MOCS; + else + flags |= HAS_ENGINE_MOCS; + } + if (has_l3cc(i915)) + flags |= HAS_RENDER_L3CC; + + return flags; } /* @@ -411,18 +444,20 @@ static void init_l3cc_table(struct intel_engine_cs *engine, void intel_mocs_init_engine(struct intel_engine_cs *engine) { struct drm_i915_mocs_table table; + unsigned int flags; /* Called under a blanket forcewake */ assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); - if (!get_mocs_settings(engine->i915, &table)) + flags = get_mocs_settings(engine->i915, &table); + if (!flags) return; /* Platforms with global MOCS do not need per-engine initialization. */ - if (!HAS_GLOBAL_MOCS_REGISTERS(engine->i915)) + if (flags & HAS_ENGINE_MOCS) init_mocs_table(engine, &table); - if (engine->class == RENDER_CLASS) + if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS) init_l3cc_table(engine, &table); } @@ -431,26 +466,17 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } -static void init_global_mocs(struct intel_gt *gt) +void intel_mocs_init(struct intel_gt *gt) { struct drm_i915_mocs_table table; + unsigned int flags; /* * LLC and eDRAM control values are not applicable to dgfx */ - if (IS_DGFX(gt->i915)) - return; - - if (!get_mocs_settings(gt->i915, &table)) - return; - - __init_mocs_table(gt->uncore, &table, global_mocs_offset()); -} - -void intel_mocs_init(struct intel_gt *gt) -{ - if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) - init_global_mocs(gt); + flags = get_mocs_settings(gt->i915, &table); + if (flags & HAS_GLOBAL_MOCS) + __init_mocs_table(gt->uncore, &table, global_mocs_offset()); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 9e303c29d6e3..3847ee44b181 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -7,6 +7,7 @@ #include <linux/pm_runtime.h> #include "i915_drv.h" +#include "i915_vgpu.h" #include "intel_gt.h" #include 
"intel_gt_pm.h" #include "intel_rc6.h" @@ -226,10 +227,7 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC_SLEEP, 0); set(uncore, GEN6_RC1e_THRESHOLD, 1000); - if (IS_IVYBRIDGE(i915)) - set(uncore, GEN6_RC6_THRESHOLD, 125000); - else - set(uncore, GEN6_RC6_THRESHOLD, 50000); + set(uncore, GEN6_RC6_THRESHOLD, 50000); set(uncore, GEN6_RC6p_THRESHOLD, 150000); set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */ @@ -299,7 +297,6 @@ static int vlv_rc6_init(struct intel_rc6 *rc6) pcbr_offset = (pcbr & ~4095) - i915->dsm.start; pctx = i915_gem_object_create_stolen_for_preallocated(i915, pcbr_offset, - I915_GTT_OFFSET_NONE, pctx_size); if (IS_ERR(pctx)) return PTR_ERR(pctx); @@ -323,10 +320,10 @@ static int vlv_rc6_init(struct intel_rc6 *rc6) return PTR_ERR(pctx); } - GEM_BUG_ON(range_overflows_t(u64, - i915->dsm.start, - pctx->stolen->start, - U32_MAX)); + GEM_BUG_ON(range_overflows_end_t(u64, + i915->dsm.start, + pctx->stolen->start, + U32_MAX)); pctx_paddr = i915->dsm.start + pctx->stolen->start; intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); @@ -542,6 +539,8 @@ void intel_rc6_init(struct intel_rc6 *rc6) void intel_rc6_sanitize(struct intel_rc6 *rc6) { + memset(rc6->prev_hw_residency, 0, sizeof(rc6->prev_hw_residency)); + if (rc6->enabled) { /* unbalanced suspend/resume */ rpm_get(rc6); rc6->enabled = false; @@ -604,6 +603,7 @@ void intel_rc6_unpark(struct intel_rc6 *rc6) void intel_rc6_park(struct intel_rc6 *rc6) { struct intel_uncore *uncore = rc6_to_uncore(rc6); + unsigned int target; if (!rc6->enabled) return; @@ -618,7 +618,14 @@ void intel_rc6_park(struct intel_rc6 *rc6) /* Turn off the HW timers and go directly to rc6 */ set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE); - set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT); + + if (HAS_RC6pp(rc6_to_i915(rc6))) + target = 0x6; /* deepest rc6 */ + else if (HAS_RC6p(rc6_to_i915(rc6))) + target = 0x5; /* deep rc6 */ + else + target = 0x4; /* normal rc6 */ + set(uncore, 
GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT); } void intel_rc6_disable(struct intel_rc6 *rc6) @@ -713,7 +720,7 @@ u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg) */ i = (i915_mmio_reg_offset(reg) - i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); - if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency))) + if (drm_WARN_ON_ONCE(&i915->drm, i >= ARRAY_SIZE(rc6->cur_residency))) return 0; fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index beee0cf89bce..80db3c9d785e 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -48,8 +48,10 @@ static void engine_skip_context(struct i915_request *rq) lockdep_assert_held(&engine->active.lock); list_for_each_entry_continue(rq, &engine->active.requests, sched.link) - if (rq->context == hung_ctx) - i915_request_skip(rq, -EIO); + if (rq->context == hung_ctx) { + i915_request_set_error_once(rq, -EIO); + __i915_request_skip(rq); + } } static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) @@ -72,9 +74,10 @@ static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) if (score) { atomic_add(score, &file_priv->ban_score); - DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", - ctx->name, score, - atomic_read(&file_priv->ban_score)); + drm_dbg(&ctx->i915->drm, + "client %s: gained %u ban score, now %u\n", + ctx->name, score, + atomic_read(&file_priv->ban_score)); } } @@ -85,19 +88,18 @@ static bool mark_guilty(struct i915_request *rq) bool banned; int i; + if (intel_context_is_closed(rq->context)) { + intel_context_set_banned(rq->context); + return true; + } + rcu_read_lock(); ctx = rcu_dereference(rq->context->gem_context); if (ctx && !kref_get_unless_zero(&ctx->ref)) ctx = NULL; rcu_read_unlock(); if (!ctx) - return false; - - if (i915_gem_context_is_closed(ctx)) { - 
intel_context_set_banned(rq->context); - banned = true; - goto out; - } + return intel_context_is_banned(rq->context); atomic_inc(&ctx->guilty_count); @@ -122,8 +124,8 @@ static bool mark_guilty(struct i915_request *rq) if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES)) banned = true; if (banned) { - DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n", - ctx->name, atomic_read(&ctx->guilty_count)); + drm_dbg(&ctx->i915->drm, "context %s: guilty %d, banned\n", + ctx->name, atomic_read(&ctx->guilty_count)); intel_context_set_banned(rq->context); } @@ -153,11 +155,12 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) rcu_read_lock(); /* protect the GEM context */ if (guilty) { - i915_request_skip(rq, -EIO); + i915_request_set_error_once(rq, -EIO); + __i915_request_skip(rq); if (mark_guilty(rq)) engine_skip_context(rq); } else { - dma_fence_set_error(&rq->fence, -EAGAIN); + i915_request_set_error_once(rq, -EAGAIN); mark_innocent(rq); } rcu_read_unlock(); @@ -226,7 +229,7 @@ static int g4x_do_reset(struct intel_gt *gt, GRDOM_MEDIA | GRDOM_RESET_ENABLE); ret = wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { - DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + drm_dbg(&gt->i915->drm, "Wait for media reset failed\n"); goto out; } @@ -234,7 +237,7 @@ static int g4x_do_reset(struct intel_gt *gt, GRDOM_RENDER | GRDOM_RESET_ENABLE); ret = wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { - DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + drm_dbg(&gt->i915->drm, "Wait for render reset failed\n"); goto out; } @@ -260,7 +263,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, 5000, 0, NULL); if (ret) { - DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + drm_dbg(&gt->i915->drm, "Wait for render reset failed\n"); goto out; } @@ -271,7 +274,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, 5000, 0, NULL); if (ret) { - DRM_DEBUG_DRIVER("Wait for media reset failed\n"); 
+ drm_dbg(&gt->i915->drm, "Wait for media reset failed\n"); goto out; } @@ -300,8 +303,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) 500, 0, NULL); if (err) - DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n", - hw_domain_mask); + drm_dbg(&gt->i915->drm, + "Wait for 0x%08x engines reset failed\n", + hw_domain_mask); return err; } @@ -401,7 +405,8 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask) return 0; if (ret) { - DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + drm_dbg(&engine->i915->drm, + "Wait for SFC forced lock ack failed\n"); return ret; } @@ -515,9 +520,10 @@ static int gen8_engine_reset_prepare(struct intel_engine_cs *engine) ret = __intel_wait_for_register_fw(uncore, reg, mask, ack, 700, 0, NULL); if (ret) - DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n", - engine->name, request, - intel_uncore_read_fw(uncore, reg)); + drm_err(&engine->i915->drm, + "%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n", + engine->name, request, + intel_uncore_read_fw(uncore, reg)); return ret; } @@ -781,7 +787,7 @@ static void nop_submit_request(struct i915_request *request) unsigned long flags; RQ_TRACE(request, "-EIO\n"); - dma_fence_set_error(&request->fence, -EIO); + i915_request_set_error_once(request, -EIO); spin_lock_irqsave(&engine->active.lock, flags); __i915_request_submit(request); @@ -800,13 +806,6 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) if (test_bit(I915_WEDGED, &gt->reset.flags)) return; - if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) { - struct drm_printer p = drm_debug_printer(__func__); - - for_each_engine(engine, gt, id) - intel_engine_dump(engine, &p, "%s\n", engine->name); - } - GT_TRACE(gt, "start\n"); /* @@ -845,10 +844,30 @@ void intel_gt_set_wedged(struct intel_gt *gt) { intel_wakeref_t wakeref; + if (test_bit(I915_WEDGED, &gt->reset.flags)) + return; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); 
mutex_lock(&gt->reset.mutex); - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - __intel_gt_set_wedged(gt); + + if (GEM_SHOW_DEBUG()) { + struct drm_printer p = drm_debug_printer(__func__); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + drm_printf(&p, "called from %pS\n", (void *)_RET_IP_); + for_each_engine(engine, gt, id) { + if (intel_engine_is_idle(engine)) + continue; + + intel_engine_dump(engine, &p, "%s\n", engine->name); + } + } + + __intel_gt_set_wedged(gt); + mutex_unlock(&gt->reset.mutex); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); } static bool __intel_gt_unset_wedged(struct intel_gt *gt) @@ -969,7 +988,7 @@ static int resume(struct intel_gt *gt) int ret; for_each_engine(engine, gt, id) { - ret = engine->resume(engine); + ret = intel_engine_resume(engine); if (ret) return ret; } @@ -1022,7 +1041,7 @@ void intel_gt_reset(struct intel_gt *gt, if (i915_modparams.reset) dev_err(gt->i915->drm.dev, "GPU reset not supported\n"); else - DRM_DEBUG_DRIVER("GPU reset disabled\n"); + drm_dbg(&gt->i915->drm, "GPU reset disabled\n"); goto error; } @@ -1049,8 +1068,9 @@ void intel_gt_reset(struct intel_gt *gt, */ ret = intel_gt_init_hw(gt); if (ret) { - DRM_ERROR("Failed to initialise HW following reset (%d)\n", - ret); + drm_err(&gt->i915->drm, + "Failed to initialise HW following reset (%d)\n", + ret); goto taint; } @@ -1126,9 +1146,8 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ - DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", - uses_guc ? "GuC " : "", - engine->name, ret); + drm_dbg(&gt->i915->drm, "%sFailed to reset %s, ret=%d\n", + uses_guc ? "GuC " : "", engine->name, ret); goto out; } @@ -1144,7 +1163,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) * have been reset to their default values. 
Follow the init_ring * process to program RING_MODE, HWSP and re-enable submission. */ - ret = engine->resume(engine); + ret = intel_engine_resume(engine); out: intel_engine_cancel_stop_cs(engine); @@ -1165,7 +1184,7 @@ static void intel_gt_reset_global(struct intel_gt *gt, kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); - DRM_DEBUG_DRIVER("resetting chip\n"); + drm_dbg(&gt->i915->drm, "resetting chip, engines=%x\n", engine_mask); kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); /* Use a watchdog to ensure that our reset completes */ diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 6ff803f397c4..8cda1b7e17ba 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring) if (atomic_fetch_inc(&ring->pin_count)) return 0; - flags = PIN_GLOBAL; - /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); if (vma->obj->stolen) flags |= PIN_MAPPABLE; else flags |= PIN_HIGH; - ret = i915_vma_pin(vma, 0, 0, flags); + ret = i915_ggtt_pin(vma, 0, flags); if (unlikely(ret)) goto err_unpin; diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index bc44fe8e5ffa..fdc3f10e12aa 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -29,11 +29,10 @@ #include <linux/log2.h> -#include <drm/i915_drm.h> - #include "gem/i915_gem_context.h" #include "gen6_ppgtt.h" +#include "gen7_renderclear.h" #include "i915_drv.h" #include "i915_trace.h" #include "intel_context.h" @@ -568,7 +567,8 @@ static void flush_cs_tlb(struct intel_engine_cs *engine) return; /* ring should be idle before issuing a sync flush*/ - WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + drm_WARN_ON(&dev_priv->drm, + 
(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); ENGINE_WRITE(engine, RING_INSTPM, _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | @@ -626,6 +626,27 @@ static bool stop_ring(struct intel_engine_cs *engine) return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0; } +static struct i915_address_space *vm_alias(struct i915_address_space *vm) +{ + if (i915_is_ggtt(vm)) + vm = &i915_vm_to_ggtt(vm)->alias->vm; + + return vm; +} + +static void set_pp_dir(struct intel_engine_cs *engine) +{ + struct i915_address_space *vm = vm_alias(engine->gt->vm); + + if (vm) { + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + + ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G); + ENGINE_WRITE(engine, RING_PP_DIR_BASE, + px_base(ppgtt->pd)->ggtt_offset << 10); + } +} + static int xcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -684,6 +705,8 @@ static int xcs_resume(struct intel_engine_cs *engine) GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); intel_ring_update_space(ring); + set_pp_dir(engine); + /* First wake the ring up to an empty/idle ring */ ENGINE_WRITE(engine, RING_HEAD, ring->head); ENGINE_WRITE(engine, RING_TAIL, ring->head); @@ -857,43 +880,6 @@ static int rcs_resume(struct intel_engine_cs *engine) intel_uncore_write(uncore, ECOSKPD, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE)); - /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ - if (IS_GEN_RANGE(i915, 4, 6)) - intel_uncore_write(uncore, MI_MODE, - _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); - - /* We need to disable the AsyncFlip performance optimisations in order - * to use MI_WAIT_FOR_EVENT within the CS. It should already be - * programmed to '1' on all products. 
- * - * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv - */ - if (IS_GEN_RANGE(i915, 6, 7)) - intel_uncore_write(uncore, MI_MODE, - _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); - - /* Required for the hardware to program scanline values for waiting */ - /* WaEnableFlushTlbInvalidationMode:snb */ - if (IS_GEN(i915, 6)) - intel_uncore_write(uncore, GFX_MODE, - _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT)); - - /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ - if (IS_GEN(i915, 7)) - intel_uncore_write(uncore, GFX_MODE_GEN7, - _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) | - _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); - - if (IS_GEN(i915, 6)) { - /* From the Sandybridge PRM, volume 1 part 3, page 24: - * "If this bit is set, STCunit will have LRA as replacement - * policy. [...] This bit must be reset. LRA replacement - * policy is not supported." - */ - intel_uncore_write(uncore, CACHE_MODE_0, - _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - } - if (IS_GEN_RANGE(i915, 6, 7)) intel_uncore_write(uncore, INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); @@ -910,9 +896,7 @@ static void reset_cancel(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. 
*/ list_for_each_entry(request, &engine->active.requests, sched.link) { - if (!i915_request_signaled(request)) - dma_fence_set_error(&request->fence, -EIO); - + i915_request_set_error_once(request, -EIO); i915_request_mark_complete(request); } @@ -1197,23 +1181,12 @@ static void ring_context_destroy(struct kref *ref) intel_context_free(ce); } -static struct i915_address_space *vm_alias(struct intel_context *ce) -{ - struct i915_address_space *vm; - - vm = ce->vm; - if (i915_is_ggtt(vm)) - vm = &i915_vm_to_ggtt(vm)->alias->vm; - - return vm; -} - static int __context_pin_ppgtt(struct intel_context *ce) { struct i915_address_space *vm; int err = 0; - vm = vm_alias(ce); + vm = vm_alias(ce->vm); if (vm) err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm))); @@ -1224,7 +1197,7 @@ static void __context_unpin_ppgtt(struct intel_context *ce) { struct i915_address_space *vm; - vm = vm_alias(ce); + vm = vm_alias(ce->vm); if (vm) gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm)); } @@ -1384,7 +1357,9 @@ static int load_pd_dir(struct i915_request *rq, return rq->engine->emit_flush(rq, EMIT_FLUSH); } -static inline int mi_set_context(struct i915_request *rq, u32 flags) +static inline int mi_set_context(struct i915_request *rq, + struct intel_context *ce, + u32 flags) { struct drm_i915_private *i915 = rq->i915; struct intel_engine_cs *engine = rq->engine; @@ -1459,7 +1434,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->context->state) | flags; + *cs++ = i915_ggtt_offset(ce->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1574,21 +1549,64 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) return rq->engine->emit_flush(rq, EMIT_INVALIDATE); } +static int clear_residuals(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + int ret; + + ret = switch_mm(rq, 
vm_alias(engine->kernel_context->vm)); + if (ret) + return ret; + + if (engine->kernel_context->state) { + ret = mi_set_context(rq, + engine->kernel_context, + MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT); + if (ret) + return ret; + } + + ret = engine->emit_bb_start(rq, + engine->wa_ctx.vma->node.start, 0, + 0); + if (ret) + return ret; + + ret = engine->emit_flush(rq, EMIT_FLUSH); + if (ret) + return ret; + + /* Always invalidate before the next switch_mm() */ + return engine->emit_flush(rq, EMIT_INVALIDATE); +} + static int switch_context(struct i915_request *rq) { + struct intel_engine_cs *engine = rq->engine; struct intel_context *ce = rq->context; + void **residuals = NULL; int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); - ret = switch_mm(rq, vm_alias(ce)); + if (engine->wa_ctx.vma && ce != engine->kernel_context) { + if (engine->wa_ctx.vma->private != ce) { + ret = clear_residuals(rq); + if (ret) + return ret; + + residuals = &engine->wa_ctx.vma->private; + } + } + + ret = switch_mm(rq, vm_alias(ce->vm)); if (ret) return ret; if (ce->state) { u32 flags; - GEM_BUG_ON(rq->engine->id != RCS0); + GEM_BUG_ON(engine->id != RCS0); /* For resource streamer on HSW+ and power context elsewhere */ BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN); @@ -1600,7 +1618,7 @@ static int switch_context(struct i915_request *rq) else flags |= MI_RESTORE_INHIBIT; - ret = mi_set_context(rq, flags); + ret = mi_set_context(rq, ce, flags); if (ret) return ret; } @@ -1609,6 +1627,20 @@ static int switch_context(struct i915_request *rq) if (ret) return ret; + /* + * Now past the point of no return, this request _will_ be emitted. + * + * Or at least this preamble will be emitted, the request may be + * interrupted prior to submitting the user payload. If so, we + * still submit the "empty" request in order to preserve global + * state tracking such as this, our tracking of the current + * dirty context. 
+ */ + if (residuals) { + intel_context_put(*residuals); + *residuals = intel_context_get(ce); + } + return 0; } @@ -1662,7 +1694,8 @@ static void gen6_bsd_submit_request(struct i915_request *request) GEN6_BSD_SLEEP_INDICATOR, 0, 1000, 0, NULL)) - DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); + drm_err(&uncore->i915->drm, + "timed out waiting for the BSD ring to wake up\n"); /* Now that the ring is fully powered up, update the tail */ i9xx_submit_request(request); @@ -1787,11 +1820,16 @@ static void ring_release(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - WARN_ON(INTEL_GEN(dev_priv) > 2 && - (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + drm_WARN_ON(&dev_priv->drm, INTEL_GEN(dev_priv) > 2 && + (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); intel_engine_cleanup_common(engine); + if (engine->wa_ctx.vma) { + intel_context_put(engine->wa_ctx.vma->private); + i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); + } + intel_ring_unpin(engine->legacy.ring); intel_ring_put(engine->legacy.ring); @@ -1939,6 +1977,64 @@ static void setup_vecs(struct intel_engine_cs *engine) engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; } +static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine, + struct i915_vma * const vma) +{ + return gen7_setup_clear_gpr_bb(engine, vma); +} + +static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int size; + int err; + + size = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */); + if (size <= 0) + return size; + + size = ALIGN(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(engine->i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, engine->gt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vma->private = intel_context_create(engine); /* dummy residuals */ + if (IS_ERR(vma->private)) { + err = 
PTR_ERR(vma->private); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (err) + goto err_private; + + err = i915_vma_sync(vma); + if (err) + goto err_unpin; + + err = gen7_ctx_switch_bb_setup(engine, vma); + if (err) + goto err_unpin; + + engine->wa_ctx.vma = vma; + return 0; + +err_unpin: + i915_vma_unpin(vma); +err_private: + intel_context_put(vma->private); +err_obj: + i915_gem_object_put(obj); + return err; +} + int intel_ring_submission_setup(struct intel_engine_cs *engine) { struct intel_timeline *timeline; @@ -1992,11 +2088,19 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); + if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) { + err = gen7_ctx_switch_bb_init(engine); + if (err) + goto err_ring_unpin; + } + /* Finally, take ownership and responsibility for cleanup! */ engine->release = ring_release; return 0; +err_ring_unpin: + intel_ring_unpin(ring); err_ring: intel_ring_put(ring); err_timeline_unpin: @@ -2007,3 +2111,7 @@ err: intel_engine_cleanup_common(engine); return err; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_ring_submission.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index d2a3d935d186..cfaf141bac4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -4,6 +4,8 @@ * Copyright © 2019 Intel Corporation */ +#include <drm/i915_drm.h> + #include "i915_drv.h" #include "intel_gt.h" #include "intel_gt_irq.h" @@ -55,7 +57,7 @@ static u32 rps_pm_mask(struct intel_rps *rps, u8 val) if (val < rps->max_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; - mask &= rps->pm_events; + mask &= READ_ONCE(rps->pm_events); return rps_pm_sanitize_mask(rps, ~mask); } @@ -68,17 +70,19 @@ static void rps_reset_ei(struct intel_rps *rps) static void rps_enable_interrupts(struct intel_rps *rps) { struct intel_gt 
*gt = rps_to_gt(rps); + u32 events; rps_reset_ei(rps); if (IS_VALLEYVIEW(gt->i915)) /* WaGsvRC0ResidencyMethod:vlv */ - rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; + events = GEN6_PM_RP_UP_EI_EXPIRED; else - rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | - GEN6_PM_RP_DOWN_THRESHOLD | - GEN6_PM_RP_DOWN_TIMEOUT); + events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + WRITE_ONCE(rps->pm_events, events); spin_lock_irq(&gt->irq_lock); gen6_gt_pm_enable_irq(gt, rps->pm_events); spin_unlock_irq(&gt->irq_lock); @@ -115,8 +119,7 @@ static void rps_disable_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); - rps->pm_events = 0; - + WRITE_ONCE(rps->pm_events, 0); set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); spin_lock_irq(&gt->irq_lock); @@ -642,7 +645,7 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) { mutex_lock(&rps->power.mutex); if (interactive) { - if (!rps->power.interactive++ && rps->active) + if (!rps->power.interactive++ && READ_ONCE(rps->active)) rps_set_power(rps, HIGH_POWER); } else { GEM_BUG_ON(!rps->power.interactive); @@ -719,11 +722,15 @@ void intel_rps_unpark(struct intel_rps *rps) * performance, jump directly to RPe as our starting frequency. 
*/ mutex_lock(&rps->lock); - rps->active = true; + + WRITE_ONCE(rps->active, true); + freq = max(rps->cur_freq, rps->efficient_freq), freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit); intel_rps_set(rps, freq); + rps->last_adj = 0; + mutex_unlock(&rps->lock); if (INTEL_GEN(rps_to_i915(rps)) >= 6) @@ -743,7 +750,7 @@ void intel_rps_park(struct intel_rps *rps) if (INTEL_GEN(i915) >= 6) rps_disable_interrupts(rps); - rps->active = false; + WRITE_ONCE(rps->active, false); if (rps->last_freq <= rps->idle_freq) return; @@ -763,14 +770,27 @@ void intel_rps_park(struct intel_rps *rps) intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); rps_set(rps, rps->idle_freq, false); intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); + + /* + * Since we will try and restart from the previously requested + * frequency on unparking, treat this idle point as a downclock + * interrupt and reduce the frequency for resume. If we park/unpark + * more frequently than the rps worker can run, we will not respond + * to any EI and never see a change in frequency. + * + * (Note we accommodate Cherryview's limitation of only using an + * even bin by applying it to all.) + */ + rps->cur_freq = + max_t(int, round_down(rps->cur_freq - 1, 2), rps->min_freq); } void intel_rps_boost(struct i915_request *rq) { - struct intel_rps *rps = &rq->engine->gt->rps; + struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; unsigned long flags; - if (i915_request_signaled(rq) || !rps->active) + if (i915_request_signaled(rq) || !READ_ONCE(rps->active)) return; /* Serializes with i915_request_retire() */ @@ -1026,7 +1046,8 @@ static bool chv_rps_enable(struct intel_rps *rps) vlv_punit_put(i915); /* RPS code assumes GPLL is used */ - WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, + "GPLL not enabled\n"); DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); @@ -1123,7 +1144,8 @@ static bool vlv_rps_enable(struct intel_rps *rps) vlv_punit_put(i915); /* RPS code assumes GPLL is used */ - WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, + "GPLL not enabled\n"); DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); @@ -1191,11 +1213,11 @@ void intel_rps_enable(struct intel_rps *rps) if (!rps->enabled) return; - WARN_ON(rps->max_freq < rps->min_freq); - WARN_ON(rps->idle_freq > rps->max_freq); + drm_WARN_ON(&i915->drm, rps->max_freq < rps->min_freq); + drm_WARN_ON(&i915->drm, rps->idle_freq > rps->max_freq); - WARN_ON(rps->efficient_freq < rps->min_freq); - WARN_ON(rps->efficient_freq > rps->max_freq); + drm_WARN_ON(&i915->drm, rps->efficient_freq < rps->min_freq); + drm_WARN_ON(&i915->drm, rps->efficient_freq > rps->max_freq); } static void gen6_rps_disable(struct intel_rps *rps) @@ -1390,9 +1412,9 @@ static void chv_rps_init(struct intel_rps *rps) BIT(VLV_IOSF_SB_NC) | BIT(VLV_IOSF_SB_CCK)); - WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | - rps->min_freq) & 1, - "Odd GPU freq values\n"); + drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq | + rps->rp1_freq | rps->min_freq) & 1, + "Odd GPU freq values\n"); } static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) @@ -1451,12 +1473,12 @@ static void rps_work(struct work_struct *work) u32 pm_iir = 0; spin_lock_irq(&gt->irq_lock); - pm_iir = fetch_and_zero(&rps->pm_iir); + pm_iir = fetch_and_zero(&rps->pm_iir) & READ_ONCE(rps->pm_events); client_boost = atomic_read(&rps->num_waiters); spin_unlock_irq(&gt->irq_lock); /* Make sure we didn't queue anything we're not going to process. 
*/ - if ((pm_iir & rps->pm_events) == 0 && !client_boost) + if (!pm_iir && !client_boost) goto out; mutex_lock(&rps->lock); @@ -1552,11 +1574,15 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) { struct intel_gt *gt = rps_to_gt(rps); + u32 events; - if (pm_iir & rps->pm_events) { + events = pm_iir & READ_ONCE(rps->pm_events); + if (events) { spin_lock(&gt->irq_lock); - gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events); - rps->pm_iir |= pm_iir & rps->pm_events; + + gen6_gt_pm_mask_irq(gt, events); + rps->pm_iir |= events; + schedule_work(&rps->work); spin_unlock(&gt->irq_lock); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index d8d9f1179c2b..91debbc97c9a 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -312,7 +312,7 @@ int intel_timeline_pin(struct intel_timeline *tl) if (atomic_add_unless(&tl->pin_count, 1, 0)) return 0; - err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH); if (err) return err; @@ -410,6 +410,8 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, void *vaddr; int err; + might_lock(&tl->gt->ggtt->vm.mutex); + /* * If there is an outstanding GPU reference to this cacheline, * such as it being sampled by a HW semaphore on another timeline, @@ -435,7 +437,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, goto err_rollback; } - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_ggtt_pin(vma, 0, PIN_HIGH); if (err) { __idle_hwsp_free(vma->private, cacheline); goto err_rollback; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 6c2f8462e0f3..5176ad1a3976 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -116,17 +116,17 @@ static void _wa_add(struct i915_wa_list *wal, 
const struct i915_wa *wa) } else { wa_ = &wal->list[mid]; - if ((wa->mask & ~wa_->mask) == 0) { - DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n", + if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) { + DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n", i915_mmio_reg_offset(wa_->reg), - wa_->mask, wa_->val); + wa_->clr, wa_->set); - wa_->val &= ~wa->mask; + wa_->set &= ~wa->clr; } wal->wa_count++; - wa_->val |= wa->val; - wa_->mask |= wa->mask; + wa_->set |= wa->set; + wa_->clr |= wa->clr; wa_->read |= wa->read; return; } @@ -147,13 +147,13 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) } } -static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val, u32 read_mask) +static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, + u32 clear, u32 set, u32 read_mask) { struct i915_wa wa = { .reg = reg, - .mask = mask, - .val = val, + .clr = clear, + .set = set, .read = read_mask, }; @@ -161,38 +161,43 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, } static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, - u32 val) +wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) { - wa_add(wal, reg, mask, val, mask); + wa_add(wal, reg, clear, set, clear); } static void -wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set) { - wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); + wa_write_masked_or(wal, reg, ~0, set); } static void -wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) { - wa_write_masked_or(wal, reg, ~0, val); + wa_write_masked_or(wal, reg, set, set); } static void -wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { - wa_write_masked_or(wal, reg, 
val, val); + wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val); +} + +static void +wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) +{ + wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val); } #define WA_SET_BIT_MASKED(addr, mask) \ - wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) + wa_masked_en(wal, (addr), (mask)) #define WA_CLR_BIT_MASKED(addr, mask) \ - wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask)) + wa_masked_dis(wal, (addr), (mask)) #define WA_SET_FIELD_MASKED(addr, mask, value) \ - wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value))) + wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value))) static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) @@ -570,12 +575,29 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* allow headerless messages for preemptible GPGPU context */ WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE, GEN11_SAMPLER_ENABLE_HEADLESS_MSG); + + /* Wa_1604278689:icl,ehl */ + wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID); + wa_write_masked_or(wal, IVB_FBC_RT_BASE_UPPER, + 0, /* write-only register; skip validation */ + 0xFFFFFFFF); + + /* Wa_1406306137:icl,ehl */ + wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); } static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - /* Wa_1409142259:tgl */ + /* + * Wa_1409142259:tgl + * Wa_1409347922:tgl + * Wa_1409252684:tgl + * Wa_1409217633:tgl + * Wa_1409207793:tgl + * Wa_1409178076:tgl + * Wa_1408979724:tgl + */ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); @@ -588,6 +610,11 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, */ wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, FF_MODE2_TDS_TIMER_128, 0); + + /* WaDisableGPGPUMidThreadPreemption:tgl */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + 
GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); } static void @@ -657,7 +684,7 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) *cs++ = MI_LOAD_REGISTER_IMM(wal->count); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { *cs++ = i915_mmio_reg_offset(wa->reg); - *cs++ = wa->val; + *cs++ = wa->set; } *cs++ = MI_NOOP; @@ -822,7 +849,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n", intel_sseu_get_subslices(sseu, slice), l3_en); subslice = fls(l3_en); - WARN_ON(!subslice); + drm_WARN_ON(&i915->drm, !subslice); } subslice--; @@ -893,11 +920,6 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) SLICE_UNIT_LEVEL_CLKGATE, MSCUNIT_CLKGATE_DIS); - /* Wa_1406680159:icl */ - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE, - GWUNIT_CLKGATE_DIS); - /* Wa_1406838659:icl (pre-prod) */ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) wa_write_or(wal, @@ -926,7 +948,7 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS); - /* Wa_1409180338:tgl */ + /* Wa_1607087056:tgl also know as BUG:1409180338 */ if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, @@ -986,11 +1008,10 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) static bool wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) { - if ((cur ^ wa->val) & wa->read) { - DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", + if ((cur ^ wa->set) & wa->read) { + DRM_ERROR("%s workaround lost on %s! 
(%x=%x/%x, expected %x)\n", name, from, i915_mmio_reg_offset(wa->reg), - cur, cur & wa->read, - wa->val, wa->mask); + cur, cur & wa->read, wa->set); return false; } @@ -1015,7 +1036,10 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) intel_uncore_forcewake_get__locked(uncore, fw); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { - intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val); + if (wa->clr) + intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set); + else + intel_uncore_write_fw(uncore, wa->reg, wa->set); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) wa_verify(wa, intel_uncore_read_fw(uncore, wa->reg), @@ -1239,6 +1263,7 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine) case RENDER_CLASS: /* * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl + * Wa_1408556865:tgl * * This covers 4 registers which are next to one another : * - PS_INVOCATION_COUNT @@ -1249,6 +1274,12 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine) whitelist_reg_ext(w, PS_INVOCATION_COUNT, RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4); + + /* Wa_1808121037:tgl */ + whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1); + + /* Wa_1806527549:tgl */ + whitelist_reg(w, HIZ_CHICKEN); break; default: break; @@ -1315,19 +1346,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) { - /* Wa_1606700617:tgl */ - wa_masked_en(wal, - GEN9_CS_DEBUG_MODE1, - FF_DOP_CLOCK_GATE_DISABLE); - - /* Wa_1607138336:tgl */ + /* + * Wa_1607138336:tgl + * Wa_1607063988:tgl + */ wa_write_or(wal, GEN9_CTX_PREEMPT_REG, GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); - /* Wa_1607030317:tgl */ - /* Wa_1607186500:tgl */ - /* Wa_1607297627:tgl */ + /* + * Wa_1607030317:tgl + * Wa_1607186500:tgl + * Wa_1607297627:tgl there is 3 entries for this WA on BSpec, 2 + * of then says it is fixed on B0 the other one says it is + * permanent + 
*/ wa_masked_en(wal, GEN6_RC_SLEEP_PSMI_CONTROL, GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | @@ -1340,6 +1373,35 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, GEN7_SARCHKMD, GEN7_DISABLE_SAMPLER_PREFETCH); + + /* Wa_1407928979:tgl */ + wa_write_or(wal, + GEN7_FF_THREAD_MODE, + GEN12_FF_TESSELATION_DOP_GATE_DISABLE); + + /* + * Wa_1409085225:tgl + * Wa_14010229206:tgl + */ + wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); + + /* Wa_1408615072:tgl */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, + VSUNIT_CLKGATE_DIS_TGL); + } + + if (IS_TIGERLAKE(i915)) { + /* Wa_1606931601:tgl */ + wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); + + /* Wa_1409804808:tgl */ + wa_masked_en(wal, GEN7_ROW_CHICKEN2, + GEN12_PUSH_CONST_DEREF_HOLD_DIS); + + /* Wa_1606700617:tgl */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); } if (IS_GEN(i915, 11)) { @@ -1405,10 +1467,38 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN11_SCRATCH2, GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, 0); + + /* WaEnable32PlaneMode:icl */ + wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, + GEN11_ENABLE_32_PLANE_MODE); + + /* + * Wa_1408615072:icl,ehl (vsunit) + * Wa_1407596294:icl,ehl (hsunit) + */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); + + /* Wa_1407352427:icl,ehl */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, + PSDUNIT_CLKGATE_DIS); + + /* Wa_1406680159:icl,ehl */ + wa_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE, + GWUNIT_CLKGATE_DIS); + + /* + * Wa_1408767742:icl[a2..forever],ehl[all] + * Wa_1605460711:icl[a0..c0] + */ + wa_write_or(wal, + GEN7_FF_THREAD_MODE, + GEN12_FF_TESSELATION_DOP_GATE_DISABLE); } - if (IS_GEN_RANGE(i915, 9, 11)) { - /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */ + if (IS_GEN_RANGE(i915, 9, 12)) { + /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ 
wa_masked_en(wal, GEN7_FF_SLICE_CS_CHICKEN1, GEN9_FFSC_PERCTX_PREEMPT_CTRL); @@ -1452,6 +1542,52 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_L3SQCREG4, GEN8_LQSC_FLUSH_COHERENT_LINES); } + + if (IS_GEN(i915, 7)) + /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ + wa_masked_en(wal, + GFX_MODE_GEN7, + GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE); + + if (IS_GEN_RANGE(i915, 6, 7)) + /* + * We need to disable the AsyncFlip performance optimisations in + * order to use MI_WAIT_FOR_EVENT within the CS. It should + * already be programmed to '1' on all products. + * + * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv + */ + wa_masked_en(wal, + MI_MODE, + ASYNC_FLIP_PERF_DISABLE); + + if (IS_GEN(i915, 6)) { + /* + * Required for the hardware to program scanline values for + * waiting + * WaEnableFlushTlbInvalidationMode:snb + */ + wa_masked_en(wal, + GFX_MODE, + GFX_TLB_INVALIDATE_EXPLICIT); + + /* + * From the Sandybridge PRM, volume 1 part 3, page 24: + * "If this bit is set, STCunit will have LRA as replacement + * policy. [...] This bit must be reset. LRA replacement + * policy is not supported." + */ + wa_masked_dis(wal, + CACHE_MODE_0, + CM0_STC_EVICT_DISABLE_LRA_SNB); + } + + if (IS_GEN_RANGE(i915, 4, 6)) + /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ + wa_add(wal, MI_MODE, + 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH), + /* XXX bit doesn't stick on Broadwater */ + IS_I965G(i915) ? 
0 : VS_TIMER_DISPATCH); } static void @@ -1470,7 +1606,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) static void engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8)) + if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4)) return; if (engine->class == RENDER_CLASS) @@ -1483,7 +1619,7 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine) { struct i915_wa_list *wal = &engine->wa_list; - if (INTEL_GEN(engine->i915) < 8) + if (INTEL_GEN(engine->i915) < 4) return; wa_init_start(wal, "engine", engine->name); @@ -1626,6 +1762,16 @@ static int engine_wa_list_verify(struct intel_context *ce, goto err_vma; } + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, true); + if (err == 0) + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + if (err) { + i915_request_add(rq); + goto err_vma; + } + err = wa_list_srm(rq, wal, vma); if (err) goto err_vma; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h index e27ab1b710b3..d166a7145720 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -13,8 +13,8 @@ struct i915_wa { i915_reg_t reg; - u32 mask; - u32 val; + u32 clr; + u32 set; u32 read; }; diff --git a/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c b/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c new file mode 100644 index 000000000000..610ca7687735 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/ivb_clear_kernel.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + * + * Generated by: IGT Gpu Tools on Fri 21 Feb 2020 05:29:32 AM UTC + */ + +static const u32 ivb_clear_kernel[] = { + 0x00000001, 0x26020128, 0x00000024, 0x00000000, + 0x00000040, 0x20280c21, 0x00000028, 0x00000001, + 0x01000010, 0x20000c20, 0x0000002c, 0x00000000, + 
0x00010220, 0x34001c00, 0x00001400, 0x0000002c, + 0x00600001, 0x20600061, 0x00000000, 0x00000000, + 0x00000008, 0x20601c85, 0x00000e00, 0x0000000c, + 0x00000005, 0x20601ca5, 0x00000060, 0x00000001, + 0x00000008, 0x20641c85, 0x00000e00, 0x0000000d, + 0x00000005, 0x20641ca5, 0x00000064, 0x00000003, + 0x00000041, 0x207424a5, 0x00000064, 0x00000034, + 0x00000040, 0x206014a5, 0x00000060, 0x00000074, + 0x00000008, 0x20681c85, 0x00000e00, 0x00000008, + 0x00000005, 0x20681ca5, 0x00000068, 0x0000000f, + 0x00000041, 0x20701ca5, 0x00000060, 0x00000010, + 0x00000040, 0x206814a5, 0x00000068, 0x00000070, + 0x00600001, 0x20a00061, 0x00000000, 0x00000000, + 0x00000005, 0x206c1c85, 0x00000e00, 0x00000007, + 0x00000041, 0x206c1ca5, 0x0000006c, 0x00000004, + 0x00600001, 0x20800021, 0x008d0000, 0x00000000, + 0x00000001, 0x20800021, 0x0000006c, 0x00000000, + 0x00000001, 0x20840021, 0x00000068, 0x00000000, + 0x00000001, 0x20880061, 0x00000000, 0x00000003, + 0x00000005, 0x208c0d21, 0x00000086, 0xffffffff, + 0x05600032, 0x20a00fa1, 0x008d0080, 0x02190001, + 0x00000040, 0x20a01ca5, 0x000000a0, 0x00000001, + 0x05600032, 0x20a00fa1, 0x008d0080, 0x040a8001, + 0x02000040, 0x20281c21, 0x00000028, 0xffffffff, + 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc, + 0x00000001, 0x26020128, 0x00000024, 0x00000000, + 0x00000001, 0x220010e4, 0x00000000, 0x00000000, + 0x00000001, 0x220831ec, 0x00000000, 0x007f007f, + 0x00600001, 0x20400021, 0x008d0000, 0x00000000, + 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000, + 0x00200001, 0x20400121, 0x00450020, 0x00000000, + 0x00000001, 0x20480061, 0x00000000, 0x000f000f, + 0x00000005, 0x204c0d21, 0x00000046, 0xffffffef, + 0x00800001, 0x20600061, 0x00000000, 0x00000000, + 0x00800001, 0x20800061, 0x00000000, 0x00000000, + 0x00800001, 0x20a00061, 0x00000000, 0x00000000, + 0x00800001, 0x20c00061, 0x00000000, 0x00000000, + 0x00800001, 0x20e00061, 0x00000000, 0x00000000, + 0x00800001, 0x21000061, 0x00000000, 0x00000000, + 0x00800001, 0x21200061, 0x00000000, 0x00000000, + 
0x00800001, 0x21400061, 0x00000000, 0x00000000, + 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000, + 0x00000040, 0x20402d21, 0x00000020, 0x00100010, + 0x05600032, 0x20000fa0, 0x008d0040, 0x120a8000, + 0x02000040, 0x22083d8c, 0x00000208, 0xffffffff, + 0x00800001, 0xa0000109, 0x00000602, 0x00000000, + 0x00000040, 0x22001c84, 0x00000200, 0x00000020, + 0x00010220, 0x34001c00, 0x00001400, 0xfffffff8, + 0x07600032, 0x20000fa0, 0x008d0fe0, 0x82000010, +}; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index f2806381733f..4a53ded7c2dd 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -65,6 +65,9 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) return NULL; } i915_active_init(&ring->vma->active, NULL, NULL); + __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma)); + __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags); + ring->vma->node.size = sz; intel_ring_update_space(ring); @@ -241,9 +244,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. 
*/ list_for_each_entry(request, &engine->active.requests, sched.link) { - if (!i915_request_signaled(request)) - dma_fence_set_error(&request->fence, -EIO); - + i915_request_set_error_once(request, -EIO); i915_request_mark_complete(request); } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 43d4d589749f..697114dd1f47 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -142,6 +142,24 @@ out: return err; } +static void engine_heartbeat_disable(struct intel_engine_cs *engine, + unsigned long *saved) +{ + *saved = engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine, + unsigned long saved) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = saved; +} + static int live_idle_flush(void *arg) { struct intel_gt *gt = arg; @@ -152,9 +170,11 @@ static int live_idle_flush(void *arg) /* Check that we can flush the idle barriers */ for_each_engine(engine, gt, id) { - intel_engine_pm_get(engine); + unsigned long heartbeat; + + engine_heartbeat_disable(engine, &heartbeat); err = __live_idle_pulse(engine, intel_engine_flush_barriers); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine, heartbeat); if (err) break; } @@ -172,9 +192,11 @@ static int live_idle_pulse(void *arg) /* Check that heartbeat pulses flush the idle barriers */ for_each_engine(engine, gt, id) { - intel_engine_pm_get(engine); + unsigned long heartbeat; + + engine_heartbeat_disable(engine, &heartbeat); err = __live_idle_pulse(engine, intel_engine_pulse); - intel_engine_pm_put(engine); + engine_heartbeat_enable(engine, heartbeat); if (err && err != -ENODEV) break; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c 
b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 3e5e6c86e843..2b2efff6e19d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -268,7 +268,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) cancel_rq: if (err) { - i915_request_skip(rq, err); + i915_request_set_error_once(rq, err); i915_request_add(rq); } unpin_hws: @@ -1640,7 +1640,7 @@ static int igt_reset_engines_atomic(void *arg) if (!intel_has_reset_engine(gt)) return 0; - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; igt_global_reset_lock(gt); diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c index fd3770e48ac7..a912159693fd 100644 --- a/drivers/gpu/drm/i915/gt/selftest_llc.c +++ b/drivers/gpu/drm/i915/gt/selftest_llc.c @@ -18,10 +18,8 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) wakeref = intel_runtime_pm_get(llc_to_gt(llc)->uncore->rpm); - if (!get_ia_constants(llc, &consts)) { - err = -ENODEV; + if (!get_ia_constants(llc, &consts)) goto out_rpm; - } for (gpu_freq = consts.min_gpu_freq; gpu_freq <= consts.max_gpu_freq; @@ -71,10 +69,5 @@ out_rpm: int st_llc_verify(struct intel_llc *llc) { - int err = 0; - - if (HAS_LLC(llc_to_gt(llc)->i915)) - err = gen6_verify_ring_freq(llc); - - return err; + return gen6_verify_ring_freq(llc); } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index b292f8cbd0bf..6f06ba750a0a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -68,6 +68,71 @@ static void engine_heartbeat_enable(struct intel_engine_cs *engine, engine->props.heartbeat_interval_ms = saved; } +static int wait_for_submit(struct intel_engine_cs *engine, + struct i915_request *rq, + unsigned long timeout) +{ + timeout += jiffies; + do { + cond_resched(); + intel_engine_flush_submission(engine); + + if 
(READ_ONCE(engine->execlists.pending[0])) + continue; + + if (i915_request_is_active(rq)) + return 0; + + if (i915_request_started(rq)) /* that was quick! */ + return 0; + } while (time_before(jiffies, timeout)); + + return -ETIME; +} + +static int wait_for_reset(struct intel_engine_cs *engine, + struct i915_request *rq, + unsigned long timeout) +{ + timeout += jiffies; + + do { + cond_resched(); + intel_engine_flush_submission(engine); + + if (READ_ONCE(engine->execlists.pending[0])) + continue; + + if (i915_request_completed(rq)) + break; + + if (READ_ONCE(rq->fence.error)) + break; + } while (time_before(jiffies, timeout)); + + flush_scheduled_work(); + + if (rq->fence.error != -EIO) { + pr_err("%s: hanging request %llx:%lld not reset\n", + engine->name, + rq->fence.context, + rq->fence.seqno); + return -EINVAL; + } + + /* Give the request a jiffie to complete after flushing the worker */ + if (i915_request_wait(rq, 0, + max(0l, (long)(timeout - jiffies)) + 1) < 0) { + pr_err("%s: hanging request %llx:%lld did not complete\n", + engine->name, + rq->fence.context, + rq->fence.seqno); + return -ETIME; + } + + return 0; +} + static int live_sanitycheck(void *arg) { struct intel_gt *gt = arg; @@ -285,6 +350,84 @@ static int live_unlite_preempt(void *arg) return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); } +static int live_pin_rewind(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * We have to be careful not to trust intel_ring too much, for example + * ring->head is updated upon retire which is out of sync with pinning + * the context. Thus we cannot use ring->head to set CTX_RING_HEAD, + * or else we risk writing an older, stale value. + * + * To simulate this, let's apply a bit of deliberate sabotague. 
+ */ + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + struct i915_request *rq; + struct intel_ring *ring; + struct igt_live_test t; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + break; + } + + /* Keep the context awake while we play games */ + err = i915_active_acquire(&ce->active); + if (err) { + intel_context_unpin(ce); + intel_context_put(ce); + break; + } + ring = ce->ring; + + /* Poison the ring, and offset the next request from HEAD */ + memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32)); + ring->emit = ring->size / 2; + ring->tail = ring->emit; + GEM_BUG_ON(ring->head); + + intel_context_unpin(ce); + + /* Submit a simple nop request */ + GEM_BUG_ON(intel_context_is_pinned(ce)); + rq = intel_context_create_request(ce); + i915_active_release(&ce->active); /* e.g. async retire */ + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + GEM_BUG_ON(!rq->head); + i915_request_add(rq); + + /* Expect not to hang! */ + if (igt_live_test_end(&t)) { + err = -EIO; + break; + } + } + + return err; +} + static int live_hold_reset(void *arg) { struct intel_gt *gt = arg; @@ -386,6 +529,152 @@ out: return err; } +static const char *error_repr(int err) +{ + return err ? "bad" : "good"; +} + +static int live_error_interrupt(void *arg) +{ + static const struct error_phase { + enum { GOOD = 0, BAD = -EIO } error[2]; + } phases[] = { + { { BAD, GOOD } }, + { { BAD, BAD } }, + { { BAD, GOOD } }, + { { GOOD, GOOD } }, /* sentinel */ + }; + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning + * of invalid commands in user batches that will cause a GPU hang. 
+ * This is a faster mechanism than using hangcheck/heartbeats, but + * only detects problems the HW knows about -- it will not warn when + * we kill the HW! + * + * To verify our detection and reset, we throw some invalid commands + * at the HW and wait for the interrupt. + */ + + if (!intel_has_reset_engine(gt)) + return 0; + + for_each_engine(engine, gt, id) { + const struct error_phase *p; + unsigned long heartbeat; + int err = 0; + + engine_heartbeat_disable(engine, &heartbeat); + + for (p = phases; p->error[0] != GOOD; p++) { + struct i915_request *client[ARRAY_SIZE(phases->error)]; + u32 *cs; + int i; + + memset(client, 0, sizeof(*client)); + for (i = 0; i < ARRAY_SIZE(client); i++) { + struct intel_context *ce; + struct i915_request *rq; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) { + i915_request_add(rq); + goto out; + } + } + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto out; + } + + if (p->error[i]) { + *cs++ = 0xdeadbeef; + *cs++ = 0xdeadbeef; + } else { + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + } + + client[i] = i915_request_get(rq); + i915_request_add(rq); + } + + err = wait_for_submit(engine, client[0], HZ / 2); + if (err) { + pr_err("%s: first request did not start within time!\n", + engine->name); + err = -ETIME; + goto out; + } + + for (i = 0; i < ARRAY_SIZE(client); i++) { + if (i915_request_wait(client[i], 0, HZ / 5) < 0) + pr_debug("%s: %s request incomplete!\n", + engine->name, + error_repr(p->error[i])); + + if (!i915_request_started(client[i])) { + pr_debug("%s: %s request not stated!\n", + engine->name, + error_repr(p->error[i])); + err = -ETIME; + goto out; + } + + /* Kick the tasklet to process the error */ + 
intel_engine_flush_submission(engine); + if (client[i]->fence.error != p->error[i]) { + pr_err("%s: %s request completed with wrong error code: %d\n", + engine->name, + error_repr(p->error[i]), + client[i]->fence.error); + err = -EINVAL; + goto out; + } + } + +out: + for (i = 0; i < ARRAY_SIZE(client); i++) + if (client[i]) + i915_request_put(client[i]); + if (err) { + pr_err("%s: failed at phase[%zd] { %d, %d }\n", + engine->name, p - phases, + p->error[0], p->error[1]); + break; + } + } + + engine_heartbeat_enable(engine, heartbeat); + if (err) { + intel_gt_set_wedged(gt); + return err; + } + } + + return 0; +} + static int emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) { @@ -580,6 +869,10 @@ static int live_timeslice_preempt(void *arg) if (err) goto err_map; + err = i915_vma_sync(vma); + if (err) + goto err_pin; + for_each_prime_number_from(count, 1, 16) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -614,33 +907,227 @@ err_obj: return err; } -static struct i915_request *nop_request(struct intel_engine_cs *engine) +static struct i915_request * +create_rewinder(struct intel_context *ce, + struct i915_request *wait, + void *slot, int idx) { + const u32 offset = + i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(slot); struct i915_request *rq; + u32 *cs; + int err; - rq = intel_engine_create_kernel_request(engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) return rq; + if (wait) { + err = i915_request_await_dma_fence(rq, &wait->fence); + if (err) + goto err; + } + + cs = intel_ring_begin(rq, 10); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + *cs++ = offset; + *cs++ = 0; + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); + 
*cs++ = offset + idx * sizeof(u32); + *cs++ = 0; + + intel_ring_advance(rq, cs); + + rq->sched.attr.priority = I915_PRIORITY_MASK; + err = 0; +err: i915_request_get(rq); i915_request_add(rq); + if (err) { + i915_request_put(rq); + return ERR_PTR(err); + } return rq; } -static int wait_for_submit(struct intel_engine_cs *engine, - struct i915_request *rq, - unsigned long timeout) +static int live_timeslice_rewind(void *arg) { - timeout += jiffies; - do { - cond_resched(); + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * The usual presumption on timeslice expiration is that we replace + * the active context with another. However, given a chain of + * dependencies we may end up with replacing the context with itself, + * but only a few of those requests, forcing us to rewind the + * RING_TAIL of the original request. + */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + for_each_engine(engine, gt, id) { + enum { A1, A2, B1 }; + enum { X = 1, Y, Z }; + struct i915_request *rq[3] = {}; + struct intel_context *ce; + unsigned long heartbeat; + unsigned long timeslice; + int i, err = 0; + u32 *slot; + + if (!intel_engine_has_timeslices(engine)) + continue; + + /* + * A:rq1 -- semaphore wait, timestamp X + * A:rq2 -- write timestamp Y + * + * B:rq1 [await A:rq1] -- write timestamp Z + * + * Force timeslice, release semaphore. 
+ * + * Expect execution/evaluation order XZY + */ + + engine_heartbeat_disable(engine, &heartbeat); + timeslice = xchg(&engine->props.timeslice_duration_ms, 1); + + slot = memset32(engine->status_page.addr + 1000, 0, 4); + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err; + } + + rq[0] = create_rewinder(ce, NULL, slot, 1); + if (IS_ERR(rq[0])) { + intel_context_put(ce); + goto err; + } + + rq[1] = create_rewinder(ce, NULL, slot, 2); + intel_context_put(ce); + if (IS_ERR(rq[1])) + goto err; + + err = wait_for_submit(engine, rq[1], HZ / 2); + if (err) { + pr_err("%s: failed to submit first context\n", + engine->name); + goto err; + } + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err; + } + + rq[2] = create_rewinder(ce, rq[0], slot, 3); + intel_context_put(ce); + if (IS_ERR(rq[2])) + goto err; + + err = wait_for_submit(engine, rq[2], HZ / 2); + if (err) { + pr_err("%s: failed to submit second context\n", + engine->name); + goto err; + } + GEM_BUG_ON(!timer_pending(&engine->execlists.timer)); + + /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ + GEM_BUG_ON(!i915_request_is_active(rq[A1])); + GEM_BUG_ON(!i915_request_is_active(rq[A2])); + GEM_BUG_ON(!i915_request_is_active(rq[B1])); + + /* Wait for the timeslice to kick in */ + del_timer(&engine->execlists.timer); + tasklet_hi_schedule(&engine->execlists.tasklet); intel_engine_flush_submission(engine); - if (i915_request_is_active(rq)) - return 0; - } while (time_before(jiffies, timeout)); - return -ETIME; + /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ + GEM_BUG_ON(!i915_request_is_active(rq[A1])); + GEM_BUG_ON(!i915_request_is_active(rq[B1])); + GEM_BUG_ON(i915_request_is_active(rq[A2])); + + /* Release the hounds! 
*/ + slot[0] = 1; + wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */ + + for (i = 1; i <= 3; i++) { + unsigned long timeout = jiffies + HZ / 2; + + while (!READ_ONCE(slot[i]) && + time_before(jiffies, timeout)) + ; + + if (!time_before(jiffies, timeout)) { + pr_err("%s: rq[%d] timed out\n", + engine->name, i - 1); + err = -ETIME; + goto err; + } + + pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]); + } + + /* XZY: XZ < XY */ + if (slot[Z] - slot[X] >= slot[Y] - slot[X]) { + pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n", + engine->name, + slot[Z] - slot[X], + slot[Y] - slot[X]); + err = -EINVAL; + } + +err: + memset32(&slot[0], -1, 4); + wmb(); + + engine->props.timeslice_duration_ms = timeslice; + engine_heartbeat_enable(engine, heartbeat); + for (i = 0; i < 3; i++) + i915_request_put(rq[i]); + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + +static struct i915_request *nop_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + + return rq; } static long timeslice_threshold(const struct intel_engine_cs *engine) @@ -688,6 +1175,10 @@ static int live_timeslice_queue(void *arg) if (err) goto err_map; + err = i915_vma_sync(vma); + if (err) + goto err_pin; + for_each_engine(engine, gt, id) { struct i915_sched_attr attr = { .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), @@ -774,6 +1265,7 @@ err_heartbeat: break; } +err_pin: i915_vma_unpin(vma); err_map: i915_gem_object_unpin_map(obj); @@ -832,6 +1324,10 @@ static int live_busywait_preempt(void *arg) if (err) goto err_map; + err = i915_vma_sync(vma); + if (err) + goto err_vma; + for_each_engine(engine, gt, id) { struct i915_request *lo, *hi; struct igt_live_test t; @@ -1352,14 +1848,9 @@ static int __cancel_active0(struct live_preempt_cancel *arg) if (err) goto out; - if 
(i915_request_wait(rq, 0, HZ / 5) < 0) { - err = -EIO; - goto out; - } - - if (rq->fence.error != -EIO) { - pr_err("Cancelled inflight0 request did not report -EIO\n"); - err = -EINVAL; + err = wait_for_reset(arg->engine, rq, HZ / 2); + if (err) { + pr_err("Cancelled inflight0 request did not reset\n"); goto out; } @@ -1417,10 +1908,9 @@ static int __cancel_active1(struct live_preempt_cancel *arg) goto out; igt_spinner_end(&arg->a.spin); - if (i915_request_wait(rq[1], 0, HZ / 5) < 0) { - err = -EIO; + err = wait_for_reset(arg->engine, rq[1], HZ / 2); + if (err) goto out; - } if (rq[0]->fence.error != 0) { pr_err("Normal inflight0 request did not complete\n"); @@ -1500,10 +1990,9 @@ static int __cancel_queued(struct live_preempt_cancel *arg) if (err) goto out; - if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { - err = -EIO; + err = wait_for_reset(arg->engine, rq[2], HZ / 2); + if (err) goto out; - } if (rq[0]->fence.error != -EIO) { pr_err("Cancelled inflight0 request did not report -EIO\n"); @@ -1561,14 +2050,9 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) if (err) goto out; - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - err = -EIO; - goto out; - } - - if (rq->fence.error != -EIO) { - pr_err("Cancelled inflight0 request did not report -EIO\n"); - err = -EINVAL; + err = wait_for_reset(arg->engine, rq, HZ / 2); + if (err) { + pr_err("Cancelled inflight0 request did not reset\n"); goto out; } @@ -1656,7 +2140,7 @@ static int live_suppress_self_preempt(void *arg) if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; /* presume black blox */ if (intel_vgpu_active(gt->i915)) @@ -2279,117 +2763,6 @@ static int live_preempt_gang(void *arg) return 0; } -static int live_preempt_hang(void *arg) -{ - struct intel_gt *gt = arg; - struct i915_gem_context *ctx_hi, *ctx_lo; - struct igt_spinner spin_hi, spin_lo; - struct intel_engine_cs *engine; - enum intel_engine_id id; - 
int err = -ENOMEM; - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (!intel_has_reset_engine(gt)) - return 0; - - if (igt_spinner_init(&spin_hi, gt)) - return -ENOMEM; - - if (igt_spinner_init(&spin_lo, gt)) - goto err_spin_hi; - - ctx_hi = kernel_context(gt->i915); - if (!ctx_hi) - goto err_spin_lo; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - - ctx_lo = kernel_context(gt->i915); - if (!ctx_lo) - goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - - for_each_engine(engine, gt, id) { - struct i915_request *rq; - - if (!intel_engine_has_preemption(engine)) - continue; - - rq = spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin_lo, rq)) { - GEM_TRACE("lo spinner failed to start\n"); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - err = -EIO; - goto err_ctx_lo; - } - - rq = spinner_create_request(&spin_hi, ctx_hi, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) { - igt_spinner_end(&spin_lo); - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - init_completion(&engine->execlists.preempt_hang.completion); - engine->execlists.preempt_hang.inject_hang = true; - - i915_request_add(rq); - - if (!wait_for_completion_timeout(&engine->execlists.preempt_hang.completion, - HZ / 10)) { - pr_err("Preemption did not occur within timeout!"); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - err = -EIO; - goto err_ctx_lo; - } - - set_bit(I915_RESET_ENGINE + id, >->reset.flags); - intel_engine_reset(engine, NULL); - clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - - engine->execlists.preempt_hang.inject_hang = false; - - if (!igt_wait_for_spinner(&spin_hi, rq)) { - GEM_TRACE("hi spinner failed to start\n"); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - err = -EIO; - goto err_ctx_lo; - } - - igt_spinner_end(&spin_hi); - igt_spinner_end(&spin_lo); - 
if (igt_flush_test(gt->i915)) { - err = -EIO; - goto err_ctx_lo; - } - } - - err = 0; -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); -err_spin_lo: - igt_spinner_fini(&spin_lo); -err_spin_hi: - igt_spinner_fini(&spin_hi); - return err; -} - static int live_preempt_timeout(void *arg) { struct intel_gt *gt = arg; @@ -2882,7 +3255,7 @@ static int live_virtual_engine(void *arg) unsigned int class, inst; int err; - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; for_each_engine(engine, gt, id) { @@ -3015,7 +3388,7 @@ static int live_virtual_mask(void *arg) unsigned int class, inst; int err; - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; for (class = 0; class <= MAX_ENGINE_CLASS; class++) { @@ -3055,6 +3428,10 @@ static int preserved_virtual_engine(struct intel_gt *gt, if (IS_ERR(scratch)) return PTR_ERR(scratch); + err = i915_vma_sync(scratch); + if (err) + goto out_scratch; + ve = intel_execlists_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); @@ -3153,7 +3530,7 @@ static int live_virtual_preserved(void *arg) * are preserved. */ - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; /* As we use CS_GPR we cannot run before they existed on all engines. 
*/ @@ -3243,15 +3620,21 @@ static int bond_virtual_engine(struct intel_gt *gt, rq[0] = ERR_PTR(-ENOMEM); for_each_engine(master, gt, id) { struct i915_sw_fence fence = {}; + struct intel_context *ce; if (master->class == class) continue; + ce = intel_context_create(master); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); - rq[0] = igt_spinner_create_request(&spin, - master->kernel_context, - MI_NOOP); + rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); + intel_context_put(ce); if (IS_ERR(rq[0])) { err = PTR_ERR(rq[0]); goto out; @@ -3377,7 +3760,7 @@ static int live_virtual_bond(void *arg) unsigned int class, inst; int err; - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; for (class = 0; class <= MAX_ENGINE_CLASS; class++) { @@ -3538,7 +3921,7 @@ static int live_virtual_reset(void *arg) * forgotten. */ - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) return 0; if (!intel_has_reset_engine(gt)) @@ -3571,8 +3954,11 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sanitycheck), SUBTEST(live_unlite_switch), SUBTEST(live_unlite_preempt), + SUBTEST(live_pin_rewind), SUBTEST(live_hold_reset), + SUBTEST(live_error_interrupt), SUBTEST(live_timeslice_preempt), + SUBTEST(live_timeslice_rewind), SUBTEST(live_timeslice_queue), SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), @@ -3583,7 +3969,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), SUBTEST(live_preempt_gang), - SUBTEST(live_preempt_hang), SUBTEST(live_preempt_timeout), SUBTEST(live_preempt_smoke), SUBTEST(live_virtual_engine), @@ -3631,6 +4016,62 @@ static void hexdump(const void *buf, size_t len) } } +static int emit_semaphore_signal(struct intel_context *ce, void *slot) +{ + const u32 offset = + i915_ggtt_offset(ce->engine->status_page.vma) + + 
offset_in_page(slot); + struct i915_request *rq; + u32 *cs; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = offset; + *cs++ = 0; + *cs++ = 1; + + intel_ring_advance(rq, cs); + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_add(rq); + return 0; +} + +static int context_flush(struct intel_context *ce, long timeout) +{ + struct i915_request *rq; + struct dma_fence *fence; + int err = 0; + + rq = intel_engine_create_kernel_request(ce->engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + fence = i915_active_fence_get(&ce->timeline->last_request); + if (fence) { + i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } + + rq = i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, timeout) < 0) + err = -ETIME; + i915_request_put(rq); + + rmb(); /* We know the request is written, make sure all state is too! 
*/ + return err; +} + static int live_lrc_layout(void *arg) { struct intel_gt *gt = arg; @@ -3797,6 +4238,11 @@ static int live_lrc_fixed(void *arg) CTX_BB_STATE - 1, "BB_STATE" }, + { + i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), + CTX_TIMESTAMP - 1, + "RING_CTX_TIMESTAMP" + }, { }, }, *t; u32 *hw; @@ -3880,8 +4326,16 @@ static int __live_lrc_state(struct intel_engine_cs *engine, *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); *cs++ = 0; + i915_vma_lock(scratch); + err = i915_request_await_object(rq, scratch->obj, true); + if (!err) + err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(scratch); + i915_request_get(rq); i915_request_add(rq); + if (err) + goto err_rq; intel_engine_flush_submission(engine); expected[RING_TAIL_IDX] = ce->ring->tail; @@ -3947,13 +4401,13 @@ static int live_lrc_state(void *arg) return err; } -static int gpr_make_dirty(struct intel_engine_cs *engine) +static int gpr_make_dirty(struct intel_context *ce) { struct i915_request *rq; u32 *cs; int n; - rq = intel_engine_create_kernel_request(engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -3965,20 +4419,79 @@ static int gpr_make_dirty(struct intel_engine_cs *engine) *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); for (n = 0; n < NUM_GPR_DW; n++) { - *cs++ = CS_GPR(engine, n); + *cs++ = CS_GPR(ce->engine, n); *cs++ = STACK_MAGIC; } *cs++ = MI_NOOP; intel_ring_advance(rq, cs); + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; i915_request_add(rq); return 0; } -static int __live_gpr_clear(struct intel_engine_cs *engine, - struct i915_vma *scratch) +static struct i915_request * +__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) { + const u32 offset = + i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(slot); + struct i915_request *rq; + u32 *cs; + int err; + int n; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + cs = 
intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); + if (IS_ERR(cs)) { + i915_request_add(rq); + return ERR_CAST(cs); + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + *cs++ = offset; + *cs++ = 0; + + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(ce->engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + } + + i915_vma_lock(scratch); + err = i915_request_await_object(rq, scratch->obj, true); + if (!err) + err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(scratch); + + i915_request_get(rq); + i915_request_add(rq); + if (err) { + i915_request_put(rq); + rq = ERR_PTR(err); + } + + return rq; +} + +static int __live_lrc_gpr(struct intel_engine_cs *engine, + struct i915_vma *scratch, + bool preempt) +{ + u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); struct intel_context *ce; struct i915_request *rq; u32 *cs; @@ -3988,7 +4501,7 @@ static int __live_gpr_clear(struct intel_engine_cs *engine, if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) return 0; /* GPR only on rcs0 for gen8 */ - err = gpr_make_dirty(engine); + err = gpr_make_dirty(engine->kernel_context); if (err) return err; @@ -3996,28 +4509,28 @@ static int __live_gpr_clear(struct intel_engine_cs *engine, if (IS_ERR(ce)) return PTR_ERR(ce); - rq = intel_context_create_request(ce); + rq = __gpr_read(ce, scratch, slot); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_put; } - cs = intel_ring_begin(rq, 4 * NUM_GPR_DW); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - i915_request_add(rq); - goto err_put; - } + err = wait_for_submit(engine, rq, HZ / 2); + if (err) + goto err_rq; - for (n = 0; n < NUM_GPR_DW; n++) { - *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; - *cs++ = CS_GPR(engine, n); - *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); - 
*cs++ = 0; - } + if (preempt) { + err = gpr_make_dirty(engine->kernel_context); + if (err) + goto err_rq; - i915_request_get(rq); - i915_request_add(rq); + err = emit_semaphore_signal(engine->kernel_context, slot); + if (err) + goto err_rq; + } else { + slot[0] = 1; + wmb(); + } if (i915_request_wait(rq, 0, HZ / 5) < 0) { err = -ETIME; @@ -4044,13 +4557,15 @@ static int __live_gpr_clear(struct intel_engine_cs *engine, i915_gem_object_unpin_map(scratch->obj); err_rq: + memset32(&slot[0], -1, 4); + wmb(); i915_request_put(rq); err_put: intel_context_put(ce); return err; } -static int live_gpr_clear(void *arg) +static int live_lrc_gpr(void *arg) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; @@ -4068,7 +4583,971 @@ static int live_gpr_clear(void *arg) return PTR_ERR(scratch); for_each_engine(engine, gt, id) { - err = __live_gpr_clear(engine, scratch); + unsigned long heartbeat; + + engine_heartbeat_disable(engine, &heartbeat); + + err = __live_lrc_gpr(engine, scratch, false); + if (err) + goto err; + + err = __live_lrc_gpr(engine, scratch, true); + if (err) + goto err; + +err: + engine_heartbeat_enable(engine, heartbeat); + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + break; + } + + i915_vma_unpin_and_release(&scratch, 0); + return err; +} + +static struct i915_request * +create_timestamp(struct intel_context *ce, void *slot, int idx) +{ + const u32 offset = + i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(slot); + struct i915_request *rq; + u32 *cs; + int err; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + cs = intel_ring_begin(rq, 10); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + *cs++ = offset; + *cs++ = 0; + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = 
i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base)); + *cs++ = offset + idx * sizeof(u32); + *cs++ = 0; + + intel_ring_advance(rq, cs); + + rq->sched.attr.priority = I915_PRIORITY_MASK; + err = 0; +err: + i915_request_get(rq); + i915_request_add(rq); + if (err) { + i915_request_put(rq); + return ERR_PTR(err); + } + + return rq; +} + +struct lrc_timestamp { + struct intel_engine_cs *engine; + struct intel_context *ce[2]; + u32 poison; +}; + +static bool timestamp_advanced(u32 start, u32 end) +{ + return (s32)(end - start) > 0; +} + +static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) +{ + u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4); + struct i915_request *rq; + u32 timestamp; + int err = 0; + + arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison; + rq = create_timestamp(arg->ce[0], slot, 1); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + err = wait_for_submit(rq->engine, rq, HZ / 2); + if (err) + goto err; + + if (preempt) { + arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef; + err = emit_semaphore_signal(arg->ce[1], slot); + if (err) + goto err; + } else { + slot[0] = 1; + wmb(); + } + + /* And wait for switch to kernel (to save our context to memory) */ + err = context_flush(arg->ce[0], HZ / 2); + if (err) + goto err; + + if (!timestamp_advanced(arg->poison, slot[1])) { + pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", + arg->engine->name, preempt ? "preempt" : "simple", + arg->poison, slot[1]); + err = -EINVAL; + } + + timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]); + if (!timestamp_advanced(slot[1], timestamp)) { + pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n", + arg->engine->name, preempt ? 
"preempt" : "simple", + slot[1], timestamp); + err = -EINVAL; + } + +err: + memset32(slot, -1, 4); + i915_request_put(rq); + return err; +} + +static int live_lrc_timestamp(void *arg) +{ + struct lrc_timestamp data = {}; + struct intel_gt *gt = arg; + enum intel_engine_id id; + const u32 poison[] = { + 0, + S32_MAX, + (u32)S32_MAX + 1, + U32_MAX, + }; + + /* + * We want to verify that the timestamp is saved and restore across + * context switches and is monotonic. + * + * So we do this with a little bit of LRC poisoning to check various + * boundary conditions, and see what happens if we preempt the context + * with a second request (carrying more poison into the timestamp). + */ + + for_each_engine(data.engine, gt, id) { + unsigned long heartbeat; + int i, err = 0; + + engine_heartbeat_disable(data.engine, &heartbeat); + + for (i = 0; i < ARRAY_SIZE(data.ce); i++) { + struct intel_context *tmp; + + tmp = intel_context_create(data.engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err; + } + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err; + } + + data.ce[i] = tmp; + } + + for (i = 0; i < ARRAY_SIZE(poison); i++) { + data.poison = poison[i]; + + err = __lrc_timestamp(&data, false); + if (err) + break; + + err = __lrc_timestamp(&data, true); + if (err) + break; + } + +err: + engine_heartbeat_enable(data.engine, heartbeat); + for (i = 0; i < ARRAY_SIZE(data.ce); i++) { + if (!data.ce[i]) + break; + + intel_context_unpin(data.ce[i]); + intel_context_put(data.ce[i]); + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + +static struct i915_vma * +create_user_vma(struct i915_address_space *vm, unsigned long size) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + 
return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + +static struct i915_vma * +store_context(struct intel_context *ce, struct i915_vma *scratch) +{ + struct i915_vma *batch; + u32 dw, x, *cs, *hw; + + batch = create_user_vma(ce->vm, SZ_64K); + if (IS_ERR(batch)) + return batch; + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + i915_vma_put(batch); + return ERR_CAST(cs); + } + + x = 0; + dw = 0; + hw = ce->engine->pinned_default_state; + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + do { + u32 len = hw[dw] & 0x7f; + + if (hw[dw] == 0) { + dw++; + continue; + } + + if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + dw += len + 2; + continue; + } + + dw++; + len = (len + 1) / 2; + while (len--) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8; + *cs++ = hw[dw]; + *cs++ = lower_32_bits(scratch->node.start + x); + *cs++ = upper_32_bits(scratch->node.start + x); + + dw += 2; + x += 4; + } + } while (dw < PAGE_SIZE / sizeof(u32) && + (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(batch->obj); + i915_gem_object_unpin_map(batch->obj); + + return batch; +} + +static int move_to_active(struct i915_request *rq, + struct i915_vma *vma, + unsigned int flags) +{ + int err; + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, flags); + if (!err) + err = i915_vma_move_to_active(vma, rq, flags); + i915_vma_unlock(vma); + + return err; +} + +static struct i915_request * +record_registers(struct intel_context *ce, + struct i915_vma *before, + struct i915_vma *after, + u32 *sema) +{ + struct i915_vma *b_before, *b_after; + struct i915_request *rq; + u32 *cs; + int err; + + b_before = store_context(ce, before); + if (IS_ERR(b_before)) + return ERR_CAST(b_before); + + b_after = store_context(ce, after); + if (IS_ERR(b_after)) { + rq = ERR_CAST(b_after); + goto err_before; + } + + rq = 
intel_context_create_request(ce); + if (IS_ERR(rq)) + goto err_after; + + err = move_to_active(rq, before, EXEC_OBJECT_WRITE); + if (err) + goto err_rq; + + err = move_to_active(rq, b_before, 0); + if (err) + goto err_rq; + + err = move_to_active(rq, after, EXEC_OBJECT_WRITE); + if (err) + goto err_rq; + + err = move_to_active(rq, b_after, 0); + if (err) + goto err_rq; + + cs = intel_ring_begin(rq, 14); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); + *cs++ = lower_32_bits(b_before->node.start); + *cs++ = upper_32_bits(b_before->node.start); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(sema); + *cs++ = 0; + *cs++ = MI_NOOP; + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); + *cs++ = lower_32_bits(b_after->node.start); + *cs++ = upper_32_bits(b_after->node.start); + + intel_ring_advance(rq, cs); + + WRITE_ONCE(*sema, 0); + i915_request_get(rq); + i915_request_add(rq); +err_after: + i915_vma_put(b_after); +err_before: + i915_vma_put(b_before); + return rq; + +err_rq: + i915_request_add(rq); + rq = ERR_PTR(err); + goto err_after; +} + +static struct i915_vma *load_context(struct intel_context *ce, u32 poison) +{ + struct i915_vma *batch; + u32 dw, *cs, *hw; + + batch = create_user_vma(ce->vm, SZ_64K); + if (IS_ERR(batch)) + return batch; + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + i915_vma_put(batch); + return ERR_CAST(cs); + } + + dw = 0; + hw = ce->engine->pinned_default_state; + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + do { + u32 len = hw[dw] & 0x7f; + + if (hw[dw] == 0) { + dw++; + continue; + } + + if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + dw += len + 2; + continue; + } + 
+ dw++; + len = (len + 1) / 2; + *cs++ = MI_LOAD_REGISTER_IMM(len); + while (len--) { + *cs++ = hw[dw]; + *cs++ = poison; + dw += 2; + } + } while (dw < PAGE_SIZE / sizeof(u32) && + (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(batch->obj); + i915_gem_object_unpin_map(batch->obj); + + return batch; +} + +static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) +{ + struct i915_request *rq; + struct i915_vma *batch; + u32 *cs; + int err; + + batch = load_context(ce, poison); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = move_to_active(rq, batch, 0); + if (err) + goto err_rq; + + cs = intel_ring_begin(rq, 8); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8); + *cs++ = lower_32_bits(batch->node.start); + *cs++ = upper_32_bits(batch->node.start); + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(sema); + *cs++ = 0; + *cs++ = 1; + + intel_ring_advance(rq, cs); + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; +err_rq: + i915_request_add(rq); +err_batch: + i915_vma_put(batch); + return err; +} + +static bool is_moving(u32 a, u32 b) +{ + return a != b; +} + +static int compare_isolation(struct intel_engine_cs *engine, + struct i915_vma *ref[2], + struct i915_vma *result[2], + struct intel_context *ce, + u32 poison) +{ + u32 x, dw, *hw, *lrc; + u32 *A[2], *B[2]; + int err = 0; + + A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC); + if (IS_ERR(A[0])) + return PTR_ERR(A[0]); + + A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC); + if (IS_ERR(A[1])) { + err = PTR_ERR(A[1]); + goto err_A0; + } + + B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC); + if (IS_ERR(B[0])) { + err = 
PTR_ERR(B[0]); + goto err_A1; + } + + B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC); + if (IS_ERR(B[1])) { + err = PTR_ERR(B[1]); + goto err_B0; + } + + lrc = i915_gem_object_pin_map(ce->state->obj, + i915_coherent_map_type(engine->i915)); + if (IS_ERR(lrc)) { + err = PTR_ERR(lrc); + goto err_B1; + } + lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + + x = 0; + dw = 0; + hw = engine->pinned_default_state; + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + do { + u32 len = hw[dw] & 0x7f; + + if (hw[dw] == 0) { + dw++; + continue; + } + + if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + dw += len + 2; + continue; + } + + dw++; + len = (len + 1) / 2; + while (len--) { + if (!is_moving(A[0][x], A[1][x]) && + (A[0][x] != B[0][x] || A[1][x] != B[1][x])) { + switch (hw[dw] & 4095) { + case 0x30: /* RING_HEAD */ + case 0x34: /* RING_TAIL */ + break; + + default: + pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n", + engine->name, dw, + hw[dw], hw[dw + 1], + A[0][x], B[0][x], B[1][x], + poison, lrc[dw + 1]); + err = -EINVAL; + break; + } + } + dw += 2; + x++; + } + } while (dw < PAGE_SIZE / sizeof(u32) && + (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + i915_gem_object_unpin_map(ce->state->obj); +err_B1: + i915_gem_object_unpin_map(result[1]->obj); +err_B0: + i915_gem_object_unpin_map(result[0]->obj); +err_A1: + i915_gem_object_unpin_map(ref[1]->obj); +err_A0: + i915_gem_object_unpin_map(ref[0]->obj); + return err; +} + +static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) +{ + u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); + struct i915_vma *ref[2], *result[2]; + struct intel_context *A, *B; + struct i915_request *rq; + int err; + + A = intel_context_create(engine); + if (IS_ERR(A)) + return PTR_ERR(A); + + B = intel_context_create(engine); + if (IS_ERR(B)) { + err = PTR_ERR(B); + goto err_A; + } + + ref[0] = create_user_vma(A->vm, SZ_64K); + if 
(IS_ERR(ref[0])) { + err = PTR_ERR(ref[0]); + goto err_B; + } + + ref[1] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(ref[1])) { + err = PTR_ERR(ref[1]); + goto err_ref0; + } + + rq = record_registers(A, ref[0], ref[1], sema); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ref1; + } + + WRITE_ONCE(*sema, 1); + wmb(); + + if (i915_request_wait(rq, 0, HZ / 2) < 0) { + i915_request_put(rq); + err = -ETIME; + goto err_ref1; + } + i915_request_put(rq); + + result[0] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(result[0])) { + err = PTR_ERR(result[0]); + goto err_ref1; + } + + result[1] = create_user_vma(A->vm, SZ_64K); + if (IS_ERR(result[1])) { + err = PTR_ERR(result[1]); + goto err_result0; + } + + rq = record_registers(A, result[0], result[1], sema); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_result1; + } + + err = poison_registers(B, poison, sema); + if (err) { + WRITE_ONCE(*sema, -1); + i915_request_put(rq); + goto err_result1; + } + + if (i915_request_wait(rq, 0, HZ / 2) < 0) { + i915_request_put(rq); + err = -ETIME; + goto err_result1; + } + i915_request_put(rq); + + err = compare_isolation(engine, ref, result, A, poison); + +err_result1: + i915_vma_put(result[1]); +err_result0: + i915_vma_put(result[0]); +err_ref1: + i915_vma_put(ref[1]); +err_ref0: + i915_vma_put(ref[0]); +err_B: + intel_context_put(B); +err_A: + intel_context_put(A); + return err; +} + +static bool skip_isolation(const struct intel_engine_cs *engine) +{ + if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9) + return true; + + if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11) + return true; + + return false; +} + +static int live_lrc_isolation(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const u32 poison[] = { + STACK_MAGIC, + 0x3a3a3a3a, + 0x5c5c5c5c, + 0xffffffff, + 0xffff0000, + }; + + /* + * Our goal is try and verify that per-context state cannot be + * tampered with by 
another non-privileged client. + * + * We take the list of context registers from the LRI in the default + * context image and attempt to modify that list from a remote context. + */ + + for_each_engine(engine, gt, id) { + int err = 0; + int i; + + /* Just don't even ask */ + if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) && + skip_isolation(engine)) + continue; + + intel_engine_pm_get(engine); + if (engine->pinned_default_state) { + for (i = 0; i < ARRAY_SIZE(poison); i++) { + err = __lrc_isolation(engine, poison[i]); + if (err) + break; + + err = __lrc_isolation(engine, ~poison[i]); + if (err) + break; + } + } + intel_engine_pm_put(engine); + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + +static void garbage_reset(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + const unsigned int bit = I915_RESET_ENGINE + engine->id; + unsigned long *lock = &engine->gt->reset.flags; + + if (test_and_set_bit(bit, lock)) + return; + + tasklet_disable(&engine->execlists.tasklet); + + if (!rq->fence.error) + intel_engine_reset(engine, NULL); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(bit, lock); +} + +static struct i915_request *garbage(struct intel_context *ce, + struct rnd_state *prng) +{ + struct i915_request *rq; + int err; + + err = intel_context_pin(ce); + if (err) + return ERR_PTR(err); + + prandom_bytes_state(prng, + ce->lrc_reg_state, + ce->engine->context_size - + LRC_STATE_PN * PAGE_SIZE); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + i915_request_get(rq); + i915_request_add(rq); + return rq; + +err_unpin: + intel_context_unpin(ce); + return ERR_PTR(err); +} + +static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng) +{ + struct intel_context *ce; + struct i915_request *hang; + int err = 0; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + hang = garbage(ce, 
prng); + if (IS_ERR(hang)) { + err = PTR_ERR(hang); + goto err_ce; + } + + if (wait_for_submit(engine, hang, HZ / 2)) { + i915_request_put(hang); + err = -ETIME; + goto err_ce; + } + + intel_context_set_banned(ce); + garbage_reset(engine, hang); + + intel_engine_flush_submission(engine); + if (!hang->fence.error) { + i915_request_put(hang); + pr_err("%s: corrupted context was not reset\n", + engine->name); + err = -EINVAL; + goto err_ce; + } + + if (i915_request_wait(hang, 0, HZ / 2) < 0) { + pr_err("%s: corrupted context did not recover\n", + engine->name); + i915_request_put(hang); + err = -EIO; + goto err_ce; + } + i915_request_put(hang); + +err_ce: + intel_context_put(ce); + return err; +} + +static int live_lrc_garbage(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Verify that we can recover if one context state is completely + * corrupted. + */ + + if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN)) + return 0; + + for_each_engine(engine, gt, id) { + I915_RND_STATE(prng); + int err = 0, i; + + if (!intel_has_reset_engine(engine->gt)) + continue; + + intel_engine_pm_get(engine); + for (i = 0; i < 3; i++) { + err = __lrc_garbage(engine, &prng); + if (err) + break; + } + intel_engine_pm_put(engine); + + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + +static int __live_pphwsp_runtime(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct i915_request *rq; + IGT_TIMEOUT(end_time); + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + ce->runtime.num_underflow = 0; + ce->runtime.max_underflow = 0; + + do { + unsigned int loop = 1024; + + while (loop) { + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_rq; + } + + if (--loop == 0) + i915_request_get(rq); + + i915_request_add(rq); + } + + if (__igt_timeout(end_time, NULL)) + break; + + i915_request_put(rq); + 
} while (1); + + err = i915_request_wait(rq, 0, HZ / 5); + if (err < 0) { + pr_err("%s: request not completed!\n", engine->name); + goto err_wait; + } + + igt_flush_test(engine->i915); + + pr_info("%s: pphwsp runtime %lluns, average %lluns\n", + engine->name, + intel_context_get_total_runtime_ns(ce), + intel_context_get_avg_runtime_ns(ce)); + + err = 0; + if (ce->runtime.num_underflow) { + pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", + engine->name, + ce->runtime.num_underflow, + ce->runtime.max_underflow); + GEM_TRACE_DUMP(); + err = -EOVERFLOW; + } + +err_wait: + i915_request_put(rq); +err_rq: + intel_context_put(ce); + return err; +} + +static int live_pphwsp_runtime(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * Check that cumulative context runtime as stored in the pphwsp[16] + * is monotonic. + */ + + for_each_engine(engine, gt, id) { + err = __live_pphwsp_runtime(engine); if (err) break; } @@ -4076,7 +5555,6 @@ static int live_gpr_clear(void *arg) if (igt_flush_test(gt->i915)) err = -EIO; - i915_vma_unpin_and_release(&scratch, 0); return err; } @@ -4086,7 +5564,11 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_lrc_layout), SUBTEST(live_lrc_fixed), SUBTEST(live_lrc_state), - SUBTEST(live_gpr_clear), + SUBTEST(live_lrc_gpr), + SUBTEST(live_lrc_isolation), + SUBTEST(live_lrc_timestamp), + SUBTEST(live_lrc_garbage), + SUBTEST(live_pphwsp_runtime), }; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index de1f83100fb6..8831ffee2061 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -12,7 +12,8 @@ #include "selftests/igt_spinner.h" struct live_mocs { - struct drm_i915_mocs_table table; + struct drm_i915_mocs_table mocs; + struct drm_i915_mocs_table l3cc; struct i915_vma *scratch; void *vaddr; }; @@ -70,11 
+71,22 @@ static struct i915_vma *create_scratch(struct intel_gt *gt) static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt) { + struct drm_i915_mocs_table table; + unsigned int flags; int err; - if (!get_mocs_settings(gt->i915, &arg->table)) + memset(arg, 0, sizeof(*arg)); + + flags = get_mocs_settings(gt->i915, &table); + if (!flags) return -EINVAL; + if (flags & HAS_RENDER_L3CC) + arg->l3cc = table; + + if (flags & (HAS_GLOBAL_MOCS | HAS_ENGINE_MOCS)) + arg->mocs = table; + arg->scratch = create_scratch(gt); if (IS_ERR(arg->scratch)) return PTR_ERR(arg->scratch); @@ -223,9 +235,9 @@ static int check_mocs_engine(struct live_mocs *arg, /* Read the mocs tables back using SRM */ offset = i915_ggtt_offset(vma); if (!err) - err = read_mocs_table(rq, &arg->table, &offset); + err = read_mocs_table(rq, &arg->mocs, &offset); if (!err && ce->engine->class == RENDER_CLASS) - err = read_l3cc_table(rq, &arg->table, &offset); + err = read_l3cc_table(rq, &arg->l3cc, &offset); offset -= i915_ggtt_offset(vma); GEM_BUG_ON(offset > PAGE_SIZE); @@ -236,9 +248,9 @@ static int check_mocs_engine(struct live_mocs *arg, /* Compare the results against the expected tables */ vaddr = arg->vaddr; if (!err) - err = check_mocs_table(ce->engine, &arg->table, &vaddr); + err = check_mocs_table(ce->engine, &arg->mocs, &vaddr); if (!err && ce->engine->class == RENDER_CLASS) - err = check_l3cc_table(ce->engine, &arg->table, &vaddr); + err = check_l3cc_table(ce->engine, &arg->l3cc, &vaddr); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 8cc55a0e9e06..95b165faeba7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -12,6 +12,21 @@ #include "selftests/i915_random.h" +static u64 rc6_residency(struct intel_rc6 *rc6) +{ + u64 result; + + /* XXX VLV_GT_MEDIA_RC6? 
*/ + + result = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + if (HAS_RC6p(rc6_to_i915(rc6))) + result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6p); + if (HAS_RC6pp(rc6_to_i915(rc6))) + result += intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6pp); + + return result; +} + int live_rc6_manual(void *arg) { struct intel_gt *gt = arg; @@ -38,9 +53,9 @@ int live_rc6_manual(void *arg) __intel_rc6_disable(rc6); msleep(1); /* wakeup is not immediate, takes about 100us on icl */ - res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + res[0] = rc6_residency(rc6); msleep(250); - res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + res[1] = rc6_residency(rc6); if ((res[1] - res[0]) >> 10) { pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n", (res[1] - res[0]) >> 10); @@ -51,14 +66,15 @@ int live_rc6_manual(void *arg) /* Manually enter RC6 */ intel_rc6_park(rc6); - res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + res[0] = rc6_residency(rc6); msleep(100); - res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6); + res[1] = rc6_residency(rc6); if (res[1] == res[0]) { - pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x\n", + pr_err("Did not enter RC6! 
RC6_STATE=%08x, RC6_CONTROL=%08x, residency=%lld\n", intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE), - intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL)); + intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL), + res[0]); err = -EINVAL; } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 6ad6aca315f6..35406ecdf0b2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -115,7 +115,7 @@ static int igt_atomic_engine_reset(void *arg) if (!intel_has_reset_engine(gt)) return 0; - if (USES_GUC_SUBMISSION(gt->i915)) + if (intel_uc_uses_guc_submission(&gt->uc)) return 0; intel_gt_pm_get(gt); diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c new file mode 100644 index 000000000000..9995faadd7e8 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "intel_engine_pm.h" +#include "selftests/igt_flush_test.h" + +static struct i915_vma *create_wally(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cs; + int err; + + obj = i915_gem_object_create_internal(engine->i915, 4096); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, engine->gt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + err = i915_vma_sync(vma); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cs)) { + i915_gem_object_put(obj); + return ERR_CAST(cs); + } + + if (INTEL_GEN(engine->i915) >= 6) { + *cs++ = MI_STORE_DWORD_IMM_GEN4; + *cs++ = 0; + } else if (INTEL_GEN(engine->i915) >= 4) { + *cs++ = 
MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + } + *cs++ = vma->node.start + 4000; + *cs++ = STACK_MAGIC; + + *cs++ = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + vma->private = intel_context_create(engine); /* dummy residuals */ + if (IS_ERR(vma->private)) { + vma = ERR_CAST(vma->private); + i915_gem_object_put(obj); + } + + return vma; +} + +static int context_sync(struct intel_context *ce) +{ + struct i915_request *rq; + int err = 0; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static int new_context_sync(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = context_sync(ce); + intel_context_put(ce); + + return err; +} + +static int mixed_contexts_sync(struct intel_engine_cs *engine, u32 *result) +{ + int pass; + int err; + + for (pass = 0; pass < 2; pass++) { + WRITE_ONCE(*result, 0); + err = context_sync(engine->kernel_context); + if (err || READ_ONCE(*result)) { + if (!err) { + pr_err("pass[%d] wa_bb emitted for the kernel context\n", + pass); + err = -EINVAL; + } + return err; + } + + WRITE_ONCE(*result, 0); + err = new_context_sync(engine); + if (READ_ONCE(*result) != STACK_MAGIC) { + if (!err) { + pr_err("pass[%d] wa_bb *NOT* emitted after the kernel context\n", + pass); + err = -EINVAL; + } + return err; + } + + WRITE_ONCE(*result, 0); + err = new_context_sync(engine); + if (READ_ONCE(*result) != STACK_MAGIC) { + if (!err) { + pr_err("pass[%d] wa_bb *NOT* emitted for the user context switch\n", + pass); + err = -EINVAL; + } + return err; + } + } + + return 0; +} + +static int double_context_sync_00(struct intel_engine_cs *engine, u32 *result) +{ + struct intel_context *ce; + int 
err, i; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + for (i = 0; i < 2; i++) { + WRITE_ONCE(*result, 0); + err = context_sync(ce); + if (err) + break; + } + intel_context_put(ce); + if (err) + return err; + + if (READ_ONCE(*result)) { + pr_err("wa_bb emitted between the same user context\n"); + return -EINVAL; + } + + return 0; +} + +static int kernel_context_sync_00(struct intel_engine_cs *engine, u32 *result) +{ + struct intel_context *ce; + int err, i; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + for (i = 0; i < 2; i++) { + WRITE_ONCE(*result, 0); + err = context_sync(ce); + if (err) + break; + + err = context_sync(engine->kernel_context); + if (err) + break; + } + intel_context_put(ce); + if (err) + return err; + + if (READ_ONCE(*result)) { + pr_err("wa_bb emitted between the same user context [with intervening kernel]\n"); + return -EINVAL; + } + + return 0; +} + +static int __live_ctx_switch_wa(struct intel_engine_cs *engine) +{ + struct i915_vma *bb; + u32 *result; + int err; + + bb = create_wally(engine); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + result = i915_gem_object_pin_map(bb->obj, I915_MAP_WC); + if (IS_ERR(result)) { + intel_context_put(bb->private); + i915_vma_unpin_and_release(&bb, 0); + return PTR_ERR(result); + } + result += 1000; + + engine->wa_ctx.vma = bb; + + err = mixed_contexts_sync(engine, result); + if (err) + goto out; + + err = double_context_sync_00(engine, result); + if (err) + goto out; + + err = kernel_context_sync_00(engine, result); + if (err) + goto out; + +out: + intel_context_put(engine->wa_ctx.vma->private); + i915_vma_unpin_and_release(&engine->wa_ctx.vma, I915_VMA_RELEASE_MAP); + return err; +} + +static int live_ctx_switch_wa(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Exercise the inter-context wa batch. 
+ * + * Between each user context we run a wa batch, and since it may + * have implications for user visible state, we have to check that + * we do actually execute it. + * + * The trick we use is to replace the normal wa batch with a custom + * one that writes to a marker within it, and we can then look for + * that marker to confirm if the batch was run when we expect it, + * and equally important it was wasn't run when we don't! + */ + + for_each_engine(engine, gt, id) { + struct i915_vma *saved_wa; + int err; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (IS_GEN_RANGE(gt->i915, 4, 5)) + continue; /* MI_STORE_DWORD is privileged! */ + + saved_wa = fetch_and_zero(&engine->wa_ctx.vma); + + intel_engine_pm_get(engine); + err = __live_ctx_switch_wa(engine); + intel_engine_pm_put(engine); + if (igt_flush_test(gt->i915)) + err = -EIO; + + engine->wa_ctx.vma = saved_wa; + if (err) + return err; + } + + return 0; +} + +int intel_ring_submission_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_ctx_switch_wa), + }; + + if (HAS_EXECLISTS(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index e2d78cc22fb4..c2578a0f2f14 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -6,6 +6,8 @@ #include <linux/prime_numbers.h> +#include "intel_context.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_requests.h" @@ -604,7 +606,6 @@ static int live_hwsp_alternate(void *arg) tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { - intel_engine_pm_put(engine); err = PTR_ERR(tl); goto out; } @@ -750,6 +751,189 @@ out_free: return err; } +static void engine_heartbeat_disable(struct intel_engine_cs *engine, + unsigned long *saved) +{ + *saved = 
engine->props.heartbeat_interval_ms; + engine->props.heartbeat_interval_ms = 0; + + intel_engine_pm_get(engine); + intel_engine_park_heartbeat(engine); +} + +static void engine_heartbeat_enable(struct intel_engine_cs *engine, + unsigned long saved) +{ + intel_engine_pm_put(engine); + + engine->props.heartbeat_interval_ms = saved; +} + +static int live_hwsp_rollover_kernel(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * Run the host for long enough, and even the kernel context will + * see a seqno rollover. + */ + + for_each_engine(engine, gt, id) { + struct intel_context *ce = engine->kernel_context; + struct intel_timeline *tl = ce->timeline; + struct i915_request *rq[3] = {}; + unsigned long heartbeat; + int i; + + engine_heartbeat_disable(engine, &heartbeat); + if (intel_gt_wait_for_idle(gt, HZ / 2)) { + err = -EIO; + goto out; + } + + GEM_BUG_ON(i915_active_fence_isset(&tl->last_request)); + tl->seqno = 0; + timeline_rollback(tl); + timeline_rollback(tl); + WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); + + for (i = 0; i < ARRAY_SIZE(rq); i++) { + struct i915_request *this; + + this = i915_request_create(ce); + if (IS_ERR(this)) { + err = PTR_ERR(this); + goto out; + } + + pr_debug("%s: create fence.seqnp:%d\n", + engine->name, + lower_32_bits(this->fence.seqno)); + + GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl); + + rq[i] = i915_request_get(this); + i915_request_add(this); + } + + /* We expected a wrap! 
*/ + GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno); + + if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { + pr_err("Wait for timeline wrap timed out!\n"); + err = -EIO; + goto out; + } + + for (i = 0; i < ARRAY_SIZE(rq); i++) { + if (!i915_request_completed(rq[i])) { + pr_err("Pre-wrap request not completed!\n"); + err = -EINVAL; + goto out; + } + } + +out: + for (i = 0; i < ARRAY_SIZE(rq); i++) + i915_request_put(rq[i]); + engine_heartbeat_enable(engine, heartbeat); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + return err; +} + +static int live_hwsp_rollover_user(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * Simulate a long running user context, and force the seqno wrap + * on the user's timeline. + */ + + for_each_engine(engine, gt, id) { + struct i915_request *rq[3] = {}; + struct intel_timeline *tl; + struct intel_context *ce; + int i; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_alloc_state(ce); + if (err) + goto out; + + tl = ce->timeline; + if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) + goto out; + + timeline_rollback(tl); + timeline_rollback(tl); + WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); + + for (i = 0; i < ARRAY_SIZE(rq); i++) { + struct i915_request *this; + + this = intel_context_create_request(ce); + if (IS_ERR(this)) { + err = PTR_ERR(this); + goto out; + } + + pr_debug("%s: create fence.seqnp:%d\n", + engine->name, + lower_32_bits(this->fence.seqno)); + + GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl); + + rq[i] = i915_request_get(this); + i915_request_add(this); + } + + /* We expected a wrap! 
*/ + GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno); + + if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { + pr_err("Wait for timeline wrap timed out!\n"); + err = -EIO; + goto out; + } + + for (i = 0; i < ARRAY_SIZE(rq); i++) { + if (!i915_request_completed(rq[i])) { + pr_err("Pre-wrap request not completed!\n"); + err = -EINVAL; + goto out; + } + } + +out: + for (i = 0; i < ARRAY_SIZE(rq); i++) + i915_request_put(rq[i]); + intel_context_put(ce); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + return err; +} + static int live_hwsp_recycle(void *arg) { struct intel_gt *gt = arg; @@ -827,6 +1011,8 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915) SUBTEST(live_hwsp_engine), SUBTEST(live_hwsp_alternate), SUBTEST(live_hwsp_wrap), + SUBTEST(live_hwsp_rollover_kernel), + SUBTEST(live_hwsp_rollover_user), }; if (intel_gt_is_wedged(&i915->gt)) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index ac1921854cbf..5ed323254ee1 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -583,6 +583,15 @@ static int check_dirty_whitelist(struct intel_context *ce) if (err) goto err_request; + i915_vma_lock(scratch); + err = i915_request_await_object(rq, scratch->obj, true); + if (err == 0) + err = i915_vma_move_to_active(scratch, rq, + EXEC_OBJECT_WRITE); + i915_vma_unlock(scratch); + if (err) + goto err_request; + err = engine->emit_bb_start(rq, batch->node.start, PAGE_SIZE, 0); diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c new file mode 100644 index 000000000000..8f9b2f33dbaf --- /dev/null +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/kobject.h> +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "intel_engine.h" +#include 
"intel_engine_heartbeat.h" +#include "sysfs_engines.h" + +struct kobj_engine { + struct kobject base; + struct intel_engine_cs *engine; +}; + +static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_engine, base)->engine; +} + +static ssize_t +name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", kobj_to_engine(kobj)->name); +} + +static struct kobj_attribute name_attr = +__ATTR(name, 0444, name_show, NULL); + +static ssize_t +class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_class); +} + +static struct kobj_attribute class_attr = +__ATTR(class, 0444, class_show, NULL); + +static ssize_t +inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance); +} + +static struct kobj_attribute inst_attr = +__ATTR(instance, 0444, inst_show, NULL); + +static ssize_t +mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base); +} + +static struct kobj_attribute mmio_attr = +__ATTR(mmio_base, 0444, mmio_show, NULL); + +static const char * const vcs_caps[] = { + [ilog2(I915_VIDEO_CLASS_CAPABILITY_HEVC)] = "hevc", + [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc", +}; + +static const char * const vecs_caps[] = { + [ilog2(I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC)] = "sfc", +}; + +static ssize_t repr_trim(char *buf, ssize_t len) +{ + /* Trim off the trailing space and replace with a newline */ + if (len > PAGE_SIZE) + len = PAGE_SIZE; + if (len > 0) + buf[len - 1] = '\n'; + + return len; +} + +static ssize_t +__caps_show(struct intel_engine_cs *engine, + u32 caps, char *buf, bool show_unknown) +{ + const char * const *repr; + int count, n; + ssize_t len; + + BUILD_BUG_ON(!typecheck(typeof(caps), 
engine->uabi_capabilities)); + + switch (engine->class) { + case VIDEO_DECODE_CLASS: + repr = vcs_caps; + count = ARRAY_SIZE(vcs_caps); + break; + + case VIDEO_ENHANCEMENT_CLASS: + repr = vecs_caps; + count = ARRAY_SIZE(vecs_caps); + break; + + default: + repr = NULL; + count = 0; + break; + } + GEM_BUG_ON(count > BITS_PER_TYPE(typeof(caps))); + + len = 0; + for_each_set_bit(n, + (unsigned long *)&caps, + show_unknown ? BITS_PER_TYPE(typeof(caps)) : count) { + if (n >= count || !repr[n]) { + if (GEM_WARN_ON(show_unknown)) + len += snprintf(buf + len, PAGE_SIZE - len, + "[%x] ", n); + } else { + len += snprintf(buf + len, PAGE_SIZE - len, + "%s ", repr[n]); + } + if (GEM_WARN_ON(len >= PAGE_SIZE)) + break; + } + return repr_trim(buf, len); +} + +static ssize_t +caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return __caps_show(engine, engine->uabi_capabilities, buf, true); +} + +static struct kobj_attribute caps_attr = +__ATTR(capabilities, 0444, caps_show, NULL); + +static ssize_t +all_caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return __caps_show(kobj_to_engine(kobj), -1, buf, false); +} + +static struct kobj_attribute all_caps_attr = +__ATTR(known_capabilities, 0444, all_caps_show, NULL); + +static ssize_t +max_spin_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + unsigned long long duration; + int err; + + /* + * When waiting for a request, if is it currently being executed + * on the GPU, we busywait for a short while before sleeping. The + * premise is that most requests are short, and if it is already + * executing then there is a good chance that it will complete + * before we can setup the interrupt handler and go to sleep. 
+ * We try to offset the cost of going to sleep, by first spinning + * on the request -- if it completed in less time than it would take + * to go sleep, process the interrupt and return back to the client, + * then we have saved the client some latency, albeit at the cost + * of spinning on an expensive CPU core. + * + * While we try to avoid waiting at all for a request that is unlikely + * to complete, deciding how long it is worth spinning is for is an + * arbitrary decision: trading off power vs latency. + */ + + err = kstrtoull(buf, 0, &duration); + if (err) + return err; + + if (duration > jiffies_to_nsecs(2)) + return -EINVAL; + + WRITE_ONCE(engine->props.max_busywait_duration_ns, duration); + + return count; +} + +static ssize_t +max_spin_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->props.max_busywait_duration_ns); +} + +static struct kobj_attribute max_spin_attr = +__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store); + +static ssize_t +timeslice_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + unsigned long long duration; + int err; + + /* + * Execlists uses a scheduling quantum (a timeslice) to alternate + * execution between ready-to-run contexts of equal priority. This + * ensures that all users (though only if they of equal importance) + * have the opportunity to run and prevents livelocks where contexts + * may have implicit ordering due to userspace semaphores. 
+ */ + + err = kstrtoull(buf, 0, &duration); + if (err) + return err; + + if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + return -EINVAL; + + WRITE_ONCE(engine->props.timeslice_duration_ms, duration); + + if (execlists_active(&engine->execlists)) + set_timer_ms(&engine->execlists.timer, duration); + + return count; +} + +static ssize_t +timeslice_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->props.timeslice_duration_ms); +} + +static struct kobj_attribute timeslice_duration_attr = +__ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store); + +static ssize_t +stop_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + unsigned long long duration; + int err; + + /* + * When we allow ourselves to sleep before a GPU reset after disabling + * submission, even for a few milliseconds, gives an innocent context + * the opportunity to clear the GPU before the reset occurs. However, + * how long to sleep depends on the typical non-preemptible duration + * (a similar problem to determining the ideal preempt-reset timeout + * or even the heartbeat interval). 
+ */ + + err = kstrtoull(buf, 0, &duration); + if (err) + return err; + + if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + return -EINVAL; + + WRITE_ONCE(engine->props.stop_timeout_ms, duration); + return count; +} + +static ssize_t +stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->props.stop_timeout_ms); +} + +static struct kobj_attribute stop_timeout_attr = +__ATTR(stop_timeout_ms, 0644, stop_show, stop_store); + +static ssize_t +preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + unsigned long long timeout; + int err; + + /* + * After initialising a preemption request, we give the current + * resident a small amount of time to vacate the GPU. The preemption + * request is for a higher priority context and should be immediate to + * maintain high quality of service (and avoid priority inversion). + * However, the preemption granularity of the GPU can be quite coarse + * and so we need a compromise. 
+ */ + + err = kstrtoull(buf, 0, &timeout); + if (err) + return err; + + if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + return -EINVAL; + + WRITE_ONCE(engine->props.preempt_timeout_ms, timeout); + + if (READ_ONCE(engine->execlists.pending[0])) + set_timer_ms(&engine->execlists.preempt, timeout); + + return count; +} + +static ssize_t +preempt_timeout_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->props.preempt_timeout_ms); +} + +static struct kobj_attribute preempt_timeout_attr = +__ATTR(preempt_timeout_ms, 0644, preempt_timeout_show, preempt_timeout_store); + +static ssize_t +heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + unsigned long long delay; + int err; + + /* + * We monitor the health of the system via periodic heartbeat pulses. + * The pulses also provide the opportunity to perform garbage + * collection. However, we interpret an incomplete pulse (a missed + * heartbeat) as an indication that the system is no longer responsive, + * i.e. hung, and perform an engine or full GPU reset. Given that the + * preemption granularity can be very coarse on a system, the optimal + * value for any workload is unknowable! 
+ */ + + err = kstrtoull(buf, 0, &delay); + if (err) + return err; + + if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + return -EINVAL; + + err = intel_engine_set_heartbeat(engine, delay); + if (err) + return err; + + return count; +} + +static ssize_t +heartbeat_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->props.heartbeat_interval_ms); +} + +static struct kobj_attribute heartbeat_interval_attr = +__ATTR(heartbeat_interval_ms, 0644, heartbeat_show, heartbeat_store); + +static void kobj_engine_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static struct kobj_type kobj_engine_type = { + .release = kobj_engine_release, + .sysfs_ops = &kobj_sysfs_ops +}; + +static struct kobject * +kobj_engine(struct kobject *dir, struct intel_engine_cs *engine) +{ + struct kobj_engine *ke; + + ke = kzalloc(sizeof(*ke), GFP_KERNEL); + if (!ke) + return NULL; + + kobject_init(&ke->base, &kobj_engine_type); + ke->engine = engine; + + if (kobject_add(&ke->base, dir, "%s", engine->name)) { + kobject_put(&ke->base); + return NULL; + } + + /* xfer ownership to sysfs tree */ + return &ke->base; +} + +void intel_engines_add_sysfs(struct drm_i915_private *i915) +{ + static const struct attribute *files[] = { + &name_attr.attr, + &class_attr.attr, + &inst_attr.attr, + &mmio_attr.attr, + &caps_attr.attr, + &all_caps_attr.attr, + &max_spin_attr.attr, + &stop_timeout_attr.attr, +#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL + &heartbeat_interval_attr.attr, +#endif + NULL + }; + + struct device *kdev = i915->drm.primary->kdev; + struct intel_engine_cs *engine; + struct kobject *dir; + + dir = kobject_create_and_add("engine", &kdev->kobj); + if (!dir) + return; + + for_each_uabi_engine(engine, i915) { + struct kobject *kobj; + + kobj = kobj_engine(dir, engine); + if (!kobj) + goto err_engine; + + if (sysfs_create_files(kobj, files)) + goto err_object; + + if 
(intel_engine_has_timeslices(engine) && + sysfs_create_file(kobj, &timeslice_duration_attr.attr)) + goto err_engine; + + if (intel_engine_has_preempt_reset(engine) && + sysfs_create_file(kobj, &preempt_timeout_attr.attr)) + goto err_engine; + + if (0) { +err_object: + kobject_put(kobj); +err_engine: + dev_err(kdev, "Failed to add sysfs engine '%s'\n", + engine->name); + break; + } + } +} diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.h b/drivers/gpu/drm/i915/gt/sysfs_engines.h new file mode 100644 index 000000000000..9546fffe03a7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_ENGINE_SYSFS_H +#define INTEL_ENGINE_SYSFS_H + +struct drm_i915_private; + +void intel_engines_add_sysfs(struct drm_i915_private *i915); + +#endif /* INTEL_ENGINE_SYSFS_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 5d00a3b2d914..819f09ef51fc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -207,7 +207,7 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc) { u32 flags = 0; - if (!intel_guc_is_submission_supported(guc)) + if (!intel_guc_submission_is_used(guc)) flags |= GUC_CTL_DISABLE_SCHEDULER; return flags; @@ -217,7 +217,7 @@ static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc) { u32 flags = 0; - if (intel_guc_is_submission_supported(guc)) { + if (intel_guc_submission_is_used(guc)) { u32 ctxnum, base; base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool); @@ -333,7 +333,7 @@ int intel_guc_init(struct intel_guc *guc) ret = intel_uc_fw_init(&guc->fw); if (ret) - goto err_fetch; + goto out; ret = intel_guc_log_create(&guc->log); if (ret) @@ -348,7 +348,7 @@ int intel_guc_init(struct intel_guc *guc) if (ret) goto err_ads; - if (intel_guc_is_submission_supported(guc)) { + if (intel_guc_submission_is_used(guc)) { /* * This is stuff we need to have 
available at fw load time * if we are planning to enable submission later @@ -364,6 +364,8 @@ int intel_guc_init(struct intel_guc *guc) /* We need to notify the guc whenever we change the GGTT */ i915_ggtt_enable_guc(gt->ggtt); + intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOADABLE); + return 0; err_ct: @@ -374,9 +376,8 @@ err_log: intel_guc_log_destroy(&guc->log); err_fw: intel_uc_fw_fini(&guc->fw); -err_fetch: - intel_uc_fw_cleanup_fetch(&guc->fw); - DRM_DEV_DEBUG_DRIVER(gt->i915->drm.dev, "failed with %d\n", ret); +out: + i915_probe_error(gt->i915, "failed with %d\n", ret); return ret; } @@ -384,12 +385,12 @@ void intel_guc_fini(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - if (!intel_uc_fw_is_available(&guc->fw)) + if (!intel_uc_fw_is_loadable(&guc->fw)) return; i915_ggtt_disable_guc(gt->ggtt); - if (intel_guc_is_submission_supported(guc)) + if (intel_guc_submission_is_used(guc)) intel_guc_submission_fini(guc); intel_guc_ct_fini(&guc->ct); @@ -397,9 +398,6 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ads_destroy(guc); intel_guc_log_destroy(&guc->log); intel_uc_fw_fini(&guc->fw); - intel_uc_fw_cleanup_fetch(&guc->fw); - - intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED); } /* @@ -544,7 +542,7 @@ int intel_guc_suspend(struct intel_guc *guc) * If GuC communication is enabled but submission is not supported, * we do not need to suspend the GuC. */ - if (!intel_guc_submission_is_enabled(guc)) + if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc)) return 0; /* @@ -609,7 +607,7 @@ int intel_guc_resume(struct intel_guc *guc) * we do not need to resume the GuC but we do need to enable the * GuC communication on resume (above). 
*/ - if (!intel_guc_submission_is_enabled(guc)) + if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc)) return 0; return intel_guc_send(guc, action, ARRAY_SIZE(action)); @@ -678,8 +676,8 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) if (IS_ERR(vma)) goto err; - flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - ret = i915_vma_pin(vma, 0, 0, flags); + flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + ret = i915_ggtt_pin(vma, 0, flags); if (ret) { vma = ERR_PTR(ret); goto err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 910d49590068..4594ccbeaa34 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -39,7 +39,7 @@ struct intel_guc { void (*disable)(struct intel_guc *guc); } interrupts; - bool submission_supported; + bool submission_selected; struct i915_vma *ads_vma; struct __guc_ads_blob *ads_blob; @@ -143,29 +143,36 @@ static inline bool intel_guc_is_supported(struct intel_guc *guc) return intel_uc_fw_is_supported(&guc->fw); } -static inline bool intel_guc_is_enabled(struct intel_guc *guc) +static inline bool intel_guc_is_wanted(struct intel_guc *guc) { return intel_uc_fw_is_enabled(&guc->fw); } -static inline bool intel_guc_is_running(struct intel_guc *guc) +static inline bool intel_guc_is_used(struct intel_guc *guc) +{ + GEM_BUG_ON(__intel_uc_fw_status(&guc->fw) == INTEL_UC_FIRMWARE_SELECTED); + return intel_uc_fw_is_available(&guc->fw); +} + +static inline bool intel_guc_is_fw_running(struct intel_guc *guc) { return intel_uc_fw_is_running(&guc->fw); } +static inline bool intel_guc_is_ready(struct intel_guc *guc) +{ + return intel_guc_is_fw_running(guc) && intel_guc_ct_enabled(&guc->ct); +} + static inline int intel_guc_sanitize(struct intel_guc *guc) { intel_uc_fw_sanitize(&guc->fw); + intel_guc_ct_sanitize(&guc->ct); guc->mmio_msg = 0; return 0; } -static inline bool 
intel_guc_is_submission_supported(struct intel_guc *guc) -{ - return guc->submission_supported; -} - static inline void intel_guc_enable_msg(struct intel_guc *guc, u32 mask) { spin_lock_irq(&guc->irq_lock); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index c6f971a049f9..11742fca0e9e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -5,11 +5,15 @@ #include "i915_drv.h" #include "intel_guc_ct.h" +#include "gt/intel_gt.h" +#define CT_ERROR(_ct, _fmt, ...) \ + DRM_DEV_ERROR(ct_to_dev(_ct), "CT: " _fmt, ##__VA_ARGS__) #ifdef CONFIG_DRM_I915_DEBUG_GUC -#define CT_DEBUG_DRIVER(...) DRM_DEBUG_DRIVER(__VA_ARGS__) +#define CT_DEBUG(_ct, _fmt, ...) \ + DRM_DEV_DEBUG_DRIVER(ct_to_dev(_ct), "CT: " _fmt, ##__VA_ARGS__) #else -#define CT_DEBUG_DRIVER(...) do { } while (0) +#define CT_DEBUG(...) do { } while (0) #endif struct ct_request { @@ -48,6 +52,21 @@ static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct) return container_of(ct, struct intel_guc, ct); } +static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct) +{ + return guc_to_gt(ct_to_guc(ct)); +} + +static inline struct drm_i915_private *ct_to_i915(struct intel_guc_ct *ct) +{ + return ct_to_gt(ct)->i915; +} + +static inline struct device *ct_to_dev(struct intel_guc_ct *ct) +{ + return ct_to_i915(ct)->drm.dev; +} + static inline const char *guc_ct_buffer_type_to_str(u32 type) { switch (type) { @@ -63,7 +82,6 @@ static inline const char *guc_ct_buffer_type_to_str(u32 type) static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, u32 cmds_addr, u32 size) { - CT_DEBUG_DRIVER("CT: init addr=%#x size=%u\n", cmds_addr, size); memset(desc, 0, sizeof(*desc)); desc->addr = cmds_addr; desc->size = size; @@ -72,8 +90,6 @@ static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc) { - CT_DEBUG_DRIVER("CT: desc %p 
reset head=%u tail=%u\n", - desc, desc->head, desc->tail); desc->head = 0; desc->tail = 0; desc->is_in_error = 0; @@ -89,31 +105,40 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc, sizeof(struct guc_ct_buffer_desc), type }; - int err; /* Can't use generic send(), CT registration must go over MMIO */ - err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); - if (err) - DRM_ERROR("CT: register %s buffer failed; err=%d\n", - guc_ct_buffer_type_to_str(type), err); + return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); +} + +static int ct_register_buffer(struct intel_guc_ct *ct, u32 desc_addr, u32 type) +{ + int err = guc_action_register_ct_buffer(ct_to_guc(ct), desc_addr, type); + + if (unlikely(err)) + CT_ERROR(ct, "Failed to register %s buffer (err=%d)\n", + guc_ct_buffer_type_to_str(type), err); return err; } -static int guc_action_deregister_ct_buffer(struct intel_guc *guc, - u32 type) +static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type) { u32 action[] = { INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER, CTB_OWNER_HOST, type }; - int err; /* Can't use generic send(), CT deregistration must go over MMIO */ - err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); - if (err) - DRM_ERROR("CT: deregister %s buffer failed; err=%d\n", - guc_ct_buffer_type_to_str(type), err); + return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); +} + +static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type) +{ + int err = guc_action_deregister_ct_buffer(ct_to_guc(ct), type); + + if (unlikely(err)) + CT_ERROR(ct, "Failed to deregister %s buffer (err=%d)\n", + guc_ct_buffer_type_to_str(type), err); return err; } @@ -157,13 +182,12 @@ int intel_guc_ct_init(struct intel_guc_ct *ct) */ err = intel_guc_allocate_and_map_vma(guc, PAGE_SIZE, &ct->vma, &blob); - if (err) { - DRM_ERROR("CT: channel allocation failed; err=%d\n", err); + if (unlikely(err)) { + CT_ERROR(ct, "Failed to 
allocate CT channel (err=%d)\n", err); return err; } - CT_DEBUG_DRIVER("CT: vma base=%#x\n", - intel_guc_ggtt_offset(guc, ct->vma)); + CT_DEBUG(ct, "vma base=%#x\n", intel_guc_ggtt_offset(guc, ct->vma)); /* store pointers to desc and cmds */ for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) { @@ -197,7 +221,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct) int intel_guc_ct_enable(struct intel_guc_ct *ct) { struct intel_guc *guc = ct_to_guc(ct); - u32 base; + u32 base, cmds, size; int err; int i; @@ -212,23 +236,23 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) */ for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) { GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); - guc_ct_buffer_desc_init(ct->ctbs[i].desc, - base + PAGE_SIZE/4 * i + PAGE_SIZE/2, - PAGE_SIZE/4); + cmds = base + PAGE_SIZE / 4 * i + PAGE_SIZE / 2; + size = PAGE_SIZE / 4; + CT_DEBUG(ct, "%d: addr=%#x size=%u\n", i, cmds, size); + guc_ct_buffer_desc_init(ct->ctbs[i].desc, cmds, size); } - /* register buffers, starting wirh RECV buffer - * descriptors are in first half of the blob + /* + * Register both CT buffers starting with RECV buffer. + * Descriptors are in first half of the blob. 
*/ - err = guc_action_register_ct_buffer(guc, - base + PAGE_SIZE/4 * CTB_RECV, - INTEL_GUC_CT_BUFFER_TYPE_RECV); + err = ct_register_buffer(ct, base + PAGE_SIZE / 4 * CTB_RECV, + INTEL_GUC_CT_BUFFER_TYPE_RECV); if (unlikely(err)) goto err_out; - err = guc_action_register_ct_buffer(guc, - base + PAGE_SIZE/4 * CTB_SEND, - INTEL_GUC_CT_BUFFER_TYPE_SEND); + err = ct_register_buffer(ct, base + PAGE_SIZE / 4 * CTB_SEND, + INTEL_GUC_CT_BUFFER_TYPE_SEND); if (unlikely(err)) goto err_deregister; @@ -237,10 +261,9 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) return 0; err_deregister: - guc_action_deregister_ct_buffer(guc, - INTEL_GUC_CT_BUFFER_TYPE_RECV); + ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV); err_out: - DRM_ERROR("CT: can't open channel; err=%d\n", err); + CT_ERROR(ct, "Failed to open open CT channel (err=%d)\n", err); return err; } @@ -256,18 +279,16 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct) ct->enabled = false; - if (intel_guc_is_running(guc)) { - guc_action_deregister_ct_buffer(guc, - INTEL_GUC_CT_BUFFER_TYPE_SEND); - guc_action_deregister_ct_buffer(guc, - INTEL_GUC_CT_BUFFER_TYPE_RECV); + if (intel_guc_is_fw_running(guc)) { + ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_SEND); + ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV); } } static u32 ct_get_next_fence(struct intel_guc_ct *ct) { /* For now it's trivial */ - return ++ct->requests.next_fence; + return ++ct->requests.last_fence; } /** @@ -288,25 +309,33 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct) * ^-----------------len-------------------^ */ -static int ctb_write(struct intel_guc_ct_buffer *ctb, - const u32 *action, - u32 len /* in dwords */, - u32 fence, - bool want_response) +static int ct_write(struct intel_guc_ct *ct, + const u32 *action, + u32 len /* in dwords */, + u32 fence, + bool want_response) { + struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND]; struct guc_ct_buffer_desc *desc = ctb->desc; - u32 head = desc->head / 4; /* in dwords */ - 
u32 tail = desc->tail / 4; /* in dwords */ - u32 size = desc->size / 4; /* in dwords */ - u32 used; /* in dwords */ + u32 head = desc->head; + u32 tail = desc->tail; + u32 size = desc->size; + u32 used; u32 header; u32 *cmds = ctb->cmds; unsigned int i; - GEM_BUG_ON(desc->size % 4); - GEM_BUG_ON(desc->head % 4); - GEM_BUG_ON(desc->tail % 4); - GEM_BUG_ON(tail >= size); + if (unlikely(desc->is_in_error)) + return -EPIPE; + + if (unlikely(!IS_ALIGNED(head | tail | size, 4) || + (tail | head) >= size)) + goto corrupted; + + /* later calculations will be done in dwords */ + head /= 4; + tail /= 4; + size /= 4; /* * tail == head condition indicates empty. GuC FW does not support @@ -332,9 +361,8 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb, (want_response ? GUC_CT_MSG_SEND_STATUS : 0) | (action[0] << GUC_CT_MSG_ACTION_SHIFT); - CT_DEBUG_DRIVER("CT: writing %*ph %*ph %*ph\n", - 4, &header, 4, &fence, - 4 * (len - 1), &action[1]); + CT_DEBUG(ct, "writing %*ph %*ph %*ph\n", + 4, &header, 4, &fence, 4 * (len - 1), &action[1]); cmds[tail] = header; tail = (tail + 1) % size; @@ -346,12 +374,17 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb, cmds[tail] = action[i]; tail = (tail + 1) % size; } + GEM_BUG_ON(tail > size); /* now update desc tail (back in bytes) */ desc->tail = tail * 4; - GEM_BUG_ON(desc->tail > desc->size); - return 0; + +corrupted: + CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n", + desc->addr, desc->head, desc->tail, desc->size); + desc->is_in_error = 1; + return -EPIPE; } /** @@ -469,7 +502,7 @@ static int ct_send(struct intel_guc_ct *ct, list_add_tail(&request.link, &ct->requests.pending); spin_unlock_irqrestore(&ct->requests.lock, flags); - err = ctb_write(ctb, action, len, fence, !!response_buf); + err = ct_write(ct, action, len, fence, !!response_buf); if (unlikely(err)) goto unlink; @@ -526,11 +559,11 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, ret = ct_send(ct, action, len, 
response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { - DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n", - action[0], ret, status); + CT_ERROR(ct, "Sending action %#x failed (err=%d status=%#X)\n", + action[0], ret, status); } else if (unlikely(ret)) { - CT_DEBUG_DRIVER("CT: send action %#x returned %d (%#x)\n", - action[0], ret, ret); + CT_DEBUG(ct, "send action %#x returned %d (%#x)\n", + action[0], ret, ret); } mutex_unlock(&guc->send_mutex); @@ -552,22 +585,29 @@ static inline bool ct_header_is_response(u32 header) return !!(header & GUC_CT_MSG_IS_RESPONSE); } -static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data) +static int ct_read(struct intel_guc_ct *ct, u32 *data) { + struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV]; struct guc_ct_buffer_desc *desc = ctb->desc; - u32 head = desc->head / 4; /* in dwords */ - u32 tail = desc->tail / 4; /* in dwords */ - u32 size = desc->size / 4; /* in dwords */ + u32 head = desc->head; + u32 tail = desc->tail; + u32 size = desc->size; u32 *cmds = ctb->cmds; - s32 available; /* in dwords */ + s32 available; unsigned int len; unsigned int i; - GEM_BUG_ON(desc->size % 4); - GEM_BUG_ON(desc->head % 4); - GEM_BUG_ON(desc->tail % 4); - GEM_BUG_ON(tail >= size); - GEM_BUG_ON(head >= size); + if (unlikely(desc->is_in_error)) + return -EPIPE; + + if (unlikely(!IS_ALIGNED(head | tail | size, 4) || + (tail | head) >= size)) + goto corrupted; + + /* later calculations will be done in dwords */ + head /= 4; + tail /= 4; + size /= 4; /* tail == head condition indicates empty */ available = tail - head; @@ -577,7 +617,7 @@ static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data) /* beware of buffer wrap case */ if (unlikely(available < 0)) available += size; - CT_DEBUG_DRIVER("CT: available %d (%u:%u)\n", available, head, tail); + CT_DEBUG(ct, "available %d (%u:%u)\n", available, head, tail); GEM_BUG_ON(available < 0); data[0] = cmds[head]; @@ -586,23 +626,29 @@ static int ctb_read(struct 
intel_guc_ct_buffer *ctb, u32 *data) /* message len with header */ len = ct_header_get_len(data[0]) + 1; if (unlikely(len > (u32)available)) { - DRM_ERROR("CT: incomplete message %*ph %*ph %*ph\n", - 4, data, - 4 * (head + available - 1 > size ? - size - head : available - 1), &cmds[head], - 4 * (head + available - 1 > size ? - available - 1 - size + head : 0), &cmds[0]); - return -EPROTO; + CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n", + 4, data, + 4 * (head + available - 1 > size ? + size - head : available - 1), &cmds[head], + 4 * (head + available - 1 > size ? + available - 1 - size + head : 0), &cmds[0]); + goto corrupted; } for (i = 1; i < len; i++) { data[i] = cmds[head]; head = (head + 1) % size; } - CT_DEBUG_DRIVER("CT: received %*ph\n", 4 * len, data); + CT_DEBUG(ct, "received %*ph\n", 4 * len, data); desc->head = head * 4; return 0; + +corrupted: + CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n", + desc->addr, desc->head, desc->tail, desc->size); + desc->is_in_error = 1; + return -EPIPE; } /** @@ -627,7 +673,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) { u32 header = msg[0]; u32 len = ct_header_get_len(header); - u32 msglen = len + 1; /* total message length including header */ + u32 msgsize = (len + 1) * sizeof(u32); /* msg size in bytes w/header */ u32 fence; u32 status; u32 datalen; @@ -639,7 +685,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) /* Response payload shall at least include fence and status */ if (unlikely(len < 2)) { - DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg); + CT_ERROR(ct, "Corrupted response %*ph\n", msgsize, msg); return -EPROTO; } @@ -649,22 +695,22 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) /* Format of the status follows RESPONSE message */ if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) { - DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg); + CT_ERROR(ct, "Corrupted response 
%*ph\n", msgsize, msg); return -EPROTO; } - CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status); + CT_DEBUG(ct, "response fence %u status %#x\n", fence, status); spin_lock(&ct->requests.lock); list_for_each_entry(req, &ct->requests.pending, link) { if (unlikely(fence != req->fence)) { - CT_DEBUG_DRIVER("CT: request %u awaits response\n", - req->fence); + CT_DEBUG(ct, "request %u awaits response\n", + req->fence); continue; } if (unlikely(datalen > req->response_len)) { - DRM_ERROR("CT: response %u too long %*ph\n", - req->fence, 4 * msglen, msg); + CT_ERROR(ct, "Response for %u is too long %*ph\n", + req->fence, msgsize, msg); datalen = 0; } if (datalen) @@ -677,7 +723,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) spin_unlock(&ct->requests.lock); if (!found) - DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg); + CT_ERROR(ct, "Unsolicited response %*ph\n", msgsize, msg); return 0; } @@ -687,7 +733,7 @@ static void ct_process_request(struct intel_guc_ct *ct, struct intel_guc *guc = ct_to_guc(ct); int ret; - CT_DEBUG_DRIVER("CT: request %x %*ph\n", action, 4 * len, payload); + CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload); switch (action) { case INTEL_GUC_ACTION_DEFAULT: @@ -698,8 +744,8 @@ static void ct_process_request(struct intel_guc_ct *ct, default: fail_unexpected: - DRM_ERROR("CT: unexpected request %x %*ph\n", - action, 4 * len, payload); + CT_ERROR(ct, "Unexpected request %x %*ph\n", + action, 4 * len, payload); break; } } @@ -767,18 +813,18 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg) { u32 header = msg[0]; u32 len = ct_header_get_len(header); - u32 msglen = len + 1; /* total message length including header */ + u32 msgsize = (len + 1) * sizeof(u32); /* msg size in bytes w/header */ struct ct_incoming_request *request; unsigned long flags; GEM_BUG_ON(ct_header_is_response(header)); - request = kmalloc(sizeof(*request) + 4 * msglen, GFP_ATOMIC); + request = 
kmalloc(sizeof(*request) + msgsize, GFP_ATOMIC); if (unlikely(!request)) { - DRM_ERROR("CT: dropping request %*ph\n", 4 * msglen, msg); + CT_ERROR(ct, "Dropping request %*ph\n", msgsize, msg); return 0; /* XXX: -ENOMEM ? */ } - memcpy(request->msg, msg, 4 * msglen); + memcpy(request->msg, msg, msgsize); spin_lock_irqsave(&ct->requests.lock, flags); list_add_tail(&request->link, &ct->requests.incoming); @@ -794,7 +840,6 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg) */ void intel_guc_ct_event_handler(struct intel_guc_ct *ct) { - struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV]; u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */ int err = 0; @@ -804,7 +849,7 @@ void intel_guc_ct_event_handler(struct intel_guc_ct *ct) } do { - err = ctb_read(ctb, msg); + err = ct_read(ct, msg); if (err) break; @@ -813,10 +858,4 @@ void intel_guc_ct_event_handler(struct intel_guc_ct *ct) else err = ct_handle_request(ct, msg); } while (!err); - - if (GEM_WARN_ON(err == -EPROTO)) { - DRM_ERROR("CT: corrupted message detected!\n"); - ctb->desc->is_in_error = 1; - } } - diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 3e7fe237cfa5..494a51a5200f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -49,7 +49,7 @@ struct intel_guc_ct { struct intel_guc_ct_buffer ctbs[2]; struct { - u32 next_fence; /* fence to be used with next request to send */ + u32 last_fence; /* last fence used to send request */ spinlock_t lock; /* protects pending requests list */ struct list_head pending; /* requests waiting for response */ @@ -65,6 +65,11 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct); int intel_guc_ct_enable(struct intel_guc_ct *ct); void intel_guc_ct_disable(struct intel_guc_ct *ct); +static inline void intel_guc_ct_sanitize(struct intel_guc_ct *ct) +{ + ct->enabled = false; +} + static inline bool intel_guc_ct_enabled(struct intel_guc_ct 
*ct) { return ct->enabled; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 9e42324fdecd..fe7778c28d2d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -456,9 +456,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->active.requests, sched.link) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); - + i915_request_set_error_once(rq, -EIO); i915_request_mark_complete(rq); } @@ -660,12 +658,9 @@ void intel_guc_submission_disable(struct intel_guc *guc) guc_proc_desc_fini(guc); } -static bool __guc_submission_support(struct intel_guc *guc) +static bool __guc_submission_selected(struct intel_guc *guc) { - /* XXX: GuC submission is unavailable for now */ - return false; - - if (!intel_guc_is_supported(guc)) + if (!intel_guc_submission_is_supported(guc)) return false; return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION; @@ -673,7 +668,7 @@ static bool __guc_submission_support(struct intel_guc *guc) void intel_guc_submission_init_early(struct intel_guc *guc) { - guc->submission_supported = __guc_submission_support(guc); + guc->submission_selected = __guc_submission_selected(guc); } bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index e402a2932592..4cf9d3e50263 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -8,7 +8,8 @@ #include <linux/types.h> -struct intel_guc; +#include "intel_guc.h" + struct intel_engine_cs; void intel_guc_submission_init_early(struct intel_guc *guc); @@ -20,4 +21,20 @@ int intel_guc_preempt_work_create(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct 
intel_guc *guc); bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine); +static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) +{ + /* XXX: GuC submission is unavailable for now */ + return false; +} + +static inline bool intel_guc_submission_is_wanted(struct intel_guc *guc) +{ + return guc->submission_selected; +} + +static inline bool intel_guc_submission_is_used(struct intel_guc *guc) +{ + return intel_guc_is_used(guc) && intel_guc_submission_is_wanted(guc); +} + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 32a069841c14..a74b65694512 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -121,19 +121,20 @@ int intel_huc_init(struct intel_huc *huc) if (err) goto out_fini; + intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE); + return 0; out_fini: intel_uc_fw_fini(&huc->fw); out: - intel_uc_fw_cleanup_fetch(&huc->fw); - DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "failed with %d\n", err); + i915_probe_error(i915, "failed with %d\n", err); return err; } void intel_huc_fini(struct intel_huc *huc) { - if (!intel_uc_fw_is_available(&huc->fw)) + if (!intel_uc_fw_is_loadable(&huc->fw)) return; intel_huc_rsa_data_destroy(huc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index 644c059fe01d..a40b9cfc6c22 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -41,11 +41,17 @@ static inline bool intel_huc_is_supported(struct intel_huc *huc) return intel_uc_fw_is_supported(&huc->fw); } -static inline bool intel_huc_is_enabled(struct intel_huc *huc) +static inline bool intel_huc_is_wanted(struct intel_huc *huc) { return intel_uc_fw_is_enabled(&huc->fw); } +static inline bool intel_huc_is_used(struct intel_huc *huc) +{ + GEM_BUG_ON(__intel_uc_fw_status(&huc->fw) == INTEL_UC_FIRMWARE_SELECTED); + return 
intel_uc_fw_is_available(&huc->fw); +} + static inline bool intel_huc_is_authenticated(struct intel_huc *huc) { return intel_uc_fw_is_running(&huc->fw); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index eee193bf2cc4..9cdf4cbe691c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -20,7 +20,7 @@ void intel_huc_fw_init_early(struct intel_huc *huc) struct drm_i915_private *i915 = gt->i915; intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC, - intel_uc_uses_guc(uc), + intel_uc_wants_guc(uc), INTEL_INFO(i915)->platform, INTEL_REVID(i915)); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 64934a876a50..a4cbe06e06bd 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -48,17 +48,17 @@ static void __confirm_options(struct intel_uc *uc) DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "enable_guc=%d (guc:%s submission:%s huc:%s)\n", i915_modparams.enable_guc, - yesno(intel_uc_uses_guc(uc)), - yesno(intel_uc_uses_guc_submission(uc)), - yesno(intel_uc_uses_huc(uc))); + yesno(intel_uc_wants_guc(uc)), + yesno(intel_uc_wants_guc_submission(uc)), + yesno(intel_uc_wants_huc(uc))); if (i915_modparams.enable_guc == -1) return; if (i915_modparams.enable_guc == 0) { - GEM_BUG_ON(intel_uc_uses_guc(uc)); - GEM_BUG_ON(intel_uc_uses_guc_submission(uc)); - GEM_BUG_ON(intel_uc_uses_huc(uc)); + GEM_BUG_ON(intel_uc_wants_guc(uc)); + GEM_BUG_ON(intel_uc_wants_guc_submission(uc)); + GEM_BUG_ON(intel_uc_wants_huc(uc)); return; } @@ -93,7 +93,7 @@ void intel_uc_init_early(struct intel_uc *uc) __confirm_options(uc); - if (intel_uc_uses_guc(uc)) + if (intel_uc_wants_guc(uc)) uc->ops = &uc_ops_on; else uc->ops = &uc_ops_off; @@ -257,13 +257,13 @@ static void __uc_fetch_firmwares(struct intel_uc *uc) { int err; - GEM_BUG_ON(!intel_uc_uses_guc(uc)); + GEM_BUG_ON(!intel_uc_wants_guc(uc)); err = 
intel_uc_fw_fetch(&uc->guc.fw); if (err) return; - if (intel_uc_uses_huc(uc)) + if (intel_uc_wants_huc(uc)) intel_uc_fw_fetch(&uc->huc.fw); } @@ -273,25 +273,38 @@ static void __uc_cleanup_firmwares(struct intel_uc *uc) intel_uc_fw_cleanup_fetch(&uc->guc.fw); } -static void __uc_init(struct intel_uc *uc) +static int __uc_init(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; struct intel_huc *huc = &uc->huc; int ret; - GEM_BUG_ON(!intel_uc_uses_guc(uc)); + GEM_BUG_ON(!intel_uc_wants_guc(uc)); + + if (!intel_uc_uses_guc(uc)) + return 0; + + if (i915_inject_probe_failure(uc_to_gt(uc)->i915)) + return -ENOMEM; /* XXX: GuC submission is unavailable for now */ - GEM_BUG_ON(intel_uc_supports_guc_submission(uc)); + GEM_BUG_ON(intel_uc_uses_guc_submission(uc)); ret = intel_guc_init(guc); - if (ret) { - intel_uc_fw_cleanup_fetch(&huc->fw); - return; + if (ret) + return ret; + + if (intel_uc_uses_huc(uc)) { + ret = intel_huc_init(huc); + if (ret) + goto out_guc; } - if (intel_uc_uses_huc(uc)) - intel_huc_init(huc); + return 0; + +out_guc: + intel_guc_fini(guc); + return ret; } static void __uc_fini(struct intel_uc *uc) @@ -402,12 +415,12 @@ static int __uc_init_hw(struct intel_uc *uc) int ret, attempts; GEM_BUG_ON(!intel_uc_supports_guc(uc)); - GEM_BUG_ON(!intel_uc_uses_guc(uc)); + GEM_BUG_ON(!intel_uc_wants_guc(uc)); - if (!intel_uc_fw_is_available(&guc->fw)) { + if (!intel_uc_fw_is_loadable(&guc->fw)) { ret = __uc_check_hw(uc) || intel_uc_fw_is_overridden(&guc->fw) || - intel_uc_supports_guc_submission(uc) ? + intel_uc_wants_guc_submission(uc) ? 
intel_uc_fw_status_to_error(guc->fw.status) : 0; goto err_out; } @@ -459,14 +472,14 @@ static int __uc_init_hw(struct intel_uc *uc) if (ret) goto err_communication; - if (intel_uc_supports_guc_submission(uc)) + if (intel_uc_uses_guc_submission(uc)) intel_guc_submission_enable(guc); dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n", intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path, guc->fw.major_ver_found, guc->fw.minor_ver_found, "submission", - enableddisabled(intel_uc_supports_guc_submission(uc))); + enableddisabled(intel_uc_uses_guc_submission(uc))); if (intel_uc_uses_huc(uc)) { dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n", @@ -505,10 +518,10 @@ static void __uc_fini_hw(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; - if (!intel_guc_is_running(guc)) + if (!intel_guc_is_fw_running(guc)) return; - if (intel_uc_supports_guc_submission(uc)) + if (intel_uc_uses_guc_submission(uc)) intel_guc_submission_disable(guc); if (guc_communication_enabled(guc)) @@ -527,7 +540,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; - if (!intel_guc_is_running(guc)) + if (!intel_guc_is_ready(guc)) return; guc_disable_communication(guc); @@ -539,7 +552,7 @@ void intel_uc_runtime_suspend(struct intel_uc *uc) struct intel_guc *guc = &uc->guc; int err; - if (!intel_guc_is_running(guc)) + if (!intel_guc_is_ready(guc)) return; err = intel_guc_suspend(guc); @@ -554,7 +567,7 @@ void intel_uc_suspend(struct intel_uc *uc) struct intel_guc *guc = &uc->guc; intel_wakeref_t wakeref; - if (!intel_guc_is_running(guc)) + if (!intel_guc_is_ready(guc)) return; with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref) @@ -566,7 +579,7 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication) struct intel_guc *guc = &uc->guc; int err; - if (!intel_guc_is_running(guc)) + if (!intel_guc_is_fw_running(guc)) return 0; /* Make sure we enable communication if and only if it's disabled */ diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 49c913524686..5ae7b50b7dc1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -7,6 +7,7 @@ #define _INTEL_UC_H_ #include "intel_guc.h" +#include "intel_guc_submission.h" #include "intel_huc.h" #include "i915_params.h" @@ -16,7 +17,7 @@ struct intel_uc_ops { int (*sanitize)(struct intel_uc *uc); void (*init_fw)(struct intel_uc *uc); void (*fini_fw)(struct intel_uc *uc); - void (*init)(struct intel_uc *uc); + int (*init)(struct intel_uc *uc); void (*fini)(struct intel_uc *uc); int (*init_hw)(struct intel_uc *uc); void (*fini_hw)(struct intel_uc *uc); @@ -40,35 +41,44 @@ void intel_uc_runtime_suspend(struct intel_uc *uc); int intel_uc_resume(struct intel_uc *uc); int intel_uc_runtime_resume(struct intel_uc *uc); -static inline bool intel_uc_supports_guc(struct intel_uc *uc) -{ - return intel_guc_is_supported(&uc->guc); -} - -static inline bool intel_uc_uses_guc(struct intel_uc *uc) -{ - return intel_guc_is_enabled(&uc->guc); -} +/* + * We need to know as early as possible if we're going to use GuC or not to + * take the correct setup paths. Additionally, once we've started loading the + * GuC, it is unsafe to keep executing without it because some parts of the HW, + * a subset of which is not cleaned on GT reset, will start expecting the GuC FW + * to be running. + * To solve both these requirements, we commit to using the microcontrollers if + * the relevant modparam is set and the blobs are found on the system. At this + * stage, the only thing that can stop us from attempting to load the blobs on + * the HW and use them is a fundamental issue (e.g. no memory for our + * structures); if we hit such a problem during driver load we're broken even + * without GuC, so there is no point in trying to fall back. 
+ * + * Given the above, we can be in one of 4 states, with the last one implying + * we're committed to using the microcontroller: + * - Not supported: not available in HW and/or firmware not defined. + * - Supported: available in HW and firmware defined. + * - Wanted: supported + enabled in modparam. + * - In use: wanted + firmware found on the system and successfully fetched. + */ -static inline bool intel_uc_supports_guc_submission(struct intel_uc *uc) -{ - return intel_guc_is_submission_supported(&uc->guc); +#define __uc_state_checker(x, func, state, required) \ +static inline bool intel_uc_##state##_##func(struct intel_uc *uc) \ +{ \ + return intel_##func##_is_##required(&uc->x); \ } -static inline bool intel_uc_uses_guc_submission(struct intel_uc *uc) -{ - return intel_guc_is_submission_supported(&uc->guc); -} +#define uc_state_checkers(x, func) \ +__uc_state_checker(x, func, supports, supported) \ +__uc_state_checker(x, func, wants, wanted) \ +__uc_state_checker(x, func, uses, used) -static inline bool intel_uc_supports_huc(struct intel_uc *uc) -{ - return intel_uc_supports_guc(uc); -} +uc_state_checkers(guc, guc); +uc_state_checkers(huc, huc); +uc_state_checkers(guc, guc_submission); -static inline bool intel_uc_uses_huc(struct intel_uc *uc) -{ - return intel_huc_is_enabled(&uc->huc); -} +#undef uc_state_checkers +#undef __uc_state_checker #define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ @@ -80,7 +90,7 @@ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ intel_uc_ops_function(sanitize, sanitize, int, 0); intel_uc_ops_function(fetch_firmwares, init_fw, void, ); intel_uc_ops_function(cleanup_firmwares, fini_fw, void, ); -intel_uc_ops_function(init, init, void, ); +intel_uc_ops_function(init, init, int, 0); intel_uc_ops_function(fini, fini, void, ); intel_uc_ops_function(init_hw, init_hw, int, 0); intel_uc_ops_function(fini_hw, fini_hw, void, ); diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 8ee0a0c7f447..18c755203688 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -43,7 +43,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * features. */ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ @@ -279,7 +279,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) err = i915_inject_probe_error(i915, -ENXIO); if (err) - return err; + goto fail; __force_fw_fetch_failures(uc_fw, -EINVAL); __force_fw_fetch_failures(uc_fw, -ESTALE); @@ -501,7 +501,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) if (err) return err; - if (!intel_uc_fw_is_available(uc_fw)) + if (!intel_uc_fw_is_loadable(uc_fw)) return -ENOEXEC; /* Call custom loader */ @@ -544,7 +544,10 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw) void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) { - intel_uc_fw_cleanup_fetch(uc_fw); + if (i915_gem_object_has_pinned_pages(uc_fw->obj)) + i915_gem_object_unpin_pages(uc_fw->obj); + + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE); } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 1f30543d0d2d..888ff0de0244 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -29,8 +29,11 @@ struct intel_gt; * | | SELECTED | * +------------+- / | \ -+ * | | MISSING <--/ | \--> ERROR | - * | fetch | | | - * | | /------> AVAILABLE <---<-----------\ | + * | fetch | V | + * | | AVAILABLE | + * +------------+- | 
-+ + * | init | V | + * | | /------> LOADABLE <----<-----------\ | * +------------+- \ / \ \ \ -+ * | | FAIL <--< \--> TRANSFERRED \ | * | upload | \ / \ / | @@ -46,6 +49,7 @@ enum intel_uc_fw_status { INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */ INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */ INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */ + INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */ INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */ INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ INTEL_UC_FIRMWARE_RUNNING /* init/auth done */ @@ -115,6 +119,8 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) return "ERROR"; case INTEL_UC_FIRMWARE_AVAILABLE: return "AVAILABLE"; + case INTEL_UC_FIRMWARE_LOADABLE: + return "LOADABLE"; case INTEL_UC_FIRMWARE_FAIL: return "FAIL"; case INTEL_UC_FIRMWARE_TRANSFERRED: @@ -143,6 +149,7 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status) case INTEL_UC_FIRMWARE_SELECTED: return -ESTALE; case INTEL_UC_FIRMWARE_AVAILABLE: + case INTEL_UC_FIRMWARE_LOADABLE: case INTEL_UC_FIRMWARE_TRANSFERRED: case INTEL_UC_FIRMWARE_RUNNING: return 0; @@ -184,6 +191,11 @@ static inline bool intel_uc_fw_is_available(struct intel_uc_fw *uc_fw) return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_AVAILABLE; } +static inline bool intel_uc_fw_is_loadable(struct intel_uc_fw *uc_fw) +{ + return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_LOADABLE; +} + static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw) { return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_TRANSFERRED; @@ -202,7 +214,7 @@ static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw) static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw) { if (intel_uc_fw_is_loaded(uc_fw)) - intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE); + intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOADABLE); } static inline 
u32 __intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) |