Diffstat (limited to 'drivers/gpu/drm/i915/gt')
60 files changed, 1904 insertions, 897 deletions
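One pattern recurs throughout the PPGTT hunks below (gen6_ppgtt.c, gen8_ppgtt.c): instead of unwinding construction failures by hand (free_scratch()/kfree()), the create paths install vm.cleanup early, NULL-guard the partially-initialized fields (vm->scratch[0], ppgtt->pd), and funnel every failure into a single i915_vm_put() that runs the normal teardown. A minimal standalone sketch of that idiom — not the driver's code; vm_object, vm_put and vm_create are invented names:

/*
 * Minimal standalone sketch (not driver code) of the error-path idiom the
 * gen6/gen8 PPGTT hunks below switch to: make the cleanup path tolerate
 * partially-constructed state, then funnel every failure into a single
 * reference drop. All names here are invented for illustration.
 */
#include <stdlib.h>

struct vm_object {
	int refcount;
	void *scratch;	/* stays NULL if allocation fails; cleanup checks */
	void *pd;	/* stays NULL if allocation fails; cleanup checks */
};

static void vm_cleanup(struct vm_object *vm)
{
	/* free(NULL) is a no-op, so partial construction is tolerated */
	free(vm->pd);
	free(vm->scratch);
	free(vm);
}

static void vm_put(struct vm_object *vm)
{
	if (--vm->refcount == 0)
		vm_cleanup(vm);
}

static struct vm_object *vm_create(void)
{
	struct vm_object *vm = calloc(1, sizeof(*vm));

	if (!vm)
		return NULL;
	vm->refcount = 1;

	vm->scratch = malloc(64);
	if (!vm->scratch)
		goto err_put;

	vm->pd = malloc(64);
	if (!vm->pd)
		goto err_put;

	return vm;

err_put:
	/* single error label, mirroring the err_put: labels in the diff */
	vm_put(vm);
	return NULL;
}

int main(void)
{
	struct vm_object *vm = vm_create();

	if (vm)
		vm_put(vm);
	return 0;
}

The "vm->scratch[0] = NULL" assignments, the early return added to free_scratch(), and the "if (ppgtt->pd)" guard in gen8_ppgtt_cleanup() below play the role of the NULL checks in this sketch.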
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1bb766c79dcb..5aaacc53fa4c 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -247,6 +247,7 @@ err_scratch1: i915_gem_object_put(vm->scratch[1]); err_scratch0: i915_gem_object_put(vm->scratch[0]); + vm->scratch[0] = NULL; return ret; } @@ -268,9 +269,10 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) gen6_ppgtt_free_pd(ppgtt); free_scratch(vm); - mutex_destroy(&ppgtt->flush); + if (ppgtt->base.pd) + free_pd(&ppgtt->base.vm, ppgtt->base.pd); - free_pd(&ppgtt->base.vm, ppgtt->base.pd); + mutex_destroy(&ppgtt->flush); } static void pd_vma_bind(struct i915_address_space *vm, @@ -449,19 +451,17 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) err = gen6_ppgtt_init_scratch(ppgtt); if (err) - goto err_free; + goto err_put; ppgtt->base.pd = gen6_alloc_top_pd(ppgtt); if (IS_ERR(ppgtt->base.pd)) { err = PTR_ERR(ppgtt->base.pd); - goto err_scratch; + goto err_put; } return &ppgtt->base; -err_scratch: - free_scratch(&ppgtt->base.vm); -err_free: - kfree(ppgtt); +err_put: + i915_vm_put(&ppgtt->base.vm); return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 98645797962f..e49fa6fa6aee 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -165,10 +165,12 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } -u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg) +u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg) { + u32 gsi_offset = gt->uncore->gsi_offset; + *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; - *cs++ = i915_mmio_reg_offset(inv_reg); + *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; *cs++ = AUX_INV; *cs++ = MI_NOOP; @@ -254,7 +256,8 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) if (!HAS_FLAT_CCS(rq->engine->i915)) { /* hsdes: 1809175790 */ - cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV); + cs = gen12_emit_aux_table_inv(rq->engine->gt, + cs, GEN12_GFX_CCS_AUX_NV); } *cs++ = preparser_disable(false); @@ -313,9 +316,11 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) if (aux_inv) { /* hsdes: 1809175790 */ if (rq->engine->class == VIDEO_DECODE_CLASS) - cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV); + cs = gen12_emit_aux_table_inv(rq->engine->gt, + cs, GEN12_VD0_AUX_NV); else - cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV); + cs = gen12_emit_aux_table_inv(rq->engine->gt, + cs, GEN12_VE0_AUX_NV); } if (mode & EMIT_INVALIDATE) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h index 32e3d2b831bb..e4d24c811dd6 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h @@ -13,6 +13,7 @@ #include "intel_gt_regs.h" #include "intel_gpu_commands.h" +struct intel_gt; struct i915_request; int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode); @@ -45,7 +46,7 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); -u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg); +u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg); static inline u32 * __gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 
flags1, u32 offset) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index c7bd5d71b03e..2128b7a72a25 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -196,7 +196,10 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) if (intel_vgpu_active(vm->i915)) gen8_ppgtt_notify_vgt(ppgtt, false); - __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top); + if (ppgtt->pd) + __gen8_ppgtt_cleanup(vm, ppgtt->pd, + gen8_pd_top_count(vm), vm->top); + free_scratch(vm); } @@ -803,8 +806,10 @@ static int gen8_init_scratch(struct i915_address_space *vm) struct drm_i915_gem_object *obj; obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); - if (IS_ERR(obj)) + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); goto free_scratch; + } ret = map_pt_dma(vm, obj); if (ret) { @@ -823,7 +828,8 @@ static int gen8_init_scratch(struct i915_address_space *vm) free_scratch: while (i--) i915_gem_object_put(vm->scratch[i]); - return -ENOMEM; + vm->scratch[0] = NULL; + return ret; } static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) @@ -901,6 +907,7 @@ err_pd: struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags) { + struct i915_page_directory *pd; struct i915_ppgtt *ppgtt; int err; @@ -946,21 +953,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; } - err = gen8_init_scratch(&ppgtt->vm); - if (err) - goto err_free; - - ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm); - if (IS_ERR(ppgtt->pd)) { - err = PTR_ERR(ppgtt->pd); - goto err_free_scratch; - } - - if (!i915_vm_is_4lvl(&ppgtt->vm)) { - err = gen8_preallocate_top_level_pdp(ppgtt); - if (err) - goto err_free_pd; - } + ppgtt->vm.pte_encode = gen8_pte_encode; ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->vm.insert_entries = gen8_ppgtt_insert; @@ -971,22 +964,31 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; ppgtt->vm.foreach = gen8_ppgtt_foreach; + ppgtt->vm.cleanup = gen8_ppgtt_cleanup; - ppgtt->vm.pte_encode = gen8_pte_encode; + err = gen8_init_scratch(&ppgtt->vm); + if (err) + goto err_put; + + pd = gen8_alloc_top_pd(&ppgtt->vm); + if (IS_ERR(pd)) { + err = PTR_ERR(pd); + goto err_put; + } + ppgtt->pd = pd; + + if (!i915_vm_is_4lvl(&ppgtt->vm)) { + err = gen8_preallocate_top_level_pdp(ppgtt); + if (err) + goto err_put; + } if (intel_vgpu_active(gt->i915)) gen8_ppgtt_notify_vgt(ppgtt, true); - ppgtt->vm.cleanup = gen8_ppgtt_cleanup; - return ppgtt; -err_free_pd: - __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd, - gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top); -err_free_scratch: - free_scratch(&ppgtt->vm); -err_free: - kfree(ppgtt); +err_put: + i915_vm_put(&ppgtt->vm); return ERR_PTR(err); } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 654a092ed3d6..e94365b08f1e 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -614,13 +614,12 @@ bool intel_context_ban(struct intel_context *ce, struct i915_request *rq) return ret; } -bool intel_context_exit_nonpersistent(struct intel_context *ce, - struct i915_request *rq) +bool intel_context_revoke(struct intel_context *ce) { bool ret = intel_context_set_exiting(ce); if (ce->ops->revoke) - ce->ops->revoke(ce, rq, ce->engine->props.preempt_timeout_ms); + ce->ops->revoke(ce, NULL, ce->engine->props.preempt_timeout_ms); return 
ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 8e2d70630c49..be09fb2e883a 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -329,8 +329,7 @@ static inline bool intel_context_set_exiting(struct intel_context *ce) return test_and_set_bit(CONTEXT_EXITING, &ce->flags); } -bool intel_context_exit_nonpersistent(struct intel_context *ce, - struct i915_request *rq); +bool intel_context_revoke(struct intel_context *ce); static inline bool intel_context_force_single_submission(const struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 37fa813af766..1f7188129cd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -654,16 +654,83 @@ bool gen11_vdbox_has_sfc(struct intel_gt *gt, */ if ((gt->info.sfc_mask & BIT(physical_vdbox / 2)) == 0) return false; - else if (GRAPHICS_VER(i915) == 12) + else if (MEDIA_VER(i915) >= 12) return (physical_vdbox % 2 == 0) || !(BIT(physical_vdbox - 1) & vdbox_mask); - else if (GRAPHICS_VER(i915) == 11) + else if (MEDIA_VER(i915) == 11) return logical_vdbox % 2 == 0; - MISSING_CASE(GRAPHICS_VER(i915)); return false; } +static void engine_mask_apply_media_fuses(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + unsigned int logical_vdbox = 0; + unsigned int i; + u32 media_fuse, fuse1; + u16 vdbox_mask; + u16 vebox_mask; + + if (MEDIA_VER(gt->i915) < 11) + return; + + /* + * On newer platforms the fusing register is called 'enable' and has + * enable semantics, while on older platforms it is called 'disable' + * and bits have disable semantices. + */ + media_fuse = intel_uncore_read(gt->uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); + if (MEDIA_VER_FULL(i915) < IP_VER(12, 50)) + media_fuse = ~media_fuse; + + vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; + vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> + GEN11_GT_VEBOX_DISABLE_SHIFT; + + if (MEDIA_VER_FULL(i915) >= IP_VER(12, 50)) { + fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1); + gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1); + } else { + gt->info.sfc_mask = ~0; + } + + for (i = 0; i < I915_MAX_VCS; i++) { + if (!HAS_ENGINE(gt, _VCS(i))) { + vdbox_mask &= ~BIT(i); + continue; + } + + if (!(BIT(i) & vdbox_mask)) { + gt->info.engine_mask &= ~BIT(_VCS(i)); + drm_dbg(&i915->drm, "vcs%u fused off\n", i); + continue; + } + + if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask)) + gt->info.vdbox_sfc_access |= BIT(i); + logical_vdbox++; + } + drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n", + vdbox_mask, VDBOX_MASK(gt)); + GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt)); + + for (i = 0; i < I915_MAX_VECS; i++) { + if (!HAS_ENGINE(gt, _VECS(i))) { + vebox_mask &= ~BIT(i); + continue; + } + + if (!(BIT(i) & vebox_mask)) { + gt->info.engine_mask &= ~BIT(_VECS(i)); + drm_dbg(&i915->drm, "vecs%u fused off\n", i); + } + } + drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n", + vebox_mask, VEBOX_MASK(gt)); + GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt)); +} + static void engine_mask_apply_compute_fuses(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -672,7 +739,10 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt) unsigned long ccs_mask; unsigned int i; - if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + if (GRAPHICS_VER(i915) < 11) + return; + + if (hweight32(CCS_MASK(gt)) <= 1) return; 
ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask, @@ -694,6 +764,10 @@ static void engine_mask_apply_copy_fuses(struct intel_gt *gt) unsigned long meml3_mask; unsigned long quad; + if (!(GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60) && + GRAPHICS_VER_FULL(i915) < IP_VER(12, 70))) + return; + meml3_mask = intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3); meml3_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, meml3_mask); @@ -727,75 +801,11 @@ static void engine_mask_apply_copy_fuses(struct intel_gt *gt) */ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) { - struct drm_i915_private *i915 = gt->i915; struct intel_gt_info *info = >->info; - struct intel_uncore *uncore = gt->uncore; - unsigned int logical_vdbox = 0; - unsigned int i; - u32 media_fuse, fuse1; - u16 vdbox_mask; - u16 vebox_mask; - - info->engine_mask = INTEL_INFO(i915)->platform_engine_mask; - - if (GRAPHICS_VER(i915) < 11) - return info->engine_mask; - /* - * On newer platforms the fusing register is called 'enable' and has - * enable semantics, while on older platforms it is called 'disable' - * and bits have disable semantices. - */ - media_fuse = intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); - if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) - media_fuse = ~media_fuse; - - vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; - vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> - GEN11_GT_VEBOX_DISABLE_SHIFT; - - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { - fuse1 = intel_uncore_read(uncore, HSW_PAVP_FUSE1); - gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1); - } else { - gt->info.sfc_mask = ~0; - } - - for (i = 0; i < I915_MAX_VCS; i++) { - if (!HAS_ENGINE(gt, _VCS(i))) { - vdbox_mask &= ~BIT(i); - continue; - } - - if (!(BIT(i) & vdbox_mask)) { - info->engine_mask &= ~BIT(_VCS(i)); - drm_dbg(&i915->drm, "vcs%u fused off\n", i); - continue; - } - - if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask)) - gt->info.vdbox_sfc_access |= BIT(i); - logical_vdbox++; - } - drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n", - vdbox_mask, VDBOX_MASK(gt)); - GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt)); - - for (i = 0; i < I915_MAX_VECS; i++) { - if (!HAS_ENGINE(gt, _VECS(i))) { - vebox_mask &= ~BIT(i); - continue; - } - - if (!(BIT(i) & vebox_mask)) { - info->engine_mask &= ~BIT(_VECS(i)); - drm_dbg(&i915->drm, "vecs%u fused off\n", i); - } - } - drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n", - vebox_mask, VEBOX_MASK(gt)); - GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt)); + GEM_BUG_ON(!info->engine_mask); + engine_mask_apply_media_fuses(gt); engine_mask_apply_compute_fuses(gt); engine_mask_apply_copy_fuses(gt); @@ -1688,9 +1698,9 @@ bool intel_engine_irq_enable(struct intel_engine_cs *engine) return false; /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); + spin_lock(engine->gt->irq_lock); engine->irq_enable(engine); - spin_unlock(&engine->gt->irq_lock); + spin_unlock(engine->gt->irq_lock); return true; } @@ -1701,9 +1711,9 @@ void intel_engine_irq_disable(struct intel_engine_cs *engine) return; /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); + spin_lock(engine->gt->irq_lock); engine->irq_disable(engine); - spin_unlock(&engine->gt->irq_lock); + spin_unlock(engine->gt->irq_lock); } void intel_engines_reset_default_submission(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 889f0df3940b..fe1a0d5fd4b1 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -110,6 +110,7 @@ #define RING_SBBSTATE(base) _MMIO((base) + 0x118) /* hsw+ */ #define RING_SBBADDR_UDW(base) _MMIO((base) + 0x11c) /* gen8+ */ #define RING_BBADDR(base) _MMIO((base) + 0x140) +#define RING_BB_OFFSET(base) _MMIO((base) + 0x158) #define RING_BBADDR_UDW(base) _MMIO((base) + 0x168) /* gen8+ */ #define CCID(base) _MMIO((base) + 0x180) #define CCID_EN BIT(0) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 15a915bb4088..2049a00417af 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -16,7 +16,9 @@ #include "intel_ggtt_gmch.h" #include "intel_gt.h" #include "intel_gt_regs.h" +#include "intel_pci_config.h" #include "i915_drv.h" +#include "i915_pci.h" #include "i915_scatterlist.h" #include "i915_utils.h" #include "i915_vgpu.h" @@ -869,8 +871,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) u32 pte_flags; int ret; - GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); - phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); + GEM_WARN_ON(pci_resource_len(pdev, GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); + phys_addr = pci_resource_start(pdev, GTTMMADR_BAR) + gen6_gttadr_offset(i915); /* * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range @@ -930,7 +932,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) u16 snb_gmch_ctl; if (!HAS_LMEM(i915)) { - ggtt->gmadr = pci_resource(pdev, 2); + if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR)) + return -ENXIO; + + ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR); ggtt->mappable_end = resource_size(&ggtt->gmadr); } @@ -1084,7 +1089,10 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) unsigned int size; u16 snb_gmch_ctl; - ggtt->gmadr = pci_resource(pdev, 2); + if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR)) + return -ENXIO; + + ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR); ggtt->mappable_end = resource_size(&ggtt->gmadr); /* @@ -1267,10 +1275,16 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) atomic_read(&vma->flags) & I915_VMA_BIND_MASK; GEM_BUG_ON(!was_bound); - if (!retained_ptes) + if (!retained_ptes) { + /* + * Clear the bound flags of the vma resource to allow + * ptes to be repopulated. + */ + vma->resource->bound_flags = 0; vma->ops->bind_vma(vm, NULL, vma->resource, obj ? obj->cache_level : 0, was_bound); + } if (obj) { /* only used during resume => exclusive access */ write_domain_objs |= fetch_and_zero(&obj->write_domain); obj->read_domains |= I915_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index 6ebda3d65086..ea775e601686 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -727,7 +727,7 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) * bit17 dependent, and so we need to also prevent the pages * from being moved. 
*/ - i915->quirks |= QUIRK_PIN_SWIZZLED_PAGES; + i915->gem_quirks |= GEM_QUIRK_PIN_SWIZZLED_PAGES; swizzle_x = I915_BIT_6_SWIZZLE_NONE; swizzle_y = I915_BIT_6_SWIZZLE_NONE; } @@ -842,7 +842,6 @@ void intel_ggtt_init_fences(struct i915_ggtt *ggtt) INIT_LIST_HEAD(&ggtt->fence_list); INIT_LIST_HEAD(&ggtt->userfault_list); - intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm); detect_bit_6_swizzle(ggtt); diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 0e494028b81d..7af6db3194dd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -7,6 +7,7 @@ #include <linux/mei_aux.h> #include "i915_drv.h" #include "i915_reg.h" +#include "gem/i915_gem_region.h" #include "gt/intel_gsc.h" #include "gt/intel_gt.h" @@ -36,10 +37,56 @@ static int gsc_irq_init(int irq) return irq_set_chip_data(irq, NULL); } +static int +gsc_ext_om_alloc(struct intel_gsc *gsc, struct intel_gsc_intf *intf, size_t size) +{ + struct intel_gt *gt = gsc_to_gt(gsc); + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_lmem(gt->i915, size, + I915_BO_ALLOC_CONTIGUOUS | + I915_BO_ALLOC_CPU_CLEAR); + if (IS_ERR(obj)) { + drm_err(>->i915->drm, "Failed to allocate gsc memory\n"); + return PTR_ERR(obj); + } + + err = i915_gem_object_pin_pages_unlocked(obj); + if (err) { + drm_err(>->i915->drm, "Failed to pin pages for gsc memory\n"); + goto out_put; + } + + intf->gem_obj = obj; + + return 0; + +out_put: + i915_gem_object_put(obj); + return err; +} + +static void gsc_ext_om_destroy(struct intel_gsc_intf *intf) +{ + struct drm_i915_gem_object *obj = fetch_and_zero(&intf->gem_obj); + + if (!obj) + return; + + if (i915_gem_object_has_pinned_pages(obj)) + i915_gem_object_unpin_pages(obj); + + i915_gem_object_put(obj); +} + struct gsc_def { const char *name; unsigned long bar; size_t bar_size; + bool use_polling; + bool slow_firmware; + size_t lmem_size; }; /* gsc resources and definitions (HECI1 and HECI2) */ @@ -54,11 +101,25 @@ static const struct gsc_def gsc_def_dg1[] = { } }; +static const struct gsc_def gsc_def_xehpsdv[] = { + { + /* HECI1 not enabled on the device. 
*/ + }, + { + .name = "mei-gscfi", + .bar = DG1_GSC_HECI2_BASE, + .bar_size = GSC_BAR_LENGTH, + .use_polling = true, + .slow_firmware = true, + } +}; + static const struct gsc_def gsc_def_dg2[] = { { .name = "mei-gsc", .bar = DG2_GSC_HECI1_BASE, .bar_size = GSC_BAR_LENGTH, + .lmem_size = SZ_4M, }, { .name = "mei-gscfi", @@ -75,26 +136,32 @@ static void gsc_release_dev(struct device *dev) kfree(adev); } -static void gsc_destroy_one(struct intel_gsc_intf *intf) +static void gsc_destroy_one(struct drm_i915_private *i915, + struct intel_gsc *gsc, unsigned int intf_id) { + struct intel_gsc_intf *intf = &gsc->intf[intf_id]; + if (intf->adev) { auxiliary_device_delete(&intf->adev->aux_dev); auxiliary_device_uninit(&intf->adev->aux_dev); intf->adev = NULL; } + if (intf->irq >= 0) irq_free_desc(intf->irq); intf->irq = -1; + + gsc_ext_om_destroy(intf); } -static void gsc_init_one(struct drm_i915_private *i915, - struct intel_gsc_intf *intf, +static void gsc_init_one(struct drm_i915_private *i915, struct intel_gsc *gsc, unsigned int intf_id) { struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct mei_aux_device *adev; struct auxiliary_device *aux_dev; const struct gsc_def *def; + struct intel_gsc_intf *intf = &gsc->intf[intf_id]; int ret; intf->irq = -1; @@ -105,6 +172,8 @@ static void gsc_init_one(struct drm_i915_private *i915, if (IS_DG1(i915)) { def = &gsc_def_dg1[intf_id]; + } else if (IS_XEHPSDV(i915)) { + def = &gsc_def_xehpsdv[intf_id]; } else if (IS_DG2(i915)) { def = &gsc_def_dg2[intf_id]; } else { @@ -117,10 +186,14 @@ static void gsc_init_one(struct drm_i915_private *i915, return; } + /* skip irq initialization */ + if (def->use_polling) + goto add_device; + intf->irq = irq_alloc_desc(0); if (intf->irq < 0) { drm_err(&i915->drm, "gsc irq error %d\n", intf->irq); - return; + goto fail; } ret = gsc_irq_init(intf->irq); @@ -129,16 +202,31 @@ static void gsc_init_one(struct drm_i915_private *i915, goto fail; } +add_device: adev = kzalloc(sizeof(*adev), GFP_KERNEL); if (!adev) goto fail; + if (def->lmem_size) { + drm_dbg(&i915->drm, "setting up GSC lmem\n"); + + if (gsc_ext_om_alloc(gsc, intf, def->lmem_size)) { + drm_err(&i915->drm, "setting up gsc extended operational memory failed\n"); + kfree(adev); + goto fail; + } + + adev->ext_op_mem.start = i915_gem_object_get_dma_address(intf->gem_obj, 0); + adev->ext_op_mem.end = adev->ext_op_mem.start + def->lmem_size; + } + adev->irq = intf->irq; adev->bar.parent = &pdev->resource[0]; adev->bar.start = def->bar + pdev->resource[0].start; adev->bar.end = adev->bar.start + def->bar_size - 1; adev->bar.flags = IORESOURCE_MEM; adev->bar.desc = IORES_DESC_NONE; + adev->slow_firmware = def->slow_firmware; aux_dev = &adev->aux_dev; aux_dev->name = def->name; @@ -165,7 +253,7 @@ static void gsc_init_one(struct drm_i915_private *i915, return; fail: - gsc_destroy_one(intf); + gsc_destroy_one(i915, gsc, intf->id); } static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) @@ -182,10 +270,8 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) return; } - if (gt->gsc.intf[intf_id].irq < 0) { - drm_err_ratelimited(>->i915->drm, "GSC irq: irq not set"); + if (gt->gsc.intf[intf_id].irq < 0) return; - } ret = generic_handle_irq(gt->gsc.intf[intf_id].irq); if (ret) @@ -208,7 +294,7 @@ void intel_gsc_init(struct intel_gsc *gsc, struct drm_i915_private *i915) return; for (i = 0; i < INTEL_GSC_NUM_INTERFACES; i++) - gsc_init_one(i915, &gsc->intf[i], i); + gsc_init_one(i915, gsc, i); } void intel_gsc_fini(struct intel_gsc *gsc) @@ 
-220,5 +306,5 @@ void intel_gsc_fini(struct intel_gsc *gsc) return; for (i = 0; i < INTEL_GSC_NUM_INTERFACES; i++) - gsc_destroy_one(&gsc->intf[i]); + gsc_destroy_one(gt->i915, gsc, i); } diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.h b/drivers/gpu/drm/i915/gt/intel_gsc.h index 68582f912b21..fcac1775e9c3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.h +++ b/drivers/gpu/drm/i915/gt/intel_gsc.h @@ -20,11 +20,14 @@ struct mei_aux_device; /** * struct intel_gsc - graphics security controller + * + * @gem_obj: scratch memory GSC operations * @intf : gsc interface */ struct intel_gsc { struct intel_gsc_intf { struct mei_aux_device *adev; + struct drm_i915_gem_object *gem_obj; int irq; unsigned int id; } intf[INTEL_GSC_NUM_INTERFACES]; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f435e06125aa..d0b03a928b9a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -26,18 +26,22 @@ #include "intel_gt_requests.h" #include "intel_migrate.h" #include "intel_mocs.h" +#include "intel_pci_config.h" #include "intel_pm.h" #include "intel_rc6.h" #include "intel_renderstate.h" #include "intel_rps.h" +#include "intel_sa_media.h" #include "intel_gt_sysfs.h" #include "intel_uncore.h" #include "shmem_utils.h" -static void __intel_gt_init_early(struct intel_gt *gt) +void intel_gt_common_init_early(struct intel_gt *gt) { - spin_lock_init(>->irq_lock); + spin_lock_init(gt->irq_lock); + INIT_LIST_HEAD(>->lmem_userfault_list); + mutex_init(>->lmem_userfault_lock); INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -57,14 +61,19 @@ static void __intel_gt_init_early(struct intel_gt *gt) } /* Preliminary initialization of Tile 0 */ -void intel_root_gt_init_early(struct drm_i915_private *i915) +int intel_root_gt_init_early(struct drm_i915_private *i915) { struct intel_gt *gt = to_gt(i915); gt->i915 = i915; gt->uncore = &i915->uncore; + gt->irq_lock = drmm_kzalloc(&i915->drm, sizeof(*gt->irq_lock), GFP_KERNEL); + if (!gt->irq_lock) + return -ENOMEM; - __intel_gt_init_early(gt); + intel_gt_common_init_early(gt); + + return 0; } static int intel_gt_probe_lmem(struct intel_gt *gt) @@ -780,26 +789,25 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) int ret; if (!gt_is_root(gt)) { - struct intel_uncore_mmio_debug *mmio_debug; struct intel_uncore *uncore; + spinlock_t *irq_lock; - uncore = kzalloc(sizeof(*uncore), GFP_KERNEL); + uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), GFP_KERNEL); if (!uncore) return -ENOMEM; - mmio_debug = kzalloc(sizeof(*mmio_debug), GFP_KERNEL); - if (!mmio_debug) { - kfree(uncore); + irq_lock = drmm_kzalloc(>->i915->drm, sizeof(*irq_lock), GFP_KERNEL); + if (!irq_lock) return -ENOMEM; - } gt->uncore = uncore; - gt->uncore->debug = mmio_debug; + gt->irq_lock = irq_lock; - __intel_gt_init_early(gt); + intel_gt_common_init_early(gt); } intel_uncore_init_early(gt->uncore, gt); + intel_wakeref_auto_init(>->userfault_wakeref, gt->uncore->rpm); ret = intel_uncore_setup_mmio(gt->uncore, phys_addr); if (ret) @@ -810,27 +818,17 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) return 0; } -static void -intel_gt_tile_cleanup(struct intel_gt *gt) -{ - intel_uncore_cleanup_mmio(gt->uncore); - - if (!gt_is_root(gt)) { - kfree(gt->uncore->debug); - kfree(gt->uncore); - kfree(gt); - } -} - int intel_gt_probe_all(struct drm_i915_private *i915) { struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct intel_gt *gt = &i915->gt0; + const struct 
intel_gt_definition *gtdef; phys_addr_t phys_addr; unsigned int mmio_bar; + unsigned int i; int ret; - mmio_bar = GRAPHICS_VER(i915) == 2 ? 1 : 0; + mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR; phys_addr = pci_resource_start(pdev, mmio_bar); /* @@ -838,14 +836,74 @@ int intel_gt_probe_all(struct drm_i915_private *i915) * and it has been already initialized early during probe * in i915_driver_probe() */ + gt->i915 = i915; + gt->name = "Primary GT"; + gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; + + drm_dbg(&i915->drm, "Setting up %s\n", gt->name); ret = intel_gt_tile_setup(gt, phys_addr); if (ret) return ret; i915->gt[0] = gt; - /* TODO: add more tiles */ + if (!HAS_EXTRA_GT_LIST(i915)) + return 0; + + for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]; + gtdef->name != NULL; + i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) { + gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL); + if (!gt) { + ret = -ENOMEM; + goto err; + } + + gt->i915 = i915; + gt->name = gtdef->name; + gt->type = gtdef->type; + gt->info.engine_mask = gtdef->engine_mask; + gt->info.id = i; + + drm_dbg(&i915->drm, "Setting up %s\n", gt->name); + if (GEM_WARN_ON(range_overflows_t(resource_size_t, + gtdef->mapping_base, + SZ_16M, + pci_resource_len(pdev, mmio_bar)))) { + ret = -ENODEV; + goto err; + } + + switch (gtdef->type) { + case GT_TILE: + ret = intel_gt_tile_setup(gt, phys_addr + gtdef->mapping_base); + break; + + case GT_MEDIA: + ret = intel_sa_mediagt_setup(gt, phys_addr + gtdef->mapping_base, + gtdef->gsi_offset); + break; + + case GT_PRIMARY: + /* Primary GT should not appear in extra GT list */ + default: + MISSING_CASE(gtdef->type); + ret = -ENODEV; + } + + if (ret) + goto err; + + i915->gt[i] = gt; + } + return 0; + +err: + i915_probe_error(i915, "Failed to initialize %s! 
(%d)\n", gtdef->name, ret); + intel_gt_release_all(i915); + + return ret; } int intel_gt_tiles_init(struct drm_i915_private *i915) @@ -868,10 +926,8 @@ void intel_gt_release_all(struct drm_i915_private *i915) struct intel_gt *gt; unsigned int id; - for_each_gt(gt, i915, id) { - intel_gt_tile_cleanup(gt); + for_each_gt(gt, i915, id) i915->gt[id] = NULL; - } } void intel_gt_info_print(const struct intel_gt_info *info, diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 40b06adf509a..2ee582e287c8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -44,7 +44,8 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc) return container_of(gsc, struct intel_gt, gsc); } -void intel_root_gt_init_early(struct drm_i915_private *i915); +void intel_gt_common_init_early(struct intel_gt *gt); +int intel_root_gt_init_early(struct drm_i915_private *i915); int intel_gt_assign_ggtt(struct intel_gt *gt); int intel_gt_init_mmio(struct intel_gt *gt); int __must_check intel_gt_init_hw(struct intel_gt *gt); @@ -54,7 +55,6 @@ void intel_gt_driver_register(struct intel_gt *gt); void intel_gt_driver_unregister(struct intel_gt *gt); void intel_gt_driver_remove(struct intel_gt *gt); void intel_gt_driver_release(struct intel_gt *gt); - void intel_gt_driver_late_release_all(struct drm_i915_private *i915); int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index d5d1b04dbcad..3f656d3dba9a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -26,26 +26,6 @@ static u32 read_reference_ts_freq(struct intel_uncore *uncore) return base_freq + frac_freq; } -static u32 gen9_get_crystal_clock_freq(struct intel_uncore *uncore, - u32 rpm_config_reg) -{ - u32 f19_2_mhz = 19200000; - u32 f24_mhz = 24000000; - u32 crystal_clock = - (rpm_config_reg & GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> - GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; - - switch (crystal_clock) { - case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: - return f19_2_mhz; - case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: - return f24_mhz; - default: - MISSING_CASE(crystal_clock); - return 0; - } -} - static u32 gen11_get_crystal_clock_freq(struct intel_uncore *uncore, u32 rpm_config_reg) { @@ -72,98 +52,106 @@ static u32 gen11_get_crystal_clock_freq(struct intel_uncore *uncore, } } -static u32 read_clock_frequency(struct intel_uncore *uncore) +static u32 gen11_read_clock_frequency(struct intel_uncore *uncore) { - u32 f12_5_mhz = 12500000; - u32 f19_2_mhz = 19200000; - u32 f24_mhz = 24000000; + u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); + u32 freq = 0; + + /* + * Note that on gen11+, the clock frequency may be reconfigured. + * We do not, and we assume nobody else does. + * + * First figure out the reference frequency. There are 2 ways + * we can compute the frequency, either through the + * TIMESTAMP_OVERRIDE register or through RPM_CONFIG. CTC_MODE + * tells us which one we should use. + */ + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(uncore); + } else { + u32 c0 = intel_uncore_read(uncore, RPM_CONFIG0); + + freq = gen11_get_crystal_clock_freq(uncore, c0); - if (GRAPHICS_VER(uncore->i915) <= 4) { - /* - * PRMs say: - * - * "The value in this register increments once every 16 - * hclks." 
(through the “Clocking Configuration” - * (“CLKCFG”) MCHBAR register) - */ - return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16; - } else if (GRAPHICS_VER(uncore->i915) <= 8) { /* - * PRMs say: - * - * "The PCU TSC counts 10ns increments; this timestamp - * reflects bits 38:3 of the TSC (i.e. 80ns granularity, - * rolling over every 1.5 hours). + * Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycle). */ - return f12_5_mhz; - } else if (GRAPHICS_VER(uncore->i915) <= 9) { - u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); - u32 freq = 0; - - if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { - freq = read_reference_ts_freq(uncore); - } else { - freq = IS_GEN9_LP(uncore->i915) ? f19_2_mhz : f24_mhz; - - /* - * Now figure out how the command stream's timestamp - * register increments from this frequency (it might - * increment only every few clock cycle). - */ - freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >> - CTC_SHIFT_PARAMETER_SHIFT); - } - - return freq; - } else if (GRAPHICS_VER(uncore->i915) <= 12) { - u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); - u32 freq = 0; + freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + } + + return freq; +} + +static u32 gen9_read_clock_frequency(struct intel_uncore *uncore) +{ + u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); + u32 freq = 0; + + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(uncore); + } else { + freq = IS_GEN9_LP(uncore->i915) ? 19200000 : 24000000; /* - * First figure out the reference frequency. There are 2 ways - * we can compute the frequency, either through the - * TIMESTAMP_OVERRIDE register or through RPM_CONFIG. CTC_MODE - * tells us which one we should use. + * Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycle). */ - if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { - freq = read_reference_ts_freq(uncore); - } else { - u32 c0 = intel_uncore_read(uncore, RPM_CONFIG0); - - if (GRAPHICS_VER(uncore->i915) >= 11) - freq = gen11_get_crystal_clock_freq(uncore, c0); - else - freq = gen9_get_crystal_clock_freq(uncore, c0); - - /* - * Now figure out how the command stream's timestamp - * register increments from this frequency (it might - * increment only every few clock cycle). - */ - freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); - } - - return freq; + freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >> + CTC_SHIFT_PARAMETER_SHIFT); } - MISSING_CASE("Unknown gen, unable to read command streamer timestamp frequency\n"); - return 0; + return freq; } -void intel_gt_init_clock_frequency(struct intel_gt *gt) +static u32 gen5_read_clock_frequency(struct intel_uncore *uncore) { /* - * Note that on gen11+, the clock frequency may be reconfigured. - * We do not, and we assume nobody else does. + * PRMs say: + * + * "The PCU TSC counts 10ns increments; this timestamp + * reflects bits 38:3 of the TSC (i.e. 80ns granularity, + * rolling over every 1.5 hours). + */ + return 12500000; +} + +static u32 gen2_read_clock_frequency(struct intel_uncore *uncore) +{ + /* + * PRMs say: + * + * "The value in this register increments once every 16 + * hclks." 
(through the “Clocking Configuration” + * (“CLKCFG”) MCHBAR register) */ + return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16; +} + +static u32 read_clock_frequency(struct intel_uncore *uncore) +{ + if (GRAPHICS_VER(uncore->i915) >= 11) + return gen11_read_clock_frequency(uncore); + else if (GRAPHICS_VER(uncore->i915) >= 9) + return gen9_read_clock_frequency(uncore); + else if (GRAPHICS_VER(uncore->i915) >= 5) + return gen5_read_clock_frequency(uncore); + else + return gen2_read_clock_frequency(uncore); +} + +void intel_gt_init_clock_frequency(struct intel_gt *gt) +{ gt->clock_frequency = read_clock_frequency(gt->uncore); - if (gt->clock_frequency) - gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1); /* Icelake appears to use another fixed frequency for CTX_TIMESTAMP */ if (GRAPHICS_VER(gt->i915) == 11) gt->clock_period_ns = NSEC_PER_SEC / 13750000; + else if (gt->clock_frequency) + gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1); GT_TRACE(gt, "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n", diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 3a72d4fd0214..f26882fdc24c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -29,7 +29,7 @@ gen11_gt_engine_identity(struct intel_gt *gt, u32 timeout_ts; u32 ident; - lockdep_assert_held(>->irq_lock); + lockdep_assert_held(gt->irq_lock); raw_reg_write(regs, GEN11_IIR_REG_SELECTOR(bank), BIT(bit)); @@ -59,11 +59,17 @@ static void gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, const u16 iir) { + struct intel_gt *media_gt = gt->i915->media_gt; + if (instance == OTHER_GUC_INSTANCE) return guc_irq_handler(>->uc.guc, iir); + if (instance == OTHER_MEDIA_GUC_INSTANCE && media_gt) + return guc_irq_handler(&media_gt->uc.guc, iir); if (instance == OTHER_GTPM_INSTANCE) return gen11_rps_irq_handler(>->rps, iir); + if (instance == OTHER_MEDIA_GTPM_INSTANCE && media_gt) + return gen11_rps_irq_handler(&media_gt->rps, iir); if (instance == OTHER_KCR_INSTANCE) return intel_pxp_irq_handler(>->pxp, iir); @@ -81,6 +87,18 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class, { struct intel_engine_cs *engine; + /* + * Platforms with standalone media have their media engines in another + * GT. 
+ */ + if (MEDIA_VER(gt->i915) >= 13 && + (class == VIDEO_DECODE_CLASS || class == VIDEO_ENHANCEMENT_CLASS)) { + if (!gt->i915->media_gt) + goto err; + + gt = gt->i915->media_gt; + } + if (instance <= MAX_ENGINE_INSTANCE) engine = gt->engine_class[class][instance]; else @@ -89,6 +107,7 @@ gen11_engine_irq_handler(struct intel_gt *gt, const u8 class, if (likely(engine)) return intel_engine_cs_irq(engine, iir); +err: WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", class, instance); } @@ -120,7 +139,7 @@ gen11_gt_bank_handler(struct intel_gt *gt, const unsigned int bank) unsigned long intr_dw; unsigned int bit; - lockdep_assert_held(>->irq_lock); + lockdep_assert_held(gt->irq_lock); intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); @@ -138,14 +157,14 @@ void gen11_gt_irq_handler(struct intel_gt *gt, const u32 master_ctl) { unsigned int bank; - spin_lock(>->irq_lock); + spin_lock(gt->irq_lock); for (bank = 0; bank < 2; bank++) { if (master_ctl & GEN11_GT_DW_IRQ(bank)) gen11_gt_bank_handler(gt, bank); } - spin_unlock(>->irq_lock); + spin_unlock(gt->irq_lock); } bool gen11_gt_reset_one_iir(struct intel_gt *gt, @@ -154,7 +173,7 @@ bool gen11_gt_reset_one_iir(struct intel_gt *gt, void __iomem * const regs = gt->uncore->regs; u32 dw; - lockdep_assert_held(>->irq_lock); + lockdep_assert_held(gt->irq_lock); dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); if (dw & BIT(bit)) { @@ -310,9 +329,9 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) if (!HAS_L3_DPF(gt->i915)) return; - spin_lock(>->irq_lock); + spin_lock(gt->irq_lock); gen5_gt_disable_irq(gt, GT_PARITY_ERROR(gt->i915)); - spin_unlock(>->irq_lock); + spin_unlock(gt->irq_lock); if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1) gt->i915->l3_parity.which_slice |= 1 << 1; @@ -434,7 +453,7 @@ static void gen5_gt_update_irq(struct intel_gt *gt, u32 interrupt_mask, u32 enabled_irq_mask) { - lockdep_assert_held(>->irq_lock); + lockdep_assert_held(gt->irq_lock); GEM_BUG_ON(enabled_irq_mask & ~interrupt_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index a334787a4939..6c9a46452364 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -55,6 +55,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt) for (tmp = 1, intel_gt_pm_get(gt); tmp; \ intel_gt_pm_put(gt), tmp = 0) +/** + * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent + * it to sleep, run some code and then asynchrously put the reference + * away. + * + * @gt: pointer to the gt + * @wf: pointer to a temporary wakeref. 
+ */ #define with_intel_gt_pm_if_awake(gt, wf) \ for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 40bdd4cb629f..108b9e76c32e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -504,8 +504,8 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) drm_puts(p, "no P-state info available\n"); } - drm_printf(p, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk); - drm_printf(p, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq); + drm_printf(p, "Current CD clock frequency: %d kHz\n", i915->display.cdclk.hw.cdclk); + drm_printf(p, "Max CD clock frequency: %d kHz\n", i915->display.cdclk.max_cdclk_freq); drm_printf(p, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq); intel_runtime_pm_put(uncore->rpm, wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c index 11060f5a4c89..52f2a28b2058 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c @@ -37,7 +37,7 @@ static void gen6_gt_pm_update_irq(struct intel_gt *gt, WARN_ON(enabled_irq_mask & ~interrupt_mask); - lockdep_assert_held(>->irq_lock); + lockdep_assert_held(gt->irq_lock); new_val = gt->pm_imr; new_val &= ~interrupt_mask; @@ -64,7 +64,7 @@ void gen6_gt_pm_reset_iir(struct intel_gt *gt, u32 reset_mask) struct intel_uncore *uncore = gt->uncore; i915_reg_t reg = GRAPHICS_VER(gt->i915) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR; - lockdep_assert_held(>->irq_lock); + lockdep_assert_held(gt->irq_lock); intel_uncore_write(uncore, reg, reset_mask); intel_uncore_write(uncore, reg, reset_mask); @@ -92,7 +92,7 @@ static void write_pm_ier(struct intel_gt *gt) void gen6_gt_pm_enable_irq(struct intel_gt *gt, u32 enable_mask) { - lockdep_assert_held(>->irq_lock); + lockdep_assert_held(gt->irq_lock); gt->pm_ier |= enable_mask; write_pm_ier(gt); @@ -101,7 +101,7 @@ void gen6_gt_pm_enable_irq(struct intel_gt *gt, u32 enable_mask) void gen6_gt_pm_disable_irq(struct intel_gt *gt, u32 disable_mask) { - lockdep_assert_held(>->irq_lock); + lockdep_assert_held(gt->irq_lock); gt->pm_ier &= ~disable_mask; gen6_gt_pm_mask_irq(gt, disable_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 60d6eb5f245b..2275ee47da95 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -259,6 +259,9 @@ #define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0) #define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) +#define DRAW_WATERMARK _MMIO(0x26c0) +#define VERT_WM_VAL REG_GENMASK(9, 0) + #define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ #define RENDER_HWS_PGA_GEN7 _MMIO(0x4080) @@ -374,6 +377,9 @@ #define CHICKEN_RASTER_1 _MMIO(0x6204) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) +#define CHICKEN_RASTER_2 _MMIO(0x6208) +#define TBIMR_FAST_CLIP REG_BIT(5) + #define VFLSKPD _MMIO(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) @@ -1007,6 +1013,8 @@ #define GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC (1 << 9) #define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7) +#define GUCPMTIMESTAMP _MMIO(0xc3e8) + #define __GEN9_RCS0_MOCS0 0xc800 #define GEN9_GFX_MOCS(i) _MMIO(__GEN9_RCS0_MOCS0 + (i) * 4) #define __GEN9_VCS0_MOCS0 0xc900 @@ -1078,6 
+1086,7 @@ #define GEN10_SAMPLER_MODE _MMIO(0xe18c) #define ENABLE_SMALLPL REG_BIT(15) +#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) #define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194) @@ -1101,6 +1110,8 @@ #define GEN12_DISABLE_TDL_PUSH REG_BIT(9) #define GEN11_DIS_PICK_2ND_EU REG_BIT(7) #define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) +#define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) +#define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) #define HSW_ROW_CHICKEN3 _MMIO(0xe49c) #define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) @@ -1123,6 +1134,8 @@ #define RT_CTRL _MMIO(0xe530) #define DIS_NULL_QUERY REG_BIT(10) +#define STACKID_CTRL REG_GENMASK(6, 5) +#define STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 0x2) #define EU_PERF_CNTL1 _MMIO(0xe558) #define EU_PERF_CNTL5 _MMIO(0xe55c) @@ -1541,6 +1554,8 @@ #define OTHER_GTPM_INSTANCE 1 #define OTHER_KCR_INSTANCE 4 #define OTHER_GSC_INSTANCE 6 +#define OTHER_MEDIA_GUC_INSTANCE 16 +#define OTHER_MEDIA_GTPM_INSTANCE 17 #define GEN11_IIR_REG_SELECTOR(x) _MMIO(0x190070 + ((x) * 4)) @@ -1565,4 +1580,12 @@ #define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000) +/* + * Standalone Media's non-engine GT registers are located at their regular GT + * offsets plus 0x380000. This extra offset is stored inside the intel_uncore + * structure so that the existing code can be used for both GTs without + * modification. + */ +#define MTL_MEDIA_GSI_BASE 0x380000 + #endif /* __INTEL_GT_REGS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c index 9e4ebf53379b..d651ccd0ab20 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c @@ -22,11 +22,6 @@ bool is_object_gt(struct kobject *kobj) return !strncmp(kobj->name, "gt", 2); } -static struct intel_gt *kobj_to_gt(struct kobject *kobj) -{ - return container_of(kobj, struct intel_gt, sysfs_gt); -} - struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, const char *name) { @@ -101,6 +96,10 @@ void intel_gt_sysfs_register(struct intel_gt *gt) gt->i915->sysfs_gt, "gt%d", gt->info.id)) goto exit_fail; + gt->sysfs_defaults = kobject_create_and_add(".defaults", >->sysfs_gt); + if (!gt->sysfs_defaults) + goto exit_fail; + intel_gt_sysfs_pm_init(gt, >->sysfs_gt); return; @@ -113,5 +112,6 @@ exit_fail: void intel_gt_sysfs_unregister(struct intel_gt *gt) { + kobject_put(gt->sysfs_defaults); kobject_put(>->sysfs_gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h index a99aa7e8b01a..6232923a420d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h @@ -10,6 +10,7 @@ #include <linux/kobject.h> #include "i915_gem.h" /* GEM_BUG_ON() */ +#include "intel_gt_types.h" struct intel_gt; @@ -22,6 +23,11 @@ intel_gt_create_kobj(struct intel_gt *gt, struct kobject *dir, const char *name); +static inline struct intel_gt *kobj_to_gt(struct kobject *kobj) +{ + return container_of(kobj, struct intel_gt, sysfs_gt); +} + void intel_gt_sysfs_register(struct intel_gt *gt); void intel_gt_sysfs_unregister(struct intel_gt *gt); struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index d09a0e845d09..180dd6f3ef57 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -726,6 +726,34 @@ 
static const struct attribute *media_perf_power_attrs[] = { NULL }; +static ssize_t +default_min_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_gt *gt = kobj_to_gt(kobj->parent); + + return sysfs_emit(buf, "%u\n", gt->defaults.min_freq); +} + +static struct kobj_attribute default_min_freq_mhz = +__ATTR(rps_min_freq_mhz, 0444, default_min_freq_mhz_show, NULL); + +static ssize_t +default_max_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_gt *gt = kobj_to_gt(kobj->parent); + + return sysfs_emit(buf, "%u\n", gt->defaults.max_freq); +} + +static struct kobj_attribute default_max_freq_mhz = +__ATTR(rps_max_freq_mhz, 0444, default_max_freq_mhz_show, NULL); + +static const struct attribute * const rps_defaults_attrs[] = { + &default_min_freq_mhz.attr, + &default_max_freq_mhz.attr, + NULL +}; + static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj, const struct attribute * const *attrs) { @@ -783,4 +811,10 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) "failed to create gt%u media_perf_power_attrs sysfs (%pe)\n", gt->info.id, ERR_PTR(ret)); } + + ret = sysfs_create_files(gt->sysfs_defaults, rps_defaults_attrs); + if (ret) + drm_warn(>->i915->drm, + "failed to add gt%u rps defaults (%pe)\n", + gt->info.id, ERR_PTR(ret)); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 3804a583382b..f19c2de77ff6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -76,8 +76,22 @@ enum intel_submission_method { INTEL_SUBMISSION_GUC, }; +struct gt_defaults { + u32 min_freq; + u32 max_freq; +}; + +enum intel_gt_type { + GT_PRIMARY, + GT_TILE, + GT_MEDIA, +}; + struct intel_gt { struct drm_i915_private *i915; + const char *name; + enum intel_gt_type type; + struct intel_uncore *uncore; struct i915_ggtt *ggtt; @@ -127,6 +141,20 @@ struct intel_gt { struct intel_wakeref wakeref; atomic_t user_wakeref; + /** + * Protects access to lmem usefault list. + * It is required, if we are outside of the runtime suspend path, + * access to @lmem_userfault_list requires always first grabbing the + * runtime pm, to ensure we can't race against runtime suspend. + * Once we have that we also need to grab @lmem_userfault_lock, + * at which point we have exclusive access. + * The runtime suspend path is special since it doesn't really hold any locks, + * but instead has exclusive access by virtue of all other accesses requiring + * holding the runtime pm wakeref. 
+ */ + struct mutex lmem_userfault_lock; + struct list_head lmem_userfault_list; + struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ @@ -142,6 +170,9 @@ struct intel_gt { */ intel_wakeref_t awake; + /* Manual runtime pm autosuspend delay for user GGTT/lmem mmaps */ + struct intel_wakeref_auto userfault_wakeref; + u32 clock_frequency; u32 clock_period_ns; @@ -149,7 +180,7 @@ struct intel_gt { struct intel_rc6 rc6; struct intel_rps rps; - spinlock_t irq_lock; + spinlock_t *irq_lock; u32 gt_imr; u32 pm_ier; u32 pm_imr; @@ -251,6 +282,18 @@ struct intel_gt { /* gt/gtN sysfs */ struct kobject sysfs_gt; + + /* sysfs defaults per gt */ + struct gt_defaults defaults; + struct kobject *sysfs_defaults; +}; + +struct intel_gt_definition { + enum intel_gt_type type; + char *name; + u32 mapping_base; + u32 gsi_offset; + intel_engine_mask_t engine_mask; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index b67831833c9a..2eaeba14319e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -405,6 +405,9 @@ void free_scratch(struct i915_address_space *vm) { int i; + if (!vm->scratch[0]) + return; + for (i = 0; i <= vm->top; i++) i915_gem_object_put(vm->scratch[i]); } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index e639434e97fd..c0ca53cba9f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -386,9 +386,6 @@ struct i915_ggtt { */ struct list_head userfault_list; - /* Manual runtime pm autosuspend delay for user GGTT mmaps */ - struct intel_wakeref_auto userfault_wakeref; - struct mutex error_mutex; struct drm_mm_node error_capture; struct drm_mm_node uc_fw; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index eec73c66406c..3955292483a6 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -662,6 +662,21 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) return -1; } +static int lrc_ring_bb_offset(const struct intel_engine_cs *engine) +{ + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return 0x80; + else if (GRAPHICS_VER(engine->i915) >= 12) + return 0x70; + else if (GRAPHICS_VER(engine->i915) >= 9) + return 0x64; + else if (GRAPHICS_VER(engine->i915) >= 8 && + engine->class == RENDER_CLASS) + return 0xc4; + else + return -1; +} + static int lrc_ring_gpr0(const struct intel_engine_cs *engine) { if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) @@ -768,6 +783,7 @@ static void init_common_regs(u32 * const regs, bool inhibit) { u32 ctl; + int loc; ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); @@ -779,6 +795,10 @@ static void init_common_regs(u32 * const regs, regs[CTX_CONTEXT_CONTROL] = ctl; regs[CTX_TIMESTAMP] = ce->stats.runtime.last; + + loc = lrc_ring_bb_offset(engine); + if (loc != -1) + regs[loc + 1] = 0; } static void init_wa_bb_regs(u32 * const regs, @@ -1242,6 +1262,23 @@ dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) return cs; } +/* + * The bspec's tuning guide asks us to program a vertical watermark value of + * 0x3FF. However this register is not saved/restored properly by the + * hardware, so we're required to apply the desired value via INDIRECT_CTX + * batch buffer to ensure the value takes effect properly. 
All other bits + * in this register should remain at 0 (the hardware default). + */ +static u32 * +dg2_emit_draw_watermark_setting(u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(DRAW_WATERMARK); + *cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF); + + return cs; +} + static u32 * gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) { @@ -1261,7 +1298,12 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) /* hsdes: 1809175790 */ if (!HAS_FLAT_CCS(ce->engine->i915)) - cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV); + cs = gen12_emit_aux_table_inv(ce->engine->gt, + cs, GEN12_GFX_CCS_AUX_NV); + + /* Wa_16014892111 */ + if (IS_DG2(ce->engine->i915)) + cs = dg2_emit_draw_watermark_setting(cs); return cs; } @@ -1283,9 +1325,11 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) /* hsdes: 1809175790 */ if (!HAS_FLAT_CCS(ce->engine->i915)) { if (ce->engine->class == VIDEO_DECODE_CLASS) - cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV); + cs = gen12_emit_aux_table_inv(ce->engine->gt, + cs, GEN12_VD0_AUX_NV); else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS) - cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV); + cs = gen12_emit_aux_table_inv(ce->engine->gt, + cs, GEN12_VE0_AUX_NV); } return cs; diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 933648cc90ff..aaaf1906026c 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -511,44 +511,16 @@ static inline u32 *i915_flush_dw(u32 *cmd, u32 flags) return cmd; } -static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size) -{ - u32 num_cmds, num_blks, total_size; - - if (!GET_CCS_BYTES(i915, size)) - return 0; - - /* - * XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte - * blocks. one XY_CTRL_SURF_COPY_BLT command can - * transfer upto 1024 blocks. 
- */ - num_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size), - NUM_CCS_BYTES_PER_BLOCK); - num_cmds = DIV_ROUND_UP(num_blks, NUM_CCS_BLKS_PER_XFER); - total_size = XY_CTRL_SURF_INSTR_SIZE * num_cmds; - - /* - * Adding a flush before and after XY_CTRL_SURF_COPY_BLT - */ - total_size += 2 * MI_FLUSH_DW_SIZE; - - return total_size; -} - static int emit_copy_ccs(struct i915_request *rq, u32 dst_offset, u8 dst_access, u32 src_offset, u8 src_access, int size) { struct drm_i915_private *i915 = rq->engine->i915; int mocs = rq->engine->gt->mocs.uc_index << 1; - u32 num_ccs_blks, ccs_ring_size; + u32 num_ccs_blks; u32 *cs; - ccs_ring_size = calc_ctrl_surf_instr_size(i915, size); - WARN_ON(!ccs_ring_size); - - cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2)); + cs = intel_ring_begin(rq, 12); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -583,8 +555,7 @@ static int emit_copy_ccs(struct i915_request *rq, FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS); - if (ccs_ring_size & 1) - *cs++ = MI_NOOP; + *cs++ = MI_NOOP; intel_ring_advance(rq, cs); @@ -645,7 +616,7 @@ static u64 scatter_list_length(struct scatterlist *sg) while (sg && sg_dma_len(sg)) { len += sg_dma_len(sg); sg = sg_next(sg); - }; + } return len; } diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index c6ebe2781076..152244d7f62a 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -207,6 +207,14 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { MOCS_ENTRY(15, \ LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ L3_3_WB), \ + /* Bypass LLC - Uncached (EHL+) */ \ + MOCS_ENTRY(16, \ + LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \ + L3_1_UC), \ + /* Bypass LLC - L3 (Read-Only) (EHL+) */ \ + MOCS_ENTRY(17, \ + LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \ + L3_3_WB), \ /* Self-Snoop - L3 + LLC */ \ MOCS_ENTRY(18, \ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 6ee8d1127016..7ecfa672f738 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -312,7 +312,7 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, ppgtt->vm.gt = gt; ppgtt->vm.i915 = i915; ppgtt->vm.dma = i915->drm.dev; - ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size); + ppgtt->vm.total = BIT_ULL(RUNTIME_INFO(i915)->ppgtt_size); ppgtt->vm.lmem_pt_obj_flags = lmem_pt_obj_flags; dma_resv_init(&ppgtt->vm._resv); diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index aa6aed837194..f3ad93db0b21 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -4,8 +4,10 @@ */ #include "i915_drv.h" +#include "i915_pci.h" #include "i915_reg.h" #include "intel_memory_region.h" +#include "intel_pci_config.h" #include "intel_region_lmem.h" #include "intel_region_ttm.h" #include "gem/i915_gem_lmem.h" @@ -45,7 +47,6 @@ _resize_bar(struct drm_i915_private *i915, int resno, resource_size_t size) drm_info(&i915->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); } -#define LMEM_BAR_NUM 2 static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) { struct pci_dev *pdev = to_pci_dev(i915->drm.dev); @@ -56,15 +57,14 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t u32 pci_cmd; int i; - current_size = roundup_pow_of_two(pci_resource_len(pdev, 
LMEM_BAR_NUM)); + current_size = roundup_pow_of_two(pci_resource_len(pdev, GEN12_LMEM_BAR)); if (i915->params.lmem_bar_size) { u32 bar_sizes; rebar_size = i915->params.lmem_bar_size * (resource_size_t)SZ_1M; - bar_sizes = pci_rebar_get_possible_sizes(pdev, - LMEM_BAR_NUM); + bar_sizes = pci_rebar_get_possible_sizes(pdev, GEN12_LMEM_BAR); if (rebar_size == current_size) return; @@ -107,7 +107,7 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); - _resize_bar(i915, LMEM_BAR_NUM, rebar_size); + _resize_bar(i915, GEN12_LMEM_BAR, rebar_size); pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); @@ -202,6 +202,9 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) if (!IS_DGFX(i915)) return ERR_PTR(-ENODEV); + if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR)) + return ERR_PTR(-ENXIO); + if (HAS_FLAT_CCS(i915)) { resource_size_t lmem_range; u64 tile_stolen, flat_ccs_base; @@ -236,8 +239,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) mul_u32_u32(i915->params.lmem_size, SZ_1M)); } - io_start = pci_resource_start(pdev, 2); - io_size = min(pci_resource_len(pdev, 2), lmem_size); + io_start = pci_resource_start(pdev, GEN12_LMEM_BAR); + io_size = min(pci_resource_len(pdev, GEN12_LMEM_BAR), lmem_size); if (!io_size) return ERR_PTR(-EIO); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index c68d36fb5bbd..b36674356986 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -776,7 +776,7 @@ static void revoke_mmaps(struct intel_gt *gt) continue; node = &vma->mmo->vma_node; - vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; + vma_offset = vma->gtt_view.partial.offset << PAGE_SHIFT; unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping, drm_vma_node_offset_addr(node) + vma_offset, @@ -1281,9 +1281,6 @@ static void intel_gt_reset_global(struct intel_gt *gt, intel_wedge_on_timeout(&w, gt, 5 * HZ) { intel_display_prepare_reset(gt->i915); - /* Flush everyone using a resource about to be clobbered */ - synchronize_srcu_expedited(&gt->reset.backoff_srcu); - intel_gt_reset(gt, engine_mask, reason); intel_display_finish_reset(gt->i915); @@ -1392,6 +1389,9 @@ void intel_gt_handle_error(struct intel_gt *gt, } } + /* Flush everyone using a resource about to be clobbered */ + synchronize_srcu_expedited(&gt->reset.backoff_srcu); + intel_gt_reset_global(gt, engine_mask, msg); if (!intel_uc_uses_guc_submission(&gt->uc)) { diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 7bb967034679..6b86250c31ab 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -194,9 +194,9 @@ static void rps_enable_interrupts(struct intel_rps *rps) rps_reset_ei(rps); - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock); gen6_gt_pm_enable_irq(gt, rps->pm_events); - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq)); @@ -217,14 +217,14 @@ static void rps_reset_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock); if (GRAPHICS_VER(gt->i915) >= 11) gen11_rps_reset_interrupts(rps); else gen6_rps_reset_interrupts(rps); rps->pm_iir = 0; - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); } static void
rps_disable_interrupts(struct intel_rps *rps) @@ -234,9 +234,9 @@ static void rps_disable_interrupts(struct intel_rps *rps) intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock); gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); intel_synchronize_irq(gt->i915); @@ -1107,7 +1107,12 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c caps->min_freq = (rp_state_cap >> 0) & 0xff; } else { caps->rp0_freq = (rp_state_cap >> 0) & 0xff; - caps->rp1_freq = (rp_state_cap >> 8) & 0xff; + if (GRAPHICS_VER(i915) >= 10) + caps->rp1_freq = REG_FIELD_GET(RPE_MASK, + intel_uncore_read(to_gt(i915)->uncore, + GEN10_FREQ_INFO_REC)); + else + caps->rp1_freq = (rp_state_cap >> 8) & 0xff; caps->min_freq = (rp_state_cap >> 16) & 0xff; } @@ -1546,6 +1551,9 @@ void intel_rps_disable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + if (!intel_rps_is_enabled(rps)) + return; + intel_rps_clear_enabled(rps); intel_rps_clear_interrupts(rps); intel_rps_clear_timer(rps); @@ -1789,10 +1797,10 @@ static void rps_work(struct work_struct *work) int new_freq, adj, min, max; u32 pm_iir = 0; - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock); pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events; client_boost = atomic_read(&rps->num_waiters); - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); /* Make sure we didn't queue anything we're not going to process. */ if (!pm_iir && !client_boost) @@ -1865,9 +1873,9 @@ static void rps_work(struct work_struct *work) mutex_unlock(&rps->lock); out: - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock); gen6_gt_pm_unmask_irq(gt, rps->pm_events); - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); } void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) @@ -1875,7 +1883,7 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) struct intel_gt *gt = rps_to_gt(rps); const u32 events = rps->pm_events & pm_iir; - lockdep_assert_held(&gt->irq_lock); + lockdep_assert_held(gt->irq_lock); if (unlikely(!events)) return; @@ -1895,7 +1903,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) events = pm_iir & rps->pm_events; if (events) { - spin_lock(&gt->irq_lock); + spin_lock(gt->irq_lock); GT_TRACE(gt, "irq events:%x\n", events); @@ -1903,7 +1911,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) rps->pm_iir |= events; schedule_work(&rps->work); - spin_unlock(&gt->irq_lock); + spin_unlock(gt->irq_lock); } if (GRAPHICS_VER(gt->i915) >= 8) @@ -1979,7 +1987,9 @@ void intel_rps_init(struct intel_rps *rps) /* Derive initial user preferences/limits from the hardware limits */ rps->max_freq_softlimit = rps->max_freq; + rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit; rps->min_freq_softlimit = rps->min_freq; + rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit; /* After setting max-softlimit, find the overclock max freq */ if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c b/drivers/gpu/drm/i915/gt/intel_sa_media.c new file mode 100644 index 000000000000..e8f3d18c12b8 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "i915_drv.h" +#include "gt/intel_gt.h" +#include
"gt/intel_sa_media.h" + +int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, + u32 gsi_offset) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore; + + uncore = drmm_kzalloc(&i915->drm, sizeof(*uncore), GFP_KERNEL); + if (!uncore) + return -ENOMEM; + + uncore->gsi_offset = gsi_offset; + + gt->irq_lock = to_gt(i915)->irq_lock; + intel_gt_common_init_early(gt); + intel_uncore_init_early(uncore, gt); + + /* + * Standalone media shares the general MMIO space with the primary + * GT. We'll re-use the primary GT's mapping. + */ + uncore->regs = i915->uncore.regs; + if (drm_WARN_ON(&i915->drm, uncore->regs == NULL)) + return -EIO; + + gt->uncore = uncore; + gt->phys_addr = phys_addr; + + /* + * For current platforms we can assume there's only a single + * media GT and cache it for quick lookup. + */ + drm_WARN_ON(&i915->drm, i915->media_gt); + i915->media_gt = gt; + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.h b/drivers/gpu/drm/i915/gt/intel_sa_media.h new file mode 100644 index 000000000000..3afb310de932 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef __INTEL_SA_MEDIA__ +#define __INTEL_SA_MEDIA__ + +#include <linux/types.h> + +struct intel_gt; + +int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr, + u32 gsi_offset); + +#endif /* __INTEL_SA_MEDIA_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index c6d3050604c8..66f21c735d54 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -382,7 +382,6 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) static void gen9_sseu_info_init(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; - struct intel_device_info *info = mkwrite_device_info(i915); struct sseu_dev_info *sseu = >->info.sseu; struct intel_uncore *uncore = gt->uncore; u32 fuse2, eu_disable, subslice_mask; @@ -471,10 +470,10 @@ static void gen9_sseu_info_init(struct intel_gt *gt) if (IS_GEN9_LP(i915)) { #define IS_SS_DISABLED(ss) (!(sseu->subslice_mask.hsw[0] & BIT(ss))) - info->has_pooled_eu = hweight8(sseu->subslice_mask.hsw[0]) == 3; + RUNTIME_INFO(i915)->has_pooled_eu = hweight8(sseu->subslice_mask.hsw[0]) == 3; sseu->min_eu_in_pool = 0; - if (info->has_pooled_eu) { + if (HAS_POOLED_EU(i915)) { if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0)) sseu->min_eu_in_pool = 3; else if (IS_SS_DISABLED(1)) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e8111fce56d0..6d2003d598e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -568,6 +568,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); wa_add(wal, @@ -2102,13 +2103,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) /* Wa_1509235366:dg2 */ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE); - - /* - * The following are not actually "workarounds" but rather - * recommended tuning settings documented in the bspec's - * performance guide section. 
- */ - wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { @@ -2119,6 +2113,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); } + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || + IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + /* Wa_1509727124:dg2 */ + wa_masked_en(wal, GEN10_SAMPLER_MODE, + SC_DISABLE_POWER_OPTIMIZATION_EBB); + } + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012419201:dg2 */ @@ -2195,15 +2196,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || - IS_DG2_G11(i915)) { - /* Wa_22012654132:dg2 */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); - } - /* Wa_14013202645:dg2 */ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) @@ -2397,7 +2389,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) FF_DOP_CLOCK_GATE_DISABLE); } - if (HAS_PERCTX_PREEMPT_CTRL(i915)) { + if (IS_GRAPHICS_VER(i915, 9, 12)) { /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ wa_masked_en(wal, GEN7_FF_SLICE_CS_CHICKEN1, @@ -2670,6 +2662,56 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } /* + * The bspec performance guide has recommended MMIO tuning settings. These + * aren't truly "workarounds" but we want to program them with the same + * workaround infrastructure to ensure that they're automatically added to + * the GuC save/restore lists, re-applied at the right times, and checked for + * any conflicting programming requested by real workarounds. + * + * Programming settings should be added here only if their registers are not + * part of an engine's register state context. If a register is part of a + * context, then any tuning settings should be programmed in an appropriate + * function invoked by __intel_engine_init_ctx_wa(). + */ +static void +add_render_compute_tuning_settings(struct drm_i915_private *i915, + struct i915_wa_list *wal) +{ + if (IS_PONTEVECCHIO(i915)) { + wa_write(wal, XEHPC_L3SCRUB, + SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + } + + if (IS_DG2(i915)) { + wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); + + /* + * This is also listed as Wa_22012654132 for certain DG2 + * steppings, but the tuning setting programming is a superset + * since it applies to all DG2 variants and steppings. + * + * Note that register 0xE420 is write-only and cannot be read + * back for verification on DG2 (due to Wa_14012342262), so + * we need to explicitly skip the readback. + */ + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); + } + + /* + * This tuning setting proves beneficial only on ATS-M designs; the + * default "age based" setting is optimal on regular DG2 and other + * platforms. 
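A quick reference for the wa_masked_*() calls used throughout these hunks: GEN "masked" registers dedicate their upper 16 bits to a write-enable mask, so a single write can update chosen bits without a read-modify-write. In simplified form (the real i915 macros add type checking):

	/* upper half = which bits the write affects, lower half = their new values */
	#define _MASKED_BIT_ENABLE(a)	(((a) << 16) | (a))	/* set bit(s) a */
	#define _MASKED_BIT_DISABLE(a)	((a) << 16)		/* clear bit(s) a */

This is also why the write-only GEN10_CACHE_MODE_SS entry above passes _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC) as its set value.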
+ */ + if (INTEL_INFO(i915)->tuning_thread_rr_after_dep) + wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE, + THREAD_EX_ARB_MODE_RR_AFTER_DEP); +} + +/* * The workarounds in this function apply to shared registers in * the general render reset domain that aren't tied to a * specific engine. Since all render+compute engines get reset @@ -2683,14 +2725,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li { struct drm_i915_private *i915 = engine->i915; - if (IS_PONTEVECCHIO(i915)) { - /* - * The following is not actually a "workaround" but rather - * a recommended tuning setting documented in the bspec's - * performance guide section. - */ - wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + add_render_compute_tuning_settings(i915, wal); + if (IS_PONTEVECCHIO(i915)) { /* Wa_16016694945 */ wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC); } diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 09f8cd2d0e2c..1e08b2473b99 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -2077,7 +2077,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg) goto out; } - intel_context_set_banned(rq->context); + intel_context_ban(rq->context, rq); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -2136,7 +2136,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg) if (err) goto out; - intel_context_set_banned(rq[1]->context); + intel_context_ban(rq[1]->context, rq[1]); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -2219,7 +2219,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg) if (err) goto out; - intel_context_set_banned(rq[2]->context); + intel_context_ban(rq[2]->context, rq[2]); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -2234,7 +2234,13 @@ static int __cancel_queued(struct live_preempt_cancel *arg) goto out; } - if (rq[1]->fence.error != 0) { + /* + * The behavior differs depending on whether semaphores are available: + * with semaphores the subsequent request is already on the hardware + * and is not cancelled, while without them the request is still held + * in the driver and is cancelled.
+ */ + if (intel_engine_has_semaphores(rq[1]->engine) && + rq[1]->fence.error != 0) { pr_err("Normal inflight1 request did not complete\n"); err = -EINVAL; goto out; @@ -2282,7 +2288,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) goto out; } - intel_context_set_banned(rq->context); + intel_context_ban(rq->context, rq); err = intel_engine_pulse(arg->engine); /* force reset */ if (err) goto out; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 6493265d5f64..7f3bb1d34dfb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1302,13 +1302,15 @@ static int igt_reset_wait(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = &gt->i915->gpu_error; - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine; struct i915_request *rq; unsigned int reset_count; struct hang h; long timeout; int err; + engine = intel_selftest_find_any_engine(gt); + if (!engine || !intel_engine_can_store_dword(engine)) return 0; @@ -1432,7 +1434,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, int (*fn)(void *), unsigned int flags) { - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine; struct drm_i915_gem_object *obj; struct task_struct *tsk = NULL; struct i915_request *rq; @@ -1444,6 +1446,8 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE) return 0; + engine = intel_selftest_find_any_engine(gt); + if (!engine || !intel_engine_can_store_dword(engine)) return 0; @@ -1819,12 +1823,14 @@ static int igt_handle_error(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = &gt->i915->gpu_error; - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine; struct hang h; struct i915_request *rq; struct i915_gpu_coredump *error; int err; + engine = intel_selftest_find_any_engine(gt); + /* Check that we can issue a global GPU and engine reset */ if (!intel_has_reset_engine(gt)) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 1109088fe8f6..82d3f8058995 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -27,6 +27,9 @@ #define NUM_GPR 16 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */ +#define LRI_HEADER MI_INSTR(0x22, 0) +#define LRI_LENGTH_MASK GENMASK(7, 0) + static struct i915_vma *create_scratch(struct intel_gt *gt) { return __vm_create_scratch_for_read_pinned(&gt->ggtt->vm, PAGE_SIZE); } @@ -202,7 +205,7 @@ static int live_lrc_layout(void *arg) continue; } - if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((lri & GENMASK(31, 23)) != LRI_HEADER) { pr_err("%s: Expected LRI command at dword %d, found %08x\n", engine->name, dw, lri); err = -EINVAL; @@ -357,6 +360,11 @@ static int live_lrc_fixed(void *arg) lrc_ring_cmd_buf_cctl(engine), "RING_CMD_BUF_CCTL" }, + { + i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)), + lrc_ring_bb_offset(engine), + "RING_BB_OFFSET" + }, { }, }, *t; u32 *hw; @@ -987,18 +995,40 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { - u32 len = hw[dw] & 0x7f; + u32 len = hw[dw] & LRI_LENGTH_MASK; + + /* + * Keep it simple, skip parsing complex commands + * + * At present, there are no more MI_LOAD_REGISTER_IMM + * commands after the first 3D state command.
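An aside on the (len + 1) / 2 conversion these selftest parsers rely on: in the usual gen8+ encoding, MI_LOAD_REGISTER_IMM stores (total dwords - 2) in its low bits, so an LRI writing N registers (N offset/value pairs after the header) carries 2N - 1 there:

	u32 len = hw[dw] & LRI_LENGTH_MASK;	/* 2N - 1 for N registers */
	u32 num_regs = (len + 1) / 2;		/* N (offset, value) pairs follow */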
Rather + * than include a table (see i915_cmd_parser.c) of all + * the possible commands and their instruction lengths + * (or mask for variable length instructions), assume + * we have gathered the complete list of registers and + * bail out. + */ + if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) + break; if (hw[dw] == 0) { dw++; continue; } - if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { + /* Assume all other MI commands match LRI length mask */ dw += len + 2; continue; } + if (!len) { + pr_err("%s: invalid LRI found in context image\n", + ce->engine->name); + igt_hexdump(defaults, PAGE_SIZE); + break; + } + dw++; len = (len + 1) / 2; while (len--) { @@ -1150,18 +1180,29 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { - u32 len = hw[dw] & 0x7f; + u32 len = hw[dw] & LRI_LENGTH_MASK; + + /* For simplicity, break parsing at the first complex command */ + if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) + break; if (hw[dw] == 0) { dw++; continue; } - if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { dw += len + 2; continue; } + if (!len) { + pr_err("%s: invalid LRI found in context image\n", + ce->engine->name); + igt_hexdump(defaults, PAGE_SIZE); + break; + } + dw++; len = (len + 1) / 2; *cs++ = MI_LOAD_REGISTER_IMM(len); @@ -1292,18 +1333,29 @@ static int compare_isolation(struct intel_engine_cs *engine, hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { - u32 len = hw[dw] & 0x7f; + u32 len = hw[dw] & LRI_LENGTH_MASK; + + /* For simplicity, break parsing at the first complex command */ + if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT) + break; if (hw[dw] == 0) { dw++; continue; } - if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) { dw += len + 2; continue; } + if (!len) { + pr_err("%s: invalid LRI found in context image\n", + engine->name); + igt_hexdump(defaults, PAGE_SIZE); + break; + } + dw++; len = (len + 1) / 2; while (len--) { @@ -1343,6 +1395,30 @@ err_A0: return err; } +static struct i915_vma * +create_result_vma(struct i915_address_space *vm, unsigned long sz) +{ + struct i915_vma *vma; + void *ptr; + + vma = create_user_vma(vm, sz); + if (IS_ERR(vma)) + return vma; + + /* Set the results to a known value distinct from the poison */ + ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC); + if (IS_ERR(ptr)) { + i915_vma_put(vma); + return ERR_CAST(ptr); + } + + memset(ptr, POISON_INUSE, vma->size); + i915_gem_object_flush_map(vma->obj); + i915_gem_object_unpin_map(vma->obj); + + return vma; +} + static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) { u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1); @@ -1361,13 +1437,13 @@ static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) goto err_A; } - ref[0] = create_user_vma(A->vm, SZ_64K); + ref[0] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(ref[0])) { err = PTR_ERR(ref[0]); goto err_B; } - ref[1] = create_user_vma(A->vm, SZ_64K); + ref[1] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(ref[1])) { err = PTR_ERR(ref[1]); goto err_ref0; @@ -1389,13 +1465,13 @@ static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) } i915_request_put(rq); - result[0] = create_user_vma(A->vm, SZ_64K); + result[0] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(result[0])) { err = PTR_ERR(result[0]); goto 
err_ref1; } - result[1] = create_user_vma(A->vm, SZ_64K); + result[1] = create_result_vma(A->vm, SZ_64K); if (IS_ERR(result[1])) { err = PTR_ERR(result[1]); goto err_result0; @@ -1408,18 +1484,17 @@ static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison) } err = poison_registers(B, poison, sema); - if (err) { - WRITE_ONCE(*sema, -1); - i915_request_put(rq); - goto err_result1; - } - - if (i915_request_wait(rq, 0, HZ / 2) < 0) { - i915_request_put(rq); + if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) { + pr_err("%s(%s): wait for results timed out\n", + __func__, engine->name); err = -ETIME; - goto err_result1; } + + /* Always cancel the semaphore wait, just in case the GPU gets stuck */ + WRITE_ONCE(*sema, -1); i915_request_put(rq); + if (err) + goto err_result1; err = compare_isolation(engine, ref, result, A, poison); diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index ac29691e0b1a..f8a1d27df272 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -166,6 +166,15 @@ static int run_test(struct intel_gt *gt, int test_type) return -EIO; } + /* + * FIXME: With efficient frequency enabled, GuC can request + * frequencies higher than the SLPC max. While this is fixed + * in GuC, we level set these tests with RPn as min. + */ + err = slpc_set_min_freq(slpc, slpc->min_freq); + if (err) + return err; + if (slpc->min_freq == slpc->rp0_freq) { pr_err("Min/Max are fused to the same value\n"); return -EINVAL; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h index df83c1cc7c7a..28b8387f97b7 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h @@ -37,6 +37,7 @@ * | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) | * | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) | * | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) | + * | | | - _`GUC_CTB_STATUS_UNUSED` = 8 (CTB is not in use) | * +---+-------+--------------------------------------------------------------+ * |...| | RESERVED = MBZ | * +---+-------+--------------------------------------------------------------+ @@ -49,9 +50,10 @@ struct guc_ct_buffer_desc { u32 tail; u32 status; #define GUC_CTB_STATUS_NO_ERROR 0 -#define GUC_CTB_STATUS_OVERFLOW (1 << 0) -#define GUC_CTB_STATUS_UNDERFLOW (1 << 1) -#define GUC_CTB_STATUS_MISMATCH (1 << 2) +#define GUC_CTB_STATUS_OVERFLOW BIT(0) +#define GUC_CTB_STATUS_UNDERFLOW BIT(1) +#define GUC_CTB_STATUS_MISMATCH BIT(2) +#define GUC_CTB_STATUS_UNUSED BIT(3) u32 reserved[13]; } __packed; static_assert(sizeof(struct guc_ct_buffer_desc) == 64); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 2706a8c65090..bac06e3d6f2c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -82,9 +82,9 @@ static void gen9_reset_guc_interrupts(struct intel_guc *guc) assert_rpm_wakelock_held(&gt->i915->runtime_pm); - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock); gen6_gt_pm_reset_iir(gt, gt->pm_guc_events); - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); } static void gen9_enable_guc_interrupts(struct intel_guc *guc) @@ -93,11 +93,11 @@ static void gen9_enable_guc_interrupts(struct intel_guc *guc) assert_rpm_wakelock_held(&gt->i915->runtime_pm); - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock);
WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) & gt->pm_guc_events); gen6_gt_pm_enable_irq(gt, gt->pm_guc_events); - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); } static void gen9_disable_guc_interrupts(struct intel_guc *guc) @@ -106,11 +106,11 @@ static void gen9_disable_guc_interrupts(struct intel_guc *guc) assert_rpm_wakelock_held(&gt->i915->runtime_pm); - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock); gen6_gt_pm_disable_irq(gt, gt->pm_guc_events); - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); intel_synchronize_irq(gt->i915); gen9_reset_guc_interrupts(guc); @@ -120,9 +120,9 @@ static void gen11_reset_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock); gen11_gt_reset_one_iir(gt, 0, GEN11_GUC); - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); } static void gen11_enable_guc_interrupts(struct intel_guc *guc) @@ -130,25 +130,25 @@ static void gen11_enable_guc_interrupts(struct intel_guc *guc) struct intel_gt *gt = guc_to_gt(guc); u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST); - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock); WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC)); intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, events); intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~events); - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); } static void gen11_disable_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock); intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0); intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0); - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); intel_synchronize_irq(gt->i915); gen11_reset_guc_interrupts(guc); @@ -224,53 +224,22 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc) static u32 guc_ctl_log_params_flags(struct intel_guc *guc) { - u32 offset = intel_guc_ggtt_offset(guc, guc->log.vma) >> PAGE_SHIFT; - u32 flags; - - #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0) - #define LOG_UNIT SZ_1M - #define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS - #else - #define LOG_UNIT SZ_4K - #define LOG_FLAG 0 - #endif - - #if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0) - #define CAPTURE_UNIT SZ_1M - #define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS - #else - #define CAPTURE_UNIT SZ_4K - #define CAPTURE_FLAG 0 - #endif - - BUILD_BUG_ON(!CRASH_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT)); - BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT)); - BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); - - BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > - (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); - BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > - (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); - BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > - (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); + struct intel_guc_log *log = &guc->log; + u32 offset, flags; + + GEM_BUG_ON(!log->sizes_initialised); + + offset = intel_guc_ggtt_offset(guc, log->vma) >> PAGE_SHIFT; flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | - CAPTURE_FLAG | - LOG_FLAG | - ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | - ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | - ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << GUC_LOG_CAPTURE_SHIFT) | +
log->sizes[GUC_LOG_SECTIONS_DEBUG].flag | + log->sizes[GUC_LOG_SECTIONS_CAPTURE].flag | + (log->sizes[GUC_LOG_SECTIONS_CRASH].count << GUC_LOG_CRASH_SHIFT) | + (log->sizes[GUC_LOG_SECTIONS_DEBUG].count << GUC_LOG_DEBUG_SHIFT) | + (log->sizes[GUC_LOG_SECTIONS_CAPTURE].count << GUC_LOG_CAPTURE_SHIFT) | (offset << GUC_LOG_BUF_ADDR_SHIFT); - #undef LOG_UNIT - #undef LOG_FLAG - #undef CAPTURE_UNIT - #undef CAPTURE_FLAG - return flags; } @@ -389,6 +358,23 @@ void intel_guc_write_params(struct intel_guc *guc) intel_uncore_forcewake_put(uncore, FORCEWAKE_GT); } +void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p) +{ + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + u32 stamp = 0; + u64 ktime; + + with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) + stamp = intel_uncore_read(gt->uncore, GUCPMTIMESTAMP); + ktime = ktime_get_boottime_ns(); + + drm_printf(p, "Kernel timestamp: 0x%08llX [%llu]\n", ktime, ktime); + drm_printf(p, "GuC timestamp: 0x%08X [%u]\n", stamp, stamp); + drm_printf(p, "CS timestamp frequency: %u Hz, %u ns\n", + gt->clock_frequency, gt->clock_period_ns); +} + int intel_guc_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index a7acffbf15d1..804133df1ac9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -464,4 +464,6 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); void intel_guc_write_barrier(struct intel_guc *guc); +void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index ba7541f3ca61..74cbe8eaf531 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -464,7 +464,11 @@ static void fill_engine_enable_masks(struct intel_gt *gt, } #define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) -#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE) +#define XEHP_LR_HW_CONTEXT_SIZE (96 * sizeof(u32)) +#define LR_HW_CONTEXT_SZ(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50) ? \ + XEHP_LR_HW_CONTEXT_SIZE : \ + LR_HW_CONTEXT_SIZE) +#define LRC_SKIP_SIZE(i915) (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SZ(i915)) static int guc_prep_golden_context(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -525,7 +529,7 @@ static int guc_prep_golden_context(struct intel_guc *guc) * on all engines).
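Concretely, assuming LRC_PPHWSP_SZ is a single page (as elsewhere in i915), the two LRC_SKIP_SIZE() variants above evaluate to:

	/*
	 * Bytes skipped from the start of the golden context image:
	 *   pre-Xe_HP: 1 * 4096 + 80 * 4 = 4416
	 *   Xe_HP+:    1 * 4096 + 96 * 4 = 4480
	 */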
*/ ads_blob_write(guc, ads.eng_state_size[guc_class], - real_size - LRC_SKIP_SIZE); + real_size - LRC_SKIP_SIZE(gt->i915)); ads_blob_write(guc, ads.golden_context_lrca[guc_class], addr_ggtt); @@ -599,7 +603,7 @@ static void guc_init_golden_context(struct intel_guc *guc) } GEM_BUG_ON(ads_blob_read(guc, ads.eng_state_size[guc_class]) != - real_size - LRC_SKIP_SIZE); + real_size - LRC_SKIP_SIZE(gt->i915)); GEM_BUG_ON(ads_blob_read(guc, ads.golden_context_lrca[guc_class]) != addr_ggtt); addr_ggtt += alloc_size; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 75257bd20ff0..8f1165146013 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -600,10 +600,8 @@ intel_guc_capture_getnullheader(struct intel_guc *guc, return 0; } -#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3 - -int -intel_guc_capture_output_min_size_est(struct intel_guc *guc) +static int +guc_capture_output_min_size_est(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; @@ -623,13 +621,8 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc) * For each engine instance, there would be 1 x guc_state_capture_group_t output * followed by 3 x guc_state_capture_t lists. The latter is how the register * dumps are split across different register types (where the '3' are global vs class - * vs instance). Finally, let's multiply the whole thing by 3x (just so we are - * not limited to just 1 round of data in a worst case full register dump log) - * - * NOTE: intel_guc_log that allocates the log buffer would round this size up to - * a power of two. + * vs instance). */ - for_each_engine(engine, gt, id) { worst_min_size += sizeof(struct guc_state_capture_group_header_t) + (3 * sizeof(struct guc_state_capture_header_t)); @@ -649,7 +642,31 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc) worst_min_size += (num_regs * sizeof(struct guc_mmio_reg)); - return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER); + return worst_min_size; +} + +/* + * Add on a 3x multiplier to allow for multiple back-to-back captures occurring + * before the i915 can read the data out and process it + */ +#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3 + +static void check_guc_capture_size(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + int min_size = guc_capture_output_min_size_est(guc); + int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER; + u32 buffer_size = intel_guc_log_section_size_capture(&guc->log); + + if (min_size < 0) + drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n", + min_size); + else if (min_size > buffer_size) + drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n", + buffer_size, min_size); + else if (spare_size > buffer_size) + drm_notice(&i915->drm, "GuC error state capture buffer may be too small: %d < %d (min = %d)\n", + buffer_size, spare_size, min_size); } /* @@ -1278,7 +1295,8 @@ static void __guc_capture_process_output(struct intel_guc *guc) log_buf_state = guc->log.buf_addr + (sizeof(struct guc_log_buffer_state) * GUC_CAPTURE_LOG_BUFFER); - src_data = guc->log.buf_addr + intel_guc_get_log_buffer_offset(GUC_CAPTURE_LOG_BUFFER); + src_data = guc->log.buf_addr + + intel_guc_get_log_buffer_offset(&guc->log, GUC_CAPTURE_LOG_BUFFER); /* * Make a copy of the state structure, inside GuC log buffer @@ -1286,7 +1304,7 @@ static void
__guc_capture_process_output(struct intel_guc *guc) * from it multiple times. */ memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state)); - buffer_size = intel_guc_get_log_buffer_size(GUC_CAPTURE_LOG_BUFFER); + buffer_size = intel_guc_get_log_buffer_size(&guc->log, GUC_CAPTURE_LOG_BUFFER); read_offset = log_buf_state_local.read_ptr; write_offset = log_buf_state_local.sampled_write_ptr; full_count = log_buf_state_local.buffer_full_cnt; @@ -1365,33 +1383,22 @@ guc_capture_reg_to_str(const struct intel_guc *guc, u32 owner, u32 type, return NULL; } -#ifdef CONFIG_DRM_I915_DEBUG_GUC -#define __out(a, ...) \ - do { \ - drm_warn((&(a)->i915->drm), __VA_ARGS__); \ - i915_error_printf((a), __VA_ARGS__); \ - } while (0) -#else -#define __out(a, ...) \ - i915_error_printf(a, __VA_ARGS__) -#endif - #define GCAP_PRINT_INTEL_ENG_INFO(ebuf, eng) \ do { \ - __out(ebuf, " i915-Eng-Name: %s command stream\n", \ - (eng)->name); \ - __out(ebuf, " i915-Eng-Inst-Class: 0x%02x\n", (eng)->class); \ - __out(ebuf, " i915-Eng-Inst-Id: 0x%02x\n", (eng)->instance); \ - __out(ebuf, " i915-Eng-LogicalMask: 0x%08x\n", \ - (eng)->logical_mask); \ + i915_error_printf(ebuf, " i915-Eng-Name: %s command stream\n", \ + (eng)->name); \ + i915_error_printf(ebuf, " i915-Eng-Inst-Class: 0x%02x\n", (eng)->class); \ + i915_error_printf(ebuf, " i915-Eng-Inst-Id: 0x%02x\n", (eng)->instance); \ + i915_error_printf(ebuf, " i915-Eng-LogicalMask: 0x%08x\n", \ + (eng)->logical_mask); \ } while (0) #define GCAP_PRINT_GUC_INST_INFO(ebuf, node) \ do { \ - __out(ebuf, " GuC-Engine-Inst-Id: 0x%08x\n", \ - (node)->eng_inst); \ - __out(ebuf, " GuC-Context-Id: 0x%08x\n", (node)->guc_id); \ - __out(ebuf, " LRCA: 0x%08x\n", (node)->lrca); \ + i915_error_printf(ebuf, " GuC-Engine-Inst-Id: 0x%08x\n", \ + (node)->eng_inst); \ + i915_error_printf(ebuf, " GuC-Context-Id: 0x%08x\n", (node)->guc_id); \ + i915_error_printf(ebuf, " LRCA: 0x%08x\n", (node)->lrca); \ } while (0) int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf, @@ -1423,57 +1430,57 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf, guc = &ee->engine->gt->uc.guc; - __out(ebuf, "global --- GuC Error Capture on %s command stream:\n", - ee->engine->name); + i915_error_printf(ebuf, "global --- GuC Error Capture on %s command stream:\n", + ee->engine->name); node = ee->guc_capture_node; if (!node) { - __out(ebuf, " No matching ee-node\n"); + i915_error_printf(ebuf, " No matching ee-node\n"); return 0; } - __out(ebuf, "Coverage: %s\n", grptype[node->is_partial]); + i915_error_printf(ebuf, "Coverage: %s\n", grptype[node->is_partial]); for (i = GUC_CAPTURE_LIST_TYPE_GLOBAL; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) { - __out(ebuf, " RegListType: %s\n", - datatype[i % GUC_CAPTURE_LIST_TYPE_MAX]); - __out(ebuf, " Owner-Id: %d\n", node->reginfo[i].vfid); + i915_error_printf(ebuf, " RegListType: %s\n", + datatype[i % GUC_CAPTURE_LIST_TYPE_MAX]); + i915_error_printf(ebuf, " Owner-Id: %d\n", node->reginfo[i].vfid); switch (i) { case GUC_CAPTURE_LIST_TYPE_GLOBAL: default: break; case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS: - __out(ebuf, " GuC-Eng-Class: %d\n", node->eng_class); - __out(ebuf, " i915-Eng-Class: %d\n", - guc_class_to_engine_class(node->eng_class)); + i915_error_printf(ebuf, " GuC-Eng-Class: %d\n", node->eng_class); + i915_error_printf(ebuf, " i915-Eng-Class: %d\n", + guc_class_to_engine_class(node->eng_class)); break; case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE: eng = intel_guc_lookup_engine(guc, node->eng_class, 
node->eng_inst); if (eng) GCAP_PRINT_INTEL_ENG_INFO(ebuf, eng); else - __out(ebuf, " i915-Eng-Lookup Fail!\n"); + i915_error_printf(ebuf, " i915-Eng-Lookup Fail!\n"); GCAP_PRINT_GUC_INST_INFO(ebuf, node); break; } numregs = node->reginfo[i].num_regs; - __out(ebuf, " NumRegs: %d\n", numregs); + i915_error_printf(ebuf, " NumRegs: %d\n", numregs); j = 0; while (numregs--) { regs = node->reginfo[i].regs; str = guc_capture_reg_to_str(guc, GUC_CAPTURE_LIST_INDEX_PF, i, node->eng_class, 0, regs[j].offset, &is_ext); if (!str) - __out(ebuf, " REG-0x%08x", regs[j].offset); + i915_error_printf(ebuf, " REG-0x%08x", regs[j].offset); else - __out(ebuf, " %s", str); + i915_error_printf(ebuf, " %s", str); if (is_ext) - __out(ebuf, "[%ld][%ld]", - FIELD_GET(GUC_REGSET_STEERING_GROUP, regs[j].flags), - FIELD_GET(GUC_REGSET_STEERING_INSTANCE, regs[j].flags)); - __out(ebuf, ": 0x%08x\n", regs[j].value); + i915_error_printf(ebuf, "[%ld][%ld]", + FIELD_GET(GUC_REGSET_STEERING_GROUP, regs[j].flags), + FIELD_GET(GUC_REGSET_STEERING_INSTANCE, regs[j].flags)); + i915_error_printf(ebuf, ": 0x%08x\n", regs[j].value); ++j; } } @@ -1580,5 +1587,7 @@ int intel_guc_capture_init(struct intel_guc *guc) INIT_LIST_HEAD(&guc->capture->outlist); INIT_LIST_HEAD(&guc->capture->cachelist); + check_guc_capture_size(guc); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h index d3d7bd0b6db6..fbd3713c7832 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -21,7 +21,6 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m, void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee, struct intel_context *ce); void intel_guc_capture_process(struct intel_guc *guc); -int intel_guc_capture_output_min_size_est(struct intel_guc *guc); int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid, void **outptr); int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index f01325cd1b62..2b22065e87bf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -455,6 +455,7 @@ corrupted: /** * wait_for_ct_request_update - Wait for CT request state update. 
+ * @ct: pointer to CT * @req: pointer to pending request * @status: placeholder for status * @@ -467,9 +468,10 @@ corrupted: * * 0 response received (status is valid) * * -ETIMEDOUT no response within hardcoded timeout */ -static int wait_for_ct_request_update(struct ct_request *req, u32 *status) +static int wait_for_ct_request_update(struct intel_guc_ct *ct, struct ct_request *req, u32 *status) { int err; + bool ct_enabled; /* * Fast commands should complete in less than 10us, so sample quickly @@ -481,12 +483,15 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10 #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000 #define done \ - (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \ + (!(ct_enabled = intel_guc_ct_enabled(ct)) || \ + FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \ GUC_HXG_ORIGIN_GUC) err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS); if (err) err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS); #undef done + if (!ct_enabled) + err = -ENODEV; *status = req->status; return err; @@ -703,11 +708,18 @@ retry: intel_guc_notify(ct_to_guc(ct)); - err = wait_for_ct_request_update(&request, status); + err = wait_for_ct_request_update(ct, &request, status); g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); if (unlikely(err)) { + if (err == -ENODEV) + /* + * wait_for_ct_request_update returns -ENODEV while a reset + * or suspend is in progress; in that case, log at debug + * level rather than as an error. + */ + CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n", + action[0], request.fence); + else + CT_ERROR(ct, "No response for request %#x (fence %u)\n", + action[0], request.fence); goto unlink; } @@ -771,8 +783,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, ret = ct_send(ct, action, len, response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { - CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n", - action[0], ERR_PTR(ret), status); + if (ret != -ENODEV) + CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n", + action[0], ERR_PTR(ret), status); } else if (unlikely(ret)) { CT_DEBUG(ct, "send action %#x returned %d (%#x)\n", action[0], ret, ret); @@ -816,8 +829,22 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) if (unlikely(ctb->broken)) return -EPIPE; - if (unlikely(desc->status)) - goto corrupted; + if (unlikely(desc->status)) { + u32 status = desc->status; + + if (status & GUC_CTB_STATUS_UNUSED) { + /* + * Potentially valid if a CLIENT_RESET request resulted in + * contexts/engines being reset, but this should never happen + * since no contexts should be active when CLIENT_RESET is sent.
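Stepping back, the reworked wait in wait_for_ct_request_update() above has this shape (sketch only; guc_replied() is a stand-in for the GUC_HXG_MSG_0_ORIGIN check):

	bool ct_enabled;

	#define done (!(ct_enabled = intel_guc_ct_enabled(ct)) || guc_replied(req))
	err = wait_for_us(done, 10);		/* fast commands: ~10us busy-poll */
	if (err)
		err = wait_for(done, 1000);	/* slow path: sleeping wait, up to 1s */
	#undef done

	if (!ct_enabled)			/* CT torn down mid-wait (reset/suspend) */
		err = -ENODEV;

Because both pollers re-evaluate the condition macro, ct_enabled always reflects the most recent check, so a CT that disappears during the wait is reported as -ENODEV rather than -ETIMEDOUT.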
+ */ + CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n"); + status &= ~GUC_CTB_STATUS_UNUSED; + } + + if (status) + goto corrupted; + } GEM_BUG_ON(head > size); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 25b2d7ce6640..55d3ef93e86f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -13,8 +13,163 @@ #include "intel_guc_capture.h" #include "intel_guc_log.h" +#if defined(CONFIG_DRM_I915_DEBUG_GUC) +#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M +#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M +#elif defined(CONFIG_DRM_I915_DEBUG_GEM) +#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M +#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M +#else +#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_8K +#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_64K +#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_2M +#endif + static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log); +struct guc_log_section { + u32 max; + u32 flag; + u32 default_val; + const char *name; +}; + +static void _guc_log_init_sizes(struct intel_guc_log *log) +{ + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + static const struct guc_log_section sections[GUC_LOG_SECTIONS_LIMIT] = { + { + GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT, + GUC_LOG_LOG_ALLOC_UNITS, + GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE, + "crash dump" + }, + { + GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT, + GUC_LOG_LOG_ALLOC_UNITS, + GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE, + "debug", + }, + { + GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT, + GUC_LOG_CAPTURE_ALLOC_UNITS, + GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE, + "capture", + } + }; + int i; + + for (i = 0; i < GUC_LOG_SECTIONS_LIMIT; i++) + log->sizes[i].bytes = sections[i].default_val; + + /* If debug size > 1MB then bump default crash size to keep the same units */ + if (log->sizes[GUC_LOG_SECTIONS_DEBUG].bytes >= SZ_1M && + GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE < SZ_1M) + log->sizes[GUC_LOG_SECTIONS_CRASH].bytes = SZ_1M; + + /* Prepare the GuC API structure fields: */ + for (i = 0; i < GUC_LOG_SECTIONS_LIMIT; i++) { + /* Convert to correct units */ + if ((log->sizes[i].bytes % SZ_1M) == 0) { + log->sizes[i].units = SZ_1M; + log->sizes[i].flag = sections[i].flag; + } else { + log->sizes[i].units = SZ_4K; + log->sizes[i].flag = 0; + } + + if (!IS_ALIGNED(log->sizes[i].bytes, log->sizes[i].units)) + drm_err(&i915->drm, "Mis-aligned GuC log %s size: 0x%X vs 0x%X!", + sections[i].name, log->sizes[i].bytes, log->sizes[i].units); + log->sizes[i].count = log->sizes[i].bytes / log->sizes[i].units; + + if (!log->sizes[i].count) { + drm_err(&i915->drm, "Zero GuC log %s size!", sections[i].name); + } else { + /* Size is +1 unit */ + log->sizes[i].count--; + } + + /* Clip to field size */ + if (log->sizes[i].count > sections[i].max) { + drm_err(&i915->drm, "GuC log %s size too large: %d vs %d!", + sections[i].name, log->sizes[i].count + 1, sections[i].max + 1); + log->sizes[i].count = sections[i].max; + } + } + + if (log->sizes[GUC_LOG_SECTIONS_CRASH].units != log->sizes[GUC_LOG_SECTIONS_DEBUG].units) { + drm_err(&i915->drm, "Unit mis-match for GuC log crash and debug sections: %d vs %d!", + log->sizes[GUC_LOG_SECTIONS_CRASH].units, + log->sizes[GUC_LOG_SECTIONS_DEBUG].units); + log->sizes[GUC_LOG_SECTIONS_CRASH].units = log->sizes[GUC_LOG_SECTIONS_DEBUG].units; + 
log->sizes[GUC_LOG_SECTIONS_CRASH].count = 0; + } + + log->sizes_initialised = true; +} + +static void guc_log_init_sizes(struct intel_guc_log *log) +{ + if (log->sizes_initialised) + return; + + _guc_log_init_sizes(log); +} + +static u32 intel_guc_log_section_size_crash(struct intel_guc_log *log) +{ + guc_log_init_sizes(log); + + return log->sizes[GUC_LOG_SECTIONS_CRASH].bytes; +} + +static u32 intel_guc_log_section_size_debug(struct intel_guc_log *log) +{ + guc_log_init_sizes(log); + + return log->sizes[GUC_LOG_SECTIONS_DEBUG].bytes; +} + +u32 intel_guc_log_section_size_capture(struct intel_guc_log *log) +{ + guc_log_init_sizes(log); + + return log->sizes[GUC_LOG_SECTIONS_CAPTURE].bytes; +} + +static u32 intel_guc_log_size(struct intel_guc_log *log) +{ + /* + * GuC Log buffer Layout: + * + * NB: Ordering must follow "enum guc_log_buffer_type". + * + * +===============================+ 00B + * | Debug state header | + * +-------------------------------+ 32B + * | Crash dump state header | + * +-------------------------------+ 64B + * | Capture state header | + * +-------------------------------+ 96B + * | | + * +===============================+ PAGE_SIZE (4KB) + * | Debug logs | + * +===============================+ + DEBUG_SIZE + * | Crash Dump logs | + * +===============================+ + CRASH_SIZE + * | Capture logs | + * +===============================+ + CAPTURE_SIZE + */ + return PAGE_SIZE + + intel_guc_log_section_size_crash(log) + + intel_guc_log_section_size_debug(log) + + intel_guc_log_section_size_capture(log); +} + /** * DOC: GuC firmware log * @@ -139,7 +294,8 @@ static void guc_move_to_next_buf(struct intel_guc_log *log) smp_wmb(); /* All data has been written, so now move the offset of sub buffer. */ - relay_reserve(log->relay.channel, log->vma->obj->base.size - CAPTURE_BUFFER_SIZE); + relay_reserve(log->relay.channel, log->vma->obj->base.size - + intel_guc_log_section_size_capture(log)); /* Switch to the next sub buffer */ relay_flush(log->relay.channel); @@ -184,15 +340,16 @@ bool intel_guc_check_log_buf_overflow(struct intel_guc_log *log, return overflow; } -unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type) +unsigned int intel_guc_get_log_buffer_size(struct intel_guc_log *log, + enum guc_log_buffer_type type) { switch (type) { case GUC_DEBUG_LOG_BUFFER: - return DEBUG_BUFFER_SIZE; + return intel_guc_log_section_size_debug(log); case GUC_CRASH_DUMP_LOG_BUFFER: - return CRASH_BUFFER_SIZE; + return intel_guc_log_section_size_crash(log); case GUC_CAPTURE_LOG_BUFFER: - return CAPTURE_BUFFER_SIZE; + return intel_guc_log_section_size_capture(log); default: MISSING_CASE(type); } @@ -200,7 +357,8 @@ unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type) return 0; } -size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type) +size_t intel_guc_get_log_buffer_offset(struct intel_guc_log *log, + enum guc_log_buffer_type type) { enum guc_log_buffer_type i; size_t offset = PAGE_SIZE;/* for the log_buffer_states */ @@ -208,7 +366,7 @@ size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type) for (i = GUC_DEBUG_LOG_BUFFER; i < GUC_MAX_LOG_BUFFER; ++i) { if (i == type) break; - offset += intel_guc_get_log_buffer_size(i); + offset += intel_guc_get_log_buffer_size(log, i); } return offset; @@ -259,7 +417,7 @@ static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) */ memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state)); - buffer_size = intel_guc_get_log_buffer_size(type); + 
buffer_size = intel_guc_get_log_buffer_size(log, type); read_offset = log_buf_state_local.read_ptr; write_offset = log_buf_state_local.sampled_write_ptr; full_cnt = log_buf_state_local.buffer_full_cnt; @@ -374,7 +532,7 @@ static int guc_log_relay_create(struct intel_guc_log *log) * Keep the size of sub buffers same as shared log buffer * but GuC log-events excludes the error-state-capture logs */ - subbuf_size = log->vma->size - CAPTURE_BUFFER_SIZE; + subbuf_size = log->vma->size - intel_guc_log_section_size_capture(log); /* * Store up to 8 snapshots, which is large enough to buffer sufficient @@ -461,32 +619,7 @@ int intel_guc_log_create(struct intel_guc_log *log) GEM_BUG_ON(log->vma); - /* - * GuC Log buffer Layout - * (this ordering must follow "enum guc_log_buffer_type" definition) - * - * +===============================+ 00B - * | Debug state header | - * +-------------------------------+ 32B - * | Crash dump state header | - * +-------------------------------+ 64B - * | Capture state header | - * +-------------------------------+ 96B - * | | - * +===============================+ PAGE_SIZE (4KB) - * | Debug logs | - * +===============================+ + DEBUG_SIZE - * | Crash Dump logs | - * +===============================+ + CRASH_SIZE - * | Capture logs | - * +===============================+ + CAPTURE_SIZE - */ - if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE) - DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n", - CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc)); - - guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + - CAPTURE_BUFFER_SIZE; + guc_log_size = intel_guc_log_size(log); vma = intel_guc_allocate_vma(guc, guc_log_size); if (IS_ERR(vma)) { @@ -749,8 +882,9 @@ int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p, struct intel_guc *guc = log_to_guc(log); struct intel_uc *uc = container_of(guc, struct intel_uc, guc); struct drm_i915_gem_object *obj = NULL; - u32 *map; - int i = 0; + void *map; + u32 *page; + int i, j; if (!intel_guc_is_supported(guc)) return -ENODEV; @@ -763,21 +897,34 @@ int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p, if (!obj) return 0; + page = (u32 *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + intel_guc_dump_time_info(guc, p); + map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(map)) { DRM_DEBUG("Failed to pin object\n"); drm_puts(p, "(log data unaccessible)\n"); + free_page((unsigned long)page); return PTR_ERR(map); } - for (i = 0; i < obj->base.size / sizeof(u32); i += 4) - drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n", - *(map + i), *(map + i + 1), - *(map + i + 2), *(map + i + 3)); + for (i = 0; i < obj->base.size; i += PAGE_SIZE) { + if (!i915_memcpy_from_wc(page, map + i, PAGE_SIZE)) + memcpy(page, map + i, PAGE_SIZE); + + for (j = 0; j < PAGE_SIZE / sizeof(u32); j += 4) + drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n", + *(page + j + 0), *(page + j + 1), + *(page + j + 2), *(page + j + 3)); + } drm_puts(p, "\n"); i915_gem_object_unpin_map(obj); + free_page((unsigned long)page); return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 18007e639be9..02127703be80 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -15,20 +15,6 @@ struct intel_guc; -#if defined(CONFIG_DRM_I915_DEBUG_GUC) -#define CRASH_BUFFER_SIZE SZ_2M -#define DEBUG_BUFFER_SIZE SZ_16M -#define CAPTURE_BUFFER_SIZE 
SZ_4M -#elif defined(CONFIG_DRM_I915_DEBUG_GEM) -#define CRASH_BUFFER_SIZE SZ_1M -#define DEBUG_BUFFER_SIZE SZ_2M -#define CAPTURE_BUFFER_SIZE SZ_1M -#else -#define CRASH_BUFFER_SIZE SZ_8K -#define DEBUG_BUFFER_SIZE SZ_64K -#define CAPTURE_BUFFER_SIZE SZ_16K -#endif - /* * While we're using plain log level in i915, GuC controls are much more... * "elaborate"? We have a couple of bits for verbosity, separate bit for actual @@ -46,10 +32,30 @@ struct intel_guc; #define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2) #define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX) +enum { + GUC_LOG_SECTIONS_CRASH, + GUC_LOG_SECTIONS_DEBUG, + GUC_LOG_SECTIONS_CAPTURE, + GUC_LOG_SECTIONS_LIMIT +}; + struct intel_guc_log { u32 level; + + /* Allocation settings */ + struct { + s32 bytes; /* Size in bytes */ + s32 units; /* GuC API units - 1MB or 4KB */ + s32 count; /* Number of API units */ + u32 flag; /* GuC API units flag */ + } sizes[GUC_LOG_SECTIONS_LIMIT]; + bool sizes_initialised; + + /* Combined buffer allocation */ struct i915_vma *vma; void *buf_addr; + + /* RelayFS support */ struct { bool buf_in_use; bool started; @@ -58,6 +64,7 @@ struct intel_guc_log { struct mutex lock; u32 full_count; } relay; + /* logging related stats */ struct { u32 sampled_overflow; @@ -69,8 +76,9 @@ struct intel_guc_log { void intel_guc_log_init_early(struct intel_guc_log *log); bool intel_guc_check_log_buf_overflow(struct intel_guc_log *log, enum guc_log_buffer_type type, unsigned int full_cnt); -unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type); -size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type); +unsigned int intel_guc_get_log_buffer_size(struct intel_guc_log *log, + enum guc_log_buffer_type type); +size_t intel_guc_get_log_buffer_offset(struct intel_guc_log *log, enum guc_log_buffer_type type); int intel_guc_log_create(struct intel_guc_log *log); void intel_guc_log_destroy(struct intel_guc_log *log); @@ -92,4 +100,6 @@ void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p); int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p, bool dump_load_err); +u32 intel_guc_log_section_size_capture(struct intel_guc_log *log); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index 8dc063f087eb..a7092f711e9c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -102,6 +102,10 @@ #define GUC_SEND_TRIGGER (1<<0) #define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) +#define GEN12_GUC_SEM_INTR_ENABLES _MMIO(0xc71c) +#define GUC_SEM_INTR_ROUTE_TO_GUC BIT(31) +#define GUC_SEM_INTR_ENABLE_ALL (0xff) + #define GUC_NUM_DOORBELLS 256 /* format of the HW-monitored doorbell cacheline */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index ec9c4ca0f615..fdd895f73f9f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -137,17 +137,6 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return ret > 0 ? 
-EPROTO : ret; } -static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) -{ - u32 request[] = { - GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), - id, - }; - - return intel_guc_send(guc, request, ARRAY_SIZE(request)); -} - static bool slpc_is_running(struct intel_guc_slpc *slpc) { return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; @@ -201,16 +190,6 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return ret; } -static int slpc_unset_param(struct intel_guc_slpc *slpc, - u8 id) -{ - struct intel_guc *guc = slpc_to_guc(slpc); - - GEM_BUG_ON(id >= SLPC_MAX_PARAM); - - return guc_action_slpc_unset_param(guc, id); -} - static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) { struct drm_i915_private *i915 = slpc_to_i915(slpc); @@ -488,23 +467,33 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) /* Need a lock now since waitboost can be modifying min as well */ mutex_lock(&slpc->lock); - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - - ret = slpc_set_param(slpc, - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, - val); - - /* Return standardized err code for sysfs calls */ - if (ret) - ret = -EIO; + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + /* Ignore efficient freq if lower min freq is requested */ + ret = slpc_set_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + val < slpc->rp1_freq); + if (ret) { + i915_probe_error(i915, "Failed to toggle efficient freq (%pe)\n", + ERR_PTR(ret)); + goto out; } + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + val); + if (!ret) slpc->min_freq_softlimit = val; +out: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&slpc->lock); + /* Return standardized err code for sysfs calls */ + if (ret) + ret = -EIO; + return ret; } @@ -575,45 +564,28 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) * unless they have deviated from defaults, in which case, * we retain the values and set min/max accordingly. 
*/ - if (!slpc->max_freq_softlimit) + if (!slpc->max_freq_softlimit) { slpc->max_freq_softlimit = slpc->rp0_freq; - else if (slpc->max_freq_softlimit != slpc->rp0_freq) + slpc_to_gt(slpc)->defaults.max_freq = slpc->max_freq_softlimit; + } else if (slpc->max_freq_softlimit != slpc->rp0_freq) { ret = intel_guc_slpc_set_max_freq(slpc, slpc->max_freq_softlimit); + } if (unlikely(ret)) return ret; - if (!slpc->min_freq_softlimit) - slpc->min_freq_softlimit = slpc->min_freq; - else if (slpc->min_freq_softlimit != slpc->min_freq) + if (!slpc->min_freq_softlimit) { + ret = intel_guc_slpc_get_min_freq(slpc, &slpc->min_freq_softlimit); + if (unlikely(ret)) + return ret; + slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit; + } else if (slpc->min_freq_softlimit != slpc->min_freq) { return intel_guc_slpc_set_min_freq(slpc, slpc->min_freq_softlimit); - - return 0; -} - -static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore) -{ - int ret = 0; - - if (ignore) { - ret = slpc_set_param(slpc, - SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, - ignore); - if (!ret) - return slpc_set_param(slpc, - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, - slpc->min_freq); - } else { - ret = slpc_unset_param(slpc, - SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY); - if (!ret) - return slpc_unset_param(slpc, - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ); } - return ret; + return 0; } static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) @@ -675,14 +647,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) slpc_get_rp_values(slpc); - /* Ignore efficient freq and set min to platform min */ - ret = slpc_ignore_eff_freq(slpc, true); - if (unlikely(ret)) { - i915_probe_error(i915, "Failed to set SLPC min to RPn (%pe)\n", - ERR_PTR(ret)); - return ret; - } - /* Set SLPC max limit to RP0 */ ret = slpc_use_fused_rp0(slpc); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 3e91f44829e9..1db59eeb34db 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -684,7 +684,7 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) * Corner case where requests were sitting in the priority list or a * request resubmitted after the context was banned. */ - if (unlikely(intel_context_is_banned(ce))) { + if (unlikely(!intel_context_is_schedulable(ce))) { i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(ce->engine); return 0; @@ -870,15 +870,15 @@ static int guc_wq_item_append(struct intel_guc *guc, struct i915_request *rq) { struct intel_context *ce = request_to_scheduling_context(rq); - int ret = 0; + int ret; - if (likely(!intel_context_is_banned(ce))) { - ret = __guc_wq_item_append(rq); + if (unlikely(!intel_context_is_schedulable(ce))) + return 0; - if (unlikely(ret == -EBUSY)) { - guc->stalled_request = rq; - guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; - } + ret = __guc_wq_item_append(rq); + if (unlikely(ret == -EBUSY)) { + guc->stalled_request = rq; + guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; } return ret; @@ -897,7 +897,7 @@ static bool multi_lrc_submit(struct i915_request *rq) * submitting all the requests generated in parallel. 
*/ return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || - intel_context_is_banned(ce); + !intel_context_is_schedulable(ce); } static int guc_dequeue_one_context(struct intel_guc *guc) @@ -966,7 +966,7 @@ register_context: struct intel_context *ce = request_to_scheduling_context(last); if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) && - !intel_context_is_banned(ce))) { + intel_context_is_schedulable(ce))) { ret = try_context_registration(ce, false); if (unlikely(ret == -EPIPE)) { goto deadlk; @@ -1537,8 +1537,8 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) __reset_guc_busyness_stats(guc); /* Flush IRQ handler */ - spin_lock_irq(&guc_to_gt(guc)->irq_lock); - spin_unlock_irq(&guc_to_gt(guc)->irq_lock); + spin_lock_irq(guc_to_gt(guc)->irq_lock); + spin_unlock_irq(guc_to_gt(guc)->irq_lock); guc_flush_submissions(guc); guc_flush_destroyed_contexts(guc); @@ -1576,7 +1576,7 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) { struct intel_engine_cs *engine = __context_to_physical_engine(ce); - if (intel_context_is_banned(ce)) + if (!intel_context_is_schedulable(ce)) return; GEM_BUG_ON(!intel_context_is_pinned(ce)); @@ -1873,7 +1873,7 @@ int intel_guc_submission_init(struct intel_guc *guc) if (guc->submission_initialized) return 0; - if (guc->fw.major_ver_found < 70) { + if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) { ret = guc_lrc_desc_pool_create_v69(guc); if (ret) return ret; @@ -2308,7 +2308,7 @@ static int register_context(struct intel_context *ce, bool loop) GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); - if (guc->fw.major_ver_found >= 70) + if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) ret = register_context_v70(guc, ce, loop); else ret = register_context_v69(guc, ce, loop); @@ -2320,7 +2320,7 @@ static int register_context(struct intel_context *ce, bool loop) set_context_registered(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (guc->fw.major_ver_found >= 70) + if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) guc_context_policy_init_v70(ce, loop); } @@ -2425,7 +2425,6 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) struct context_policy policy; u32 execution_quantum; u32 preemption_timeout; - bool missing = false; unsigned long flags; int ret; @@ -2443,32 +2442,9 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) __guc_context_policy_add_preempt_to_idle(&policy, 1); ret = __guc_context_set_context_policies(guc, &policy, loop); - missing = ret != 0; - - if (!missing && intel_context_is_parent(ce)) { - struct intel_context *child; - - for_each_child(ce, child) { - __guc_context_policy_start_klv(&policy, child->guc_id.id); - - if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) - __guc_context_policy_add_preempt_to_idle(&policy, 1); - - child->guc_state.prio = ce->guc_state.prio; - __guc_context_policy_add_priority(&policy, ce->guc_state.prio); - __guc_context_policy_add_execution_quantum(&policy, execution_quantum); - __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); - - ret = __guc_context_set_context_policies(guc, &policy, loop); - if (ret) { - missing = true; - break; - } - } - } spin_lock_irqsave(&ce->guc_state.lock, flags); - if (missing) + if (ret != 0) set_context_policy_required(ce); else clr_context_policy_required(ce); @@ -2950,7 +2926,7 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout) { - if (guc->fw.major_ver_found >= 70) { + if 
(GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { struct context_policy policy; __guc_context_policy_start_klv(&policy, guc_id); @@ -3215,7 +3191,7 @@ static int guc_context_alloc(struct intel_context *ce) static void __guc_context_set_prio(struct intel_guc *guc, struct intel_context *ce) { - if (guc->fw.major_ver_found >= 70) { + if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) { struct context_policy policy; __guc_context_policy_start_klv(&policy, ce->guc_id.id); @@ -4203,13 +4179,27 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) void intel_guc_submission_enable(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); + + /* Enable and route to GuC */ + if (GRAPHICS_VER(gt->i915) >= 12) + intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, + GUC_SEM_INTR_ROUTE_TO_GUC | + GUC_SEM_INTR_ENABLE_ALL); + guc_init_lrc_mapping(guc); guc_init_engine_stats(guc); } void intel_guc_submission_disable(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); + /* Note: By the time we're here, GuC may have already been reset */ + + /* Disable and route to host */ + if (GRAPHICS_VER(gt->i915) >= 12) + intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0); } static bool __guc_submission_supported(struct intel_guc *guc) @@ -4434,12 +4424,12 @@ static void guc_handle_context_reset(struct intel_guc *guc, { trace_intel_context_reset(ce); - if (likely(!intel_context_is_banned(ce))) { + if (likely(intel_context_is_schedulable(ce))) { capture_error_state(guc, ce); guc_context_replay(ce); } else { drm_info(&guc_to_gt(guc)->i915->drm, - "Ignoring context reset notification of banned context 0x%04X on %s", + "Ignoring context reset notification of exiting context 0x%04X on %s", ce->guc_id.id, ce->engine->name); } } @@ -5175,4 +5165,5 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_guc.c" #include "selftest_guc_multi_lrc.c" +#include "selftest_guc_hangcheck.c" #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index f2e7c82985ef..dbd048b77e19 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -245,9 +245,9 @@ static int guc_enable_communication(struct intel_guc *guc) intel_guc_enable_interrupts(guc); /* check for CT messages received before we enabled interrupts */ - spin_lock_irq(&gt->irq_lock); + spin_lock_irq(gt->irq_lock); intel_guc_ct_event_handler(&guc->ct); - spin_unlock_irq(&gt->irq_lock); + spin_unlock_irq(gt->irq_lock); drm_dbg(&i915->drm, "GuC communication enabled\n"); @@ -435,9 +435,11 @@ static void print_fw_ver(struct intel_uc *uc, struct intel_uc_fw *fw) { struct drm_i915_private *i915 = uc_to_gt(uc)->i915; - drm_info(&i915->drm, "%s firmware %s version %u.%u\n", - intel_uc_fw_type_repr(fw->type), fw->path, - fw->major_ver_found, fw->minor_ver_found); + drm_info(&i915->drm, "%s firmware %s version %u.%u.%u\n", + intel_uc_fw_type_repr(fw->type), fw->file_selected.path, + fw->file_selected.major_ver, + fw->file_selected.minor_ver, + fw->file_selected.patch_ver); } static int __uc_init_hw(struct intel_uc *uc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 56a0d80f88ba..b91ad4aede1f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -41,7 +41,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, "%s firmware -> %s\n", intel_uc_fw_type_repr(uc_fw->type), status == INTEL_UC_FIRMWARE_SELECTED ? - uc_fw->path : intel_uc_fw_status_repr(status)); + uc_fw->file_selected.path : intel_uc_fw_status_repr(status)); } #endif
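The intel_context_is_banned() checks replaced throughout the intel_guc_submission.c hunks above rely on an intel_context_is_schedulable() helper whose definition is outside this diff. A minimal sketch of its presumed shape, assuming the CONTEXT_BANNED and CONTEXT_EXITING flag bits from intel_context_types.h:

static inline bool intel_context_is_schedulable(const struct intel_context *ce)
{
        /*
         * A context on its way out (exiting) is treated the same as a
         * banned one: neither may submit any further work.
         */
        return !test_bit(CONTEXT_EXITING, &ce->flags) &&
               !test_bit(CONTEXT_BANNED, &ce->flags);
}

This is also why guc_handle_context_reset() above now reports an "exiting" rather than a "banned" context: both states suppress the replay path.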
@@ -51,84 +51,153 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * * Note that RKL and ADL-S have the same GuC/HuC device ID's and use the same * firmware as TGL. + * + * Version numbers: + * Originally, the driver required an exact-match major/minor/patch firmware + * file and only supported that one version for any given platform. However, + * the new direction from upstream is to be backwards compatible with all + * prior releases and to be as flexible as possible as to what firmware is + * loaded. + * + * For GuC, the major version number signifies a backwards-breaking API change. + * So, new format GuC firmware files are labelled by their major version only. + * For HuC, there is no KMD interaction, hence no version matching requirement. + * So, new format HuC firmware files have no version number at all. + * + * All of which means that the table below must keep all old format files with + * a full three-point version number. But newer files have reduced requirements. + * Having said that, the driver still needs to track the minor version number + * for GuC at least, as it is useful to report to the user that they are not + * running with a recent enough version for all KMD supported features, + * security fixes, etc. to be enabled. + */ +#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_maj, guc_mmp) \ + fw_def(DG2, 0, guc_maj(dg2, 70, 5)) \ + fw_def(ALDERLAKE_P, 0, guc_maj(adlp, 70, 5)) \ + fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 70, 1, 1)) \ + fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 69, 0, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_maj(tgl, 70, 5)) \ + fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 70, 1, 1)) \ + fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 69, 0, 3)) \ + fw_def(DG1, 0, guc_maj(dg1, 70, 5)) \ + fw_def(ROCKETLAKE, 0, guc_mmp(tgl, 70, 1, 1)) \ + fw_def(TIGERLAKE, 0, guc_mmp(tgl, 70, 1, 1)) \ + fw_def(JASPERLAKE, 0, guc_mmp(ehl, 70, 1, 1)) \ + fw_def(ELKHARTLAKE, 0, guc_mmp(ehl, 70, 1, 1)) \ + fw_def(ICELAKE, 0, guc_mmp(icl, 70, 1, 1)) \ + fw_def(COMETLAKE, 5, guc_mmp(cml, 70, 1, 1)) \ + fw_def(COMETLAKE, 0, guc_mmp(kbl, 70, 1, 1)) \ + fw_def(COFFEELAKE, 0, guc_mmp(kbl, 70, 1, 1)) \ + fw_def(GEMINILAKE, 0, guc_mmp(glk, 70, 1, 1)) \ + fw_def(KABYLAKE, 0, guc_mmp(kbl, 70, 1, 1)) \ + fw_def(BROXTON, 0, guc_mmp(bxt, 70, 1, 1)) \ + fw_def(SKYLAKE, 0, guc_mmp(skl, 70, 1, 1)) + +#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp) \ + fw_def(ALDERLAKE_P, 0, huc_raw(tgl)) \ + fw_def(ALDERLAKE_P, 0, huc_mmp(tgl, 7, 9, 3)) \ + fw_def(ALDERLAKE_S, 0, huc_raw(tgl)) \ + fw_def(ALDERLAKE_S, 0, huc_mmp(tgl, 7, 9, 3)) \ + fw_def(DG1, 0, huc_raw(dg1)) \ + fw_def(ROCKETLAKE, 0, huc_mmp(tgl, 7, 9, 3)) \ + fw_def(TIGERLAKE, 0, huc_mmp(tgl, 7, 9, 3)) \ + fw_def(JASPERLAKE, 0, huc_mmp(ehl, 9, 0, 0)) \ + fw_def(ELKHARTLAKE, 0, huc_mmp(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, huc_mmp(icl, 9, 0, 0)) \ + fw_def(COMETLAKE, 5, huc_mmp(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 0, huc_mmp(kbl, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, huc_mmp(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, huc_mmp(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, huc_mmp(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, huc_mmp(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, huc_mmp(skl, 2, 0, 0)) + +/* + * Set of macros for producing a list of filenames from the above table. 
*/ -#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ - fw_def(DG2, 0, guc_def(dg2, 70, 1, 2)) \ - fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 1, 1)) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 1, 1)) \ - fw_def(DG1, 0, guc_def(dg1, 70, 1, 1)) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 70, 1, 1)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 1, 1)) \ - fw_def(JASPERLAKE, 0, guc_def(ehl, 70, 1, 1)) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 70, 1, 1)) \ - fw_def(ICELAKE, 0, guc_def(icl, 70, 1, 1)) \ - fw_def(COMETLAKE, 5, guc_def(cml, 70, 1, 1)) \ - fw_def(COMETLAKE, 0, guc_def(kbl, 70, 1, 1)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 70, 1, 1)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 70, 1, 1)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 70, 1, 1)) \ - fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1)) - -#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \ - fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) - -#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ - fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ - fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \ - fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \ - fw_def(ROCKETLAKE, 0, huc_def(tgl, 7, 9, 3)) \ - fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3)) \ - fw_def(JASPERLAKE, 0, huc_def(ehl, 9, 0, 0)) \ - fw_def(ELKHARTLAKE, 0, huc_def(ehl, 9, 0, 0)) \ - fw_def(ICELAKE, 0, huc_def(icl, 9, 0, 0)) \ - fw_def(COMETLAKE, 5, huc_def(cml, 4, 0, 0)) \ - fw_def(COMETLAKE, 0, huc_def(kbl, 4, 0, 0)) \ - fw_def(COFFEELAKE, 0, huc_def(kbl, 4, 0, 0)) \ - fw_def(GEMINILAKE, 0, huc_def(glk, 4, 0, 0)) \ - fw_def(KABYLAKE, 0, huc_def(kbl, 4, 0, 0)) \ - fw_def(BROXTON, 0, huc_def(bxt, 2, 0, 0)) \ - fw_def(SKYLAKE, 0, huc_def(skl, 2, 0, 0)) - -#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ +#define __MAKE_UC_FW_PATH_BLANK(prefix_, name_) \ + "i915/" \ + __stringify(prefix_) name_ ".bin" + +#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \ + "i915/" \ + __stringify(prefix_) name_ \ + __stringify(major_) ".bin" + +#define __MAKE_UC_FW_PATH_MMP(prefix_, name_, major_, minor_, patch_) \ "i915/" \ __stringify(prefix_) name_ \ __stringify(major_) "." \ __stringify(minor_) "." \ __stringify(patch_) ".bin" -#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \ - __MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_) +/* Minor for internal driver use, not part of file name */ +#define MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_) \ + __MAKE_UC_FW_PATH_MAJOR(prefix_, "_guc_", major_) + +#define MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ + __MAKE_UC_FW_PATH_MMP(prefix_, "_guc_", major_, minor_, patch_) -#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \ - __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_) +#define MAKE_HUC_FW_PATH_BLANK(prefix_) \ + __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc") -/* All blobs need to be declared via MODULE_FIRMWARE() */ +#define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ + __MAKE_UC_FW_PATH_MMP(prefix_, "_huc_", major_, minor_, patch_) + +/* + * All blobs need to be declared via MODULE_FIRMWARE(). + * This first expansion of the table macros is solely to provide + * that declaration. 
+ */ #define INTEL_UC_MODULE_FW(platform_, revid_, uc_) \ MODULE_FIRMWARE(uc_); -INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) -INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) -INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH) +INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP) +INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP) -/* The below structs and macros are used to iterate across the list of blobs */ +/* + * The next expansion of the table macros (in __uc_fw_auto_select below) provides + * actual data structures with both the filename and the version information. + * These structure arrays are then iterated over to find the list of suitable files + * for the current platform, and then to attempt to load those files, in the order + * listed, until one is successfully found. + */ struct __packed uc_fw_blob { + const char *path; + bool legacy; u8 major; u8 minor; - const char *path; + u8 patch; }; -#define UC_FW_BLOB(major_, minor_, path_) \ - { .major = major_, .minor = minor_, .path = path_ } +#define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ + .major = major_, \ + .minor = minor_, \ + .patch = patch_, \ + .path = path_, + +#define UC_FW_BLOB_NEW(major_, minor_, patch_, path_) \ + { UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ + .legacy = false } -#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \ - UC_FW_BLOB(major_, minor_, \ - MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_)) +#define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \ + { UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \ + .legacy = true } -#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \ - UC_FW_BLOB(major_, minor_, \ - MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_)) +#define GUC_FW_BLOB(prefix_, major_, minor_) \ + UC_FW_BLOB_NEW(major_, minor_, 0, \ + MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_)) + +#define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \ + UC_FW_BLOB_OLD(major_, minor_, patch_, \ + MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_)) + +#define HUC_FW_BLOB(prefix_) \ + UC_FW_BLOB_NEW(0, 0, 0, MAKE_HUC_FW_PATH_BLANK(prefix_)) + +#define HUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \ + UC_FW_BLOB_OLD(major_, minor_, patch_, \ + MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_)) struct __packed uc_fw_platform_requirement { enum intel_platform p; @@ -152,23 +221,22 @@ static void __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) { static const struct uc_fw_platform_requirement blobs_guc[] = { - INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) - }; - static const struct uc_fw_platform_requirement blobs_guc_fallback[] = { - INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB) + INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP) }; static const struct uc_fw_platform_requirement blobs_huc[] = { - INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB) + INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP) }; static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = { [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, }; + static bool verified; const struct uc_fw_platform_requirement *fw_blobs; enum intel_platform p = INTEL_INFO(i915)->platform; u32 fw_count; u8 rev = INTEL_REVID(i915); int i; + bool found; /* * The only difference between the ADL GuC FWs is the HWConfig support. 
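To make the table and macro plumbing above concrete, here is a small standalone userspace sketch of how a guc_maj() entry expands into a firmware path; the kernel's __stringify() is redeclared locally so the snippet compiles on its own, and the adlp/70/5 values are taken from the table above:

#include <stdio.h>

#define __stringify_1(x) #x
#define __stringify(x) __stringify_1(x)

/* Mirrors __MAKE_UC_FW_PATH_MAJOR/MAKE_GUC_FW_PATH_MAJOR above: the minor
 * version is carried along for driver-side version reporting, but never
 * reaches the file name. */
#define __MAKE_UC_FW_PATH_MAJOR(prefix_, name_, major_) \
        "i915/" __stringify(prefix_) name_ __stringify(major_) ".bin"
#define MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_) \
        __MAKE_UC_FW_PATH_MAJOR(prefix_, "_guc_", major_)

int main(void)
{
        puts(MAKE_GUC_FW_PATH_MAJOR(adlp, 70, 5)); /* i915/adlp_guc_70.bin */
        return 0;
}

A guc_mmp() entry goes through __MAKE_UC_FW_PATH_MMP instead and keeps the full three-part name (here that would be i915/adlp_guc_70.1.1.bin), matching the legacy blobs already shipped in linux-firmware.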
@@ -183,50 +251,102 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) fw_blobs = blobs_all[uc_fw->type].blobs; fw_count = blobs_all[uc_fw->type].count; + found = false; for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) { - if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { - const struct uc_fw_blob *blob = &fw_blobs[i].blob; - uc_fw->path = blob->path; - uc_fw->wanted_path = blob->path; - uc_fw->major_ver_wanted = blob->major; - uc_fw->minor_ver_wanted = blob->minor; - break; - } - } + const struct uc_fw_blob *blob = &fw_blobs[i].blob; - if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) { - const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback; - u32 count = ARRAY_SIZE(blobs_guc_fallback); + if (p != fw_blobs[i].p) + continue; - for (i = 0; i < count && p <= blobs[i].p; i++) { - if (p == blobs[i].p && rev >= blobs[i].rev) { - const struct uc_fw_blob *blob = &blobs[i].blob; + if (rev < fw_blobs[i].rev) + continue; - uc_fw->fallback.path = blob->path; - uc_fw->fallback.major_ver = blob->major; - uc_fw->fallback.minor_ver = blob->minor; - break; - } + if (uc_fw->file_selected.path) { + if (uc_fw->file_selected.path == blob->path) + uc_fw->file_selected.path = NULL; + + continue; } + + uc_fw->file_selected.path = blob->path; + uc_fw->file_wanted.path = blob->path; + uc_fw->file_wanted.major_ver = blob->major; + uc_fw->file_wanted.minor_ver = blob->minor; + found = true; + break; + } + + if (!found && uc_fw->file_selected.path) { + /* Failed to find a match for the last attempt?! */ + uc_fw->file_selected.path = NULL; } /* make sure the list is ordered as expected */ - if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) { + if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST) && !verified) { + verified = true; + for (i = 1; i < fw_count; i++) { + /* Next platform is good: */ if (fw_blobs[i].p < fw_blobs[i - 1].p) continue; + /* Next platform revision is good: */ if (fw_blobs[i].p == fw_blobs[i - 1].p && fw_blobs[i].rev < fw_blobs[i - 1].rev) continue; - pr_err("invalid FW blob order: %s r%u comes before %s r%u\n", - intel_platform_name(fw_blobs[i - 1].p), - fw_blobs[i - 1].rev, - intel_platform_name(fw_blobs[i].p), - fw_blobs[i].rev); + /* Platform/revision must be in order: */ + if (fw_blobs[i].p != fw_blobs[i - 1].p || + fw_blobs[i].rev != fw_blobs[i - 1].rev) + goto bad; + + /* Next major version is good: */ + if (fw_blobs[i].blob.major < fw_blobs[i - 1].blob.major) + continue; + + /* New must be before legacy: */ + if (!fw_blobs[i].blob.legacy && fw_blobs[i - 1].blob.legacy) + goto bad; + + /* New to legacy also means 0.0 to X.Y (HuC), or X.0 to X.Y (GuC) */ + if (fw_blobs[i].blob.legacy && !fw_blobs[i - 1].blob.legacy) { + if (!fw_blobs[i - 1].blob.major) + continue; + + if (fw_blobs[i].blob.major == fw_blobs[i - 1].blob.major) + continue; + } + + /* Major versions must be in order: */ + if (fw_blobs[i].blob.major != fw_blobs[i - 1].blob.major) + goto bad; + + /* Next minor version is good: */ + if (fw_blobs[i].blob.minor < fw_blobs[i - 1].blob.minor) + continue; + + /* Minor versions must be in order: */ + if (fw_blobs[i].blob.minor != fw_blobs[i - 1].blob.minor) + goto bad; + + /* Patch versions must be in order: */ + if (fw_blobs[i].blob.patch <= fw_blobs[i - 1].blob.patch) + continue; - uc_fw->path = NULL; +bad: + drm_err(&i915->drm, "Invalid FW blob order: %s r%u %s%d.%d.%d comes before %s r%u %s%d.%d.%d\n", + intel_platform_name(fw_blobs[i - 1].p), fw_blobs[i - 1].rev, + fw_blobs[i - 1].blob.legacy ? 
"L" : "v", + fw_blobs[i - 1].blob.major, + fw_blobs[i - 1].blob.minor, + fw_blobs[i - 1].blob.patch, + intel_platform_name(fw_blobs[i].p), fw_blobs[i].rev, + fw_blobs[i].blob.legacy ? "L" : "v", + fw_blobs[i].blob.major, + fw_blobs[i].blob.minor, + fw_blobs[i].blob.patch); + + uc_fw->file_selected.path = NULL; } } } @@ -259,7 +379,7 @@ static void __uc_fw_user_override(struct drm_i915_private *i915, struct intel_uc } if (unlikely(path)) { - uc_fw->path = path; + uc_fw->file_selected.path = path; uc_fw->user_overridden = true; } } @@ -283,7 +403,7 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, */ BUILD_BUG_ON(INTEL_UC_FIRMWARE_UNINITIALIZED); GEM_BUG_ON(uc_fw->status); - GEM_BUG_ON(uc_fw->path); + GEM_BUG_ON(uc_fw->file_selected.path); uc_fw->type = type; @@ -292,7 +412,7 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw, __uc_fw_user_override(i915, uc_fw); } - intel_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ? + intel_uc_fw_change_status(uc_fw, uc_fw->file_selected.path ? *uc_fw->file_selected.path ? INTEL_UC_FIRMWARE_SELECTED : INTEL_UC_FIRMWARE_DISABLED : INTEL_UC_FIRMWARE_NOT_SUPPORTED); @@ -305,32 +425,32 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e) if (i915_inject_probe_error(i915, e)) { /* non-existing blob */ - uc_fw->path = "<invalid>"; + uc_fw->file_selected.path = "<invalid>"; uc_fw->user_overridden = user; } else if (i915_inject_probe_error(i915, e)) { /* require next major version */ - uc_fw->major_ver_wanted += 1; - uc_fw->minor_ver_wanted = 0; + uc_fw->file_wanted.major_ver += 1; + uc_fw->file_wanted.minor_ver = 0; uc_fw->user_overridden = user; } else if (i915_inject_probe_error(i915, e)) { /* require next minor version */ - uc_fw->minor_ver_wanted += 1; + uc_fw->file_wanted.minor_ver += 1; uc_fw->user_overridden = user; - } else if (uc_fw->major_ver_wanted && + } else if (uc_fw->file_wanted.major_ver && i915_inject_probe_error(i915, e)) { /* require prev major version */ - uc_fw->major_ver_wanted -= 1; - uc_fw->minor_ver_wanted = 0; + uc_fw->file_wanted.major_ver -= 1; + uc_fw->file_wanted.minor_ver = 0; uc_fw->user_overridden = user; - } else if (uc_fw->minor_ver_wanted && + } else if (uc_fw->file_wanted.minor_ver && i915_inject_probe_error(i915, e)) { /* require prev minor version - hey, this should work! 
*/ - uc_fw->minor_ver_wanted -= 1; + uc_fw->file_wanted.minor_ver -= 1; uc_fw->user_overridden = user; } else if (user && i915_inject_probe_error(i915, e)) { /* officially unsupported platform */ - uc_fw->major_ver_wanted = 0; - uc_fw->minor_ver_wanted = 0; + uc_fw->file_wanted.major_ver = 0; + uc_fw->file_wanted.minor_ver = 0; uc_fw->user_overridden = true; } } @@ -339,10 +459,12 @@ static int check_gsc_manifest(const struct firmware *fw, struct intel_uc_fw *uc_fw) { u32 *dw = (u32 *)fw->data; - u32 version = dw[HUC_GSC_VERSION_DW]; + u32 version_hi = dw[HUC_GSC_VERSION_HI_DW]; + u32 version_lo = dw[HUC_GSC_VERSION_LO_DW]; - uc_fw->major_ver_found = FIELD_GET(HUC_GSC_MAJOR_VER_MASK, version); - uc_fw->minor_ver_found = FIELD_GET(HUC_GSC_MINOR_VER_MASK, version); + uc_fw->file_selected.major_ver = FIELD_GET(HUC_GSC_MAJOR_VER_HI_MASK, version_hi); + uc_fw->file_selected.minor_ver = FIELD_GET(HUC_GSC_MINOR_VER_HI_MASK, version_hi); + uc_fw->file_selected.patch_ver = FIELD_GET(HUC_GSC_PATCH_VER_LO_MASK, version_lo); return 0; } @@ -357,7 +479,7 @@ static int check_ccs_header(struct drm_i915_private *i915, /* Check the size of the blob before examining buffer contents */ if (unlikely(fw->size < sizeof(struct uc_css_header))) { drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, fw->size, sizeof(struct uc_css_header)); return -ENODATA; } @@ -370,7 +492,7 @@ static int check_ccs_header(struct drm_i915_private *i915, if (unlikely(size != sizeof(struct uc_css_header))) { drm_warn(&i915->drm, "%s firmware %s: unexpected header size: %zu != %zu\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, fw->size, sizeof(struct uc_css_header)); return -EPROTO; } @@ -385,7 +507,7 @@ static int check_ccs_header(struct drm_i915_private *i915, size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size; if (unlikely(fw->size < size)) { drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, fw->size, size); return -ENOEXEC; } @@ -394,16 +516,18 @@ static int check_ccs_header(struct drm_i915_private *i915, size = __intel_uc_fw_get_upload_size(uc_fw); if (unlikely(size >= i915->wopcm.size)) { drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu > %zu\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, size, (size_t)i915->wopcm.size); return -E2BIG; } /* Get version numbers from the CSS header */ - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, - css->sw_version); - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, - css->sw_version); + uc_fw->file_selected.major_ver = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + css->sw_version); + uc_fw->file_selected.minor_ver = FIELD_GET(CSS_SW_VERSION_UC_MINOR, + css->sw_version); + uc_fw->file_selected.patch_ver = FIELD_GET(CSS_SW_VERSION_UC_PATCH, + css->sw_version); if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) uc_fw->private_data_size = css->private_data_size; @@ -422,9 +546,11 @@ static int check_ccs_header(struct drm_i915_private *i915, int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) { struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915; + struct intel_uc_fw_file file_ideal; struct device *dev = i915->drm.dev; struct drm_i915_gem_object *obj; const 
struct firmware *fw = NULL; + bool old_ver = false; int err; GEM_BUG_ON(!i915->wopcm.size); @@ -437,24 +563,32 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) __force_fw_fetch_failures(uc_fw, -EINVAL); __force_fw_fetch_failures(uc_fw, -ESTALE); - err = firmware_request_nowarn(&fw, uc_fw->path, dev); - if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) { - err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev); - if (!err) { - drm_notice(&i915->drm, - "%s firmware %s is recommended, but only %s was found\n", - intel_uc_fw_type_repr(uc_fw->type), - uc_fw->wanted_path, - uc_fw->fallback.path); - drm_info(&i915->drm, - "Consider updating your linux-firmware pkg or downloading from %s\n", - INTEL_UC_FIRMWARE_URL); - - uc_fw->path = uc_fw->fallback.path; - uc_fw->major_ver_wanted = uc_fw->fallback.major_ver; - uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver; + err = firmware_request_nowarn(&fw, uc_fw->file_selected.path, dev); + memcpy(&file_ideal, &uc_fw->file_wanted, sizeof(file_ideal)); + + /* Any error is terminal if overriding. Don't bother searching for older versions */ + if (err && intel_uc_fw_is_overridden(uc_fw)) + goto fail; + + while (err == -ENOENT) { + old_ver = true; + + __uc_fw_auto_select(i915, uc_fw); + if (!uc_fw->file_selected.path) { + /* + * No more options! But set the path back to something + * valid just in case it gets dereferenced. + */ + uc_fw->file_selected.path = file_ideal.path; + + /* Also, preserve the version that was really wanted */ + memcpy(&uc_fw->file_wanted, &file_ideal, sizeof(uc_fw->file_wanted)); + break; } + + err = firmware_request_nowarn(&fw, uc_fw->file_selected.path, dev); } + if (err) goto fail; @@ -465,18 +599,39 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (err) goto fail; - if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - if (!intel_uc_fw_is_overridden(uc_fw)) { - err = -ENOEXEC; - goto fail; + if (uc_fw->file_wanted.major_ver) { + /* Check the file's major version was as it claimed */ + if (uc_fw->file_selected.major_ver != uc_fw->file_wanted.major_ver) { + drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, + uc_fw->file_selected.major_ver, uc_fw->file_selected.minor_ver, + uc_fw->file_wanted.major_ver, uc_fw->file_wanted.minor_ver); + if (!intel_uc_fw_is_overridden(uc_fw)) { + err = -ENOEXEC; + goto fail; + } + } else { + if (uc_fw->file_selected.minor_ver < uc_fw->file_wanted.minor_ver) + old_ver = true; } } + if (old_ver) { + /* Preserve the version that was really wanted */ + memcpy(&uc_fw->file_wanted, &file_ideal, sizeof(uc_fw->file_wanted)); + + drm_notice(&i915->drm, + "%s firmware %s (%d.%d) is recommended, but only %s (%d.%d) was found\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->file_wanted.path, + uc_fw->file_wanted.major_ver, uc_fw->file_wanted.minor_ver, + uc_fw->file_selected.path, + uc_fw->file_selected.major_ver, uc_fw->file_selected.minor_ver); + drm_info(&i915->drm, + "Consider updating your linux-firmware pkg or downloading from %s\n", + INTEL_UC_FIRMWARE_URL); + } + if (HAS_LMEM(i915)) { obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size); if (!IS_ERR(obj)) @@ -503,7 
+658,7 @@ fail: INTEL_UC_FIRMWARE_ERROR); i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, err); drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n", intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); @@ -645,7 +800,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags) fail: i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, err); intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL); return err; @@ -863,19 +1018,34 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) */ void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) { + u32 ver_sel, ver_want; + drm_printf(p, "%s firmware: %s\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path); - if (uc_fw->fallback.path) { - drm_printf(p, "%s firmware fallback: %s\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path); - drm_printf(p, "fallback selected: %s\n", - str_yes_no(uc_fw->path == uc_fw->fallback.path)); - } + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path); + if (uc_fw->file_selected.path != uc_fw->file_wanted.path) + drm_printf(p, "%s firmware wanted: %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_wanted.path); drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(uc_fw->status)); - drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, - uc_fw->major_ver_found, uc_fw->minor_ver_found); + ver_sel = MAKE_UC_VER(uc_fw->file_selected.major_ver, + uc_fw->file_selected.minor_ver, + uc_fw->file_selected.patch_ver); + ver_want = MAKE_UC_VER(uc_fw->file_wanted.major_ver, + uc_fw->file_wanted.minor_ver, + uc_fw->file_wanted.patch_ver); + if (ver_sel < ver_want) + drm_printf(p, "\tversion: wanted %u.%u.%u, found %u.%u.%u\n", + uc_fw->file_wanted.major_ver, + uc_fw->file_wanted.minor_ver, + uc_fw->file_wanted.patch_ver, + uc_fw->file_selected.major_ver, + uc_fw->file_selected.minor_ver, + uc_fw->file_selected.patch_ver); + else + drm_printf(p, "\tversion: found %u.%u.%u\n", + uc_fw->file_selected.major_ver, + uc_fw->file_selected.minor_ver, + uc_fw->file_selected.patch_ver); drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size); drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 7aa2644400b9..cb586f7df270 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -65,6 +65,18 @@ enum intel_uc_fw_type { #define INTEL_UC_FW_NUM_TYPES 2 /* + * The firmware build process will generate a version header file with major and + * minor version defined. The versions are built into CSS header of firmware. + * i915 kernel driver set the minimal firmware version required per platform. + */ +struct intel_uc_fw_file { + const char *path; + u16 major_ver; + u16 minor_ver; + u16 patch_ver; +}; + +/* * This structure encapsulates all the data needed during the process * of fetching, caching, and loading the firmware image into the uC. 
*/ @@ -74,11 +86,12 @@ struct intel_uc_fw { const enum intel_uc_fw_status status; enum intel_uc_fw_status __status; /* no accidental overwrites */ }; - const char *wanted_path; - const char *path; + struct intel_uc_fw_file file_wanted; + struct intel_uc_fw_file file_selected; bool user_overridden; size_t size; struct drm_i915_gem_object *obj; + /** * @dummy: A vma used in binding the uc fw to ggtt. We can't define this * vma on the stack as it can lead to a stack overflow, so we define it @@ -89,30 +102,18 @@ struct intel_uc_fw { struct i915_vma_resource dummy; struct i915_vma *rsa_data; - /* - * The firmware build process will generate a version header file with major and - * minor version defined. The versions are built into CSS header of firmware. - * i915 kernel driver set the minimal firmware version required per platform. - */ - u16 major_ver_wanted; - u16 minor_ver_wanted; - u16 major_ver_found; - u16 minor_ver_found; - - struct { - const char *path; - u16 major_ver; - u16 minor_ver; - } fallback; - u32 rsa_size; u32 ucode_size; - u32 private_data_size; bool loaded_via_gsc; }; +#define MAKE_UC_VER(maj, min, pat) ((pat) | ((min) << 8) | ((maj) << 16)) +#define GET_UC_VER(uc) (MAKE_UC_VER((uc)->fw.file_selected.major_ver, \ + (uc)->fw.file_selected.minor_ver, \ + (uc)->fw.file_selected.patch_ver)) + #ifdef CONFIG_DRM_I915_DEBUG_GUC void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, enum intel_uc_fw_status status); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h index b05e0e35b734..7a411178bdbf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h @@ -83,8 +83,10 @@ struct uc_css_header { } __packed; static_assert(sizeof(struct uc_css_header) == 128); -#define HUC_GSC_VERSION_DW 44 -#define HUC_GSC_MAJOR_VER_MASK (0xFF << 0) -#define HUC_GSC_MINOR_VER_MASK (0xFF << 16) +#define HUC_GSC_VERSION_HI_DW 44 +#define HUC_GSC_MAJOR_VER_HI_MASK (0xFF << 0) +#define HUC_GSC_MINOR_VER_HI_MASK (0xFF << 16) +#define HUC_GSC_VERSION_LO_DW 45 +#define HUC_GSC_PATCH_VER_LO_MASK (0xFF << 0) #endif /* _INTEL_UC_FW_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index 1df71d0796ae..e28518fe8b90 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -54,6 +54,9 @@ static int intel_guc_scrub_ctbs(void *arg) struct intel_engine_cs *engine; struct intel_context *ce; + if (!intel_has_gpu_reset(gt)) + return 0; + wakeref = intel_runtime_pm_get(gt->uncore->rpm); engine = intel_selftest_find_any_engine(gt); @@ -62,7 +65,7 @@ ce = intel_context_create(engine); if (IS_ERR(ce)) { ret = PTR_ERR(ce); - pr_err("Failed to create context, %d: %d\n", i, ret); + drm_err(&gt->i915->drm, "Failed to create context, %d: %d\n", i, ret); goto err; } @@ -83,7 +86,7 @@ if (IS_ERR(rq)) { ret = PTR_ERR(rq); - pr_err("Failed to create request, %d: %d\n", i, ret); + drm_err(&gt->i915->drm, "Failed to create request, %d: %d\n", i, ret); goto err; } @@ -93,7 +96,7 @@ for (i = 0; i < 3; ++i) { ret = i915_request_wait(last[i], 0, HZ); if (ret < 0) { - pr_err("Last request failed to complete: %d\n", ret); + drm_err(&gt->i915->drm, "Last request failed to complete: %d\n", ret); goto err; } i915_request_put(last[i]); @@ -110,7 +113,7 @@ /* GT will not idle if G2H are lost */
ret = intel_gt_wait_for_idle(gt, HZ); if (ret < 0) { - pr_err("GT failed to idle: %d\n", ret); + drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret); goto err; } @@ -150,7 +153,7 @@ static int intel_guc_steal_guc_ids(void *arg) ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL); if (!ce) { - pr_err("Context array allocation failed\n"); + drm_err(&gt->i915->drm, "Context array allocation failed\n"); return -ENOMEM; } @@ -164,24 +167,24 @@ if (IS_ERR(ce[context_index])) { ret = PTR_ERR(ce[context_index]); ce[context_index] = NULL; - pr_err("Failed to create context: %d\n", ret); + drm_err(&gt->i915->drm, "Failed to create context: %d\n", ret); goto err_wakeref; } ret = igt_spinner_init(&spin, engine->gt); if (ret) { - pr_err("Failed to create spinner: %d\n", ret); + drm_err(&gt->i915->drm, "Failed to create spinner: %d\n", ret); goto err_contexts; } spin_rq = igt_spinner_create_request(&spin, ce[context_index], MI_ARB_CHECK); if (IS_ERR(spin_rq)) { ret = PTR_ERR(spin_rq); - pr_err("Failed to create spinner request: %d\n", ret); + drm_err(&gt->i915->drm, "Failed to create spinner request: %d\n", ret); goto err_contexts; } ret = request_add_spin(spin_rq, &spin); if (ret) { - pr_err("Failed to add Spinner request: %d\n", ret); + drm_err(&gt->i915->drm, "Failed to add Spinner request: %d\n", ret); goto err_spin_rq; } @@ -191,7 +194,7 @@ if (IS_ERR(ce[context_index])) { ret = PTR_ERR(ce[context_index--]); ce[context_index] = NULL; - pr_err("Failed to create context: %d\n", ret); + drm_err(&gt->i915->drm, "Failed to create context: %d\n", ret); goto err_spin_rq; } @@ -200,8 +203,8 @@ ret = PTR_ERR(rq); rq = NULL; if (ret != -EAGAIN) { - pr_err("Failed to create request, %d: %d\n", - context_index, ret); + drm_err(&gt->i915->drm, "Failed to create request, %d: %d\n", + context_index, ret); goto err_spin_rq; } } else { @@ -215,7 +218,7 @@ igt_spinner_end(&spin); ret = intel_selftest_wait_for_rq(spin_rq); if (ret) { - pr_err("Spin request failed to complete: %d\n", ret); + drm_err(&gt->i915->drm, "Spin request failed to complete: %d\n", ret); i915_request_put(last); goto err_spin_rq; } @@ -227,7 +230,7 @@ ret = i915_request_wait(last, 0, HZ * 30); i915_request_put(last); if (ret < 0) { - pr_err("Last request failed to complete: %d\n", ret); + drm_err(&gt->i915->drm, "Last request failed to complete: %d\n", ret); goto err_spin_rq; } @@ -235,7 +238,7 @@ rq = nop_user_request(ce[context_index], NULL); if (IS_ERR(rq)) { ret = PTR_ERR(rq); - pr_err("Failed to steal guc_id, %d: %d\n", context_index, ret); + drm_err(&gt->i915->drm, "Failed to steal guc_id, %d: %d\n", context_index, ret); goto err_spin_rq; } @@ -243,21 +246,20 @@ ret = i915_request_wait(rq, 0, HZ); i915_request_put(rq); if (ret < 0) { - pr_err("Request with stolen guc_id failed to complete: %d\n", - ret); + drm_err(&gt->i915->drm, "Request with stolen guc_id failed to complete: %d\n", ret); goto err_spin_rq; } /* Wait for idle */ ret = intel_gt_wait_for_idle(gt, HZ * 30); if (ret < 0) { - pr_err("GT failed to idle: %d\n", ret); + drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret); goto err_spin_rq; } /* Verify a guc_id was stolen */ if (guc->number_guc_id_stolen == number_guc_id_stolen) { - pr_err("No guc_id was stolen");
stolen"); + drm_err(>->i915->drm, "No guc_id was stolen"); ret = -EINVAL; } else { ret = 0; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c new file mode 100644 index 000000000000..01f8cd3c3134 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "selftests/igt_spinner.h" +#include "selftests/igt_reset.h" +#include "selftests/intel_scheduler_helpers.h" +#include "gt/intel_engine_heartbeat.h" +#include "gem/selftests/mock_context.h" + +#define BEAT_INTERVAL 100 + +static struct i915_request *nop_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int intel_hang_guc(void *arg) +{ + struct intel_gt *gt = arg; + int ret = 0; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct igt_spinner spin; + struct i915_request *rq; + intel_wakeref_t wakeref; + struct i915_gpu_error *global = >->i915->gpu_error; + struct intel_engine_cs *engine; + unsigned int reset_count; + u32 guc_status; + u32 old_beat; + + ctx = kernel_context(gt->i915, NULL); + if (IS_ERR(ctx)) { + drm_err(>->i915->drm, "Failed get kernel context: %ld\n", PTR_ERR(ctx)); + return PTR_ERR(ctx); + } + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + ce = intel_context_create(gt->engine[BCS0]); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + drm_err(>->i915->drm, "Failed to create spinner request: %d\n", ret); + goto err; + } + + engine = ce->engine; + reset_count = i915_reset_count(global); + + old_beat = engine->props.heartbeat_interval_ms; + ret = intel_engine_set_heartbeat(engine, BEAT_INTERVAL); + if (ret) { + drm_err(>->i915->drm, "Failed to boost heatbeat interval: %d\n", ret); + goto err; + } + + ret = igt_spinner_init(&spin, engine->gt); + if (ret) { + drm_err(>->i915->drm, "Failed to create spinner: %d\n", ret); + goto err; + } + + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); + intel_context_put(ce); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + drm_err(>->i915->drm, "Failed to create spinner request: %d\n", ret); + goto err_spin; + } + + ret = request_add_spin(rq, &spin); + if (ret) { + i915_request_put(rq); + drm_err(>->i915->drm, "Failed to add Spinner request: %d\n", ret); + goto err_spin; + } + + ret = intel_reset_guc(gt); + if (ret) { + i915_request_put(rq); + drm_err(>->i915->drm, "Failed to reset GuC, ret = %d\n", ret); + goto err_spin; + } + + guc_status = intel_uncore_read(gt->uncore, GUC_STATUS); + if (!(guc_status & GS_MIA_IN_RESET)) { + i915_request_put(rq); + drm_err(>->i915->drm, "GuC failed to reset: status = 0x%08X\n", guc_status); + ret = -EIO; + goto err_spin; + } + + /* Wait for the heartbeat to cause a reset */ + ret = intel_selftest_wait_for_rq(rq); + i915_request_put(rq); + if (ret) { + drm_err(>->i915->drm, "Request failed to complete: %d\n", ret); + goto err_spin; + } + + if (i915_reset_count(global) == reset_count) { + drm_err(>->i915->drm, "Failed to record a GPU reset\n"); + ret = -EINVAL; + goto err_spin; + } + +err_spin: + igt_spinner_end(&spin); + igt_spinner_fini(&spin); + intel_engine_set_heartbeat(engine, old_beat); + + if (ret == 0) { + rq = nop_request(engine); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto err; + } + + ret = intel_selftest_wait_for_rq(rq); + i915_request_put(rq); + if (ret) { + 
+ drm_err(&gt->i915->drm, "No-op failed to complete: %d\n", ret); + goto err; + } + } + +err: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + kernel_context_close(ctx); + + return ret; +} + +int intel_guc_hang_check(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(intel_hang_guc), + }; + struct intel_gt *gt = to_gt(i915); + + if (intel_gt_is_wedged(gt)) + return 0; + + if (!intel_uc_uses_guc_submission(&gt->uc)) + return 0; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c index 812220a43df8..d17982c36d25 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c @@ -115,30 +115,30 @@ static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class) parent = multi_lrc_create_parent(gt, class, 0); if (IS_ERR(parent)) { - pr_err("Failed creating contexts: %ld", PTR_ERR(parent)); + drm_err(&gt->i915->drm, "Failed creating contexts: %ld", PTR_ERR(parent)); return PTR_ERR(parent); } else if (!parent) { - pr_debug("Not enough engines in class: %d", class); + drm_dbg(&gt->i915->drm, "Not enough engines in class: %d", class); return 0; } rq = multi_lrc_nop_request(parent); if (IS_ERR(rq)) { ret = PTR_ERR(rq); - pr_err("Failed creating requests: %d", ret); + drm_err(&gt->i915->drm, "Failed creating requests: %d", ret); goto out; } ret = intel_selftest_wait_for_rq(rq); if (ret) - pr_err("Failed waiting on request: %d", ret); + drm_err(&gt->i915->drm, "Failed waiting on request: %d", ret); i915_request_put(rq); if (ret >= 0) { ret = intel_gt_wait_for_idle(gt, HZ * 5); if (ret < 0) - pr_err("GT failed to idle: %d\n", ret); + drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret); } out:
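Finally, a note on the MAKE_UC_VER()/GET_UC_VER() macros added in intel_uc_fw.h above: packing major/minor/patch into a single u32 lets every firmware-version check in intel_guc_submission.c (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0)) reduce to one integer comparison. A standalone sketch with illustrative values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same packing as the kernel macro: patch in bits 0-7, minor in bits 8-15,
 * major from bit 16 up. Numeric order then matches (major, minor, patch)
 * order as long as minor and patch stay below 256. */
#define MAKE_UC_VER(maj, min, pat) ((pat) | ((min) << 8) | ((maj) << 16))

int main(void)
{
        uint32_t v69 = MAKE_UC_VER(69, 0, 3); /* 0x00450003 */
        uint32_t v70 = MAKE_UC_VER(70, 1, 1); /* 0x00460101 */

        assert(v70 >= MAKE_UC_VER(70, 0, 0)); /* take the v70 code paths */
        assert(v69 < MAKE_UC_VER(70, 0, 0));  /* fall back to v69 paths */
        printf("0x%08x 0x%08x\n", v69, v70);
        return 0;
}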