Diffstat:
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_client_blt.c  18
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.c  2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.h  20
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_types.h  19
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pages.c  21
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_phys.c  55
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_stolen.c  48
-rw-r--r--  drivers/gpu/drm/i915/gt/gen6_ppgtt.c  6
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.c  17
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_cs.c  42
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c  6
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.c  37
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ggtt.c  4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.c  11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.c  10
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.h  2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c  129
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.h  4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc_reg.h  3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_mocs.c  9
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6.c  22
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_reset.c  4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_submission.c  1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.c  2
-rw-r--r--  drivers/gpu/drm/i915/gt/mock_engine.c  29
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c  13
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_rps.c  8
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_timeline.c  378
-rw-r--r--  drivers/gpu/drm/i915/gt/sysfs_engines.c  10
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.c  18
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  132
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c  1
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c  31
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h  80
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h  5
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc.c  6
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  29
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h  2
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h  6
-rw-r--r--  drivers/gpu/drm/i915/i915_cmd_parser.c  67
-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c  2
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h  3
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  32
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.c  68
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.h  10
-rw-r--r--  drivers/gpu/drm/i915/i915_pci.c  7
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.c  96
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.h  10
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h  12
-rw-r--r--  drivers/gpu/drm/i915/i915_scatterlist.h  17
-rw-r--r--  drivers/gpu/drm/i915/intel_memory_region.c  2
-rw-r--r--  drivers/gpu/drm/i915/intel_pm.c  16
52 files changed, 1176 insertions(+), 406 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 272cf3ea68d5..44821d94544f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -202,12 +202,6 @@ retry:
if (unlikely(err))
goto out_request;
- if (w->ce->engine->emit_init_breadcrumb) {
- err = w->ce->engine->emit_init_breadcrumb(rq);
- if (unlikely(err))
- goto out_request;
- }
-
/*
* w->dma is already exported via (vma|obj)->resv we need only
* keep track of the GPU activity within this vma/request, and
@@ -217,9 +211,15 @@ retry:
if (err)
goto out_request;
- err = w->ce->engine->emit_bb_start(rq,
- batch->node.start, batch->node.size,
- 0);
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (unlikely(err))
+ goto out_request;
+ }
+
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start, batch->node.size,
+ 0);
out_request:
if (unlikely(err)) {
i915_request_set_error_once(rq, err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 3389ac972d16..00d24000b5e8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -82,6 +82,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->mm.madv = I915_MADV_WILLNEED;
INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
mutex_init(&obj->mm.get_page.lock);
+ INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN);
+ mutex_init(&obj->mm.get_dma_page.lock);
if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj))
i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev),
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index eaf3d4147be0..be14486f63a7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -272,8 +272,26 @@ int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
unsigned int tiling, unsigned int stride);
struct scatterlist *
+__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+ struct i915_gem_object_page_iter *iter,
+ unsigned int n,
+ unsigned int *offset);
+
+static inline struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
- unsigned int n, unsigned int *offset);
+ unsigned int n,
+ unsigned int *offset)
+{
+ return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset);
+}
+
+static inline struct scatterlist *
+i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj,
+ unsigned int n,
+ unsigned int *offset)
+{
+ return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset);
+}
struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj,
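
The reason for splitting the lookup cache into get_page and get_dma_page is that, once an IOMMU remaps the scatterlist, the CPU layout (sg->length) and the DMA-mapped layout (sg_dma_len()) of the same entry can differ. For reference, the two page-count helpers this split relies on reduce to something like the sketch below; the real definitions live in i915_scatterlist.h, which this series also touches, so treat this as illustrative rather than verbatim:

#include <linux/scatterlist.h>

/* Pages spanned by one sg entry, in CPU terms vs DMA-mapped terms. */
static inline unsigned int __sg_page_count(const struct scatterlist *sg)
{
	return sg->length >> PAGE_SHIFT;	/* CPU view of the segment */
}

static inline unsigned int __sg_dma_page_count(const struct scatterlist *sg)
{
	return sg_dma_len(sg) >> PAGE_SHIFT;	/* DMA view after mapping */
}
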
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index b5c15557cc87..e2d9b7e1e152 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -56,6 +56,8 @@ struct drm_i915_gem_object_ops {
void (*truncate)(struct drm_i915_gem_object *obj);
void (*writeback)(struct drm_i915_gem_object *obj);
+ int (*pread)(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pread *arg);
int (*pwrite)(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *arg);
@@ -80,6 +82,14 @@ struct i915_mmap_offset {
struct rb_node offset;
};
+struct i915_gem_object_page_iter {
+ struct scatterlist *sg_pos;
+ unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+ struct radix_tree_root radix;
+ struct mutex lock; /* protects this cache */
+};
+
struct drm_i915_gem_object {
struct drm_gem_object base;
@@ -246,13 +256,8 @@ struct drm_i915_gem_object {
I915_SELFTEST_DECLARE(unsigned int page_mask);
- struct i915_gem_object_page_iter {
- struct scatterlist *sg_pos;
- unsigned int sg_idx; /* in pages, but 32bit eek! */
-
- struct radix_tree_root radix;
- struct mutex lock; /* protects this cache */
- } get_page;
+ struct i915_gem_object_page_iter get_page;
+ struct i915_gem_object_page_iter get_dma_page;
/**
* Element within i915->mm.unbound_list or i915->mm.bound_list,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index f60ca6dc911f..e2c7b2a7895f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -33,6 +33,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
obj->mm.get_page.sg_pos = pages->sgl;
obj->mm.get_page.sg_idx = 0;
+ obj->mm.get_dma_page.sg_pos = pages->sgl;
+ obj->mm.get_dma_page.sg_idx = 0;
obj->mm.pages = pages;
@@ -155,6 +157,8 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
rcu_read_lock();
radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
radix_tree_delete(&obj->mm.get_page.radix, iter.index);
+ radix_tree_for_each_slot(slot, &obj->mm.get_dma_page.radix, &iter, 0)
+ radix_tree_delete(&obj->mm.get_dma_page.radix, iter.index);
rcu_read_unlock();
}
@@ -438,11 +442,12 @@ void __i915_gem_object_release_map(struct drm_i915_gem_object *obj)
}
struct scatterlist *
-i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
- unsigned int n,
- unsigned int *offset)
+__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+ struct i915_gem_object_page_iter *iter,
+ unsigned int n,
+ unsigned int *offset)
{
- struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
+ const bool dma = iter == &obj->mm.get_dma_page;
struct scatterlist *sg;
unsigned int idx, count;
@@ -471,7 +476,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
sg = iter->sg_pos;
idx = iter->sg_idx;
- count = __sg_page_count(sg);
+ count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
while (idx + count <= n) {
void *entry;
@@ -499,7 +504,7 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
idx += count;
sg = ____sg_next(sg);
- count = __sg_page_count(sg);
+ count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
}
scan:
@@ -517,7 +522,7 @@ scan:
while (idx + count <= n) {
idx += count;
sg = ____sg_next(sg);
- count = __sg_page_count(sg);
+ count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
}
*offset = n - idx;
@@ -584,7 +589,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
struct scatterlist *sg;
unsigned int offset;
- sg = i915_gem_object_get_sg(obj, n, &offset);
+ sg = i915_gem_object_get_sg_dma(obj, n, &offset);
if (len)
*len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 28147aab47b9..3a4dfe2ef1da 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -134,6 +134,58 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
vaddr, dma);
}
+static int
+phys_pwrite(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pwrite *args)
+{
+ void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
+ char __user *user_data = u64_to_user_ptr(args->data_ptr);
+ int err;
+
+ err = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ return err;
+
+ /*
+ * We manually control the domain here and pretend that it
+ * remains coherent i.e. in the GTT domain, like shmem_pwrite.
+ */
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
+
+ if (copy_from_user(vaddr, user_data, args->size))
+ return -EFAULT;
+
+ drm_clflush_virt_range(vaddr, args->size);
+ intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
+
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
+ return 0;
+}
+
+static int
+phys_pread(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pread *args)
+{
+ void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
+ char __user *user_data = u64_to_user_ptr(args->data_ptr);
+ int err;
+
+ err = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ return err;
+
+ drm_clflush_virt_range(vaddr, args->size);
+ if (copy_to_user(user_data, vaddr, args->size))
+ return -EFAULT;
+
+ return 0;
+}
+
static void phys_release(struct drm_i915_gem_object *obj)
{
fput(obj->base.filp);
@@ -144,6 +196,9 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
.get_pages = i915_gem_object_get_pages_phys,
.put_pages = i915_gem_object_put_pages_phys,
+ .pread = phys_pread,
+ .pwrite = phys_pwrite,
+
.release = phys_release,
};
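
With both hooks wired into i915_gem_phys_ops, the common ioctl path can offer the operation to the backend first and fall back to the generic shmem path only when no hook claims it. A minimal sketch of that dispatch, assuming the usual -ENODEV convention for "not handled" (the actual i915_gem.c hunk is not shown in this excerpt, and both function names here are hypothetical):

/*
 * Sketch only: dispatch_pread() and generic_shmem_pread() are
 * hypothetical names standing in for the real i915_gem.c plumbing.
 */
static int dispatch_pread(struct drm_i915_gem_object *obj,
			  const struct drm_i915_gem_pread *args)
{
	int ret = -ENODEV;

	if (obj->ops->pread)
		ret = obj->ops->pread(obj, args);
	if (ret != -ENODEV)
		return ret;		/* the backend handled it */

	return generic_shmem_pread(obj, args);	/* hypothetical fallback */
}
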
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 84b2707d8b17..29bffc6afcc1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -497,6 +497,43 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915)
return 0;
}
+static void dbg_poison(struct i915_ggtt *ggtt,
+ dma_addr_t addr, resource_size_t size,
+ u8 x)
+{
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+ if (!drm_mm_node_allocated(&ggtt->error_capture))
+ return;
+
+ if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND)
+ return; /* beware stop_machine() inversion */
+
+ GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
+ mutex_lock(&ggtt->error_mutex);
+ while (size) {
+ void __iomem *s;
+
+ ggtt->vm.insert_page(&ggtt->vm, addr,
+ ggtt->error_capture.start,
+ I915_CACHE_NONE, 0);
+ mb();
+
+ s = io_mapping_map_wc(&ggtt->iomap,
+ ggtt->error_capture.start,
+ PAGE_SIZE);
+ memset_io(s, x, PAGE_SIZE);
+ io_mapping_unmap(s);
+
+ addr += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ mb();
+ ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
+ mutex_unlock(&ggtt->error_mutex);
+#endif
+}
+
static struct sg_table *
i915_pages_create_for_stolen(struct drm_device *dev,
resource_size_t offset, resource_size_t size)
@@ -540,6 +577,11 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
if (IS_ERR(pages))
return PTR_ERR(pages);
+ dbg_poison(&to_i915(obj->base.dev)->ggtt,
+ sg_dma_address(pages->sgl),
+ sg_dma_len(pages->sgl),
+ POISON_INUSE);
+
__i915_gem_object_set_pages(obj, pages, obj->stolen->size);
return 0;
@@ -549,6 +591,12 @@ static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
struct sg_table *pages)
{
/* Should only be called from i915_gem_object_release_stolen() */
+
+ dbg_poison(&to_i915(obj->base.dev)->ggtt,
+ sg_dma_address(pages->sgl),
+ sg_dma_len(pages->sgl),
+ POISON_FREE);
+
sg_free_table(pages);
kfree(pages);
}
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index c30adc05fa98..680bd9442eb0 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -131,17 +131,17 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
do {
- GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
+ GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
iter.dma += I915_GTT_PAGE_SIZE;
if (iter.dma == iter.max) {
iter.sg = __sg_next(iter.sg);
- if (!iter.sg)
+ if (!iter.sg || sg_dma_len(iter.sg) == 0)
break;
iter.dma = sg_dma_address(iter.sg);
- iter.max = iter.dma + iter.sg->length;
+ iter.max = iter.dma + sg_dma_len(iter.sg);
}
if (++act_pte == GEN6_PTES) {
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 38c7069b7749..a37c968ef8f7 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -372,19 +372,19 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
- GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
+ GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) {
iter->sg = __sg_next(iter->sg);
- if (!iter->sg) {
+ if (!iter->sg || sg_dma_len(iter->sg) == 0) {
idx = 0;
break;
}
iter->dma = sg_dma_address(iter->sg);
- iter->max = iter->dma + iter->sg->length;
+ iter->max = iter->dma + sg_dma_len(iter->sg);
}
if (gen8_pd_index(++idx, 0) == 0) {
@@ -413,8 +413,8 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
u32 flags)
{
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+ unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma->node.start;
- dma_addr_t rem = iter->sg->length;
GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
@@ -456,7 +456,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
}
do {
- GEM_BUG_ON(iter->sg->length < page_size);
+ GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
vaddr[index++] = encode | iter->dma;
start += page_size;
@@ -467,7 +467,10 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
if (!iter->sg)
break;
- rem = iter->sg->length;
+ rem = sg_dma_len(iter->sg);
+ if (!rem)
+ break;
+
iter->dma = sg_dma_address(iter->sg);
iter->max = iter->dma + rem;
@@ -525,7 +528,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
}
vma->page_sizes.gtt |= page_size;
- } while (iter->sg);
+ } while (iter->sg && sg_dma_len(iter->sg));
}
static void gen8_ppgtt_insert(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5bfb5f7ed02c..0b31670343f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -305,8 +305,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->i915 = i915;
engine->gt = gt;
engine->uncore = gt->uncore;
- engine->hw_id = engine->guc_id = info->hw_id;
engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
+ engine->hw_id = info->hw_id;
+ engine->guc_id = MAKE_GUC_ID(info->class, info->instance);
engine->class = info->class;
engine->instance = info->instance;
@@ -371,7 +372,8 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine)
* instances.
*/
if ((INTEL_GEN(i915) >= 11 &&
- engine->gt->info.vdbox_sfc_access & engine->mask) ||
+ (engine->gt->info.vdbox_sfc_access &
+ BIT(engine->instance))) ||
(INTEL_GEN(i915) >= 9 && engine->instance == 0))
engine->uabi_capabilities |=
I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
@@ -1599,6 +1601,41 @@ static unsigned long list_count(struct list_head *list)
return count;
}
+static unsigned long read_ul(void *p, size_t x)
+{
+ return *(unsigned long *)(p + x);
+}
+
+static void print_properties(struct intel_engine_cs *engine,
+ struct drm_printer *m)
+{
+ static const struct pmap {
+ size_t offset;
+ const char *name;
+ } props[] = {
+#define P(x) { \
+ .offset = offsetof(typeof(engine->props), x), \
+ .name = #x \
+}
+ P(heartbeat_interval_ms),
+ P(max_busywait_duration_ns),
+ P(preempt_timeout_ms),
+ P(stop_timeout_ms),
+ P(timeslice_duration_ms),
+
+ {},
+#undef P
+ };
+ const struct pmap *p;
+
+ drm_printf(m, "\tProperties:\n");
+ for (p = props; p->name; p++)
+ drm_printf(m, "\t\t%s: %lu [default %lu]\n",
+ p->name,
+ read_ul(&engine->props, p->offset),
+ read_ul(&engine->defaults, p->offset));
+}
+
void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...)
@@ -1641,6 +1678,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
drm_printf(m, "\tReset count: %d (global %d)\n",
i915_reset_engine_count(error, engine),
i915_reset_count(error));
+ print_properties(engine, m);
drm_printf(m, "\tRequests:\n");
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 5067d0524d4b..9060385cd69e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -41,6 +41,8 @@ static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
{
engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
i915_request_add_active_barriers(rq);
+ if (!engine->heartbeat.systole && intel_engine_has_heartbeat(engine))
+ engine->heartbeat.systole = i915_request_get(rq);
}
static void show_heartbeat(const struct i915_request *rq,
@@ -144,8 +146,6 @@ static void heartbeat(struct work_struct *wrk)
goto unlock;
idle_pulse(engine, rq);
- if (engine->i915->params.enable_hangcheck)
- engine->heartbeat.systole = i915_request_get(rq);
__i915_request_commit(rq);
__i915_request_queue(rq, &attr);
@@ -153,7 +153,7 @@ static void heartbeat(struct work_struct *wrk)
unlock:
mutex_unlock(&ce->timeline->mutex);
out:
- if (!next_heartbeat(engine))
+ if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine))
i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
intel_engine_pm_put(engine);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index f7b2e07e2229..499b09cb4acf 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -17,6 +17,25 @@
#include "intel_ring.h"
#include "shmem_utils.h"
+static void dbg_poison_ce(struct intel_context *ce)
+{
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return;
+
+ if (ce->state) {
+ struct drm_i915_gem_object *obj = ce->state->obj;
+ int type = i915_coherent_map_type(ce->engine->i915);
+ void *map;
+
+ map = i915_gem_object_pin_map(obj, type);
+ if (!IS_ERR(map)) {
+ memset(map, CONTEXT_REDZONE, obj->base.size);
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+ }
+ }
+}
+
static int __engine_unpark(struct intel_wakeref *wf)
{
struct intel_engine_cs *engine =
@@ -32,20 +51,14 @@ static int __engine_unpark(struct intel_wakeref *wf)
if (ce) {
GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));
+ /* Flush all pending HW writes before we touch the context */
+ while (unlikely(intel_context_inflight(ce)))
+ intel_engine_flush_submission(engine);
+
/* First poison the image to verify we never fully trust it */
- if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
- struct drm_i915_gem_object *obj = ce->state->obj;
- int type = i915_coherent_map_type(engine->i915);
- void *map;
-
- map = i915_gem_object_pin_map(obj, type);
- if (!IS_ERR(map)) {
- memset(map, CONTEXT_REDZONE, obj->base.size);
- i915_gem_object_flush_map(obj);
- i915_gem_object_unpin_map(obj);
- }
- }
+ dbg_poison_ce(ce);
+ /* Scrub the context image after our loss of control */
ce->ops->reset(ce);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 188a5f70177d..cf94525be2c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -1383,7 +1383,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
if (ret)
goto err_sg_alloc;
- iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
+ iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
GEM_BUG_ON(!iter);
sg = st->sgl;
@@ -1391,7 +1391,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
do {
unsigned int len;
- len = min(iter->length - (offset << PAGE_SHIFT),
+ len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
count << PAGE_SHIFT);
sg_set_page(sg, NULL, len, 0);
sg_dma_address(sg) =
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 39b428c5049c..44f1d51e5ae5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -614,6 +614,8 @@ void intel_gt_driver_remove(struct intel_gt *gt)
void intel_gt_driver_unregister(struct intel_gt *gt)
{
+ intel_wakeref_t wakeref;
+
intel_rps_driver_unregister(&gt->rps);
/*
@@ -622,16 +624,15 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
* resources.
*/
intel_gt_set_wedged(gt);
+
+ /* Scrub all HW state upon release */
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ __intel_gt_reset(gt, ALL_ENGINES);
}
void intel_gt_driver_release(struct intel_gt *gt)
{
struct i915_address_space *vm;
- intel_wakeref_t wakeref;
-
- /* Scrub all HW state upon release */
- with_intel_runtime_pm(gt->uncore->rpm, wakeref)
- __intel_gt_reset(gt, ALL_ENGINES);
vm = fetch_and_zero(&gt->vm);
if (vm) /* FIXME being called twice on error paths :( */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 3f1114b58b01..7bfe9072be9a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -324,7 +324,7 @@ static void cnl_setup_private_ppat(struct intel_uncore *uncore)
GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
intel_uncore_write(uncore,
GEN10_PAT_INDEX(2),
- GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+ GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
intel_uncore_write(uncore,
GEN10_PAT_INDEX(3),
GEN8_PPAT_UC);
@@ -349,17 +349,23 @@ static void cnl_setup_private_ppat(struct intel_uncore *uncore)
*/
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
+ struct drm_i915_private *i915 = uncore->i915;
u64 pat;
pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
- GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+ /* for scanout with eLLC */
+ if (INTEL_GEN(i915) >= 9)
+ pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
+ else
+ pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+
intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index c13c650ced22..8a33940a71f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -580,7 +580,7 @@ static inline struct sgt_dma {
struct scatterlist *sg = vma->pages->sgl;
dma_addr_t addr = sg_dma_address(sg);
- return (struct sgt_dma){ sg, addr, addr + sg->length };
+ return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) };
}
#endif
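
All of the sg_dma_len() conversions above follow the same rule: when walking the DMA-mapped view of a scatterlist, an entry whose DMA length is zero marks the end of the mapped range, because dma_map_sg() may coalesce segments and leave trailing entries unused. A self-contained sketch of that walk, using the generic sg_next() in place of the driver's __sg_next():

#include <linux/scatterlist.h>

/* Visit each DMA segment; a zero sg_dma_len() terminates the walk. */
static void for_each_dma_segment(struct sg_table *st,
				 void (*use)(dma_addr_t addr, unsigned int len))
{
	struct scatterlist *sg;

	for (sg = st->sgl; sg && sg_dma_len(sg); sg = sg_next(sg))
		use(sg_dma_address(sg), sg_dma_len(sg));
}
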
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f82c6dd1de18..8a51c1c3a091 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1215,7 +1215,8 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
static void
execlists_check_context(const struct intel_context *ce,
- const struct intel_engine_cs *engine)
+ const struct intel_engine_cs *engine,
+ const char *when)
{
const struct intel_ring *ring = ce->ring;
u32 *regs = ce->lrc_reg_state;
@@ -1250,7 +1251,7 @@ execlists_check_context(const struct intel_context *ce,
valid = false;
}
- WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
+ WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}
static void restore_default_state(struct intel_context *ce,
@@ -1346,7 +1347,7 @@ __execlists_schedule_in(struct i915_request *rq)
reset_active(rq, engine);
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- execlists_check_context(ce, engine);
+ execlists_check_context(ce, engine, "before");
if (ce->tag) {
/* Use a fixed tag for OA and friends */
@@ -1417,6 +1418,9 @@ __execlists_schedule_out(struct i915_request *rq,
* refrain from doing non-trivial work here.
*/
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ execlists_check_context(ce, engine, "after");
+
/*
* If we have just completed this context, the engine may now be
* idle and we want to re-enter powersaving.
@@ -2495,25 +2499,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
* bits 47-57: sw context id of the lrc the GT switched away from
* bits 58-63: sw counter of the lrc the GT switched away from
*/
-static inline bool gen12_csb_parse(const u64 *csb)
+static inline bool gen12_csb_parse(const u64 csb)
{
- bool ctx_away_valid;
- bool new_queue;
- u64 entry;
-
- /* HSD#22011248461 */
- entry = READ_ONCE(*csb);
- if (unlikely(entry == -1)) {
- preempt_disable();
- if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
- GEM_WARN_ON("50us CSB timeout");
- preempt_enable();
- }
- WRITE_ONCE(*(u64 *)csb, -1);
-
- ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
- new_queue =
- lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+ bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
+ bool new_queue =
+ lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
/*
* The context switch detail is not guaranteed to be 5 when a preemption
@@ -2523,7 +2513,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
* would require some extra handling, but we don't support that.
*/
if (!ctx_away_valid || new_queue) {
- GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
+ GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
return true;
}
@@ -2532,19 +2522,79 @@ static inline bool gen12_csb_parse(const u64 *csb)
* context switch on an unsuccessful wait instruction since we always
* use polling mode.
*/
- GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
+ GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
return false;
}
-static inline bool gen8_csb_parse(const u64 *csb)
+static inline bool gen8_csb_parse(const u64 csb)
+{
+ return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+}
+
+static noinline u64
+wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
{
- return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+ u64 entry;
+
+ /*
+ * Reading from the HWSP has one particular advantage: we can detect
+ * a stale entry. Since the write into HWSP is broken, we have no reason
+ * to trust the HW at all, the mmio entry may equally be unordered, so
+ * we prefer the path that is self-checking and as a last resort,
+ * return the mmio value.
+ *
+ * tgl,dg1:HSDES#22011327657
+ */
+ preempt_disable();
+ if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) {
+ int idx = csb - engine->execlists.csb_status;
+ int status;
+
+ status = GEN8_EXECLISTS_STATUS_BUF;
+ if (idx >= 6) {
+ status = GEN11_EXECLISTS_STATUS_BUF2;
+ idx -= 6;
+ }
+ status += sizeof(u64) * idx;
+
+ entry = intel_uncore_read64(engine->uncore,
+ _MMIO(engine->mmio_base + status));
+ }
+ preempt_enable();
+
+ return entry;
+}
+
+static inline u64
+csb_read(const struct intel_engine_cs *engine, u64 * const csb)
+{
+ u64 entry = READ_ONCE(*csb);
+
+ /*
+ * Unfortunately, the GPU does not always serialise its write
+ * of the CSB entries before its write of the CSB pointer, at least
+ * from the perspective of the CPU, using what is known as a Global
+ * Observation Point. We may read a new CSB tail pointer, but then
+ * read the stale CSB entries, causing us to misinterpret the
+ * context-switch events, and eventually declare the GPU hung.
+ *
+ * icl:HSDES#1806554093
+ * tgl:HSDES#22011248461
+ */
+ if (unlikely(entry == -1))
+ entry = wa_csb_read(engine, csb);
+
+ /* Consume this entry so that we can spot its future reuse. */
+ WRITE_ONCE(*csb, -1);
+
+ /* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */
+ return entry;
}
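
Restating the register-offset arithmetic buried in wa_csb_read() as a standalone helper may make the fallback clearer: CSB entries 0-5 are mirrored at GEN8_EXECLISTS_STATUS_BUF (0x370) and entries 6-11 at GEN11_EXECLISTS_STATUS_BUF2 (0x3c0), one u64 per entry. This is a sketch derived from the code above, not a helper that exists in the driver:

/* Sketch: mmio offset of CSB entry 'idx' relative to the engine base. */
static u32 csb_entry_mmio_offset(u32 mmio_base, int idx)
{
	u32 status = GEN8_EXECLISTS_STATUS_BUF;		/* entries 0..5 */

	if (idx >= 6) {
		status = GEN11_EXECLISTS_STATUS_BUF2;	/* entries 6..11 */
		idx -= 6;
	}

	return mmio_base + status + sizeof(u64) * idx;
}
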
static void process_csb(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- const u64 * const buf = execlists->csb_status;
+ u64 * const buf = execlists->csb_status;
const u8 num_entries = execlists->csb_size;
u8 head, tail;
@@ -2602,6 +2652,7 @@ static void process_csb(struct intel_engine_cs *engine)
rmb();
do {
bool promote;
+ u64 csb;
if (++head == num_entries)
head = 0;
@@ -2624,15 +2675,14 @@ static void process_csb(struct intel_engine_cs *engine)
* status notifier.
*/
+ csb = csb_read(engine, buf + head);
ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
- head,
- upper_32_bits(buf[head]),
- lower_32_bits(buf[head]));
+ head, upper_32_bits(csb), lower_32_bits(csb));
if (INTEL_GEN(engine->i915) >= 12)
- promote = gen12_csb_parse(buf + head);
+ promote = gen12_csb_parse(csb);
else
- promote = gen8_csb_parse(buf + head);
+ promote = gen8_csb_parse(csb);
if (promote) {
struct i915_request * const *old = execlists->active;
@@ -2987,6 +3037,8 @@ static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
if (!cap->error->gt->engine)
goto err_gt;
+ cap->error->gt->engine->hung = true;
+
return cap;
err_gt:
@@ -4047,6 +4099,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
static void execlists_sanitize(struct intel_engine_cs *engine)
{
+ GEM_BUG_ON(execlists_active(&engine->execlists));
+
/*
* Poison residual state on resume, in case the suspend didn't!
*
@@ -4376,6 +4430,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
/* Mark all executing requests as skipped. */
list_for_each_entry(rq, &engine->active.requests, sched.link)
mark_eio(rq);
+ intel_engine_signal_breadcrumbs(engine);
/* Flush the queued requests to the timeline list (for retiring). */
while ((rb = rb_first_cached(&execlists->queue))) {
@@ -5922,18 +5977,6 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
return 0;
}
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
- unsigned int sibling)
-{
- struct virtual_engine *ve = to_virtual_engine(engine);
-
- if (sibling >= ve->num_siblings)
- return NULL;
-
- return ve->siblings[sibling];
-}
-
void intel_execlists_show_requests(struct intel_engine_cs *engine,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 91fd8e452d9b..c2d287f25497 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -121,10 +121,6 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
const struct intel_engine_cs *master,
const struct intel_engine_cs *sibling);
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
- unsigned int sibling);
-
bool
intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 93cb6c460508..1b51f7b9a5c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -49,4 +49,7 @@
#define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A
#define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0xD
+#define GEN8_EXECLISTS_STATUS_BUF 0x370
+#define GEN11_EXECLISTS_STATUS_BUF2 0x3c0
+
#endif /* _INTEL_LRC_REG_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 39179a3eee98..254873e1646e 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -124,7 +124,7 @@ struct drm_i915_mocs_table {
LE_1_UC | LE_TC_2_LLC_ELLC, \
L3_1_UC), \
MOCS_ENTRY(I915_MOCS_PTE, \
- LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
+ LE_0_PAGETABLE | LE_TC_0_PAGETABLE | LE_LRUM(3), \
L3_3_WB)
static const struct drm_i915_mocs_entry skl_mocs_table[] = {
@@ -243,8 +243,9 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
* only, __init_mocs_table() take care to program unused index with
* this entry.
*/
- MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
- L3_3_WB),
+ MOCS_ENTRY(I915_MOCS_PTE,
+ LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
+ L3_1_UC),
GEN11_MOCS_ENTRIES,
/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
@@ -280,7 +281,7 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = {
L3_1_UC),
/* Base - L3 + LeCC:PAT (Deprecated) */
MOCS_ENTRY(I915_MOCS_PTE,
- LE_0_PAGETABLE | LE_TC_1_LLC,
+ LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
L3_3_WB),
GEN11_MOCS_ENTRIES
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index ab675d35030d..d7b8e4457fc2 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -56,9 +56,12 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
static void gen11_rc6_enable(struct intel_rc6 *rc6)
{
- struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_gt *gt = rc6_to_gt(rc6);
+ struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ u32 pg_enable;
+ int i;
/* 2b: Program RC6 thresholds.*/
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
@@ -102,10 +105,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
GEN6_RC_CTL_RC6_ENABLE |
GEN6_RC_CTL_EI_MODE(1);
- set(uncore, GEN9_PG_ENABLE,
- GEN9_RENDER_PG_ENABLE |
- GEN9_MEDIA_PG_ENABLE |
- GEN11_MEDIA_SAMPLER_PG_ENABLE);
+ pg_enable =
+ GEN9_RENDER_PG_ENABLE |
+ GEN9_MEDIA_PG_ENABLE |
+ GEN11_MEDIA_SAMPLER_PG_ENABLE;
+
+ if (INTEL_GEN(gt->i915) >= 12) {
+ for (i = 0; i < I915_MAX_VCS; i++)
+ if (HAS_ENGINE(gt, _VCS(i)))
+ pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
+ VDN_MFX_POWERGATE_ENABLE(i));
+ }
+
+ set(uncore, GEN9_PG_ENABLE, pg_enable);
}
static void gen9_rc6_enable(struct intel_rc6 *rc6)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index ac36b67fb46b..9fb4306b2900 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -19,6 +19,7 @@
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
#include "intel_reset.h"
#include "uc/intel_guc.h"
@@ -1250,7 +1251,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
engine_mask &= gt->info.engine_mask;
if (flags & I915_ERROR_CAPTURE) {
- i915_capture_error_state(gt->i915);
+ i915_capture_error_state(gt, engine_mask);
intel_gt_clear_error_registers(gt, engine_mask);
}
@@ -1370,6 +1371,7 @@ void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
{
intel_gt_set_wedged(gt);
set_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
+ intel_gt_retire_requests(gt); /* cleanup any wedged requests */
}
void intel_gt_init_reset(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 16b48e72c369..a41b43f445b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -444,6 +444,7 @@ static void reset_cancel(struct intel_engine_cs *engine)
i915_request_set_error_once(request, -EIO);
i915_request_mark_complete(request);
}
+ intel_engine_signal_breadcrumbs(engine);
/* Remaining _unready_ requests will be nop'ed when submitted */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 466ec671b379..0d88f17799ff 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1973,7 +1973,7 @@ static struct drm_i915_private *mchdev_get(void)
rcu_read_lock();
i915 = rcu_dereference(ips_mchdev);
- if (!kref_get_unless_zero(&i915->drm.ref))
+ if (i915 && !kref_get_unless_zero(&i915->drm.ref))
i915 = NULL;
rcu_read_unlock();
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index dfd1cfb8a7ec..2f830017c51d 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -245,18 +245,39 @@ static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled)
GEM_BUG_ON(stalled);
}
+static void mark_eio(struct i915_request *rq)
+{
+ if (i915_request_completed(rq))
+ return;
+
+ GEM_BUG_ON(i915_request_signaled(rq));
+
+ i915_request_set_error_once(rq, -EIO);
+ i915_request_mark_complete(rq);
+}
+
static void mock_reset_cancel(struct intel_engine_cs *engine)
{
- struct i915_request *request;
+ struct mock_engine *mock =
+ container_of(engine, typeof(*mock), base);
+ struct i915_request *rq;
unsigned long flags;
+ del_timer_sync(&mock->hw_delay);
+
spin_lock_irqsave(&engine->active.lock, flags);
/* Mark all submitted requests as skipped. */
- list_for_each_entry(request, &engine->active.requests, sched.link) {
- i915_request_set_error_once(request, -EIO);
- i915_request_mark_complete(request);
+ list_for_each_entry(rq, &engine->active.requests, sched.link)
+ mark_eio(rq);
+ intel_engine_signal_breadcrumbs(engine);
+
+ /* Cancel and submit all pending requests. */
+ list_for_each_entry(rq, &mock->hw_queue, mock.link) {
+ mark_eio(rq);
+ __i915_request_submit(rq);
}
+ INIT_LIST_HEAD(&mock->hw_queue);
spin_unlock_irqrestore(&engine->active.lock, flags);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index e73854dd2fe0..b88aa35ad75b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -215,16 +215,17 @@ static int __live_heartbeat_fast(struct intel_engine_cs *engine)
goto err_pm;
for (i = 0; i < ARRAY_SIZE(times); i++) {
- /* Manufacture a tick */
do {
- while (READ_ONCE(engine->heartbeat.systole))
- flush_delayed_work(&engine->heartbeat.work);
+ /* Manufacture a tick */
+ intel_engine_park_heartbeat(engine);
+ GEM_BUG_ON(engine->heartbeat.systole);
+ engine->serial++; /* pretend we are not idle! */
+ intel_engine_unpark_heartbeat(engine);
- engine->serial++; /* quick, pretend we are not idle! */
flush_delayed_work(&engine->heartbeat.work);
if (!delayed_work_pending(&engine->heartbeat.work)) {
- pr_err("%s: heartbeat did not start\n",
- engine->name);
+ pr_err("%s: heartbeat %d did not start\n",
+ engine->name, i);
err = -EINVAL;
goto err_pm;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 3540ba9bd459..aa5675ecb5cc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -219,7 +219,7 @@ int live_rps_clock_interval(void *arg)
struct igt_spinner spin;
int err = 0;
- if (!intel_rps_is_enabled(rps))
+ if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6)
return 0;
if (igt_spinner_init(&spin, gt))
@@ -1028,7 +1028,7 @@ int live_rps_interrupt(void *arg)
* First, let's check whether or not we are receiving interrupts.
*/
- if (!intel_rps_has_interrupts(rps))
+ if (!intel_rps_has_interrupts(rps) || INTEL_GEN(gt->i915) < 6)
return 0;
intel_gt_pm_get(gt);
@@ -1133,7 +1133,7 @@ int live_rps_power(void *arg)
* that theory.
*/
- if (!intel_rps_is_enabled(rps))
+ if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6)
return 0;
if (!librapl_energy_uJ())
@@ -1237,7 +1237,7 @@ int live_rps_dynamic(void *arg)
* moving parts into dynamic reclocking based on load.
*/
- if (!intel_rps_is_enabled(rps))
+ if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6)
return 0;
if (igt_spinner_init(&spin, gt))
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 19c2cb166e7c..2edf2b15885f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -17,8 +17,9 @@
#include "../selftests/i915_random.h"
#include "../i915_selftest.h"
-#include "../selftests/igt_flush_test.h"
-#include "../selftests/mock_gem_device.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/lib_sw_fence.h"
+#include "selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"
static struct page *hwsp_page(struct intel_timeline *tl)
@@ -755,6 +756,378 @@ out_free:
return err;
}
+static int emit_read_hwsp(struct i915_request *rq,
+ u32 seqno, u32 hwsp,
+ u32 *addr)
+{
+ const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0));
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 12);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = *addr;
+ *cs++ = 0;
+ *cs++ = seqno;
+ *addr += 4;
+
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = gpr;
+ *cs++ = hwsp;
+ *cs++ = 0;
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = gpr;
+ *cs++ = *addr;
+ *cs++ = 0;
+ *addr += 4;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+struct hwsp_watcher {
+ struct i915_vma *vma;
+ struct i915_request *rq;
+ u32 addr;
+ u32 *map;
+};
+
+static bool cmp_lt(u32 a, u32 b)
+{
+ return a < b;
+}
+
+static bool cmp_gte(u32 a, u32 b)
+{
+ return a >= b;
+}
+
+static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ obj = i915_gem_object_create_internal(gt->i915, SZ_2M);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ w->map = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(w->map)) {
+ i915_gem_object_put(obj);
+ return PTR_ERR(w->map);
+ }
+
+ vma = i915_gem_object_ggtt_pin_ww(obj, NULL, NULL, 0, 0, 0);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return PTR_ERR(vma);
+ }
+
+ w->vma = vma;
+ w->addr = i915_ggtt_offset(vma);
+ return 0;
+}
+
+static int create_watcher(struct hwsp_watcher *w,
+ struct intel_engine_cs *engine,
+ int ringsz)
+{
+ struct intel_context *ce;
+ struct intel_timeline *tl;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ ce->ring = __intel_context_ring_size(ringsz);
+ w->rq = intel_context_create_request(ce);
+ intel_context_put(ce);
+ if (IS_ERR(w->rq))
+ return PTR_ERR(w->rq);
+
+ w->addr = i915_ggtt_offset(w->vma);
+ tl = w->rq->context->timeline;
+
+ /* some light mutex juggling required; think co-routines */
+ lockdep_unpin_lock(&tl->mutex, w->rq->cookie);
+ mutex_unlock(&tl->mutex);
+
+ return 0;
+}
+
+static int check_watcher(struct hwsp_watcher *w, const char *name,
+ bool (*op)(u32 hwsp, u32 seqno))
+{
+ struct i915_request *rq = fetch_and_zero(&w->rq);
+ struct intel_timeline *tl = rq->context->timeline;
+ u32 offset, end;
+ int err;
+
+ GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size);
+
+ i915_request_get(rq);
+ mutex_lock(&tl->mutex);
+ rq->cookie = lockdep_pin_lock(&tl->mutex);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ) < 0) {
+ err = -ETIME;
+ goto out;
+ }
+
+ err = 0;
+ offset = 0;
+ end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map);
+ while (offset < end) {
+ if (!op(w->map[offset + 1], w->map[offset])) {
+ pr_err("Watcher '%s' found HWSP value %x for seqno %x\n",
+ name, w->map[offset + 1], w->map[offset]);
+ err = -EINVAL;
+ }
+
+ offset += 2;
+ }
+
+out:
+ i915_request_put(rq);
+ return err;
+}
+
+static void cleanup_watcher(struct hwsp_watcher *w)
+{
+ if (w->rq) {
+ struct intel_timeline *tl = w->rq->context->timeline;
+
+ mutex_lock(&tl->mutex);
+ w->rq->cookie = lockdep_pin_lock(&tl->mutex);
+
+ i915_request_add(w->rq);
+ }
+
+ i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP);
+}
+
+static bool retire_requests(struct intel_timeline *tl)
+{
+ struct i915_request *rq, *rn;
+
+ mutex_lock(&tl->mutex);
+ list_for_each_entry_safe(rq, rn, &tl->requests, link)
+ if (!i915_request_retire(rq))
+ break;
+ mutex_unlock(&tl->mutex);
+
+ return !i915_active_fence_isset(&tl->last_request);
+}
+
+static struct i915_request *wrap_timeline(struct i915_request *rq)
+{
+ struct intel_context *ce = rq->context;
+ struct intel_timeline *tl = ce->timeline;
+ u32 seqno = rq->fence.seqno;
+
+ while (tl->seqno >= seqno) { /* Cause a wrap */
+ i915_request_put(rq);
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ }
+
+ i915_request_put(rq);
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return rq;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ return rq;
+}
+
+static int live_hwsp_read(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct hwsp_watcher watcher[2] = {};
+ struct intel_engine_cs *engine;
+ struct intel_timeline *tl;
+ enum intel_engine_id id;
+ int err = 0;
+ int i;
+
+ /*
+ * If we take a reference to the HWSP for reading on the GPU, that
+ * read may be arbitrarily delayed (either by foreign fence or
+ * priority saturation) and a wrap can happen within 30 minutes.
+ * When the GPU read is finally submitted it should be correct,
+ * even across multiple wraps.
+ */
+
+ if (INTEL_GEN(gt->i915) < 8) /* CS convenience [SRM/LRM] */
+ return 0;
+
+ tl = intel_timeline_create(gt);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
+
+ if (!tl->hwsp_cacheline)
+ goto out_free;
+
+ for (i = 0; i < ARRAY_SIZE(watcher); i++) {
+ err = setup_watcher(&watcher[i], gt);
+ if (err)
+ goto out;
+ }
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+ unsigned long count = 0;
+ IGT_TIMEOUT(end_time);
+
+ /* Create a request we can use for remote reading of the HWSP */
+ err = create_watcher(&watcher[1], engine, SZ_512K);
+ if (err)
+ goto out;
+
+ do {
+ struct i915_sw_fence *submit;
+ struct i915_request *rq;
+ u32 hwsp;
+
+ submit = heap_fence_create(GFP_KERNEL);
+ if (!submit) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = create_watcher(&watcher[0], engine, SZ_4K);
+ if (err)
+ goto out;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ /* Skip to the end, saving 30 minutes of nops */
+ tl->seqno = -10u + 2 * (count & 3);
+ WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
+ ce->timeline = intel_timeline_get(tl);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ intel_context_put(ce);
+ goto out;
+ }
+
+ err = i915_sw_fence_await_dma_fence(&rq->submit,
+ &watcher[0].rq->fence, 0,
+ GFP_KERNEL);
+ if (err < 0) {
+ i915_request_add(rq);
+ intel_context_put(ce);
+ goto out;
+ }
+
+ mutex_lock(&watcher[0].rq->context->timeline->mutex);
+ err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp);
+ if (err == 0)
+ err = emit_read_hwsp(watcher[0].rq, /* before */
+ rq->fence.seqno, hwsp,
+ &watcher[0].addr);
+ mutex_unlock(&watcher[0].rq->context->timeline->mutex);
+ if (err) {
+ i915_request_add(rq);
+ intel_context_put(ce);
+ goto out;
+ }
+
+ mutex_lock(&watcher[1].rq->context->timeline->mutex);
+ err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp);
+ if (err == 0)
+ err = emit_read_hwsp(watcher[1].rq, /* after */
+ rq->fence.seqno, hwsp,
+ &watcher[1].addr);
+ mutex_unlock(&watcher[1].rq->context->timeline->mutex);
+ if (err) {
+ i915_request_add(rq);
+ intel_context_put(ce);
+ goto out;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ rq = wrap_timeline(rq);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit,
+ &rq->fence, 0,
+ GFP_KERNEL);
+ if (err < 0) {
+ i915_request_put(rq);
+ goto out;
+ }
+
+ err = check_watcher(&watcher[0], "before", cmp_lt);
+ i915_sw_fence_commit(submit);
+ heap_fence_put(submit);
+ if (err) {
+ i915_request_put(rq);
+ goto out;
+ }
+ count++;
+
+ if (8 * watcher[1].rq->ring->emit >
+ 3 * watcher[1].rq->ring->size) {
+ i915_request_put(rq);
+ break;
+ }
+
+ /* Flush the timeline before manually wrapping again */
+ if (i915_request_wait(rq,
+ I915_WAIT_INTERRUPTIBLE,
+ HZ) < 0) {
+ err = -ETIME;
+ i915_request_put(rq);
+ goto out;
+ }
+
+ retire_requests(tl);
+ i915_request_put(rq);
+ } while (!__igt_timeout(end_time, NULL));
+ WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef);
+
+ pr_info("%s: simulated %lu wraps\n", engine->name, count);
+ err = check_watcher(&watcher[1], "after", cmp_gte);
+ if (err)
+ goto out;
+ }
+
+out:
+ for (i = 0; i < ARRAY_SIZE(watcher); i++)
+ cleanup_watcher(&watcher[i]);
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+out_free:
+ intel_timeline_put(tl);
+ return err;
+}
+
static int live_hwsp_rollover_kernel(void *arg)
{
struct intel_gt *gt = arg;
@@ -998,6 +1371,7 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_hwsp_engine),
SUBTEST(live_hwsp_alternate),
SUBTEST(live_hwsp_wrap),
+ SUBTEST(live_hwsp_read),
SUBTEST(live_hwsp_rollover_kernel),
SUBTEST(live_hwsp_rollover_user),
};
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index 535cc1169e54..967031056202 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -79,14 +79,12 @@ static ssize_t repr_trim(char *buf, ssize_t len)
static ssize_t
__caps_show(struct intel_engine_cs *engine,
- u32 caps, char *buf, bool show_unknown)
+ unsigned long caps, char *buf, bool show_unknown)
{
const char * const *repr;
int count, n;
ssize_t len;
- BUILD_BUG_ON(!typecheck(typeof(caps), engine->uabi_capabilities));
-
switch (engine->class) {
case VIDEO_DECODE_CLASS:
repr = vcs_caps;
@@ -103,12 +101,10 @@ __caps_show(struct intel_engine_cs *engine,
count = 0;
break;
}
- GEM_BUG_ON(count > BITS_PER_TYPE(typeof(caps)));
+ GEM_BUG_ON(count > BITS_PER_LONG);
len = 0;
- for_each_set_bit(n,
- (unsigned long *)&caps,
- show_unknown ? BITS_PER_TYPE(typeof(caps)) : count) {
+ for_each_set_bit(n, &caps, show_unknown ? BITS_PER_LONG : count) {
if (n >= count || !repr[n]) {
if (GEM_WARN_ON(show_unknown))
len += snprintf(buf + len, PAGE_SIZE - len,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index e4aaa5f29796..2a343a977987 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -213,23 +213,6 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc)
return flags;
}
-static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc)
-{
- u32 flags = 0;
-
- if (intel_guc_submission_is_used(guc)) {
- u32 ctxnum, base;
-
- base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool);
- ctxnum = GUC_MAX_STAGE_DESCRIPTORS / 16;
-
- base >>= PAGE_SHIFT;
- flags |= (base << GUC_CTL_BASE_ADDR_SHIFT) |
- (ctxnum << GUC_CTL_CTXNUM_IN16_SHIFT);
- }
- return flags;
-}
-
static u32 guc_ctl_log_params_flags(struct intel_guc *guc)
{
u32 offset = intel_guc_ggtt_offset(guc, guc->log.vma) >> PAGE_SHIFT;
@@ -291,7 +274,6 @@ static void guc_init_params(struct intel_guc *guc)
BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
- params[GUC_CTL_CTXINFO] = guc_ctl_ctxinfo_flags(guc);
params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index d44061033f23..5212ff844292 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -10,11 +10,52 @@
/*
* The Additional Data Struct (ADS) has pointers for different buffers used by
- * the GuC. One single gem object contains the ADS struct itself (guc_ads), the
- * scheduling policies (guc_policies), a structure describing a collection of
- * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save
- * its internal state for sleep.
+ * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
+ * all the extra buffers indirectly linked via the ADS struct's entries.
+ *
+ * Layout of the ADS blob allocated for the GuC:
+ *
+ * +---------------------------------------+ <== base
+ * | guc_ads |
+ * +---------------------------------------+
+ * | guc_policies |
+ * +---------------------------------------+
+ * | guc_gt_system_info |
+ * +---------------------------------------+
+ * | guc_clients_info |
+ * +---------------------------------------+
+ * | guc_ct_pool_entry[size] |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
+ * | private data |
+ * +---------------------------------------+
+ * | padding |
+ * +---------------------------------------+ <== 4K aligned
*/
+struct __guc_ads_blob {
+ struct guc_ads ads;
+ struct guc_policies policies;
+ struct guc_gt_system_info system_info;
+ struct guc_clients_info clients_info;
+ struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE];
+} __packed;
+
+static u32 guc_ads_private_data_size(struct intel_guc *guc)
+{
+ return PAGE_ALIGN(guc->fw.private_data_size);
+}
+
+static u32 guc_ads_private_data_offset(struct intel_guc *guc)
+{
+ return PAGE_ALIGN(sizeof(struct __guc_ads_blob));
+}
+
+static u32 guc_ads_blob_size(struct intel_guc *guc)
+{
+ return guc_ads_private_data_offset(guc) +
+ guc_ads_private_data_size(guc);
+}
static void guc_policy_init(struct guc_policy *policy)
{
@@ -48,26 +89,37 @@ static void guc_ct_pool_entries_init(struct guc_ct_pool_entry *pool, u32 num)
memset(pool, 0, num * sizeof(*pool));
}
+static void guc_mapping_table_init(struct intel_gt *gt,
+ struct guc_gt_system_info *system_info)
+{
+ unsigned int i, j;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* Table must be set to invalid values for entries not used */
+ for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
+ for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
+ system_info->mapping_table[i][j] =
+ GUC_MAX_INSTANCES_PER_CLASS;
+
+ for_each_engine(engine, gt, id) {
+ u8 guc_class = engine->class;
+
+ system_info->mapping_table[guc_class][engine->instance] =
+ engine->instance;
+ }
+}
+
/*
* The first 80 dwords of the register state context, containing the
* execlists and ppgtt registers.
*/
#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
-/* The ads obj includes the struct itself and buffers passed to GuC */
-struct __guc_ads_blob {
- struct guc_ads ads;
- struct guc_policies policies;
- struct guc_mmio_reg_state reg_state;
- struct guc_gt_system_info system_info;
- struct guc_clients_info clients_info;
- struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE];
- u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE];
-} __packed;
-
static void __guc_ads_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
+ struct drm_i915_private *i915 = gt->i915;
struct __guc_ads_blob *blob = guc->ads_blob;
const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE;
u32 base;
@@ -99,13 +151,25 @@ static void __guc_ads_init(struct intel_guc *guc)
}
/* System info */
- blob->system_info.slice_enabled = hweight8(gt->info.sseu.slice_mask);
- blob->system_info.rcs_enabled = 1;
- blob->system_info.bcs_enabled = 1;
+ blob->system_info.engine_enabled_masks[RENDER_CLASS] = 1;
+ blob->system_info.engine_enabled_masks[COPY_ENGINE_CLASS] = 1;
+ blob->system_info.engine_enabled_masks[VIDEO_DECODE_CLASS] = VDBOX_MASK(gt);
+ blob->system_info.engine_enabled_masks[VIDEO_ENHANCEMENT_CLASS] = VEBOX_MASK(gt);
+
+ blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] =
+ hweight8(gt->info.sseu.slice_mask);
+ blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK] =
+ gt->info.vdbox_sfc_access;
+
+ if (INTEL_GEN(i915) >= 12 && !IS_DGFX(i915)) {
+ u32 distdbreg = intel_uncore_read(gt->uncore,
+ GEN12_DIST_DBS_POPULATED);
+ blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI] =
+ ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) &
+ GEN12_DOORBELLS_PER_SQIDI) + 1;
+ }
- blob->system_info.vdbox_enable_mask = VDBOX_MASK(gt);
- blob->system_info.vebox_enable_mask = VEBOX_MASK(gt);
- blob->system_info.vdbox_sfc_support_mask = gt->info.vdbox_sfc_access;
+ guc_mapping_table_init(guc_to_gt(guc), &blob->system_info);
base = intel_guc_ggtt_offset(guc, guc->ads_vma);
@@ -118,11 +182,12 @@ static void __guc_ads_init(struct intel_guc *guc)
/* ADS */
blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
- blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer);
- blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state);
blob->ads.gt_system_info = base + ptr_offset(blob, system_info);
blob->ads.clients_info = base + ptr_offset(blob, clients_info);
+ /* Private Data */
+ blob->ads.private_data = base + guc_ads_private_data_offset(guc);
+
i915_gem_object_flush_map(guc->ads_vma->obj);
}
@@ -135,14 +200,15 @@ static void __guc_ads_init(struct intel_guc *guc)
*/
int intel_guc_ads_create(struct intel_guc *guc)
{
- const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob));
+ u32 size;
int ret;
GEM_BUG_ON(guc->ads_vma);
+ size = guc_ads_blob_size(guc);
+
ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
(void **)&guc->ads_blob);
-
if (ret)
return ret;
@@ -154,6 +220,19 @@ int intel_guc_ads_create(struct intel_guc *guc)
void intel_guc_ads_destroy(struct intel_guc *guc)
{
i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP);
+ guc->ads_blob = NULL;
+}
+
+static void guc_ads_private_data_reset(struct intel_guc *guc)
+{
+ u32 size;
+
+ size = guc_ads_private_data_size(guc);
+ if (!size)
+ return;
+
+ memset((void *)guc->ads_blob + guc_ads_private_data_offset(guc), 0,
+ size);
}
/**
@@ -168,5 +247,8 @@ void intel_guc_ads_reset(struct intel_guc *guc)
{
if (!guc->ads_vma)
return;
+
__guc_ads_init(guc);
+
+ guc_ads_private_data_reset(guc);
}
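A minimal userspace sketch of the sizing math behind guc_ads_private_data_offset() and guc_ads_blob_size() above; PAGE_SIZE, the stand-in blob struct and the sample firmware value are assumptions for illustration, not the kernel's definitions.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096ul
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* Stand-in for struct __guc_ads_blob; the real size depends on the
 * guc_* structs packed inside it. */
struct fake_ads_blob { uint8_t bytes[5000]; };

static uint32_t private_data_offset(void)
{
	/* Private data starts at the first 4K boundary past the blob. */
	return PAGE_ALIGN(sizeof(struct fake_ads_blob));
}

static uint32_t blob_size(uint32_t fw_private_data_size)
{
	/* Total allocation: page-aligned blob plus page-aligned private data. */
	return private_data_offset() + PAGE_ALIGN(fw_private_data_size);
}

int main(void)
{
	printf("offset=%u total=%u\n",
	       private_data_offset(), blob_size(12345));	/* 8192 24576 */
	return 0;
}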
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 11742fca0e9e..fa9e048cc65f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -210,6 +210,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct)
GEM_BUG_ON(ct->enabled);
i915_vma_unpin_and_release(&ct->vma, I915_VMA_RELEASE_MAP);
+ memset(ct, 0, sizeof(*ct));
}
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index d4a87f4c9421..f9d0907ea1a5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -76,6 +76,7 @@ static inline bool guc_ready(struct intel_uncore *uncore, u32 *status)
static int guc_wait_ucode(struct intel_uncore *uncore)
{
+ struct drm_device *drm = &uncore->i915->drm;
u32 status;
int ret;
@@ -90,15 +91,27 @@ static int guc_wait_ucode(struct intel_uncore *uncore)
ret = wait_for(guc_ready(uncore, &status), 100);
DRM_DEBUG_DRIVER("GuC status %#x\n", status);
- if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
- DRM_ERROR("GuC firmware signature verification failed\n");
- ret = -ENOEXEC;
- }
-
- if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) {
- DRM_ERROR("GuC firmware exception. EIP: %#x\n",
- intel_uncore_read(uncore, SOFT_SCRATCH(13)));
- ret = -ENXIO;
+ if (ret) {
+ drm_err(drm, "GuC load failed: status = 0x%08X\n", status);
+ drm_err(drm, "GuC load failed: status: Reset = %d, "
+ "BootROM = 0x%02X, UKernel = 0x%02X, "
+ "MIA = 0x%02X, Auth = 0x%02X\n",
+ REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ REG_FIELD_GET(GS_BOOTROM_MASK, status),
+ REG_FIELD_GET(GS_UKERNEL_MASK, status),
+ REG_FIELD_GET(GS_MIA_MASK, status),
+ REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+ if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+ drm_err(drm, "GuC firmware signature verification failed\n");
+ ret = -ENOEXEC;
+ }
+
+ if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) {
+ drm_err(drm, "GuC firmware exception. EIP: %#x\n",
+ intel_uncore_read(uncore, SOFT_SCRATCH(13)));
+ ret = -ENXIO;
+ }
}
return ret;
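A rough userspace illustration of the field-by-field status decode added above; the mask values and sample status below are invented stand-ins, not the real GS_* register layout.

#include <stdint.h>
#include <stdio.h>

/* Mask-relative field extraction in the spirit of the kernel's
 * REG_FIELD_GET(); __builtin_ctz() locates the mask's lowest set bit. */
#define FIELD_GET(mask, val)	(((val) & (mask)) >> __builtin_ctz(mask))

#define FAKE_BOOTROM_MASK	0x000000f0u	/* stand-in mask */
#define FAKE_UKERNEL_MASK	0x0000ff00u	/* stand-in mask */

int main(void)
{
	uint32_t status = 0x00003370u;	/* hypothetical GuC status */

	printf("BootROM = 0x%02X, UKernel = 0x%02X\n",
	       FIELD_GET(FAKE_BOOTROM_MASK, status),
	       FIELD_GET(FAKE_UKERNEL_MASK, status));
	return 0;
}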
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index a6b733c146c9..79c560d9c0b6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -26,8 +26,8 @@
#define GUC_VIDEO_ENGINE2 4
#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1)
-#define GUC_MAX_ENGINE_CLASSES 5
-#define GUC_MAX_INSTANCES_PER_CLASS 16
+#define GUC_MAX_ENGINE_CLASSES 16
+#define GUC_MAX_INSTANCES_PER_CLASS 32
#define GUC_DOORBELL_INVALID 256
@@ -62,12 +62,7 @@
#define GUC_STAGE_DESC_ATTR_PCH BIT(6)
#define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7)
-/* New GuC control data */
-#define GUC_CTL_CTXINFO 0
-#define GUC_CTL_CTXNUM_IN16_SHIFT 0
-#define GUC_CTL_BASE_ADDR_SHIFT 12
-
-#define GUC_CTL_LOG_PARAMS 1
+#define GUC_CTL_LOG_PARAMS 0
#define GUC_LOG_VALID (1 << 0)
#define GUC_LOG_NOTIFY_ON_HALF_FULL (1 << 1)
#define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3)
@@ -79,11 +74,11 @@
#define GUC_LOG_ISR_MASK (0x7 << GUC_LOG_ISR_SHIFT)
#define GUC_LOG_BUF_ADDR_SHIFT 12
-#define GUC_CTL_WA 2
-#define GUC_CTL_FEATURE 3
+#define GUC_CTL_WA 1
+#define GUC_CTL_FEATURE 2
#define GUC_CTL_DISABLE_SCHEDULER (1 << 14)
-#define GUC_CTL_DEBUG 4
+#define GUC_CTL_DEBUG 3
#define GUC_LOG_VERBOSITY_SHIFT 0
#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT)
#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT)
@@ -97,12 +92,37 @@
#define GUC_LOG_DISABLED (1 << 6)
#define GUC_PROFILE_ENABLED (1 << 7)
-#define GUC_CTL_ADS 5
+#define GUC_CTL_ADS 4
#define GUC_ADS_ADDR_SHIFT 1
#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT)
#define GUC_CTL_MAX_DWORDS (SOFT_SCRATCH_COUNT - 2) /* [1..14] */
+/* Generic GT SysInfo data types */
+#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED 0
+#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK 1
+#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI 2
+#define GUC_GENERIC_GT_SYSINFO_MAX 16
+
+/*
+ * The class goes in bits [0..2] of the GuC ID, the instance in bits [3..6].
+ * Bit 7 can be used for operations that apply to all engine classes & instances.
+ */
+#define GUC_ENGINE_CLASS_SHIFT 0
+#define GUC_ENGINE_CLASS_MASK (0x7 << GUC_ENGINE_CLASS_SHIFT)
+#define GUC_ENGINE_INSTANCE_SHIFT 3
+#define GUC_ENGINE_INSTANCE_MASK (0xf << GUC_ENGINE_INSTANCE_SHIFT)
+#define GUC_ENGINE_ALL_INSTANCES BIT(7)
+
+#define MAKE_GUC_ID(class, instance) \
+ (((class) << GUC_ENGINE_CLASS_SHIFT) | \
+ ((instance) << GUC_ENGINE_INSTANCE_SHIFT))
+
+#define GUC_ID_TO_ENGINE_CLASS(guc_id) \
+ (((guc_id) & GUC_ENGINE_CLASS_MASK) >> GUC_ENGINE_CLASS_SHIFT)
+#define GUC_ID_TO_ENGINE_INSTANCE(guc_id) \
+ (((guc_id) & GUC_ENGINE_INSTANCE_MASK) >> GUC_ENGINE_INSTANCE_SHIFT)
+
/* Work item for submitting workloads into work queue of GuC. */
struct guc_wq_item {
u32 header;
@@ -336,11 +356,6 @@ struct guc_policies {
} __packed;
/* GuC MMIO reg state struct */
-
-
-#define GUC_REGSET_MAX_REGISTERS 64
-#define GUC_S3_SAVE_SPACE_PAGES 10
-
struct guc_mmio_reg {
u32 offset;
u32 value;
@@ -348,28 +363,18 @@ struct guc_mmio_reg {
#define GUC_REGSET_MASKED (1 << 0)
} __packed;
-struct guc_mmio_regset {
- struct guc_mmio_reg registers[GUC_REGSET_MAX_REGISTERS];
- u32 values_valid;
- u32 number_of_registers;
-} __packed;
-
/* GuC register sets */
-struct guc_mmio_reg_state {
- struct guc_mmio_regset engine_reg[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
- u32 reserved[98];
+struct guc_mmio_reg_set {
+ u32 address;
+ u16 count;
+ u16 reserved;
} __packed;
/* HW info */
struct guc_gt_system_info {
- u32 slice_enabled;
- u32 rcs_enabled;
- u32 reserved0;
- u32 bcs_enabled;
- u32 vdbox_enable_mask;
- u32 vdbox_sfc_support_mask;
- u32 vebox_enable_mask;
- u32 reserved[9];
+ u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+ u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES];
+ u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
} __packed;
/* Clients info */
@@ -390,15 +395,16 @@ struct guc_clients_info {
/* GuC Additional Data Struct */
struct guc_ads {
- u32 reg_state_addr;
- u32 reg_state_buffer;
+ struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+ u32 reserved0;
u32 scheduler_policies;
u32 gt_system_info;
u32 clients_info;
u32 control_data;
u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
- u32 reserved[16];
+ u32 private_data;
+ u32 reserved[15];
} __packed;
/* GuC logging structures */
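The new GuC ID packing can be sanity-checked in isolation; the sketch below copies the MAKE_GUC_ID()/GUC_ID_TO_* macros from the hunk above into a throwaway userspace test.

#include <assert.h>
#include <stdint.h>

#define GUC_ENGINE_CLASS_SHIFT		0
#define GUC_ENGINE_CLASS_MASK		(0x7u << GUC_ENGINE_CLASS_SHIFT)
#define GUC_ENGINE_INSTANCE_SHIFT	3
#define GUC_ENGINE_INSTANCE_MASK	(0xfu << GUC_ENGINE_INSTANCE_SHIFT)

#define MAKE_GUC_ID(class, instance) \
	(((class) << GUC_ENGINE_CLASS_SHIFT) | \
	 ((instance) << GUC_ENGINE_INSTANCE_SHIFT))

#define GUC_ID_TO_ENGINE_CLASS(guc_id) \
	(((guc_id) & GUC_ENGINE_CLASS_MASK) >> GUC_ENGINE_CLASS_SHIFT)
#define GUC_ID_TO_ENGINE_INSTANCE(guc_id) \
	(((guc_id) & GUC_ENGINE_INSTANCE_MASK) >> GUC_ENGINE_INSTANCE_SHIFT)

int main(void)
{
	uint32_t id = MAKE_GUC_ID(2u, 5u);	/* class 2, instance 5 */

	/* Round trip: decode must recover the packed fields. */
	assert(GUC_ID_TO_ENGINE_CLASS(id) == 2u);
	assert(GUC_ID_TO_ENGINE_INSTANCE(id) == 5u);
	return 0;
}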
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index 1949346e714e..b37fc2ffaef2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -118,6 +118,11 @@ struct guc_doorbell_info {
#define GEN8_DRB_VALID (1<<0)
#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4)
+#define GEN12_DIST_DBS_POPULATED _MMIO(0xd08)
+#define GEN12_DOORBELLS_PER_SQIDI_SHIFT 16
+#define GEN12_DOORBELLS_PER_SQIDI (0xff)
+#define GEN12_SQIDIS_DOORBELL_EXIST (0xffff)
+
#define DE_GUCRMR _MMIO(0x44054)
#define GUC_BCS_RCS_IER _MMIO(0xC550)
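A small userspace model of how __guc_ads_init() above consumes GEN12_DIST_DBS_POPULATED; the register readback value is invented for illustration.

#include <stdint.h>
#include <stdio.h>

#define GEN12_DOORBELLS_PER_SQIDI_SHIFT	16
#define GEN12_DOORBELLS_PER_SQIDI	0xffu

int main(void)
{
	uint32_t distdbreg = 0x00070003u;	/* hypothetical readback */

	/* Same shift/mask/+1 as the ADS init code above. */
	uint32_t count = ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) &
			  GEN12_DOORBELLS_PER_SQIDI) + 1;

	printf("doorbells per SQIDI: %u\n", count);	/* prints 8 */
	return 0;
}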
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index d6f55f70889d..4e6070e95fe9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -231,13 +231,15 @@ static int guc_enable_communication(struct intel_guc *guc)
intel_guc_ct_event_handler(&guc->ct);
spin_unlock_irq(&i915->irq_lock);
- DRM_INFO("GuC communication enabled\n");
+ drm_dbg(&i915->drm, "GuC communication enabled\n");
return 0;
}
static void guc_disable_communication(struct intel_guc *guc)
{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
/*
* Events generated during or after CT disable are logged by guc in
* via mmio. Make sure the register is clear before disabling CT since
@@ -257,7 +259,7 @@ static void guc_disable_communication(struct intel_guc *guc)
*/
guc_get_mmio_msg(guc);
- DRM_INFO("GuC communication disabled\n");
+ drm_dbg(&i915->drm, "GuC communication disabled\n");
}
static void __uc_fetch_firmwares(struct intel_uc *uc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 037bcaf3c8b5..180c23e2e25e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -44,24 +44,20 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* List of required GuC and HuC binaries per-platform.
* Must be ordered based on platform + revid, from newer to older.
*
- * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas
- * between 33.0 and 35.2 are only related to new additions to support new Gen12
- * features.
- *
* Note that RKL uses the same firmware as TGL.
*/
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
- fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \
- fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \
- fw_def(JASPERLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \
- fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \
- fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \
- fw_def(COMETLAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \
- fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
- fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \
- fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
- fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 2, 0, 0)) \
- fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 2, 0, 0))
+ fw_def(ROCKETLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \
+ fw_def(JASPERLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \
+ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \
+ fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), huc_def(icl, 9, 0, 0)) \
+ fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \
+ fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \
+ fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \
+ fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \
+ fw_def(BROXTON, 0, guc_def(bxt, 49, 0, 1), huc_def(bxt, 2, 0, 0)) \
+ fw_def(SKYLAKE, 0, guc_def(skl, 49, 0, 1), huc_def(skl, 2, 0, 0))
#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
"i915/" \
@@ -372,6 +368,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
}
}
+ if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
+ uc_fw->private_data_size = css->private_data_size;
+
obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 23d3a423ac0f..99bb1fe1af66 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -88,6 +88,8 @@ struct intel_uc_fw {
u32 rsa_size;
u32 ucode_size;
+
+ u32 private_data_size;
};
#ifdef CONFIG_DRM_I915_DEBUG_GUC
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
index 029214cdedd5..e41ffc7a7fbc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
@@ -69,7 +69,11 @@ struct uc_css_header {
#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16)
#define CSS_SW_VERSION_UC_MINOR (0xFF << 8)
#define CSS_SW_VERSION_UC_PATCH (0xFF << 0)
- u32 reserved[14];
+ u32 reserved0[13];
+ union {
+ u32 private_data_size; /* only applies to GuC */
+ u32 reserved1;
+ };
u32 header_info;
} __packed;
static_assert(sizeof(struct uc_css_header) == 128);
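A quick layout check for the reserved-field split above: 13 reserved dwords plus a one-dword union plus header_info still occupy the same 15 dwords as the old reserved[14] + header_info tail. The struct here is a truncated stand-in, not the full uc_css_header.

#include <stdint.h>

struct fake_css_tail {
	uint32_t reserved0[13];
	union {
		uint32_t private_data_size;	/* only applies to GuC */
		uint32_t reserved1;
	};
	uint32_t header_info;
};

/* Splitting the reserved block must not change the header size. */
_Static_assert(sizeof(struct fake_css_tail) == 15 * sizeof(uint32_t),
	       "CSS tail layout changed size");

int main(void)
{
	return 0;
}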
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index e88970256e8e..93265951fdbb 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1452,43 +1452,42 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
* space. Parsing should be faster in some cases this way.
*/
batch_end = cmd + batch_length / sizeof(*batch_end);
- do {
- u32 length;
-
- if (*cmd == MI_BATCH_BUFFER_END)
- break;
-
- desc = find_cmd(engine, *cmd, desc, &default_desc);
- if (!desc) {
- DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd);
- ret = -EINVAL;
- break;
- }
+ while (*cmd != MI_BATCH_BUFFER_END) {
+ u32 length = 1;
+
+ if (*cmd != MI_NOOP) { /* MI_NOOP == 0 */
+ desc = find_cmd(engine, *cmd, desc, &default_desc);
+ if (!desc) {
+ DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd);
+ ret = -EINVAL;
+ break;
+ }
- if (desc->flags & CMD_DESC_FIXED)
- length = desc->length.fixed;
- else
- length = (*cmd & desc->length.mask) + LENGTH_BIAS;
+ if (desc->flags & CMD_DESC_FIXED)
+ length = desc->length.fixed;
+ else
+ length = (*cmd & desc->length.mask) + LENGTH_BIAS;
- if ((batch_end - cmd) < length) {
- DRM_DEBUG("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
- *cmd,
- length,
- batch_end - cmd);
- ret = -EINVAL;
- break;
- }
+ if ((batch_end - cmd) < length) {
+ DRM_DEBUG("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n",
+ *cmd,
+ length,
+ batch_end - cmd);
+ ret = -EINVAL;
+ break;
+ }
- if (!check_cmd(engine, desc, cmd, length)) {
- ret = -EACCES;
- break;
- }
+ if (!check_cmd(engine, desc, cmd, length)) {
+ ret = -EACCES;
+ break;
+ }
- if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) {
- ret = check_bbstart(cmd, offset, length, batch_length,
- batch_addr, shadow_addr,
- jump_whitelist);
- break;
+ if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) {
+ ret = check_bbstart(cmd, offset, length, batch_length,
+ batch_addr, shadow_addr,
+ jump_whitelist);
+ break;
+ }
}
if (!IS_ERR_OR_NULL(jump_whitelist))
@@ -1501,7 +1500,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
ret = -EINVAL;
break;
}
- } while (1);
+ }
if (trampoline) {
/*
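For reference, a stripped-down userspace model of the restructured parser loop: MI_NOOP (encoded as 0) now advances one dword without a descriptor lookup. Command decoding is stubbed; only the control flow is modeled.

#include <stdint.h>
#include <stdio.h>

#define MI_NOOP			0x00000000u
#define MI_BATCH_BUFFER_END	0x05000000u	/* MI opcode 0x0a << 23 */

static uint32_t cmd_length(uint32_t cmd)
{
	return 1;	/* stub: real code derives this from the descriptor */
}

static void parse(const uint32_t *cmd, const uint32_t *batch_end)
{
	while (cmd < batch_end && *cmd != MI_BATCH_BUFFER_END) {
		uint32_t length = 1;

		if (*cmd != MI_NOOP)	/* MI_NOOP == 0 */
			length = cmd_length(*cmd);

		if ((uint32_t)(batch_end - cmd) < length) {
			printf("command overruns batch\n");
			return;
		}
		cmd += length;
	}
	printf("batch parsed\n");
}

int main(void)
{
	uint32_t batch[] = { MI_NOOP, MI_NOOP, MI_BATCH_BUFFER_END };

	parse(batch, batch + 3);
	return 0;
}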
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 200f6b86f864..77e76b665098 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -725,7 +725,7 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
gpu = NULL;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
- gpu = i915_gpu_coredump(i915);
+ gpu = i915_gpu_coredump(&i915->gt, ALL_ENGINES);
if (IS_ERR(gpu))
return PTR_ERR(gpu);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d7765b31fbef..d548e10e1600 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1641,8 +1641,7 @@ tgl_revids_get(struct drm_i915_private *dev_priv)
#define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop)
#define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb)
#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6)
-#define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \
- IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
+#define HAS_WT(dev_priv) HAS_EDRAM(dev_priv)
#define HWS_NEEDS_PHYSICAL(dev_priv) (INTEL_INFO(dev_priv)->hws_needs_physical)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bb0c12975f38..58276694c848 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -180,30 +180,6 @@ try_again:
}
static int
-i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
- struct drm_i915_gem_pwrite *args,
- struct drm_file *file)
-{
- void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
- char __user *user_data = u64_to_user_ptr(args->data_ptr);
-
- /*
- * We manually control the domain here and pretend that it
- * remains coherent i.e. in the GTT domain, like shmem_pwrite.
- */
- i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
-
- if (copy_from_user(vaddr, user_data, args->size))
- return -EFAULT;
-
- drm_clflush_virt_range(vaddr, args->size);
- intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
-
- i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
- return 0;
-}
-
-static int
i915_gem_create(struct drm_file *file,
struct intel_memory_region *mr,
u64 *size_p,
@@ -527,6 +503,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
trace_i915_gem_object_pread(obj, args->offset, args->size);
+ ret = -ENODEV;
+ if (obj->ops->pread)
+ ret = obj->ops->pread(obj, args);
+ if (ret != -ENODEV)
+ goto out;
+
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
@@ -866,8 +848,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (ret == -EFAULT || ret == -ENOSPC) {
if (i915_gem_object_has_struct_page(obj))
ret = i915_gem_shmem_pwrite(obj, args);
- else
- ret = i915_gem_phys_pwrite(obj, args, file);
}
i915_gem_object_unpin_pages(obj);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index cf6e47adfde6..d8cac4c5881f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -570,6 +570,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
ee->vm_info.pp_dir_base);
}
}
+ err_printf(m, " hung: %u\n", ee->hung);
err_printf(m, " engine reset count: %u\n", ee->reset_count);
for (n = 0; n < ee->num_ports; n++) {
@@ -1026,6 +1027,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
dma_addr_t dma;
for_each_sgt_daddr(dma, iter, vma->pages) {
+ mutex_lock(&ggtt->error_mutex);
ggtt->vm.insert_page(&ggtt->vm, dma, slot,
I915_CACHE_NONE, 0);
mb();
@@ -1035,6 +1037,10 @@ i915_vma_coredump_create(const struct intel_gt *gt,
(void __force *)s, dst,
true);
io_mapping_unmap(s);
+
+ mb();
+ ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
+ mutex_unlock(&ggtt->error_mutex);
if (ret)
break;
}
@@ -1451,6 +1457,7 @@ capture_engine(struct intel_engine_cs *engine,
static void
gt_record_engines(struct intel_gt_coredump *gt,
+ intel_engine_mask_t engine_mask,
struct i915_vma_compress *compress)
{
struct intel_engine_cs *engine;
@@ -1466,6 +1473,8 @@ gt_record_engines(struct intel_gt_coredump *gt,
if (!ee)
continue;
+ ee->hung = engine->mask & engine_mask;
+
gt->simulated |= ee->simulated;
if (ee->simulated) {
kfree(ee);
@@ -1505,25 +1514,6 @@ gt_record_uc(struct intel_gt_coredump *gt,
return error_uc;
}
-static void gt_capture_prepare(struct intel_gt_coredump *gt)
-{
- struct i915_ggtt *ggtt = gt->_gt->ggtt;
-
- mutex_lock(&ggtt->error_mutex);
-}
-
-static void gt_capture_finish(struct intel_gt_coredump *gt)
-{
- struct i915_ggtt *ggtt = gt->_gt->ggtt;
-
- if (drm_mm_node_allocated(&ggtt->error_capture))
- ggtt->vm.clear_range(&ggtt->vm,
- ggtt->error_capture.start,
- PAGE_SIZE);
-
- mutex_unlock(&ggtt->error_mutex);
-}
-
/* Capture all registers which don't fit into another category. */
static void gt_record_regs(struct intel_gt_coredump *gt)
{
@@ -1669,24 +1659,25 @@ static u32 generate_ecode(const struct intel_engine_coredump *ee)
static const char *error_msg(struct i915_gpu_coredump *error)
{
struct intel_engine_coredump *first = NULL;
+ unsigned int hung_classes = 0;
struct intel_gt_coredump *gt;
- intel_engine_mask_t engines;
int len;
- engines = 0;
for (gt = error->gt; gt; gt = gt->next) {
struct intel_engine_coredump *cs;
- if (gt->engine && !first)
- first = gt->engine;
-
- for (cs = gt->engine; cs; cs = cs->next)
- engines |= cs->engine->mask;
+ for (cs = gt->engine; cs; cs = cs->next) {
+ if (cs->hung) {
+ hung_classes |= BIT(cs->engine->uabi_class);
+ if (!first)
+ first = cs;
+ }
+ }
}
len = scnprintf(error->error_msg, sizeof(error->error_msg),
"GPU HANG: ecode %d:%x:%08x",
- INTEL_GEN(error->i915), engines,
+ INTEL_GEN(error->i915), hung_classes,
generate_ecode(first));
if (first && first->context.pid) {
/* Just show the first executing process, more is confusing */
@@ -1782,8 +1773,6 @@ i915_vma_capture_prepare(struct intel_gt_coredump *gt)
return NULL;
}
- gt_capture_prepare(gt);
-
return compress;
}
@@ -1793,14 +1782,14 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt,
if (!compress)
return;
- gt_capture_finish(gt);
-
compress_fini(compress);
kfree(compress);
}
-struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915)
+struct i915_gpu_coredump *
+i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
+ struct drm_i915_private *i915 = gt->i915;
struct i915_gpu_coredump *error;
/* Check if GPU capture has been disabled */
@@ -1812,7 +1801,7 @@ struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915)
if (!error)
return ERR_PTR(-ENOMEM);
- error->gt = intel_gt_coredump_alloc(&i915->gt, ALLOW_FAIL);
+ error->gt = intel_gt_coredump_alloc(gt, ALLOW_FAIL);
if (error->gt) {
struct i915_vma_compress *compress;
@@ -1824,7 +1813,7 @@ struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915)
}
gt_record_info(error->gt);
- gt_record_engines(error->gt, compress);
+ gt_record_engines(error->gt, engine_mask, compress);
if (INTEL_INFO(i915)->has_gt_uc)
error->gt->uc = gt_record_uc(error->gt, compress);
@@ -1871,20 +1860,23 @@ void i915_error_state_store(struct i915_gpu_coredump *error)
/**
* i915_capture_error_state - capture an error record for later analysis
- * @i915: i915 device
+ * @gt: intel_gt which originated the hang
+ * @engine_mask: hung engines
*
* Should be called when an error is detected (either a hang or an error
* interrupt) to capture error state from the time of the error. Fills
* out a structure which becomes available in debugfs for user level tools
* to pick up.
*/
-void i915_capture_error_state(struct drm_i915_private *i915)
+void i915_capture_error_state(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask)
{
struct i915_gpu_coredump *error;
- error = i915_gpu_coredump(i915);
+ error = i915_gpu_coredump(gt, engine_mask);
if (IS_ERR(error)) {
- cmpxchg(&i915->gpu_error.first_error, NULL, error);
+ cmpxchg(&gt->i915->gpu_error.first_error, NULL, error);
return;
}
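A toy illustration of the reworked hang summary above: the ecode now reports a bitmask of hung uabi classes instead of raw engine masks. The class numbers here are examples only.

#include <stdio.h>

#define BIT(n)	(1u << (n))

int main(void)
{
	/* Two hung engines sharing uabi class 2 collapse into one bit. */
	unsigned int uabi_class[] = { 0, 2, 2 };
	unsigned int hung_classes = 0;
	unsigned int i;

	for (i = 0; i < 3; i++)
		hung_classes |= BIT(uabi_class[i]);

	printf("GPU HANG: ecode 12:%x:...\n", hung_classes);	/* 12:5:... */
	return 0;
}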
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 0220b0992808..16bc42de4b84 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -59,6 +59,7 @@ struct i915_request_coredump {
struct intel_engine_coredump {
const struct intel_engine_cs *engine;
+ bool hung;
bool simulated;
u32 reset_count;
@@ -218,8 +219,10 @@ struct drm_i915_error_state_buf {
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
-struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
-void i915_capture_error_state(struct drm_i915_private *i915);
+struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask);
+void i915_capture_error_state(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask);
struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
@@ -271,7 +274,8 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err);
#else
-static inline void i915_capture_error_state(struct drm_i915_private *i915)
+static inline void
+i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
}
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 1fe390727d80..11fe790b1969 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1151,9 +1151,13 @@ static int __init i915_init(void)
return 0;
}
+ i915_pmu_init();
+
err = pci_register_driver(&i915_pci_driver);
- if (err)
+ if (err) {
+ i915_pmu_exit();
return err;
+ }
i915_perf_sysctl_register();
return 0;
@@ -1167,6 +1171,7 @@ static void __exit i915_exit(void)
i915_perf_sysctl_unregister();
pci_unregister_driver(&i915_pci_driver);
i915_globals_exit();
+ i915_pmu_exit();
}
module_init(i915_init);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 69c0fa20eba1..cd786ad12be7 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -30,6 +30,7 @@
#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
static cpumask_t i915_pmu_cpumask;
+static unsigned int i915_pmu_target_cpu = -1;
static u8 engine_config_sample(u64 config)
{
@@ -445,6 +446,8 @@ static void i915_pmu_event_destroy(struct perf_event *event)
container_of(event->pmu, typeof(*i915), pmu.base);
drm_WARN_ON(&i915->drm, event->parent);
+
+ drm_dev_put(&i915->drm);
}
static int
@@ -510,8 +513,12 @@ static int i915_pmu_event_init(struct perf_event *event)
{
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
+ struct i915_pmu *pmu = &i915->pmu;
int ret;
+ if (pmu->closed)
+ return -ENODEV;
+
if (event->attr.type != event->pmu->type)
return -ENOENT;
@@ -536,8 +543,10 @@ static int i915_pmu_event_init(struct perf_event *event)
if (ret)
return ret;
- if (!event->parent)
+ if (!event->parent) {
+ drm_dev_get(&i915->drm);
event->destroy = i915_pmu_event_destroy;
+ }
return 0;
}
@@ -594,9 +603,16 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
static void i915_pmu_event_read(struct perf_event *event)
{
+ struct drm_i915_private *i915 =
+ container_of(event->pmu, typeof(*i915), pmu.base);
struct hw_perf_event *hwc = &event->hw;
+ struct i915_pmu *pmu = &i915->pmu;
u64 prev, new;
+ if (pmu->closed) {
+ event->hw.state = PERF_HES_STOPPED;
+ return;
+ }
again:
prev = local64_read(&hwc->prev_count);
new = __i915_pmu_event_read(event);
@@ -724,6 +740,13 @@ static void i915_pmu_disable(struct perf_event *event)
static void i915_pmu_event_start(struct perf_event *event, int flags)
{
+ struct drm_i915_private *i915 =
+ container_of(event->pmu, typeof(*i915), pmu.base);
+ struct i915_pmu *pmu = &i915->pmu;
+
+ if (pmu->closed)
+ return;
+
i915_pmu_enable(event);
event->hw.state = 0;
}
@@ -738,6 +761,13 @@ static void i915_pmu_event_stop(struct perf_event *event, int flags)
static int i915_pmu_event_add(struct perf_event *event, int flags)
{
+ struct drm_i915_private *i915 =
+ container_of(event->pmu, typeof(*i915), pmu.base);
+ struct i915_pmu *pmu = &i915->pmu;
+
+ if (pmu->closed)
+ return -ENODEV;
+
if (flags & PERF_EF_START)
i915_pmu_event_start(event, flags);
@@ -1020,25 +1050,39 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
- unsigned int target;
+ unsigned int target = i915_pmu_target_cpu;
GEM_BUG_ON(!pmu->base.event_init);
+ /*
+ * Unregistering an instance generates a CPU offline event which we must
+ * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
+ */
+ if (pmu->closed)
+ return 0;
+
if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+
/* Migrate events if there is a valid target */
if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &i915_pmu_cpumask);
- perf_pmu_migrate_context(&pmu->base, cpu, target);
+ i915_pmu_target_cpu = target;
}
}
+ if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
+ perf_pmu_migrate_context(&pmu->base, cpu, target);
+ pmu->cpuhp.cpu = target;
+ }
+
return 0;
}
-static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
+static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
+
+void i915_pmu_init(void)
{
- enum cpuhp_state slot;
int ret;
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
@@ -1046,27 +1090,29 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
i915_pmu_cpu_online,
i915_pmu_cpu_offline);
if (ret < 0)
- return ret;
+ pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
+ ret);
+ else
+ cpuhp_slot = ret;
+}
- slot = ret;
- ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
- if (ret) {
- cpuhp_remove_multi_state(slot);
- return ret;
- }
+void i915_pmu_exit(void)
+{
+ if (cpuhp_slot != CPUHP_INVALID)
+ cpuhp_remove_multi_state(cpuhp_slot);
+}
- pmu->cpuhp.slot = slot;
- return 0;
+static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
+{
+ if (cpuhp_slot == CPUHP_INVALID)
+ return -EINVAL;
+
+ return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
}
static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
- struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
-
- drm_WARN_ON(&i915->drm, pmu->cpuhp.slot == CPUHP_INVALID);
- drm_WARN_ON(&i915->drm, cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
- cpuhp_remove_multi_state(pmu->cpuhp.slot);
- pmu->cpuhp.slot = CPUHP_INVALID;
+ cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}
static bool is_igp(struct drm_i915_private *i915)
@@ -1100,7 +1146,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
spin_lock_init(&pmu->lock);
hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
pmu->timer.function = i915_sample;
- pmu->cpuhp.slot = CPUHP_INVALID;
+ pmu->cpuhp.cpu = -1;
if (!is_igp(i915)) {
pmu->name = kasprintf(GFP_KERNEL,
@@ -1167,7 +1213,13 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
if (!pmu->base.event_init)
return;
- drm_WARN_ON(&i915->drm, pmu->enable);
+ /*
+ * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu
+ * ensures all currently executing ones will have exited before we
+ * proceed with unregistration.
+ */
+ pmu->closed = true;
+ synchronize_rcu();
hrtimer_cancel(&pmu->timer);
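A loose userspace analogue of the pmu->closed teardown pattern above, with a C11 atomic standing in for the kernel's RCU-based synchronization; the names and error value mirror the patch, but the scaffolding is hypothetical.

#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool pmu_closed;

static int pmu_event_init(void)
{
	/* All callbacks bail out once unregistration has begun. */
	if (atomic_load(&pmu_closed))
		return -ENODEV;
	return 0;
}

static void pmu_unregister(void)
{
	atomic_store(&pmu_closed, true);
	/* kernel: synchronize_rcu() here waits out in-flight callbacks */
}

int main(void)
{
	printf("before: %d\n", pmu_event_init());	/* 0 */
	pmu_unregister();
	printf("after: %d\n", pmu_event_init());	/* -ENODEV */
	return 0;
}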
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 941f0c14037c..a24885ab415c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -43,13 +43,17 @@ struct i915_pmu {
*/
struct {
struct hlist_node node;
- enum cpuhp_state slot;
+ unsigned int cpu;
} cpuhp;
/**
* @base: PMU base.
*/
struct pmu base;
/**
+ * @closed: i915 is unregistering.
+ */
+ bool closed;
+ /**
* @name: Name as registered with perf core.
*/
const char *name;
@@ -122,11 +126,15 @@ struct i915_pmu {
};
#ifdef CONFIG_PERF_EVENTS
+void i915_pmu_init(void);
+void i915_pmu_exit(void);
void i915_pmu_register(struct drm_i915_private *i915);
void i915_pmu_unregister(struct drm_i915_private *i915);
void i915_pmu_gt_parked(struct drm_i915_private *i915);
void i915_pmu_gt_unparked(struct drm_i915_private *i915);
#else
+static inline void i915_pmu_init(void) {}
+static inline void i915_pmu_exit(void) {}
static inline void i915_pmu_register(struct drm_i915_private *i915) {}
static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
static inline void i915_pmu_gt_parked(struct drm_i915_private *i915) {}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bb0656875697..c61302c69b11 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8994,10 +8994,6 @@ enum {
#define GEN9_PWRGT_MEDIA_STATUS_MASK (1 << 0)
#define GEN9_PWRGT_RENDER_STATUS_MASK (1 << 1)
-#define POWERGATE_ENABLE _MMIO(0xa210)
-#define VDN_HCP_POWERGATE_ENABLE(n) BIT(((n) * 2) + 3)
-#define VDN_MFX_POWERGATE_ENABLE(n) BIT(((n) * 2) + 4)
-
#define GTFIFODBG _MMIO(0x120000)
#define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20)
#define GT_FIFO_FREE_ENTRIES_CHV (0x7f << 13)
@@ -9137,9 +9133,11 @@ enum {
#define GEN9_MEDIA_PG_IDLE_HYSTERESIS _MMIO(0xA0C4)
#define GEN9_RENDER_PG_IDLE_HYSTERESIS _MMIO(0xA0C8)
#define GEN9_PG_ENABLE _MMIO(0xA210)
-#define GEN9_RENDER_PG_ENABLE REG_BIT(0)
-#define GEN9_MEDIA_PG_ENABLE REG_BIT(1)
-#define GEN11_MEDIA_SAMPLER_PG_ENABLE REG_BIT(2)
+#define GEN9_RENDER_PG_ENABLE REG_BIT(0)
+#define GEN9_MEDIA_PG_ENABLE REG_BIT(1)
+#define GEN11_MEDIA_SAMPLER_PG_ENABLE REG_BIT(2)
+#define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n))
+#define VDN_MFX_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n))
#define GEN8_PUSHBUS_CONTROL _MMIO(0xA248)
#define GEN8_PUSHBUS_ENABLE _MMIO(0xA250)
#define GEN8_PUSHBUS_SHIFT _MMIO(0xA25C)
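The relocated VDN powergate bits can be checked directly; the sketch below lifts the two VDN macros from the hunk above (with REG_BIT() simplified to a plain shift) and asserts the expected positions: HCP(n) at bit 3 + 2n, MFX(n) at bit 4 + 2n.

#include <assert.h>

#define REG_BIT(n)			(1u << (n))
#define VDN_HCP_POWERGATE_ENABLE(n)	REG_BIT(3 + 2 * (n))
#define VDN_MFX_POWERGATE_ENABLE(n)	REG_BIT(4 + 2 * (n))

int main(void)
{
	/* VCS0 lands on bits 3/4, VCS2 on bits 7/8. */
	assert(VDN_HCP_POWERGATE_ENABLE(0) == REG_BIT(3));
	assert(VDN_MFX_POWERGATE_ENABLE(0) == REG_BIT(4));
	assert(VDN_HCP_POWERGATE_ENABLE(2) == REG_BIT(7));
	assert(VDN_MFX_POWERGATE_ENABLE(2) == REG_BIT(8));
	return 0;
}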
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
index 883dd8d09d6b..9cb26a224034 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.h
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -27,13 +27,17 @@ static __always_inline struct sgt_iter {
} __sgt_iter(struct scatterlist *sgl, bool dma) {
struct sgt_iter s = { .sgp = sgl };
- if (s.sgp) {
+ if (dma && s.sgp && sg_dma_len(s.sgp) == 0) {
+ s.sgp = NULL;
+ } else if (s.sgp) {
s.max = s.curr = s.sgp->offset;
- s.max += s.sgp->length;
- if (dma)
+ if (dma) {
s.dma = sg_dma_address(s.sgp);
- else
+ s.max += sg_dma_len(s.sgp);
+ } else {
s.pfn = page_to_pfn(sg_page(s.sgp));
+ s.max += s.sgp->length;
+ }
}
return s;
@@ -44,6 +48,11 @@ static inline int __sg_page_count(const struct scatterlist *sg)
return sg->length >> PAGE_SHIFT;
}
+static inline int __sg_dma_page_count(const struct scatterlist *sg)
+{
+ return sg_dma_len(sg) >> PAGE_SHIFT;
+}
+
static inline struct scatterlist *____sg_next(struct scatterlist *sg)
{
++sg;
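A simplified userspace model of the __sgt_iter() change above: when walking DMA addresses the bound now comes from sg_dma_len(), and a zero DMA length ends the walk. The struct is a stand-in, not the kernel's scatterlist.

#include <stdint.h>
#include <stdio.h>

struct fake_sg {
	uint32_t offset;
	uint32_t length;	/* CPU-page length */
	uint32_t dma_len;	/* may differ after IOMMU coalescing */
};

static uint32_t iter_max(const struct fake_sg *sg, int dma)
{
	if (dma && sg->dma_len == 0)
		return 0;	/* end of the mapped list */
	return sg->offset + (dma ? sg->dma_len : sg->length);
}

int main(void)
{
	struct fake_sg sg = { .offset = 0, .length = 4096, .dma_len = 8192 };

	printf("cpu max=%u dma max=%u\n",
	       iter_max(&sg, 0), iter_max(&sg, 1));	/* 4096 8192 */
	return 0;
}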
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index 180e1078ef7c..b326993a1026 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -114,7 +114,7 @@ __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem,
n_pages -= BIT(order);
block->private = mem;
- list_add(&block->link, blocks);
+ list_add_tail(&block->link, blocks);
if (!n_pages)
break;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f54375b11964..bbec56f97832 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7111,22 +7111,8 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
0, CNL_DELAY_PMRSP);
}
-static void gen12_init_clock_gating(struct drm_i915_private *i915)
-{
- unsigned int i;
-
- /* This is not a WA. Enable VD HCP & MFX_ENC powergate */
- for (i = 0; i < I915_MAX_VCS; i++)
- if (HAS_ENGINE(&i915->gt, _VCS(i)))
- intel_uncore_rmw(&i915->uncore, POWERGATE_ENABLE, 0,
- VDN_HCP_POWERGATE_ENABLE(i) |
- VDN_MFX_POWERGATE_ENABLE(i));
-}
-
static void tgl_init_clock_gating(struct drm_i915_private *dev_priv)
{
- gen12_init_clock_gating(dev_priv);
-
/* Wa_1409120013:tgl */
I915_WRITE(ILK_DPFC_CHICKEN,
ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL);
@@ -7143,8 +7129,6 @@ static void tgl_init_clock_gating(struct drm_i915_private *dev_priv)
static void dg1_init_clock_gating(struct drm_i915_private *dev_priv)
{
- gen12_init_clock_gating(dev_priv);
-
/* Wa_1409836686:dg1[a0] */
if (IS_DG1_REVID(dev_priv, DG1_REVID_A0, DG1_REVID_A0))
I915_WRITE(GEN9_CLKGATE_DIS_3, I915_READ(GEN9_CLKGATE_DIS_3) |