aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c234
1 files changed, 156 insertions, 78 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2a2d7643b551..5ae812d60abe 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -48,8 +48,9 @@
#include "i915_gpu_error.h"
#include "i915_memcpy.h"
#include "i915_scatterlist.h"
+#include "i915_vma_snapshot.h"
-#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+#define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
static void __sg_set_buf(struct scatterlist *sg,
@@ -275,16 +276,16 @@ static bool compress_start(struct i915_vma_compress *c)
static void *compress_next_page(struct i915_vma_compress *c,
struct i915_vma_coredump *dst)
{
- void *page;
+ void *page_addr;
+ struct page *page;
- if (dst->page_count >= dst->num_pages)
- return ERR_PTR(-ENOSPC);
-
- page = pool_alloc(&c->pool, ALLOW_FAIL);
- if (!page)
+ page_addr = pool_alloc(&c->pool, ALLOW_FAIL);
+ if (!page_addr)
return ERR_PTR(-ENOMEM);
- return dst->pages[dst->page_count++] = page;
+ page = virt_to_page(page_addr);
+ list_add_tail(&page->lru, &dst->page_list);
+ return page_addr;
}
static int compress_page(struct i915_vma_compress *c,
@@ -397,7 +398,7 @@ static int compress_page(struct i915_vma_compress *c,
if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
memcpy(ptr, src, PAGE_SIZE);
- dst->pages[dst->page_count++] = ptr;
+ list_add_tail(&virt_to_page(ptr)->lru, &dst->page_list);
cond_resched();
return 0;
@@ -504,7 +505,7 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
const struct i915_gem_context_coredump *ctx)
{
- const u32 period = m->i915->gt.clock_period_ns;
+ const u32 period = to_gt(m->i915)->clock_period_ns;
err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n",
header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
@@ -614,7 +615,7 @@ static void print_error_vma(struct drm_i915_error_state_buf *m,
const struct i915_vma_coredump *vma)
{
char out[ASCII85_BUFSZ];
- int page;
+ struct page *page;
if (!vma)
return;
@@ -628,16 +629,17 @@ static void print_error_vma(struct drm_i915_error_state_buf *m,
err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes);
err_compression_marker(m);
- for (page = 0; page < vma->page_count; page++) {
+ list_for_each_entry(page, &vma->page_list, lru) {
int i, len;
+ const u32 *addr = page_address(page);
len = PAGE_SIZE;
- if (page == vma->page_count - 1)
+ if (page == list_last_entry(&vma->page_list, typeof(*page), lru))
len -= vma->unused;
len = ascii85_encode_len(len);
for (i = 0; i < len; i++)
- err_puts(m, ascii85_encode(vma->pages[page][i], out));
+ err_puts(m, ascii85_encode(addr[i], out));
}
err_puts(m, "\n");
}
@@ -946,10 +948,12 @@ static void i915_vma_coredump_free(struct i915_vma_coredump *vma)
{
while (vma) {
struct i915_vma_coredump *next = vma->next;
- int page;
+ struct page *page, *n;
- for (page = 0; page < vma->page_count; page++)
- free_page((unsigned long)vma->pages[page]);
+ list_for_each_entry_safe(page, n, &vma->page_list, lru) {
+ list_del_init(&page->lru);
+ __free_page(page);
+ }
kfree(vma);
vma = next;
@@ -1009,25 +1013,21 @@ void __i915_gpu_coredump_free(struct kref *error_ref)
static struct i915_vma_coredump *
i915_vma_coredump_create(const struct intel_gt *gt,
- const struct i915_vma *vma,
- const char *name,
+ const struct i915_vma_snapshot *vsnap,
struct i915_vma_compress *compress)
{
struct i915_ggtt *ggtt = gt->ggtt;
const u64 slot = ggtt->error_capture.start;
struct i915_vma_coredump *dst;
- unsigned long num_pages;
struct sgt_iter iter;
int ret;
might_sleep();
- if (!vma || !vma->pages || !compress)
+ if (!vsnap || !vsnap->pages || !compress)
return NULL;
- num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
- num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
- dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ALLOW_FAIL);
+ dst = kmalloc(sizeof(*dst), ALLOW_FAIL);
if (!dst)
return NULL;
@@ -1036,14 +1036,13 @@ i915_vma_coredump_create(const struct intel_gt *gt,
return NULL;
}
- strcpy(dst->name, name);
+ INIT_LIST_HEAD(&dst->page_list);
+ strcpy(dst->name, vsnap->name);
dst->next = NULL;
- dst->gtt_offset = vma->node.start;
- dst->gtt_size = vma->node.size;
- dst->gtt_page_sizes = vma->page_sizes.gtt;
- dst->num_pages = num_pages;
- dst->page_count = 0;
+ dst->gtt_offset = vsnap->gtt_offset;
+ dst->gtt_size = vsnap->gtt_size;
+ dst->gtt_page_sizes = vsnap->page_sizes;
dst->unused = 0;
ret = -EINVAL;
@@ -1051,7 +1050,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
void __iomem *s;
dma_addr_t dma;
- for_each_sgt_daddr(dma, iter, vma->pages) {
+ for_each_sgt_daddr(dma, iter, vsnap->pages) {
mutex_lock(&ggtt->error_mutex);
ggtt->vm.insert_page(&ggtt->vm, dma, slot,
I915_CACHE_NONE, 0);
@@ -1069,11 +1068,11 @@ i915_vma_coredump_create(const struct intel_gt *gt,
if (ret)
break;
}
- } else if (__i915_gem_object_is_lmem(vma->obj)) {
- struct intel_memory_region *mem = vma->obj->mm.region;
+ } else if (vsnap->mr && vsnap->mr->type != INTEL_MEMORY_SYSTEM) {
+ struct intel_memory_region *mem = vsnap->mr;
dma_addr_t dma;
- for_each_sgt_daddr(dma, iter, vma->pages) {
+ for_each_sgt_daddr(dma, iter, vsnap->pages) {
void __iomem *s;
s = io_mapping_map_wc(&mem->iomap,
@@ -1089,7 +1088,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
} else {
struct page *page;
- for_each_sgt_page(page, iter, vma->pages) {
+ for_each_sgt_page(page, iter, vsnap->pages) {
void *s;
drm_clflush_pages(&page, 1);
@@ -1106,8 +1105,13 @@ i915_vma_coredump_create(const struct intel_gt *gt,
}
if (ret || compress_flush(compress, dst)) {
- while (dst->page_count--)
- pool_free(&compress->pool, dst->pages[dst->page_count]);
+ struct page *page, *n;
+
+ list_for_each_entry_safe_reverse(page, n, &dst->page_list, lru) {
+ list_del_init(&page->lru);
+ pool_free(&compress->pool, page_address(page));
+ }
+
kfree(dst);
dst = NULL;
}
@@ -1320,38 +1324,72 @@ static bool record_context(struct i915_gem_context_coredump *e,
struct intel_engine_capture_vma {
struct intel_engine_capture_vma *next;
- struct i915_vma *vma;
+ struct i915_vma_snapshot *vsnap;
char name[16];
+ bool lockdep_cookie;
};
static struct intel_engine_capture_vma *
-capture_vma(struct intel_engine_capture_vma *next,
- struct i915_vma *vma,
- const char *name,
- gfp_t gfp)
+capture_vma_snapshot(struct intel_engine_capture_vma *next,
+ struct i915_vma_snapshot *vsnap,
+ gfp_t gfp)
{
struct intel_engine_capture_vma *c;
- if (!vma)
+ if (!i915_vma_snapshot_present(vsnap))
return next;
c = kmalloc(sizeof(*c), gfp);
if (!c)
return next;
- if (!i915_active_acquire_if_busy(&vma->active)) {
+ if (!i915_vma_snapshot_resource_pin(vsnap, &c->lockdep_cookie)) {
kfree(c);
return next;
}
- strcpy(c->name, name);
- c->vma = vma; /* reference held while active */
+ strcpy(c->name, vsnap->name);
+ c->vsnap = vsnap;
+ i915_vma_snapshot_get(vsnap);
c->next = next;
return c;
}
static struct intel_engine_capture_vma *
+capture_vma(struct intel_engine_capture_vma *next,
+ struct i915_vma *vma,
+ const char *name,
+ gfp_t gfp)
+{
+ struct i915_vma_snapshot *vsnap;
+
+ if (!vma)
+ return next;
+
+ /*
+ * If the vma isn't pinned, then the vma should be snapshotted
+ * to a struct i915_vma_snapshot at command submission time.
+ * Not here.
+ */
+ GEM_WARN_ON(!i915_vma_is_pinned(vma));
+ if (!i915_vma_is_pinned(vma))
+ return next;
+
+ vsnap = i915_vma_snapshot_alloc(gfp);
+ if (!vsnap)
+ return next;
+
+ i915_vma_snapshot_init(vsnap, vma, name);
+ next = capture_vma_snapshot(next, vsnap, gfp);
+
+ /* FIXME: Replace on async unbind. */
+ i915_vma_snapshot_put(vsnap);
+
+ return next;
+}
+
+static struct intel_engine_capture_vma *
capture_user(struct intel_engine_capture_vma *capture,
const struct i915_request *rq,
gfp_t gfp)
@@ -1359,7 +1397,7 @@ capture_user(struct intel_engine_capture_vma *capture,
struct i915_capture_list *c;
for (c = rq->capture_list; c; c = c->next)
- capture = capture_vma(capture, c->vma, "user", gfp);
+ capture = capture_vma_snapshot(capture, c->vma_snapshot, gfp);
return capture;
}
@@ -1373,6 +1411,33 @@ static void add_vma(struct intel_engine_coredump *ee,
}
}
+static struct i915_vma_coredump *
+create_vma_coredump(const struct intel_gt *gt, struct i915_vma *vma,
+ const char *name, struct i915_vma_compress *compress)
+{
+ struct i915_vma_coredump *ret;
+ struct i915_vma_snapshot tmp;
+
+ if (!vma)
+ return NULL;
+
+ GEM_WARN_ON(!i915_vma_is_pinned(vma));
+ i915_vma_snapshot_init_onstack(&tmp, vma, name);
+ ret = i915_vma_coredump_create(gt, &tmp, compress);
+ i915_vma_snapshot_put_onstack(&tmp);
+
+ return ret;
+}
+
+static void add_vma_coredump(struct intel_engine_coredump *ee,
+ const struct intel_gt *gt,
+ struct i915_vma *vma,
+ const char *name,
+ struct i915_vma_compress *compress)
+{
+ add_vma(ee, create_vma_coredump(gt, vma, name, compress));
+}
+
struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
{
@@ -1406,7 +1471,7 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
* as the simplest method to avoid being overwritten
* by userspace.
*/
- vma = capture_vma(vma, rq->batch, "batch", gfp);
+ vma = capture_vma_snapshot(vma, &rq->batch_snapshot, gfp);
vma = capture_user(vma, rq, gfp);
vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
vma = capture_vma(vma, rq->context->state, "HW context", gfp);
@@ -1427,30 +1492,24 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
while (capture) {
struct intel_engine_capture_vma *this = capture;
- struct i915_vma *vma = this->vma;
+ struct i915_vma_snapshot *vsnap = this->vsnap;
add_vma(ee,
i915_vma_coredump_create(engine->gt,
- vma, this->name,
- compress));
+ vsnap, compress));
- i915_active_release(&vma->active);
+ i915_vma_snapshot_resource_unpin(vsnap, this->lockdep_cookie);
+ i915_vma_snapshot_put(vsnap);
capture = this->next;
kfree(this);
}
- add_vma(ee,
- i915_vma_coredump_create(engine->gt,
- engine->status_page.vma,
- "HW Status",
- compress));
+ add_vma_coredump(ee, engine->gt, engine->status_page.vma,
+ "HW Status", compress);
- add_vma(ee,
- i915_vma_coredump_create(engine->gt,
- engine->wa_ctx.vma,
- "WA context",
- compress));
+ add_vma_coredump(ee, engine->gt, engine->wa_ctx.vma,
+ "WA context", compress);
}
static struct intel_engine_coredump *
@@ -1486,17 +1545,25 @@ capture_engine(struct intel_engine_cs *engine,
}
}
if (rq)
- capture = intel_engine_coredump_add_request(ee, rq,
- ATOMIC_MAYFAIL);
+ rq = i915_request_get_rcu(rq);
+
+ if (!rq)
+ goto no_request_capture;
+
+ capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
if (!capture) {
-no_request_capture:
- kfree(ee);
- return NULL;
+ i915_request_put(rq);
+ goto no_request_capture;
}
intel_engine_coredump_add_vma(ee, capture, compress);
+ i915_request_put(rq);
return ee;
+
+no_request_capture:
+ kfree(ee);
+ return NULL;
}
static void
@@ -1550,10 +1617,8 @@ gt_record_uc(struct intel_gt_coredump *gt,
*/
error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
- error_uc->guc_log =
- i915_vma_coredump_create(gt->_gt,
- uc->guc.log.vma, "GuC log buffer",
- compress);
+ error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
+ "GuC log buffer", compress);
return error_uc;
}
@@ -1750,10 +1815,7 @@ static void capture_gen(struct i915_gpu_coredump *error)
error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count);
error->suspended = i915->runtime_pm.suspended;
- error->iommu = -1;
-#ifdef CONFIG_INTEL_IOMMU
- error->iommu = intel_iommu_gfx_mapped;
-#endif
+ error->iommu = intel_vtd_active(i915);
error->reset_count = i915_reset_count(&i915->gpu_error);
error->suspend_count = i915->suspend_count;
@@ -1784,7 +1846,7 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
error->time = ktime_get_real();
error->boottime = ktime_get_boottime();
- error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
+ error->uptime = ktime_sub(ktime_get(), to_gt(i915)->last_init_time);
error->capture = jiffies;
capture_gen(error);
@@ -1839,8 +1901,8 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt,
kfree(compress);
}
-struct i915_gpu_coredump *
-i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+static struct i915_gpu_coredump *
+__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
struct drm_i915_private *i915 = gt->i915;
struct i915_gpu_coredump *error;
@@ -1881,6 +1943,22 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
return error;
}
+struct i915_gpu_coredump *
+i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+{
+ static DEFINE_MUTEX(capture_mutex);
+ int ret = mutex_lock_interruptible(&capture_mutex);
+ struct i915_gpu_coredump *dump;
+
+ if (ret)
+ return ERR_PTR(ret);
+
+ dump = __i915_gpu_coredump(gt, engine_mask);
+ mutex_unlock(&capture_mutex);
+
+ return dump;
+}
+
void i915_error_state_store(struct i915_gpu_coredump *error)
{
struct drm_i915_private *i915;