aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
author: Michał Winiarski <michal.winiarski@intel.com> 2017-12-13 23:13:47 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2017-12-14 08:06:54 +0000
commit: 3176ff49bc3e56871ec9f338cac759dacd0135e1 (patch)
tree: 04fc7e8a743a34aea79249b5cf05d16aa2f3957e /drivers/gpu
parent: drm/i915/guc: Move shared data allocation away from submission path (diff)
download: linux-dev-3176ff49bc3e56871ec9f338cac759dacd0135e1.tar.xz
linux-dev-3176ff49bc3e56871ec9f338cac759dacd0135e1.zip
drm/i915/guc: Move GuC workqueue allocations outside of the mutex
This gets rid of the following lockdep splat:

======================================================
WARNING: possible circular locking dependency detected
4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted
------------------------------------------------------
debugfs_test/1351 is trying to acquire lock:
 (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915]

but task is already holding lock:
 (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #6 (&mm->mmap_sem){++++}:
       __might_fault+0x63/0x90
       _copy_to_user+0x1e/0x70
       filldir+0x8c/0xf0
       dcache_readdir+0xeb/0x160
       iterate_dir+0xe6/0x150
       SyS_getdents+0xa0/0x130
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #5 (&sb->s_type->i_mutex_key#5){++++}:
       lockref_get+0x9/0x20

-> #4 ((completion)&req.done){+.+.}:
       wait_for_common+0x54/0x210
       devtmpfs_create_node+0x130/0x150
       device_add+0x5ad/0x5e0
       device_create_groups_vargs+0xd4/0xe0
       device_create+0x35/0x40
       msr_device_create+0x22/0x40
       cpuhp_invoke_callback+0xc5/0xbf0
       cpuhp_thread_fun+0x167/0x210
       smpboot_thread_fn+0x17f/0x270
       kthread+0x173/0x1b0
       ret_from_fork+0x24/0x30

-> #3 (cpuhp_state-up){+.+.}:
       cpuhp_issue_call+0x132/0x1c0
       __cpuhp_setup_state_cpuslocked+0x12f/0x2a0
       __cpuhp_setup_state+0x3a/0x50
       page_writeback_init+0x3a/0x5c
       start_kernel+0x393/0x3e2
       secondary_startup_64+0xa5/0xb0

-> #2 (cpuhp_state_mutex){+.+.}:
       __mutex_lock+0x81/0x9b0
       __cpuhp_setup_state_cpuslocked+0x4b/0x2a0
       __cpuhp_setup_state+0x3a/0x50
       page_alloc_init+0x1f/0x26
       start_kernel+0x139/0x3e2
       secondary_startup_64+0xa5/0xb0

-> #1 (cpu_hotplug_lock.rw_sem){++++}:
       cpus_read_lock+0x34/0xa0
       apply_workqueue_attrs+0xd/0x40
       __alloc_workqueue_key+0x2c7/0x4e1
       intel_guc_submission_init+0x10c/0x650 [i915]
       intel_uc_init_hw+0x29e/0x460 [i915]
       i915_gem_init_hw+0xca/0x290 [i915]
       i915_gem_init+0x115/0x3a0 [i915]
       i915_driver_load+0x9a8/0x16c0 [i915]
       i915_pci_probe+0x2e/0x90 [i915]
       pci_device_probe+0x9c/0x120
       driver_probe_device+0x2a3/0x480
       __driver_attach+0xd9/0xe0
       bus_for_each_dev+0x57/0x90
       bus_add_driver+0x168/0x260
       driver_register+0x52/0xc0
       do_one_initcall+0x39/0x150
       do_init_module+0x56/0x1ef
       load_module+0x231c/0x2d70
       SyS_finit_module+0xa5/0xe0
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #0 (&dev->struct_mutex){+.+.}:
       lock_acquire+0xaf/0x200
       __mutex_lock+0x81/0x9b0
       i915_mutex_lock_interruptible+0x47/0x130 [i915]
       i915_gem_fault+0x201/0x760 [i915]
       __do_fault+0x15/0x70
       __handle_mm_fault+0x85b/0xe40
       handle_mm_fault+0x14f/0x2f0
       __do_page_fault+0x2d1/0x560
       page_fault+0x22/0x30

other info that might help us debug this:

Chain exists of:
  &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&mm->mmap_sem);
                               lock(&sb->s_type->i_mutex_key#5);
                               lock(&mm->mmap_sem);
  lock(&dev->struct_mutex);

 *** DEADLOCK ***

1 lock held by debugfs_test/1351:
 #0: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560

stack backtrace:
CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1
Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017
Call Trace:
 dump_stack+0x5f/0x86
 print_circular_bug+0x230/0x3b0
 check_prev_add+0x439/0x7b0
 ? lockdep_init_map_crosslock+0x20/0x20
 ? unwind_get_return_address+0x16/0x30
 ? __lock_acquire+0x1385/0x15a0
 __lock_acquire+0x1385/0x15a0
 lock_acquire+0xaf/0x200
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 __mutex_lock+0x81/0x9b0
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? __pm_runtime_resume+0x4f/0x80
 i915_gem_fault+0x201/0x760 [i915]
 __do_fault+0x15/0x70
 __handle_mm_fault+0x85b/0xe40
 handle_mm_fault+0x14f/0x2f0
 __do_page_fault+0x2d1/0x560
 page_fault+0x22/0x30
RIP: 0033:0x7f98d6f49116
RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283
RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680
RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0
RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0
R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001
R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0

v2: Init preempt_work unconditionally (Chris)
v3: Mention that we need the enable_guc=1 for lockdep splat (Chris)

Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.c | 1
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c | 4
-rw-r--r-- drivers/gpu/drm/i915/intel_guc.c | 57
-rw-r--r-- drivers/gpu/drm/i915/intel_guc.h | 2
-rw-r--r-- drivers/gpu/drm/i915/intel_guc_log.c | 23
-rw-r--r-- drivers/gpu/drm/i915/intel_guc_submission.c | 70
-rw-r--r-- drivers/gpu/drm/i915/intel_guc_submission.h | 2
-rw-r--r-- drivers/gpu/drm/i915/intel_uc.c | 26
-rw-r--r-- drivers/gpu/drm/i915/intel_uc.h | 2
9 files changed, 110 insertions, 77 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 721ccce1832f..285c8b238bff 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -621,6 +621,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
i915_gem_contexts_fini(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
+ intel_uc_fini_wq(dev_priv);
i915_gem_cleanup_userptr(dev_priv);
i915_gem_drain_freed_objects(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 13fa26238e89..2c13e3a4f45a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5163,6 +5163,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
if (ret)
return ret;
+ ret = intel_uc_init_wq(dev_priv);
+ if (ret)
+ return ret;
+
/* This is just a security blanket to placate dragons.
* On some systems, we very sporadically observe that the first TLBs
* used by the CS may be stale, despite us poking the TLB reset. If
diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
index 92ed22f38fc4..3c6bf5a34c3c 100644
--- a/drivers/gpu/drm/i915/intel_guc.c
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -69,6 +69,63 @@ void intel_guc_init_early(struct intel_guc *guc)
guc->notify = gen8_guc_raise_irq;
}
+int intel_guc_init_wq(struct intel_guc *guc)
+{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+ /*
+ * GuC log buffer flush work item has to do register access to
+ * send the ack to GuC and this work item, if not synced before
+ * suspend, can potentially get executed after the GFX device is
+ * suspended.
+ * By marking the WQ as freezable, we don't have to bother about
+ * flushing of this work item from the suspend hooks, the pending
+ * work item if any will be either executed before the suspend
+ * or scheduled later on resume. This way the handling of work
+ * item can be kept same between system suspend & rpm suspend.
+ */
+ guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
+ WQ_HIGHPRI | WQ_FREEZABLE);
+ if (!guc->log.runtime.flush_wq)
+ return -ENOMEM;
+
+ /*
+ * Even though both sending GuC action, and adding a new workitem to
+ * GuC workqueue are serialized (each with its own locking), since
+ * we're using mutliple engines, it's possible that we're going to
+ * issue a preempt request with two (or more - each for different
+ * engine) workitems in GuC queue. In this situation, GuC may submit
+ * all of them, which will make us very confused.
+ * Our preemption contexts may even already be complete - before we
+ * even had the chance to sent the preempt action to GuC!. Rather
+ * than introducing yet another lock, we can just use ordered workqueue
+ * to make sure we're always sending a single preemption request with a
+ * single workitem.
+ */
+ if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) &&
+ USES_GUC_SUBMISSION(dev_priv)) {
+ guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt",
+ WQ_HIGHPRI);
+ if (!guc->preempt_wq) {
+ destroy_workqueue(guc->log.runtime.flush_wq);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+void intel_guc_fini_wq(struct intel_guc *guc)
+{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+ if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) &&
+ USES_GUC_SUBMISSION(dev_priv))
+ destroy_workqueue(guc->preempt_wq);
+
+ destroy_workqueue(guc->log.runtime.flush_wq);
+}
+
static int guc_shared_data_create(struct intel_guc *guc)
{
struct i915_vma *vma;
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 81659e223e11..52856a97477d 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -119,6 +119,8 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma)
void intel_guc_init_early(struct intel_guc *guc);
void intel_guc_init_send_regs(struct intel_guc *guc);
void intel_guc_init_params(struct intel_guc *guc);
+int intel_guc_init_wq(struct intel_guc *guc);
+void intel_guc_fini_wq(struct intel_guc *guc);
int intel_guc_init(struct intel_guc *guc);
void intel_guc_fini(struct intel_guc *guc);
int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 1a2c5eed9929..eaedd63e3819 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -411,30 +411,8 @@ static int guc_log_runtime_create(struct intel_guc *guc)
guc->log.runtime.relay_chan = guc_log_relay_chan;
INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
-
- /*
- * GuC log buffer flush work item has to do register access to
- * send the ack to GuC and this work item, if not synced before
- * suspend, can potentially get executed after the GFX device is
- * suspended.
- * By marking the WQ as freezable, we don't have to bother about
- * flushing of this work item from the suspend hooks, the pending
- * work item if any will be either executed before the suspend
- * or scheduled later on resume. This way the handling of work
- * item can be kept same between system suspend & rpm suspend.
- */
- guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
- WQ_HIGHPRI | WQ_FREEZABLE);
- if (!guc->log.runtime.flush_wq) {
- DRM_ERROR("Couldn't allocate the wq for GuC logging\n");
- ret = -ENOMEM;
- goto err_relaychan;
- }
-
return 0;
-err_relaychan:
- relay_close(guc->log.runtime.relay_chan);
err_vaddr:
i915_gem_object_unpin_map(guc->log.vma->obj);
guc->log.runtime.buf_addr = NULL;
@@ -450,7 +428,6 @@ static void guc_log_runtime_destroy(struct intel_guc *guc)
if (!guc_log_has_runtime(guc))
return;
- destroy_workqueue(guc->log.runtime.flush_wq);
relay_close(guc->log.runtime.relay_chan);
i915_gem_object_unpin_map(guc->log.vma->obj);
guc->log.runtime.buf_addr = NULL;
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index c020560c395e..8f4b274d66a7 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -1187,57 +1187,15 @@ static void guc_ads_destroy(struct intel_guc *guc)
i915_vma_unpin_and_release(&guc->ads_vma);
}
-static int guc_preempt_work_create(struct intel_guc *guc)
-{
- struct drm_i915_private *dev_priv = guc_to_i915(guc);
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- /*
- * Even though both sending GuC action, and adding a new workitem to
- * GuC workqueue are serialized (each with its own locking), since
- * we're using mutliple engines, it's possible that we're going to
- * issue a preempt request with two (or more - each for different
- * engine) workitems in GuC queue. In this situation, GuC may submit
- * all of them, which will make us very confused.
- * Our preemption contexts may even already be complete - before we
- * even had the chance to sent the preempt action to GuC!. Rather
- * than introducing yet another lock, we can just use ordered workqueue
- * to make sure we're always sending a single preemption request with a
- * single workitem.
- */
- guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt",
- WQ_HIGHPRI);
- if (!guc->preempt_wq)
- return -ENOMEM;
-
- for_each_engine(engine, dev_priv, id) {
- guc->preempt_work[id].engine = engine;
- INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context);
- }
-
- return 0;
-}
-
-static void guc_preempt_work_destroy(struct intel_guc *guc)
-{
- struct drm_i915_private *dev_priv = guc_to_i915(guc);
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, dev_priv, id)
- cancel_work_sync(&guc->preempt_work[id].work);
-
- destroy_workqueue(guc->preempt_wq);
- guc->preempt_wq = NULL;
-}
-
/*
* Set up the memory resources to be shared with the GuC (via the GGTT)
* at firmware loading time.
*/
int intel_guc_submission_init(struct intel_guc *guc)
{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
int ret;
if (guc->stage_desc_pool)
@@ -1256,20 +1214,18 @@ int intel_guc_submission_init(struct intel_guc *guc)
if (ret < 0)
goto err_stage_desc_pool;
- ret = guc_preempt_work_create(guc);
- if (ret)
- goto err_log;
- GEM_BUG_ON(!guc->preempt_wq);
-
ret = guc_ads_create(guc);
if (ret < 0)
- goto err_wq;
+ goto err_log;
GEM_BUG_ON(!guc->ads_vma);
+ for_each_engine(engine, dev_priv, id) {
+ guc->preempt_work[id].engine = engine;
+ INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context);
+ }
+
return 0;
-err_wq:
- guc_preempt_work_destroy(guc);
err_log:
intel_guc_log_destroy(guc);
err_stage_desc_pool:
@@ -1279,8 +1235,14 @@ err_stage_desc_pool:
void intel_guc_submission_fini(struct intel_guc *guc)
{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, dev_priv, id)
+ cancel_work_sync(&guc->preempt_work[id].work);
+
guc_ads_destroy(guc);
- guc_preempt_work_destroy(guc);
intel_guc_log_destroy(guc);
guc_stage_desc_pool_destroy(guc);
}
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h
index 021fe85c8f71..fb081cefef93 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/intel_guc_submission.h
@@ -77,5 +77,7 @@ int intel_guc_submission_init(struct intel_guc *guc);
int intel_guc_submission_enable(struct intel_guc *guc);
void intel_guc_submission_disable(struct intel_guc *guc);
void intel_guc_submission_fini(struct intel_guc *guc);
+int intel_guc_preempt_work_create(struct intel_guc *guc);
+void intel_guc_preempt_work_destroy(struct intel_guc *guc);
#endif
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 3040a0e00142..785850838a44 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -188,6 +188,32 @@ static void guc_disable_communication(struct intel_guc *guc)
guc->send = intel_guc_send_nop;
}
+int intel_uc_init_wq(struct drm_i915_private *dev_priv)
+{
+ int ret;
+
+ if (!USES_GUC(dev_priv))
+ return 0;
+
+ ret = intel_guc_init_wq(&dev_priv->guc);
+ if (ret) {
+ DRM_ERROR("Couldn't allocate workqueues for GuC\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+void intel_uc_fini_wq(struct drm_i915_private *dev_priv)
+{
+ if (!USES_GUC(dev_priv))
+ return;
+
+ GEM_BUG_ON(!HAS_GUC(dev_priv));
+
+ intel_guc_fini_wq(&dev_priv->guc);
+}
+
int intel_uc_init_hw(struct drm_i915_private *dev_priv)
{
struct intel_guc *guc = &dev_priv->guc;
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 7a59e2486e9e..53edfeaf56b0 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -33,6 +33,8 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv);
void intel_uc_init_mmio(struct drm_i915_private *dev_priv);
void intel_uc_init_fw(struct drm_i915_private *dev_priv);
void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
+int intel_uc_init_wq(struct drm_i915_private *dev_priv);
+void intel_uc_fini_wq(struct drm_i915_private *dev_priv);
int intel_uc_init_hw(struct drm_i915_private *dev_priv);
void intel_uc_fini_hw(struct drm_i915_private *dev_priv);