aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c48
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c2
2 files changed, 48 insertions, 2 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index cae637fc3ead..f3dcae4b9d45 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1375,7 +1375,45 @@ static void guc_enable_busyness_worker(struct intel_guc *guc)
static void guc_cancel_busyness_worker(struct intel_guc *guc)
{
- cancel_delayed_work_sync(&guc->timestamp.work);
+ /*
+ * There are many different call stacks that can get here. Some of them
+ * hold the reset mutex. The busyness worker also attempts to acquire the
+ * reset mutex. Synchronously flushing a worker thread requires acquiring
+ * the worker mutex. Lockdep sees this as a conflict. It thinks that the
+ * flush can deadlock because it holds the worker mutex while waiting for
+ * the reset mutex, but another thread is holding the reset mutex and might
+ * attempt to use other worker functions.
+ *
+ * In practice, this scenario does not exist because the busyness worker
+ * does not block waiting for the reset mutex. It does a try-lock on it and
+ * immediately exits if the lock is already held. Unfortunately, the mutex
+ * in question (I915_RESET_BACKOFF) is an i915 implementation which has lockdep
+ * annotation but not to the extent of explaining the 'might lock' is also a
+ * 'does not need to lock'. So one option would be to add more complex lockdep
+ * annotations to ignore the issue (if at all possible). A simpler option is to
+ * just not flush synchronously when a rest in progress. Given that the worker
+ * will just early exit and re-schedule itself anyway, there is no advantage
+ * to running it immediately.
+ *
+ * If a reset is not in progress, then the synchronous flush may be required.
+ * As noted many call stacks lead here, some during suspend and driver unload
+ * which do require a synchronous flush to make sure the worker is stopped
+ * before memory is freed.
+ *
+ * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
+ * every possible call stack is unfeasible. It would be too intrusive to many
+ * areas that really don't care about the GuC backend. However, there is the
+ * 'reset_in_progress' flag available, so just use that.
+ *
+ * And note that in the case of a reset occurring during driver unload
+ * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set
+ * is fine because there is another cancel in _finish (when the reset flag is
+ * not).
+ */
+ if (guc_to_gt(guc)->uc.reset_in_progress)
+ cancel_delayed_work(&guc->timestamp.work);
+ else
+ cancel_delayed_work_sync(&guc->timestamp.work);
}
static void __reset_guc_busyness_stats(struct intel_guc *guc)
@@ -1966,8 +2004,16 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
+ /*
+ * Ensure the busyness worker gets cancelled even on a fatal wedge.
+ * Note that reset_prepare is not allowed to because it confuses lockdep.
+ */
+ if (guc_submission_initialized(guc))
+ guc_cancel_busyness_worker(guc);
+
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
+ !intel_guc_is_fw_running(guc) ||
intel_gt_is_wedged(guc_to_gt(guc)))) {
return;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index b8b09b1bee3e..6dfe5d9456c6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -640,7 +640,7 @@ void intel_uc_reset_finish(struct intel_uc *uc)
uc->reset_in_progress = false;
/* Firmware expected to be running when this function is called */
- if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc))
+ if (intel_uc_uses_guc_submission(uc))
intel_guc_submission_reset_finish(guc);
}