aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
author: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> 2021-11-08 13:10:57 -0800
committer: John Harrison <John.C.Harrison@Intel.com> 2021-11-30 17:08:07 -0800
commit: 2a67b18e67f30b526ce69b7796a16d847e94e2df (patch)
tree: 0eadf690d47ac0745d2dd3d2c99e84abbb9c7712 /drivers/gpu/drm
parent: dma_fence_array: Fix PENDING_ERROR leak in dma_fence_array_signaled() (diff)
download: linux-dev-2a67b18e67f30b526ce69b7796a16d847e94e2df.tar.xz
linux-dev-2a67b18e67f30b526ce69b7796a16d847e94e2df.zip
drm/i915/pmu: Fix synchronization of PMU callback with reset
Since the PMU callback runs in irq context, it synchronizes with gt reset using the reset count. We could run into a case where the PMU callback reads the reset count before it is updated. This has the potential to corrupt the busyness stats.

In addition to the reset count, check if the reset bit is set before capturing busyness.

In addition, save the previous stats only if you intend to update them.

v2:
- The 2 reset counts captured in the PMU callback can end up being the same if they were captured right after the count is incremented in the reset flow. This can lead to a bad busyness state. Ensure that reset is not in progress when the initial reset count is captured.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211108211057.68783-1-umesh.nerlige.ramappa@intel.com
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 17
1 file changed, 11 insertions, 6 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index a7108b38973e..1f9d4fde421f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1183,15 +1183,20 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
u64 total, gt_stamp_saved;
unsigned long flags;
u32 reset_count;
+ bool in_reset;
spin_lock_irqsave(&guc->timestamp.lock, flags);
/*
- * If a reset happened, we risk reading partially updated
- * engine busyness from GuC, so we just use the driver stored
- * copy of busyness. Synchronize with gt reset using reset_count.
+ * If a reset happened, we risk reading partially updated engine
+ * busyness from GuC, so we just use the driver stored copy of busyness.
+ * Synchronize with gt reset using reset_count and the
+ * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
+ * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
+ * usable by checking the flag afterwards.
*/
reset_count = i915_reset_count(gpu_error);
+ in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);
*now = ktime_get();
@@ -1201,9 +1206,9 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
* start_gt_clk is derived from GuC state. To get a consistent
* view of activity, we query the GuC state only if gt is awake.
*/
- stats_saved = *stats;
- gt_stamp_saved = guc->timestamp.gt_stamp;
- if (intel_gt_pm_get_if_awake(gt)) {
+ if (intel_gt_pm_get_if_awake(gt) && !in_reset) {
+ stats_saved = *stats;
+ gt_stamp_saved = guc->timestamp.gt_stamp;
guc_update_engine_gt_clks(engine);
guc_update_pm_timestamp(guc, engine, now);
intel_gt_pm_put_async(gt);