From 54fc577d90d01a28dbfccd277aa3de4d4abaf1ff Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 11 Sep 2019 17:07:30 +0100 Subject: drm/i915/pmu: Skip busyness sampling when and where not needed Since d0aa694b9239 ("drm/i915/pmu: Always sample an active ringbuffer") the cost of sampling the engine state on execlists platforms became a little bit higher when both engine busyness and one of the wait states are being monitored. (Previously the busyness sampling on legacy platforms was done via seqno comparison so there was no cost of mmio read.) We can avoid that by skipping busyness sampling when engine supports software busy stats and so avoid the cost of potential mmio read and sample accumulation. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190911160730.22687-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_pmu.c') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 8e251e719390..623ad32303a1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -194,6 +194,10 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) if (val & RING_WAIT_SEMAPHORE) add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); + /* No need to sample when busy stats are supported. */ + if (intel_engine_supports_stats(engine)) + goto skip; + /* * While waiting on a semaphore or event, MI_MODE reports the * ring as idle. However, previously using the seqno, and with -- cgit v1.2.3-59-g8ed1b From 16ffe73c186b7897a1a6888ffb4949a8b4026624 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Sep 2019 13:48:13 +0100 Subject: drm/i915/pmu: Use GT parked for estimating RC6 while asleep As we track when we put the GT device to sleep upon idling, we can use that callback to sample the current rc6 counters and record the timestamp for estimating samples after that point while asleep. v2: Stick to using ktime_t v3: Track user_wakerefs that interfere with the new intel_gt_pm_wait_for_idle v4: No need for parked/unparked estimation if !CONFIG_PM v5: Keep timer park/unpark logic as was v6: Refactor duplicated estimate/update rc6 logic v7: Pull intel_get_pm_get_if_awake() out from the pmu->lock. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190912124813.19225-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 22 +++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + drivers/gpu/drm/i915/i915_debugfs.c | 22 ++- drivers/gpu/drm/i915/i915_pmu.c | 242 +++++++++++++++++-------------- drivers/gpu/drm/i915/i915_pmu.h | 4 +- 5 files changed, 170 insertions(+), 121 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_pmu.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 3bd764104d41..a11ad4d914ca 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -141,6 +141,24 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915) return switch_to_kernel_context_sync(&i915->gt); } +static void user_forcewake(struct intel_gt *gt, bool suspend) +{ + int count = atomic_read(>->user_wakeref); + + /* Inside suspend/resume so single threaded, no races to worry about. */ + if (likely(!count)) + return; + + intel_gt_pm_get(gt); + if (suspend) { + GEM_BUG_ON(count > atomic_read(>->wakeref.count)); + atomic_sub(count, >->wakeref.count); + } else { + atomic_add(count, >->wakeref.count); + } + intel_gt_pm_put(gt); +} + void i915_gem_suspend(struct drm_i915_private *i915) { GEM_TRACE("\n"); @@ -148,6 +166,8 @@ void i915_gem_suspend(struct drm_i915_private *i915) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); + user_forcewake(&i915->gt, true); + mutex_lock(&i915->drm.struct_mutex); /* @@ -259,6 +279,8 @@ void i915_gem_resume(struct drm_i915_private *i915) if (!i915_gem_load_power_context(i915)) goto err_wedged; + user_forcewake(&i915->gt, false); + out_unlock: intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); mutex_unlock(&i915->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index dc295c196d11..3039cef64b11 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -50,6 +50,7 @@ struct intel_gt { } timelines; struct intel_wakeref wakeref; + atomic_t user_wakeref; struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e5835337f022..f3ae525b77c0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3995,13 +3995,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused) static int i915_forcewake_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; + struct intel_gt *gt = &i915->gt; - if (INTEL_GEN(i915) < 6) - return 0; - - file->private_data = - (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm); - intel_uncore_forcewake_user_get(&i915->uncore); + atomic_inc(>->user_wakeref); + intel_gt_pm_get(gt); + if (INTEL_GEN(i915) >= 6) + intel_uncore_forcewake_user_get(gt->uncore); return 0; } @@ -4009,13 +4008,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file) static int i915_forcewake_release(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; + struct intel_gt *gt = &i915->gt; - if (INTEL_GEN(i915) < 6) - return 0; - - intel_uncore_forcewake_user_put(&i915->uncore); - intel_runtime_pm_put(&i915->runtime_pm, - (intel_wakeref_t)(uintptr_t)file->private_data); + if (INTEL_GEN(i915) >= 6) + intel_uncore_forcewake_user_put(&i915->uncore); + intel_gt_pm_put(gt); + atomic_dec(>->user_wakeref); return 0; } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 623ad32303a1..3310353890fb 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -116,22 +116,124 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) return enable; } -void i915_pmu_gt_parked(struct drm_i915_private *i915) +static u64 __get_rc6(const struct intel_gt *gt) { - struct i915_pmu *pmu = &i915->pmu; + struct drm_i915_private *i915 = gt->i915; + u64 val; - if (!pmu->base.event_init) - return; + val = intel_rc6_residency_ns(i915, + IS_VALLEYVIEW(i915) ? + VLV_GT_RENDER_RC6 : + GEN6_GT_GFX_RC6); + + if (HAS_RC6p(i915)) + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); + + if (HAS_RC6pp(i915)) + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); + + return val; +} + +#if IS_ENABLED(CONFIG_PM) + +static inline s64 ktime_since(const ktime_t kt) +{ + return ktime_to_ns(ktime_sub(ktime_get(), kt)); +} + +static u64 __pmu_estimate_rc6(struct i915_pmu *pmu) +{ + u64 val; - spin_lock_irq(&pmu->lock); /* - * Signal sampling timer to stop if only engine events are enabled and - * GPU went idle. + * We think we are runtime suspended. + * + * Report the delta from when the device was suspended to now, + * on top of the last known real value, as the approximated RC6 + * counter value. */ - pmu->timer_enabled = pmu_needs_timer(pmu, false); - spin_unlock_irq(&pmu->lock); + val = ktime_since(pmu->sleep_last); + val += pmu->sample[__I915_SAMPLE_RC6].cur; + + pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; + + return val; +} + +static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val) +{ + /* + * If we are coming back from being runtime suspended we must + * be careful not to report a larger value than returned + * previously. + */ + if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { + pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; + pmu->sample[__I915_SAMPLE_RC6].cur = val; + } else { + val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur; + } + + return val; +} + +static u64 get_rc6(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct i915_pmu *pmu = &i915->pmu; + unsigned long flags; + u64 val; + + val = 0; + if (intel_gt_pm_get_if_awake(gt)) { + val = __get_rc6(gt); + intel_gt_pm_put(gt); + } + + spin_lock_irqsave(&pmu->lock, flags); + + if (val) + val = __pmu_update_rc6(pmu, val); + else + val = __pmu_estimate_rc6(pmu); + + spin_unlock_irqrestore(&pmu->lock, flags); + + return val; +} + +static void park_rc6(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) + __pmu_update_rc6(pmu, __get_rc6(&i915->gt)); + + pmu->sleep_last = ktime_get(); +} + +static void unpark_rc6(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + /* Estimate how long we slept and accumulate that into rc6 counters */ + if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) + __pmu_estimate_rc6(pmu); +} + +#else + +static u64 get_rc6(struct intel_gt *gt) +{ + return __get_rc6(gt); } +static void park_rc6(struct drm_i915_private *i915) {} +static void unpark_rc6(struct drm_i915_private *i915) {} + +#endif + static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) { if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) { @@ -143,6 +245,26 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) } } +void i915_pmu_gt_parked(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + if (!pmu->base.event_init) + return; + + spin_lock_irq(&pmu->lock); + + park_rc6(i915); + + /* + * Signal sampling timer to stop if only engine events are enabled and + * GPU went idle. + */ + pmu->timer_enabled = pmu_needs_timer(pmu, false); + + spin_unlock_irq(&pmu->lock); +} + void i915_pmu_gt_unparked(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; @@ -151,10 +273,14 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915) return; spin_lock_irq(&pmu->lock); + /* * Re-enable sampling timer when GPU goes active. */ __i915_pmu_maybe_start_timer(pmu); + + unpark_rc6(i915); + spin_unlock_irq(&pmu->lock); } @@ -430,104 +556,6 @@ static int i915_pmu_event_init(struct perf_event *event) return 0; } -static u64 __get_rc6(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - u64 val; - - val = intel_rc6_residency_ns(i915, - IS_VALLEYVIEW(i915) ? - VLV_GT_RENDER_RC6 : - GEN6_GT_GFX_RC6); - - if (HAS_RC6p(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); - - if (HAS_RC6pp(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); - - return val; -} - -static u64 get_rc6(struct intel_gt *gt) -{ -#if IS_ENABLED(CONFIG_PM) - struct drm_i915_private *i915 = gt->i915; - struct intel_runtime_pm *rpm = &i915->runtime_pm; - struct i915_pmu *pmu = &i915->pmu; - intel_wakeref_t wakeref; - unsigned long flags; - u64 val; - - wakeref = intel_runtime_pm_get_if_in_use(rpm); - if (wakeref) { - val = __get_rc6(gt); - intel_runtime_pm_put(rpm, wakeref); - - /* - * If we are coming back from being runtime suspended we must - * be careful not to report a larger value than returned - * previously. - */ - - spin_lock_irqsave(&pmu->lock, flags); - - if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { - pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; - pmu->sample[__I915_SAMPLE_RC6].cur = val; - } else { - val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur; - } - - spin_unlock_irqrestore(&pmu->lock, flags); - } else { - struct device *kdev = rpm->kdev; - - /* - * We are runtime suspended. - * - * Report the delta from when the device was suspended to now, - * on top of the last known real value, as the approximated RC6 - * counter value. - */ - spin_lock_irqsave(&pmu->lock, flags); - - /* - * After the above branch intel_runtime_pm_get_if_in_use failed - * to get the runtime PM reference we cannot assume we are in - * runtime suspend since we can either: a) race with coming out - * of it before we took the power.lock, or b) there are other - * states than suspended which can bring us here. - * - * We need to double-check that we are indeed currently runtime - * suspended and if not we cannot do better than report the last - * known RC6 value. - */ - if (pm_runtime_status_suspended(kdev)) { - val = pm_runtime_suspended_time(kdev); - - if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) - pmu->suspended_time_last = val; - - val -= pmu->suspended_time_last; - val += pmu->sample[__I915_SAMPLE_RC6].cur; - - pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; - } else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { - val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur; - } else { - val = pmu->sample[__I915_SAMPLE_RC6].cur; - } - - spin_unlock_irqrestore(&pmu->lock, flags); - } - - return val; -#else - return __get_rc6(gt); -#endif -} - static u64 __i915_pmu_event_read(struct perf_event *event) { struct drm_i915_private *i915 = diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 4fc4f2478301..067dbbf3bdff 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -97,9 +97,9 @@ struct i915_pmu { */ struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; /** - * @suspended_time_last: Cached suspend time from PM core. + * @sleep_last: Last time GT parked for RC6 estimation. */ - u64 suspended_time_last; + ktime_t sleep_last; /** * @i915_attr: Memory block holding device attributes. */ -- cgit v1.2.3-59-g8ed1b From c113236718e89561f309705f1a78126b3df93a21 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 27 Sep 2019 12:08:49 +0100 Subject: drm/i915: Extract GT render sleep (rc6) management Continuing the theme of breaking intel_pm.c up in a reasonable chunk of powermanagement utilities, pull out the rc6 setup into its GT handler. Based on a patch by Chris Wilson. Signed-off-by: Andi Shyti Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190919143840.20384-1-andi.shyti@intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20190927110849.28734-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_pm.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 1 + drivers/gpu/drm/i915/gt/intel_gt.c | 5 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 92 ++- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 11 +- drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 + drivers/gpu/drm/i915/gt/intel_rc6.c | 712 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_rc6.h | 25 + drivers/gpu/drm/i915/gt/intel_rc6_types.h | 28 + drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 50 ++ drivers/gpu/drm/i915/i915_debugfs.c | 11 +- drivers/gpu/drm/i915/i915_drv.h | 7 - drivers/gpu/drm/i915/i915_pmu.c | 9 +- drivers/gpu/drm/i915/i915_sysfs.c | 4 +- drivers/gpu/drm/i915/intel_pm.c | 794 +-------------------- drivers/gpu/drm/i915/intel_pm.h | 3 - .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + 18 files changed, 916 insertions(+), 843 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_rc6.c create mode 100644 drivers/gpu/drm/i915/gt/intel_rc6.h create mode 100644 drivers/gpu/drm/i915/gt/intel_rc6_types.h create mode 100644 drivers/gpu/drm/i915/gt/selftest_gt_pm.c (limited to 'drivers/gpu/drm/i915/i915_pmu.c') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 6313e7b4bd78..8d407ff7d59d 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -85,6 +85,7 @@ gt-y += \ gt/intel_gt_pm_irq.o \ gt/intel_hangcheck.o \ gt/intel_lrc.o \ + gt/intel_rc6.o \ gt/intel_renderstate.o \ gt/intel_reset.o \ gt/intel_ringbuffer.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index a11ad4d914ca..cca192ecff8b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -137,7 +137,6 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) bool i915_gem_load_power_context(struct drm_i915_private *i915) { - intel_gt_pm_enable(&i915->gt); return switch_to_kernel_context_sync(&i915->gt); } @@ -188,6 +187,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) i915_gem_drain_freed_objects(i915); intel_uc_suspend(&i915->gt.uc); + intel_gt_suspend(&i915->gt); } static struct drm_i915_gem_object *first_mm_object(struct list_head *list) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index ce61c83d68e9..66fc49b76ea8 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -11,6 +11,7 @@ #include "intel_engine_pool.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_rc6.h" static int __engine_unpark(struct intel_wakeref *wf) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index eef9bdae8ebb..0e5909ee0657 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -7,6 +7,7 @@ #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_mocs.h" +#include "intel_rc6.h" #include "intel_uncore.h" #include "intel_pm.h" @@ -369,6 +370,8 @@ int intel_gt_init(struct intel_gt *gt) if (err) return err; + intel_gt_pm_init(gt); + return 0; } @@ -387,8 +390,8 @@ void intel_gt_driver_release(struct intel_gt *gt) { /* Paranoia: make sure we have disabled everything before we exit. */ intel_gt_pm_disable(gt); + intel_gt_pm_fini(gt); - intel_cleanup_gt_powersave(gt->i915); intel_gt_fini_scratch(gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 2ccf8cacaa0a..42f175d9b98c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -11,6 +11,7 @@ #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_pm.h" +#include "intel_rc6.h" #include "intel_wakeref.h" static void pm_notify(struct intel_gt *gt, int state) @@ -90,6 +91,16 @@ void intel_gt_pm_init_early(struct intel_gt *gt) BLOCKING_INIT_NOTIFIER_HEAD(>->pm_notifications); } +void intel_gt_pm_init(struct intel_gt *gt) +{ + /* + * Enabling power-management should be "self-healing". If we cannot + * enable a feature, simply leave it disabled with a notice to the + * user. + */ + intel_rc6_init(>->rc6); +} + static bool reset_engines(struct intel_gt *gt) { if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) @@ -124,40 +135,14 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) __intel_engine_reset(engine, false); } -static bool is_mock_device(const struct intel_gt *gt) -{ - return I915_SELFTEST_ONLY(gt->awake == -1); -} - -void intel_gt_pm_enable(struct intel_gt *gt) +void intel_gt_pm_disable(struct intel_gt *gt) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(gt->i915)) - return; - - if (is_mock_device(gt)) - return; - - intel_gt_pm_get(gt); - - for_each_engine(engine, gt->i915, id) { - intel_engine_pm_get(engine); - engine->serial++; /* force kernel context reload */ - intel_engine_pm_put(engine); - } - - intel_gt_pm_put(gt); + intel_sanitize_gt_powersave(gt->i915); } -void intel_gt_pm_disable(struct intel_gt *gt) +void intel_gt_pm_fini(struct intel_gt *gt) { - if (is_mock_device(gt)) - return; - - intel_sanitize_gt_powersave(gt->i915); + intel_rc6_fini(>->rc6); } int intel_gt_resume(struct intel_gt *gt) @@ -173,6 +158,9 @@ int intel_gt_resume(struct intel_gt *gt) * allowing us to fixup the user contexts on their first pin. */ intel_gt_pm_get(gt); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + intel_rc6_sanitize(>->rc6); + for_each_engine(engine, gt->i915, id) { struct intel_context *ce; @@ -197,11 +185,51 @@ int intel_gt_resume(struct intel_gt *gt) break; } } + + intel_rc6_enable(>->rc6); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_gt_pm_put(gt); return err; } +static void wait_for_idle(struct intel_gt *gt) +{ + mutex_lock(>->i915->drm.struct_mutex); /* XXX */ + do { + if (i915_gem_wait_for_idle(gt->i915, + I915_WAIT_LOCKED, + I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* XXX hide warning from gem_eio */ + if (i915_modparams.reset) { + dev_err(gt->i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); + } + + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + intel_gt_set_wedged(gt); + } + } while (i915_retire_requests(gt->i915)); + mutex_unlock(>->i915->drm.struct_mutex); + + intel_gt_pm_wait_for_idle(gt); +} + +void intel_gt_suspend(struct intel_gt *gt) +{ + intel_wakeref_t wakeref; + + /* We expect to be idle already; but also want to be independent */ + wait_for_idle(gt); + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + intel_rc6_disable(>->rc6); +} + void intel_gt_runtime_suspend(struct intel_gt *gt) { intel_uc_runtime_suspend(>->uc); @@ -213,3 +241,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt) return intel_uc_runtime_resume(>->uc); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_gt_pm.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index d1f3e2e23937..ab794e853356 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -43,12 +43,21 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) } void intel_gt_pm_init_early(struct intel_gt *gt); -void intel_gt_pm_enable(struct intel_gt *gt); +void intel_gt_pm_init(struct intel_gt *gt); void intel_gt_pm_disable(struct intel_gt *gt); +void intel_gt_pm_fini(struct intel_gt *gt); void intel_gt_sanitize(struct intel_gt *gt, bool force); + int intel_gt_resume(struct intel_gt *gt); +void intel_gt_suspend(struct intel_gt *gt); + void intel_gt_runtime_suspend(struct intel_gt *gt); int intel_gt_runtime_resume(struct intel_gt *gt); +static inline bool is_mock_gt(const struct intel_gt *gt) +{ + return I915_SELFTEST_ONLY(gt->awake == -1); +} + #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index e64db4c13df6..7134f1319bbe 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -18,6 +18,7 @@ #include "i915_vma.h" #include "intel_engine_types.h" #include "intel_reset_types.h" +#include "intel_rc6_types.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -67,6 +68,8 @@ struct intel_gt { */ intel_wakeref_t awake; + struct intel_rc6 rc6; + struct blocking_notifier_head pm_notifications; ktime_t last_init_time; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c new file mode 100644 index 000000000000..d6167dd592e9 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -0,0 +1,712 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_rc6.h" +#include "intel_sideband.h" + +/** + * DOC: RC6 + * + * RC6 is a special power stage which allows the GPU to enter an very + * low-voltage mode when idle, using down to 0V while at this stage. This + * stage is entered automatically when the GPU is idle when RC6 support is + * enabled, and as soon as new workload arises GPU wakes up automatically as + * well. + * + * There are different RC6 modes available in Intel GPU, which differentiate + * among each other with the latency required to enter and leave RC6 and + * voltage consumed by the GPU in different states. + * + * The combination of the following flags define which states GPU is allowed + * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and + * RC6pp is deepest RC6. Their support by hardware varies according to the + * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one + * which brings the most power savings; deeper states save more power, but + * require higher latency to switch to and wake up. + */ + +static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6) +{ + return container_of(rc6, struct intel_gt, rc6); +} + +static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc) +{ + return rc6_to_gt(rc)->uncore; +} + +static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc) +{ + return rc6_to_gt(rc)->i915; +} + +static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +{ + intel_uncore_write_fw(uncore, reg, val); +} + +static void gen11_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + + set(uncore, GEN6_RC_SLEEP, 0); + + set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + + /* + * 2c: Program Coarse Power Gating Policies. + * + * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we + * use instead is a more conservative estimate for the maximum time + * it takes us to service a CS interrupt and submit a new ELSP - that + * is the time which the GPU is idle waiting for the CPU to select the + * next request to execute. If the idle hysteresis is less than that + * interrupt service latency, the hardware will automatically gate + * the power well and we will then incur the wake up cost on top of + * the service latency. A similar guide from plane_state is that we + * do not want the enable hysteresis to less than the wakeup latency. + * + * igt/gem_exec_nop/sequential provides a rough estimate for the + * service latency, and puts it around 10us for Broadwell (and other + * big core) and around 40us for Broxton (and other low power cores). + * [Note that for legacy ringbuffer submission, this is less than 1us!] + * However, the wakeup latency on Broxton is closer to 100us. To be + * conservative, we have to factor in a context switch on top (due + * to ksoftirqd). + */ + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + + /* 3a: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1)); + + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE); +} + +static void gen9_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 rc6_mode; + + /* 2b: Program RC6 thresholds.*/ + if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) { + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + } else if (IS_SKYLAKE(rc6_to_i915(rc6))) { + /* + * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only + * when CPG is enabled + */ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); + } else { + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); + } + + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + + set(uncore, GEN6_RC_SLEEP, 0); + + /* + * 2c: Program Coarse Power Gating Policies. + * + * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we + * use instead is a more conservative estimate for the maximum time + * it takes us to service a CS interrupt and submit a new ELSP - that + * is the time which the GPU is idle waiting for the CPU to select the + * next request to execute. If the idle hysteresis is less than that + * interrupt service latency, the hardware will automatically gate + * the power well and we will then incur the wake up cost on top of + * the service latency. A similar guide from plane_state is that we + * do not want the enable hysteresis to less than the wakeup latency. + * + * igt/gem_exec_nop/sequential provides a rough estimate for the + * service latency, and puts it around 10us for Broadwell (and other + * big core) and around 40us for Broxton (and other low power cores). + * [Note that for legacy ringbuffer submission, this is less than 1us!] + * However, the wakeup latency on Broxton is closer to 100us. To be + * conservative, we have to factor in a context switch on top (due + * to ksoftirqd). + */ + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + + /* 3a: Enable RC6 */ + set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + + /* WaRsUseTimeoutMode:cnl (pre-prod) */ + if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0)) + rc6_mode = GEN7_RC_CTL_TO_MODE; + else + rc6_mode = GEN6_RC_CTL_EI_MODE(1); + + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + rc6_mode); + + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); +} + +static void gen8_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ + + /* 3: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + GEN6_RC_CTL_RC6_ENABLE); +} + +static void gen6_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 rc6vids, rc6_mask; + int ret; + + set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); + set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC1e_THRESHOLD, 1000); + if (IS_IVYBRIDGE(i915)) + set(uncore, GEN6_RC6_THRESHOLD, 125000); + else + set(uncore, GEN6_RC6_THRESHOLD, 50000); + set(uncore, GEN6_RC6p_THRESHOLD, 150000); + set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */ + + /* We don't use those on Haswell */ + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + if (HAS_RC6p(i915)) + rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; + if (HAS_RC6pp(i915)) + rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; + set(uncore, GEN6_RC_CONTROL, + rc6_mask | + GEN6_RC_CTL_EI_MODE(1) | + GEN6_RC_CTL_HW_ENABLE); + + rc6vids = 0; + ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, + &rc6vids, NULL); + if (IS_GEN(i915, 6) && ret) { + DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); + } else if (IS_GEN(i915, 6) && + (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { + DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", + GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); + rc6vids &= 0xffff00; + rc6vids |= GEN6_ENCODE_RC6_VID(450); + ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); + if (ret) + DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); + } +} + +/* Check that the pcbr address is not empty. */ +static int chv_rc6_init(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + resource_size_t pctx_paddr, paddr; + resource_size_t pctx_size = 32 * SZ_1K; + u32 pcbr; + + pcbr = intel_uncore_read(uncore, VLV_PCBR); + if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size; + GEM_BUG_ON(paddr > U32_MAX); + + pctx_paddr = (paddr & ~4095); + intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); + } + + return 0; +} + +static int vlv_rc6_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_gem_object *pctx; + resource_size_t pctx_paddr; + resource_size_t pctx_size = 24 * SZ_1K; + u32 pcbr; + + pcbr = intel_uncore_read(uncore, VLV_PCBR); + if (pcbr) { + /* BIOS set it up already, grab the pre-alloc'd space */ + resource_size_t pcbr_offset; + + pcbr_offset = (pcbr & ~4095) - i915->dsm.start; + pctx = i915_gem_object_create_stolen_for_preallocated(i915, + pcbr_offset, + I915_GTT_OFFSET_NONE, + pctx_size); + if (!pctx) + return -ENOMEM; + + goto out; + } + + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + + /* + * From the Gunit register HAS: + * The Gfx driver is expected to program this register and ensure + * proper allocation within Gfx stolen memory. For example, this + * register should be programmed such than the PCBR range does not + * overlap with other ranges, such as the frame buffer, protected + * memory, or any other relevant ranges. + */ + pctx = i915_gem_object_create_stolen(i915, pctx_size); + if (!pctx) { + DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); + return -ENOMEM; + } + + GEM_BUG_ON(range_overflows_t(u64, + i915->dsm.start, + pctx->stolen->start, + U32_MAX)); + pctx_paddr = i915->dsm.start + pctx->stolen->start; + intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); + +out: + rc6->pctx = pctx; + return 0; +} + +static void chv_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2a: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + + for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_SLEEP, 0); + + /* TO threshold set to 500 us (0x186 * 1.28 us) */ + set(uncore, GEN6_RC6_THRESHOLD, 0x186); + + /* Allows RC6 residency counter to work */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + /* 3: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE); +} + +static void vlv_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GEN6_RC6_THRESHOLD, 0x557); + + /* Allows RC6 residency counter to work */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC0_COUNT_EN | + VLV_RENDER_RC0_COUNT_EN | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + set(uncore, GEN6_RC_CONTROL, + GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); +} + +static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_private *i915 = rc6_to_i915(rc6); + u32 rc6_ctx_base, rc_ctl, rc_sw_target; + bool enable_rc6 = true; + + rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL); + rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE); + rc_sw_target &= RC_SW_TARGET_STATE_MASK; + rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT; + DRM_DEBUG_DRIVER("BIOS enabled RC states: " + "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", + onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), + onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), + rc_sw_target); + + if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) { + DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); + enable_rc6 = false; + } + + /* + * The exact context size is not known for BXT, so assume a page size + * for this check. + */ + rc6_ctx_base = + intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK; + if (!(rc6_ctx_base >= i915->dsm_reserved.start && + rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) { + DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); + enable_rc6 = false; + } + + if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) { + DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) || + !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) || + !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) { + DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) { + DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) { + DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); + enable_rc6 = false; + } + + return enable_rc6; +} + +static bool rc6_supported(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!HAS_RC6(i915)) + return false; + + if (intel_vgpu_active(i915)) + return false; + + if (is_mock_gt(rc6_to_gt(rc6))) + return false; + + if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) { + dev_notice(i915->drm.dev, + "RC6 and powersaving disabled by BIOS\n"); + return false; + } + + return true; +} + +static void rpm_get(struct intel_rc6 *rc6) +{ + GEM_BUG_ON(rc6->wakeref); + pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev); + rc6->wakeref = true; +} + +static void rpm_put(struct intel_rc6 *rc6) +{ + GEM_BUG_ON(!rc6->wakeref); + pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev); + rc6->wakeref = false; +} + +static void __intel_rc6_disable(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + if (INTEL_GEN(i915) >= 9) + set(uncore, GEN9_PG_ENABLE, 0); + set(uncore, GEN6_RC_CONTROL, 0); + set(uncore, GEN6_RC_STATE, 0); + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); +} + +void intel_rc6_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + int err; + + /* Disable runtime-pm until we can save the GPU state with rc6 pctx */ + rpm_get(rc6); + + if (!rc6_supported(rc6)) + return; + + if (IS_CHERRYVIEW(i915)) + err = chv_rc6_init(rc6); + else if (IS_VALLEYVIEW(i915)) + err = vlv_rc6_init(rc6); + else + err = 0; + + /* Sanitize rc6, ensure it is disabled before we are ready. */ + __intel_rc6_disable(rc6); + + rc6->supported = err == 0; +} + +void intel_rc6_sanitize(struct intel_rc6 *rc6) +{ + if (rc6->supported) + __intel_rc6_disable(rc6); +} + +void intel_rc6_enable(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + if (!rc6->supported) + return; + + GEM_BUG_ON(rc6->enabled); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + if (IS_CHERRYVIEW(i915)) + chv_rc6_enable(rc6); + else if (IS_VALLEYVIEW(i915)) + vlv_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 11) + gen11_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 9) + gen9_rc6_enable(rc6); + else if (IS_BROADWELL(i915)) + gen8_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 6) + gen6_rc6_enable(rc6); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + /* rc6 is ready, runtime-pm is go! */ + rpm_put(rc6); + rc6->enabled = true; +} + +void intel_rc6_disable(struct intel_rc6 *rc6) +{ + if (!rc6->enabled) + return; + + rpm_get(rc6); + rc6->enabled = false; + + __intel_rc6_disable(rc6); +} + +void intel_rc6_fini(struct intel_rc6 *rc6) +{ + struct drm_i915_gem_object *pctx; + + intel_rc6_disable(rc6); + + pctx = fetch_and_zero(&rc6->pctx); + if (pctx) + i915_gem_object_put(pctx); + + if (rc6->wakeref) + rpm_put(rc6); +} + +static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg) +{ + u32 lower, upper, tmp; + int loop = 2; + + /* + * The register accessed do not need forcewake. We borrow + * uncore lock to prevent concurrent access to range reg. + */ + lockdep_assert_held(&uncore->lock); + + /* + * vlv and chv residency counters are 40 bits in width. + * With a control bit, we can choose between upper or lower + * 32bit window into this counter. + * + * Although we always use the counter in high-range mode elsewhere, + * userspace may attempt to read the value before rc6 is initialised, + * before we have set the default VLV_COUNTER_CONTROL value. So always + * set the high bit to be safe. + */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = intel_uncore_read_fw(uncore, reg); + do { + tmp = upper; + + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); + lower = intel_uncore_read_fw(uncore, reg); + + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = intel_uncore_read_fw(uncore, reg); + } while (upper != tmp && --loop); + + /* + * Everywhere else we always use VLV_COUNTER_CONTROL with the + * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set + * now. + */ + + return lower | (u64)upper << 8; +} + +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + u64 time_hw, prev_hw, overflow_hw; + unsigned int fw_domains; + unsigned long flags; + unsigned int i; + u32 mul, div; + + if (!rc6->supported) + return 0; + + /* + * Store previous hw counter values for counter wrap-around handling. + * + * There are only four interesting registers and they live next to each + * other so we can use the relative address, compared to the smallest + * one as the index into driver storage. + */ + i = (i915_mmio_reg_offset(reg) - + i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); + if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency))) + return 0; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + mul = 1000000; + div = i915->czclk_freq; + overflow_hw = BIT_ULL(40); + time_hw = vlv_residency_raw(uncore, reg); + } else { + /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ + if (IS_GEN9_LP(i915)) { + mul = 10000; + div = 12; + } else { + mul = 1280; + div = 1; + } + + overflow_hw = BIT_ULL(32); + time_hw = intel_uncore_read_fw(uncore, reg); + } + + /* + * Counter wrap handling. + * + * But relying on a sufficient frequency of queries otherwise counters + * can still wrap. + */ + prev_hw = rc6->prev_hw_residency[i]; + rc6->prev_hw_residency[i] = time_hw; + + /* RC6 delta from last sample. */ + if (time_hw >= prev_hw) + time_hw -= prev_hw; + else + time_hw += overflow_hw - prev_hw; + + /* Add delta to RC6 extended raw driver copy. */ + time_hw += rc6->cur_residency[i]; + rc6->cur_residency[i] = time_hw; + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irqrestore(&uncore->lock, flags); + + return mul_u64_u32_div(time_hw, mul, div); +} + +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg) +{ + return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000); +} diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h new file mode 100644 index 000000000000..5e6711f36457 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RC6_H +#define INTEL_RC6_H + +#include "i915_reg.h" + +struct intel_engine_cs; +struct intel_rc6; + +void intel_rc6_init(struct intel_rc6 *rc6); +void intel_rc6_fini(struct intel_rc6 *rc6); + +void intel_rc6_sanitize(struct intel_rc6 *rc6); +void intel_rc6_enable(struct intel_rc6 *rc6); +void intel_rc6_disable(struct intel_rc6 *rc6); + +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg); +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg); + +#endif /* INTEL_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h new file mode 100644 index 000000000000..214f354d6ae4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RC6_TYPES_H +#define INTEL_RC6_TYPES_H + +#include +#include + +#include "intel_engine_types.h" + +struct drm_i915_gem_object; + +struct intel_rc6 { + u64 prev_hw_residency[4]; + u64 cur_residency[4]; + + struct drm_i915_gem_object *pctx; + + bool supported : 1; + bool enabled : 1; + bool wakeref : 1; +}; + +#endif /* INTEL_RC6_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c new file mode 100644 index 000000000000..87985bd46423 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -0,0 +1,50 @@ + +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +static int live_gt_resume(void *arg) +{ + struct intel_gt *gt = arg; + IGT_TIMEOUT(end_time); + int err; + + /* Do several suspend/resume cycles to check we don't explode! */ + do { + intel_gt_suspend(gt); + + if (gt->rc6.enabled) { + pr_err("rc6 still enabled after suspend!\n"); + intel_gt_set_wedged_on_init(gt); + err = -EINVAL; + break; + } + + err = intel_gt_resume(gt); + if (err) + break; + + if (gt->rc6.supported && !gt->rc6.enabled) { + pr_err("rc6 not enabled upon resume!\n"); + intel_gt_set_wedged_on_init(gt); + err = -EINVAL; + break; + } + } while (!__igt_timeout(end_time, NULL)); + + return err; +} + +int intel_gt_pm_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_gt_resume), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b5b449a88cf1..fec9fb7cc384 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -42,6 +42,7 @@ #include "gem/i915_gem_context.h" #include "gt/intel_gt_pm.h" #include "gt/intel_reset.h" +#include "gt/intel_rc6.h" #include "gt/uc/intel_guc_submission.h" #include "i915_debugfs.h" @@ -1168,11 +1169,13 @@ static void print_rc6_res(struct seq_file *m, const char *title, const i915_reg_t reg) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_i915_private *i915 = node_to_i915(m->private); + intel_wakeref_t wakeref; - seq_printf(m, "%s %u (%llu us)\n", - title, I915_READ(reg), - intel_rc6_residency_us(dev_priv, reg)); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + seq_printf(m, "%s %u (%llu us)\n", title, + intel_uncore_read(&i915->uncore, reg), + intel_rc6_residency_us(&i915->gt.rc6, reg)); } static int vlv_drpc_info(struct seq_file *m) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b3c7dbc1832a..0e4e77373ee5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -599,19 +599,12 @@ struct intel_rps { struct intel_rps_ei ei; }; -struct intel_rc6 { - bool enabled; - u64 prev_hw_residency[4]; - u64 cur_residency[4]; -}; - struct intel_llc_pstate { bool enabled; }; struct intel_gen6_power_mgmt { struct intel_rps rps; - struct intel_rc6 rc6; struct intel_llc_pstate llc_pstate; }; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 3310353890fb..d0508719492e 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -11,6 +11,7 @@ #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_rc6.h" #include "i915_drv.h" #include "i915_pmu.h" @@ -116,21 +117,21 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) return enable; } -static u64 __get_rc6(const struct intel_gt *gt) +static u64 __get_rc6(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; u64 val; - val = intel_rc6_residency_ns(i915, + val = intel_rc6_residency_ns(>->rc6, IS_VALLEYVIEW(i915) ? VLV_GT_RENDER_RC6 : GEN6_GT_GFX_RC6); if (HAS_RC6p(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); + val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6p); if (HAS_RC6pp(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); + val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6pp); return val; } diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d8a3b180c084..034b8abc5062 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -30,6 +30,8 @@ #include #include +#include "gt/intel_rc6.h" + #include "i915_drv.h" #include "i915_sysfs.h" #include "intel_pm.h" @@ -49,7 +51,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv, u64 res = 0; with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) - res = intel_rc6_residency_us(dev_priv, reg); + res = intel_rc6_residency_us(&dev_priv->gt.rc6, reg); return DIV_ROUND_CLOSEST_ULL(res, 1000); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6bed2ed14574..bfcf03ab5245 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -45,26 +45,6 @@ #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" -/** - * DOC: RC6 - * - * RC6 is a special power stage which allows the GPU to enter an very - * low-voltage mode when idle, using down to 0V while at this stage. This - * stage is entered automatically when the GPU is idle when RC6 support is - * enabled, and as soon as new workload arises GPU wakes up automatically as well. - * - * There are different RC6 modes available in Intel GPU, which differentiate - * among each other with the latency required to enter and leave RC6 and - * voltage consumed by the GPU in different states. - * - * The combination of the following flags define which states GPU is allowed - * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and - * RC6pp is deepest RC6. Their support by hardware varies according to the - * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one - * which brings the most power savings; deeper states save more power, but - * require higher latency to switch to and wake up. - */ - static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) { if (HAS_LLC(dev_priv)) { @@ -6918,142 +6898,27 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) return err; } -static void gen9_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); - I915_WRITE(GEN9_PG_ENABLE, 0); -} - static void gen9_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RP_CONTROL, 0); } -static void gen6_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); -} - static void gen6_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RPNSWREQ, 1 << 31); I915_WRITE(GEN6_RP_CONTROL, 0); } -static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); -} - static void cherryview_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RP_CONTROL, 0); } -static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) -{ - /* We're doing forcewake before Disabling RC6, - * This what the BIOS expects when going into suspend */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - I915_WRITE(GEN6_RC_CONTROL, 0); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void valleyview_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RP_CONTROL, 0); } -static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) -{ - bool enable_rc6 = true; - unsigned long rc6_ctx_base; - u32 rc_ctl; - int rc_sw_target; - - rc_ctl = I915_READ(GEN6_RC_CONTROL); - rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >> - RC_SW_TARGET_STATE_SHIFT; - DRM_DEBUG_DRIVER("BIOS enabled RC states: " - "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", - onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), - onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), - rc_sw_target); - - if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) { - DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); - enable_rc6 = false; - } - - /* - * The exact context size is not known for BXT, so assume a page size - * for this check. - */ - rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; - if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) && - (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) { - DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); - enable_rc6 = false; - } - - if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) { - DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN8_PUSHBUS_CONTROL) || - !I915_READ(GEN8_PUSHBUS_ENABLE) || - !I915_READ(GEN8_PUSHBUS_SHIFT)) { - DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN6_GFXPAUSE)) { - DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN8_MISC_CTRL0)) { - DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); - enable_rc6 = false; - } - - return enable_rc6; -} - -static bool sanitize_rc6(struct drm_i915_private *i915) -{ - struct intel_device_info *info = mkwrite_device_info(i915); - - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(i915)) { - info->has_rc6 = 0; - info->has_rps = false; - } - - if (info->has_rc6 && - IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) { - DRM_INFO("RC6 disabled by BIOS\n"); - info->has_rc6 = 0; - } - - /* - * We assume that we do not have any deep rc6 levels if we don't have - * have the previous rc6 level supported, i.e. we use HAS_RC6() - * as the initial coarse check for rc6 in general, moving on to - * progressively finer/deeper levels. - */ - if (!info->has_rc6 && info->has_rc6p) - info->has_rc6p = 0; - - return info->has_rc6; -} - static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -7140,203 +7005,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); } -static void gen11_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* - * 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have. - */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - if (HAS_GT_UC(dev_priv)) - I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); - - I915_WRITE(GEN6_RC_SLEEP, 0); - - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ - - /* - * 2c: Program Coarse Power Gating Policies. - * - * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we - * use instead is a more conservative estimate for the maximum time - * it takes us to service a CS interrupt and submit a new ELSP - that - * is the time which the GPU is idle waiting for the CPU to select the - * next request to execute. If the idle hysteresis is less than that - * interrupt service latency, the hardware will automatically gate - * the power well and we will then incur the wake up cost on top of - * the service latency. A similar guide from plane_state is that we - * do not want the enable hysteresis to less than the wakeup latency. - * - * igt/gem_exec_nop/sequential provides a rough estimate for the - * service latency, and puts it around 10us for Broadwell (and other - * big core) and around 40us for Broxton (and other low power cores). - * [Note that for legacy ringbuffer submission, this is less than 1us!] - * However, the wakeup latency on Broxton is closer to 100us. To be - * conservative, we have to factor in a context switch on top (due - * to ksoftirqd). - */ - I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); - I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); - - /* 3a: Enable RC6 */ - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_EI_MODE(1)); - - /* 3b: Enable Coarse Power Gating only when RC6 is enabled. */ - I915_WRITE(GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen9_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 rc6_mode; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - if (INTEL_GEN(dev_priv) >= 10) { - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - } else if (IS_SKYLAKE(dev_priv)) { - /* - * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only - * when CPG is enabled - */ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); - } else { - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); - } - - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - if (HAS_GT_UC(dev_priv)) - I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); - - I915_WRITE(GEN6_RC_SLEEP, 0); - - /* - * 2c: Program Coarse Power Gating Policies. - * - * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we - * use instead is a more conservative estimate for the maximum time - * it takes us to service a CS interrupt and submit a new ELSP - that - * is the time which the GPU is idle waiting for the CPU to select the - * next request to execute. If the idle hysteresis is less than that - * interrupt service latency, the hardware will automatically gate - * the power well and we will then incur the wake up cost on top of - * the service latency. A similar guide from plane_state is that we - * do not want the enable hysteresis to less than the wakeup latency. - * - * igt/gem_exec_nop/sequential provides a rough estimate for the - * service latency, and puts it around 10us for Broadwell (and other - * big core) and around 40us for Broxton (and other low power cores). - * [Note that for legacy ringbuffer submission, this is less than 1us!] - * However, the wakeup latency on Broxton is closer to 100us. To be - * conservative, we have to factor in a context switch on top (due - * to ksoftirqd). - */ - I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); - I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); - - /* 3a: Enable RC6 */ - I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - - /* WaRsUseTimeoutMode:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0)) - rc6_mode = GEN7_RC_CTL_TO_MODE; - else - rc6_mode = GEN6_RC_CTL_EI_MODE(1); - - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - rc6_mode); - - /* - * 3b: Enable Coarse Power Gating only when RC6 is enabled. - * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6. - */ - if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) - I915_WRITE(GEN9_PG_ENABLE, 0); - else - I915_WRITE(GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen8_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - I915_WRITE(GEN6_RC_SLEEP, 0); - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ - - /* 3: Enable RC6 */ - - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - GEN6_RC_CTL_RC6_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void gen8_enable_rps(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -7377,75 +7045,6 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); } -static void gen6_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 rc6vids, rc6_mask; - u32 gtfifodbg; - int ret; - - I915_WRITE(GEN6_RC_STATE, 0); - - /* Clear the DBG now so we don't confuse earlier errors */ - gtfifodbg = I915_READ(GTFIFODBG); - if (gtfifodbg) { - DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* disable the counters and set deterministic thresholds */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); - I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - I915_WRITE(GEN6_RC_SLEEP, 0); - I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); - if (IS_IVYBRIDGE(dev_priv)) - I915_WRITE(GEN6_RC6_THRESHOLD, 125000); - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); - I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); - I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ - - /* We don't use those on Haswell */ - rc6_mask = GEN6_RC_CTL_RC6_ENABLE; - if (HAS_RC6p(dev_priv)) - rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; - if (HAS_RC6pp(dev_priv)) - rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; - I915_WRITE(GEN6_RC_CONTROL, - rc6_mask | - GEN6_RC_CTL_EI_MODE(1) | - GEN6_RC_CTL_HW_ENABLE); - - rc6vids = 0; - ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, - &rc6vids, NULL); - if (IS_GEN(dev_priv, 6) && ret) { - DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); - } else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { - DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", - GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); - rc6vids &= 0xffff00; - rc6vids |= GEN6_ENCODE_RC6_VID(450); - ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); - if (ret) - DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); - } - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void gen6_enable_rps(struct drm_i915_private *dev_priv) { /* Here begins a magic sequence of register writes to enable @@ -7662,100 +7261,6 @@ static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) return max_t(u32, val, 0xc0); } -/* Check that the pctx buffer wasn't move under us. */ -static void valleyview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - - WARN_ON(pctx_addr != dev_priv->dsm.start + - dev_priv->vlv_pctx->stolen->start); -} - - -/* Check that the pcbr address is not empty. */ -static void cherryview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - - WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); -} - -static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) -{ - resource_size_t pctx_paddr, paddr; - resource_size_t pctx_size = 32*1024; - u32 pcbr; - - pcbr = I915_READ(VLV_PCBR); - if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { - DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - paddr = dev_priv->dsm.end + 1 - pctx_size; - GEM_BUG_ON(paddr > U32_MAX); - - pctx_paddr = (paddr & (~4095)); - I915_WRITE(VLV_PCBR, pctx_paddr); - } - - DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); -} - -static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) -{ - struct drm_i915_gem_object *pctx; - resource_size_t pctx_paddr; - resource_size_t pctx_size = 24*1024; - u32 pcbr; - - pcbr = I915_READ(VLV_PCBR); - if (pcbr) { - /* BIOS set it up already, grab the pre-alloc'd space */ - resource_size_t pcbr_offset; - - pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start; - pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv, - pcbr_offset, - I915_GTT_OFFSET_NONE, - pctx_size); - goto out; - } - - DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - - /* - * From the Gunit register HAS: - * The Gfx driver is expected to program this register and ensure - * proper allocation within Gfx stolen memory. For example, this - * register should be programmed such than the PCBR range does not - * overlap with other ranges, such as the frame buffer, protected - * memory, or any other relevant ranges. - */ - pctx = i915_gem_object_create_stolen(dev_priv, pctx_size); - if (!pctx) { - DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); - goto out; - } - - GEM_BUG_ON(range_overflows_t(u64, - dev_priv->dsm.start, - pctx->stolen->start, - U32_MAX)); - pctx_paddr = dev_priv->dsm.start + pctx->stolen->start; - I915_WRITE(VLV_PCBR, pctx_paddr); - -out: - DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); - dev_priv->vlv_pctx = pctx; -} - -static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) -{ - struct drm_i915_gem_object *pctx; - - pctx = fetch_and_zero(&dev_priv->vlv_pctx); - if (pctx) - i915_gem_object_put(pctx); -} - static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) { dev_priv->gt_pm.rps.gpll_ref_freq = @@ -7772,8 +7277,6 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; - valleyview_setup_pctx(dev_priv); - vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_PUNIT) | BIT(VLV_IOSF_SB_NC) | @@ -7828,8 +7331,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; - cherryview_setup_pctx(dev_priv); - vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_PUNIT) | BIT(VLV_IOSF_SB_NC) | @@ -7880,64 +7381,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) "Odd GPU freq values\n"); } -static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) -{ - valleyview_cleanup_pctx(dev_priv); -} - -static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 gtfifodbg, rc6_mode, pcbr; - - gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | - GT_FIFO_FREE_ENTRIES_CHV); - if (gtfifodbg) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - cherryview_check_pctx(dev_priv); - - /* 1a & 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2a: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - I915_WRITE(GEN6_RC_SLEEP, 0); - - /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ - I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); - - /* Allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); - - /* For now we assume BIOS is allocating and populating the PCBR */ - pcbr = I915_READ(VLV_PCBR); - - /* 3: Enable RC6 */ - rc6_mode = 0; - if (pcbr >> VLV_PCBR_ADDR_SHIFT) - rc6_mode = GEN7_RC_CTL_TO_MODE; - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void cherryview_enable_rps(struct drm_i915_private *dev_priv) { u32 val; @@ -7982,49 +7425,6 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); } -static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 gtfifodbg; - - valleyview_check_pctx(dev_priv); - - gtfifodbg = I915_READ(GTFIFODBG); - if (gtfifodbg) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - - /* Allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC0_COUNT_EN | - VLV_RENDER_RC0_COUNT_EN | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); - - I915_WRITE(GEN6_RC_CONTROL, - GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void valleyview_enable_rps(struct drm_i915_private *dev_priv) { u32 val; @@ -8551,14 +7951,9 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; - /* - * RPM depends on RC6 to save restore the GT HW context, so make RC6 a - * requirement. - */ - if (!sanitize_rc6(dev_priv)) { - DRM_INFO("RC6 disabled, disabling runtime PM support\n"); - pm_runtime_get(&dev_priv->drm.pdev->dev); - } + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + mkwrite_device_info(dev_priv)->has_rps = false; /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) @@ -8593,19 +7988,9 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) rps->cur_freq = rps->idle_freq; } -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) -{ - if (IS_VALLEYVIEW(dev_priv)) - valleyview_cleanup_gt_powersave(dev_priv); - - if (!HAS_RC6(dev_priv)) - pm_runtime_put(&dev_priv->drm.pdev->dev); -} - void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) { dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ - dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ intel_disable_gt_powersave(dev_priv); if (INTEL_GEN(dev_priv) >= 11) @@ -8626,25 +8011,6 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) i915->gt_pm.llc_pstate.enabled = false; } -static void intel_disable_rc6(struct drm_i915_private *dev_priv) -{ - lockdep_assert_held(&dev_priv->gt_pm.rps.lock); - - if (!dev_priv->gt_pm.rc6.enabled) - return; - - if (INTEL_GEN(dev_priv) >= 9) - gen9_disable_rc6(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) - cherryview_disable_rc6(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_disable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_disable_rc6(dev_priv); - - dev_priv->gt_pm.rc6.enabled = false; -} - static void intel_disable_rps(struct drm_i915_private *dev_priv) { lockdep_assert_held(&dev_priv->gt_pm.rps.lock); @@ -8670,8 +8036,6 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->gt_pm.rps.lock); - intel_disable_rc6(dev_priv); - intel_disable_rps(dev_priv); if (HAS_LLC(dev_priv)) intel_disable_llc_pstate(dev_priv); @@ -8691,29 +8055,6 @@ static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) i915->gt_pm.llc_pstate.enabled = true; } -static void intel_enable_rc6(struct drm_i915_private *dev_priv) -{ - lockdep_assert_held(&dev_priv->gt_pm.rps.lock); - - if (dev_priv->gt_pm.rc6.enabled) - return; - - if (IS_CHERRYVIEW(dev_priv)) - cherryview_enable_rc6(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 11) - gen11_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 9) - gen9_enable_rc6(dev_priv); - else if (IS_BROADWELL(dev_priv)) - gen8_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_enable_rc6(dev_priv); - - dev_priv->gt_pm.rc6.enabled = true; -} - static void intel_enable_rps(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -8755,8 +8096,6 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->gt_pm.rps.lock); - if (HAS_RC6(dev_priv)) - intel_enable_rc6(dev_priv); if (HAS_RPS(dev_priv)) intel_enable_rps(dev_priv); if (HAS_LLC(dev_priv)) @@ -9818,133 +9157,6 @@ void intel_pm_setup(struct drm_i915_private *dev_priv) atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } -static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, - const i915_reg_t reg) -{ - u32 lower, upper, tmp; - int loop = 2; - - /* - * The register accessed do not need forcewake. We borrow - * uncore lock to prevent concurrent access to range reg. - */ - lockdep_assert_held(&dev_priv->uncore.lock); - - /* - * vlv and chv residency counters are 40 bits in width. - * With a control bit, we can choose between upper or lower - * 32bit window into this counter. - * - * Although we always use the counter in high-range mode elsewhere, - * userspace may attempt to read the value before rc6 is initialised, - * before we have set the default VLV_COUNTER_CONTROL value. So always - * set the high bit to be safe. - */ - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); - upper = I915_READ_FW(reg); - do { - tmp = upper; - - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); - lower = I915_READ_FW(reg); - - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); - upper = I915_READ_FW(reg); - } while (upper != tmp && --loop); - - /* - * Everywhere else we always use VLV_COUNTER_CONTROL with the - * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set - * now. - */ - - return lower | (u64)upper << 8; -} - -u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, - const i915_reg_t reg) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u64 time_hw, prev_hw, overflow_hw; - unsigned int fw_domains; - unsigned long flags; - unsigned int i; - u32 mul, div; - - if (!HAS_RC6(dev_priv)) - return 0; - - /* - * Store previous hw counter values for counter wrap-around handling. - * - * There are only four interesting registers and they live next to each - * other so we can use the relative address, compared to the smallest - * one as the index into driver storage. - */ - i = (i915_mmio_reg_offset(reg) - - i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); - if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency))) - return 0; - - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); - - spin_lock_irqsave(&uncore->lock, flags); - intel_uncore_forcewake_get__locked(uncore, fw_domains); - - /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - mul = 1000000; - div = dev_priv->czclk_freq; - overflow_hw = BIT_ULL(40); - time_hw = vlv_residency_raw(dev_priv, reg); - } else { - /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ - if (IS_GEN9_LP(dev_priv)) { - mul = 10000; - div = 12; - } else { - mul = 1280; - div = 1; - } - - overflow_hw = BIT_ULL(32); - time_hw = intel_uncore_read_fw(uncore, reg); - } - - /* - * Counter wrap handling. - * - * But relying on a sufficient frequency of queries otherwise counters - * can still wrap. - */ - prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i]; - dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw; - - /* RC6 delta from last sample. */ - if (time_hw >= prev_hw) - time_hw -= prev_hw; - else - time_hw += overflow_hw - prev_hw; - - /* Add delta to RC6 extended raw driver copy. */ - time_hw += dev_priv->gt_pm.rc6.cur_residency[i]; - dev_priv->gt_pm.rc6.cur_residency[i] = time_hw; - - intel_uncore_forcewake_put__locked(uncore, fw_domains); - spin_unlock_irqrestore(&uncore->lock, flags); - - return mul_u64_u32_div(time_hw, mul, div); -} - -u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, - i915_reg_t reg) -{ - return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000); -} - u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat) { u32 cagf; diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index e3573e1e16e3..93d192d0610a 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -32,7 +32,6 @@ void intel_pm_setup(struct drm_i915_private *dev_priv); void intel_gpu_ips_init(struct drm_i915_private *dev_priv); void intel_gpu_ips_teardown(void); void intel_init_gt_powersave(struct drm_i915_private *dev_priv); -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); @@ -72,8 +71,6 @@ void intel_enable_ipc(struct drm_i915_private *dev_priv); int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); -u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, i915_reg_t reg); -u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, i915_reg_t reg); u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 66d83c1390c1..6713efea350b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -16,6 +16,7 @@ selftest(gt_engines, intel_engine_live_selftests) selftest(gt_timelines, intel_timeline_live_selftests) selftest(gt_contexts, intel_context_live_selftests) selftest(gt_lrc, intel_lrc_live_selftests) +selftest(gt_pm, intel_gt_pm_live_selftests) selftest(requests, i915_request_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) -- cgit v1.2.3-59-g8ed1b From 05488673a4d41383f9dd537f298e525e6b00fb93 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 16 Oct 2019 10:38:02 +0100 Subject: drm/i915/pmu: Support multiple GPUs With discrete graphics system can have both integrated and discrete GPU handled by i915. Currently we use a fixed name ("i915") when registering as the uncore PMU provider which stops working in this case. To fix this we add the PCI device name string to non-integrated devices handled by us. Integrated devices keep the legacy name preserving backward compatibility. v2: * Detect IGP and keep legacy name. (Michal) * Use PCI device name as suffix. (Michal, Chris) v3: * Constify the name. (Chris) * Use pci_domain_nr. (Chris) v4: * Fix kfree_const usage. (Chris) v5: * kfree_const does not work for modules. (Chris) * Changed is_igp helper to take i915. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Cc: Michal Wajdeczko Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191016093802.12483-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 29 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_pmu.h | 4 ++++ 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_pmu.c') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d0508719492e..00fe401868c4 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1080,6 +1080,17 @@ static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) cpuhp_remove_multi_state(cpuhp_slot); } +static bool is_igp(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + /* IGP is 0000:00:02.0 */ + return pci_domain_nr(pdev->bus) == 0 && + pdev->bus->number == 0 && + PCI_SLOT(pdev->devfn) == 2 && + PCI_FUNC(pdev->devfn) == 0; +} + void i915_pmu_register(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; @@ -1110,10 +1121,19 @@ void i915_pmu_register(struct drm_i915_private *i915) hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; - ret = perf_pmu_register(&pmu->base, "i915", -1); - if (ret) + if (!is_igp(i915)) + pmu->name = kasprintf(GFP_KERNEL, + "i915-%s", + dev_name(i915->drm.dev)); + else + pmu->name = "i915"; + if (!pmu->name) goto err; + ret = perf_pmu_register(&pmu->base, pmu->name, -1); + if (ret) + goto err_name; + ret = i915_pmu_register_cpuhp_state(pmu); if (ret) goto err_unreg; @@ -1122,6 +1142,9 @@ void i915_pmu_register(struct drm_i915_private *i915) err_unreg: perf_pmu_unregister(&pmu->base); +err_name: + if (!is_igp(i915)) + kfree(pmu->name); err: pmu->base.event_init = NULL; free_event_attributes(pmu); @@ -1143,5 +1166,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915) perf_pmu_unregister(&pmu->base); pmu->base.event_init = NULL; + if (!is_igp(i915)) + kfree(pmu->name); free_event_attributes(pmu); } diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 067dbbf3bdff..bf52e3983631 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -46,6 +46,10 @@ struct i915_pmu { * @base: PMU base. */ struct pmu base; + /** + * @name: Name as registered with perf core. + */ + const char *name; /** * @lock: Lock protecting enable mask and ref count handling. */ -- cgit v1.2.3-59-g8ed1b From fb26eee0600dbd6b2eb20356b7d425cb71413ea2 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 18 Oct 2019 10:05:14 +0100 Subject: drm/i915/pmu: Fix uninitialized variable on error path If name allocation failed the log message will contain an uninitialized error code which can be confusing. Fixes: 05488673a4d4 ("drm/i915/pmu: Support multiple GPUs") Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191018090514.1818-1-tvrtko.ursulin@linux.intel.com [tursulin: Commit message spelling fix.] --- drivers/gpu/drm/i915/i915_pmu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_pmu.c') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 00fe401868c4..144c32eed045 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1094,7 +1094,7 @@ static bool is_igp(struct drm_i915_private *i915) void i915_pmu_register(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; - int ret; + int ret = -ENOMEM; if (INTEL_GEN(i915) <= 2) { dev_info(i915->drm.dev, "PMU not supported for this GPU."); @@ -1102,10 +1102,8 @@ void i915_pmu_register(struct drm_i915_private *i915) } i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); - if (!i915_pmu_events_attr_group.attrs) { - ret = -ENOMEM; + if (!i915_pmu_events_attr_group.attrs) goto err; - } pmu->base.attr_groups = i915_pmu_attr_groups; pmu->base.task_ctx_nr = perf_invalid_context; -- cgit v1.2.3-59-g8ed1b From c6e07ada8eaa3ff66868a8bd6b1b9cdfa6d9fe21 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 18 Oct 2019 12:53:31 +0100 Subject: drm/i915/gt: Convert the leftover for_each_engine(gt) Use the local gt for iterating over the available set of engines. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191018115331.8980-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_rc6.c | 12 ++++++------ drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 6 +++--- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_pmu.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_pmu.c') diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 71184aa72896..70f0e01a38b9 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -65,7 +65,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GUC_MAX_IDLE_COUNT, 0xA); @@ -133,7 +133,7 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GUC_MAX_IDLE_COUNT, 0xA); @@ -192,7 +192,7 @@ static void gen8_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GEN6_RC_SLEEP, 0); set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ @@ -219,7 +219,7 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); - for_each_engine(engine, i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GEN6_RC_SLEEP, 0); @@ -344,7 +344,7 @@ static void chv_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GEN6_RC_SLEEP, 0); @@ -371,7 +371,7 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); - for_each_engine(engine, rc6_to_gt(rc6)->i915, id) + for_each_engine(engine, rc6_to_gt(rc6), id) set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); set(uncore, GEN6_RC6_THRESHOLD, 0x557); diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 311fdc0a21bc..bf631f15aa78 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1609,7 +1609,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct intel_engine_cs *signaller; *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1663,7 +1663,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) i915_reg_t last_reg = {}; /* keep gcc quiet */ *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1676,7 +1676,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* Insert a delay before the next switch! */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = intel_gt_scratch_offset(rq->engine->gt, + *cs++ = intel_gt_scratch_offset(engine->gt, INTEL_GT_SCRATCH_FIELD_DEFAULT); *cs++ = MI_NOOP; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0df057838a24..3148d5946b63 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1569,7 +1569,7 @@ static void gen7_ppgtt_enable(struct intel_gt *gt) } intel_uncore_write(uncore, GAM_ECOCHK, ecochk); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { /* GFX_MODE is per-ring on gen7+ */ ENGINE_WRITE(engine, RING_MODE_GEN7, diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 144c32eed045..85912917c062 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -301,7 +301,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) return; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct intel_engine_pmu *pmu = &engine->pmu; unsigned long flags; bool busy; -- cgit v1.2.3-59-g8ed1b From c442292a661bec3a32cf2a351c53c5f07da20e21 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Oct 2019 17:54:42 +0100 Subject: drm/i915/pmu: Initialise the spinlock before registering As the GT may be running in parallel with the module initialisation code, we may enter i915_pmu_gt_parked() as we are executing i915_pmu_register(). We have to init the spinlock before we mark pmu.event_init so that it is available for use by i915_pmu_gt_parked() (which may run as soon as event_init is set). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112127 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191025165442.23356-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pmu.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_pmu.c') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 85912917c062..b5b67c0624ff 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1101,20 +1101,6 @@ void i915_pmu_register(struct drm_i915_private *i915) return; } - i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); - if (!i915_pmu_events_attr_group.attrs) - goto err; - - pmu->base.attr_groups = i915_pmu_attr_groups; - pmu->base.task_ctx_nr = perf_invalid_context; - pmu->base.event_init = i915_pmu_event_init; - pmu->base.add = i915_pmu_event_add; - pmu->base.del = i915_pmu_event_del; - pmu->base.start = i915_pmu_event_start; - pmu->base.stop = i915_pmu_event_stop; - pmu->base.read = i915_pmu_event_read; - pmu->base.event_idx = i915_pmu_event_event_idx; - spin_lock_init(&pmu->lock); hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; @@ -1128,9 +1114,23 @@ void i915_pmu_register(struct drm_i915_private *i915) if (!pmu->name) goto err; + i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); + if (!i915_pmu_events_attr_group.attrs) + goto err_name; + + pmu->base.attr_groups = i915_pmu_attr_groups; + pmu->base.task_ctx_nr = perf_invalid_context; + pmu->base.event_init = i915_pmu_event_init; + pmu->base.add = i915_pmu_event_add; + pmu->base.del = i915_pmu_event_del; + pmu->base.start = i915_pmu_event_start; + pmu->base.stop = i915_pmu_event_stop; + pmu->base.read = i915_pmu_event_read; + pmu->base.event_idx = i915_pmu_event_event_idx; + ret = perf_pmu_register(&pmu->base, pmu->name, -1); if (ret) - goto err_name; + goto err_attr; ret = i915_pmu_register_cpuhp_state(pmu); if (ret) @@ -1140,13 +1140,14 @@ void i915_pmu_register(struct drm_i915_private *i915) err_unreg: perf_pmu_unregister(&pmu->base); +err_attr: + pmu->base.event_init = NULL; + free_event_attributes(pmu); err_name: if (!is_igp(i915)) kfree(pmu->name); err: - pmu->base.event_init = NULL; - free_event_attributes(pmu); - DRM_NOTE("Failed to register PMU! (err=%d)\n", ret); + dev_notice(i915->drm.dev, "Failed to register PMU!\n"); } void i915_pmu_unregister(struct drm_i915_private *i915) -- cgit v1.2.3-59-g8ed1b From 3e7abf8141935ded77abeb622480bf4a14241ece Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 24 Oct 2019 22:16:41 +0100 Subject: drm/i915: Extract GT render power state management i915_irq.c is large. One reason for this is that has a large chunk of the GT render power management stashed away in it. Extract that logic out of i915_irq.c and intel_pm.c and put it under one roof. Based on a patch by Chris Wilson. Signed-off-by: Andi Shyti Cc: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191024211642.7688-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/display/intel_display.c | 8 +- drivers/gpu/drm/i915/gt/intel_gt.c | 13 +- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 5 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 28 +- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 1 - drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 + drivers/gpu/drm/i915/gt/intel_llc.c | 2 +- drivers/gpu/drm/i915/gt/intel_rps.c | 1872 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_rps.h | 37 + drivers/gpu/drm/i915/gt/intel_rps_types.h | 93 + drivers/gpu/drm/i915/gt/selftest_llc.c | 7 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 +- drivers/gpu/drm/i915/i915_debugfs.c | 73 +- drivers/gpu/drm/i915/i915_drv.c | 3 - drivers/gpu/drm/i915/i915_drv.h | 96 - drivers/gpu/drm/i915/i915_gem.c | 3 +- drivers/gpu/drm/i915/i915_irq.c | 359 +--- drivers/gpu/drm/i915/i915_irq.h | 3 - drivers/gpu/drm/i915/i915_pmu.c | 10 +- drivers/gpu/drm/i915/i915_request.c | 7 +- drivers/gpu/drm/i915/i915_sysfs.c | 74 +- drivers/gpu/drm/i915/intel_pm.c | 2121 ++------------------- drivers/gpu/drm/i915/intel_pm.h | 22 - 24 files changed, 2330 insertions(+), 2514 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_rps.c create mode 100644 drivers/gpu/drm/i915/gt/intel_rps.h create mode 100644 drivers/gpu/drm/i915/gt/intel_rps_types.h (limited to 'drivers/gpu/drm/i915/i915_pmu.c') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 2f64db45f8e0..526f881325c8 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -95,6 +95,7 @@ gt-y += \ gt/intel_reset.o \ gt/intel_ring.o \ gt/intel_ring_submission.o \ + gt/intel_rps.o \ gt/intel_sseu.o \ gt/intel_timeline.o \ gt/intel_workarounds.o diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index cbf9cf30050c..0f0c582a56d5 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -55,6 +55,8 @@ #include "display/intel_tv.h" #include "display/intel_vdsc.h" +#include "gt/intel_rps.h" + #include "i915_drv.h" #include "i915_trace.h" #include "intel_acpi.h" @@ -14944,7 +14946,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait, * vblank without our intervention, so leave RPS alone. */ if (!i915_request_started(rq)) - gen6_rps_boost(rq); + intel_rps_boost(rq); i915_request_put(rq); drm_crtc_vblank_put(wait->crtc); @@ -15138,7 +15140,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, * maximum clocks following a vblank miss (see do_rps_boost()). */ if (!intel_state->rps_interactive) { - intel_rps_mark_interactive(dev_priv, true); + intel_rps_mark_interactive(&dev_priv->gt.rps, true); intel_state->rps_interactive = true; } @@ -15163,7 +15165,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->dev); if (intel_state->rps_interactive) { - intel_rps_mark_interactive(dev_priv, false); + intel_rps_mark_interactive(&dev_priv->gt.rps, false); intel_state->rps_interactive = false; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 35127699591d..f2440113fdf8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -9,6 +9,7 @@ #include "intel_gt_requests.h" #include "intel_mocs.h" #include "intel_rc6.h" +#include "intel_rps.h" #include "intel_uncore.h" #include "intel_pm.h" @@ -31,9 +32,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) void intel_gt_init_hw_early(struct drm_i915_private *i915) { i915->gt.ggtt = &i915->ggtt; - - /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_gt_pm_disable(&i915->gt); } static void init_unused_ring(struct intel_gt *gt, u32 base) @@ -320,8 +318,7 @@ void intel_gt_chipset_flush(struct intel_gt *gt) void intel_gt_driver_register(struct intel_gt *gt) { - if (IS_GEN(gt->i915, 5)) - intel_gpu_ips_init(gt->i915); + intel_rps_driver_register(>->rps); } static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) @@ -379,20 +376,16 @@ int intel_gt_init(struct intel_gt *gt) void intel_gt_driver_remove(struct intel_gt *gt) { GEM_BUG_ON(gt->awake); - intel_gt_pm_disable(gt); } void intel_gt_driver_unregister(struct intel_gt *gt) { - intel_gpu_ips_teardown(); + intel_rps_driver_unregister(>->rps); } void intel_gt_driver_release(struct intel_gt *gt) { - /* Paranoia: make sure we have disabled everything before we exit. */ - intel_gt_pm_disable(gt); intel_gt_pm_fini(gt); - intel_gt_fini_scratch(gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 34a4fb624bf7..973ee7eded64 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -11,6 +11,7 @@ #include "intel_gt.h" #include "intel_gt_irq.h" #include "intel_uncore.h" +#include "intel_rps.h" static void guc_irq_handler(struct intel_guc *guc, u16 iir) { @@ -77,7 +78,7 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, return guc_irq_handler(>->uc.guc, iir); if (instance == OTHER_GTPM_INSTANCE) - return gen11_rps_irq_handler(gt, iir); + return gen11_rps_irq_handler(>->rps, iir); WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", instance, iir); @@ -336,7 +337,7 @@ void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]) } if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { - gen6_rps_irq_handler(gt->i915, gt_iir[2]); + gen6_rps_irq_handler(>->rps, gt_iir[2]); guc_irq_handler(>->uc.guc, gt_iir[2] >> 16); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index aff4eda47819..32becf15d4e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -12,8 +12,10 @@ #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" +#include "intel_llc.h" #include "intel_pm.h" #include "intel_rc6.h" +#include "intel_rps.h" #include "intel_wakeref.h" static int __gt_unpark(struct intel_wakeref *wf) @@ -39,12 +41,7 @@ static int __gt_unpark(struct intel_wakeref *wf) gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); GEM_BUG_ON(!gt->awake); - intel_enable_gt_powersave(i915); - - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - + intel_rps_unpark(>->rps); i915_pmu_gt_unparked(i915); intel_gt_unpark_requests(gt); @@ -64,8 +61,7 @@ static int __gt_park(struct intel_wakeref *wf) i915_vma_parked(gt); i915_pmu_gt_parked(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_idle(i915); + intel_rps_park(>->rps); /* Everything switched off, flush any residual interrupt just in case */ intel_synchronize_irq(i915); @@ -97,6 +93,7 @@ void intel_gt_pm_init(struct intel_gt *gt) * user. */ intel_rc6_init(>->rc6); + intel_rps_init(>->rps); } static bool reset_engines(struct intel_gt *gt) @@ -140,12 +137,6 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) engine->reset.finish(engine); } -void intel_gt_pm_disable(struct intel_gt *gt) -{ - if (!is_mock_gt(gt)) - intel_sanitize_gt_powersave(gt->i915); -} - void intel_gt_pm_fini(struct intel_gt *gt) { intel_rc6_fini(>->rc6); @@ -164,9 +155,13 @@ int intel_gt_resume(struct intel_gt *gt) * allowing us to fixup the user contexts on their first pin. */ intel_gt_pm_get(gt); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(>->rc6); + intel_rps_enable(>->rps); + intel_llc_enable(>->llc); + for_each_engine(engine, gt, id) { struct intel_context *ce; @@ -217,8 +212,11 @@ void intel_gt_suspend(struct intel_gt *gt) /* We expect to be idle already; but also want to be independent */ wait_for_idle(gt); - with_intel_runtime_pm(gt->uncore->rpm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) { + intel_rps_disable(>->rps); intel_rc6_disable(>->rc6); + intel_llc_disable(>->llc); + } } void intel_gt_runtime_suspend(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 0ed87da4bb68..d924c984c74d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -39,7 +39,6 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) void intel_gt_pm_init_early(struct intel_gt *gt); void intel_gt_pm_init(struct intel_gt *gt); -void intel_gt_pm_disable(struct intel_gt *gt); void intel_gt_pm_fini(struct intel_gt *gt); void intel_gt_sanitize(struct intel_gt *gt, bool force); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index fcb3a6c9421c..d4e14dbd172e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -20,6 +20,7 @@ #include "intel_llc_types.h" #include "intel_reset_types.h" #include "intel_rc6_types.h" +#include "intel_rps_types.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -73,6 +74,7 @@ struct intel_gt { struct intel_llc llc; struct intel_rc6 rc6; + struct intel_rps rps; ktime_t last_init_time; diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c index 35093eb5f24e..ceb785b75c25 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc.c +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -48,7 +48,7 @@ static bool get_ia_constants(struct intel_llc *llc, struct ia_constants *consts) { struct drm_i915_private *i915 = llc_to_gt(llc)->i915; - struct intel_rps *rps = &i915->gt_pm.rps; + struct intel_rps *rps = &llc_to_gt(llc)->rps; if (rps->max_freq <= rps->min_freq) return false; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c new file mode 100644 index 000000000000..30f56c786468 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -0,0 +1,1872 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_irq.h" +#include "intel_gt_pm_irq.h" +#include "intel_rps.h" +#include "intel_sideband.h" +#include "../../../platform/x86/intel_ips.h" + +/* + * Lock protecting IPS related data structures + */ +static DEFINE_SPINLOCK(mchdev_lock); + +static struct intel_gt *rps_to_gt(struct intel_rps *rps) +{ + return container_of(rps, struct intel_gt, rps); +} + +static struct drm_i915_private *rps_to_i915(struct intel_rps *rps) +{ + return rps_to_gt(rps)->i915; +} + +static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) +{ + return rps_to_gt(rps)->uncore; +} + +static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) +{ + return mask & ~rps->pm_intrmsk_mbz; +} + +static u32 rps_pm_mask(struct intel_rps *rps, u8 val) +{ + u32 mask = 0; + + /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ + if (val > rps->min_freq_softlimit) + mask |= (GEN6_PM_RP_UP_EI_EXPIRED | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + + if (val < rps->max_freq_softlimit) + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; + + mask &= rps->pm_events; + + return rps_pm_sanitize_mask(rps, ~mask); +} + +static void rps_reset_ei(struct intel_rps *rps) +{ + memset(&rps->ei, 0, sizeof(rps->ei)); +} + +static void rps_enable_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + rps_reset_ei(rps); + + if (IS_VALLEYVIEW(gt->i915)) + /* WaGsvRC0ResidencyMethod:vlv */ + rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; + else + rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_enable_irq(gt, rps->pm_events); + spin_unlock_irq(>->irq_lock); + + intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, + rps_pm_mask(rps, rps->cur_freq)); +} + +static void gen6_rps_reset_interrupts(struct intel_rps *rps) +{ + gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS); +} + +static void gen11_rps_reset_interrupts(struct intel_rps *rps) +{ + while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM)) + ; +} + +static void rps_reset_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + spin_lock_irq(>->irq_lock); + if (INTEL_GEN(gt->i915) >= 11) + gen11_rps_reset_interrupts(rps); + else + gen6_rps_reset_interrupts(rps); + + rps->pm_iir = 0; + spin_unlock_irq(>->irq_lock); +} + +static void rps_disable_interrupts(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + rps->pm_events = 0; + + intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, + rps_pm_sanitize_mask(rps, ~0u)); + + spin_lock_irq(>->irq_lock); + gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); + spin_unlock_irq(>->irq_lock); + + intel_synchronize_irq(gt->i915); + + /* + * Now that we will not be generating any more work, flush any + * outstanding tasks. As we are called on the RPS idle path, + * we will reset the GPU to minimum frequencies, so the current + * state of the worker can be discarded. + */ + cancel_work_sync(&rps->work); + + rps_reset_interrupts(rps); +} + +static const struct cparams { + u16 i; + u16 t; + u16 m; + u16 c; +} cparams[] = { + { 1, 1333, 301, 28664 }, + { 1, 1066, 294, 24460 }, + { 1, 800, 294, 25192 }, + { 0, 1333, 276, 27605 }, + { 0, 1066, 276, 27605 }, + { 0, 800, 231, 23784 }, +}; + +static void gen5_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + u8 fmax, fmin, fstart; + u32 rgvmodectl; + int c_m, i; + + if (i915->fsb_freq <= 3200) + c_m = 0; + else if (i915->fsb_freq <= 4800) + c_m = 1; + else + c_m = 2; + + for (i = 0; i < ARRAY_SIZE(cparams); i++) { + if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) { + rps->ips.m = cparams[i].m; + rps->ips.c = cparams[i].c; + break; + } + } + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + + /* Set up min, max, and cur for interrupt handling */ + fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; + fmin = (rgvmodectl & MEMMODE_FMIN_MASK); + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", + fmax, fmin, fstart); + + rps->min_freq = -fstart; + rps->max_freq = -fmin; + + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; +} + +static unsigned long +__ips_chipset_val(struct intel_ips *ips) +{ + struct intel_uncore *uncore = + rps_to_uncore(container_of(ips, struct intel_rps, ips)); + unsigned long now = jiffies_to_msecs(jiffies), dt; + unsigned long result; + u64 total, delta; + + lockdep_assert_held(&mchdev_lock); + + /* + * Prevent division-by-zero if we are asking too fast. + * Also, we don't get interesting results if we are polling + * faster than once in 10ms, so just return the saved value + * in such cases. + */ + dt = now - ips->last_time1; + if (dt <= 10) + return ips->chipset_power; + + /* FIXME: handle per-counter overflow */ + total = intel_uncore_read(uncore, DMIEC); + total += intel_uncore_read(uncore, DDREC); + total += intel_uncore_read(uncore, CSIEC); + + delta = total - ips->last_count1; + + result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10); + + ips->last_count1 = total; + ips->last_time1 = now; + + ips->chipset_power = result; + + return result; +} + +static unsigned long ips_mch_val(struct intel_uncore *uncore) +{ + unsigned int m, x, b; + u32 tsfs; + + tsfs = intel_uncore_read(uncore, TSFS); + x = intel_uncore_read8(uncore, TR1); + + b = tsfs & TSFS_INTR_MASK; + m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT; + + return m * x / 127 - b; +} + +static int _pxvid_to_vd(u8 pxvid) +{ + if (pxvid == 0) + return 0; + + if (pxvid >= 8 && pxvid < 31) + pxvid = 31; + + return (pxvid + 2) * 125; +} + +static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid) +{ + const int vd = _pxvid_to_vd(pxvid); + + if (INTEL_INFO(i915)->is_mobile) + return max(vd - 1125, 0); + + return vd; +} + +static void __gen5_ips_update(struct intel_ips *ips) +{ + struct intel_uncore *uncore = + rps_to_uncore(container_of(ips, struct intel_rps, ips)); + u64 now, delta, dt; + u32 count; + + lockdep_assert_held(&mchdev_lock); + + now = ktime_get_raw_ns(); + dt = now - ips->last_time2; + do_div(dt, NSEC_PER_MSEC); + + /* Don't divide by 0 */ + if (dt <= 10) + return; + + count = intel_uncore_read(uncore, GFXEC); + delta = count - ips->last_count2; + + ips->last_count2 = count; + ips->last_time2 = now; + + /* More magic constants... */ + ips->gfx_power = div_u64(delta * 1181, dt * 10); +} + +static void gen5_rps_update(struct intel_rps *rps) +{ + spin_lock_irq(&mchdev_lock); + __gen5_ips_update(&rps->ips); + spin_unlock_irq(&mchdev_lock); +} + +static bool gen5_rps_set(struct intel_rps *rps, u8 val) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u16 rgvswctl; + + lockdep_assert_held(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + if (rgvswctl & MEMCTL_CMD_STS) { + DRM_DEBUG("gpu busy, RCS change rejected\n"); + return false; /* still busy with another command */ + } + + val = -val; + + rgvswctl = + (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | + (val << MEMCTL_FREQ_SHIFT) | + MEMCTL_SFCAVM; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + intel_uncore_posting_read16(uncore, MEMSWCTL); + + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); + + return true; +} + +static unsigned long intel_pxfreq(u32 vidfreq) +{ + int div = (vidfreq & 0x3f0000) >> 16; + int post = (vidfreq & 0x3000) >> 12; + int pre = (vidfreq & 0x7); + + if (!pre) + return 0; + + return div * 133333 / (pre << post); +} + +static unsigned int init_emon(struct intel_uncore *uncore) +{ + u8 pxw[16]; + int i; + + /* Disable to program */ + intel_uncore_write(uncore, ECR, 0); + intel_uncore_posting_read(uncore, ECR); + + /* Program energy weights for various events */ + intel_uncore_write(uncore, SDEW, 0x15040d00); + intel_uncore_write(uncore, CSIEW0, 0x007f0000); + intel_uncore_write(uncore, CSIEW1, 0x1e220004); + intel_uncore_write(uncore, CSIEW2, 0x04000004); + + for (i = 0; i < 5; i++) + intel_uncore_write(uncore, PEW(i), 0); + for (i = 0; i < 3; i++) + intel_uncore_write(uncore, DEW(i), 0); + + /* Program P-state weights to account for frequency power adjustment */ + for (i = 0; i < 16; i++) { + u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i)); + unsigned int freq = intel_pxfreq(pxvidfreq); + unsigned int vid = + (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; + unsigned int val; + + val = vid * vid * freq / 1000 * 255; + val /= 127 * 127 * 900; + + pxw[i] = val; + } + /* Render standby states get 0 weight */ + pxw[14] = 0; + pxw[15] = 0; + + for (i = 0; i < 4; i++) { + intel_uncore_write(uncore, PXW(i), + pxw[i * 4 + 0] << 24 | + pxw[i * 4 + 1] << 16 | + pxw[i * 4 + 2] << 8 | + pxw[i * 4 + 3] << 0); + } + + /* Adjust magic regs to magic values (more experimental results) */ + intel_uncore_write(uncore, OGW0, 0); + intel_uncore_write(uncore, OGW1, 0); + intel_uncore_write(uncore, EG0, 0x00007f00); + intel_uncore_write(uncore, EG1, 0x0000000e); + intel_uncore_write(uncore, EG2, 0x000e0000); + intel_uncore_write(uncore, EG3, 0x68000300); + intel_uncore_write(uncore, EG4, 0x42000000); + intel_uncore_write(uncore, EG5, 0x00140031); + intel_uncore_write(uncore, EG6, 0); + intel_uncore_write(uncore, EG7, 0); + + for (i = 0; i < 8; i++) + intel_uncore_write(uncore, PXWL(i), 0); + + /* Enable PMON + select events */ + intel_uncore_write(uncore, ECR, 0x80000019); + + return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK; +} + +static bool gen5_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u8 fstart, vstart; + u32 rgvmodectl; + + spin_lock_irq(&mchdev_lock); + + rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); + + /* Enable temp reporting */ + intel_uncore_write16(uncore, PMMISC, + intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN); + intel_uncore_write16(uncore, TSC1, + intel_uncore_read16(uncore, TSC1) | TSE); + + /* 100ms RC evaluation intervals */ + intel_uncore_write(uncore, RCUPEI, 100000); + intel_uncore_write(uncore, RCDNEI, 100000); + + /* Set max/min thresholds to 90ms and 80ms respectively */ + intel_uncore_write(uncore, RCBMAXAVG, 90000); + intel_uncore_write(uncore, RCBMINAVG, 80000); + + intel_uncore_write(uncore, MEMIHYST, 1); + + /* Set up min, max, and cur for interrupt handling */ + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; + + vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & + PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; + + intel_uncore_write(uncore, + MEMINTREN, + MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); + + intel_uncore_write(uncore, VIDSTART, vstart); + intel_uncore_posting_read(uncore, VIDSTART); + + rgvmodectl |= MEMMODE_SWMODE_EN; + intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); + + if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & + MEMCTL_CMD_STS) == 0, 10)) + DRM_ERROR("stuck trying to change perf mode\n"); + mdelay(1); + + gen5_rps_set(rps, rps->cur_freq); + + rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); + rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); + rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC); + rps->ips.last_time1 = jiffies_to_msecs(jiffies); + + rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); + rps->ips.last_time2 = ktime_get_raw_ns(); + + spin_unlock_irq(&mchdev_lock); + + rps->ips.corr = init_emon(uncore); + + return true; +} + +static void gen5_rps_disable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u16 rgvswctl; + + spin_lock_irq(&mchdev_lock); + + rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); + + /* Ack interrupts, disable EFC interrupt */ + intel_uncore_write(uncore, MEMINTREN, + intel_uncore_read(uncore, MEMINTREN) & + ~MEMINT_EVAL_CHG_EN); + intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + intel_uncore_write(uncore, DEIER, + intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT); + intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT); + intel_uncore_write(uncore, DEIMR, + intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT); + + /* Go back to the starting frequency */ + gen5_rps_set(rps, rps->idle_freq); + mdelay(1); + rgvswctl |= MEMCTL_CMD_STS; + intel_uncore_write(uncore, MEMSWCTL, rgvswctl); + mdelay(1); + + spin_unlock_irq(&mchdev_lock); +} + +static u32 rps_limits(struct intel_rps *rps, u8 val) +{ + u32 limits; + + /* + * Only set the down limit when we've reached the lowest level to avoid + * getting more interrupts, otherwise leave this clear. This prevents a + * race in the hw when coming out of rc6: There's a tiny window where + * the hw runs at the minimal clock before selecting the desired + * frequency, if the down threshold expires in that window we will not + * receive a down interrupt. + */ + if (INTEL_GEN(rps_to_i915(rps)) >= 9) { + limits = rps->max_freq_softlimit << 23; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 14; + } else { + limits = rps->max_freq_softlimit << 24; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 16; + } + + return limits; +} + +static void rps_set_power(struct intel_rps *rps, int new_power) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 threshold_up = 0, threshold_down = 0; /* in % */ + u32 ei_up = 0, ei_down = 0; + + lockdep_assert_held(&rps->power.mutex); + + if (new_power == rps->power.mode) + return; + + /* Note the units here are not exactly 1us, but 1280ns. */ + switch (new_power) { + case LOW_POWER: + /* Upclock if more than 95% busy over 16ms */ + ei_up = 16000; + threshold_up = 95; + + /* Downclock if less than 85% busy over 32ms */ + ei_down = 32000; + threshold_down = 85; + break; + + case BETWEEN: + /* Upclock if more than 90% busy over 13ms */ + ei_up = 13000; + threshold_up = 90; + + /* Downclock if less than 75% busy over 32ms */ + ei_down = 32000; + threshold_down = 75; + break; + + case HIGH_POWER: + /* Upclock if more than 85% busy over 10ms */ + ei_up = 10000; + threshold_up = 85; + + /* Downclock if less than 60% busy over 32ms */ + ei_down = 32000; + threshold_down = 60; + break; + } + + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(i915)) + goto skip_hw_write; + + intel_uncore_write(uncore, GEN6_RP_UP_EI, + GT_INTERVAL_FROM_US(i915, ei_up)); + intel_uncore_write(uncore, GEN6_RP_UP_THRESHOLD, + GT_INTERVAL_FROM_US(i915, + ei_up * threshold_up / 100)); + + intel_uncore_write(uncore, GEN6_RP_DOWN_EI, + GT_INTERVAL_FROM_US(i915, ei_down)); + intel_uncore_write(uncore, GEN6_RP_DOWN_THRESHOLD, + GT_INTERVAL_FROM_US(i915, + ei_down * threshold_down / 100)); + + intel_uncore_write(uncore, GEN6_RP_CONTROL, + (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + +skip_hw_write: + rps->power.mode = new_power; + rps->power.up_threshold = threshold_up; + rps->power.down_threshold = threshold_down; +} + +static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) +{ + int new_power; + + new_power = rps->power.mode; + switch (rps->power.mode) { + case LOW_POWER: + if (val > rps->efficient_freq + 1 && + val > rps->cur_freq) + new_power = BETWEEN; + break; + + case BETWEEN: + if (val <= rps->efficient_freq && + val < rps->cur_freq) + new_power = LOW_POWER; + else if (val >= rps->rp0_freq && + val > rps->cur_freq) + new_power = HIGH_POWER; + break; + + case HIGH_POWER: + if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && + val < rps->cur_freq) + new_power = BETWEEN; + break; + } + /* Max/min bins are special */ + if (val <= rps->min_freq_softlimit) + new_power = LOW_POWER; + if (val >= rps->max_freq_softlimit) + new_power = HIGH_POWER; + + mutex_lock(&rps->power.mutex); + if (rps->power.interactive) + new_power = HIGH_POWER; + rps_set_power(rps, new_power); + mutex_unlock(&rps->power.mutex); +} + +void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) +{ + if (!rps->enabled) + return; + + mutex_lock(&rps->power.mutex); + if (interactive) { + if (!rps->power.interactive++ && rps->active) + rps_set_power(rps, HIGH_POWER); + } else { + GEM_BUG_ON(!rps->power.interactive); + rps->power.interactive--; + } + mutex_unlock(&rps->power.mutex); +} + +static int gen6_rps_set(struct intel_rps *rps, u8 val) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 swreq; + + if (INTEL_GEN(i915) >= 9) + swreq = GEN9_FREQUENCY(val); + else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + swreq = HSW_FREQUENCY(val); + else + swreq = (GEN6_FREQUENCY(val) | + GEN6_OFFSET(0) | + GEN6_AGGRESSIVE_TURBO); + intel_uncore_write(uncore, GEN6_RPNSWREQ, swreq); + + return 0; +} + +static int vlv_rps_set(struct intel_rps *rps, u8 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int err; + + vlv_punit_get(i915); + err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); + vlv_punit_put(i915); + + return err; +} + +static int rps_set(struct intel_rps *rps, u8 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int err; + + if (INTEL_GEN(i915) < 6) + return 0; + + if (val == rps->last_freq) + return 0; + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + err = vlv_rps_set(rps, val); + else + err = gen6_rps_set(rps, val); + if (err) + return err; + + gen6_rps_set_thresholds(rps, val); + rps->last_freq = val; + + return 0; +} + +void intel_rps_unpark(struct intel_rps *rps) +{ + u8 freq; + + if (!rps->enabled) + return; + + /* + * Use the user's desired frequency as a guide, but for better + * performance, jump directly to RPe as our starting frequency. + */ + mutex_lock(&rps->lock); + rps->active = true; + freq = max(rps->cur_freq, rps->efficient_freq), + freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit); + intel_rps_set(rps, freq); + rps->last_adj = 0; + mutex_unlock(&rps->lock); + + if (INTEL_GEN(rps_to_i915(rps)) >= 6) + rps_enable_interrupts(rps); + + if (IS_GEN(rps_to_i915(rps), 5)) + gen5_rps_update(rps); +} + +void intel_rps_park(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (!rps->enabled) + return; + + if (INTEL_GEN(i915) >= 6) + rps_disable_interrupts(rps); + + rps->active = false; + if (rps->last_freq <= rps->idle_freq) + return; + + /* + * The punit delays the write of the frequency and voltage until it + * determines the GPU is awake. During normal usage we don't want to + * waste power changing the frequency if the GPU is sleeping (rc6). + * However, the GPU and driver is now idle and we do not want to delay + * switching to minimum voltage (reducing power whilst idle) as we do + * not expect to be woken in the near future and so must flush the + * change by waking the device. + * + * We choose to take the media powerwell (either would do to trick the + * punit into committing the voltage change) as that takes a lot less + * power than the render powerwell. + */ + intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); + rps_set(rps, rps->idle_freq); + intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); +} + +void intel_rps_boost(struct i915_request *rq) +{ + struct intel_rps *rps = &rq->engine->gt->rps; + unsigned long flags; + + if (i915_request_signaled(rq) || !rps->active) + return; + + /* Serializes with i915_request_retire() */ + spin_lock_irqsave(&rq->lock, flags); + if (!i915_request_has_waitboost(rq) && + !dma_fence_is_signaled_locked(&rq->fence)) { + rq->flags |= I915_REQUEST_WAITBOOST; + + if (!atomic_fetch_inc(&rps->num_waiters) && + READ_ONCE(rps->cur_freq) < rps->boost_freq) + schedule_work(&rps->work); + + atomic_inc(&rps->boosts); + } + spin_unlock_irqrestore(&rq->lock, flags); +} + +int intel_rps_set(struct intel_rps *rps, u8 val) +{ + int err = 0; + + lockdep_assert_held(&rps->lock); + GEM_BUG_ON(val > rps->max_freq); + GEM_BUG_ON(val < rps->min_freq); + + if (rps->active) { + err = rps_set(rps, val); + + /* + * Make sure we continue to get interrupts + * until we hit the minimum or maximum frequencies. + */ + if (INTEL_GEN(rps_to_i915(rps)) >= 6) { + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_write(uncore, GEN6_RP_INTERRUPT_LIMITS, + rps_limits(rps, val)); + + intel_uncore_write(uncore, GEN6_PMINTRMSK, + rps_pm_mask(rps, val)); + } + } + + if (err == 0) + rps->cur_freq = val; + + return err; +} + +static void gen6_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* All of these values are in units of 50MHz */ + + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ + if (IS_GEN9_LP(i915)) { + u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + + rps->rp0_freq = (rp_state_cap >> 16) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 0) & 0xff; + } else { + u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + + rps->rp0_freq = (rp_state_cap >> 0) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 16) & 0xff; + } + + /* hw_max = RP0 until we check for overclocking */ + rps->max_freq = rps->rp0_freq; + + rps->efficient_freq = rps->rp1_freq; + if (IS_HASWELL(i915) || IS_BROADWELL(i915) || + IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + u32 ddcc_status = 0; + + if (sandybridge_pcode_read(i915, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status, NULL) == 0) + rps->efficient_freq = + clamp_t(u8, + (ddcc_status >> 8) & 0xff, + rps->min_freq, + rps->max_freq); + } + + if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { + /* Store the frequency values in 16.66 MHZ units, which is + * the natural hardware unit for SKL + */ + rps->rp0_freq *= GEN9_FREQ_SCALER; + rps->rp1_freq *= GEN9_FREQ_SCALER; + rps->min_freq *= GEN9_FREQ_SCALER; + rps->max_freq *= GEN9_FREQ_SCALER; + rps->efficient_freq *= GEN9_FREQ_SCALER; + } +} + +static bool rps_reset(struct intel_rps *rps) +{ + /* force a reset */ + rps->power.mode = -1; + rps->last_freq = -1; + + if (rps_set(rps, rps->min_freq)) { + DRM_ERROR("Failed to reset RPS to initial values\n"); + return false; + } + + rps->cur_freq = rps->min_freq; + return true; +} + +/* See the Gen9_GT_PM_Programming_Guide doc for the below */ +static bool gen9_rps_enable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* Program defaults and thresholds for RPS */ + if (IS_GEN(i915, 9)) + intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, + GEN9_FREQUENCY(rps->rp1_freq)); + + /* 1 second timeout */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, + GT_INTERVAL_FROM_US(i915, 1000000)); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); + + return rps_reset(rps); +} + +static bool gen8_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, + HSW_FREQUENCY(rps->rp1_freq)); + + /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, + 100000000 / 128); /* 1 second timeout */ + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + return rps_reset(rps); +} + +static bool gen6_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + /* Power down if completely idle for over 50ms */ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + return rps_reset(rps); +} + +static int chv_rps_max_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); + + switch (RUNTIME_INFO(i915)->sseu.eu_total) { + case 8: + /* (2 * 4) config */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; + break; + case 12: + /* (2 * 6) config */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT; + break; + case 16: + /* (2 * 8) config */ + default: + /* Setting (2 * 8) Min RP0 for any other combination */ + val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT; + break; + } + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static int chv_rps_rpe_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG); + val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT; + + return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; +} + +static int chv_rps_guar_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static u32 chv_rps_min_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE); + val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT; + + return val & FB_GFX_FREQ_FUSE_MASK; +} + +static bool chv_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + /* 1: Program defaults and thresholds for RPS*/ + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + /* 2: Enable RPS */ + intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + + /* Setting Fixed Bias */ + vlv_punit_get(i915); + + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; + vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(i915); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + return rps_reset(rps); +} + +static int vlv_rps_guar_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rp1; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK; + rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; + + return rp1; +} + +static int vlv_rps_max_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rp0; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); + + rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; + /* Clamp to max */ + rp0 = min_t(u32, rp0, 0xea); + + return rp0; +} + +static int vlv_rps_rpe_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val, rpe; + + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO); + rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; + val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI); + rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; + + return rpe; +} + +static int vlv_rps_min_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff; + /* + * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value + * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on + * a BYT-M B0 the above register contains 0xbf. Moreover when setting + * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 + * to make sure it matches what Punit accepts. + */ + return max_t(u32, val, 0xc0); +} + +static bool vlv_rps_enable(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); + intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); + intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); + + intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + + intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_CONT); + + vlv_punit_get(i915); + + /* Setting Fixed Bias */ + val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; + vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + + vlv_punit_put(i915); + + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); + + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + + return rps_reset(rps); +} + +static unsigned long __ips_gfx_val(struct intel_ips *ips) +{ + struct intel_rps *rps = container_of(ips, typeof(*rps), ips); + struct intel_uncore *uncore = rps_to_uncore(rps); + unsigned long t, corr, state1, corr2, state2; + u32 pxvid, ext_v; + + lockdep_assert_held(&mchdev_lock); + + pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq)); + pxvid = (pxvid >> 24) & 0x7f; + ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid); + + state1 = ext_v; + + /* Revel in the empirically derived constants */ + + /* Correction factor in 1/100000 units */ + t = ips_mch_val(uncore); + if (t > 80) + corr = t * 2349 + 135940; + else if (t >= 50) + corr = t * 964 + 29317; + else /* < 50 */ + corr = t * 301 + 1004; + + corr = corr * 150142 * state1 / 10000 - 78642; + corr /= 100000; + corr2 = corr * ips->corr; + + state2 = corr2 * state1 / 10000; + state2 /= 100; /* convert to mW */ + + __gen5_ips_update(ips); + + return ips->gfx_power + state2; +} + +void intel_rps_enable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + if (IS_CHERRYVIEW(i915)) + rps->enabled = chv_rps_enable(rps); + else if (IS_VALLEYVIEW(i915)) + rps->enabled = vlv_rps_enable(rps); + else if (INTEL_GEN(i915) >= 9) + rps->enabled = gen9_rps_enable(rps); + else if (INTEL_GEN(i915) >= 8) + rps->enabled = gen8_rps_enable(rps); + else if (INTEL_GEN(i915) >= 6) + rps->enabled = gen6_rps_enable(rps); + else if (IS_IRONLAKE_M(i915)) + rps->enabled = gen5_rps_enable(rps); + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + if (!rps->enabled) + return; + + WARN_ON(rps->max_freq < rps->min_freq); + WARN_ON(rps->idle_freq > rps->max_freq); + + WARN_ON(rps->efficient_freq < rps->min_freq); + WARN_ON(rps->efficient_freq > rps->max_freq); +} + +static void gen6_rps_disable(struct intel_rps *rps) +{ + intel_uncore_write(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); +} + +void intel_rps_disable(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + rps->enabled = false; + + if (INTEL_GEN(i915) >= 6) + gen6_rps_disable(rps); + else if (IS_IRONLAKE_M(i915)) + gen5_rps_disable(rps); +} + +static int byt_gpu_freq(struct intel_rps *rps, int val) +{ + /* + * N = val - 0xb7 + * Slow = Fast = GPLL ref * N + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); +} + +static int byt_freq_opcode(struct intel_rps *rps, int val) +{ + return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; +} + +static int chv_gpu_freq(struct intel_rps *rps, int val) +{ + /* + * N = val / 2 + * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 + */ + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); +} + +static int chv_freq_opcode(struct intel_rps *rps, int val) +{ + /* CHV needs even values */ + return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; +} + +int intel_gpu_freq(struct intel_rps *rps, int val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (INTEL_GEN(i915) >= 9) + return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, + GEN9_FREQ_SCALER); + else if (IS_CHERRYVIEW(i915)) + return chv_gpu_freq(rps, val); + else if (IS_VALLEYVIEW(i915)) + return byt_gpu_freq(rps, val); + else + return val * GT_FREQUENCY_MULTIPLIER; +} + +int intel_freq_opcode(struct intel_rps *rps, int val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (INTEL_GEN(i915) >= 9) + return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, + GT_FREQUENCY_MULTIPLIER); + else if (IS_CHERRYVIEW(i915)) + return chv_freq_opcode(rps, val); + else if (IS_VALLEYVIEW(i915)) + return byt_freq_opcode(rps, val); + else + return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); +} + +static void vlv_init_gpll_ref_freq(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + rps->gpll_ref_freq = + vlv_get_cck_clock(i915, "GPLL ref", + CCK_GPLL_CLOCK_CONTROL, + i915->czclk_freq); + + DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq); +} + +static void vlv_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + vlv_iosf_sb_get(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(rps); + + val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); + switch ((val >> 6) & 3) { + case 0: + case 1: + i915->mem_freq = 800; + break; + case 2: + i915->mem_freq = 1066; + break; + case 3: + i915->mem_freq = 1333; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + + rps->max_freq = vlv_rps_max_freq(rps); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = vlv_rps_rpe_freq(rps); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = vlv_rps_guar_freq(rps); + DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = vlv_rps_min_freq(rps); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); +} + +static void chv_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 val; + + vlv_iosf_sb_get(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + vlv_init_gpll_ref_freq(rps); + + val = vlv_cck_read(i915, CCK_FUSE_REG); + + switch ((val >> 2) & 0x7) { + case 3: + i915->mem_freq = 2000; + break; + default: + i915->mem_freq = 1600; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + + rps->max_freq = chv_rps_max_freq(rps); + rps->rp0_freq = rps->max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), + rps->max_freq); + + rps->efficient_freq = chv_rps_rpe_freq(rps); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), + rps->efficient_freq); + + rps->rp1_freq = chv_rps_guar_freq(rps); + DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), + rps->rp1_freq); + + rps->min_freq = chv_rps_min_freq(rps); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), + rps->min_freq); + + vlv_iosf_sb_put(i915, + BIT(VLV_IOSF_SB_PUNIT) | + BIT(VLV_IOSF_SB_NC) | + BIT(VLV_IOSF_SB_CCK)); + + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | + rps->min_freq) & 1, + "Odd GPU freq values\n"); +} + +static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) +{ + ei->ktime = ktime_get_raw(); + ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); + ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); +} + +static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + const struct intel_rps_ei *prev = &rps->ei; + struct intel_rps_ei now; + u32 events = 0; + + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) + return 0; + + vlv_c0_read(uncore, &now); + + if (prev->ktime) { + u64 time, c0; + u32 render, media; + + time = ktime_us_delta(now.ktime, prev->ktime); + + time *= rps_to_i915(rps)->czclk_freq; + + /* Workload can be split between render + media, + * e.g. SwapBuffers being blitted in X after being rendered in + * mesa. To account for this we need to combine both engines + * into our activity counter. + */ + render = now.render_c0 - prev->render_c0; + media = now.media_c0 - prev->media_c0; + c0 = max(render, media); + c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ + + if (c0 > time * rps->power.up_threshold) + events = GEN6_PM_RP_UP_THRESHOLD; + else if (c0 < time * rps->power.down_threshold) + events = GEN6_PM_RP_DOWN_THRESHOLD; + } + + rps->ei = now; + return events; +} + +static void rps_work(struct work_struct *work) +{ + struct intel_rps *rps = container_of(work, typeof(*rps), work); + struct intel_gt *gt = rps_to_gt(rps); + bool client_boost = false; + int new_freq, adj, min, max; + u32 pm_iir = 0; + + spin_lock_irq(>->irq_lock); + pm_iir = fetch_and_zero(&rps->pm_iir); + client_boost = atomic_read(&rps->num_waiters); + spin_unlock_irq(>->irq_lock); + + /* Make sure we didn't queue anything we're not going to process. */ + if ((pm_iir & rps->pm_events) == 0 && !client_boost) + goto out; + + mutex_lock(&rps->lock); + + pm_iir |= vlv_wa_c0_ei(rps, pm_iir); + + adj = rps->last_adj; + new_freq = rps->cur_freq; + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; + if (client_boost) + max = rps->max_freq; + if (client_boost && new_freq < rps->boost_freq) { + new_freq = rps->boost_freq; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { + if (adj > 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1; + + if (new_freq >= rps->max_freq_softlimit) + adj = 0; + } else if (client_boost) { + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { + if (rps->cur_freq > rps->efficient_freq) + new_freq = rps->efficient_freq; + else if (rps->cur_freq > rps->min_freq_softlimit) + new_freq = rps->min_freq_softlimit; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { + if (adj < 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1; + + if (new_freq <= rps->min_freq_softlimit) + adj = 0; + } else { /* unknown event */ + adj = 0; + } + + rps->last_adj = adj; + + /* + * Limit deboosting and boosting to keep ourselves at the extremes + * when in the respective power modes (i.e. slowly decrease frequencies + * while in the HIGH_POWER zone and slowly increase frequencies while + * in the LOW_POWER zone). On idle, we will hit the timeout and drop + * to the next level quickly, and conversely if busy we expect to + * hit a waitboost and rapidly switch into max power. + */ + if ((adj < 0 && rps->power.mode == HIGH_POWER) || + (adj > 0 && rps->power.mode == LOW_POWER)) + rps->last_adj = 0; + + /* sysfs frequency interfaces may have snuck in while servicing the + * interrupt + */ + new_freq += adj; + new_freq = clamp_t(int, new_freq, min, max); + + if (intel_rps_set(rps, new_freq)) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + rps->last_adj = 0; + } + + mutex_unlock(&rps->lock); + +out: + spin_lock_irq(>->irq_lock); + gen6_gt_pm_unmask_irq(gt, rps->pm_events); + spin_unlock_irq(>->irq_lock); +} + +void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) +{ + struct intel_gt *gt = rps_to_gt(rps); + const u32 events = rps->pm_events & pm_iir; + + lockdep_assert_held(>->irq_lock); + + if (unlikely(!events)) + return; + + gen6_gt_pm_mask_irq(gt, events); + + rps->pm_iir |= events; + schedule_work(&rps->work); +} + +void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + if (pm_iir & rps->pm_events) { + struct intel_gt *gt = rps_to_gt(rps); + + spin_lock(>->irq_lock); + gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events); + rps->pm_iir |= pm_iir & rps->pm_events; + schedule_work(&rps->work); + spin_unlock(>->irq_lock); + } + + if (INTEL_GEN(i915) >= 8) + return; + + if (pm_iir & PM_VEBOX_USER_INTERRUPT) + intel_engine_breadcrumbs_irq(i915->engine[VECS0]); + + if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) + DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); +} + +void gen5_rps_irq_handler(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + u32 busy_up, busy_down, max_avg, min_avg; + u8 new_freq; + + spin_lock(&mchdev_lock); + + intel_uncore_write16(uncore, + MEMINTRSTS, + intel_uncore_read(uncore, MEMINTRSTS)); + + intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); + busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); + busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); + max_avg = intel_uncore_read(uncore, RCBMAXAVG); + min_avg = intel_uncore_read(uncore, RCBMINAVG); + + /* Handle RCS change request from hw */ + new_freq = rps->cur_freq; + if (busy_up > max_avg) + new_freq++; + else if (busy_down < min_avg) + new_freq--; + new_freq = clamp(new_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq)) + rps->cur_freq = new_freq; + + spin_unlock(&mchdev_lock); +} + +void intel_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + + mutex_init(&rps->lock); + mutex_init(&rps->power.mutex); + + INIT_WORK(&rps->work, rps_work); + + atomic_set(&rps->num_waiters, 0); + + if (IS_CHERRYVIEW(i915)) + chv_rps_init(rps); + else if (IS_VALLEYVIEW(i915)) + vlv_rps_init(rps); + else if (INTEL_GEN(i915) >= 6) + gen6_rps_init(rps); + else if (IS_IRONLAKE_M(i915)) + gen5_rps_init(rps); + + /* Derive initial user preferences/limits from the hardware limits */ + rps->max_freq_softlimit = rps->max_freq; + rps->min_freq_softlimit = rps->min_freq; + + /* After setting max-softlimit, find the overclock max freq */ + if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { + u32 params = 0; + + sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS, + ¶ms, NULL); + if (params & BIT(31)) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (rps->max_freq & 0xff) * 50, + (params & 0xff) * 50); + rps->max_freq = params & 0xff; + } + } + + /* Finally allow us to boost to max by default */ + rps->boost_freq = rps->max_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; + + rps->pm_intrmsk_mbz = 0; + + /* + * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer + * if GEN6_PM_UP_EI_EXPIRED is masked. + * + * TODO: verify if this can be reproduced on VLV,CHV. + */ + if (INTEL_GEN(i915) <= 7) + rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; + + if (INTEL_GEN(i915) >= 8) + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; +} + +u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 cagf; + + if (INTEL_GEN(i915) >= 9) + cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) + cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + else + cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + + return cagf; +} + +/* External interface for intel_ips.ko */ + +static struct drm_i915_private __rcu *ips_mchdev; + +/** + * Tells the intel_ips driver that the i915 driver is now loaded, if + * IPS got loaded first. + * + * This awkward dance is so that neither module has to depend on the + * other in order for IPS to do the appropriate communication of + * GPU turbo limits to i915. + */ +static void +ips_ping_for_i915_load(void) +{ + void (*link)(void); + + link = symbol_get(ips_link_to_i915_driver); + if (link) { + link(); + symbol_put(ips_link_to_i915_driver); + } +} + +void intel_rps_driver_register(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + /* + * We only register the i915 ips part with intel-ips once everything is + * set up, to avoid intel-ips sneaking in and reading bogus values. + */ + if (IS_GEN(gt->i915, 5)) { + rcu_assign_pointer(ips_mchdev, gt->i915); + ips_ping_for_i915_load(); + } +} + +void intel_rps_driver_unregister(struct intel_rps *rps) +{ + rcu_assign_pointer(ips_mchdev, NULL); +} + +static struct drm_i915_private *mchdev_get(void) +{ + struct drm_i915_private *i915; + + rcu_read_lock(); + i915 = rcu_dereference(ips_mchdev); + if (!kref_get_unless_zero(&i915->drm.ref)) + i915 = NULL; + rcu_read_unlock(); + + return i915; +} + +/** + * i915_read_mch_val - return value for IPS use + * + * Calculate and return a value for the IPS driver to use when deciding whether + * we have thermal and power headroom to increase CPU or GPU power budget. + */ +unsigned long i915_read_mch_val(void) +{ + struct drm_i915_private *i915; + unsigned long chipset_val = 0; + unsigned long graphics_val = 0; + intel_wakeref_t wakeref; + + i915 = mchdev_get(); + if (!i915) + return 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + struct intel_ips *ips = &i915->gt.rps.ips; + + spin_lock_irq(&mchdev_lock); + chipset_val = __ips_chipset_val(ips); + graphics_val = __ips_gfx_val(ips); + spin_unlock_irq(&mchdev_lock); + } + + drm_dev_put(&i915->drm); + return chipset_val + graphics_val; +} +EXPORT_SYMBOL_GPL(i915_read_mch_val); + +/** + * i915_gpu_raise - raise GPU frequency limit + * + * Raise the limit; IPS indicates we have thermal headroom. + */ +bool i915_gpu_raise(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + if (rps->max_freq_softlimit < rps->max_freq) + rps->max_freq_softlimit++; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_raise); + +/** + * i915_gpu_lower - lower GPU frequency limit + * + * IPS indicates we're close to a thermal limit, so throttle back the GPU + * frequency maximum. + */ +bool i915_gpu_lower(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + if (rps->max_freq_softlimit > rps->min_freq) + rps->max_freq_softlimit--; + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return true; +} +EXPORT_SYMBOL_GPL(i915_gpu_lower); + +/** + * i915_gpu_busy - indicate GPU business to IPS + * + * Tell the IPS driver whether or not the GPU is busy. + */ +bool i915_gpu_busy(void) +{ + struct drm_i915_private *i915; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + ret = i915->gt.awake; + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_busy); + +/** + * i915_gpu_turbo_disable - disable graphics turbo + * + * Disable graphics turbo by resetting the max frequency and setting the + * current frequency to the default. + */ +bool i915_gpu_turbo_disable(void) +{ + struct drm_i915_private *i915; + struct intel_rps *rps; + bool ret; + + i915 = mchdev_get(); + if (!i915) + return false; + + rps = &i915->gt.rps; + + spin_lock_irq(&mchdev_lock); + rps->max_freq_softlimit = rps->min_freq; + ret = gen5_rps_set(&i915->gt.rps, rps->min_freq); + spin_unlock_irq(&mchdev_lock); + + drm_dev_put(&i915->drm); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h new file mode 100644 index 000000000000..997a4b4e0207 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RPS_H +#define INTEL_RPS_H + +#include "intel_rps_types.h" + +struct i915_request; + +void intel_rps_init(struct intel_rps *rps); + +void intel_rps_driver_register(struct intel_rps *rps); +void intel_rps_driver_unregister(struct intel_rps *rps); + +void intel_rps_enable(struct intel_rps *rps); +void intel_rps_disable(struct intel_rps *rps); + +void intel_rps_park(struct intel_rps *rps); +void intel_rps_unpark(struct intel_rps *rps); +void intel_rps_boost(struct i915_request *rq); + +int intel_rps_set(struct intel_rps *rps, u8 val); +void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); + +int intel_gpu_freq(struct intel_rps *rps, int val); +int intel_freq_opcode(struct intel_rps *rps, int val); +u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat1); + +void gen5_rps_irq_handler(struct intel_rps *rps); +void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); +void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); + +#endif /* INTEL_RPS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h new file mode 100644 index 000000000000..c2e279154bd5 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -0,0 +1,93 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RPS_TYPES_H +#define INTEL_RPS_TYPES_H + +#include +#include +#include +#include +#include + +struct intel_ips { + u64 last_count1; + unsigned long last_time1; + unsigned long chipset_power; + u64 last_count2; + u64 last_time2; + unsigned long gfx_power; + u8 corr; + + int c, m; +}; + +struct intel_rps_ei { + ktime_t ktime; + u32 render_c0; + u32 media_c0; +}; + +struct intel_rps { + struct mutex lock; /* protects enabling and the worker */ + + /* + * work, interrupts_enabled and pm_iir are protected by + * dev_priv->irq_lock + */ + struct work_struct work; + bool enabled; + bool active; + u32 pm_iir; + + /* PM interrupt bits that should never be masked */ + u32 pm_intrmsk_mbz; + u32 pm_events; + + /* Frequencies are stored in potentially platform dependent multiples. + * In other words, *_freq needs to be multiplied by X to be interesting. + * Soft limits are those which are used for the dynamic reclocking done + * by the driver (raise frequencies under heavy loads, and lower for + * lighter loads). Hard limits are those imposed by the hardware. + * + * A distinction is made for overclocking, which is never enabled by + * default, and is considered to be above the hard limit if it's + * possible at all. + */ + u8 cur_freq; /* Current frequency (cached, may not == HW) */ + u8 last_freq; /* Last SWREQ frequency */ + u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */ + u8 max_freq_softlimit; /* Max frequency permitted by the driver */ + u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ + u8 min_freq; /* AKA RPn. Minimum frequency */ + u8 boost_freq; /* Frequency to request when wait boosting */ + u8 idle_freq; /* Frequency to request when we are idle */ + u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ + u8 rp1_freq; /* "less than" RP0 power/freqency */ + u8 rp0_freq; /* Non-overclocked max frequency. */ + u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */ + + int last_adj; + + struct { + struct mutex mutex; + + enum { LOW_POWER, BETWEEN, HIGH_POWER } mode; + unsigned int interactive; + + u8 up_threshold; /* Current %busy required to uplock */ + u8 down_threshold; /* Current %busy required to downclock */ + } power; + + atomic_t num_waiters; + atomic_t boosts; + + /* manual wa residency calculations */ + struct intel_rps_ei ei; + struct intel_ips ips; +}; + +#endif /* INTEL_RPS_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c index a7057785e420..fd3770e48ac7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_llc.c +++ b/drivers/gpu/drm/i915/gt/selftest_llc.c @@ -6,6 +6,7 @@ #include "intel_pm.h" /* intel_gpu_freq() */ #include "selftest_llc.h" +#include "intel_rps.h" static int gen6_verify_ring_freq(struct intel_llc *llc) { @@ -25,6 +26,8 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) for (gpu_freq = consts.min_gpu_freq; gpu_freq <= consts.max_gpu_freq; gpu_freq++) { + struct intel_rps *rps = &llc_to_gt(llc)->rps; + unsigned int ia_freq, ring_freq, found; u32 val; @@ -44,7 +47,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) if (found != ia_freq) { pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n", gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, - intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), found, ia_freq); err = -EINVAL; break; @@ -54,7 +57,7 @@ static int gen6_verify_ring_freq(struct intel_llc *llc) if (found != ring_freq) { pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n", gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, - intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), found, ring_freq); err = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1b1691aaed28..b7f8514e9b1d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1011,7 +1011,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) static void guc_interrupts_capture(struct intel_gt *gt) { - struct intel_rps *rps = >->i915->gt_pm.rps; + struct intel_rps *rps = >->rps; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1057,7 +1057,7 @@ static void guc_interrupts_capture(struct intel_gt *gt) static void guc_interrupts_release(struct intel_gt *gt) { - struct intel_rps *rps = >->i915->gt_pm.rps; + struct intel_rps *rps = >->rps; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4b6ce07c35d2..8016484ebcd3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -44,6 +44,7 @@ #include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "gt/intel_rc6.h" +#include "gt/intel_rps.h" #include "gt/uc/intel_guc_submission.h" #include "i915_debugfs.h" @@ -791,7 +792,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_uncore *uncore = &dev_priv->uncore; - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; intel_wakeref_t wakeref; int ret = 0; @@ -827,23 +828,23 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); seq_printf(m, "actual GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff)); + intel_gpu_freq(rps, (freq_sts >> 8) & 0xff)); seq_printf(m, "current GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->cur_freq)); + intel_gpu_freq(rps, rps->cur_freq)); seq_printf(m, "max GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, "min GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->min_freq)); + intel_gpu_freq(rps, rps->min_freq)); seq_printf(m, "idle GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->idle_freq)); + intel_gpu_freq(rps, rps->idle_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, rps->efficient_freq)); + intel_gpu_freq(rps, rps->efficient_freq)); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; @@ -877,7 +878,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) else reqf >>= 25; } - reqf = intel_gpu_freq(dev_priv, reqf); + reqf = intel_gpu_freq(rps, reqf); rpmodectl = I915_READ(GEN6_RP_CONTROL); rpinclimit = I915_READ(GEN6_RP_UP_THRESHOLD); @@ -890,8 +891,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - cagf = intel_gpu_freq(dev_priv, - intel_get_cagf(dev_priv, rpstat)); + cagf = intel_gpu_freq(rps, intel_get_cagf(rps, rpstat)); intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); @@ -968,37 +968,37 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", - intel_gpu_freq(dev_priv, max_freq)); + intel_gpu_freq(rps, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", - intel_gpu_freq(dev_priv, max_freq)); + intel_gpu_freq(rps, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", - intel_gpu_freq(dev_priv, max_freq)); + intel_gpu_freq(rps, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, "Current freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->cur_freq)); + intel_gpu_freq(rps, rps->cur_freq)); seq_printf(m, "Actual freq: %d MHz\n", cagf); seq_printf(m, "Idle freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->idle_freq)); + intel_gpu_freq(rps, rps->idle_freq)); seq_printf(m, "Min freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->min_freq)); + intel_gpu_freq(rps, rps->min_freq)); seq_printf(m, "Boost freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->boost_freq)); + intel_gpu_freq(rps, rps->boost_freq)); seq_printf(m, "Max freq: %d MHz\n", - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, rps->efficient_freq)); + intel_gpu_freq(rps, rps->efficient_freq)); } else { seq_puts(m, "no P-state info available\n"); } @@ -1375,7 +1375,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) static int i915_ring_freq_table(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; unsigned int max_gpu_freq, min_gpu_freq; intel_wakeref_t wakeref; int gpu_freq, ia_freq; @@ -1400,10 +1400,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) GEN6_PCODE_READ_MIN_FREQ_TABLE, &ia_freq, NULL); seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", - intel_gpu_freq(dev_priv, (gpu_freq * - (IS_GEN9_BC(dev_priv) || - INTEL_GEN(dev_priv) >= 10 ? - GEN9_FREQ_SCALER : 1))), + intel_gpu_freq(rps, + (gpu_freq * + (IS_GEN9_BC(dev_priv) || + INTEL_GEN(dev_priv) >= 10 ? + GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } @@ -1631,7 +1632,7 @@ static const char *rps_power_to_str(unsigned int power) static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; u32 act_freq = rps->cur_freq; intel_wakeref_t wakeref; @@ -1643,7 +1644,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) vlv_punit_put(dev_priv); act_freq = (act_freq >> 8) & 0xff; } else { - act_freq = intel_get_cagf(dev_priv, + act_freq = intel_get_cagf(rps, I915_READ(GEN6_RPSTAT1)); } } @@ -1654,17 +1655,17 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); seq_printf(m, "Frequency requested %d, actual %d\n", - intel_gpu_freq(dev_priv, rps->cur_freq), - intel_gpu_freq(dev_priv, act_freq)); + intel_gpu_freq(rps, rps->cur_freq), + intel_gpu_freq(rps, act_freq)); seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", - intel_gpu_freq(dev_priv, rps->min_freq), - intel_gpu_freq(dev_priv, rps->min_freq_softlimit), - intel_gpu_freq(dev_priv, rps->max_freq_softlimit), - intel_gpu_freq(dev_priv, rps->max_freq)); + intel_gpu_freq(rps, rps->min_freq), + intel_gpu_freq(rps, rps->min_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq_softlimit), + intel_gpu_freq(rps, rps->max_freq)); seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", - intel_gpu_freq(dev_priv, rps->idle_freq), - intel_gpu_freq(dev_priv, rps->efficient_freq), - intel_gpu_freq(dev_priv, rps->boost_freq)); + intel_gpu_freq(rps, rps->idle_freq), + intel_gpu_freq(rps, rps->efficient_freq), + intel_gpu_freq(rps, rps->boost_freq)); seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 24a3988281d2..355526a35d58 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1794,7 +1794,6 @@ static int i915_drm_resume(struct drm_device *dev) int ret; disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); - intel_gt_pm_disable(&dev_priv->gt); i915_gem_sanitize(dev_priv); @@ -1925,8 +1924,6 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_display_power_resume_early(dev_priv); - intel_gt_pm_disable(&dev_priv->gt); - intel_power_domains_resume(dev_priv); intel_gt_sanitize(&dev_priv->gt, true); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c3b2f29942f9..96fe0e8d468a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -545,94 +545,6 @@ struct i915_suspend_saved_registers { struct vlv_s0ix_state; -struct intel_rps_ei { - ktime_t ktime; - u32 render_c0; - u32 media_c0; -}; - -struct intel_rps { - struct mutex lock; /* protects enabling and the worker */ - - /* - * work, interrupts_enabled and pm_iir are protected by - * dev_priv->irq_lock - */ - struct work_struct work; - bool interrupts_enabled; - u32 pm_iir; - - /* PM interrupt bits that should never be masked */ - u32 pm_intrmsk_mbz; - - /* Frequencies are stored in potentially platform dependent multiples. - * In other words, *_freq needs to be multiplied by X to be interesting. - * Soft limits are those which are used for the dynamic reclocking done - * by the driver (raise frequencies under heavy loads, and lower for - * lighter loads). Hard limits are those imposed by the hardware. - * - * A distinction is made for overclocking, which is never enabled by - * default, and is considered to be above the hard limit if it's - * possible at all. - */ - u8 cur_freq; /* Current frequency (cached, may not == HW) */ - u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */ - u8 max_freq_softlimit; /* Max frequency permitted by the driver */ - u8 max_freq; /* Maximum frequency, RP0 if not overclocking */ - u8 min_freq; /* AKA RPn. Minimum frequency */ - u8 boost_freq; /* Frequency to request when wait boosting */ - u8 idle_freq; /* Frequency to request when we are idle */ - u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */ - u8 rp1_freq; /* "less than" RP0 power/freqency */ - u8 rp0_freq; /* Non-overclocked max frequency. */ - u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */ - - int last_adj; - - struct { - struct mutex mutex; - - enum { LOW_POWER, BETWEEN, HIGH_POWER } mode; - unsigned int interactive; - - u8 up_threshold; /* Current %busy required to uplock */ - u8 down_threshold; /* Current %busy required to downclock */ - } power; - - bool enabled; - atomic_t num_waiters; - atomic_t boosts; - - /* manual wa residency calculations */ - struct intel_rps_ei ei; -}; - -struct intel_gen6_power_mgmt { - struct intel_rps rps; -}; - -/* defined intel_pm.c */ -extern spinlock_t mchdev_lock; - -struct intel_ilk_power_mgmt { - u8 cur_delay; - u8 min_delay; - u8 max_delay; - u8 fmax; - u8 fstart; - - u64 last_count1; - unsigned long last_time1; - unsigned long chipset_power; - u64 last_count2; - u64 last_time2; - unsigned long gfx_power; - u8 corr; - - int c_m; - int r_t; -}; - #define MAX_L3_SLICES 2 struct intel_l3_parity { u32 *remap_info[MAX_L3_SLICES]; @@ -1069,7 +981,6 @@ struct drm_i915_private { u32 irq_mask; u32 de_irq_mask[I915_MAX_PIPES]; }; - u32 pm_rps_events; u32 pipestat_irq_mask[I915_MAX_PIPES]; struct i915_hotplug hotplug; @@ -1209,13 +1120,6 @@ struct drm_i915_private { */ u32 edram_size_mb; - /* gen6+ GT PM state */ - struct intel_gen6_power_mgmt gt_pm; - - /* ilk-only ips/rps state. Everything in here is protected by the global - * mchdev_lock in intel_pm.c */ - struct intel_ilk_power_mgmt ips; - struct i915_power_domains power_domains; struct i915_psr psr; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 319e96d833fa..0f271a53012d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -52,6 +52,7 @@ #include "gt/intel_mocs.h" #include "gt/intel_reset.h" #include "gt/intel_renderstate.h" +#include "gt/intel_rps.h" #include "gt/intel_workarounds.h" #include "i915_drv.h" @@ -1269,8 +1270,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_context; } - intel_init_gt_powersave(dev_priv); - intel_uc_init(&dev_priv->gt.uc); ret = intel_gt_init_hw(&dev_priv->gt); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 33020c8ca5f6..8a3aed17c3b7 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -45,6 +45,7 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" +#include "gt/intel_rps.h" #include "i915_drv.h" #include "i915_irq.h" @@ -327,87 +328,6 @@ static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv) return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR; } -void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv) -{ - struct intel_gt *gt = &dev_priv->gt; - - spin_lock_irq(>->irq_lock); - - while (gen11_gt_reset_one_iir(gt, 0, GEN11_GTPM)) - ; - - dev_priv->gt_pm.rps.pm_iir = 0; - - spin_unlock_irq(>->irq_lock); -} - -void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) -{ - struct intel_gt *gt = &dev_priv->gt; - - spin_lock_irq(>->irq_lock); - gen6_gt_pm_reset_iir(gt, GEN6_PM_RPS_EVENTS); - dev_priv->gt_pm.rps.pm_iir = 0; - spin_unlock_irq(>->irq_lock); -} - -void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) -{ - struct intel_gt *gt = &dev_priv->gt; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - if (READ_ONCE(rps->interrupts_enabled)) - return; - - spin_lock_irq(>->irq_lock); - WARN_ON_ONCE(rps->pm_iir); - - if (INTEL_GEN(dev_priv) >= 11) - WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GTPM)); - else - WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); - - rps->interrupts_enabled = true; - gen6_gt_pm_enable_irq(gt, dev_priv->pm_rps_events); - - spin_unlock_irq(>->irq_lock); -} - -u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, u32 mask) -{ - return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz; -} - -void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - struct intel_gt *gt = &dev_priv->gt; - - if (!READ_ONCE(rps->interrupts_enabled)) - return; - - spin_lock_irq(>->irq_lock); - rps->interrupts_enabled = false; - - I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); - - gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS); - - spin_unlock_irq(>->irq_lock); - intel_synchronize_irq(dev_priv); - - /* Now that we will not be generating any more work, flush any - * outstanding tasks. As we are called on the RPS idle path, - * we will reset the GPU to minimum frequencies, so the current - * state of the worker can be discarded. - */ - cancel_work_sync(&rps->work); - if (INTEL_GEN(dev_priv) >= 11) - gen11_reset_rps_interrupts(dev_priv); - else - gen6_reset_rps_interrupts(dev_priv); -} - void gen9_reset_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -1065,199 +985,6 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc) return position; } -static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u32 busy_up, busy_down, max_avg, min_avg; - u8 new_delay; - - spin_lock(&mchdev_lock); - - intel_uncore_write16(uncore, - MEMINTRSTS, - intel_uncore_read(uncore, MEMINTRSTS)); - - new_delay = dev_priv->ips.cur_delay; - - intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); - busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG); - busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG); - max_avg = intel_uncore_read(uncore, RCBMAXAVG); - min_avg = intel_uncore_read(uncore, RCBMINAVG); - - /* Handle RCS change request from hw */ - if (busy_up > max_avg) { - if (dev_priv->ips.cur_delay != dev_priv->ips.max_delay) - new_delay = dev_priv->ips.cur_delay - 1; - if (new_delay < dev_priv->ips.max_delay) - new_delay = dev_priv->ips.max_delay; - } else if (busy_down < min_avg) { - if (dev_priv->ips.cur_delay != dev_priv->ips.min_delay) - new_delay = dev_priv->ips.cur_delay + 1; - if (new_delay > dev_priv->ips.min_delay) - new_delay = dev_priv->ips.min_delay; - } - - if (ironlake_set_drps(dev_priv, new_delay)) - dev_priv->ips.cur_delay = new_delay; - - spin_unlock(&mchdev_lock); - - return; -} - -static void vlv_c0_read(struct drm_i915_private *dev_priv, - struct intel_rps_ei *ei) -{ - ei->ktime = ktime_get_raw(); - ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT); - ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); -} - -void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) -{ - memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei)); -} - -static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - const struct intel_rps_ei *prev = &rps->ei; - struct intel_rps_ei now; - u32 events = 0; - - if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) - return 0; - - vlv_c0_read(dev_priv, &now); - - if (prev->ktime) { - u64 time, c0; - u32 render, media; - - time = ktime_us_delta(now.ktime, prev->ktime); - - time *= dev_priv->czclk_freq; - - /* Workload can be split between render + media, - * e.g. SwapBuffers being blitted in X after being rendered in - * mesa. To account for this we need to combine both engines - * into our activity counter. - */ - render = now.render_c0 - prev->render_c0; - media = now.media_c0 - prev->media_c0; - c0 = max(render, media); - c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ - - if (c0 > time * rps->power.up_threshold) - events = GEN6_PM_RP_UP_THRESHOLD; - else if (c0 < time * rps->power.down_threshold) - events = GEN6_PM_RP_DOWN_THRESHOLD; - } - - rps->ei = now; - return events; -} - -static void gen6_pm_rps_work(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, gt_pm.rps.work); - struct intel_gt *gt = &dev_priv->gt; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - bool client_boost = false; - int new_delay, adj, min, max; - u32 pm_iir = 0; - - spin_lock_irq(>->irq_lock); - if (rps->interrupts_enabled) { - pm_iir = fetch_and_zero(&rps->pm_iir); - client_boost = atomic_read(&rps->num_waiters); - } - spin_unlock_irq(>->irq_lock); - - /* Make sure we didn't queue anything we're not going to process. */ - WARN_ON(pm_iir & ~dev_priv->pm_rps_events); - if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) - goto out; - - mutex_lock(&rps->lock); - - pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); - - adj = rps->last_adj; - new_delay = rps->cur_freq; - min = rps->min_freq_softlimit; - max = rps->max_freq_softlimit; - if (client_boost) - max = rps->max_freq; - if (client_boost && new_delay < rps->boost_freq) { - new_delay = rps->boost_freq; - adj = 0; - } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { - if (adj > 0) - adj *= 2; - else /* CHV needs even encode values */ - adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1; - - if (new_delay >= rps->max_freq_softlimit) - adj = 0; - } else if (client_boost) { - adj = 0; - } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { - if (rps->cur_freq > rps->efficient_freq) - new_delay = rps->efficient_freq; - else if (rps->cur_freq > rps->min_freq_softlimit) - new_delay = rps->min_freq_softlimit; - adj = 0; - } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { - if (adj < 0) - adj *= 2; - else /* CHV needs even encode values */ - adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1; - - if (new_delay <= rps->min_freq_softlimit) - adj = 0; - } else { /* unknown event */ - adj = 0; - } - - rps->last_adj = adj; - - /* - * Limit deboosting and boosting to keep ourselves at the extremes - * when in the respective power modes (i.e. slowly decrease frequencies - * while in the HIGH_POWER zone and slowly increase frequencies while - * in the LOW_POWER zone). On idle, we will hit the timeout and drop - * to the next level quickly, and conversely if busy we expect to - * hit a waitboost and rapidly switch into max power. - */ - if ((adj < 0 && rps->power.mode == HIGH_POWER) || - (adj > 0 && rps->power.mode == LOW_POWER)) - rps->last_adj = 0; - - /* sysfs frequency interfaces may have snuck in while servicing the - * interrupt - */ - new_delay += adj; - new_delay = clamp_t(int, new_delay, min, max); - - if (intel_set_rps(dev_priv, new_delay)) { - DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); - rps->last_adj = 0; - } - - mutex_unlock(&rps->lock); - -out: - /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ - spin_lock_irq(>->irq_lock); - if (rps->interrupts_enabled) - gen6_gt_pm_unmask_irq(gt, dev_priv->pm_rps_events); - spin_unlock_irq(>->irq_lock); -} - - /** * ivybridge_parity_work - Workqueue called when a parity error interrupt * occurred. @@ -1631,54 +1358,6 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, res1, res2); } -/* The RPS events need forcewake, so we add them to a work queue and mask their - * IMR bits until the work is done. Other interrupts can be processed without - * the work queue. */ -void gen11_rps_irq_handler(struct intel_gt *gt, u32 pm_iir) -{ - struct drm_i915_private *i915 = gt->i915; - struct intel_rps *rps = &i915->gt_pm.rps; - const u32 events = i915->pm_rps_events & pm_iir; - - lockdep_assert_held(>->irq_lock); - - if (unlikely(!events)) - return; - - gen6_gt_pm_mask_irq(gt, events); - - if (!rps->interrupts_enabled) - return; - - rps->pm_iir |= events; - schedule_work(&rps->work); -} - -void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - struct intel_gt *gt = &dev_priv->gt; - - if (pm_iir & dev_priv->pm_rps_events) { - spin_lock(>->irq_lock); - gen6_gt_pm_mask_irq(gt, pm_iir & dev_priv->pm_rps_events); - if (rps->interrupts_enabled) { - rps->pm_iir |= pm_iir & dev_priv->pm_rps_events; - schedule_work(&rps->work); - } - spin_unlock(>->irq_lock); - } - - if (INTEL_GEN(dev_priv) >= 8) - return; - - if (pm_iir & PM_VEBOX_USER_INTERRUPT) - intel_engine_breadcrumbs_irq(dev_priv->engine[VECS0]); - - if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) - DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); -} - static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv) { enum pipe pipe; @@ -1989,7 +1668,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) if (gt_iir) gen6_gt_irq_handler(&dev_priv->gt, gt_iir); if (pm_iir) - gen6_rps_irq_handler(dev_priv, pm_iir); + gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir); if (hotplug_status) i9xx_hpd_irq_handler(dev_priv, hotplug_status); @@ -2393,7 +2072,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv, } if (IS_GEN(dev_priv, 5) && de_iir & DE_PCU_EVENT) - ironlake_rps_change_irq_handler(dev_priv); + gen5_rps_irq_handler(&dev_priv->gt.rps); } static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, @@ -2498,7 +2177,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) if (pm_iir) { I915_WRITE(GEN6_PMIIR, pm_iir); ret = IRQ_HANDLED; - gen6_rps_irq_handler(dev_priv, pm_iir); + gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir); } } @@ -4281,13 +3960,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) void intel_irq_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - struct intel_rps *rps = &dev_priv->gt_pm.rps; int i; intel_hpd_init_work(dev_priv); - INIT_WORK(&rps->work, gen6_pm_rps_work); - INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); for (i = 0; i < MAX_L3_SLICES; ++i) dev_priv->l3_parity.remap_info[i] = NULL; @@ -4296,33 +3972,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (HAS_GT_UC(dev_priv) && INTEL_GEN(dev_priv) < 11) dev_priv->gt.pm_guc_events = GUC_INTR_GUC2HOST << 16; - /* Let's track the enabled rps events */ - if (IS_VALLEYVIEW(dev_priv)) - /* WaGsvRC0ResidencyMethod:vlv */ - dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED; - else - dev_priv->pm_rps_events = (GEN6_PM_RP_UP_THRESHOLD | - GEN6_PM_RP_DOWN_THRESHOLD | - GEN6_PM_RP_DOWN_TIMEOUT); - - /* We share the register with other engine */ - if (INTEL_GEN(dev_priv) > 9) - GEM_WARN_ON(dev_priv->pm_rps_events & 0xffff0000); - - rps->pm_intrmsk_mbz = 0; - - /* - * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer - * if GEN6_PM_UP_EI_EXPIRED is masked. - * - * TODO: verify if this can be reproduced on VLV,CHV. - */ - if (INTEL_GEN(dev_priv) <= 7) - rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; - - if (INTEL_GEN(dev_priv) >= 8) - rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - dev->vblank_disable_immediate = true; /* Most platforms treat the display irq block as an always-on diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h index 19a3bc019535..d0d91c6e00d7 100644 --- a/drivers/gpu/drm/i915/i915_irq.h +++ b/drivers/gpu/drm/i915/i915_irq.h @@ -22,9 +22,6 @@ struct intel_gt; struct intel_guc; struct intel_uncore; -void gen11_rps_irq_handler(struct intel_gt *gt, u32 pm_iir); -void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir); - void intel_irq_init(struct drm_i915_private *dev_priv); void intel_irq_fini(struct drm_i915_private *dev_priv); int intel_irq_install(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index b5b67c0624ff..05395015d1f2 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -12,6 +12,7 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h" +#include "gt/intel_rps.h" #include "i915_drv.h" #include "i915_pmu.h" @@ -358,25 +359,26 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct i915_pmu *pmu = &i915->pmu; + struct intel_rps *rps = >->rps; if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { u32 val; - val = i915->gt_pm.rps.cur_freq; + val = rps->cur_freq; if (intel_gt_pm_get_if_awake(gt)) { val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1); - val = intel_get_cagf(i915, val); + val = intel_get_cagf(rps, val); intel_gt_pm_put(gt); } add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], - intel_gpu_freq(i915, val), + intel_gpu_freq(rps, val), period_ns / 1000); } if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], - intel_gpu_freq(i915, i915->gt_pm.rps.cur_freq), + intel_gpu_freq(rps, rps->cur_freq), period_ns / 1000); } } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 19a1d447ab8d..00011f9533b6 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -32,6 +32,7 @@ #include "gem/i915_gem_context.h" #include "gt/intel_context.h" #include "gt/intel_ring.h" +#include "gt/intel_rps.h" #include "i915_active.h" #include "i915_drv.h" @@ -258,8 +259,8 @@ bool i915_request_retire(struct i915_request *rq) if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) i915_request_cancel_breadcrumb(rq); if (i915_request_has_waitboost(rq)) { - GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); - atomic_dec(&rq->i915->gt_pm.rps.num_waiters); + GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); + atomic_dec(&rq->engine->gt->rps.num_waiters); } if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); @@ -1467,7 +1468,7 @@ long i915_request_wait(struct i915_request *rq, */ if (flags & I915_WAIT_PRIORITY) { if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6) - gen6_rps_boost(rq); + intel_rps_boost(rq); i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); } diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index bf039b8ba593..65476909d1bf 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -31,6 +31,7 @@ #include #include "gt/intel_rc6.h" +#include "gt/intel_rps.h" #include "i915_drv.h" #include "i915_sysfs.h" @@ -259,6 +260,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; intel_wakeref_t wakeref; u32 freq; @@ -271,31 +273,31 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, freq = (freq >> 8) & 0xff; } else { - freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); + freq = intel_get_cagf(rps, I915_READ(GEN6_RPSTAT1)); } intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, freq)); + return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(rps, freq)); } static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.cur_freq)); + intel_gpu_freq(rps, rps->cur_freq)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.boost_freq)); + intel_gpu_freq(rps, rps->boost_freq)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -303,7 +305,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; bool boost = false; ssize_t ret; u32 val; @@ -313,7 +315,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, return ret; /* Validate against (static) hardware limits */ - val = intel_freq_opcode(dev_priv, val); + val = intel_freq_opcode(rps, val); if (val < rps->min_freq || val > rps->max_freq) return -EINVAL; @@ -333,19 +335,19 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.efficient_freq)); + intel_gpu_freq(rps, rps->efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.max_freq_softlimit)); + intel_gpu_freq(rps, rps->max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -353,19 +355,17 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; - intel_wakeref_t wakeref; - u32 val; + struct intel_rps *rps = &dev_priv->gt.rps; ssize_t ret; + u32 val; ret = kstrtou32(buf, 0, &val); if (ret) return ret; - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); mutex_lock(&rps->lock); - val = intel_freq_opcode(dev_priv, val); + val = intel_freq_opcode(rps, val); if (val < rps->min_freq || val > rps->max_freq || val < rps->min_freq_softlimit) { @@ -375,7 +375,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (val > rps->rp0_freq) DRM_DEBUG("User requested overclocking to %d\n", - intel_gpu_freq(dev_priv, val)); + intel_gpu_freq(rps, val)); rps->max_freq_softlimit = val; @@ -383,14 +383,15 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, rps->min_freq_softlimit, rps->max_freq_softlimit); - /* We still need *_set_rps to process the new max_delay and + /* + * We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. */ - ret = intel_set_rps(dev_priv, val); + * frequency request may be unchanged. + */ + intel_rps_set(rps, val); unlock: mutex_unlock(&rps->lock); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); return ret ?: count; } @@ -398,10 +399,10 @@ unlock: static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt.rps; return snprintf(buf, PAGE_SIZE, "%d\n", - intel_gpu_freq(dev_priv, - dev_priv->gt_pm.rps.min_freq_softlimit)); + intel_gpu_freq(rps, rps->min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, @@ -409,19 +410,17 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; - intel_wakeref_t wakeref; - u32 val; + struct intel_rps *rps = &dev_priv->gt.rps; ssize_t ret; + u32 val; ret = kstrtou32(buf, 0, &val); if (ret) return ret; - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); mutex_lock(&rps->lock); - val = intel_freq_opcode(dev_priv, val); + val = intel_freq_opcode(rps, val); if (val < rps->min_freq || val > rps->max_freq || val > rps->max_freq_softlimit) { @@ -435,14 +434,15 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, rps->min_freq_softlimit, rps->max_freq_softlimit); - /* We still need *_set_rps to process the new min_delay and + /* + * We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. */ - ret = intel_set_rps(dev_priv, val); + * frequency request may be unchanged. + */ + intel_rps_set(rps, val); unlock: mutex_unlock(&rps->lock); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); return ret ?: count; } @@ -464,15 +464,15 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt_pm.rps; + struct intel_rps *rps = &dev_priv->gt.rps; u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_gpu_freq(dev_priv, rps->rp0_freq); + val = intel_gpu_freq(rps, rps->rp0_freq); else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_gpu_freq(dev_priv, rps->rp1_freq); + val = intel_gpu_freq(rps, rps->rp1_freq); else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_gpu_freq(dev_priv, rps->min_freq); + val = intel_gpu_freq(rps, rps->min_freq); else BUG(); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6b37d22fc68d..5d2b460d3ee5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -197,8 +197,6 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) break; } - dev_priv->ips.r_t = dev_priv->mem_freq; - switch (csipll & 0x3ff) { case 0x00c: dev_priv->fsb_freq = 3200; @@ -227,14 +225,6 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) dev_priv->fsb_freq = 0; break; } - - if (dev_priv->fsb_freq == 3200) { - dev_priv->ips.c_m = 0; - } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) { - dev_priv->ips.c_m = 1; - } else { - dev_priv->ips.c_m = 2; - } } static const struct cxsr_latency cxsr_latency_table[] = { @@ -6252,1885 +6242,264 @@ void intel_init_ipc(struct drm_i915_private *dev_priv) intel_enable_ipc(dev_priv); } -/* - * Lock protecting IPS related data structures - */ -DEFINE_SPINLOCK(mchdev_lock); +static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) +{ + /* + * On Ibex Peak and Cougar Point, we need to disable clock + * gating for the panel power sequencer or it will fail to + * start up when no ports are active. + */ + I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); +} -bool ironlake_set_drps(struct drm_i915_private *i915, u8 val) +static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &i915->uncore; - u16 rgvswctl; + enum pipe pipe; - lockdep_assert_held(&mchdev_lock); + for_each_pipe(dev_priv, pipe) { + I915_WRITE(DSPCNTR(pipe), + I915_READ(DSPCNTR(pipe)) | + DISPPLANE_TRICKLE_FEED_DISABLE); - rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); - if (rgvswctl & MEMCTL_CMD_STS) { - DRM_DEBUG("gpu busy, RCS change rejected\n"); - return false; /* still busy with another command */ + I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe))); + POSTING_READ(DSPSURF(pipe)); } - - rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | - (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; - intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); - intel_uncore_posting_read16(uncore, MEMSWCTL); - - rgvswctl |= MEMCTL_CMD_STS; - intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); - - return true; } -static void ironlake_enable_drps(struct drm_i915_private *dev_priv) +static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &dev_priv->uncore; - u32 rgvmodectl; - u8 fmax, fmin, fstart, vstart; - - spin_lock_irq(&mchdev_lock); - - rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); - - /* Enable temp reporting */ - intel_uncore_write16(uncore, PMMISC, I915_READ(PMMISC) | MCPPCE_EN); - intel_uncore_write16(uncore, TSC1, I915_READ(TSC1) | TSE); - - /* 100ms RC evaluation intervals */ - intel_uncore_write(uncore, RCUPEI, 100000); - intel_uncore_write(uncore, RCDNEI, 100000); - - /* Set max/min thresholds to 90ms and 80ms respectively */ - intel_uncore_write(uncore, RCBMAXAVG, 90000); - intel_uncore_write(uncore, RCBMINAVG, 80000); - - intel_uncore_write(uncore, MEMIHYST, 1); - - /* Set up min, max, and cur for interrupt handling */ - fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; - fmin = (rgvmodectl & MEMMODE_FMIN_MASK); - fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> - MEMMODE_FSTART_SHIFT; - - vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & - PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; - - dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ - dev_priv->ips.fstart = fstart; + u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - dev_priv->ips.max_delay = fstart; - dev_priv->ips.min_delay = fmin; - dev_priv->ips.cur_delay = fstart; + /* + * Required for FBC + * WaFbcDisableDpfcClockGating:ilk + */ + dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | + ILK_DPFCUNIT_CLOCK_GATE_DISABLE | + ILK_DPFDUNIT_CLOCK_GATE_ENABLE; - DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", - fmax, fmin, fstart); + I915_WRITE(PCH_3DCGDIS0, + MARIUNIT_CLOCK_GATE_DISABLE | + SVSMUNIT_CLOCK_GATE_DISABLE); + I915_WRITE(PCH_3DCGDIS1, + VFMUNIT_CLOCK_GATE_DISABLE); - intel_uncore_write(uncore, - MEMINTREN, - MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); + /* + * According to the spec the following bits should be set in + * order to enable memory self-refresh + * The bit 22/21 of 0x42004 + * The bit 5 of 0x42020 + * The bit 15 of 0x45000 + */ + I915_WRITE(ILK_DISPLAY_CHICKEN2, + (I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_DPARB_GATE | ILK_VSDPFD_FULL)); + dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; + I915_WRITE(DISP_ARB_CTL, + (I915_READ(DISP_ARB_CTL) | + DISP_FBC_WM_DIS)); /* - * Interrupts will be enabled in ironlake_irq_postinstall + * Based on the document from hardware guys the following bits + * should be set unconditionally in order to enable FBC. + * The bit 22 of 0x42000 + * The bit 22 of 0x42004 + * The bit 7,8,9 of 0x42020. */ + if (IS_IRONLAKE_M(dev_priv)) { + /* WaFbcAsynchFlipDisableFbcQueue:ilk */ + I915_WRITE(ILK_DISPLAY_CHICKEN1, + I915_READ(ILK_DISPLAY_CHICKEN1) | + ILK_FBCQ_DIS); + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_DPARB_GATE); + } - intel_uncore_write(uncore, VIDSTART, vstart); - intel_uncore_posting_read(uncore, VIDSTART); + I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); - rgvmodectl |= MEMMODE_SWMODE_EN; - intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_ELPIN_409_SELECT); + I915_WRITE(_3D_CHICKEN2, + _3D_CHICKEN2_WM_READ_PIPELINED << 16 | + _3D_CHICKEN2_WM_READ_PIPELINED); - if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & - MEMCTL_CMD_STS) == 0, 10)) - DRM_ERROR("stuck trying to change perf mode\n"); - mdelay(1); + /* WaDisableRenderCachePipelinedFlush:ilk */ + I915_WRITE(CACHE_MODE_0, + _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - ironlake_set_drps(dev_priv, fstart); + /* WaDisable_RenderCache_OperationalFlush:ilk */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - dev_priv->ips.last_count1 = - intel_uncore_read(uncore, DMIEC) + - intel_uncore_read(uncore, DDREC) + - intel_uncore_read(uncore, CSIEC); - dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); - dev_priv->ips.last_count2 = intel_uncore_read(uncore, GFXEC); - dev_priv->ips.last_time2 = ktime_get_raw_ns(); + g4x_disable_trickle_feed(dev_priv); - spin_unlock_irq(&mchdev_lock); + ibx_init_clock_gating(dev_priv); } -static void ironlake_disable_drps(struct drm_i915_private *i915) +static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) { - struct intel_uncore *uncore = &i915->uncore; - u16 rgvswctl; - - spin_lock_irq(&mchdev_lock); - - rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); - - /* Ack interrupts, disable EFC interrupt */ - intel_uncore_write(uncore, - MEMINTREN, - intel_uncore_read(uncore, MEMINTREN) & - ~MEMINT_EVAL_CHG_EN); - intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); - intel_uncore_write(uncore, - DEIER, - intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT); - intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT); - intel_uncore_write(uncore, - DEIMR, - intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT); - - /* Go back to the starting frequency */ - ironlake_set_drps(i915, i915->ips.fstart); - mdelay(1); - rgvswctl |= MEMCTL_CMD_STS; - intel_uncore_write(uncore, MEMSWCTL, rgvswctl); - mdelay(1); + enum pipe pipe; + u32 val; - spin_unlock_irq(&mchdev_lock); + /* + * On Ibex Peak and Cougar Point, we need to disable clock + * gating for the panel power sequencer or it will fail to + * start up when no ports are active. + */ + I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | + PCH_DPLUNIT_CLOCK_GATE_DISABLE | + PCH_CPUNIT_CLOCK_GATE_DISABLE); + I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | + DPLS_EDP_PPS_FIX_DIS); + /* The below fixes the weird display corruption, a few pixels shifted + * downward, on (only) LVDS of some HP laptops with IVY. + */ + for_each_pipe(dev_priv, pipe) { + val = I915_READ(TRANS_CHICKEN2(pipe)); + val |= TRANS_CHICKEN2_TIMING_OVERRIDE; + val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; + if (dev_priv->vbt.fdi_rx_polarity_inverted) + val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; + val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; + val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER; + val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH; + I915_WRITE(TRANS_CHICKEN2(pipe), val); + } + /* WADP0ClockGatingDisable */ + for_each_pipe(dev_priv, pipe) { + I915_WRITE(TRANS_CHICKEN1(pipe), + TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); + } } -/* There's a funny hw issue where the hw returns all 0 when reading from - * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value - * ourselves, instead of doing a rmw cycle (which might result in us clearing - * all limits and the gpu stuck at whatever frequency it is at atm). - */ -static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) +static void gen6_check_mch_setup(struct drm_i915_private *dev_priv) { - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 limits; - - /* Only set the down limit when we've reached the lowest level to avoid - * getting more interrupts, otherwise leave this clear. This prevents a - * race in the hw when coming out of rc6: There's a tiny window where - * the hw runs at the minimal clock before selecting the desired - * frequency, if the down threshold expires in that window we will not - * receive a down interrupt. */ - if (INTEL_GEN(dev_priv) >= 9) { - limits = (rps->max_freq_softlimit) << 23; - if (val <= rps->min_freq_softlimit) - limits |= (rps->min_freq_softlimit) << 14; - } else { - limits = rps->max_freq_softlimit << 24; - if (val <= rps->min_freq_softlimit) - limits |= rps->min_freq_softlimit << 16; - } + u32 tmp; - return limits; + tmp = I915_READ(MCH_SSKPD); + if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) + DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", + tmp); } -static void rps_set_power(struct drm_i915_private *dev_priv, int new_power) +static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) { - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 threshold_up = 0, threshold_down = 0; /* in % */ - u32 ei_up = 0, ei_down = 0; - - lockdep_assert_held(&rps->power.mutex); + u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - if (new_power == rps->power.mode) - return; + I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); - /* Note the units here are not exactly 1us, but 1280ns. */ - switch (new_power) { - case LOW_POWER: - /* Upclock if more than 95% busy over 16ms */ - ei_up = 16000; - threshold_up = 95; + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_ELPIN_409_SELECT); - /* Downclock if less than 85% busy over 32ms */ - ei_down = 32000; - threshold_down = 85; - break; + /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ + I915_WRITE(_3D_CHICKEN, + _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); - case BETWEEN: - /* Upclock if more than 90% busy over 13ms */ - ei_up = 13000; - threshold_up = 90; + /* WaDisable_RenderCache_OperationalFlush:snb */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - /* Downclock if less than 75% busy over 32ms */ - ei_down = 32000; - threshold_down = 75; - break; + /* + * BSpec recoomends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + I915_WRITE(GEN6_GT_MODE, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - case HIGH_POWER: - /* Upclock if more than 85% busy over 10ms */ - ei_up = 10000; - threshold_up = 85; + I915_WRITE(CACHE_MODE_0, + _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - /* Downclock if less than 60% busy over 32ms */ - ei_down = 32000; - threshold_down = 60; - break; - } + I915_WRITE(GEN6_UCGCTL1, + I915_READ(GEN6_UCGCTL1) | + GEN6_BLBUNIT_CLOCK_GATE_DISABLE | + GEN6_CSUNIT_CLOCK_GATE_DISABLE); - /* When byt can survive without system hang with dynamic - * sw freq adjustments, this restriction can be lifted. + /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock + * gating disable must be set. Failure to set it results in + * flickering pixels due to Z write ordering failures after + * some amount of runtime in the Mesa "fire" demo, and Unigine + * Sanctuary and Tropics, and apparently anything else with + * alpha test or pixel discard. + * + * According to the spec, bit 11 (RCCUNIT) must also be set, + * but we didn't debug actual testcases to find it out. + * + * WaDisableRCCUnitClockGating:snb + * WaDisableRCPBUnitClockGating:snb */ - if (IS_VALLEYVIEW(dev_priv)) - goto skip_hw_write; - - I915_WRITE(GEN6_RP_UP_EI, - GT_INTERVAL_FROM_US(dev_priv, ei_up)); - I915_WRITE(GEN6_RP_UP_THRESHOLD, - GT_INTERVAL_FROM_US(dev_priv, - ei_up * threshold_up / 100)); - - I915_WRITE(GEN6_RP_DOWN_EI, - GT_INTERVAL_FROM_US(dev_priv, ei_down)); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, - GT_INTERVAL_FROM_US(dev_priv, - ei_down * threshold_down / 100)); - - I915_WRITE(GEN6_RP_CONTROL, - (INTEL_GEN(dev_priv) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - -skip_hw_write: - rps->power.mode = new_power; - rps->power.up_threshold = threshold_up; - rps->power.down_threshold = threshold_down; -} - -static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - int new_power; - - new_power = rps->power.mode; - switch (rps->power.mode) { - case LOW_POWER: - if (val > rps->efficient_freq + 1 && - val > rps->cur_freq) - new_power = BETWEEN; - break; + I915_WRITE(GEN6_UCGCTL2, + GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | + GEN6_RCCUNIT_CLOCK_GATE_DISABLE); - case BETWEEN: - if (val <= rps->efficient_freq && - val < rps->cur_freq) - new_power = LOW_POWER; - else if (val >= rps->rp0_freq && - val > rps->cur_freq) - new_power = HIGH_POWER; - break; + /* WaStripsFansDisableFastClipPerformanceFix:snb */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); - case HIGH_POWER: - if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && - val < rps->cur_freq) - new_power = BETWEEN; - break; - } - /* Max/min bins are special */ - if (val <= rps->min_freq_softlimit) - new_power = LOW_POWER; - if (val >= rps->max_freq_softlimit) - new_power = HIGH_POWER; + /* + * Bspec says: + * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and + * 3DSTATE_SF number of SF output attributes is more than 16." + */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); - mutex_lock(&rps->power.mutex); - if (rps->power.interactive) - new_power = HIGH_POWER; - rps_set_power(dev_priv, new_power); - mutex_unlock(&rps->power.mutex); -} + /* + * According to the spec the following bits should be + * set in order to enable memory self-refresh and fbc: + * The bit21 and bit22 of 0x42000 + * The bit21 and bit22 of 0x42004 + * The bit5 and bit7 of 0x42020 + * The bit14 of 0x70180 + * The bit14 of 0x71180 + * + * WaFbcAsynchFlipDisableFbcQueue:snb + */ + I915_WRITE(ILK_DISPLAY_CHICKEN1, + I915_READ(ILK_DISPLAY_CHICKEN1) | + ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_DPARB_GATE | ILK_VSDPFD_FULL); + I915_WRITE(ILK_DSPCLK_GATE_D, + I915_READ(ILK_DSPCLK_GATE_D) | + ILK_DPARBUNIT_CLOCK_GATE_ENABLE | + ILK_DPFDUNIT_CLOCK_GATE_ENABLE); -void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive) -{ - struct intel_rps *rps = &i915->gt_pm.rps; + g4x_disable_trickle_feed(dev_priv); - if (INTEL_GEN(i915) < 6) - return; + cpt_init_clock_gating(dev_priv); - mutex_lock(&rps->power.mutex); - if (interactive) { - if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake)) - rps_set_power(i915, HIGH_POWER); - } else { - GEM_BUG_ON(!rps->power.interactive); - rps->power.interactive--; - } - mutex_unlock(&rps->power.mutex); + gen6_check_mch_setup(dev_priv); } -static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) +static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) { - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 mask = 0; - - /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ - if (val > rps->min_freq_softlimit) - mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; - if (val < rps->max_freq_softlimit) - mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; + u32 reg = I915_READ(GEN7_FF_THREAD_MODE); - mask &= dev_priv->pm_rps_events; + /* + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. + */ + reg &= ~GEN7_FF_SCHED_MASK; + reg |= GEN7_FF_TS_SCHED_HW; + reg |= GEN7_FF_VS_SCHED_HW; + reg |= GEN7_FF_DS_SCHED_HW; - return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); + I915_WRITE(GEN7_FF_THREAD_MODE, reg); } -/* gen6_set_rps is called to update the frequency request, but should also be - * called when the range (min_delay and max_delay) is modified so that we can - * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ -static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) +static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) { - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* min/max delay may still have been modified so be sure to - * write the limits value. - */ - if (val != rps->cur_freq) { - gen6_set_rps_thresholds(dev_priv, val); - - if (INTEL_GEN(dev_priv) >= 9) - I915_WRITE(GEN6_RPNSWREQ, - GEN9_FREQUENCY(val)); - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(val)); - else - I915_WRITE(GEN6_RPNSWREQ, - GEN6_FREQUENCY(val) | - GEN6_OFFSET(0) | - GEN6_AGGRESSIVE_TURBO); - } - - /* Make sure we continue to get interrupts - * until we hit the minimum or maximum frequencies. - */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - - rps->cur_freq = val; - trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); - - return 0; -} - -static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) -{ - int err; - - if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), - "Odd GPU freq value\n")) - val &= ~1; - - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - - if (val != dev_priv->gt_pm.rps.cur_freq) { - vlv_punit_get(dev_priv); - err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - vlv_punit_put(dev_priv); - if (err) - return err; - - gen6_set_rps_thresholds(dev_priv, val); - } - - dev_priv->gt_pm.rps.cur_freq = val; - trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); - - return 0; -} - -/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down - * - * * If Gfx is Idle, then - * 1. Forcewake Media well. - * 2. Request idle freq. - * 3. Release Forcewake of Media well. -*/ -static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val = rps->idle_freq; - int err; - - if (rps->cur_freq <= val) - return; - - /* The punit delays the write of the frequency and voltage until it - * determines the GPU is awake. During normal usage we don't want to - * waste power changing the frequency if the GPU is sleeping (rc6). - * However, the GPU and driver is now idle and we do not want to delay - * switching to minimum voltage (reducing power whilst idle) as we do - * not expect to be woken in the near future and so must flush the - * change by waking the device. - * - * We choose to take the media powerwell (either would do to trick the - * punit into committing the voltage change) as that takes a lot less - * power than the render powerwell. - */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_MEDIA); - err = valleyview_set_rps(dev_priv, val); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_MEDIA); - - if (err) - DRM_ERROR("Failed to set RPS for idle\n"); -} - -void gen6_rps_busy(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - mutex_lock(&rps->lock); - if (rps->enabled) { - u8 freq; - - if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) - gen6_rps_reset_ei(dev_priv); - I915_WRITE(GEN6_PMINTRMSK, - gen6_rps_pm_mask(dev_priv, rps->cur_freq)); - - gen6_enable_rps_interrupts(dev_priv); - - /* Use the user's desired frequency as a guide, but for better - * performance, jump directly to RPe as our starting frequency. - */ - freq = max(rps->cur_freq, - rps->efficient_freq); - - if (intel_set_rps(dev_priv, - clamp(freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit))) - DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); - } - mutex_unlock(&rps->lock); -} - -void gen6_rps_idle(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* Flush our bottom-half so that it does not race with us - * setting the idle frequency and so that it is bounded by - * our rpm wakeref. And then disable the interrupts to stop any - * futher RPS reclocking whilst we are asleep. - */ - gen6_disable_rps_interrupts(dev_priv); - - mutex_lock(&rps->lock); - if (rps->enabled) { - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_set_rps_idle(dev_priv); - else - gen6_set_rps(dev_priv, rps->idle_freq); - rps->last_adj = 0; - I915_WRITE(GEN6_PMINTRMSK, - gen6_sanitize_rps_pm_mask(dev_priv, ~0)); - } - mutex_unlock(&rps->lock); -} - -void gen6_rps_boost(struct i915_request *rq) -{ - struct intel_rps *rps = &rq->i915->gt_pm.rps; - unsigned long flags; - bool boost; - - /* This is intentionally racy! We peek at the state here, then - * validate inside the RPS worker. - */ - if (!rps->enabled) - return; - - if (i915_request_signaled(rq)) - return; - - /* Serializes with i915_request_retire() */ - boost = false; - spin_lock_irqsave(&rq->lock, flags); - if (!i915_request_has_waitboost(rq) && - !dma_fence_is_signaled_locked(&rq->fence)) { - boost = !atomic_fetch_inc(&rps->num_waiters); - rq->flags |= I915_REQUEST_WAITBOOST; - } - spin_unlock_irqrestore(&rq->lock, flags); - if (!boost) - return; - - if (READ_ONCE(rps->cur_freq) < rps->boost_freq) - schedule_work(&rps->work); - - atomic_inc(&rps->boosts); -} - -int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - int err; - - lockdep_assert_held(&rps->lock); - GEM_BUG_ON(val > rps->max_freq); - GEM_BUG_ON(val < rps->min_freq); - - if (!rps->enabled) { - rps->cur_freq = val; - return 0; - } - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - err = valleyview_set_rps(dev_priv, val); - else - err = gen6_set_rps(dev_priv, val); - - return err; -} - -static void gen9_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static void gen6_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RPNSWREQ, 1 << 31); - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static void cherryview_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static void valleyview_disable_rps(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RP_CONTROL, 0); -} - -static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* All of these values are in units of 50MHz */ - - /* static values from HW: RP0 > RP1 > RPn (min_freq) */ - if (IS_GEN9_LP(dev_priv)) { - u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 16) & 0xff; - rps->rp1_freq = (rp_state_cap >> 8) & 0xff; - rps->min_freq = (rp_state_cap >> 0) & 0xff; - } else { - u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 0) & 0xff; - rps->rp1_freq = (rp_state_cap >> 8) & 0xff; - rps->min_freq = (rp_state_cap >> 16) & 0xff; - } - /* hw_max = RP0 until we check for overclocking */ - rps->max_freq = rps->rp0_freq; - - rps->efficient_freq = rps->rp1_freq; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - u32 ddcc_status = 0; - - if (sandybridge_pcode_read(dev_priv, - HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, - &ddcc_status, NULL) == 0) - rps->efficient_freq = - clamp_t(u8, - ((ddcc_status >> 8) & 0xff), - rps->min_freq, - rps->max_freq); - } - - if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - /* Store the frequency values in 16.66 MHZ units, which is - * the natural hardware unit for SKL - */ - rps->rp0_freq *= GEN9_FREQ_SCALER; - rps->rp1_freq *= GEN9_FREQ_SCALER; - rps->min_freq *= GEN9_FREQ_SCALER; - rps->max_freq *= GEN9_FREQ_SCALER; - rps->efficient_freq *= GEN9_FREQ_SCALER; - } -} - -static void reset_rps(struct drm_i915_private *dev_priv, - int (*set)(struct drm_i915_private *, u8)) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u8 freq = rps->cur_freq; - - /* force a reset */ - rps->power.mode = -1; - rps->cur_freq = -1; - - if (set(dev_priv, freq)) - DRM_ERROR("Failed to reset RPS to initial values\n"); -} - -/* See the Gen9_GT_PM_Programming_Guide doc for the below */ -static void gen9_enable_rps(struct drm_i915_private *dev_priv) -{ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Program defaults and thresholds for RPS */ - if (IS_GEN(dev_priv, 9)) - I915_WRITE(GEN6_RC_VIDEO_FREQ, - GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); - - /* 1 second timeout*/ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, - GT_INTERVAL_FROM_US(dev_priv, 1000000)); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa); - - /* Leaning on the below call to gen6_set_rps to program/setup the - * Up/Down EI & threshold registers, as well as the RP_CONTROL, - * RP_INTERRUPT_LIMITS & RPNSWREQ registers */ - reset_rps(dev_priv, gen6_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen8_enable_rps(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 1 Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(rps->rp1_freq)); - I915_WRITE(GEN6_RC_VIDEO_FREQ, - HSW_FREQUENCY(rps->rp1_freq)); - /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ - - /* Docs recommend 900MHz, and 300 MHz respectively */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - rps->max_freq_softlimit << 24 | - rps->min_freq_softlimit << 16); - - I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ - I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ - I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - /* 2: Enable RPS */ - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - - reset_rps(dev_priv, gen6_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen6_enable_rps(struct drm_i915_private *dev_priv) -{ - /* Here begins a magic sequence of register writes to enable - * auto-downclocking. - * - * Perhaps there might be some value in exposing these to - * userspace... - */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Power down if completely idle for over 50ms */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - reset_rps(dev_priv, gen6_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp0; - - val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); - - switch (RUNTIME_INFO(dev_priv)->sseu.eu_total) { - case 8: - /* (2 * 4) config */ - rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT); - break; - case 12: - /* (2 * 6) config */ - rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT); - break; - case 16: - /* (2 * 8) config */ - default: - /* Setting (2 * 8) Min RP0 for any other combination */ - rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT); - break; - } - - rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK); - - return rp0; -} - -static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpe; - - val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); - rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; - - return rpe; -} - -static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp1; - - val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); - rp1 = (val & FB_GFX_FREQ_FUSE_MASK); - - return rp1; -} - -static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpn; - - val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); - rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & - FB_GFX_FREQ_FUSE_MASK); - - return rpn; -} - -static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp1; - - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); - - rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; - - return rp1; -} - -static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp0; - - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); - - rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; - /* Clamp to max */ - rp0 = min_t(u32, rp0, 0xea); - - return rp0; -} - -static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpe; - - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); - rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); - rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; - - return rpe; -} - -static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) -{ - u32 val; - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; - /* - * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value - * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on - * a BYT-M B0 the above register contains 0xbf. Moreover when setting - * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 - * to make sure it matches what Punit accepts. - */ - return max_t(u32, val, 0xc0); -} - -static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) -{ - dev_priv->gt_pm.rps.gpll_ref_freq = - vlv_get_cck_clock(dev_priv, "GPLL ref", - CCK_GPLL_CLOCK_CONTROL, - dev_priv->czclk_freq); - - DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", - dev_priv->gt_pm.rps.gpll_ref_freq); -} - -static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val; - - vlv_iosf_sb_get(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); - - vlv_init_gpll_ref_freq(dev_priv); - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - switch ((val >> 6) & 3) { - case 0: - case 1: - dev_priv->mem_freq = 800; - break; - case 2: - dev_priv->mem_freq = 1066; - break; - case 3: - dev_priv->mem_freq = 1333; - break; - } - DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - - rps->max_freq = valleyview_rps_max_freq(dev_priv); - rps->rp0_freq = rps->max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->max_freq), - rps->max_freq); - - rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->efficient_freq), - rps->efficient_freq); - - rps->rp1_freq = valleyview_rps_guar_freq(dev_priv); - DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->rp1_freq), - rps->rp1_freq); - - rps->min_freq = valleyview_rps_min_freq(dev_priv); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->min_freq), - rps->min_freq); - - vlv_iosf_sb_put(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); -} - -static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 val; - - vlv_iosf_sb_get(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); - - vlv_init_gpll_ref_freq(dev_priv); - - val = vlv_cck_read(dev_priv, CCK_FUSE_REG); - - switch ((val >> 2) & 0x7) { - case 3: - dev_priv->mem_freq = 2000; - break; - default: - dev_priv->mem_freq = 1600; - break; - } - DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - - rps->max_freq = cherryview_rps_max_freq(dev_priv); - rps->rp0_freq = rps->max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->max_freq), - rps->max_freq); - - rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->efficient_freq), - rps->efficient_freq); - - rps->rp1_freq = cherryview_rps_guar_freq(dev_priv); - DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->rp1_freq), - rps->rp1_freq); - - rps->min_freq = cherryview_rps_min_freq(dev_priv); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, rps->min_freq), - rps->min_freq); - - vlv_iosf_sb_put(dev_priv, - BIT(VLV_IOSF_SB_PUNIT) | - BIT(VLV_IOSF_SB_NC) | - BIT(VLV_IOSF_SB_CCK)); - - WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | - rps->min_freq) & 1, - "Odd GPU freq values\n"); -} - -static void cherryview_enable_rps(struct drm_i915_private *dev_priv) -{ - u32 val; - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 1: Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - /* 2: Enable RPS */ - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - - /* Setting Fixed Bias */ - vlv_punit_get(dev_priv); - - val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; - vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - - vlv_punit_put(dev_priv); - - /* RPS code assumes GPLL is used */ - WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); - - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - - reset_rps(dev_priv, valleyview_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void valleyview_enable_rps(struct drm_i915_private *dev_priv) -{ - u32 val; - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_CONT); - - vlv_punit_get(dev_priv); - - /* Setting Fixed Bias */ - val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; - vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); - - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - - vlv_punit_put(dev_priv); - - /* RPS code assumes GPLL is used */ - WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); - - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - - reset_rps(dev_priv, valleyview_set_rps); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static unsigned long intel_pxfreq(u32 vidfreq) -{ - unsigned long freq; - int div = (vidfreq & 0x3f0000) >> 16; - int post = (vidfreq & 0x3000) >> 12; - int pre = (vidfreq & 0x7); - - if (!pre) - return 0; - - freq = ((div * 133333) / ((1<ips.last_time1; - - /* Prevent division-by-zero if we are asking too fast. - * Also, we don't get interesting results if we are polling - * faster than once in 10ms, so just return the saved value - * in such cases. - */ - if (diff1 <= 10) - return dev_priv->ips.chipset_power; - - count1 = I915_READ(DMIEC); - count2 = I915_READ(DDREC); - count3 = I915_READ(CSIEC); - - total_count = count1 + count2 + count3; - - /* FIXME: handle per-counter overflow */ - if (total_count < dev_priv->ips.last_count1) { - diff = ~0UL - dev_priv->ips.last_count1; - diff += total_count; - } else { - diff = total_count - dev_priv->ips.last_count1; - } - - for (i = 0; i < ARRAY_SIZE(cparams); i++) { - if (cparams[i].i == dev_priv->ips.c_m && - cparams[i].t == dev_priv->ips.r_t) { - m = cparams[i].m; - c = cparams[i].c; - break; - } - } - - diff = div_u64(diff, diff1); - ret = ((m * diff) + c); - ret = div_u64(ret, 10); - - dev_priv->ips.last_count1 = total_count; - dev_priv->ips.last_time1 = now; - - dev_priv->ips.chipset_power = ret; - - return ret; -} - -unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - unsigned long val = 0; - - if (!IS_GEN(dev_priv, 5)) - return 0; - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - val = __i915_chipset_val(dev_priv); - spin_unlock_irq(&mchdev_lock); - } - - return val; -} - -unsigned long i915_mch_val(struct drm_i915_private *i915) -{ - unsigned long m, x, b; - u32 tsfs; - - tsfs = intel_uncore_read(&i915->uncore, TSFS); - - m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); - x = intel_uncore_read8(&i915->uncore, TR1); - - b = tsfs & TSFS_INTR_MASK; - - return ((m * x) / 127) - b; -} - -static int _pxvid_to_vd(u8 pxvid) -{ - if (pxvid == 0) - return 0; - - if (pxvid >= 8 && pxvid < 31) - pxvid = 31; - - return (pxvid + 2) * 125; -} - -static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) -{ - const int vd = _pxvid_to_vd(pxvid); - const int vm = vd - 1125; - - if (INTEL_INFO(dev_priv)->is_mobile) - return vm > 0 ? vm : 0; - - return vd; -} - -static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) -{ - u64 now, diff, diffms; - u32 count; - - lockdep_assert_held(&mchdev_lock); - - now = ktime_get_raw_ns(); - diffms = now - dev_priv->ips.last_time2; - do_div(diffms, NSEC_PER_MSEC); - - /* Don't divide by 0 */ - if (!diffms) - return; - - count = I915_READ(GFXEC); - - if (count < dev_priv->ips.last_count2) { - diff = ~0UL - dev_priv->ips.last_count2; - diff += count; - } else { - diff = count - dev_priv->ips.last_count2; - } - - dev_priv->ips.last_count2 = count; - dev_priv->ips.last_time2 = now; - - /* More magic constants... */ - diff = diff * 1181; - diff = div_u64(diff, diffms * 10); - dev_priv->ips.gfx_power = diff; -} - -void i915_update_gfx_val(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - - if (!IS_GEN(dev_priv, 5)) - return; - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - __i915_update_gfx_val(dev_priv); - spin_unlock_irq(&mchdev_lock); - } -} - -static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) -{ - unsigned long t, corr, state1, corr2, state2; - u32 pxvid, ext_v; - - lockdep_assert_held(&mchdev_lock); - - pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq)); - pxvid = (pxvid >> 24) & 0x7f; - ext_v = pvid_to_extvid(dev_priv, pxvid); - - state1 = ext_v; - - t = i915_mch_val(dev_priv); - - /* Revel in the empirically derived constants */ - - /* Correction factor in 1/100000 units */ - if (t > 80) - corr = ((t * 2349) + 135940); - else if (t >= 50) - corr = ((t * 964) + 29317); - else /* < 50 */ - corr = ((t * 301) + 1004); - - corr = corr * ((150142 * state1) / 10000 - 78642); - corr /= 100000; - corr2 = (corr * dev_priv->ips.corr); - - state2 = (corr2 * state1) / 10000; - state2 /= 100; /* convert to mW */ - - __i915_update_gfx_val(dev_priv); - - return dev_priv->ips.gfx_power + state2; -} - -unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - unsigned long val = 0; - - if (!IS_GEN(dev_priv, 5)) - return 0; - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - val = __i915_gfx_val(dev_priv); - spin_unlock_irq(&mchdev_lock); - } - - return val; -} - -static struct drm_i915_private __rcu *i915_mch_dev; - -static struct drm_i915_private *mchdev_get(void) -{ - struct drm_i915_private *i915; - - rcu_read_lock(); - i915 = rcu_dereference(i915_mch_dev); - if (!kref_get_unless_zero(&i915->drm.ref)) - i915 = NULL; - rcu_read_unlock(); - - return i915; -} - -/** - * i915_read_mch_val - return value for IPS use - * - * Calculate and return a value for the IPS driver to use when deciding whether - * we have thermal and power headroom to increase CPU or GPU power budget. - */ -unsigned long i915_read_mch_val(void) -{ - struct drm_i915_private *i915; - unsigned long chipset_val = 0; - unsigned long graphics_val = 0; - intel_wakeref_t wakeref; - - i915 = mchdev_get(); - if (!i915) - return 0; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - spin_lock_irq(&mchdev_lock); - chipset_val = __i915_chipset_val(i915); - graphics_val = __i915_gfx_val(i915); - spin_unlock_irq(&mchdev_lock); - } - - drm_dev_put(&i915->drm); - return chipset_val + graphics_val; -} -EXPORT_SYMBOL_GPL(i915_read_mch_val); - -/** - * i915_gpu_raise - raise GPU frequency limit - * - * Raise the limit; IPS indicates we have thermal headroom. - */ -bool i915_gpu_raise(void) -{ - struct drm_i915_private *i915; - - i915 = mchdev_get(); - if (!i915) - return false; - - spin_lock_irq(&mchdev_lock); - if (i915->ips.max_delay > i915->ips.fmax) - i915->ips.max_delay--; - spin_unlock_irq(&mchdev_lock); - - drm_dev_put(&i915->drm); - return true; -} -EXPORT_SYMBOL_GPL(i915_gpu_raise); - -/** - * i915_gpu_lower - lower GPU frequency limit - * - * IPS indicates we're close to a thermal limit, so throttle back the GPU - * frequency maximum. - */ -bool i915_gpu_lower(void) -{ - struct drm_i915_private *i915; - - i915 = mchdev_get(); - if (!i915) - return false; - - spin_lock_irq(&mchdev_lock); - if (i915->ips.max_delay < i915->ips.min_delay) - i915->ips.max_delay++; - spin_unlock_irq(&mchdev_lock); - - drm_dev_put(&i915->drm); - return true; -} -EXPORT_SYMBOL_GPL(i915_gpu_lower); - -/** - * i915_gpu_busy - indicate GPU business to IPS - * - * Tell the IPS driver whether or not the GPU is busy. - */ -bool i915_gpu_busy(void) -{ - struct drm_i915_private *i915; - bool ret; - - i915 = mchdev_get(); - if (!i915) - return false; - - ret = i915->gt.awake; - - drm_dev_put(&i915->drm); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_busy); - -/** - * i915_gpu_turbo_disable - disable graphics turbo - * - * Disable graphics turbo by resetting the max frequency and setting the - * current frequency to the default. - */ -bool i915_gpu_turbo_disable(void) -{ - struct drm_i915_private *i915; - bool ret; - - i915 = mchdev_get(); - if (!i915) - return false; - - spin_lock_irq(&mchdev_lock); - i915->ips.max_delay = i915->ips.fstart; - ret = ironlake_set_drps(i915, i915->ips.fstart); - spin_unlock_irq(&mchdev_lock); - - drm_dev_put(&i915->drm); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); - -/** - * Tells the intel_ips driver that the i915 driver is now loaded, if - * IPS got loaded first. - * - * This awkward dance is so that neither module has to depend on the - * other in order for IPS to do the appropriate communication of - * GPU turbo limits to i915. - */ -static void -ips_ping_for_i915_load(void) -{ - void (*link)(void); - - link = symbol_get(ips_link_to_i915_driver); - if (link) { - link(); - symbol_put(ips_link_to_i915_driver); - } -} - -void intel_gpu_ips_init(struct drm_i915_private *dev_priv) -{ - /* We only register the i915 ips part with intel-ips once everything is - * set up, to avoid intel-ips sneaking in and reading bogus values. */ - rcu_assign_pointer(i915_mch_dev, dev_priv); - - ips_ping_for_i915_load(); -} - -void intel_gpu_ips_teardown(void) -{ - rcu_assign_pointer(i915_mch_dev, NULL); -} - -static void intel_init_emon(struct drm_i915_private *dev_priv) -{ - u32 lcfuse; - u8 pxw[16]; - int i; - - /* Disable to program */ - I915_WRITE(ECR, 0); - POSTING_READ(ECR); - - /* Program energy weights for various events */ - I915_WRITE(SDEW, 0x15040d00); - I915_WRITE(CSIEW0, 0x007f0000); - I915_WRITE(CSIEW1, 0x1e220004); - I915_WRITE(CSIEW2, 0x04000004); - - for (i = 0; i < 5; i++) - I915_WRITE(PEW(i), 0); - for (i = 0; i < 3; i++) - I915_WRITE(DEW(i), 0); - - /* Program P-state weights to account for frequency power adjustment */ - for (i = 0; i < 16; i++) { - u32 pxvidfreq = I915_READ(PXVFREQ(i)); - unsigned long freq = intel_pxfreq(pxvidfreq); - unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> - PXVFREQ_PX_SHIFT; - unsigned long val; - - val = vid * vid; - val *= (freq / 1000); - val *= 255; - val /= (127*127*900); - if (val > 0xff) - DRM_ERROR("bad pxval: %ld\n", val); - pxw[i] = val; - } - /* Render standby states get 0 weight */ - pxw[14] = 0; - pxw[15] = 0; - - for (i = 0; i < 4; i++) { - u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | - (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); - I915_WRITE(PXW(i), val); - } - - /* Adjust magic regs to magic values (more experimental results) */ - I915_WRITE(OGW0, 0); - I915_WRITE(OGW1, 0); - I915_WRITE(EG0, 0x00007f00); - I915_WRITE(EG1, 0x0000000e); - I915_WRITE(EG2, 0x000e0000); - I915_WRITE(EG3, 0x68000300); - I915_WRITE(EG4, 0x42000000); - I915_WRITE(EG5, 0x00140031); - I915_WRITE(EG6, 0); - I915_WRITE(EG7, 0); - - for (i = 0; i < 8; i++) - I915_WRITE(PXWL(i), 0); - - /* Enable PMON + select events */ - I915_WRITE(ECR, 0x80000019); - - lcfuse = I915_READ(LCFUSE02); - - dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); -} - -void intel_init_gt_powersave(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) - mkwrite_device_info(dev_priv)->has_rps = false; - - /* Initialize RPS limits (for userspace) */ - if (IS_CHERRYVIEW(dev_priv)) - cherryview_init_gt_powersave(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_init_gt_powersave(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_init_rps_frequencies(dev_priv); - - /* Derive initial user preferences/limits from the hardware limits */ - rps->max_freq_softlimit = rps->max_freq; - rps->min_freq_softlimit = rps->min_freq; - - /* After setting max-softlimit, find the overclock max freq */ - if (IS_GEN(dev_priv, 6) || - IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { - u32 params = 0; - - sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, - ¶ms, NULL); - if (params & BIT(31)) { /* OC supported */ - DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", - (rps->max_freq & 0xff) * 50, - (params & 0xff) * 50); - rps->max_freq = params & 0xff; - } - } - - /* Finally allow us to boost to max by default */ - rps->boost_freq = rps->max_freq; - rps->idle_freq = rps->min_freq; - rps->cur_freq = rps->idle_freq; -} - -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) -{ - dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ - intel_disable_gt_powersave(dev_priv); - - if (INTEL_GEN(dev_priv) >= 11) - gen11_reset_rps_interrupts(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_reset_rps_interrupts(dev_priv); -} - -static void intel_disable_rps(struct drm_i915_private *dev_priv) -{ - lockdep_assert_held(&dev_priv->gt_pm.rps.lock); - - if (!dev_priv->gt_pm.rps.enabled) - return; - - if (INTEL_GEN(dev_priv) >= 9) - gen9_disable_rps(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) - cherryview_disable_rps(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_disable_rps(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_disable_rps(dev_priv); - else if (IS_IRONLAKE_M(dev_priv)) - ironlake_disable_drps(dev_priv); - - dev_priv->gt_pm.rps.enabled = false; -} - -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) -{ - mutex_lock(&dev_priv->gt_pm.rps.lock); - - intel_disable_rps(dev_priv); - if (HAS_LLC(dev_priv)) - intel_llc_disable(&dev_priv->gt.llc); - - mutex_unlock(&dev_priv->gt_pm.rps.lock); -} - -static void intel_enable_rps(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - lockdep_assert_held(&rps->lock); - - if (rps->enabled) - return; - - if (IS_CHERRYVIEW(dev_priv)) { - cherryview_enable_rps(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv)) { - valleyview_enable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 9) { - gen9_enable_rps(dev_priv); - } else if (IS_BROADWELL(dev_priv)) { - gen8_enable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 6) { - gen6_enable_rps(dev_priv); - } else if (IS_IRONLAKE_M(dev_priv)) { - ironlake_enable_drps(dev_priv); - intel_init_emon(dev_priv); - } - - WARN_ON(rps->max_freq < rps->min_freq); - WARN_ON(rps->idle_freq > rps->max_freq); - - WARN_ON(rps->efficient_freq < rps->min_freq); - WARN_ON(rps->efficient_freq > rps->max_freq); - - rps->enabled = true; -} - -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) -{ - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) - return; - - mutex_lock(&dev_priv->gt_pm.rps.lock); - - if (HAS_RPS(dev_priv)) - intel_enable_rps(dev_priv); - - intel_llc_enable(&dev_priv->gt.llc); - - mutex_unlock(&dev_priv->gt_pm.rps.lock); -} - -static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) -{ - /* - * On Ibex Peak and Cougar Point, we need to disable clock - * gating for the panel power sequencer or it will fail to - * start up when no ports are active. - */ - I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); -} - -static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) -{ - enum pipe pipe; - - for_each_pipe(dev_priv, pipe) { - I915_WRITE(DSPCNTR(pipe), - I915_READ(DSPCNTR(pipe)) | - DISPPLANE_TRICKLE_FEED_DISABLE); - - I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe))); - POSTING_READ(DSPSURF(pipe)); - } -} - -static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) -{ - u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - - /* - * Required for FBC - * WaFbcDisableDpfcClockGating:ilk - */ - dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | - ILK_DPFCUNIT_CLOCK_GATE_DISABLE | - ILK_DPFDUNIT_CLOCK_GATE_ENABLE; - - I915_WRITE(PCH_3DCGDIS0, - MARIUNIT_CLOCK_GATE_DISABLE | - SVSMUNIT_CLOCK_GATE_DISABLE); - I915_WRITE(PCH_3DCGDIS1, - VFMUNIT_CLOCK_GATE_DISABLE); - - /* - * According to the spec the following bits should be set in - * order to enable memory self-refresh - * The bit 22/21 of 0x42004 - * The bit 5 of 0x42020 - * The bit 15 of 0x45000 - */ - I915_WRITE(ILK_DISPLAY_CHICKEN2, - (I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_DPARB_GATE | ILK_VSDPFD_FULL)); - dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; - I915_WRITE(DISP_ARB_CTL, - (I915_READ(DISP_ARB_CTL) | - DISP_FBC_WM_DIS)); - - /* - * Based on the document from hardware guys the following bits - * should be set unconditionally in order to enable FBC. - * The bit 22 of 0x42000 - * The bit 22 of 0x42004 - * The bit 7,8,9 of 0x42020. - */ - if (IS_IRONLAKE_M(dev_priv)) { - /* WaFbcAsynchFlipDisableFbcQueue:ilk */ - I915_WRITE(ILK_DISPLAY_CHICKEN1, - I915_READ(ILK_DISPLAY_CHICKEN1) | - ILK_FBCQ_DIS); - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_DPARB_GATE); - } - - I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); - - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_ELPIN_409_SELECT); - I915_WRITE(_3D_CHICKEN2, - _3D_CHICKEN2_WM_READ_PIPELINED << 16 | - _3D_CHICKEN2_WM_READ_PIPELINED); - - /* WaDisableRenderCachePipelinedFlush:ilk */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - - /* WaDisable_RenderCache_OperationalFlush:ilk */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - g4x_disable_trickle_feed(dev_priv); - - ibx_init_clock_gating(dev_priv); -} - -static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) -{ - enum pipe pipe; - u32 val; - - /* - * On Ibex Peak and Cougar Point, we need to disable clock - * gating for the panel power sequencer or it will fail to - * start up when no ports are active. - */ - I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | - PCH_DPLUNIT_CLOCK_GATE_DISABLE | - PCH_CPUNIT_CLOCK_GATE_DISABLE); - I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | - DPLS_EDP_PPS_FIX_DIS); - /* The below fixes the weird display corruption, a few pixels shifted - * downward, on (only) LVDS of some HP laptops with IVY. - */ - for_each_pipe(dev_priv, pipe) { - val = I915_READ(TRANS_CHICKEN2(pipe)); - val |= TRANS_CHICKEN2_TIMING_OVERRIDE; - val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; - if (dev_priv->vbt.fdi_rx_polarity_inverted) - val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; - val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; - val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER; - val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH; - I915_WRITE(TRANS_CHICKEN2(pipe), val); - } - /* WADP0ClockGatingDisable */ - for_each_pipe(dev_priv, pipe) { - I915_WRITE(TRANS_CHICKEN1(pipe), - TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); - } -} - -static void gen6_check_mch_setup(struct drm_i915_private *dev_priv) -{ - u32 tmp; - - tmp = I915_READ(MCH_SSKPD); - if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) - DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", - tmp); -} - -static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) -{ - u32 dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - - I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); - - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_ELPIN_409_SELECT); - - /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ - I915_WRITE(_3D_CHICKEN, - _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); - - /* WaDisable_RenderCache_OperationalFlush:snb */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - - /* - * BSpec recoomends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN6_GT_MODE, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - - I915_WRITE(GEN6_UCGCTL1, - I915_READ(GEN6_UCGCTL1) | - GEN6_BLBUNIT_CLOCK_GATE_DISABLE | - GEN6_CSUNIT_CLOCK_GATE_DISABLE); - - /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock - * gating disable must be set. Failure to set it results in - * flickering pixels due to Z write ordering failures after - * some amount of runtime in the Mesa "fire" demo, and Unigine - * Sanctuary and Tropics, and apparently anything else with - * alpha test or pixel discard. - * - * According to the spec, bit 11 (RCCUNIT) must also be set, - * but we didn't debug actual testcases to find it out. - * - * WaDisableRCCUnitClockGating:snb - * WaDisableRCPBUnitClockGating:snb - */ - I915_WRITE(GEN6_UCGCTL2, - GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | - GEN6_RCCUNIT_CLOCK_GATE_DISABLE); - - /* WaStripsFansDisableFastClipPerformanceFix:snb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); - - /* - * Bspec says: - * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and - * 3DSTATE_SF number of SF output attributes is more than 16." - */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); - - /* - * According to the spec the following bits should be - * set in order to enable memory self-refresh and fbc: - * The bit21 and bit22 of 0x42000 - * The bit21 and bit22 of 0x42004 - * The bit5 and bit7 of 0x42020 - * The bit14 of 0x70180 - * The bit14 of 0x71180 - * - * WaFbcAsynchFlipDisableFbcQueue:snb - */ - I915_WRITE(ILK_DISPLAY_CHICKEN1, - I915_READ(ILK_DISPLAY_CHICKEN1) | - ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_DPARB_GATE | ILK_VSDPFD_FULL); - I915_WRITE(ILK_DSPCLK_GATE_D, - I915_READ(ILK_DSPCLK_GATE_D) | - ILK_DPARBUNIT_CLOCK_GATE_ENABLE | - ILK_DPFDUNIT_CLOCK_GATE_ENABLE); - - g4x_disable_trickle_feed(dev_priv); - - cpt_init_clock_gating(dev_priv); - - gen6_check_mch_setup(dev_priv); -} - -static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) -{ - u32 reg = I915_READ(GEN7_FF_THREAD_MODE); - - /* - * WaVSThreadDispatchOverride:ivb,vlv - * - * This actually overrides the dispatch - * mode for all thread types. - */ - reg &= ~GEN7_FF_SCHED_MASK; - reg |= GEN7_FF_TS_SCHED_HW; - reg |= GEN7_FF_VS_SCHED_HW; - reg |= GEN7_FF_DS_SCHED_HW; - - I915_WRITE(GEN7_FF_THREAD_MODE, reg); -} - -static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) -{ - /* - * TODO: this bit should only be enabled when really needed, then - * disabled when not needed anymore in order to save power. + /* + * TODO: this bit should only be enabled when really needed, then + * disabled when not needed anymore in order to save power. */ if (HAS_PCH_LPT_LP(dev_priv)) I915_WRITE(SOUTH_DSPCLK_GATE_D, @@ -8855,90 +7224,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv) } } -static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* - * N = val - 0xb7 - * Slow = Fast = GPLL ref * N - */ - return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); -} - -static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; -} - -static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* - * N = val / 2 - * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 - */ - return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); -} - -static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - - /* CHV needs even values */ - return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; -} - -int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) -{ - if (INTEL_GEN(dev_priv) >= 9) - return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, - GEN9_FREQ_SCALER); - else if (IS_CHERRYVIEW(dev_priv)) - return chv_gpu_freq(dev_priv, val); - else if (IS_VALLEYVIEW(dev_priv)) - return byt_gpu_freq(dev_priv, val); - else - return val * GT_FREQUENCY_MULTIPLIER; -} - -int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) -{ - if (INTEL_GEN(dev_priv) >= 9) - return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, - GT_FREQUENCY_MULTIPLIER); - else if (IS_CHERRYVIEW(dev_priv)) - return chv_freq_opcode(dev_priv, val); - else if (IS_VALLEYVIEW(dev_priv)) - return byt_freq_opcode(dev_priv, val); - else - return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); -} - void intel_pm_setup(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->gt_pm.rps.lock); - mutex_init(&dev_priv->gt_pm.rps.power.mutex); - - atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); - dev_priv->runtime_pm.suspended = false; atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } - -u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat) -{ - u32 cagf; - - if (INTEL_GEN(dev_priv) >= 9) - cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; - else - cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; - - return cagf; -} diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 00a5801dfc06..b579c724b915 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -29,15 +29,6 @@ void intel_update_watermarks(struct intel_crtc *crtc); void intel_init_pm(struct drm_i915_private *dev_priv); void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv); void intel_pm_setup(struct drm_i915_private *dev_priv); -void intel_gpu_ips_init(struct drm_i915_private *dev_priv); -void intel_gpu_ips_teardown(void); -void intel_init_gt_powersave(struct drm_i915_private *dev_priv); -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); -void gen6_rps_busy(struct drm_i915_private *dev_priv); -void gen6_rps_idle(struct drm_i915_private *dev_priv); -void gen6_rps_boost(struct i915_request *rq); void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv); void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv); void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv); @@ -67,19 +58,6 @@ bool ilk_disable_lp_wm(struct drm_device *dev); void intel_init_ipc(struct drm_i915_private *dev_priv); void intel_enable_ipc(struct drm_i915_private *dev_priv); -int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); -int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); - -u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1); - -unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); -unsigned long i915_mch_val(struct drm_i915_private *dev_priv); -unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); -void i915_update_gfx_val(struct drm_i915_private *dev_priv); - -bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val); -int intel_set_rps(struct drm_i915_private *dev_priv, u8 val); -void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive); bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); #endif /* __INTEL_PM_H__ */ -- cgit v1.2.3-59-g8ed1b From a7d87b70d6da96c6772e50728c8b4e78e4cbfd55 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Nov 2019 10:53:56 +0000 Subject: drm/i915/pmu: "Frequency" is reported as accumulated cycles We report "frequencies" (actual-frequency, requested-frequency) as the number of accumulated cycles so that the average frequency over that period may be determined by the user. This means the units we report to the user are Mcycles (or just M), not MHz. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191109105356.5273-1-chris@chris-wilson.co.uk (cherry picked from commit e88866ef02851c88fe95a4bb97820b94b4d46f36) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_pmu.c') diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 05395015d1f2..0d40dccd1409 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -878,8 +878,8 @@ create_event_attributes(struct i915_pmu *pmu) const char *name; const char *unit; } events[] = { - __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"), - __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"), + __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"), + __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"), __event(I915_PMU_INTERRUPTS, "interrupts", NULL), __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), }; -- cgit v1.2.3-59-g8ed1b