aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/gpu/drm/i915/gt/intel_rps.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_rps.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c449
1 files changed, 305 insertions, 144 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 19542fd9e207..2f59fc6df3c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -8,12 +8,15 @@
#include "i915_drv.h"
#include "intel_gt.h"
+#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_rps.h"
#include "intel_sideband.h"
#include "../../../platform/x86/intel_ips.h"
+#define BUSY_MAX_EI 20u /* ms */
+
/*
* Lock protecting IPS related data structures
*/
@@ -44,6 +47,100 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
intel_uncore_write_fw(uncore, reg, val);
}
+static void rps_timer(struct timer_list *t)
+{
+ struct intel_rps *rps = from_timer(rps, t, timer);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ s64 max_busy[3] = {};
+ ktime_t dt, last;
+
+ for_each_engine(engine, rps_to_gt(rps), id) {
+ s64 busy;
+ int i;
+
+ dt = intel_engine_get_busy_time(engine);
+ last = engine->stats.rps;
+ engine->stats.rps = dt;
+
+ busy = ktime_to_ns(ktime_sub(dt, last));
+ for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
+ if (busy > max_busy[i])
+ swap(busy, max_busy[i]);
+ }
+ }
+
+ dt = ktime_get();
+ last = rps->pm_timestamp;
+ rps->pm_timestamp = dt;
+
+ if (intel_rps_is_active(rps)) {
+ s64 busy;
+ int i;
+
+ dt = ktime_sub(dt, last);
+
+ /*
+ * Our goal is to evaluate each engine independently, so we run
+ * at the lowest clocks required to sustain the heaviest
+ * workload. However, a task may be split into sequential
+ * dependent operations across a set of engines, such that
+ * the independent contributions do not account for high load,
+ * but overall the task is GPU bound. For example, consider
+ * video decode on vcs followed by colour post-processing
+ * on vecs, followed by general post-processing on rcs.
+ * Since multi-engines being active does imply a single
+ * continuous workload across all engines, we hedge our
+ * bets by only contributing a factor of the distributed
+ * load into our busyness calculation.
+ */
+ busy = max_busy[0];
+ for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
+ if (!max_busy[i])
+ break;
+
+ busy += div_u64(max_busy[i], 1 << i);
+ }
+ GT_TRACE(rps_to_gt(rps),
+ "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
+ busy, (int)div64_u64(100 * busy, dt),
+ max_busy[0], max_busy[1], max_busy[2],
+ rps->pm_interval);
+
+ if (100 * busy > rps->power.up_threshold * dt &&
+ rps->cur_freq < rps->max_freq_softlimit) {
+ rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
+ rps->pm_interval = 1;
+ schedule_work(&rps->work);
+ } else if (100 * busy < rps->power.down_threshold * dt &&
+ rps->cur_freq > rps->min_freq_softlimit) {
+ rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
+ rps->pm_interval = 1;
+ schedule_work(&rps->work);
+ } else {
+ rps->last_adj = 0;
+ }
+
+ mod_timer(&rps->timer,
+ jiffies + msecs_to_jiffies(rps->pm_interval));
+ rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
+ }
+}
+
+static void rps_start_timer(struct intel_rps *rps)
+{
+ rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+ rps->pm_interval = 1;
+ mod_timer(&rps->timer, jiffies + 1);
+}
+
+static void rps_stop_timer(struct intel_rps *rps)
+{
+ del_timer_sync(&rps->timer);
+ rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+ cancel_work_sync(&rps->work);
+}
+
static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
{
u32 mask = 0;
@@ -57,7 +154,7 @@ static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
if (val < rps->max_freq_softlimit)
mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
- mask &= READ_ONCE(rps->pm_events);
+ mask &= rps->pm_events;
return rps_pm_sanitize_mask(rps, ~mask);
}
@@ -70,18 +167,11 @@ static void rps_reset_ei(struct intel_rps *rps)
static void rps_enable_interrupts(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- u32 events;
- rps_reset_ei(rps);
+ GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
+ rps->pm_events, rps_pm_mask(rps, rps->last_freq));
- if (IS_VALLEYVIEW(gt->i915))
- /* WaGsvRC0ResidencyMethod:vlv */
- events = GEN6_PM_RP_UP_EI_EXPIRED;
- else
- events = (GEN6_PM_RP_UP_THRESHOLD |
- GEN6_PM_RP_DOWN_THRESHOLD |
- GEN6_PM_RP_DOWN_TIMEOUT);
- WRITE_ONCE(rps->pm_events, events);
+ rps_reset_ei(rps);
spin_lock_irq(&gt->irq_lock);
gen6_gt_pm_enable_irq(gt, rps->pm_events);
@@ -120,8 +210,6 @@ static void rps_disable_interrupts(struct intel_rps *rps)
{
struct intel_gt *gt = rps_to_gt(rps);
- WRITE_ONCE(rps->pm_events, 0);
-
intel_uncore_write(gt->uncore,
GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
@@ -140,6 +228,7 @@ static void rps_disable_interrupts(struct intel_rps *rps)
cancel_work_sync(&rps->work);
rps_reset_interrupts(rps);
+ GT_TRACE(gt, "interrupts:off\n");
}
static const struct cparams {
@@ -186,14 +275,12 @@ static void gen5_rps_init(struct intel_rps *rps)
fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
MEMMODE_FSTART_SHIFT;
- DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
- fmax, fmin, fstart);
+ drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n",
+ fmax, fmin, fstart);
rps->min_freq = fmax;
+ rps->efficient_freq = fstart;
rps->max_freq = fmin;
-
- rps->idle_freq = rps->min_freq;
- rps->cur_freq = rps->idle_freq;
}
static unsigned long
@@ -456,7 +543,8 @@ static bool gen5_rps_enable(struct intel_rps *rps)
if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
MEMCTL_CMD_STS) == 0, 10))
- DRM_ERROR("stuck trying to change perf mode\n");
+ drm_err(&uncore->i915->drm,
+ "stuck trying to change perf mode\n");
mdelay(1);
gen5_rps_set(rps, rps->cur_freq);
@@ -533,8 +621,8 @@ static u32 rps_limits(struct intel_rps *rps, u8 val)
static void rps_set_power(struct intel_rps *rps, int new_power)
{
- struct intel_uncore *uncore = rps_to_uncore(rps);
- struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct intel_uncore *uncore = gt->uncore;
u32 threshold_up = 0, threshold_down = 0; /* in % */
u32 ei_up = 0, ei_down = 0;
@@ -543,55 +631,49 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
if (new_power == rps->power.mode)
return;
+ threshold_up = 95;
+ threshold_down = 85;
+
/* Note the units here are not exactly 1us, but 1280ns. */
switch (new_power) {
case LOW_POWER:
- /* Upclock if more than 95% busy over 16ms */
ei_up = 16000;
- threshold_up = 95;
-
- /* Downclock if less than 85% busy over 32ms */
ei_down = 32000;
- threshold_down = 85;
break;
case BETWEEN:
- /* Upclock if more than 90% busy over 13ms */
ei_up = 13000;
- threshold_up = 90;
-
- /* Downclock if less than 75% busy over 32ms */
ei_down = 32000;
- threshold_down = 75;
break;
case HIGH_POWER:
- /* Upclock if more than 85% busy over 10ms */
ei_up = 10000;
- threshold_up = 85;
-
- /* Downclock if less than 60% busy over 32ms */
ei_down = 32000;
- threshold_down = 60;
break;
}
/* When byt can survive without system hang with dynamic
* sw freq adjustments, this restriction can be lifted.
*/
- if (IS_VALLEYVIEW(i915))
+ if (IS_VALLEYVIEW(gt->i915))
goto skip_hw_write;
- set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
+ GT_TRACE(gt,
+ "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
+ new_power, threshold_up, ei_up, threshold_down, ei_down);
+
+ set(uncore, GEN6_RP_UP_EI,
+ intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
set(uncore, GEN6_RP_UP_THRESHOLD,
- GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));
+ intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
- set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
+ set(uncore, GEN6_RP_DOWN_EI,
+ intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
set(uncore, GEN6_RP_DOWN_THRESHOLD,
- GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));
+ intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
set(uncore, GEN6_RP_CONTROL,
- (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
+ (INTEL_GEN(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
GEN6_RP_MEDIA_HW_NORMAL_MODE |
GEN6_RP_MEDIA_IS_GFX |
GEN6_RP_ENABLE |
@@ -646,9 +728,11 @@ static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
{
+ GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive));
+
mutex_lock(&rps->power.mutex);
if (interactive) {
- if (!rps->power.interactive++ && READ_ONCE(rps->active))
+ if (!rps->power.interactive++ && intel_rps_is_active(rps))
rps_set_power(rps, HIGH_POWER);
} else {
GEM_BUG_ON(!rps->power.interactive);
@@ -673,6 +757,9 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val)
GEN6_AGGRESSIVE_TURBO);
set(uncore, GEN6_RPNSWREQ, swreq);
+ GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n",
+ val, intel_gpu_freq(rps, val), swreq);
+
return 0;
}
@@ -685,6 +772,9 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val)
err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
vlv_punit_put(i915);
+ GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
+ val, intel_gpu_freq(rps, val));
+
return err;
}
@@ -715,29 +805,30 @@ static int rps_set(struct intel_rps *rps, u8 val, bool update)
void intel_rps_unpark(struct intel_rps *rps)
{
- u8 freq;
-
- if (!rps->enabled)
+ if (!intel_rps_is_enabled(rps))
return;
+ GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq);
+
/*
* Use the user's desired frequency as a guide, but for better
* performance, jump directly to RPe as our starting frequency.
*/
mutex_lock(&rps->lock);
- WRITE_ONCE(rps->active, true);
-
- freq = max(rps->cur_freq, rps->efficient_freq),
- freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
- intel_rps_set(rps, freq);
-
- rps->last_adj = 0;
+ intel_rps_set_active(rps);
+ intel_rps_set(rps,
+ clamp(rps->cur_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit));
mutex_unlock(&rps->lock);
- if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+ rps->pm_iir = 0;
+ if (intel_rps_has_interrupts(rps))
rps_enable_interrupts(rps);
+ if (intel_rps_uses_timer(rps))
+ rps_start_timer(rps);
if (IS_GEN(rps_to_i915(rps), 5))
gen5_rps_update(rps);
@@ -745,15 +836,16 @@ void intel_rps_unpark(struct intel_rps *rps)
void intel_rps_park(struct intel_rps *rps)
{
- struct drm_i915_private *i915 = rps_to_i915(rps);
+ int adj;
- if (!rps->enabled)
+ if (!intel_rps_clear_active(rps))
return;
- if (INTEL_GEN(i915) >= 6)
+ if (intel_rps_uses_timer(rps))
+ rps_stop_timer(rps);
+ if (intel_rps_has_interrupts(rps))
rps_disable_interrupts(rps);
- WRITE_ONCE(rps->active, false);
if (rps->last_freq <= rps->idle_freq)
return;
@@ -784,8 +876,15 @@ void intel_rps_park(struct intel_rps *rps)
* (Note we accommodate Cherryview's limitation of only using an
* even bin by applying it to all.)
*/
- rps->cur_freq =
- max_t(int, round_down(rps->cur_freq - 1, 2), rps->min_freq);
+ adj = rps->last_adj;
+ if (adj < 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = -2;
+ rps->last_adj = adj;
+ rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
+
+ GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
}
void intel_rps_boost(struct i915_request *rq)
@@ -793,7 +892,7 @@ void intel_rps_boost(struct i915_request *rq)
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
unsigned long flags;
- if (i915_request_signaled(rq) || !READ_ONCE(rps->active))
+ if (i915_request_signaled(rq) || !intel_rps_is_active(rps))
return;
/* Serializes with i915_request_retire() */
@@ -802,6 +901,9 @@ void intel_rps_boost(struct i915_request *rq)
!dma_fence_is_signaled_locked(&rq->fence)) {
set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
+ GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
+ rq->fence.context, rq->fence.seqno);
+
if (!atomic_fetch_inc(&rps->num_waiters) &&
READ_ONCE(rps->cur_freq) < rps->boost_freq)
schedule_work(&rps->work);
@@ -819,7 +921,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
GEM_BUG_ON(val > rps->max_freq);
GEM_BUG_ON(val < rps->min_freq);
- if (rps->active) {
+ if (intel_rps_is_active(rps)) {
err = rps_set(rps, val, true);
if (err)
return err;
@@ -828,7 +930,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
* Make sure we continue to get interrupts
* until we hit the minimum or maximum frequencies.
*/
- if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
+ if (intel_rps_has_interrupts(rps)) {
struct intel_uncore *uncore = rps_to_uncore(rps);
set(uncore,
@@ -896,12 +998,14 @@ static void gen6_rps_init(struct intel_rps *rps)
static bool rps_reset(struct intel_rps *rps)
{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
/* force a reset */
rps->power.mode = -1;
rps->last_freq = -1;
if (rps_set(rps, rps->min_freq, true)) {
- DRM_ERROR("Failed to reset RPS to initial values\n");
+ drm_err(&i915->drm, "Failed to reset RPS to initial values\n");
return false;
}
@@ -912,20 +1016,18 @@ static bool rps_reset(struct intel_rps *rps)
/* See the Gen9_GT_PM_Programming_Guide doc for the below */
static bool gen9_rps_enable(struct intel_rps *rps)
{
- struct drm_i915_private *i915 = rps_to_i915(rps);
- struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct intel_uncore *uncore = gt->uncore;
/* Program defaults and thresholds for RPS */
- if (IS_GEN(i915, 9))
+ if (IS_GEN(gt->i915, 9))
intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
GEN9_FREQUENCY(rps->rp1_freq));
- /* 1 second timeout */
- intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
- GT_INTERVAL_FROM_US(i915, 1000000));
-
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
+ rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
+
return rps_reset(rps);
}
@@ -936,12 +1038,10 @@ static bool gen8_rps_enable(struct intel_rps *rps)
intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
HSW_FREQUENCY(rps->rp1_freq));
- /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
- intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
- 100000000 / 128); /* 1 second timeout */
-
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+ rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
+
return rps_reset(rps);
}
@@ -953,6 +1053,10 @@ static bool gen6_rps_enable(struct intel_rps *rps)
intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+ rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
return rps_reset(rps);
}
@@ -1038,6 +1142,10 @@ static bool chv_rps_enable(struct intel_rps *rps)
GEN6_RP_UP_BUSY_AVG |
GEN6_RP_DOWN_IDLE_AVG);
+ rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
/* Setting Fixed Bias */
vlv_punit_get(i915);
@@ -1052,8 +1160,8 @@ static bool chv_rps_enable(struct intel_rps *rps)
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
"GPLL not enabled\n");
- DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
- DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+ drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
return rps_reset(rps);
}
@@ -1136,6 +1244,9 @@ static bool vlv_rps_enable(struct intel_rps *rps)
GEN6_RP_UP_BUSY_AVG |
GEN6_RP_DOWN_IDLE_CONT);
+ /* WaGsvRC0ResidencyMethod:vlv */
+ rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+
vlv_punit_get(i915);
/* Setting Fixed Bias */
@@ -1150,8 +1261,8 @@ static bool vlv_rps_enable(struct intel_rps *rps)
drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
"GPLL not enabled\n");
- DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
- DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+ drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
return rps_reset(rps);
}
@@ -1194,33 +1305,71 @@ static unsigned long __ips_gfx_val(struct intel_ips *ips)
return ips->gfx_power + state2;
}
+static bool has_busy_stats(struct intel_rps *rps)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, rps_to_gt(rps), id) {
+ if (!intel_engine_supports_stats(engine))
+ return false;
+ }
+
+ return true;
+}
+
void intel_rps_enable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+ bool enabled = false;
+
+ if (!HAS_RPS(i915))
+ return;
+
+ intel_gt_check_clock_frequency(rps_to_gt(rps));
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
- if (IS_CHERRYVIEW(i915))
- rps->enabled = chv_rps_enable(rps);
+ if (rps->max_freq <= rps->min_freq)
+ /* leave disabled, no room for dynamic reclocking */;
+ else if (IS_CHERRYVIEW(i915))
+ enabled = chv_rps_enable(rps);
else if (IS_VALLEYVIEW(i915))
- rps->enabled = vlv_rps_enable(rps);
+ enabled = vlv_rps_enable(rps);
else if (INTEL_GEN(i915) >= 9)
- rps->enabled = gen9_rps_enable(rps);
+ enabled = gen9_rps_enable(rps);
else if (INTEL_GEN(i915) >= 8)
- rps->enabled = gen8_rps_enable(rps);
+ enabled = gen8_rps_enable(rps);
else if (INTEL_GEN(i915) >= 6)
- rps->enabled = gen6_rps_enable(rps);
+ enabled = gen6_rps_enable(rps);
else if (IS_IRONLAKE_M(i915))
- rps->enabled = gen5_rps_enable(rps);
+ enabled = gen5_rps_enable(rps);
+ else
+ MISSING_CASE(INTEL_GEN(i915));
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
- if (!rps->enabled)
+ if (!enabled)
return;
- drm_WARN_ON(&i915->drm, rps->max_freq < rps->min_freq);
- drm_WARN_ON(&i915->drm, rps->idle_freq > rps->max_freq);
+ GT_TRACE(rps_to_gt(rps),
+ "min:%x, max:%x, freq:[%d, %d]\n",
+ rps->min_freq, rps->max_freq,
+ intel_gpu_freq(rps, rps->min_freq),
+ intel_gpu_freq(rps, rps->max_freq));
- drm_WARN_ON(&i915->drm, rps->efficient_freq < rps->min_freq);
- drm_WARN_ON(&i915->drm, rps->efficient_freq > rps->max_freq);
+ GEM_BUG_ON(rps->max_freq < rps->min_freq);
+ GEM_BUG_ON(rps->idle_freq > rps->max_freq);
+
+ GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
+ GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
+
+ if (has_busy_stats(rps))
+ intel_rps_set_timer(rps);
+ else if (INTEL_GEN(i915) >= 6)
+ intel_rps_set_interrupts(rps);
+ else
+ /* Ironlake currently uses intel_ips.ko */ {}
+
+ intel_rps_set_enabled(rps);
}
static void gen6_rps_disable(struct intel_rps *rps)
@@ -1232,7 +1381,9 @@ void intel_rps_disable(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- rps->enabled = false;
+ intel_rps_clear_enabled(rps);
+ intel_rps_clear_interrupts(rps);
+ intel_rps_clear_timer(rps);
if (INTEL_GEN(i915) >= 6)
gen6_rps_disable(rps);
@@ -1308,7 +1459,8 @@ static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
CCK_GPLL_CLOCK_CONTROL,
i915->czclk_freq);
- DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
+ drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
+ rps->gpll_ref_freq);
}
static void vlv_rps_init(struct intel_rps *rps)
@@ -1336,28 +1488,24 @@ static void vlv_rps_init(struct intel_rps *rps)
i915->mem_freq = 1333;
break;
}
- DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+ drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
rps->max_freq = vlv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
- DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->max_freq),
- rps->max_freq);
+ drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
rps->efficient_freq = vlv_rps_rpe_freq(rps);
- DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->efficient_freq),
- rps->efficient_freq);
+ drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
rps->rp1_freq = vlv_rps_guar_freq(rps);
- DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->rp1_freq),
- rps->rp1_freq);
+ drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
rps->min_freq = vlv_rps_min_freq(rps);
- DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->min_freq),
- rps->min_freq);
+ drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
vlv_iosf_sb_put(i915,
BIT(VLV_IOSF_SB_PUNIT) |
@@ -1387,28 +1535,24 @@ static void chv_rps_init(struct intel_rps *rps)
i915->mem_freq = 1600;
break;
}
- DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+ drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
rps->max_freq = chv_rps_max_freq(rps);
rps->rp0_freq = rps->max_freq;
- DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->max_freq),
- rps->max_freq);
+ drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
rps->efficient_freq = chv_rps_rpe_freq(rps);
- DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->efficient_freq),
- rps->efficient_freq);
+ drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
rps->rp1_freq = chv_rps_guar_freq(rps);
- DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->rp1_freq),
- rps->rp1_freq);
+ drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
rps->min_freq = chv_rps_min_freq(rps);
- DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
- intel_gpu_freq(rps, rps->min_freq),
- rps->min_freq);
+ drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
vlv_iosf_sb_put(i915,
BIT(VLV_IOSF_SB_PUNIT) |
@@ -1471,12 +1615,13 @@ static void rps_work(struct work_struct *work)
{
struct intel_rps *rps = container_of(work, typeof(*rps), work);
struct intel_gt *gt = rps_to_gt(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
bool client_boost = false;
int new_freq, adj, min, max;
u32 pm_iir = 0;
spin_lock_irq(&gt->irq_lock);
- pm_iir = fetch_and_zero(&rps->pm_iir) & READ_ONCE(rps->pm_events);
+ pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
client_boost = atomic_read(&rps->num_waiters);
spin_unlock_irq(&gt->irq_lock);
@@ -1485,6 +1630,10 @@ static void rps_work(struct work_struct *work)
goto out;
mutex_lock(&rps->lock);
+ if (!intel_rps_is_active(rps)) {
+ mutex_unlock(&rps->lock);
+ return;
+ }
pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
@@ -1494,6 +1643,12 @@ static void rps_work(struct work_struct *work)
max = rps->max_freq_softlimit;
if (client_boost)
max = rps->max_freq;
+
+ GT_TRACE(gt,
+ "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
+ pm_iir, yesno(client_boost),
+ adj, new_freq, min, max);
+
if (client_boost && new_freq < rps->boost_freq) {
new_freq = rps->boost_freq;
adj = 0;
@@ -1525,30 +1680,18 @@ static void rps_work(struct work_struct *work)
adj = 0;
}
- rps->last_adj = adj;
-
/*
- * Limit deboosting and boosting to keep ourselves at the extremes
- * when in the respective power modes (i.e. slowly decrease frequencies
- * while in the HIGH_POWER zone and slowly increase frequencies while
- * in the LOW_POWER zone). On idle, we will hit the timeout and drop
- * to the next level quickly, and conversely if busy we expect to
- * hit a waitboost and rapidly switch into max power.
- */
- if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
- (adj > 0 && rps->power.mode == LOW_POWER))
- rps->last_adj = 0;
-
- /* sysfs frequency interfaces may have snuck in while servicing the
- * interrupt
+ * sysfs frequency limits may have snuck in while
+ * servicing the interrupt
*/
new_freq += adj;
new_freq = clamp_t(int, new_freq, min, max);
if (intel_rps_set(rps, new_freq)) {
- DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
- rps->last_adj = 0;
+ drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
+ adj = 0;
}
+ rps->last_adj = adj;
mutex_unlock(&rps->lock);
@@ -1568,6 +1711,8 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
if (unlikely(!events))
return;
+ GT_TRACE(gt, "irq events:%x\n", events);
+
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
@@ -1579,10 +1724,12 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
struct intel_gt *gt = rps_to_gt(rps);
u32 events;
- events = pm_iir & READ_ONCE(rps->pm_events);
+ events = pm_iir & rps->pm_events;
if (events) {
spin_lock(&gt->irq_lock);
+ GT_TRACE(gt, "irq events:%x\n", events);
+
gen6_gt_pm_mask_irq(gt, events);
rps->pm_iir |= events;
@@ -1640,6 +1787,7 @@ void intel_rps_init_early(struct intel_rps *rps)
mutex_init(&rps->power.mutex);
INIT_WORK(&rps->work, rps_work);
+ timer_setup(&rps->timer, rps_timer, 0);
atomic_set(&rps->num_waiters, 0);
}
@@ -1668,9 +1816,10 @@ void intel_rps_init(struct intel_rps *rps)
sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
&params, NULL);
if (params & BIT(31)) { /* OC supported */
- DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
- (rps->max_freq & 0xff) * 50,
- (params & 0xff) * 50);
+ drm_dbg(&i915->drm,
+ "Overclocking supported, max: %dMHz, overclock: %dMHz\n",
+ (rps->max_freq & 0xff) * 50,
+ (params & 0xff) * 50);
rps->max_freq = params & 0xff;
}
}
@@ -1678,7 +1827,9 @@ void intel_rps_init(struct intel_rps *rps)
/* Finally allow us to boost to max by default */
rps->boost_freq = rps->max_freq;
rps->idle_freq = rps->min_freq;
- rps->cur_freq = rps->idle_freq;
+
+ /* Start in the middle, from here we will autotune based on workload */
+ rps->cur_freq = rps->efficient_freq;
rps->pm_intrmsk_mbz = 0;
@@ -1695,6 +1846,12 @@ void intel_rps_init(struct intel_rps *rps)
rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
}
+void intel_rps_sanitize(struct intel_rps *rps)
+{
+ if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+ rps_disable_interrupts(rps);
+}
+
u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -1722,7 +1879,7 @@ static u32 read_cagf(struct intel_rps *rps)
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
} else {
- freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
+ freq = intel_uncore_read(rps_to_uncore(rps), GEN6_RPSTAT1);
}
return intel_rps_get_cagf(rps, freq);
@@ -1730,7 +1887,7 @@ static u32 read_cagf(struct intel_rps *rps)
u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
- struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
+ struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
intel_wakeref_t wakeref;
u32 freq = 0;
@@ -1930,3 +2087,7 @@ bool i915_gpu_turbo_disable(void)
return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_rps.c"
+#endif