aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_pm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c378
1 files changed, 268 insertions, 110 deletions
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index fa4ccb346389..eadc15cddbeb 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -67,7 +67,7 @@ static void skl_init_clock_gating(struct drm_device *dev)
gen9_init_clock_gating(dev);
- if (INTEL_REVID(dev) == SKL_REVID_A0) {
+ if (INTEL_REVID(dev) <= SKL_REVID_B0) {
/*
* WaDisableSDEUnitClockGating:skl
* WaSetGAPSunitClckGateDisable:skl
@@ -75,6 +75,10 @@ static void skl_init_clock_gating(struct drm_device *dev)
I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
GEN8_GAPSUNIT_CLOCK_GATE_DISABLE |
GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+ /* WaDisableVFUnitClockGating:skl */
+ I915_WRITE(GEN6_UCGCTL2, I915_READ(GEN6_UCGCTL2) |
+ GEN6_VFUNIT_CLOCK_GATE_DISABLE);
}
if (INTEL_REVID(dev) <= SKL_REVID_D0) {
@@ -84,8 +88,7 @@ static void skl_init_clock_gating(struct drm_device *dev)
/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
I915_WRITE(FF_SLICE_CS_CHICKEN2,
- I915_READ(FF_SLICE_CS_CHICKEN2) |
- GEN9_TSG_BARRIER_ACK_DISABLE);
+ _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
}
if (INTEL_REVID(dev) <= SKL_REVID_E0)
@@ -94,6 +97,26 @@ static void skl_init_clock_gating(struct drm_device *dev)
GEN8_LQSC_RO_PERF_DIS);
}
+static void bxt_init_clock_gating(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ gen9_init_clock_gating(dev);
+
+ /*
+ * FIXME:
+ * GEN8_SDEUNIT_CLOCK_GATE_DISABLE applies on A0 only.
+ * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
+ */
+ /* WaDisableSDEUnitClockGating:bxt */
+ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+ GEN8_SDEUNIT_CLOCK_GATE_DISABLE |
+ GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
+
+ /* FIXME: apply on A0 only */
+ I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
+}
+
static void i915_pineview_get_mem_freq(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1792,7 +1815,7 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
mode->crtc_clock);
ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
- intel_ddi_get_cdclk_freq(dev_priv));
+ dev_priv->display.get_display_clock_speed(dev_priv->dev));
return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
PIPE_WM_LINETIME_TIME(linetime);
@@ -1923,7 +1946,7 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
int ilk_wm_max_level(const struct drm_device *dev)
{
/* how many WM levels are we expecting */
- if (IS_GEN9(dev))
+ if (INTEL_INFO(dev)->gen >= 9)
return 7;
else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
return 4;
@@ -2045,22 +2068,20 @@ static void ilk_compute_wm_parameters(struct drm_crtc *crtc,
p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc);
- if (crtc->primary->state->fb) {
- p->pri.enabled = true;
+ if (crtc->primary->state->fb)
p->pri.bytes_per_pixel =
crtc->primary->state->fb->bits_per_pixel / 8;
- } else {
- p->pri.enabled = false;
- p->pri.bytes_per_pixel = 0;
- }
+ else
+ p->pri.bytes_per_pixel = 4;
+
+ p->cur.bytes_per_pixel = 4;
+ /*
+ * TODO: for now, assume primary and cursor planes are always enabled.
+ * Setting them to false makes the screen flicker.
+ */
+ p->pri.enabled = true;
+ p->cur.enabled = true;
- if (crtc->cursor->state->fb) {
- p->cur.enabled = true;
- p->cur.bytes_per_pixel = 4;
- } else {
- p->cur.enabled = false;
- p->cur.bytes_per_pixel = 0;
- }
p->pri.horiz_pixels = intel_crtc->config->pipe_src_w;
p->cur.horiz_pixels = intel_crtc->base.cursor->state->crtc_w;
@@ -2538,6 +2559,7 @@ static bool ilk_disable_lp_wm(struct drm_device *dev)
*/
#define SKL_DDB_SIZE 896 /* in blocks */
+#define BXT_DDB_SIZE 512
static void
skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
@@ -2556,7 +2578,10 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
return;
}
- ddb_size = SKL_DDB_SIZE;
+ if (IS_BROXTON(dev))
+ ddb_size = BXT_DDB_SIZE;
+ else
+ ddb_size = SKL_DDB_SIZE;
ddb_size -= 4; /* 4 blocks for bypass path allocation */
@@ -2612,8 +2637,18 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
}
static unsigned int
-skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p)
+skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
{
+
+ /* for planar format */
+ if (p->y_bytes_per_pixel) {
+ if (y) /* y-plane data rate */
+ return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
+ else /* uv-plane data rate */
+ return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
+ }
+
+ /* for packed formats */
return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
}
@@ -2636,7 +2671,10 @@ skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
if (!p->enabled)
continue;
- total_data_rate += skl_plane_relative_data_rate(p);
+ total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
+ if (p->y_bytes_per_pixel) {
+ total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
+ }
}
return total_data_rate;
@@ -2655,6 +2693,7 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
uint16_t alloc_size, start, cursor_blocks;
uint16_t minimum[I915_MAX_PLANES];
+ uint16_t y_minimum[I915_MAX_PLANES];
unsigned int total_data_rate;
int plane;
@@ -2683,6 +2722,8 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
minimum[plane] = 8;
alloc_size -= minimum[plane];
+ y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
+ alloc_size -= y_minimum[plane];
}
/*
@@ -2696,16 +2737,17 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
start = alloc->start;
for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
const struct intel_plane_wm_parameters *p;
- unsigned int data_rate;
- uint16_t plane_blocks;
+ unsigned int data_rate, y_data_rate;
+ uint16_t plane_blocks, y_plane_blocks = 0;
p = &params->plane[plane];
if (!p->enabled)
continue;
- data_rate = skl_plane_relative_data_rate(p);
+ data_rate = skl_plane_relative_data_rate(p, 0);
/*
+ * allocation for (packed formats) or (uv-plane part of planar format):
* promote the expression to 64 bits to avoid overflowing, the
* result is < available as data_rate / total_data_rate < 1
*/
@@ -2717,6 +2759,22 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
ddb->plane[pipe][plane].end = start + plane_blocks;
start += plane_blocks;
+
+ /*
+ * allocation for y_plane part of planar format:
+ */
+ if (p->y_bytes_per_pixel) {
+ y_data_rate = skl_plane_relative_data_rate(p, 1);
+ y_plane_blocks = y_minimum[plane];
+ y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
+ total_data_rate);
+
+ ddb->y_plane[pipe][plane].start = start;
+ ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
+
+ start += y_plane_blocks;
+ }
+
}
}
@@ -2829,13 +2887,18 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
fb = crtc->primary->state->fb;
+ /* For planar: Bpp is for uv plane, y_Bpp is for y plane */
if (fb) {
p->plane[0].enabled = true;
- p->plane[0].bytes_per_pixel = fb->bits_per_pixel / 8;
+ p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+ drm_format_plane_cpp(fb->pixel_format, 1) : fb->bits_per_pixel / 8;
+ p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+ drm_format_plane_cpp(fb->pixel_format, 0) : 0;
p->plane[0].tiling = fb->modifier[0];
} else {
p->plane[0].enabled = false;
p->plane[0].bytes_per_pixel = 0;
+ p->plane[0].y_bytes_per_pixel = 0;
p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
}
p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
@@ -2843,6 +2906,7 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
p->plane[0].rotation = crtc->primary->state->rotation;
fb = crtc->cursor->state->fb;
+ p->cursor.y_bytes_per_pixel = 0;
if (fb) {
p->cursor.enabled = true;
p->cursor.bytes_per_pixel = fb->bits_per_pixel / 8;
@@ -2878,22 +2942,25 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
uint32_t plane_bytes_per_line, plane_blocks_per_line;
uint32_t res_blocks, res_lines;
uint32_t selected_result;
+ uint8_t bytes_per_pixel;
if (latency == 0 || !p->active || !p_params->enabled)
return false;
+ bytes_per_pixel = p_params->y_bytes_per_pixel ?
+ p_params->y_bytes_per_pixel :
+ p_params->bytes_per_pixel;
method1 = skl_wm_method1(p->pixel_rate,
- p_params->bytes_per_pixel,
+ bytes_per_pixel,
latency);
method2 = skl_wm_method2(p->pixel_rate,
p->pipe_htotal,
p_params->horiz_pixels,
- p_params->bytes_per_pixel,
+ bytes_per_pixel,
p_params->tiling,
latency);
- plane_bytes_per_line = p_params->horiz_pixels *
- p_params->bytes_per_pixel;
+ plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
@@ -3110,10 +3177,14 @@ static void skl_write_wm_values(struct drm_i915_private *dev_priv,
new->plane_trans[pipe][i]);
I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
- for (i = 0; i < intel_num_planes(crtc); i++)
+ for (i = 0; i < intel_num_planes(crtc); i++) {
skl_ddb_entry_write(dev_priv,
PLANE_BUF_CFG(pipe, i),
&new->ddb.plane[pipe][i]);
+ skl_ddb_entry_write(dev_priv,
+ PLANE_NV12_BUF_CFG(pipe, i),
+ &new->ddb.y_plane[pipe][i]);
+ }
skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
&new->ddb.cursor[pipe]);
@@ -3178,7 +3249,7 @@ static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
{
struct drm_device *dev = dev_priv->dev;
struct skl_ddb_allocation *cur_ddb, *new_ddb;
- bool reallocated[I915_MAX_PIPES] = {false, false, false};
+ bool reallocated[I915_MAX_PIPES] = {};
struct intel_crtc *crtc;
enum pipe pipe;
@@ -3271,6 +3342,7 @@ static bool skl_update_pipe_wm(struct drm_crtc *crtc,
return false;
intel_crtc->wm.skl_active = *pipe_wm;
+
return true;
}
@@ -3364,8 +3436,16 @@ skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
intel_plane->wm.scaled = scaled;
intel_plane->wm.horiz_pixels = sprite_width;
intel_plane->wm.vert_pixels = sprite_height;
- intel_plane->wm.bytes_per_pixel = pixel_size;
intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
+
+ /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
+ intel_plane->wm.bytes_per_pixel =
+ (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+ drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
+ intel_plane->wm.y_bytes_per_pixel =
+ (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+ drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
+
/*
* Framebuffer can be NULL on plane disable, but it does not
* matter for watermarks if we assume no tiling in that case.
@@ -3930,6 +4010,8 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
GEN6_RP_DOWN_IDLE_AVG);
dev_priv->rps.power = new_power;
+ dev_priv->rps.up_threshold = threshold_up;
+ dev_priv->rps.down_threshold = threshold_down;
dev_priv->rps.last_adj = 0;
}
@@ -4001,8 +4083,11 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
"Odd GPU freq value\n"))
val &= ~1;
- if (val != dev_priv->rps.cur_freq)
+ if (val != dev_priv->rps.cur_freq) {
vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+ if (!IS_CHERRYVIEW(dev_priv))
+ gen6_set_rps_thresholds(dev_priv, val);
+ }
I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
@@ -4010,50 +4095,25 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
}
-/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
+/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
*
* * If Gfx is Idle, then
- * 1. Mask Turbo interrupts
- * 2. Bring up Gfx clock
- * 3. Change the freq to Rpn and wait till P-Unit updates freq
- * 4. Clear the Force GFX CLK ON bit so that Gfx can down
- * 5. Unmask Turbo interrupts
+ * 1. Forcewake Media well.
+ * 2. Request idle freq.
+ * 3. Release Forcewake of Media well.
*/
static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
{
- struct drm_device *dev = dev_priv->dev;
u32 val = dev_priv->rps.idle_freq;
- /* CHV and latest VLV don't need to force the gfx clock */
- if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
- valleyview_set_rps(dev_priv->dev, val);
- return;
- }
-
- /*
- * When we are idle. Drop to min voltage state.
- */
-
if (dev_priv->rps.cur_freq <= val)
return;
- /* Mask turbo interrupt so that they will not come in between */
- I915_WRITE(GEN6_PMINTRMSK,
- gen6_sanitize_rps_pm_mask(dev_priv, ~0));
-
- vlv_force_gfx_clock(dev_priv, true);
-
- dev_priv->rps.cur_freq = val;
-
- vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
-
- if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
- & GENFREQSTATUS) == 0, 100))
- DRM_ERROR("timed out waiting for Punit\n");
-
- vlv_force_gfx_clock(dev_priv, false);
-
- I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+ /* Wake up the media well, as that takes a lot less
+ * power than the Render well. */
+ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
+ valleyview_set_rps(dev_priv->dev, val);
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
}
void gen6_rps_busy(struct drm_i915_private *dev_priv)
@@ -4082,21 +4142,47 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
}
mutex_unlock(&dev_priv->rps.hw_lock);
+
+ spin_lock(&dev_priv->rps.client_lock);
+ while (!list_empty(&dev_priv->rps.clients))
+ list_del_init(dev_priv->rps.clients.next);
+ spin_unlock(&dev_priv->rps.client_lock);
}
-void gen6_rps_boost(struct drm_i915_private *dev_priv)
+void gen6_rps_boost(struct drm_i915_private *dev_priv,
+ struct intel_rps_client *rps,
+ unsigned long submitted)
{
- u32 val;
+ /* This is intentionally racy! We peek at the state here, then
+ * validate inside the RPS worker.
+ */
+ if (!(dev_priv->mm.busy &&
+ dev_priv->rps.enabled &&
+ dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
+ return;
- mutex_lock(&dev_priv->rps.hw_lock);
- val = dev_priv->rps.max_freq_softlimit;
- if (dev_priv->rps.enabled &&
- dev_priv->mm.busy &&
- dev_priv->rps.cur_freq < val) {
- intel_set_rps(dev_priv->dev, val);
- dev_priv->rps.last_adj = 0;
+ /* Force a RPS boost (and don't count it against the client) if
+ * the GPU is severely congested.
+ */
+ if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
+ rps = NULL;
+
+ spin_lock(&dev_priv->rps.client_lock);
+ if (rps == NULL || list_empty(&rps->link)) {
+ spin_lock_irq(&dev_priv->irq_lock);
+ if (dev_priv->rps.interrupts_enabled) {
+ dev_priv->rps.client_boost = true;
+ queue_work(dev_priv->wq, &dev_priv->rps.work);
+ }
+ spin_unlock_irq(&dev_priv->irq_lock);
+
+ if (rps != NULL) {
+ list_add(&rps->link, &dev_priv->rps.clients);
+ rps->boosts++;
+ } else
+ dev_priv->rps.boosts++;
}
- mutex_unlock(&dev_priv->rps.hw_lock);
+ spin_unlock(&dev_priv->rps.client_lock);
}
void intel_set_rps(struct drm_device *dev, u8 val)
@@ -4250,8 +4336,8 @@ static void gen6_init_rps_frequencies(struct drm_device *dev)
if (dev_priv->rps.min_freq_softlimit == 0) {
if (IS_HASWELL(dev) || IS_BROADWELL(dev))
dev_priv->rps.min_freq_softlimit =
- /* max(RPe, 450 MHz) */
- max(dev_priv->rps.efficient_freq, (u8) 9);
+ max_t(int, dev_priv->rps.efficient_freq,
+ intel_freq_opcode(dev_priv, 450));
else
dev_priv->rps.min_freq_softlimit =
dev_priv->rps.min_freq;
@@ -4325,8 +4411,13 @@ static void gen9_enable_rc6(struct drm_device *dev)
GEN6_RC_CTL_EI_MODE(1) |
rc6_mask);
- /* 3b: Enable Coarse Power Gating only when RC6 is enabled */
- I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 3 : 0);
+ /*
+ * 3b: Enable Coarse Power Gating only when RC6 is enabled.
+ * WaDisableRenderPowerGating:skl,bxt - Render PG need to be disabled with RC6.
+ */
+ I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
+ GEN9_MEDIA_PG_ENABLE : 0);
+
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
@@ -4665,24 +4756,6 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
return rp1;
}
-static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
- struct drm_device *dev = dev_priv->dev;
- u32 val, rpn;
-
- if (dev->pdev->revision >= 0x20) {
- val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
- rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
- FB_GFX_FREQ_FUSE_MASK);
- } else { /* For pre-production hardware */
- val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
- rpn = ((val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) &
- PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK);
- }
-
- return rpn;
-}
-
static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
{
u32 val, rp1;
@@ -4889,9 +4962,9 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
mutex_lock(&dev_priv->rps.hw_lock);
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
switch ((val >> 2) & 0x7) {
case 0:
@@ -4934,7 +5007,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
dev_priv->rps.rp1_freq);
- dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
+ /* PUnit validated range is only [RPe, RP0] */
+ dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
dev_priv->rps.min_freq);
@@ -4996,8 +5070,8 @@ static void cherryview_enable_rps(struct drm_device *dev)
I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
I915_WRITE(GEN6_RC_SLEEP, 0);
- /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */
- I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
+ /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
+ I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
/* allows RC6 residency counter to work */
I915_WRITE(VLV_COUNTER_CONTROL,
@@ -5032,6 +5106,12 @@ static void cherryview_enable_rps(struct drm_device *dev)
GEN6_RP_UP_BUSY_AVG |
GEN6_RP_DOWN_IDLE_AVG);
+ /* Setting Fixed Bias */
+ val = VLV_OVERRIDE_EN |
+ VLV_SOC_TDP_EN |
+ CHV_BIAS_CPU_50_SOC_50;
+ vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
+
val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
/* RPS code assumes GPLL is used */
@@ -5116,6 +5196,12 @@ static void valleyview_enable_rps(struct drm_device *dev)
I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
+ /* Setting Fixed Bias */
+ val = VLV_OVERRIDE_EN |
+ VLV_SOC_TDP_EN |
+ VLV_BIAS_CPU_125_SOC_875;
+ vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
+
val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
/* RPS code assumes GPLL is used */
@@ -5798,13 +5884,15 @@ static void ibx_init_clock_gating(struct drm_device *dev)
static void g4x_disable_trickle_feed(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
- int pipe;
+ enum pipe pipe;
for_each_pipe(dev_priv, pipe) {
I915_WRITE(DSPCNTR(pipe),
I915_READ(DSPCNTR(pipe)) |
DISPPLANE_TRICKLE_FEED_DISABLE);
- intel_flush_primary_plane(dev_priv, pipe);
+
+ I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
+ POSTING_READ(DSPSURF(pipe));
}
}
@@ -6094,10 +6182,9 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
enum pipe pipe;
+ uint32_t misccpctl;
- I915_WRITE(WM3_LP_ILK, 0);
- I915_WRITE(WM2_LP_ILK, 0);
- I915_WRITE(WM1_LP_ILK, 0);
+ ilk_init_lp_watermarks(dev);
/* WaSwitchSolVfFArbitrationPriority:bdw */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -6126,6 +6213,22 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+ /*
+ * WaProgramL3SqcReg1Default:bdw
+ * WaTempDisableDOPClkGating:bdw
+ */
+ misccpctl = I915_READ(GEN7_MISCCPCTL);
+ I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+ I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+ I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+
+ /*
+ * WaGttCachingOffByDefault:bdw
+ * GTT cache may not work with big pages, so if those
+ * are ever enabled GTT cache may need to be disabled.
+ */
+ I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
+
lpt_init_clock_gating(dev);
}
@@ -6401,6 +6504,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
/* WaDisableSDEUnitClockGating:chv */
I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+ /*
+ * GTT cache may not work with big pages, so if those
+ * are ever enabled GTT cache may need to be disabled.
+ */
+ I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
}
static void g4x_init_clock_gating(struct drm_device *dev)
@@ -6544,7 +6653,12 @@ void intel_init_pm(struct drm_device *dev)
if (INTEL_INFO(dev)->gen >= 9) {
skl_setup_wm_latency(dev);
- dev_priv->display.init_clock_gating = skl_init_clock_gating;
+ if (IS_BROXTON(dev))
+ dev_priv->display.init_clock_gating =
+ bxt_init_clock_gating;
+ else if (IS_SKYLAKE(dev))
+ dev_priv->display.init_clock_gating =
+ skl_init_clock_gating;
dev_priv->display.update_wm = skl_update_wm;
dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
} else if (HAS_PCH_SPLIT(dev)) {
@@ -6762,14 +6876,58 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
return val / GT_FREQUENCY_MULTIPLIER;
}
+struct request_boost {
+ struct work_struct work;
+ struct drm_i915_gem_request *req;
+};
+
+static void __intel_rps_boost_work(struct work_struct *work)
+{
+ struct request_boost *boost = container_of(work, struct request_boost, work);
+ struct drm_i915_gem_request *req = boost->req;
+
+ if (!i915_gem_request_completed(req, true))
+ gen6_rps_boost(to_i915(req->ring->dev), NULL,
+ req->emitted_jiffies);
+
+ i915_gem_request_unreference__unlocked(req);
+ kfree(boost);
+}
+
+void intel_queue_rps_boost_for_request(struct drm_device *dev,
+ struct drm_i915_gem_request *req)
+{
+ struct request_boost *boost;
+
+ if (req == NULL || INTEL_INFO(dev)->gen < 6)
+ return;
+
+ if (i915_gem_request_completed(req, true))
+ return;
+
+ boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
+ if (boost == NULL)
+ return;
+
+ i915_gem_request_reference(req);
+ boost->req = req;
+
+ INIT_WORK(&boost->work, __intel_rps_boost_work);
+ queue_work(to_i915(dev)->wq, &boost->work);
+}
+
void intel_pm_setup(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
mutex_init(&dev_priv->rps.hw_lock);
+ spin_lock_init(&dev_priv->rps.client_lock);
INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
intel_gen6_powersave_work);
+ INIT_LIST_HEAD(&dev_priv->rps.clients);
+ INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
+ INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
dev_priv->pm.suspended = false;
}