diff options
Diffstat (limited to 'drivers/gpu/drm/i915')
179 files changed, 7773 insertions, 4532 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 9afa5c4a6bf0..1e1cb245fca7 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -25,6 +25,7 @@ config DRM_I915 select CRC32 select SND_HDA_I915 if SND_HDA_CORE select CEC_CORE if CEC_NOTIFIER + select VMAP_PFN help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 1cb28c20807c..25cd9788a4d5 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -153,6 +153,7 @@ config DRM_I915_SELFTEST select DRM_EXPORT_FOR_TESTS if m select FAULT_INJECTION select PRIME_NUMBERS + select CRC32 help Choose this option to allow the driver to perform selftests upon loading; also requires the i915.selftest=1 module parameter. To diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index bda4c0e408f8..e5574e506a5c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -234,6 +234,7 @@ i915-y += \ display/intel_ddi.o \ display/intel_dp.o \ display/intel_dp_aux_backlight.o \ + display/intel_dp_hdcp.o \ display/intel_dp_link_training.o \ display/intel_dp_mst.o \ display/intel_dsi.o \ diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 8c55f5bee9ab..520715b7d5b5 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -712,7 +712,7 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, switch (intel_dsi->pixel_format) { default: MISSING_CASE(intel_dsi->pixel_format); - /* fallthrough */ + fallthrough; case MIPI_DSI_FMT_RGB565: tmp |= PIX_FMT_RGB565; break; @@ -739,7 +739,7 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, switch (intel_dsi->video_mode_format) { default: MISSING_CASE(intel_dsi->video_mode_format); - /* fallthrough 
*/ + fallthrough; case VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS: tmp |= VIDEO_MODE_SYNC_EVENT; break; @@ -792,7 +792,7 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, switch (pipe) { default: MISSING_CASE(pipe); - /* fallthrough */ + fallthrough; case PIPE_A: tmp |= TRANS_DDI_EDP_INPUT_A_ON; break; @@ -1646,6 +1646,7 @@ static const struct drm_encoder_funcs gen11_dsi_encoder_funcs = { }; static const struct drm_connector_funcs gen11_dsi_connector_funcs = { + .detect = intel_panel_detect, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_connector_destroy, diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 630f49b7aa01..86be032bcf96 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -527,8 +527,6 @@ void intel_atomic_state_clear(struct drm_atomic_state *s) intel_atomic_clear_global_state(state); state->dpll_set = state->modeset = false; - state->global_state_changed = false; - state->active_pipes = 0; } struct intel_crtc_state * @@ -542,40 +540,3 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state, return to_intel_crtc_state(crtc_state); } - -int _intel_atomic_lock_global_state(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc *crtc; - - state->global_state_changed = true; - - for_each_intel_crtc(&dev_priv->drm, crtc) { - int ret; - - ret = drm_modeset_lock(&crtc->base.mutex, - state->base.acquire_ctx); - if (ret) - return ret; - } - - return 0; -} - -int _intel_atomic_serialize_global_state(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc *crtc; - - state->global_state_changed = true; - - for_each_intel_crtc(&dev_priv->drm, crtc) { - struct intel_crtc_state *crtc_state; - - crtc_state = intel_atomic_get_crtc_state(&state->base, 
crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - } - - return 0; -} diff --git a/drivers/gpu/drm/i915/display/intel_atomic.h b/drivers/gpu/drm/i915/display/intel_atomic.h index 11146292b06f..285de07011dc 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.h +++ b/drivers/gpu/drm/i915/display/intel_atomic.h @@ -56,8 +56,4 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state); -int _intel_atomic_lock_global_state(struct intel_atomic_state *state); - -int _intel_atomic_serialize_global_state(struct intel_atomic_state *state); - #endif /* __INTEL_ATOMIC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index ad4aa66fd676..f7de55707746 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -958,13 +958,8 @@ static int glk_force_audio_cdclk_commit(struct intel_atomic_state *state, if (IS_ERR(cdclk_state)) return PTR_ERR(cdclk_state); - cdclk_state->force_min_cdclk_changed = true; cdclk_state->force_min_cdclk = enable ? 
2 * 96000 : 0; - ret = intel_atomic_lock_global_state(&cdclk_state->base); - if (ret) - return ret; - return drm_atomic_commit(&state->base); } diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index c53c85d38fa5..4716484af62d 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -905,7 +905,7 @@ parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) drm_dbg_kms(&dev_priv->drm, "VBT tp1 wakeup time value %d is outside range[0-3], defaulting to max value 2500us\n", psr_table->tp1_wakeup_time); - /* fallthrough */ + fallthrough; case 2: dev_priv->vbt.psr.tp1_wakeup_time_us = 2500; break; @@ -925,7 +925,7 @@ parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) drm_dbg_kms(&dev_priv->drm, "VBT tp2_tp3 wakeup time value %d is outside range[0-3], defaulting to max value 2500us\n", psr_table->tp2_tp3_wakeup_time); - /* fallthrough */ + fallthrough; case 2: dev_priv->vbt.psr.tp2_tp3_wakeup_time_us = 2500; break; @@ -1656,6 +1656,8 @@ static enum port dvo_port_to_port(struct drm_i915_private *dev_priv, [PORT_E] = { DVO_PORT_HDMIE, DVO_PORT_DPE, DVO_PORT_CRT }, [PORT_F] = { DVO_PORT_HDMIF, DVO_PORT_DPF, -1 }, [PORT_G] = { DVO_PORT_HDMIG, DVO_PORT_DPG, -1 }, + [PORT_H] = { DVO_PORT_HDMIH, DVO_PORT_DPH, -1 }, + [PORT_I] = { DVO_PORT_HDMII, DVO_PORT_DPI, -1 }, }; /* * Bspec lists the ports as A, B, C, D - however internally in our @@ -1775,7 +1777,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, switch (child->hdmi_max_data_rate) { default: MISSING_CASE(child->hdmi_max_data_rate); - /* fall through */ + fallthrough; case HDMI_MAX_DATA_RATE_PLATFORM: max_tmds_clock = 0; break; @@ -2133,7 +2135,7 @@ void intel_bios_init(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->vbt.display_devices); - if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) { + if (!HAS_DISPLAY(dev_priv)) { 
drm_dbg_kms(&dev_priv->drm, "Skipping VBT init due to disabled display.\n"); return; @@ -2650,6 +2652,12 @@ enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, case DP_AUX_G: aux_ch = AUX_CH_G; break; + case DP_AUX_H: + aux_ch = AUX_CH_H; + break; + case DP_AUX_I: + aux_ch = AUX_CH_I; + break; default: MISSING_CASE(info->alternate_aux_channel); aux_ch = AUX_CH_A; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index bb91dace304a..cb93f6cf6d37 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -326,7 +326,7 @@ static void pnv_get_cdclk(struct drm_i915_private *dev_priv, default: drm_err(&dev_priv->drm, "Unknown pnv display core clock 0x%04x\n", gcfgc); - /* fall through */ + fallthrough; case GC_DISPLAY_CLOCK_133_MHZ_PNV: cdclk_config->cdclk = 133333; break; @@ -766,7 +766,7 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, switch (cdclk) { default: MISSING_CASE(cdclk); - /* fall through */ + fallthrough; case 337500: val |= LCPLL_CLK_FREQ_337_5_BDW; break; @@ -1042,7 +1042,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, drm_WARN_ON(&dev_priv->drm, cdclk != dev_priv->cdclk.hw.bypass); drm_WARN_ON(&dev_priv->drm, vco != 0); - /* fall through */ + fallthrough; case 308571: case 337500: freq_select = CDCLK_FREQ_337_308; @@ -1333,7 +1333,7 @@ static void icl_readout_refclk(struct drm_i915_private *dev_priv, switch (dssm) { default: MISSING_CASE(dssm); - /* fall through */ + fallthrough; case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz: cdclk_config->ref = 24000; break; @@ -1561,7 +1561,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, drm_WARN_ON(&dev_priv->drm, cdclk != dev_priv->cdclk.hw.bypass); drm_WARN_ON(&dev_priv->drm, vco != 0); - /* fall through */ + fallthrough; case 2: divider = BXT_CDCLK_CD2X_DIV_SEL_1; break; @@ -2426,7 +2426,6 @@ static struct intel_global_state 
*intel_cdclk_duplicate_state(struct intel_globa if (!cdclk_state) return NULL; - cdclk_state->force_min_cdclk_changed = false; cdclk_state->pipe = INVALID_PIPE; return &cdclk_state->base; @@ -2501,6 +2500,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state) if (ret) return ret; } else if (old_cdclk_state->active_pipes != new_cdclk_state->active_pipes || + old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk || intel_cdclk_changed(&old_cdclk_state->logical, &new_cdclk_state->logical)) { ret = intel_atomic_lock_global_state(&new_cdclk_state->base); @@ -2677,7 +2677,7 @@ void intel_update_cdclk(struct drm_i915_private *dev_priv) */ if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) intel_de_write(dev_priv, GMBUSFREQ_VLV, - DIV_ROUND_UP(dev_priv->cdclk.hw.cdclk, 1000)); + DIV_ROUND_UP(dev_priv->cdclk.hw.cdclk, 1000)); } static int cnp_rawclk(struct drm_i915_private *dev_priv) @@ -2903,9 +2903,10 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.get_cdclk = i85x_get_cdclk; else if (IS_I845G(dev_priv)) dev_priv->display.get_cdclk = fixed_200mhz_get_cdclk; - else { /* 830 */ - drm_WARN(&dev_priv->drm, !IS_I830(dev_priv), - "Unknown platform. Assuming 133 MHz CDCLK\n"); + else if (IS_I830(dev_priv)) + dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; + + if (drm_WARN(&dev_priv->drm, !dev_priv->display.get_cdclk, + "Unknown platform. 
Assuming 133 MHz CDCLK\n")) dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; - } } diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 5731806e4cee..b34eb00fb327 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -17,8 +17,8 @@ struct intel_atomic_state; struct intel_crtc_state; struct intel_cdclk_vals { - u16 refclk; u32 cdclk; + u16 refclk; u8 divider; /* CD2X divider * 2 */ u8 ratio; }; @@ -49,7 +49,6 @@ struct intel_cdclk_state { /* forced minimum cdclk for glk+ audio w/a */ int force_min_cdclk; - bool force_min_cdclk_changed; /* bitmask of active pipes */ u8 active_pipes; diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index eccaa79cb4a9..157d8c8c605a 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -52,7 +52,7 @@ cnl_get_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) switch (val & (PROCESS_INFO_MASK | VOLTAGE_INFO_MASK)) { default: MISSING_CASE(val); - /* fall through */ + fallthrough; case VOLTAGE_INFO_0_85V | PROCESS_INFO_DOT_0: procmon = &cnl_procmon_values[PROCMON_0_85V_DOT_0]; break; @@ -258,7 +258,7 @@ static bool phy_is_master(struct drm_i915_private *dev_priv, enum phy phy) static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv, enum phy phy) { - bool ret; + bool ret = true; u32 expected_val = 0; if (!icl_combo_phy_enabled(dev_priv, phy)) @@ -276,7 +276,7 @@ static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv, DCC_MODE_SELECT_CONTINUOSLY); } - ret = cnl_verify_procmon_ref_values(dev_priv, phy); + ret &= cnl_verify_procmon_ref_values(dev_priv, phy); if (phy_is_master(dev_priv, phy)) { ret &= check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW8(phy), @@ -320,7 +320,7 @@ void intel_combo_phy_power_up_lanes(struct drm_i915_private *dev_priv, break; default: 
MISSING_CASE(lane_count); - /* fall-through */ + fallthrough; case 4: lane_mask = PWR_UP_ALL_LANES; break; @@ -337,7 +337,7 @@ void intel_combo_phy_power_up_lanes(struct drm_i915_private *dev_priv, break; default: MISSING_CASE(lane_count); - /* fall-through */ + fallthrough; case 4: lane_mask = PWR_UP_ALL_LANES; break; diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 5b4510ce5693..4934edd51cb0 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -833,6 +833,9 @@ intel_crt_detect(struct drm_connector *connector, connector->base.id, connector->name, force); + if (!INTEL_DISPLAY_ENABLED(dev_priv)) + return connector_status_disconnected; + if (dev_priv->params.load_detect_test) { wakeref = intel_display_power_get(dev_priv, intel_encoder->power_domain); diff --git a/drivers/gpu/drm/i915/display/intel_csr.c b/drivers/gpu/drm/i915/display/intel_csr.c index f22a7645c249..d5db16764619 100644 --- a/drivers/gpu/drm/i915/display/intel_csr.c +++ b/drivers/gpu/drm/i915/display/intel_csr.c @@ -40,12 +40,12 @@ #define GEN12_CSR_MAX_FW_SIZE ICL_CSR_MAX_FW_SIZE -#define RKL_CSR_PATH "i915/rkl_dmc_ver2_01.bin" -#define RKL_CSR_VERSION_REQUIRED CSR_VERSION(2, 1) +#define RKL_CSR_PATH "i915/rkl_dmc_ver2_02.bin" +#define RKL_CSR_VERSION_REQUIRED CSR_VERSION(2, 2) MODULE_FIRMWARE(RKL_CSR_PATH); -#define TGL_CSR_PATH "i915/tgl_dmc_ver2_06.bin" -#define TGL_CSR_VERSION_REQUIRED CSR_VERSION(2, 6) +#define TGL_CSR_PATH "i915/tgl_dmc_ver2_08.bin" +#define TGL_CSR_VERSION_REQUIRED CSR_VERSION(2, 8) #define TGL_CSR_MAX_FW_SIZE 0x6000 MODULE_FIRMWARE(TGL_CSR_PATH); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 2c484b55bcdf..cdcb7b1034ae 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -572,13 +572,13 @@ static const struct cnl_ddi_buf_trans ehl_combo_phy_ddi_translations_dp[] = { /* 
NT mV Trans mV db */ { 0xA, 0x33, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ { 0xA, 0x47, 0x36, 0x00, 0x09 }, /* 350 500 3.1 */ - { 0xC, 0x64, 0x30, 0x00, 0x0F }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2C, 0x00, 0x13 }, /* 350 900 8.2 */ + { 0xC, 0x64, 0x34, 0x00, 0x0B }, /* 350 700 6.0 */ + { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 350 900 8.2 */ { 0xA, 0x46, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x64, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 500 900 5.1 */ + { 0xC, 0x64, 0x38, 0x00, 0x07 }, /* 500 700 2.9 */ + { 0x6, 0x7F, 0x32, 0x00, 0x0D }, /* 500 900 5.1 */ { 0xC, 0x61, 0x3F, 0x00, 0x00 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x37, 0x00, 0x08 }, /* 600 900 3.5 */ + { 0x6, 0x7F, 0x38, 0x00, 0x07 }, /* 600 900 3.5 */ { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ }; @@ -706,6 +706,42 @@ static const struct cnl_ddi_buf_trans tgl_combo_phy_ddi_translations_dp_hbr2[] = { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ }; +static const struct cnl_ddi_buf_trans tgl_uy_combo_phy_ddi_translations_dp_hbr2[] = { + /* NT mV Trans mV db */ + { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ + { 0xA, 0x4F, 0x36, 0x00, 0x09 }, /* 350 500 3.1 */ + { 0xC, 0x60, 0x32, 0x00, 0x0D }, /* 350 700 6.0 */ + { 0xC, 0x7F, 0x2D, 0x00, 0x12 }, /* 350 900 8.2 */ + { 0xC, 0x47, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ + { 0xC, 0x6F, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ + { 0x6, 0x7D, 0x32, 0x00, 0x0D }, /* 500 900 5.1 */ + { 0x6, 0x60, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ + { 0x6, 0x7F, 0x34, 0x00, 0x0B }, /* 600 900 3.5 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ +}; + +/* + * Cloned the HOBL entry to comply with the voltage and pre-emphasis entries + * that DisplayPort specification requires + */ +static const struct cnl_ddi_buf_trans tgl_combo_phy_ddi_translations_edp_hbr2_hobl[] = { + /* VS pre-emp */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 0 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 1 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 2 */ + { 0x6, 
0x7F, 0x3F, 0x00, 0x00 }, /* 0 3 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 0 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 1 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 2 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 2 0 */ + { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 2 1 */ +}; + +static bool is_hobl_buf_trans(const struct cnl_ddi_buf_trans *table) +{ + return table == tgl_combo_phy_ddi_translations_edp_hbr2_hobl; +} + static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) { @@ -1038,27 +1074,74 @@ static const struct cnl_ddi_buf_trans * ehl_get_combo_buf_trans(struct intel_encoder *encoder, int type, int rate, int *n_entries) { - if (type != INTEL_OUTPUT_HDMI && type != INTEL_OUTPUT_EDP) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + switch (type) { + case INTEL_OUTPUT_HDMI: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); + return icl_combo_phy_ddi_translations_hdmi; + case INTEL_OUTPUT_EDP: + if (dev_priv->vbt.edp.low_vswing) { + if (rate > 540000) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr3); + return icl_combo_phy_ddi_translations_edp_hbr3; + } else { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); + return icl_combo_phy_ddi_translations_edp_hbr2; + } + } + /* fall through */ + default: + /* All combo DP and eDP ports that do not support low_vswing */ *n_entries = ARRAY_SIZE(ehl_combo_phy_ddi_translations_dp); return ehl_combo_phy_ddi_translations_dp; } - - return icl_get_combo_buf_trans(encoder, type, rate, n_entries); } static const struct cnl_ddi_buf_trans * tgl_get_combo_buf_trans(struct intel_encoder *encoder, int type, int rate, int *n_entries) { - if (type == INTEL_OUTPUT_HDMI || type == INTEL_OUTPUT_EDP) { - return icl_get_combo_buf_trans(encoder, type, rate, n_entries); - } else if (rate > 270000) { - *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr2); - return tgl_combo_phy_ddi_translations_dp_hbr2; - } + struct 
drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + switch (type) { + case INTEL_OUTPUT_HDMI: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); + return icl_combo_phy_ddi_translations_hdmi; + case INTEL_OUTPUT_EDP: + if (dev_priv->vbt.edp.hobl) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + if (!intel_dp->hobl_failed && rate <= 540000) { + /* Same table applies to TGL, RKL and DG1 */ + *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_edp_hbr2_hobl); + return tgl_combo_phy_ddi_translations_edp_hbr2_hobl; + } + } + + if (rate > 540000) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr3); + return icl_combo_phy_ddi_translations_edp_hbr3; + } else if (dev_priv->vbt.edp.low_vswing) { + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); + return icl_combo_phy_ddi_translations_edp_hbr2; + } + /* fall through */ + default: + /* All combo DP and eDP ports that do not support low_vswing */ + if (rate > 270000) { + if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) { + *n_entries = ARRAY_SIZE(tgl_uy_combo_phy_ddi_translations_dp_hbr2); + return tgl_uy_combo_phy_ddi_translations_dp_hbr2; + } + + *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr2); + return tgl_combo_phy_ddi_translations_dp_hbr2; + } - *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr); - return tgl_combo_phy_ddi_translations_dp_hbr; + *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr); + return tgl_combo_phy_ddi_translations_dp_hbr; + } } static const struct tgl_dkl_phy_ddi_buf_trans * @@ -1738,6 +1821,8 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state ctl = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder)); + drm_WARN_ON(crtc->base.dev, ctl & TRANS_DDI_HDCP_SIGNALLING); + ctl &= ~TRANS_DDI_FUNC_ENABLE; if (IS_GEN_RANGE(dev_priv, 8, 10)) @@ -1765,12 +1850,12 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state } int 
intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, + enum transcoder cpu_transcoder, bool enable) { struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); intel_wakeref_t wakeref; - enum pipe pipe = 0; int ret = 0; u32 tmp; @@ -1779,19 +1864,12 @@ int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, if (drm_WARN_ON(dev, !wakeref)) return -ENXIO; - if (drm_WARN_ON(dev, - !intel_encoder->get_hw_state(intel_encoder, &pipe))) { - ret = -EIO; - goto out; - } - - tmp = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(pipe)); + tmp = intel_de_read(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (enable) tmp |= TRANS_DDI_HDCP_SIGNALLING; else tmp &= ~TRANS_DDI_HDCP_SIGNALLING; - intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe), tmp); -out: + intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(cpu_transcoder), tmp); intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref); return ret; } @@ -1888,7 +1966,7 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, switch (tmp & TRANS_DDI_EDP_INPUT_MASK) { default: MISSING_CASE(tmp & TRANS_DDI_EDP_INPUT_MASK); - /* fallthrough */ + fallthrough; case TRANS_DDI_EDP_INPUT_A_ON: case TRANS_DDI_EDP_INPUT_A_ONOFF: *pipe_mask = BIT(PIPE_A); @@ -2392,6 +2470,15 @@ static void icl_ddi_combo_vswing_program(struct intel_encoder *encoder, level = n_entries - 1; } + if (type == INTEL_OUTPUT_EDP) { + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + val = EDP4K2K_MODE_OVRD_EN | EDP4K2K_MODE_OVRD_OPTIMIZED; + intel_dp->hobl_active = is_hobl_buf_trans(ddi_translations); + intel_de_rmw(dev_priv, ICL_PORT_CL_DW10(phy), val, + intel_dp->hobl_active ? 
val : 0); + } + /* Set PORT_TX_DW5 */ val = intel_de_read(dev_priv, ICL_PORT_TX_DW5_LN0(phy)); val &= ~(SCALING_MODE_SEL_MASK | RTERM_SELECT_MASK | @@ -2655,7 +2742,7 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; int rate = 0; - if (type == INTEL_OUTPUT_HDMI) { + if (type != INTEL_OUTPUT_HDMI) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); rate = intel_dp->link_rate; @@ -2802,7 +2889,9 @@ hsw_set_signal_levels(struct intel_dp *intel_dp) static u32 icl_dpclka_cfgcr0_clk_off(struct drm_i915_private *dev_priv, enum phy phy) { - if (intel_phy_is_combo(dev_priv, phy)) { + if (IS_ROCKETLAKE(dev_priv)) { + return RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); + } else if (intel_phy_is_combo(dev_priv, phy)) { return ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy); } else if (intel_phy_is_tc(dev_priv, phy)) { enum tc_port tc_port = intel_port_to_tc(dev_priv, @@ -2829,6 +2918,16 @@ static void icl_map_plls_to_ports(struct intel_encoder *encoder, (val & icl_dpclka_cfgcr0_clk_off(dev_priv, phy)) == 0); if (intel_phy_is_combo(dev_priv, phy)) { + u32 mask, sel; + + if (IS_ROCKETLAKE(dev_priv)) { + mask = RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + sel = RKL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy); + } else { + mask = ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + sel = ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy); + } + /* * Even though this register references DDIs, note that we * want to pass the PHY rather than the port (DDI). For @@ -2839,8 +2938,8 @@ static void icl_map_plls_to_ports(struct intel_encoder *encoder, * Clock Select chooses the PLL for both DDIA and DDID and * drives port A in all cases." 
*/ - val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); - val |= ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy); + val &= ~mask; + val |= sel; intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val); intel_de_posting_read(dev_priv, ICL_DPCLKA_CFGCR0); } @@ -3371,6 +3470,7 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, intel_ddi_init_dp_buf_reg(encoder); if (!is_mst) intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + intel_dp_configure_protocol_converter(intel_dp); intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); intel_dp_sink_set_fec_ready(intel_dp, crtc_state); @@ -3482,19 +3582,17 @@ static void intel_ddi_pre_enable(struct intel_atomic_state *state, intel_ddi_pre_enable_hdmi(state, encoder, crtc_state, conn_state); } else { - struct intel_lspcon *lspcon = - enc_to_intel_lspcon(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); intel_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); - if (lspcon->active) { - struct intel_digital_port *dig_port = - enc_to_dig_port(encoder); + /* FIXME precompute everything properly */ + /* FIXME how do we turn infoframes off again? 
*/ + if (dig_port->lspcon.active && dig_port->dp.has_hdmi_sink) dig_port->set_infoframes(encoder, crtc_state->has_infoframe, crtc_state, conn_state); - } } } @@ -3938,18 +4036,19 @@ static void intel_ddi_update_pipe_dp(struct intel_atomic_state *state, intel_psr_update(intel_dp, crtc_state, conn_state); intel_dp_set_infoframes(encoder, true, crtc_state, conn_state); - intel_edp_drrs_enable(intel_dp, crtc_state); + intel_edp_drrs_update(intel_dp, crtc_state); intel_panel_update_backlight(state, encoder, crtc_state, conn_state); } -static void intel_ddi_update_pipe(struct intel_atomic_state *state, - struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) +void intel_ddi_update_pipe(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) && + !intel_encoder_is_mst(encoder)) intel_ddi_update_pipe_dp(state, encoder, crtc_state, conn_state); @@ -4037,8 +4136,7 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) intel_wait_ddi_buf_idle(dev_priv, port); } - dp_tp_ctl = DP_TP_CTL_ENABLE | - DP_TP_CTL_LINK_TRAIN_PAT1 | DP_TP_CTL_SCRAMBLE_DISABLE; + dp_tp_ctl = DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_PAT1; if (intel_dp->link_mst) dp_tp_ctl |= DP_TP_CTL_MODE_MST; else { @@ -4061,16 +4159,10 @@ static void intel_ddi_set_link_train(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u8 train_pat_mask = drm_dp_training_pattern_mask(intel_dp->dpcd); - enum port port = dp_to_dig_port(intel_dp)->base.port; u32 temp; temp = intel_de_read(dev_priv, intel_dp->regs.dp_tp_ctl); - if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE) - temp |= DP_TP_CTL_SCRAMBLE_DISABLE; - else - temp &= ~DP_TP_CTL_SCRAMBLE_DISABLE; - temp &= ~DP_TP_CTL_LINK_TRAIN_MASK; 
switch (dp_train_pat & train_pat_mask) { case DP_TRAINING_PATTERN_DISABLE: @@ -4091,9 +4183,6 @@ static void intel_ddi_set_link_train(struct intel_dp *intel_dp, } intel_de_write(dev_priv, intel_dp->regs.dp_tp_ctl, temp); - - intel_de_write(dev_priv, DDI_BUF_CTL(port), intel_dp->DP); - intel_de_posting_read(dev_priv, DDI_BUF_CTL(port)); } static void intel_ddi_set_idle_link_train(struct intel_dp *intel_dp) @@ -4268,7 +4357,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->hdmi_scrambling = true; if (temp & TRANS_DDI_HIGH_TMDS_CHAR_RATE) pipe_config->hdmi_high_tmds_clock_ratio = true; - /* fall through */ + fallthrough; case TRANS_DDI_MODE_SELECT_DVI: pipe_config->output_types |= BIT(INTEL_OUTPUT_HDMI); pipe_config->lane_count = 4; @@ -4878,6 +4967,64 @@ intel_ddi_max_lanes(struct intel_digital_port *dig_port) return max_lanes; } +static bool hti_uses_phy(struct drm_i915_private *i915, enum phy phy) +{ + return i915->hti_state & HDPORT_ENABLED && + (i915->hti_state & HDPORT_PHY_USED_DP(phy) || + i915->hti_state & HDPORT_PHY_USED_HDMI(phy)); +} + +static enum hpd_pin tgl_hpd_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + if (port >= PORT_D) + return HPD_PORT_TC1 + port - PORT_D; + else + return HPD_PORT_A + port - PORT_A; +} + +static enum hpd_pin rkl_hpd_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + if (HAS_PCH_TGP(dev_priv)) + return tgl_hpd_pin(dev_priv, port); + + if (port >= PORT_D) + return HPD_PORT_C + port - PORT_D; + else + return HPD_PORT_A + port - PORT_A; +} + +static enum hpd_pin icl_hpd_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + if (port >= PORT_C) + return HPD_PORT_TC1 + port - PORT_C; + else + return HPD_PORT_A + port - PORT_A; +} + +static enum hpd_pin ehl_hpd_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + if (port == PORT_D) + return HPD_PORT_A; + + if (HAS_PCH_MCC(dev_priv)) + return icl_hpd_pin(dev_priv, port); + + return HPD_PORT_A + port - PORT_A; +} + 
+static enum hpd_pin cnl_hpd_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + if (port == PORT_F) + return HPD_PORT_E; + + return HPD_PORT_A + port - PORT_A; +} + void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *dig_port; @@ -4885,6 +5032,18 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) bool init_hdmi, init_dp, init_lspcon = false; enum phy phy = intel_port_to_phy(dev_priv, port); + /* + * On platforms with HTI (aka HDPORT), if it's enabled at boot it may + * have taken over some of the PHYs and made them unavailable to the + * driver. In that case we should skip initializing the corresponding + * outputs. + */ + if (hti_uses_phy(dev_priv, phy)) { + drm_dbg_kms(&dev_priv->drm, "PORT %c / PHY %c reserved by HTI\n", + port_name(port), phy_name(phy)); + return; + } + init_hdmi = intel_bios_port_supports_dvi(dev_priv, port) || intel_bios_port_supports_hdmi(dev_priv, port); init_dp = intel_bios_port_supports_dp(dev_priv, port); @@ -4918,6 +5077,9 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) drm_encoder_init(&dev_priv->drm, &encoder->base, &intel_ddi_funcs, DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port)); + mutex_init(&dig_port->hdcp_mutex); + dig_port->num_hdcp_streams = 0; + encoder->hotplug = intel_ddi_hotplug; encoder->compute_output_type = intel_ddi_compute_output_type; encoder->compute_config = intel_ddi_compute_config; @@ -4939,6 +5101,19 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->cloneable = 0; encoder->pipe_mask = ~0; + if (IS_ROCKETLAKE(dev_priv)) + encoder->hpd_pin = rkl_hpd_pin(dev_priv, port); + else if (INTEL_GEN(dev_priv) >= 12) + encoder->hpd_pin = tgl_hpd_pin(dev_priv, port); + else if (IS_ELKHARTLAKE(dev_priv)) + encoder->hpd_pin = ehl_hpd_pin(dev_priv, port); + else if (IS_GEN(dev_priv, 11)) + encoder->hpd_pin = icl_hpd_pin(dev_priv, port); + else if (IS_GEN(dev_priv, 10)) + encoder->hpd_pin 
= cnl_hpd_pin(dev_priv, port); + else + encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); + if (INTEL_GEN(dev_priv) >= 11) dig_port->saved_port_bits = intel_de_read(dev_priv, DDI_BUF_CTL(port)) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index 077e9dbbe367..f5fb62fc9400 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -16,6 +16,7 @@ struct intel_crtc_state; struct intel_dp; struct intel_dpll_hw_state; struct intel_encoder; +enum transcoder; void intel_ddi_fdi_post_disable(struct intel_atomic_state *state, struct intel_encoder *intel_encoder, @@ -43,6 +44,7 @@ void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, u32 bxt_signal_levels(struct intel_dp *intel_dp); u32 ddi_signal_levels(struct intel_dp *intel_dp); int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, + enum transcoder cpu_transcoder, bool enable); void icl_sanitize_encoder_pll_mapping(struct intel_encoder *encoder); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 729ec6e0d43a..99e682563d47 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -47,6 +47,7 @@ #include "display/intel_ddi.h" #include "display/intel_dp.h" #include "display/intel_dp_mst.h" +#include "display/intel_dpll_mgr.h" #include "display/intel_dsi.h" #include "display/intel_dvo.h" #include "display/intel_gmbus.h" @@ -66,6 +67,7 @@ #include "intel_bw.h" #include "intel_cdclk.h" #include "intel_color.h" +#include "intel_csr.h" #include "intel_display_types.h" #include "intel_dp_link_training.h" #include "intel_fbc.h" @@ -2029,12 +2031,12 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) case I915_FORMAT_MOD_Y_TILED_CCS: if (is_ccs_plane(fb, color_plane)) return 128; - /* fall through */ + fallthrough; case 
I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: if (is_ccs_plane(fb, color_plane)) return 64; - /* fall through */ + fallthrough; case I915_FORMAT_MOD_Y_TILED: if (IS_GEN(dev_priv, 2) || HAS_128_BYTE_Y_TILING(dev_priv)) return 128; @@ -2043,7 +2045,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) case I915_FORMAT_MOD_Yf_TILED_CCS: if (is_ccs_plane(fb, color_plane)) return 128; - /* fall through */ + fallthrough; case I915_FORMAT_MOD_Yf_TILED: switch (cpp) { case 1: @@ -2185,7 +2187,7 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: if (is_semiplanar_uv_plane(fb, color_plane)) return intel_tile_row_size(fb, color_plane); - /* Fall-through */ + fallthrough; case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: return 16 * 1024; case I915_FORMAT_MOD_Y_TILED_CCS: @@ -2194,7 +2196,7 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb, if (INTEL_GEN(dev_priv) >= 12 && is_semiplanar_uv_plane(fb, color_plane)) return intel_tile_row_size(fb, color_plane); - /* Fall-through */ + fallthrough; case I915_FORMAT_MOD_Yf_TILED: return 1 * 1024 * 1024; default: @@ -2310,7 +2312,7 @@ err: void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { - i915_gem_object_lock(vma->obj); + i915_gem_object_lock(vma->obj, NULL); if (flags & PLANE_HAS_FENCE) i915_vma_unpin_fence(vma); i915_gem_object_unpin_from_display_plane(vma); @@ -3432,6 +3434,14 @@ initial_plane_vma(struct drm_i915_private *i915, if (IS_ERR(obj)) return NULL; + /* + * Mark it WT ahead of time to avoid changing the + * cache_level during fbdev initialization. The + * unbind there would get stuck waiting for rcu. + */ + i915_gem_object_set_cache_coherency(obj, HAS_WT(i915) ? 
+ I915_CACHE_WT : I915_CACHE_NONE); + switch (plane_config->tiling) { case I915_TILING_NONE: break; @@ -3450,7 +3460,7 @@ initial_plane_vma(struct drm_i915_private *i915, if (IS_ERR(vma)) goto err_obj; - if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base)) + if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base)) goto err_obj; if (i915_gem_object_is_tiled(obj) && @@ -3761,6 +3771,44 @@ static int glk_max_plane_width(const struct drm_framebuffer *fb, } } +static int icl_min_plane_width(const struct drm_framebuffer *fb) +{ + /* Wa_14011264657, Wa_14011050563: gen11+ */ + switch (fb->format->format) { + case DRM_FORMAT_C8: + return 18; + case DRM_FORMAT_RGB565: + return 10; + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ARGB2101010: + case DRM_FORMAT_ABGR2101010: + case DRM_FORMAT_XVYU2101010: + case DRM_FORMAT_Y212: + case DRM_FORMAT_Y216: + return 6; + case DRM_FORMAT_NV12: + return 20; + case DRM_FORMAT_P010: + case DRM_FORMAT_P012: + case DRM_FORMAT_P016: + return 12; + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_ABGR16161616F: + case DRM_FORMAT_XVYU12_16161616: + case DRM_FORMAT_XVYU16161616: + return 4; + default: + return 1; + } +} + static int icl_max_plane_width(const struct drm_framebuffer *fb, int color_plane, unsigned int rotation) @@ -3843,29 +3891,31 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) int y = plane_state->uapi.src.y1 >> 16; int w = drm_rect_width(&plane_state->uapi.src) >> 16; int h = drm_rect_height(&plane_state->uapi.src) >> 16; - int max_width; - int max_height; - u32 alignment; - u32 offset; + int max_width, min_width, max_height; + u32 alignment, offset; int aux_plane = intel_main_to_aux_plane(fb, 0); u32 aux_offset = plane_state->color_plane[aux_plane].offset; - if 
(INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 11) { max_width = icl_max_plane_width(fb, 0, rotation); - else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + min_width = icl_min_plane_width(fb); + } else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { max_width = glk_max_plane_width(fb, 0, rotation); - else + min_width = 1; + } else { max_width = skl_max_plane_width(fb, 0, rotation); + min_width = 1; + } if (INTEL_GEN(dev_priv) >= 11) max_height = icl_max_plane_height(); else max_height = skl_max_plane_height(); - if (w > max_width || h > max_height) { + if (w > max_width || w < min_width || h > max_height) { drm_dbg_kms(&dev_priv->drm, - "requested Y/RGB source size %dx%d too big (limit %dx%d)\n", - w, h, max_width, max_height); + "requested Y/RGB source size %dx%d outside limits (min: %dx1 max: %dx%d)\n", + w, h, min_width, max_width, max_height); return -EINVAL; } @@ -4051,8 +4101,7 @@ static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state) int skl_check_plane_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->hw.fb; - int ret; - bool needs_aux = false; + int ret, i; ret = intel_plane_compute_gtt(plane_state); if (ret) @@ -4066,7 +4115,6 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) * it. 
*/ if (is_ccs_modifier(fb->modifier)) { - needs_aux = true; ret = skl_check_ccs_aux_surface(plane_state); if (ret) return ret; @@ -4074,20 +4122,15 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) if (intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier)) { - needs_aux = true; ret = skl_check_nv12_aux_surface(plane_state); if (ret) return ret; } - if (!needs_aux) { - int i; - - for (i = 1; i < fb->format->num_planes; i++) { - plane_state->color_plane[i].offset = ~0xfff; - plane_state->color_plane[i].x = 0; - plane_state->color_plane[i].y = 0; - } + for (i = fb->format->num_planes; i < ARRAY_SIZE(plane_state->color_plane); i++) { + plane_state->color_plane[i].offset = ~0xfff; + plane_state->color_plane[i].x = 0; + plane_state->color_plane[i].y = 0; } ret = skl_check_main_surface(plane_state); @@ -6211,7 +6254,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, case DRM_FORMAT_ARGB16161616F: if (INTEL_GEN(dev_priv) >= 11) break; - /* fall through */ + fallthrough; default: drm_dbg_kms(&dev_priv->drm, "[PLANE:%d:%s] FB:%d unsupported scaling format 0x%x\n", @@ -7290,6 +7333,10 @@ enum intel_display_power_domain intel_port_to_power_domain(enum port port) return POWER_DOMAIN_PORT_DDI_F_LANES; case PORT_G: return POWER_DOMAIN_PORT_DDI_G_LANES; + case PORT_H: + return POWER_DOMAIN_PORT_DDI_H_LANES; + case PORT_I: + return POWER_DOMAIN_PORT_DDI_I_LANES; default: MISSING_CASE(port); return POWER_DOMAIN_PORT_OTHER; @@ -7315,6 +7362,10 @@ intel_aux_power_domain(struct intel_digital_port *dig_port) return POWER_DOMAIN_AUX_F_TBT; case AUX_CH_G: return POWER_DOMAIN_AUX_G_TBT; + case AUX_CH_H: + return POWER_DOMAIN_AUX_H_TBT; + case AUX_CH_I: + return POWER_DOMAIN_AUX_I_TBT; default: MISSING_CASE(dig_port->aux_ch); return POWER_DOMAIN_AUX_C_TBT; @@ -7346,6 +7397,10 @@ intel_legacy_aux_to_power_domain(enum aux_ch aux_ch) return POWER_DOMAIN_AUX_F; case AUX_CH_G: return POWER_DOMAIN_AUX_G; + case AUX_CH_H: + return 
POWER_DOMAIN_AUX_H; + case AUX_CH_I: + return POWER_DOMAIN_AUX_I; default: MISSING_CASE(aux_ch); return POWER_DOMAIN_AUX_A; @@ -8114,7 +8169,7 @@ static void compute_m_n(unsigned int m, unsigned int n, * which the devices expect also in synchronous clock mode. */ if (constant_n) - *ret_n = 0x8000; + *ret_n = DP_LINK_CONSTANT_N_VALUE; else *ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX); @@ -10581,6 +10636,10 @@ skl_get_initial_plane_config(struct intel_crtc *crtc, val & PLANE_CTL_FLIP_HORIZONTAL) plane_config->rotation |= DRM_MODE_REFLECT_X; + /* 90/270 degree rotation would require extra work */ + if (drm_rotation_90_or_270(plane_config->rotation)) + goto error; + base = intel_de_read(dev_priv, PLANE_SURF(pipe, plane_id)) & 0xfffff000; plane_config->base = base; @@ -10802,9 +10861,18 @@ static void icl_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, u32 temp; if (intel_phy_is_combo(dev_priv, phy)) { - temp = intel_de_read(dev_priv, ICL_DPCLKA_CFGCR0) & - ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); - id = temp >> ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy); + u32 mask, shift; + + if (IS_ROCKETLAKE(dev_priv)) { + mask = RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + shift = RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy); + } else { + mask = ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + shift = ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy); + } + + temp = intel_de_read(dev_priv, ICL_DPCLKA_CFGCR0) & mask; + id = temp >> shift; port_dpll_id = ICL_PORT_DPLL_DEFAULT; } else if (intel_phy_is_tc(dev_priv, phy)) { u32 clk_sel = intel_de_read(dev_priv, DDI_CLK_SEL(port)) & DDI_CLK_SEL_MASK; @@ -10896,7 +10964,7 @@ static void hsw_get_ddi_pll(struct drm_i915_private *dev_priv, enum port port, break; default: MISSING_CASE(ddi_pll_sel); - /* fall through */ + fallthrough; case PORT_CLK_SEL_NONE: return; } @@ -10956,10 +11024,10 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, drm_WARN(dev, 1, "unknown pipe linked to transcoder %s\n", 
transcoder_name(panel_transcoder)); - /* fall through */ + fallthrough; case TRANS_DDI_EDP_INPUT_A_ONOFF: force_thru = true; - /* fall through */ + fallthrough; case TRANS_DDI_EDP_INPUT_A_ON: trans_pipe = PIPE_A; break; @@ -12760,6 +12828,9 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, } + if (!mode_changed) + intel_psr2_sel_fetch_update(state, crtc); + return 0; } @@ -12807,10 +12878,11 @@ compute_sink_pipe_bpp(const struct drm_connector_state *conn_state, case 10 ... 11: bpp = 10 * 3; break; - case 12: + case 12 ... 16: bpp = 12 * 3; break; default: + MISSING_CASE(conn_state->max_bpc); return -EINVAL; } @@ -13183,7 +13255,7 @@ static bool check_digital_port_conflicts(struct intel_atomic_state *state) case INTEL_OUTPUT_DDI: if (drm_WARN_ON(dev, !HAS_DDI(to_i915(dev)))) break; - /* else, fall through */ + fallthrough; case INTEL_OUTPUT_DP: case INTEL_OUTPUT_HDMI: case INTEL_OUTPUT_EDP: @@ -13418,12 +13490,6 @@ encoder_retry: "hw max bpp: %i, pipe bpp: %i, dithering: %i\n", base_bpp, pipe_config->pipe_bpp, pipe_config->dither); - /* - * Make drm_calc_timestamping_constants in - * drm_atomic_helper_update_legacy_modeset_state() happy - */ - pipe_config->uapi.adjusted_mode = pipe_config->hw.adjusted_mode; - return 0; } @@ -14244,7 +14310,6 @@ verify_crtc_state(struct intel_crtc *crtc, struct intel_encoder *encoder; struct intel_crtc_state *pipe_config = old_crtc_state; struct drm_atomic_state *state = old_crtc_state->uapi.state; - bool active; __drm_atomic_helper_crtc_destroy_state(&old_crtc_state->uapi); intel_crtc_free_hw_state(old_crtc_state); @@ -14254,16 +14319,19 @@ verify_crtc_state(struct intel_crtc *crtc, drm_dbg_kms(&dev_priv->drm, "[CRTC:%d:%s]\n", crtc->base.base.id, crtc->base.name); - active = dev_priv->display.get_pipe_config(crtc, pipe_config); + pipe_config->hw.enable = new_crtc_state->hw.enable; + + pipe_config->hw.active = + dev_priv->display.get_pipe_config(crtc, pipe_config); /* we keep both pipes enabled on 830 */ - if 
(IS_I830(dev_priv)) - active = new_crtc_state->hw.active; + if (IS_I830(dev_priv) && pipe_config->hw.active) + pipe_config->hw.active = new_crtc_state->hw.active; - I915_STATE_WARN(new_crtc_state->hw.active != active, + I915_STATE_WARN(new_crtc_state->hw.active != pipe_config->hw.active, "crtc active state doesn't match with hw state " "(expected %i, found %i)\n", - new_crtc_state->hw.active, active); + new_crtc_state->hw.active, pipe_config->hw.active); I915_STATE_WARN(crtc->active != new_crtc_state->hw.active, "transitional active state does not match atomic hw state " @@ -14272,6 +14340,7 @@ verify_crtc_state(struct intel_crtc *crtc, for_each_encoder_on_crtc(dev, &crtc->base, encoder) { enum pipe pipe; + bool active; active = encoder->get_hw_state(encoder, &pipe); I915_STATE_WARN(active != new_crtc_state->hw.active, @@ -14583,16 +14652,8 @@ u8 intel_calc_active_pipes(struct intel_atomic_state *state, static int intel_modeset_checks(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int ret; state->modeset = true; - state->active_pipes = intel_calc_active_pipes(state, dev_priv->active_pipes); - - if (state->active_pipes != dev_priv->active_pipes) { - ret = _intel_atomic_lock_global_state(state); - if (ret) - return ret; - } if (IS_HASWELL(dev_priv)) return hsw_mode_set_planes_workaround(state); @@ -14736,7 +14797,8 @@ static int intel_atomic_check_cdclk(struct intel_atomic_state *state, bool *need_cdclk_calc) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_cdclk_state *new_cdclk_state; + const struct intel_cdclk_state *old_cdclk_state; + const struct intel_cdclk_state *new_cdclk_state; struct intel_plane_state *plane_state; struct intel_bw_state *new_bw_state; struct intel_plane *plane; @@ -14755,9 +14817,11 @@ static int intel_atomic_check_cdclk(struct intel_atomic_state *state, return ret; } + old_cdclk_state = intel_atomic_get_old_cdclk_state(state); new_cdclk_state = 
intel_atomic_get_new_cdclk_state(state); - if (new_cdclk_state && new_cdclk_state->force_min_cdclk_changed) + if (new_cdclk_state && + old_cdclk_state->force_min_cdclk != new_cdclk_state->force_min_cdclk) *need_cdclk_calc = true; ret = dev_priv->display.bw_calc_min_cdclk(state); @@ -14930,7 +14994,7 @@ static int intel_atomic_check(struct drm_device *dev, if (any_ms && !check_digital_port_conflicts(state)) { drm_dbg_kms(&dev_priv->drm, "rejecting conflicting digital port configuration\n"); - ret = EINVAL; + ret = -EINVAL; goto fail; } @@ -14956,12 +15020,6 @@ static int intel_atomic_check(struct drm_device *dev, if (dev_priv->wm.distrust_bios_wm) any_ms = true; - if (any_ms) { - ret = intel_modeset_checks(state); - if (ret) - goto fail; - } - intel_fbc_choose_crtc(dev_priv, state); ret = calc_watermark_data(state); if (ret) @@ -14976,6 +15034,10 @@ static int intel_atomic_check(struct drm_device *dev, goto fail; if (any_ms) { + ret = intel_modeset_checks(state); + if (ret) + goto fail; + ret = intel_modeset_calc_cdclk(state); if (ret) return ret; @@ -15136,6 +15198,8 @@ static void commit_pipe_config(struct intel_atomic_state *state, if (new_crtc_state->update_pipe) intel_pipe_fastset(old_crtc_state, new_crtc_state); + + intel_psr2_program_trans_man_trk_ctl(new_crtc_state); } if (dev_priv->display.atomic_update_watermarks) @@ -15704,14 +15768,6 @@ static void intel_atomic_track_fbs(struct intel_atomic_state *state) plane->frontbuffer_bit); } -static void assert_global_state_locked(struct drm_i915_private *dev_priv) -{ - struct intel_crtc *crtc; - - for_each_intel_crtc(&dev_priv->drm, crtc) - drm_modeset_lock_assert_held(&crtc->base.mutex); -} - static int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, bool nonblock) @@ -15787,12 +15843,6 @@ static int intel_atomic_commit(struct drm_device *dev, intel_shared_dpll_swap_state(state); intel_atomic_track_fbs(state); - if (state->global_state_changed) { - 
assert_global_state_locked(dev_priv); - - dev_priv->active_pipes = state->active_pipes; - } - drm_atomic_state_get(&state->base); INIT_WORK(&state->base.commit_work, intel_atomic_commit_work); @@ -16839,7 +16889,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_pps_init(dev_priv); - if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) + if (!HAS_DISPLAY(dev_priv)) return; if (IS_ROCKETLAKE(dev_priv)) { @@ -17139,7 +17189,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (!intel_fb->frontbuffer) return -ENOMEM; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); tiling = i915_gem_object_get_tiling(obj); stride = i915_gem_object_get_stride(obj); i915_gem_object_unlock(obj); @@ -17825,6 +17875,27 @@ int intel_modeset_init_noirq(struct drm_i915_private *i915) { int ret; + if (i915_inject_probe_failure(i915)) + return -ENODEV; + + if (HAS_DISPLAY(i915)) { + ret = drm_vblank_init(&i915->drm, + INTEL_NUM_PIPES(i915)); + if (ret) + return ret; + } + + intel_bios_init(i915); + + ret = intel_vga_register(i915); + if (ret) + goto cleanup_bios; + + /* FIXME: completely on the wrong abstraction layer */ + intel_power_domains_init_hw(i915, false); + + intel_csr_ucode_init(i915); + i915->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); i915->flip_wq = alloc_workqueue("i915_flip", WQ_HIGHPRI | WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); @@ -17833,15 +17904,15 @@ int intel_modeset_init_noirq(struct drm_i915_private *i915) ret = intel_cdclk_init(i915); if (ret) - return ret; + goto cleanup_vga_client_pw_domain_csr; ret = intel_dbuf_init(i915); if (ret) - return ret; + goto cleanup_vga_client_pw_domain_csr; ret = intel_bw_init(i915); if (ret) - return ret; + goto cleanup_vga_client_pw_domain_csr; init_llist_head(&i915->atomic_helper.free_list); INIT_WORK(&i915->atomic_helper.free_work, @@ -17852,10 +17923,19 @@ int intel_modeset_init_noirq(struct drm_i915_private *i915) intel_fbc_init(i915); return 0; + 
+cleanup_vga_client_pw_domain_csr: + intel_csr_ucode_fini(i915); + intel_power_domains_driver_remove(i915); + intel_vga_unregister(i915); +cleanup_bios: + intel_bios_driver_remove(i915); + + return ret; } -/* part #2: call after irq install */ -int intel_modeset_init(struct drm_i915_private *i915) +/* part #2: call after irq install, but before gem init */ +int intel_modeset_init_nogem(struct drm_i915_private *i915) { struct drm_device *dev = &i915->drm; enum pipe pipe; @@ -17872,7 +17952,7 @@ int intel_modeset_init(struct drm_i915_private *i915) INTEL_NUM_PIPES(i915), INTEL_NUM_PIPES(i915) > 1 ? "s" : ""); - if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) { + if (HAS_DISPLAY(i915)) { for_each_pipe(i915, pipe) { ret = intel_crtc_init(i915, pipe); if (ret) { @@ -17894,6 +17974,13 @@ int intel_modeset_init(struct drm_i915_private *i915) if (i915->max_cdclk_freq == 0) intel_update_max_cdclk(i915); + /* + * If the platform has HTI, we need to find out whether it has reserved + * any display resources before we create our display outputs. + */ + if (INTEL_INFO(i915)->display.has_hti) + i915->hti_state = intel_de_read(i915, HDPORT_STATE); + /* Just disable it once at startup */ intel_vga_disable(i915); intel_setup_outputs(i915); @@ -17947,6 +18034,30 @@ int intel_modeset_init(struct drm_i915_private *i915) return 0; } +/* part #3: call after gem init */ +int intel_modeset_init(struct drm_i915_private *i915) +{ + int ret; + + intel_overlay_setup(i915); + + if (!HAS_DISPLAY(i915)) + return 0; + + ret = intel_fbdev_init(&i915->drm); + if (ret) + return ret; + + /* Only enable hotplug handling once the fbdev is fully set up. 
*/ + intel_hpd_init(i915); + + intel_init_ipc(i915); + + intel_psr_set_force_mode_changed(i915->psr.dp); + + return 0; +} + void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) { struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -18831,6 +18942,18 @@ void intel_modeset_driver_remove_noirq(struct drm_i915_private *i915) intel_fbc_cleanup_cfb(i915); } +/* part #3: call after gem init */ +void intel_modeset_driver_remove_nogem(struct drm_i915_private *i915) +{ + intel_csr_ucode_fini(i915); + + intel_power_domains_driver_remove(i915); + + intel_vga_unregister(i915); + + intel_bios_driver_remove(i915); +} + #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) struct intel_display_error_state { @@ -18891,7 +19014,7 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) BUILD_BUG_ON(ARRAY_SIZE(transcoders) != ARRAY_SIZE(error->transcoder)); - if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) + if (!HAS_DISPLAY(dev_priv)) return NULL; error = kzalloc(sizeof(*error), GFP_ATOMIC); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index e890c8fb779b..d10b7c8cde3f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -272,8 +272,6 @@ enum dpio_phy { DPIO_PHY2, }; -#define I915_NUM_PHYS_VLV 2 - enum aux_ch { AUX_CH_A, AUX_CH_B, @@ -282,6 +280,8 @@ enum aux_ch { AUX_CH_E, /* ICL+ */ AUX_CH_F, AUX_CH_G, + AUX_CH_H, + AUX_CH_I, }; #define aux_ch_name(a) ((a) + 'A') @@ -629,9 +629,11 @@ intel_format_info_is_yuv_semiplanar(const struct drm_format_info *info, /* modesetting */ void intel_modeset_init_hw(struct drm_i915_private *i915); int intel_modeset_init_noirq(struct drm_i915_private *i915); +int intel_modeset_init_nogem(struct drm_i915_private *i915); int intel_modeset_init(struct drm_i915_private *i915); void intel_modeset_driver_remove(struct drm_i915_private *i915); void 
intel_modeset_driver_remove_noirq(struct drm_i915_private *i915); +void intel_modeset_driver_remove_nogem(struct drm_i915_private *i915); void intel_display_resume(struct drm_device *dev); void intel_init_pch_refclk(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 3644752cc5ec..0bf31f9a8af5 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -417,6 +417,9 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) su_blocks = su_blocks >> PSR2_SU_STATUS_SHIFT(frame); seq_printf(m, "%d\t%d\n", frame, su_blocks); } + + seq_printf(m, "PSR2 selective fetch: %s\n", + enableddisabled(psr->psr2_sel_fetch_enabled)); } unlock: @@ -598,6 +601,11 @@ static void intel_hdcp_info(struct seq_file *m, { bool hdcp_cap, hdcp2_cap; + if (!intel_connector->hdcp.shim) { + seq_puts(m, "No Connector Support"); + goto out; + } + hdcp_cap = intel_hdcp_capable(intel_connector); hdcp2_cap = intel_hdcp2_capable(intel_connector); @@ -609,6 +617,7 @@ static void intel_hdcp_info(struct seq_file *m, if (!hdcp_cap && !hdcp2_cap) seq_puts(m, "None"); +out: seq_puts(m, "\n"); } @@ -617,6 +626,7 @@ static void intel_dp_info(struct seq_file *m, { struct intel_encoder *intel_encoder = intel_attached_encoder(intel_connector); struct intel_dp *intel_dp = enc_to_intel_dp(intel_encoder); + const struct drm_property_blob *edid = intel_connector->base.edid_blob_ptr; seq_printf(m, "\tDPCD rev: %x\n", intel_dp->dpcd[DP_DPCD_REV]); seq_printf(m, "\taudio support: %s\n", yesno(intel_dp->has_audio)); @@ -624,11 +634,7 @@ static void intel_dp_info(struct seq_file *m, intel_panel_info(m, &intel_connector->panel); drm_dp_downstream_debug(m, intel_dp->dpcd, intel_dp->downstream_ports, - &intel_dp->aux); - if (intel_connector->hdcp.shim) { - seq_puts(m, "\tHDCP version: "); - intel_hdcp_info(m, intel_connector); - } + edid ? 
edid->data : NULL, &intel_dp->aux); } static void intel_dp_mst_info(struct seq_file *m, @@ -646,10 +652,6 @@ static void intel_hdmi_info(struct seq_file *m, struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(intel_encoder); seq_printf(m, "\taudio support: %s\n", yesno(intel_hdmi->has_audio)); - if (intel_connector->hdcp.shim) { - seq_puts(m, "\tHDCP version: "); - intel_hdcp_info(m, intel_connector); - } } static void intel_lvds_info(struct seq_file *m, @@ -705,6 +707,9 @@ static void intel_connector_info(struct seq_file *m, break; } + seq_puts(m, "\tHDCP version: "); + intel_hdcp_info(m, intel_connector); + seq_printf(m, "\tmodes:\n"); list_for_each_entry(mode, &connector->modes, head) intel_seq_print_mode(m, 2, mode); @@ -1066,10 +1071,18 @@ static void drrs_status_per_crtc(struct seq_file *m, drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { + bool supported = false; + if (connector->state->crtc != &intel_crtc->base) continue; seq_printf(m, "%s:\n", connector->name); + + if (connector->connector_type == DRM_MODE_CONNECTOR_eDP && + drrs->type == SEAMLESS_DRRS_SUPPORT) + supported = true; + + seq_printf(m, "\tDRRS Supported: %s\n", yesno(supported)); } drm_connector_list_iter_end(&conn_iter); @@ -1080,7 +1093,7 @@ static void drrs_status_per_crtc(struct seq_file *m, mutex_lock(&drrs->mutex); /* DRRS Supported */ - seq_puts(m, "\tDRRS Supported: Yes\n"); + seq_puts(m, "\tDRRS Enabled: Yes\n"); /* disable_drrs() will make drrs->dp NULL */ if (!drrs->dp) { @@ -1115,7 +1128,7 @@ static void drrs_status_per_crtc(struct seq_file *m, mutex_unlock(&drrs->mutex); } else { /* DRRS not supported. 
Print the VBT parameter*/ - seq_puts(m, "\tDRRS Supported : No"); + seq_puts(m, "\tDRRS Enabled : No"); } seq_puts(m, "\n"); } @@ -2026,10 +2039,6 @@ static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data) if (connector->status != connector_status_connected) return -ENODEV; - /* HDCP is supported by connector */ - if (!intel_connector->hdcp.shim) - return -EINVAL; - seq_printf(m, "%s:%d HDCP version: ", connector->name, connector->base.id); intel_hdcp_info(m, intel_connector); @@ -2044,9 +2053,12 @@ DEFINE_SHOW_ATTRIBUTE(i915_hdcp_sink_capability); static int i915_lpsp_capability_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; - struct intel_encoder *encoder = - intel_attached_encoder(to_intel_connector(connector)); struct drm_i915_private *i915 = to_i915(connector->dev); + struct intel_encoder *encoder; + + encoder = intel_attached_encoder(to_intel_connector(connector)); + if (!encoder) + return -ENODEV; if (connector->status != connector_status_connected) return -ENODEV; diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 0c713e83274d..7277e58b01f1 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -3927,12 +3927,13 @@ tgl_tc_cold_request(struct drm_i915_private *i915, bool block) int ret; while (1) { - u32 low_val = 0, high_val; + u32 low_val; + u32 high_val = 0; if (block) - high_val = TGL_PCODE_EXIT_TCCOLD_DATA_H_BLOCK_REQ; + low_val = TGL_PCODE_EXIT_TCCOLD_DATA_L_BLOCK_REQ; else - high_val = TGL_PCODE_EXIT_TCCOLD_DATA_H_UNBLOCK_REQ; + low_val = TGL_PCODE_EXIT_TCCOLD_DATA_L_UNBLOCK_REQ; /* * Spec states that we should timeout the request after 200us @@ -3951,8 +3952,7 @@ tgl_tc_cold_request(struct drm_i915_private *i915, bool block) if (++tries == 3) break; - if (ret == -EAGAIN) - msleep(1); + msleep(1); } if (ret) @@ -4147,6 +4147,12 @@ static const struct 
i915_power_well_desc tgl_power_wells[] = { }, }, { + .name = "TC cold off", + .domains = TGL_TC_COLD_OFF_POWER_DOMAINS, + .ops = &tgl_tc_cold_off_ops, + .id = DISP_PW_ID_NONE, + }, + { .name = "AUX A", .domains = TGL_AUX_A_IO_POWER_DOMAINS, .ops = &icl_aux_power_well_ops, @@ -4332,12 +4338,6 @@ static const struct i915_power_well_desc tgl_power_wells[] = { .hsw.irq_pipe_mask = BIT(PIPE_D), }, }, - { - .name = "TC cold off", - .domains = TGL_TC_COLD_OFF_POWER_DOMAINS, - .ops = &tgl_tc_cold_off_ops, - .id = DISP_PW_ID_NONE, - }, }; static const struct i915_power_well_desc rkl_power_wells[] = { @@ -5240,10 +5240,10 @@ struct buddy_page_mask { }; static const struct buddy_page_mask tgl_buddy_page_masks[] = { - { .num_channels = 1, .type = INTEL_DRAM_LPDDR4, .page_mask = 0xE }, { .num_channels = 1, .type = INTEL_DRAM_DDR4, .page_mask = 0xF }, { .num_channels = 2, .type = INTEL_DRAM_LPDDR4, .page_mask = 0x1C }, { .num_channels = 2, .type = INTEL_DRAM_DDR4, .page_mask = 0x1F }, + { .num_channels = 4, .type = INTEL_DRAM_LPDDR4, .page_mask = 0x38 }, {} }; @@ -5263,7 +5263,7 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) unsigned long abox_mask = INTEL_INFO(dev_priv)->abox_mask; int config, i; - if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_B0)) + if (IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_B0)) /* Wa_1409767108: tgl */ table = wa_1409767108_buddy_page_masks; else @@ -5302,6 +5302,12 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + /* Wa_14011294188:ehl,jsl,tgl,rkl */ + if (INTEL_PCH_TYPE(dev_priv) >= PCH_JSP && + INTEL_PCH_TYPE(dev_priv) < PCH_DG1) + intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, 0, + PCH_DPMGUNIT_CLOCK_GATE_DISABLE); + /* 1. Enable PCH reset handshake. 
*/ intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv)); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index e8f809161c75..3d4bf9b6a0a2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -28,6 +28,7 @@ #include <linux/async.h> #include <linux/i2c.h> +#include <linux/pwm.h> #include <linux/sched/clock.h> #include <drm/drm_atomic.h> @@ -223,6 +224,7 @@ struct intel_panel { bool util_pin_active_low; /* bxt+ */ u8 controller; /* bxt+ only */ struct pwm_device *pwm; + struct pwm_state pwm_state; /* DPCD backlight */ u8 pwmgen_bit_count; @@ -314,10 +316,12 @@ struct intel_hdcp_shim { /* Enables HDCP signalling on the port */ int (*toggle_signalling)(struct intel_digital_port *dig_port, + enum transcoder cpu_transcoder, bool enable); /* Ensures the link is still protected */ - bool (*check_link)(struct intel_digital_port *dig_port); + bool (*check_link)(struct intel_digital_port *dig_port, + struct intel_connector *connector); /* Detects panel's hdcp capability. This is optional for HDMI. 
*/ int (*hdcp_capable)(struct intel_digital_port *dig_port, @@ -479,8 +483,6 @@ struct intel_atomic_state { bool dpll_set, modeset; - u8 active_pipes; - struct intel_shared_dpll_state shared_dpll[I915_NUM_PLLS]; /* @@ -491,11 +493,6 @@ struct intel_atomic_state { bool rps_interactive; - /* - * active_pipes - */ - bool global_state_changed; - struct i915_sw_fence commit_ready; struct llist_node freed; @@ -931,6 +928,7 @@ struct intel_crtc_state { bool has_psr; bool has_psr2; + bool enable_psr2_sel_fetch; u32 dc3co_exitline; /* @@ -1073,6 +1071,8 @@ struct intel_crtc_state { /* For DSB related info */ struct intel_dsb *dsb; + + u32 psr2_man_track_ctl; }; enum intel_pipe_crc_source { @@ -1272,6 +1272,7 @@ struct intel_dp { u8 sink_count; bool link_mst; bool link_trained; + bool has_hdmi_sink; bool has_audio; bool reset_link_params; u8 dpcd[DP_RECEIVER_CAP_SIZE]; @@ -1373,8 +1374,19 @@ struct intel_dp { /* Displayport compliance testing */ struct intel_dp_compliance compliance; + /* Downstream facing port caps */ + struct { + int min_tmds_clock, max_tmds_clock; + int max_dotclock; + u8 max_bpc; + bool ycbcr_444_to_420; + } dfp; + /* Display stream compression testing */ bool force_dsc_en; + + bool hobl_failed; + bool hobl_active; }; enum lspcon_vendor { @@ -1409,6 +1421,11 @@ struct intel_digital_port { enum phy_fia tc_phy_fia; u8 tc_phy_fia_idx; + /* protects num_hdcp_streams reference count */ + struct mutex hdcp_mutex; + /* the number of pipes using HDCP signalling out of this port */ + unsigned int num_hdcp_streams; + void (*write_infoframe)(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, unsigned int type, @@ -1519,6 +1536,18 @@ static inline bool intel_encoder_is_dig_port(struct intel_encoder *encoder) } } +static inline bool intel_encoder_is_mst(struct intel_encoder *encoder) +{ + return encoder->type == INTEL_OUTPUT_DP_MST; +} + +static inline struct intel_dp_mst_encoder * +enc_to_mst(struct intel_encoder *encoder) +{ + return 
container_of(&encoder->base, struct intel_dp_mst_encoder, + base.base); +} + static inline struct intel_digital_port * enc_to_dig_port(struct intel_encoder *encoder) { @@ -1527,6 +1556,8 @@ enc_to_dig_port(struct intel_encoder *encoder) if (intel_encoder_is_dig_port(intel_encoder)) return container_of(&encoder->base, struct intel_digital_port, base.base); + else if (intel_encoder_is_mst(intel_encoder)) + return enc_to_mst(encoder)->primary; else return NULL; } @@ -1537,13 +1568,6 @@ intel_attached_dig_port(struct intel_connector *connector) return enc_to_dig_port(intel_attached_encoder(connector)); } -static inline struct intel_dp_mst_encoder * -enc_to_mst(struct intel_encoder *encoder) -{ - return container_of(&encoder->base, struct intel_dp_mst_encoder, - base.base); -} - static inline struct intel_dp *enc_to_intel_dp(struct intel_encoder *encoder) { return &enc_to_dig_port(encoder)->dp; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index d6295eb20b63..bf1e9cf1c0f3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -38,7 +38,6 @@ #include <drm/drm_crtc.h> #include <drm/drm_dp_helper.h> #include <drm/drm_edid.h> -#include <drm/drm_hdcp.h> #include <drm/drm_probe_helper.h> #include "i915_debugfs.h" @@ -248,29 +247,6 @@ intel_dp_max_data_rate(int max_link_clock, int max_lanes) return max_link_clock * max_lanes; } -static int -intel_dp_downstream_max_dotclock(struct intel_dp *intel_dp) -{ - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *encoder = &dig_port->base; - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - int max_dotclk = dev_priv->max_dotclk_freq; - int ds_max_dotclk; - - int type = intel_dp->downstream_ports[0] & DP_DS_PORT_TYPE_MASK; - - if (type != DP_DS_PORT_TYPE_VGA) - return max_dotclk; - - ds_max_dotclk = drm_dp_downstream_max_clock(intel_dp->dpcd, - intel_dp->downstream_ports); - - if 
(ds_max_dotclk != 0) - max_dotclk = min(max_dotclk, ds_max_dotclk); - - return max_dotclk; -} - static int cnl_max_source_rate(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); @@ -636,6 +612,34 @@ static bool intel_dp_hdisplay_bad(struct drm_i915_private *dev_priv, } static enum drm_mode_status +intel_dp_mode_valid_downstream(struct intel_connector *connector, + const struct drm_display_mode *mode, + int target_clock) +{ + struct intel_dp *intel_dp = intel_attached_dp(connector); + const struct drm_display_info *info = &connector->base.display_info; + int tmds_clock; + + if (intel_dp->dfp.max_dotclock && + target_clock > intel_dp->dfp.max_dotclock) + return MODE_CLOCK_HIGH; + + /* Assume 8bpc for the DP++/HDMI/DVI TMDS clock check */ + tmds_clock = target_clock; + if (drm_mode_is_420_only(info, mode)) + tmds_clock /= 2; + + if (intel_dp->dfp.min_tmds_clock && + tmds_clock < intel_dp->dfp.min_tmds_clock) + return MODE_CLOCK_LOW; + if (intel_dp->dfp.max_tmds_clock && + tmds_clock > intel_dp->dfp.max_tmds_clock) + return MODE_CLOCK_HIGH; + + return MODE_OK; +} + +static enum drm_mode_status intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { @@ -645,15 +649,14 @@ intel_dp_mode_valid(struct drm_connector *connector, struct drm_i915_private *dev_priv = to_i915(connector->dev); int target_clock = mode->clock; int max_rate, mode_rate, max_lanes, max_link_clock; - int max_dotclk; + int max_dotclk = dev_priv->max_dotclk_freq; u16 dsc_max_output_bpp = 0; u8 dsc_slice_count = 0; + enum drm_mode_status status; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; - max_dotclk = intel_dp_downstream_max_dotclock(intel_dp); - if (intel_dp_is_edp(intel_dp) && fixed_mode) { if (mode->hdisplay > fixed_mode->hdisplay) return MODE_PANEL; @@ -709,6 +712,11 @@ intel_dp_mode_valid(struct drm_connector *connector, if (mode->flags & DRM_MODE_FLAG_DBLCLK) return MODE_H_ILLEGAL; + status = 
intel_dp_mode_valid_downstream(intel_connector, + mode, target_clock); + if (status != MODE_OK) + return status; + return intel_mode_valid_max_plane_size(dev_priv, mode); } @@ -1563,6 +1571,20 @@ intel_dp_aux_header(u8 txbuf[HEADER_SIZE], txbuf[3] = msg->size - 1; } +static u32 intel_dp_aux_xfer_flags(const struct drm_dp_aux_msg *msg) +{ + /* + * If we're trying to send the HDCP Aksv, we need to set the Aksv + * select bit to inform the hardware to send the Aksv after our header + * since we can't access that data from software. + */ + if ((msg->request & ~DP_AUX_I2C_MOT) == DP_AUX_NATIVE_WRITE && + msg->address == DP_AUX_HDCP_AKSV) + return DP_AUX_CH_CTL_AUX_AKSV_SELECT; + + return 0; +} + static ssize_t intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -1570,6 +1592,7 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) struct drm_i915_private *i915 = dp_to_i915(intel_dp); u8 txbuf[20], rxbuf[20]; size_t txsize, rxsize; + u32 flags = intel_dp_aux_xfer_flags(msg); int ret; intel_dp_aux_header(txbuf, msg); @@ -1590,7 +1613,7 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size); ret = intel_dp_aux_xfer(intel_dp, txbuf, txsize, - rxbuf, rxsize, 0); + rxbuf, rxsize, flags); if (ret > 0) { msg->reply = rxbuf[0] >> 4; @@ -1613,7 +1636,7 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) return -E2BIG; ret = intel_dp_aux_xfer(intel_dp, txbuf, txsize, - rxbuf, rxsize, 0); + rxbuf, rxsize, flags); if (ret > 0) { msg->reply = rxbuf[0] >> 4; /* @@ -1954,19 +1977,72 @@ static bool intel_dp_supports_dsc(struct intel_dp *intel_dp, drm_dp_sink_supports_dsc(intel_dp->dsc_dpcd); } -static int intel_dp_compute_bpp(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config) +static bool intel_dp_hdmi_ycbcr420(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + return crtc_state->output_format == 
INTEL_OUTPUT_FORMAT_YCBCR420 || + (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444 && + intel_dp->dfp.ycbcr_444_to_420); +} + +static int intel_dp_hdmi_tmds_clock(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, int bpc) +{ + int clock = crtc_state->hw.adjusted_mode.crtc_clock * bpc / 8; + + if (intel_dp_hdmi_ycbcr420(intel_dp, crtc_state)) + clock /= 2; + + return clock; +} + +static bool intel_dp_hdmi_tmds_clock_valid(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, int bpc) +{ + int tmds_clock = intel_dp_hdmi_tmds_clock(intel_dp, crtc_state, bpc); + + if (intel_dp->dfp.min_tmds_clock && + tmds_clock < intel_dp->dfp.min_tmds_clock) + return false; + + if (intel_dp->dfp.max_tmds_clock && + tmds_clock > intel_dp->dfp.max_tmds_clock) + return false; + + return true; +} + +static bool intel_dp_hdmi_deep_color_possible(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + int bpc) +{ + + return intel_hdmi_deep_color_possible(crtc_state, bpc, + intel_dp->has_hdmi_sink, + intel_dp_hdmi_ycbcr420(intel_dp, crtc_state)) && + intel_dp_hdmi_tmds_clock_valid(intel_dp, crtc_state, bpc); +} + +static int intel_dp_max_bpp(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct intel_connector *intel_connector = intel_dp->attached_connector; int bpp, bpc; - bpp = pipe_config->pipe_bpp; - bpc = drm_dp_downstream_max_bpc(intel_dp->dpcd, intel_dp->downstream_ports); + bpc = crtc_state->pipe_bpp / 3; + + if (intel_dp->dfp.max_bpc) + bpc = min_t(int, bpc, intel_dp->dfp.max_bpc); - if (bpc > 0) - bpp = min(bpp, 3*bpc); + if (intel_dp->dfp.min_tmds_clock) { + for (; bpc >= 10; bpc -= 2) { + if (intel_dp_hdmi_deep_color_possible(intel_dp, crtc_state, bpc)) + break; + } + } + bpp = bpc * 3; if (intel_dp_is_edp(intel_dp)) { /* Get bpp from vbt only for panels that dont have bpp in edid */ if (intel_connector->base.display_info.bpc == 
0 && @@ -2288,7 +2364,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, limits.max_lane_count = intel_dp_max_lane_count(intel_dp); limits.min_bpp = intel_dp_min_bpp(pipe_config); - limits.max_bpp = intel_dp_compute_bpp(intel_dp, pipe_config); + limits.max_bpp = intel_dp_max_bpp(intel_dp, pipe_config); if (intel_dp_is_edp(intel_dp)) { /* @@ -2363,10 +2439,16 @@ intel_dp_ycbcr420_config(struct intel_dp *intel_dp, const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - if (!drm_mode_is_420_only(info, adjusted_mode) || - !intel_dp_get_colorimetry_status(intel_dp) || - !connector->ycbcr_420_allowed) + if (!connector->ycbcr_420_allowed) + return 0; + + if (!drm_mode_is_420_only(info, adjusted_mode)) + return 0; + + if (intel_dp->dfp.ycbcr_444_to_420) { + crtc_state->output_format = INTEL_OUTPUT_FORMAT_YCBCR444; return 0; + } crtc_state->output_format = INTEL_OUTPUT_FORMAT_YCBCR420; @@ -2575,6 +2657,34 @@ intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA); } +static void +intel_dp_drrs_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + int output_bpp, bool constant_n) +{ + struct intel_connector *intel_connector = intel_dp->attached_connector; + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + + /* + * DRRS and PSR can't be enabled together, so give preference to PSR + * as it allows more power-savings by completely shutting down the display, + * so to guarantee this, intel_dp_drrs_compute_config() must be called + * after intel_psr_compute_config(). 
+ */ + if (pipe_config->has_psr) + return; + + if (!intel_connector->panel.downclock_mode || + dev_priv->drrs.type != SEAMLESS_DRRS_SUPPORT) + return; + + pipe_config->has_drrs = true; + intel_link_compute_m_n(output_bpp, pipe_config->lane_count, + intel_connector->panel.downclock_mode->clock, + pipe_config->port_clock, &pipe_config->dp_m2_n2, + constant_n, pipe_config->fec_enable); +} + int intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -2605,7 +2715,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (ret) return ret; - pipe_config->has_drrs = false; if (!intel_dp_port_has_audio(dev_priv, port)) pipe_config->has_audio = false; else if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) @@ -2657,21 +2766,12 @@ intel_dp_compute_config(struct intel_encoder *encoder, &pipe_config->dp_m_n, constant_n, pipe_config->fec_enable); - if (intel_connector->panel.downclock_mode != NULL && - dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { - pipe_config->has_drrs = true; - intel_link_compute_m_n(output_bpp, - pipe_config->lane_count, - intel_connector->panel.downclock_mode->clock, - pipe_config->port_clock, - &pipe_config->dp_m2_n2, - constant_n, pipe_config->fec_enable); - } - if (!HAS_DDI(dev_priv)) intel_dp_set_clock(encoder, pipe_config); intel_psr_compute_config(intel_dp, pipe_config); + intel_dp_drrs_compute_config(intel_dp, pipe_config, output_bpp, + constant_n); intel_dp_compute_vsc_sdp(intel_dp, pipe_config, conn_state); intel_dp_compute_hdr_metadata_infoframe_sdp(intel_dp, pipe_config, conn_state); @@ -3752,6 +3852,43 @@ static void intel_dp_enable_port(struct intel_dp *intel_dp, intel_de_posting_read(dev_priv, intel_dp->output_reg); } +void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + u8 tmp; + + if (intel_dp->dpcd[DP_DPCD_REV] < 0x13) + return; + + if (!drm_dp_is_branch(intel_dp->dpcd)) + return; + + tmp = 
intel_dp->has_hdmi_sink ? + DP_HDMI_DVI_OUTPUT_CONFIG : 0; + + if (drm_dp_dpcd_writeb(&intel_dp->aux, + DP_PROTOCOL_CONVERTER_CONTROL_0, tmp) != 1) + drm_dbg_kms(&i915->drm, "Failed to set protocol converter HDMI mode to %s\n", + enableddisabled(intel_dp->has_hdmi_sink)); + + tmp = intel_dp->dfp.ycbcr_444_to_420 ? + DP_CONVERSION_TO_YCBCR420_ENABLE : 0; + + if (drm_dp_dpcd_writeb(&intel_dp->aux, + DP_PROTOCOL_CONVERTER_CONTROL_1, tmp) != 1) + drm_dbg_kms(&i915->drm, + "Failed to set protocol converter YCbCr 4:2:0 conversion mode to %s\n", + enableddisabled(intel_dp->dfp.ycbcr_444_to_420)); + + tmp = 0; + + if (drm_dp_dpcd_writeb(&intel_dp->aux, + DP_PROTOCOL_CONVERTER_CONTROL_2, tmp) <= 0) + drm_dbg_kms(&i915->drm, + "Failed to set protocol converter YCbCr 4:2:2 conversion mode to %s\n", + enableddisabled(false)); +} + static void intel_enable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, @@ -3789,6 +3926,7 @@ static void intel_enable_dp(struct intel_atomic_state *state, } intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + intel_dp_configure_protocol_converter(intel_dp); intel_dp_start_link_train(intel_dp); intel_dp_stop_link_train(intel_dp); @@ -4449,62 +4587,6 @@ intel_dp_link_down(struct intel_encoder *encoder, } } -static void -intel_dp_extended_receiver_capabilities(struct intel_dp *intel_dp) -{ - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 dpcd_ext[6]; - - /* - * Prior to DP1.3 the bit represented by - * DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT was reserved. - * if it is set DP_DPCD_REV at 0000h could be at a value less than - * the true capability of the panel. The only way to check is to - * then compare 0000h and 2200h. 
- */ - if (!(intel_dp->dpcd[DP_TRAINING_AUX_RD_INTERVAL] & - DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT)) - return; - - if (drm_dp_dpcd_read(&intel_dp->aux, DP_DP13_DPCD_REV, - &dpcd_ext, sizeof(dpcd_ext)) != sizeof(dpcd_ext)) { - drm_err(&i915->drm, - "DPCD failed read at extended capabilities\n"); - return; - } - - if (intel_dp->dpcd[DP_DPCD_REV] > dpcd_ext[DP_DPCD_REV]) { - drm_dbg_kms(&i915->drm, - "DPCD extended DPCD rev less than base DPCD rev\n"); - return; - } - - if (!memcmp(intel_dp->dpcd, dpcd_ext, sizeof(dpcd_ext))) - return; - - drm_dbg_kms(&i915->drm, "Base DPCD: %*ph\n", - (int)sizeof(intel_dp->dpcd), intel_dp->dpcd); - - memcpy(intel_dp->dpcd, dpcd_ext, sizeof(dpcd_ext)); -} - -bool -intel_dp_read_dpcd(struct intel_dp *intel_dp) -{ - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - - if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd, - sizeof(intel_dp->dpcd)) < 0) - return false; /* aux transfer failed */ - - intel_dp_extended_receiver_capabilities(intel_dp); - - drm_dbg_kms(&i915->drm, "DPCD: %*ph\n", (int)sizeof(intel_dp->dpcd), - intel_dp->dpcd); - - return intel_dp->dpcd[DP_DPCD_REV] != 0; -} - bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) { u8 dprx = 0; @@ -4563,7 +4645,7 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) /* this function is meant to be called only once */ drm_WARN_ON(&dev_priv->drm, intel_dp->dpcd[DP_DPCD_REV] != 0); - if (!intel_dp_read_dpcd(intel_dp)) + if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd) != 0) return false; drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, @@ -4634,11 +4716,23 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) return true; } +static bool +intel_dp_has_sink_count(struct intel_dp *intel_dp) +{ + if (!intel_dp->attached_connector) + return false; + + return drm_dp_read_sink_count_cap(&intel_dp->attached_connector->base, + intel_dp->dpcd, + &intel_dp->desc); +} static bool intel_dp_get_dpcd(struct intel_dp *intel_dp) { - if (!intel_dp_read_dpcd(intel_dp)) + int 
ret; + + if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd)) return false; /* @@ -4653,18 +4747,9 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) intel_dp_set_common_rates(intel_dp); } - /* - * Some eDP panels do not set a valid value for sink count, that is why - * it don't care about read it here and in intel_edp_init_dpcd(). - */ - if (!intel_dp_is_edp(intel_dp) && - !drm_dp_has_quirk(&intel_dp->desc, 0, - DP_DPCD_QUIRK_NO_SINK_COUNT)) { - u8 count; - ssize_t r; - - r = drm_dp_dpcd_readb(&intel_dp->aux, DP_SINK_COUNT, &count); - if (r < 1) + if (intel_dp_has_sink_count(intel_dp)) { + ret = drm_dp_read_sink_count(&intel_dp->aux); + if (ret < 0) return false; /* @@ -4672,7 +4757,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) * a member variable in intel_dp will track any changes * between short pulse interrupts. */ - intel_dp->sink_count = DP_GET_SINK_COUNT(count); + intel_dp->sink_count = ret; /* * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that @@ -4685,32 +4770,8 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) return false; } - if (!drm_dp_is_branch(intel_dp->dpcd)) - return true; /* native DP sink */ - - if (intel_dp->dpcd[DP_DPCD_REV] == 0x10) - return true; /* no per-port downstream info */ - - if (drm_dp_dpcd_read(&intel_dp->aux, DP_DOWNSTREAM_PORT_0, - intel_dp->downstream_ports, - DP_MAX_DOWNSTREAM_PORTS) < 0) - return false; /* downstream port status fetch failed */ - - return true; -} - -static bool -intel_dp_sink_can_mst(struct intel_dp *intel_dp) -{ - u8 mstm_cap; - - if (intel_dp->dpcd[DP_DPCD_REV] < 0x12) - return false; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_MSTM_CAP, &mstm_cap) != 1) - return false; - - return mstm_cap & DP_MST_CAP; + return drm_dp_read_downstream_info(&intel_dp->aux, intel_dp->dpcd, + intel_dp->downstream_ports) == 0; } static bool @@ -4720,7 +4781,7 @@ intel_dp_can_mst(struct intel_dp *intel_dp) return i915->params.enable_dp_mst && intel_dp->can_mst && - intel_dp_sink_can_mst(intel_dp); + 
drm_dp_read_mst_cap(&intel_dp->aux, intel_dp->dpcd); } static void @@ -4729,7 +4790,7 @@ intel_dp_configure_mst(struct intel_dp *intel_dp) struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; - bool sink_can_mst = intel_dp_sink_can_mst(intel_dp); + bool sink_can_mst = drm_dp_read_mst_cap(&intel_dp->aux, intel_dp->dpcd); drm_dbg_kms(&i915->drm, "[ENCODER:%d:%s] MST support: port: %s, sink: %s, modparam: %s\n", @@ -5963,9 +6024,8 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) return connector_status_connected; /* If we're HPD-aware, SINK_COUNT changes dynamically */ - if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 && + if (intel_dp_has_sink_count(intel_dp) && intel_dp->downstream_ports[0] & DP_DS_PORT_HPD) { - return intel_dp->sink_count ? connector_status_connected : connector_status_disconnected; } @@ -6106,16 +6166,103 @@ intel_dp_get_edid(struct intel_dp *intel_dp) } static void +intel_dp_update_dfp(struct intel_dp *intel_dp, + const struct edid *edid) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_connector *connector = intel_dp->attached_connector; + + intel_dp->dfp.max_bpc = + drm_dp_downstream_max_bpc(intel_dp->dpcd, + intel_dp->downstream_ports, edid); + + intel_dp->dfp.max_dotclock = + drm_dp_downstream_max_dotclock(intel_dp->dpcd, + intel_dp->downstream_ports); + + intel_dp->dfp.min_tmds_clock = + drm_dp_downstream_min_tmds_clock(intel_dp->dpcd, + intel_dp->downstream_ports, + edid); + intel_dp->dfp.max_tmds_clock = + drm_dp_downstream_max_tmds_clock(intel_dp->dpcd, + intel_dp->downstream_ports, + edid); + + drm_dbg_kms(&i915->drm, + "[CONNECTOR:%d:%s] DFP max bpc %d, max dotclock %d, TMDS clock %d-%d\n", + connector->base.base.id, connector->base.name, + intel_dp->dfp.max_bpc, + intel_dp->dfp.max_dotclock, + intel_dp->dfp.min_tmds_clock, + intel_dp->dfp.max_tmds_clock); +} + +static void +intel_dp_update_420(struct intel_dp *intel_dp) +{ + struct drm_i915_private 
*i915 = dp_to_i915(intel_dp); + struct intel_connector *connector = intel_dp->attached_connector; + bool is_branch, ycbcr_420_passthrough, ycbcr_444_to_420; + + /* No YCbCr output support on gmch platforms */ + if (HAS_GMCH(i915)) + return; + + /* + * ILK doesn't seem capable of DP YCbCr output. The + * displayed image is severely corrupted. SNB+ is fine. + */ + if (IS_GEN(i915, 5)) + return; + + is_branch = drm_dp_is_branch(intel_dp->dpcd); + ycbcr_420_passthrough = + drm_dp_downstream_420_passthrough(intel_dp->dpcd, + intel_dp->downstream_ports); + ycbcr_444_to_420 = + drm_dp_downstream_444_to_420_conversion(intel_dp->dpcd, + intel_dp->downstream_ports); + + if (INTEL_GEN(i915) >= 11) { + /* Prefer 4:2:0 passthrough over 4:4:4->4:2:0 conversion */ + intel_dp->dfp.ycbcr_444_to_420 = + ycbcr_444_to_420 && !ycbcr_420_passthrough; + + connector->base.ycbcr_420_allowed = + !is_branch || ycbcr_444_to_420 || ycbcr_420_passthrough; + } else { + /* 4:4:4->4:2:0 conversion is the only way */ + intel_dp->dfp.ycbcr_444_to_420 = ycbcr_444_to_420; + + connector->base.ycbcr_420_allowed = ycbcr_444_to_420; + } + + drm_dbg_kms(&i915->drm, + "[CONNECTOR:%d:%s] YCbCr 4:2:0 allowed? %s, YCbCr 4:4:4->4:2:0 conversion? 
%s\n", + connector->base.base.id, connector->base.name, + yesno(connector->base.ycbcr_420_allowed), + yesno(intel_dp->dfp.ycbcr_444_to_420)); +} + +static void intel_dp_set_edid(struct intel_dp *intel_dp) { - struct intel_connector *intel_connector = intel_dp->attached_connector; + struct intel_connector *connector = intel_dp->attached_connector; struct edid *edid; intel_dp_unset_edid(intel_dp); edid = intel_dp_get_edid(intel_dp); - intel_connector->detect_edid = edid; + connector->detect_edid = edid; + + intel_dp_update_dfp(intel_dp, edid); + intel_dp_update_420(intel_dp); + + if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) { + intel_dp->has_hdmi_sink = drm_detect_hdmi_monitor(edid); + intel_dp->has_audio = drm_detect_monitor_audio(edid); + } - intel_dp->has_audio = drm_detect_monitor_audio(edid); drm_dp_cec_set_edid(&intel_dp->aux, edid); intel_dp->edid_quirks = drm_dp_get_edid_quirks(edid); } @@ -6123,14 +6270,23 @@ intel_dp_set_edid(struct intel_dp *intel_dp) static void intel_dp_unset_edid(struct intel_dp *intel_dp) { - struct intel_connector *intel_connector = intel_dp->attached_connector; + struct intel_connector *connector = intel_dp->attached_connector; drm_dp_cec_unset_edid(&intel_dp->aux); - kfree(intel_connector->detect_edid); - intel_connector->detect_edid = NULL; + kfree(connector->detect_edid); + connector->detect_edid = NULL; + intel_dp->has_hdmi_sink = false; intel_dp->has_audio = false; intel_dp->edid_quirks = 0; + + intel_dp->dfp.max_bpc = 0; + intel_dp->dfp.max_dotclock = 0; + intel_dp->dfp.min_tmds_clock = 0; + intel_dp->dfp.max_tmds_clock = 0; + + intel_dp->dfp.ycbcr_444_to_420 = false; + connector->base.ycbcr_420_allowed = false; } static int @@ -6149,6 +6305,9 @@ intel_dp_detect(struct drm_connector *connector, drm_WARN_ON(&dev_priv->drm, !drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); + if (!INTEL_DISPLAY_ENABLED(dev_priv)) + return connector_status_disconnected; + /* Can't disconnect eDP */ if 
(intel_dp_is_edp(intel_dp)) status = edp_detect(intel_dp); @@ -6243,6 +6402,11 @@ out: */ intel_display_power_flush_work(dev_priv); + if (!intel_dp_is_edp(intel_dp)) + drm_dp_set_subconnector_property(connector, + status, + intel_dp->dpcd, + intel_dp->downstream_ports); return status; } @@ -6284,7 +6448,7 @@ static int intel_dp_get_modes(struct drm_connector *connector) } /* if eDP has no EDID, fall back to fixed mode */ - if (intel_dp_is_edp(intel_attached_dp(to_intel_connector(connector))) && + if (intel_dp_is_edp(intel_attached_dp(intel_connector)) && intel_connector->panel.fixed_mode) { struct drm_display_mode *mode; @@ -6296,6 +6460,19 @@ static int intel_dp_get_modes(struct drm_connector *connector) } } + if (!edid) { + struct intel_dp *intel_dp = intel_attached_dp(intel_connector); + struct drm_display_mode *mode; + + mode = drm_dp_downstream_mode(connector->dev, + intel_dp->dpcd, + intel_dp->downstream_ports); + if (mode) { + drm_mode_probed_add(connector, mode); + return 1; + } + } + return 0; } @@ -6381,628 +6558,6 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) edp_panel_vdd_off_sync(intel_dp); } -static void intel_dp_hdcp_wait_for_cp_irq(struct intel_hdcp *hdcp, int timeout) -{ - long ret; - -#define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count)) - ret = wait_event_interruptible_timeout(hdcp->cp_irq_queue, C, - msecs_to_jiffies(timeout)); - - if (!ret) - DRM_DEBUG_KMS("Timedout at waiting for CP_IRQ\n"); -} - -static -int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *dig_port, - u8 *an) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(to_intel_encoder(&dig_port->base.base)); - static const struct drm_dp_aux_msg msg = { - .request = DP_AUX_NATIVE_WRITE, - .address = DP_AUX_HDCP_AKSV, - .size = DRM_HDCP_KSV_LEN, - }; - u8 txbuf[HEADER_SIZE + DRM_HDCP_KSV_LEN] = {}, rxbuf[2], reply = 0; - ssize_t dpcd_ret; - int ret; - - /* Output An first, 
that's easy */ - dpcd_ret = drm_dp_dpcd_write(&dig_port->dp.aux, DP_AUX_HDCP_AN, - an, DRM_HDCP_AN_LEN); - if (dpcd_ret != DRM_HDCP_AN_LEN) { - drm_dbg_kms(&i915->drm, - "Failed to write An over DP/AUX (%zd)\n", - dpcd_ret); - return dpcd_ret >= 0 ? -EIO : dpcd_ret; - } - - /* - * Since Aksv is Oh-So-Secret, we can't access it in software. So in - * order to get it on the wire, we need to create the AUX header as if - * we were writing the data, and then tickle the hardware to output the - * data once the header is sent out. - */ - intel_dp_aux_header(txbuf, &msg); - - ret = intel_dp_aux_xfer(intel_dp, txbuf, HEADER_SIZE + msg.size, - rxbuf, sizeof(rxbuf), - DP_AUX_CH_CTL_AUX_AKSV_SELECT); - if (ret < 0) { - drm_dbg_kms(&i915->drm, - "Write Aksv over DP/AUX failed (%d)\n", ret); - return ret; - } else if (ret == 0) { - drm_dbg_kms(&i915->drm, "Aksv write over DP/AUX was empty\n"); - return -EIO; - } - - reply = (rxbuf[0] >> 4) & DP_AUX_NATIVE_REPLY_MASK; - if (reply != DP_AUX_NATIVE_REPLY_ACK) { - drm_dbg_kms(&i915->drm, - "Aksv write: no DP_AUX_NATIVE_REPLY_ACK %x\n", - reply); - return -EIO; - } - return 0; -} - -static int intel_dp_hdcp_read_bksv(struct intel_digital_port *dig_port, - u8 *bksv) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BKSV, bksv, - DRM_HDCP_KSV_LEN); - if (ret != DRM_HDCP_KSV_LEN) { - drm_dbg_kms(&i915->drm, - "Read Bksv from DP/AUX failed (%zd)\n", ret); - return ret >= 0 ? -EIO : ret; - } - return 0; -} - -static int intel_dp_hdcp_read_bstatus(struct intel_digital_port *dig_port, - u8 *bstatus) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - /* - * For some reason the HDMI and DP HDCP specs call this register - * definition by different names. In the HDMI spec, it's called BSTATUS, - * but in DP it's called BINFO. 
- */ - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BINFO, - bstatus, DRM_HDCP_BSTATUS_LEN); - if (ret != DRM_HDCP_BSTATUS_LEN) { - drm_dbg_kms(&i915->drm, - "Read bstatus from DP/AUX failed (%zd)\n", ret); - return ret >= 0 ? -EIO : ret; - } - return 0; -} - -static -int intel_dp_hdcp_read_bcaps(struct intel_digital_port *dig_port, - u8 *bcaps) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BCAPS, - bcaps, 1); - if (ret != 1) { - drm_dbg_kms(&i915->drm, - "Read bcaps from DP/AUX failed (%zd)\n", ret); - return ret >= 0 ? -EIO : ret; - } - - return 0; -} - -static -int intel_dp_hdcp_repeater_present(struct intel_digital_port *dig_port, - bool *repeater_present) -{ - ssize_t ret; - u8 bcaps; - - ret = intel_dp_hdcp_read_bcaps(dig_port, &bcaps); - if (ret) - return ret; - - *repeater_present = bcaps & DP_BCAPS_REPEATER_PRESENT; - return 0; -} - -static -int intel_dp_hdcp_read_ri_prime(struct intel_digital_port *dig_port, - u8 *ri_prime) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_RI_PRIME, - ri_prime, DRM_HDCP_RI_LEN); - if (ret != DRM_HDCP_RI_LEN) { - drm_dbg_kms(&i915->drm, "Read Ri' from DP/AUX failed (%zd)\n", - ret); - return ret >= 0 ? -EIO : ret; - } - return 0; -} - -static -int intel_dp_hdcp_read_ksv_ready(struct intel_digital_port *dig_port, - bool *ksv_ready) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - u8 bstatus; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, - &bstatus, 1); - if (ret != 1) { - drm_dbg_kms(&i915->drm, - "Read bstatus from DP/AUX failed (%zd)\n", ret); - return ret >= 0 ? 
-EIO : ret; - } - *ksv_ready = bstatus & DP_BSTATUS_READY; - return 0; -} - -static -int intel_dp_hdcp_read_ksv_fifo(struct intel_digital_port *dig_port, - int num_downstream, u8 *ksv_fifo) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - int i; - - /* KSV list is read via 15 byte window (3 entries @ 5 bytes each) */ - for (i = 0; i < num_downstream; i += 3) { - size_t len = min(num_downstream - i, 3) * DRM_HDCP_KSV_LEN; - ret = drm_dp_dpcd_read(&dig_port->dp.aux, - DP_AUX_HDCP_KSV_FIFO, - ksv_fifo + i * DRM_HDCP_KSV_LEN, - len); - if (ret != len) { - drm_dbg_kms(&i915->drm, - "Read ksv[%d] from DP/AUX failed (%zd)\n", - i, ret); - return ret >= 0 ? -EIO : ret; - } - } - return 0; -} - -static -int intel_dp_hdcp_read_v_prime_part(struct intel_digital_port *dig_port, - int i, u32 *part) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - if (i >= DRM_HDCP_V_PRIME_NUM_PARTS) - return -EINVAL; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, - DP_AUX_HDCP_V_PRIME(i), part, - DRM_HDCP_V_PRIME_PART_LEN); - if (ret != DRM_HDCP_V_PRIME_PART_LEN) { - drm_dbg_kms(&i915->drm, - "Read v'[%d] from DP/AUX failed (%zd)\n", i, ret); - return ret >= 0 ? 
-EIO : ret; - } - return 0; -} - -static -int intel_dp_hdcp_toggle_signalling(struct intel_digital_port *dig_port, - bool enable) -{ - /* Not used for single stream DisplayPort setups */ - return 0; -} - -static -bool intel_dp_hdcp_check_link(struct intel_digital_port *dig_port) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - u8 bstatus; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, - &bstatus, 1); - if (ret != 1) { - drm_dbg_kms(&i915->drm, - "Read bstatus from DP/AUX failed (%zd)\n", ret); - return false; - } - - return !(bstatus & (DP_BSTATUS_LINK_FAILURE | DP_BSTATUS_REAUTH_REQ)); -} - -static -int intel_dp_hdcp_capable(struct intel_digital_port *dig_port, - bool *hdcp_capable) -{ - ssize_t ret; - u8 bcaps; - - ret = intel_dp_hdcp_read_bcaps(dig_port, &bcaps); - if (ret) - return ret; - - *hdcp_capable = bcaps & DP_BCAPS_HDCP_CAPABLE; - return 0; -} - -struct hdcp2_dp_errata_stream_type { - u8 msg_id; - u8 stream_type; -} __packed; - -struct hdcp2_dp_msg_data { - u8 msg_id; - u32 offset; - bool msg_detectable; - u32 timeout; - u32 timeout2; /* Added for non_paired situation */ -}; - -static const struct hdcp2_dp_msg_data hdcp2_dp_msg_data[] = { - { HDCP_2_2_AKE_INIT, DP_HDCP_2_2_AKE_INIT_OFFSET, false, 0, 0 }, - { HDCP_2_2_AKE_SEND_CERT, DP_HDCP_2_2_AKE_SEND_CERT_OFFSET, - false, HDCP_2_2_CERT_TIMEOUT_MS, 0 }, - { HDCP_2_2_AKE_NO_STORED_KM, DP_HDCP_2_2_AKE_NO_STORED_KM_OFFSET, - false, 0, 0 }, - { HDCP_2_2_AKE_STORED_KM, DP_HDCP_2_2_AKE_STORED_KM_OFFSET, - false, 0, 0 }, - { HDCP_2_2_AKE_SEND_HPRIME, DP_HDCP_2_2_AKE_SEND_HPRIME_OFFSET, - true, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS, - HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS }, - { HDCP_2_2_AKE_SEND_PAIRING_INFO, - DP_HDCP_2_2_AKE_SEND_PAIRING_INFO_OFFSET, true, - HDCP_2_2_PAIRING_TIMEOUT_MS, 0 }, - { HDCP_2_2_LC_INIT, DP_HDCP_2_2_LC_INIT_OFFSET, false, 0, 0 }, - { HDCP_2_2_LC_SEND_LPRIME, DP_HDCP_2_2_LC_SEND_LPRIME_OFFSET, - false, 
HDCP_2_2_DP_LPRIME_TIMEOUT_MS, 0 }, - { HDCP_2_2_SKE_SEND_EKS, DP_HDCP_2_2_SKE_SEND_EKS_OFFSET, false, - 0, 0 }, - { HDCP_2_2_REP_SEND_RECVID_LIST, - DP_HDCP_2_2_REP_SEND_RECVID_LIST_OFFSET, true, - HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0 }, - { HDCP_2_2_REP_SEND_ACK, DP_HDCP_2_2_REP_SEND_ACK_OFFSET, false, - 0, 0 }, - { HDCP_2_2_REP_STREAM_MANAGE, - DP_HDCP_2_2_REP_STREAM_MANAGE_OFFSET, false, - 0, 0 }, - { HDCP_2_2_REP_STREAM_READY, DP_HDCP_2_2_REP_STREAM_READY_OFFSET, - false, HDCP_2_2_STREAM_READY_TIMEOUT_MS, 0 }, -/* local define to shovel this through the write_2_2 interface */ -#define HDCP_2_2_ERRATA_DP_STREAM_TYPE 50 - { HDCP_2_2_ERRATA_DP_STREAM_TYPE, - DP_HDCP_2_2_REG_STREAM_TYPE_OFFSET, false, - 0, 0 }, -}; - -static int -intel_dp_hdcp2_read_rx_status(struct intel_digital_port *dig_port, - u8 *rx_status) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - ssize_t ret; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, - DP_HDCP_2_2_REG_RXSTATUS_OFFSET, rx_status, - HDCP_2_2_DP_RXSTATUS_LEN); - if (ret != HDCP_2_2_DP_RXSTATUS_LEN) { - drm_dbg_kms(&i915->drm, - "Read bstatus from DP/AUX failed (%zd)\n", ret); - return ret >= 0 ? 
-EIO : ret; - } - - return 0; -} - -static -int hdcp2_detect_msg_availability(struct intel_digital_port *dig_port, - u8 msg_id, bool *msg_ready) -{ - u8 rx_status; - int ret; - - *msg_ready = false; - ret = intel_dp_hdcp2_read_rx_status(dig_port, &rx_status); - if (ret < 0) - return ret; - - switch (msg_id) { - case HDCP_2_2_AKE_SEND_HPRIME: - if (HDCP_2_2_DP_RXSTATUS_H_PRIME(rx_status)) - *msg_ready = true; - break; - case HDCP_2_2_AKE_SEND_PAIRING_INFO: - if (HDCP_2_2_DP_RXSTATUS_PAIRING(rx_status)) - *msg_ready = true; - break; - case HDCP_2_2_REP_SEND_RECVID_LIST: - if (HDCP_2_2_DP_RXSTATUS_READY(rx_status)) - *msg_ready = true; - break; - default: - DRM_ERROR("Unidentified msg_id: %d\n", msg_id); - return -EINVAL; - } - - return 0; -} - -static ssize_t -intel_dp_hdcp2_wait_for_msg(struct intel_digital_port *dig_port, - const struct hdcp2_dp_msg_data *hdcp2_msg_data) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - struct intel_dp *dp = &dig_port->dp; - struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; - u8 msg_id = hdcp2_msg_data->msg_id; - int ret, timeout; - bool msg_ready = false; - - if (msg_id == HDCP_2_2_AKE_SEND_HPRIME && !hdcp->is_paired) - timeout = hdcp2_msg_data->timeout2; - else - timeout = hdcp2_msg_data->timeout; - - /* - * There is no way to detect the CERT, LPRIME and STREAM_READY - * availability. So Wait for timeout and read the msg. - */ - if (!hdcp2_msg_data->msg_detectable) { - mdelay(timeout); - ret = 0; - } else { - /* - * As we want to check the msg availability at timeout, Ignoring - * the timeout at wait for CP_IRQ. 
- */ - intel_dp_hdcp_wait_for_cp_irq(hdcp, timeout); - ret = hdcp2_detect_msg_availability(dig_port, - msg_id, &msg_ready); - if (!msg_ready) - ret = -ETIMEDOUT; - } - - if (ret) - drm_dbg_kms(&i915->drm, - "msg_id %d, ret %d, timeout(mSec): %d\n", - hdcp2_msg_data->msg_id, ret, timeout); - - return ret; -} - -static const struct hdcp2_dp_msg_data *get_hdcp2_dp_msg_data(u8 msg_id) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(hdcp2_dp_msg_data); i++) - if (hdcp2_dp_msg_data[i].msg_id == msg_id) - return &hdcp2_dp_msg_data[i]; - - return NULL; -} - -static -int intel_dp_hdcp2_write_msg(struct intel_digital_port *dig_port, - void *buf, size_t size) -{ - struct intel_dp *dp = &dig_port->dp; - struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; - unsigned int offset; - u8 *byte = buf; - ssize_t ret, bytes_to_write, len; - const struct hdcp2_dp_msg_data *hdcp2_msg_data; - - hdcp2_msg_data = get_hdcp2_dp_msg_data(*byte); - if (!hdcp2_msg_data) - return -EINVAL; - - offset = hdcp2_msg_data->offset; - - /* No msg_id in DP HDCP2.2 msgs */ - bytes_to_write = size - 1; - byte++; - - hdcp->cp_irq_count_cached = atomic_read(&hdcp->cp_irq_count); - - while (bytes_to_write) { - len = bytes_to_write > DP_AUX_MAX_PAYLOAD_BYTES ? - DP_AUX_MAX_PAYLOAD_BYTES : bytes_to_write; - - ret = drm_dp_dpcd_write(&dig_port->dp.aux, - offset, (void *)byte, len); - if (ret < 0) - return ret; - - bytes_to_write -= ret; - byte += ret; - offset += ret; - } - - return size; -} - -static -ssize_t get_receiver_id_list_size(struct intel_digital_port *dig_port) -{ - u8 rx_info[HDCP_2_2_RXINFO_LEN]; - u32 dev_cnt; - ssize_t ret; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, - DP_HDCP_2_2_REG_RXINFO_OFFSET, - (void *)rx_info, HDCP_2_2_RXINFO_LEN); - if (ret != HDCP_2_2_RXINFO_LEN) - return ret >= 0 ? 
-EIO : ret; - - dev_cnt = (HDCP_2_2_DEV_COUNT_HI(rx_info[0]) << 4 | - HDCP_2_2_DEV_COUNT_LO(rx_info[1])); - - if (dev_cnt > HDCP_2_2_MAX_DEVICE_COUNT) - dev_cnt = HDCP_2_2_MAX_DEVICE_COUNT; - - ret = sizeof(struct hdcp2_rep_send_receiverid_list) - - HDCP_2_2_RECEIVER_IDS_MAX_LEN + - (dev_cnt * HDCP_2_2_RECEIVER_ID_LEN); - - return ret; -} - -static -int intel_dp_hdcp2_read_msg(struct intel_digital_port *dig_port, - u8 msg_id, void *buf, size_t size) -{ - struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - unsigned int offset; - u8 *byte = buf; - ssize_t ret, bytes_to_recv, len; - const struct hdcp2_dp_msg_data *hdcp2_msg_data; - - hdcp2_msg_data = get_hdcp2_dp_msg_data(msg_id); - if (!hdcp2_msg_data) - return -EINVAL; - offset = hdcp2_msg_data->offset; - - ret = intel_dp_hdcp2_wait_for_msg(dig_port, hdcp2_msg_data); - if (ret < 0) - return ret; - - if (msg_id == HDCP_2_2_REP_SEND_RECVID_LIST) { - ret = get_receiver_id_list_size(dig_port); - if (ret < 0) - return ret; - - size = ret; - } - bytes_to_recv = size - 1; - - /* DP adaptation msgs has no msg_id */ - byte++; - - while (bytes_to_recv) { - len = bytes_to_recv > DP_AUX_MAX_PAYLOAD_BYTES ? - DP_AUX_MAX_PAYLOAD_BYTES : bytes_to_recv; - - ret = drm_dp_dpcd_read(&dig_port->dp.aux, offset, - (void *)byte, len); - if (ret < 0) { - drm_dbg_kms(&i915->drm, "msg_id %d, ret %zd\n", - msg_id, ret); - return ret; - } - - bytes_to_recv -= ret; - byte += ret; - offset += ret; - } - byte = buf; - *byte = msg_id; - - return size; -} - -static -int intel_dp_hdcp2_config_stream_type(struct intel_digital_port *dig_port, - bool is_repeater, u8 content_type) -{ - int ret; - struct hdcp2_dp_errata_stream_type stream_type_msg; - - if (is_repeater) - return 0; - - /* - * Errata for DP: As Stream type is used for encryption, Receiver - * should be communicated with stream type for the decryption of the - * content. - * Repeater will be communicated with stream type as a part of it's - * auth later in time. 
- */ - stream_type_msg.msg_id = HDCP_2_2_ERRATA_DP_STREAM_TYPE; - stream_type_msg.stream_type = content_type; - - ret = intel_dp_hdcp2_write_msg(dig_port, &stream_type_msg, - sizeof(stream_type_msg)); - - return ret < 0 ? ret : 0; - -} - -static -int intel_dp_hdcp2_check_link(struct intel_digital_port *dig_port) -{ - u8 rx_status; - int ret; - - ret = intel_dp_hdcp2_read_rx_status(dig_port, &rx_status); - if (ret) - return ret; - - if (HDCP_2_2_DP_RXSTATUS_REAUTH_REQ(rx_status)) - ret = HDCP_REAUTH_REQUEST; - else if (HDCP_2_2_DP_RXSTATUS_LINK_FAILED(rx_status)) - ret = HDCP_LINK_INTEGRITY_FAILURE; - else if (HDCP_2_2_DP_RXSTATUS_READY(rx_status)) - ret = HDCP_TOPOLOGY_CHANGE; - - return ret; -} - -static -int intel_dp_hdcp2_capable(struct intel_digital_port *dig_port, - bool *capable) -{ - u8 rx_caps[3]; - int ret; - - *capable = false; - ret = drm_dp_dpcd_read(&dig_port->dp.aux, - DP_HDCP_2_2_REG_RX_CAPS_OFFSET, - rx_caps, HDCP_2_2_RXCAPS_LEN); - if (ret != HDCP_2_2_RXCAPS_LEN) - return ret >= 0 ? 
-EIO : ret; - - if (rx_caps[0] == HDCP_2_2_RX_CAPS_VERSION_VAL && - HDCP_2_2_DP_HDCP_CAPABLE(rx_caps[2])) - *capable = true; - - return 0; -} - -static const struct intel_hdcp_shim intel_dp_hdcp_shim = { - .write_an_aksv = intel_dp_hdcp_write_an_aksv, - .read_bksv = intel_dp_hdcp_read_bksv, - .read_bstatus = intel_dp_hdcp_read_bstatus, - .repeater_present = intel_dp_hdcp_repeater_present, - .read_ri_prime = intel_dp_hdcp_read_ri_prime, - .read_ksv_ready = intel_dp_hdcp_read_ksv_ready, - .read_ksv_fifo = intel_dp_hdcp_read_ksv_fifo, - .read_v_prime_part = intel_dp_hdcp_read_v_prime_part, - .toggle_signalling = intel_dp_hdcp_toggle_signalling, - .check_link = intel_dp_hdcp_check_link, - .hdcp_capable = intel_dp_hdcp_capable, - .write_2_2_msg = intel_dp_hdcp2_write_msg, - .read_2_2_msg = intel_dp_hdcp2_read_msg, - .config_stream_type = intel_dp_hdcp2_config_stream_type, - .check_2_2_link = intel_dp_hdcp2_check_link, - .hdcp_2_2_capable = intel_dp_hdcp2_capable, - .protocol = HDCP_PROTOCOL_DP, -}; - static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -7312,6 +6867,9 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect struct drm_i915_private *dev_priv = to_i915(connector->dev); enum port port = dp_to_dig_port(intel_dp)->base.port; + if (!intel_dp_is_edp(intel_dp)) + drm_connector_attach_dp_subconnector_property(connector); + if (!IS_G4X(dev_priv) && port != PORT_A) intel_attach_force_audio_property(connector); @@ -7710,6 +7268,15 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, refresh_rate); } +static void +intel_edp_drrs_enable_locked(struct intel_dp *intel_dp) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + + dev_priv->drrs.busy_frontbuffer_bits = 0; + dev_priv->drrs.dp = intel_dp; +} + /** * intel_edp_drrs_enable - init drrs struct if supported * @intel_dp: DP struct @@ -7722,31 +7289,40 @@ void 
intel_edp_drrs_enable(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - if (!crtc_state->has_drrs) { - drm_dbg_kms(&dev_priv->drm, "Panel doesn't support DRRS\n"); + if (!crtc_state->has_drrs) return; - } - if (dev_priv->psr.enabled) { - drm_dbg_kms(&dev_priv->drm, - "PSR enabled. Not enabling DRRS.\n"); - return; - } + drm_dbg_kms(&dev_priv->drm, "Enabling DRRS\n"); mutex_lock(&dev_priv->drrs.mutex); + if (dev_priv->drrs.dp) { - drm_dbg_kms(&dev_priv->drm, "DRRS already enabled\n"); + drm_warn(&dev_priv->drm, "DRRS already enabled\n"); goto unlock; } - dev_priv->drrs.busy_frontbuffer_bits = 0; - - dev_priv->drrs.dp = intel_dp; + intel_edp_drrs_enable_locked(intel_dp); unlock: mutex_unlock(&dev_priv->drrs.mutex); } +static void +intel_edp_drrs_disable_locked(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + + if (dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR) { + int refresh; + + refresh = drm_mode_vrefresh(intel_dp->attached_connector->panel.fixed_mode); + intel_dp_set_drrs_state(dev_priv, crtc_state, refresh); + } + + dev_priv->drrs.dp = NULL; +} + /** * intel_edp_drrs_disable - Disable DRRS * @intel_dp: DP struct @@ -7767,16 +7343,45 @@ void intel_edp_drrs_disable(struct intel_dp *intel_dp, return; } - if (dev_priv->drrs.refresh_rate_type == DRRS_LOW_RR) - intel_dp_set_drrs_state(dev_priv, old_crtc_state, - drm_mode_vrefresh(intel_dp->attached_connector->panel.fixed_mode)); - - dev_priv->drrs.dp = NULL; + intel_edp_drrs_disable_locked(intel_dp, old_crtc_state); mutex_unlock(&dev_priv->drrs.mutex); cancel_delayed_work_sync(&dev_priv->drrs.work); } +/** + * intel_edp_drrs_update - Update DRRS state + * @intel_dp: Intel DP + * @crtc_state: new CRTC state + * + * This function will update DRRS states, disabling or enabling DRRS when + * executing fastsets. 
For full modeset, intel_edp_drrs_disable() and + * intel_edp_drrs_enable() should be called instead. + */ +void +intel_edp_drrs_update(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + + if (dev_priv->drrs.type != SEAMLESS_DRRS_SUPPORT) + return; + + mutex_lock(&dev_priv->drrs.mutex); + + /* New state matches current one? */ + if (crtc_state->has_drrs == !!dev_priv->drrs.dp) + goto unlock; + + if (crtc_state->has_drrs) + intel_edp_drrs_enable_locked(intel_dp); + else + intel_edp_drrs_disable_locked(intel_dp, crtc_state); + +unlock: + mutex_unlock(&dev_priv->drrs.mutex); +} + static void intel_edp_drrs_downclock_work(struct work_struct *work) { struct drm_i915_private *dev_priv = @@ -8208,10 +7813,6 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, connector->interlace_allowed = true; connector->doublescan_allowed = 0; - if (INTEL_GEN(dev_priv) >= 11) - connector->ycbcr_420_allowed = true; - - intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); intel_connector->polled = DRM_CONNECTOR_POLL_HPD; intel_dp_aux_init(intel_dp); @@ -8236,7 +7837,7 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, intel_dp_add_properties(intel_dp, connector); if (is_hdcp_supported(dev_priv, port) && !intel_dp_is_edp(intel_dp)) { - int ret = intel_hdcp_init(intel_connector, &intel_dp_hdcp_shim); + int ret = intel_dp_init_hdcp(dig_port, intel_connector); if (ret) drm_dbg_kms(&dev_priv->drm, "HDCP init failed, skipping.\n"); @@ -8280,6 +7881,8 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder = &dig_port->base; encoder = &intel_encoder->base; + mutex_init(&dig_port->hdcp_mutex); + if (drm_encoder_init(&dev_priv->drm, &intel_encoder->base, &intel_dp_enc_funcs, DRM_MODE_ENCODER_TMDS, "DP %c", port_name(port))) @@ -8354,6 +7957,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, } intel_encoder->cloneable = 0; intel_encoder->port = port; + 
intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); dig_port->hpd_pulse = intel_dp_hpd_pulse; diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index b901ab850cbd..08a1c0aa8b94 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -17,6 +17,7 @@ struct drm_encoder; struct drm_i915_private; struct drm_modeset_acquire_ctx; struct drm_dp_vsc_sdp; +struct intel_atomic_state; struct intel_connector; struct intel_crtc_state; struct intel_digital_port; @@ -50,6 +51,7 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, int intel_dp_retrain_link(struct intel_encoder *encoder, struct drm_modeset_acquire_ctx *ctx); void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode); +void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp); void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, bool enable); @@ -81,6 +83,8 @@ void intel_edp_drrs_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); void intel_edp_drrs_disable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); +void intel_edp_drrs_update(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state); void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv, unsigned int frontbuffer_bits); void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, @@ -99,7 +103,6 @@ bool intel_dp_source_supports_hbr3(struct intel_dp *intel_dp); bool intel_dp_get_link_status(struct intel_dp *intel_dp, u8 *link_status); -bool intel_dp_read_dpcd(struct intel_dp *intel_dp); bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); @@ -128,4 +131,12 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) u32 intel_dp_mode_to_fec_clock(u32 
mode_clock); +void intel_ddi_update_pipe(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); + +int intel_dp_init_hdcp(struct intel_digital_port *dig_port, + struct intel_connector *intel_connector); + #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index acbd7eb66cbe..036f504ac7db 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -52,17 +52,11 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable) } } -/* - * Read the current backlight value from DPCD register(s) based - * on if 8-bit(MSB) or 16-bit(MSB and LSB) values are supported - */ -static u32 intel_dp_aux_get_backlight(struct intel_connector *connector) +static bool intel_dp_aux_backlight_dpcd_mode(struct intel_connector *connector) { struct intel_dp *intel_dp = intel_attached_dp(connector); struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 read_val[2] = { 0x0 }; u8 mode_reg; - u16 level = 0; if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, @@ -70,15 +64,29 @@ static u32 intel_dp_aux_get_backlight(struct intel_connector *connector) drm_dbg_kms(&i915->drm, "Failed to read the DPCD register 0x%x\n", DP_EDP_BACKLIGHT_MODE_SET_REGISTER); - return 0; + return false; } + return (mode_reg & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) == + DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD; +} + +/* + * Read the current backlight value from DPCD register(s) based + * on if 8-bit(MSB) or 16-bit(MSB and LSB) values are supported + */ +static u32 intel_dp_aux_get_backlight(struct intel_connector *connector) +{ + struct intel_dp *intel_dp = intel_attached_dp(connector); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + u8 read_val[2] = { 0x0 }; + u16 level = 0; + /* * If we're not in DPCD 
control mode yet, the programmed brightness * value is meaningless and we should assume max brightness */ - if ((mode_reg & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) != - DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) + if (!intel_dp_aux_backlight_dpcd_mode(connector)) return connector->panel.backlight.max; if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, @@ -319,7 +327,8 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector, panel->backlight.min = 0; panel->backlight.level = intel_dp_aux_get_backlight(connector); - panel->backlight.enabled = panel->backlight.level != 0; + panel->backlight.enabled = intel_dp_aux_backlight_dpcd_mode(connector) && + panel->backlight.level != 0; return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c new file mode 100644 index 000000000000..03424d20e9f7 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -0,0 +1,703 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2020 Google, Inc. 
+ * + * Authors: + * Sean Paul <seanpaul@chromium.org> + */ + +#include <drm/drm_dp_helper.h> +#include <drm/drm_dp_mst_helper.h> +#include <drm/drm_hdcp.h> +#include <drm/drm_print.h> + +#include "intel_display_types.h" +#include "intel_ddi.h" +#include "intel_dp.h" +#include "intel_hdcp.h" + +static void intel_dp_hdcp_wait_for_cp_irq(struct intel_hdcp *hdcp, int timeout) +{ + long ret; + +#define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count)) + ret = wait_event_interruptible_timeout(hdcp->cp_irq_queue, C, + msecs_to_jiffies(timeout)); + + if (!ret) + DRM_DEBUG_KMS("Timedout at waiting for CP_IRQ\n"); +} + +static +int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *dig_port, + u8 *an) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + u8 aksv[DRM_HDCP_KSV_LEN] = {}; + ssize_t dpcd_ret; + + /* Output An first, that's easy */ + dpcd_ret = drm_dp_dpcd_write(&dig_port->dp.aux, DP_AUX_HDCP_AN, + an, DRM_HDCP_AN_LEN); + if (dpcd_ret != DRM_HDCP_AN_LEN) { + drm_dbg_kms(&i915->drm, + "Failed to write An over DP/AUX (%zd)\n", + dpcd_ret); + return dpcd_ret >= 0 ? -EIO : dpcd_ret; + } + + /* + * Since Aksv is Oh-So-Secret, we can't access it in software. So we + * send an empty buffer of the correct length through the DP helpers. On + * the other side, in the transfer hook, we'll generate a flag based on + * the destination address which will tickle the hardware to output the + * Aksv on our behalf after the header is sent. + */ + dpcd_ret = drm_dp_dpcd_write(&dig_port->dp.aux, DP_AUX_HDCP_AKSV, + aksv, DRM_HDCP_KSV_LEN); + if (dpcd_ret != DRM_HDCP_KSV_LEN) { + drm_dbg_kms(&i915->drm, + "Failed to write Aksv over DP/AUX (%zd)\n", + dpcd_ret); + return dpcd_ret >= 0 ? 
-EIO : dpcd_ret; + } + return 0; +} + +static int intel_dp_hdcp_read_bksv(struct intel_digital_port *dig_port, + u8 *bksv) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BKSV, bksv, + DRM_HDCP_KSV_LEN); + if (ret != DRM_HDCP_KSV_LEN) { + drm_dbg_kms(&i915->drm, + "Read Bksv from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static int intel_dp_hdcp_read_bstatus(struct intel_digital_port *dig_port, + u8 *bstatus) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + /* + * For some reason the HDMI and DP HDCP specs call this register + * definition by different names. In the HDMI spec, it's called BSTATUS, + * but in DP it's called BINFO. + */ + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BINFO, + bstatus, DRM_HDCP_BSTATUS_LEN); + if (ret != DRM_HDCP_BSTATUS_LEN) { + drm_dbg_kms(&i915->drm, + "Read bstatus from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_read_bcaps(struct intel_digital_port *dig_port, + u8 *bcaps) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BCAPS, + bcaps, 1); + if (ret != 1) { + drm_dbg_kms(&i915->drm, + "Read bcaps from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? 
-EIO : ret; + } + + return 0; +} + +static +int intel_dp_hdcp_repeater_present(struct intel_digital_port *dig_port, + bool *repeater_present) +{ + ssize_t ret; + u8 bcaps; + + ret = intel_dp_hdcp_read_bcaps(dig_port, &bcaps); + if (ret) + return ret; + + *repeater_present = bcaps & DP_BCAPS_REPEATER_PRESENT; + return 0; +} + +static +int intel_dp_hdcp_read_ri_prime(struct intel_digital_port *dig_port, + u8 *ri_prime) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_RI_PRIME, + ri_prime, DRM_HDCP_RI_LEN); + if (ret != DRM_HDCP_RI_LEN) { + drm_dbg_kms(&i915->drm, "Read Ri' from DP/AUX failed (%zd)\n", + ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_read_ksv_ready(struct intel_digital_port *dig_port, + bool *ksv_ready) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + u8 bstatus; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, + &bstatus, 1); + if (ret != 1) { + drm_dbg_kms(&i915->drm, + "Read bstatus from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + *ksv_ready = bstatus & DP_BSTATUS_READY; + return 0; +} + +static +int intel_dp_hdcp_read_ksv_fifo(struct intel_digital_port *dig_port, + int num_downstream, u8 *ksv_fifo) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + int i; + + /* KSV list is read via 15 byte window (3 entries @ 5 bytes each) */ + for (i = 0; i < num_downstream; i += 3) { + size_t len = min(num_downstream - i, 3) * DRM_HDCP_KSV_LEN; + ret = drm_dp_dpcd_read(&dig_port->dp.aux, + DP_AUX_HDCP_KSV_FIFO, + ksv_fifo + i * DRM_HDCP_KSV_LEN, + len); + if (ret != len) { + drm_dbg_kms(&i915->drm, + "Read ksv[%d] from DP/AUX failed (%zd)\n", + i, ret); + return ret >= 0 ? 
-EIO : ret; + } + } + return 0; +} + +static +int intel_dp_hdcp_read_v_prime_part(struct intel_digital_port *dig_port, + int i, u32 *part) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + if (i >= DRM_HDCP_V_PRIME_NUM_PARTS) + return -EINVAL; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, + DP_AUX_HDCP_V_PRIME(i), part, + DRM_HDCP_V_PRIME_PART_LEN); + if (ret != DRM_HDCP_V_PRIME_PART_LEN) { + drm_dbg_kms(&i915->drm, + "Read v'[%d] from DP/AUX failed (%zd)\n", i, ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_toggle_signalling(struct intel_digital_port *dig_port, + enum transcoder cpu_transcoder, + bool enable) +{ + /* Not used for single stream DisplayPort setups */ + return 0; +} + +static +bool intel_dp_hdcp_check_link(struct intel_digital_port *dig_port, + struct intel_connector *connector) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + u8 bstatus; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, + &bstatus, 1); + if (ret != 1) { + drm_dbg_kms(&i915->drm, + "Read bstatus from DP/AUX failed (%zd)\n", ret); + return false; + } + + return !(bstatus & (DP_BSTATUS_LINK_FAILURE | DP_BSTATUS_REAUTH_REQ)); +} + +static +int intel_dp_hdcp_capable(struct intel_digital_port *dig_port, + bool *hdcp_capable) +{ + ssize_t ret; + u8 bcaps; + + ret = intel_dp_hdcp_read_bcaps(dig_port, &bcaps); + if (ret) + return ret; + + *hdcp_capable = bcaps & DP_BCAPS_HDCP_CAPABLE; + return 0; +} + +struct hdcp2_dp_errata_stream_type { + u8 msg_id; + u8 stream_type; +} __packed; + +struct hdcp2_dp_msg_data { + u8 msg_id; + u32 offset; + bool msg_detectable; + u32 timeout; + u32 timeout2; /* Added for non_paired situation */ +}; + +static const struct hdcp2_dp_msg_data hdcp2_dp_msg_data[] = { + { HDCP_2_2_AKE_INIT, DP_HDCP_2_2_AKE_INIT_OFFSET, false, 0, 0 }, + { HDCP_2_2_AKE_SEND_CERT, DP_HDCP_2_2_AKE_SEND_CERT_OFFSET, + false, HDCP_2_2_CERT_TIMEOUT_MS, 0 
}, + { HDCP_2_2_AKE_NO_STORED_KM, DP_HDCP_2_2_AKE_NO_STORED_KM_OFFSET, + false, 0, 0 }, + { HDCP_2_2_AKE_STORED_KM, DP_HDCP_2_2_AKE_STORED_KM_OFFSET, + false, 0, 0 }, + { HDCP_2_2_AKE_SEND_HPRIME, DP_HDCP_2_2_AKE_SEND_HPRIME_OFFSET, + true, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS, + HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS }, + { HDCP_2_2_AKE_SEND_PAIRING_INFO, + DP_HDCP_2_2_AKE_SEND_PAIRING_INFO_OFFSET, true, + HDCP_2_2_PAIRING_TIMEOUT_MS, 0 }, + { HDCP_2_2_LC_INIT, DP_HDCP_2_2_LC_INIT_OFFSET, false, 0, 0 }, + { HDCP_2_2_LC_SEND_LPRIME, DP_HDCP_2_2_LC_SEND_LPRIME_OFFSET, + false, HDCP_2_2_DP_LPRIME_TIMEOUT_MS, 0 }, + { HDCP_2_2_SKE_SEND_EKS, DP_HDCP_2_2_SKE_SEND_EKS_OFFSET, false, + 0, 0 }, + { HDCP_2_2_REP_SEND_RECVID_LIST, + DP_HDCP_2_2_REP_SEND_RECVID_LIST_OFFSET, true, + HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0 }, + { HDCP_2_2_REP_SEND_ACK, DP_HDCP_2_2_REP_SEND_ACK_OFFSET, false, + 0, 0 }, + { HDCP_2_2_REP_STREAM_MANAGE, + DP_HDCP_2_2_REP_STREAM_MANAGE_OFFSET, false, + 0, 0 }, + { HDCP_2_2_REP_STREAM_READY, DP_HDCP_2_2_REP_STREAM_READY_OFFSET, + false, HDCP_2_2_STREAM_READY_TIMEOUT_MS, 0 }, +/* local define to shovel this through the write_2_2 interface */ +#define HDCP_2_2_ERRATA_DP_STREAM_TYPE 50 + { HDCP_2_2_ERRATA_DP_STREAM_TYPE, + DP_HDCP_2_2_REG_STREAM_TYPE_OFFSET, false, + 0, 0 }, +}; + +static int +intel_dp_hdcp2_read_rx_status(struct intel_digital_port *dig_port, + u8 *rx_status) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + ssize_t ret; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, + DP_HDCP_2_2_REG_RXSTATUS_OFFSET, rx_status, + HDCP_2_2_DP_RXSTATUS_LEN); + if (ret != HDCP_2_2_DP_RXSTATUS_LEN) { + drm_dbg_kms(&i915->drm, + "Read bstatus from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? 
-EIO : ret; + } + + return 0; +} + +static +int hdcp2_detect_msg_availability(struct intel_digital_port *dig_port, + u8 msg_id, bool *msg_ready) +{ + u8 rx_status; + int ret; + + *msg_ready = false; + ret = intel_dp_hdcp2_read_rx_status(dig_port, &rx_status); + if (ret < 0) + return ret; + + switch (msg_id) { + case HDCP_2_2_AKE_SEND_HPRIME: + if (HDCP_2_2_DP_RXSTATUS_H_PRIME(rx_status)) + *msg_ready = true; + break; + case HDCP_2_2_AKE_SEND_PAIRING_INFO: + if (HDCP_2_2_DP_RXSTATUS_PAIRING(rx_status)) + *msg_ready = true; + break; + case HDCP_2_2_REP_SEND_RECVID_LIST: + if (HDCP_2_2_DP_RXSTATUS_READY(rx_status)) + *msg_ready = true; + break; + default: + DRM_ERROR("Unidentified msg_id: %d\n", msg_id); + return -EINVAL; + } + + return 0; +} + +static ssize_t +intel_dp_hdcp2_wait_for_msg(struct intel_digital_port *dig_port, + const struct hdcp2_dp_msg_data *hdcp2_msg_data) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; + u8 msg_id = hdcp2_msg_data->msg_id; + int ret, timeout; + bool msg_ready = false; + + if (msg_id == HDCP_2_2_AKE_SEND_HPRIME && !hdcp->is_paired) + timeout = hdcp2_msg_data->timeout2; + else + timeout = hdcp2_msg_data->timeout; + + /* + * There is no way to detect the CERT, LPRIME and STREAM_READY + * availability. So Wait for timeout and read the msg. + */ + if (!hdcp2_msg_data->msg_detectable) { + mdelay(timeout); + ret = 0; + } else { + /* + * As we want to check the msg availability at timeout, Ignoring + * the timeout at wait for CP_IRQ. 
+ */ + intel_dp_hdcp_wait_for_cp_irq(hdcp, timeout); + ret = hdcp2_detect_msg_availability(dig_port, + msg_id, &msg_ready); + if (!msg_ready) + ret = -ETIMEDOUT; + } + + if (ret) + drm_dbg_kms(&i915->drm, + "msg_id %d, ret %d, timeout(mSec): %d\n", + hdcp2_msg_data->msg_id, ret, timeout); + + return ret; +} + +static const struct hdcp2_dp_msg_data *get_hdcp2_dp_msg_data(u8 msg_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(hdcp2_dp_msg_data); i++) + if (hdcp2_dp_msg_data[i].msg_id == msg_id) + return &hdcp2_dp_msg_data[i]; + + return NULL; +} + +static +int intel_dp_hdcp2_write_msg(struct intel_digital_port *dig_port, + void *buf, size_t size) +{ + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; + unsigned int offset; + u8 *byte = buf; + ssize_t ret, bytes_to_write, len; + const struct hdcp2_dp_msg_data *hdcp2_msg_data; + + hdcp2_msg_data = get_hdcp2_dp_msg_data(*byte); + if (!hdcp2_msg_data) + return -EINVAL; + + offset = hdcp2_msg_data->offset; + + /* No msg_id in DP HDCP2.2 msgs */ + bytes_to_write = size - 1; + byte++; + + hdcp->cp_irq_count_cached = atomic_read(&hdcp->cp_irq_count); + + while (bytes_to_write) { + len = bytes_to_write > DP_AUX_MAX_PAYLOAD_BYTES ? + DP_AUX_MAX_PAYLOAD_BYTES : bytes_to_write; + + ret = drm_dp_dpcd_write(&dig_port->dp.aux, + offset, (void *)byte, len); + if (ret < 0) + return ret; + + bytes_to_write -= ret; + byte += ret; + offset += ret; + } + + return size; +} + +static +ssize_t get_receiver_id_list_size(struct intel_digital_port *dig_port) +{ + u8 rx_info[HDCP_2_2_RXINFO_LEN]; + u32 dev_cnt; + ssize_t ret; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, + DP_HDCP_2_2_REG_RXINFO_OFFSET, + (void *)rx_info, HDCP_2_2_RXINFO_LEN); + if (ret != HDCP_2_2_RXINFO_LEN) + return ret >= 0 ? 
-EIO : ret; + + dev_cnt = (HDCP_2_2_DEV_COUNT_HI(rx_info[0]) << 4 | + HDCP_2_2_DEV_COUNT_LO(rx_info[1])); + + if (dev_cnt > HDCP_2_2_MAX_DEVICE_COUNT) + dev_cnt = HDCP_2_2_MAX_DEVICE_COUNT; + + ret = sizeof(struct hdcp2_rep_send_receiverid_list) - + HDCP_2_2_RECEIVER_IDS_MAX_LEN + + (dev_cnt * HDCP_2_2_RECEIVER_ID_LEN); + + return ret; +} + +static +int intel_dp_hdcp2_read_msg(struct intel_digital_port *dig_port, + u8 msg_id, void *buf, size_t size) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + unsigned int offset; + u8 *byte = buf; + ssize_t ret, bytes_to_recv, len; + const struct hdcp2_dp_msg_data *hdcp2_msg_data; + + hdcp2_msg_data = get_hdcp2_dp_msg_data(msg_id); + if (!hdcp2_msg_data) + return -EINVAL; + offset = hdcp2_msg_data->offset; + + ret = intel_dp_hdcp2_wait_for_msg(dig_port, hdcp2_msg_data); + if (ret < 0) + return ret; + + if (msg_id == HDCP_2_2_REP_SEND_RECVID_LIST) { + ret = get_receiver_id_list_size(dig_port); + if (ret < 0) + return ret; + + size = ret; + } + bytes_to_recv = size - 1; + + /* DP adaptation msgs has no msg_id */ + byte++; + + while (bytes_to_recv) { + len = bytes_to_recv > DP_AUX_MAX_PAYLOAD_BYTES ? + DP_AUX_MAX_PAYLOAD_BYTES : bytes_to_recv; + + ret = drm_dp_dpcd_read(&dig_port->dp.aux, offset, + (void *)byte, len); + if (ret < 0) { + drm_dbg_kms(&i915->drm, "msg_id %d, ret %zd\n", + msg_id, ret); + return ret; + } + + bytes_to_recv -= ret; + byte += ret; + offset += ret; + } + byte = buf; + *byte = msg_id; + + return size; +} + +static +int intel_dp_hdcp2_config_stream_type(struct intel_digital_port *dig_port, + bool is_repeater, u8 content_type) +{ + int ret; + struct hdcp2_dp_errata_stream_type stream_type_msg; + + if (is_repeater) + return 0; + + /* + * Errata for DP: As Stream type is used for encryption, Receiver + * should be communicated with stream type for the decryption of the + * content. + * Repeater will be communicated with stream type as a part of it's + * auth later in time. 
+ */ + stream_type_msg.msg_id = HDCP_2_2_ERRATA_DP_STREAM_TYPE; + stream_type_msg.stream_type = content_type; + + ret = intel_dp_hdcp2_write_msg(dig_port, &stream_type_msg, + sizeof(stream_type_msg)); + + return ret < 0 ? ret : 0; + +} + +static +int intel_dp_hdcp2_check_link(struct intel_digital_port *dig_port) +{ + u8 rx_status; + int ret; + + ret = intel_dp_hdcp2_read_rx_status(dig_port, &rx_status); + if (ret) + return ret; + + if (HDCP_2_2_DP_RXSTATUS_REAUTH_REQ(rx_status)) + ret = HDCP_REAUTH_REQUEST; + else if (HDCP_2_2_DP_RXSTATUS_LINK_FAILED(rx_status)) + ret = HDCP_LINK_INTEGRITY_FAILURE; + else if (HDCP_2_2_DP_RXSTATUS_READY(rx_status)) + ret = HDCP_TOPOLOGY_CHANGE; + + return ret; +} + +static +int intel_dp_hdcp2_capable(struct intel_digital_port *dig_port, + bool *capable) +{ + u8 rx_caps[3]; + int ret; + + *capable = false; + ret = drm_dp_dpcd_read(&dig_port->dp.aux, + DP_HDCP_2_2_REG_RX_CAPS_OFFSET, + rx_caps, HDCP_2_2_RXCAPS_LEN); + if (ret != HDCP_2_2_RXCAPS_LEN) + return ret >= 0 ? 
-EIO : ret; + + if (rx_caps[0] == HDCP_2_2_RX_CAPS_VERSION_VAL && + HDCP_2_2_DP_HDCP_CAPABLE(rx_caps[2])) + *capable = true; + + return 0; +} + +static const struct intel_hdcp_shim intel_dp_hdcp_shim = { + .write_an_aksv = intel_dp_hdcp_write_an_aksv, + .read_bksv = intel_dp_hdcp_read_bksv, + .read_bstatus = intel_dp_hdcp_read_bstatus, + .repeater_present = intel_dp_hdcp_repeater_present, + .read_ri_prime = intel_dp_hdcp_read_ri_prime, + .read_ksv_ready = intel_dp_hdcp_read_ksv_ready, + .read_ksv_fifo = intel_dp_hdcp_read_ksv_fifo, + .read_v_prime_part = intel_dp_hdcp_read_v_prime_part, + .toggle_signalling = intel_dp_hdcp_toggle_signalling, + .check_link = intel_dp_hdcp_check_link, + .hdcp_capable = intel_dp_hdcp_capable, + .write_2_2_msg = intel_dp_hdcp2_write_msg, + .read_2_2_msg = intel_dp_hdcp2_read_msg, + .config_stream_type = intel_dp_hdcp2_config_stream_type, + .check_2_2_link = intel_dp_hdcp2_check_link, + .hdcp_2_2_capable = intel_dp_hdcp2_capable, + .protocol = HDCP_PROTOCOL_DP, +}; + +static int +intel_dp_mst_hdcp_toggle_signalling(struct intel_digital_port *dig_port, + enum transcoder cpu_transcoder, + bool enable) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + int ret; + + if (!enable) + usleep_range(6, 60); /* Bspec says >= 6us */ + + ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, + cpu_transcoder, enable); + if (ret) + drm_dbg_kms(&i915->drm, "%s HDCP signalling failed (%d)\n", + enable ? 
"Enable" : "Disable", ret); + return ret; +} + +static +bool intel_dp_mst_hdcp_check_link(struct intel_digital_port *dig_port, + struct intel_connector *connector) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_dp *intel_dp = &dig_port->dp; + struct drm_dp_query_stream_enc_status_ack_reply reply; + int ret; + + if (!intel_dp_hdcp_check_link(dig_port, connector)) + return false; + + ret = drm_dp_send_query_stream_enc_status(&intel_dp->mst_mgr, + connector->port, &reply); + if (ret) { + drm_dbg_kms(&i915->drm, + "[CONNECTOR:%d:%s] failed QSES ret=%d\n", + connector->base.base.id, connector->base.name, ret); + return false; + } + + return reply.auth_completed && reply.encryption_enabled; +} + +static const struct intel_hdcp_shim intel_dp_mst_hdcp_shim = { + .write_an_aksv = intel_dp_hdcp_write_an_aksv, + .read_bksv = intel_dp_hdcp_read_bksv, + .read_bstatus = intel_dp_hdcp_read_bstatus, + .repeater_present = intel_dp_hdcp_repeater_present, + .read_ri_prime = intel_dp_hdcp_read_ri_prime, + .read_ksv_ready = intel_dp_hdcp_read_ksv_ready, + .read_ksv_fifo = intel_dp_hdcp_read_ksv_fifo, + .read_v_prime_part = intel_dp_hdcp_read_v_prime_part, + .toggle_signalling = intel_dp_mst_hdcp_toggle_signalling, + .check_link = intel_dp_mst_hdcp_check_link, + .hdcp_capable = intel_dp_hdcp_capable, + + .protocol = HDCP_PROTOCOL_DP, +}; + +int intel_dp_init_hdcp(struct intel_digital_port *dig_port, + struct intel_connector *intel_connector) +{ + struct drm_device *dev = intel_connector->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_encoder *intel_encoder = &dig_port->base; + enum port port = intel_encoder->port; + struct intel_dp *intel_dp = &dig_port->dp; + + if (!is_hdcp_supported(dev_priv, port)) + return 0; + + if (intel_connector->mst_port) + return intel_hdcp_init(intel_connector, port, + &intel_dp_mst_hdcp_shim); + else if (!intel_dp_is_edp(intel_dp)) + return intel_hdcp_init(intel_connector, port, + 
&intel_dp_hdcp_shim); + + return 0; +} diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index a23ed7290843..f2c8b56be9ea 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -410,10 +410,17 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) intel_connector->base.base.id, intel_connector->base.name, intel_dp->link_rate, intel_dp->lane_count); - if (!intel_dp_get_link_train_fallback_values(intel_dp, - intel_dp->link_rate, - intel_dp->lane_count)) - /* Schedule a Hotplug Uevent to userspace to start modeset */ - schedule_work(&intel_connector->modeset_retry_work); - return; + + if (intel_dp->hobl_active) { + drm_dbg_kms(&dp_to_i915(intel_dp)->drm, + "Link Training failed with HOBL active, not enabling it from now on"); + intel_dp->hobl_failed = true; + } else if (intel_dp_get_link_train_fallback_values(intel_dp, + intel_dp->link_rate, + intel_dp->lane_count)) { + return; + } + + /* Schedule a Hotplug Uevent to userspace to start modeset */ + schedule_work(&intel_connector->modeset_retry_work); } diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index a2d91a499700..64d885539e94 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -37,6 +37,7 @@ #include "intel_dp.h" #include "intel_dp_mst.h" #include "intel_dpio_phy.h" +#include "intel_hdcp.h" static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, @@ -352,6 +353,8 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state, drm_dbg_kms(&i915->drm, "active links %d\n", intel_dp->active_mst_links); + intel_hdcp_disable(intel_mst->connector); + drm_dp_mst_reset_vcpi_slots(&intel_dp->mst_mgr, connector->port); ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); @@ -556,6 +559,13 @@ static void 
intel_mst_enable_dp(struct intel_atomic_state *state, if (pipe_config->has_audio) intel_audio_codec_enable(encoder, pipe_config, conn_state); + + /* Enable hdcp if it's desired */ + if (conn_state->content_protection == + DRM_MODE_CONTENT_PROTECTION_DESIRED) + intel_hdcp_enable(to_intel_connector(conn_state->connector), + pipe_config->cpu_transcoder, + (u8)conn_state->hdcp_content_type); } static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, @@ -709,9 +719,13 @@ static int intel_dp_mst_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { + struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; + if (!INTEL_DISPLAY_ENABLED(i915)) + return connector_status_disconnected; + if (drm_connector_is_unregistered(connector)) return connector_status_disconnected; @@ -799,6 +813,14 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); + + /* TODO: Figure out how to make HDCP work on GEN12+ */ + if (INTEL_GEN(dev_priv) < 12) { + ret = intel_dp_init_hdcp(dig_port, intel_connector); + if (ret) + DRM_DEBUG_KMS("HDCP init failed, skipping.\n"); + } + /* * Reuse the prop from the SST connector because we're * not allowed to create new props after device registration. 
@@ -865,6 +887,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *dig_port, enum pipe intel_encoder->compute_config_late = intel_dp_mst_compute_config_late; intel_encoder->disable = intel_mst_disable_dp; intel_encoder->post_disable = intel_mst_post_disable_dp; + intel_encoder->update_pipe = intel_ddi_update_pipe; intel_encoder->pre_pll_enable = intel_mst_pre_pll_enable_dp; intel_encoder->pre_enable = intel_mst_pre_enable_dp; intel_encoder->enable = intel_mst_enable_dp; diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index aeb6ee395cce..e08684e34078 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -147,6 +147,18 @@ void assert_shared_dpll(struct drm_i915_private *dev_priv, pll->info->name, onoff(state), onoff(cur_state)); } +static i915_reg_t +intel_combo_pll_enable_reg(struct drm_i915_private *i915, + struct intel_shared_dpll *pll) +{ + + if (IS_ELKHARTLAKE(i915) && (pll->info->id == DPLL_ID_EHL_DPLL4)) + return MG_PLL_ENABLE(0); + + return CNL_DPLL_ENABLE(pll->info->id); + + +} /** * intel_prepare_shared_dpll - call a dpll's prepare hook * @crtc_state: CRTC, and its state, which has a shared dpll @@ -892,7 +904,7 @@ static int hsw_ddi_wrpll_get_freq(struct drm_i915_private *dev_priv, refclk = dev_priv->dpll.ref_clks.nssc; break; } - /* fall through */ + fallthrough; case WRPLL_REF_PCH_SSC: /* * We could calculate spread here, but our checking @@ -2977,7 +2989,7 @@ static bool icl_calc_tbt_pll(struct intel_crtc_state *crtc_state, switch (dev_priv->dpll.ref_clks.nssc) { default: MISSING_CASE(dev_priv->dpll.ref_clks.nssc); - /* fall-through */ + fallthrough; case 19200: *pll_params = tgl_tbt_pll_19_2MHz_values; break; @@ -2992,7 +3004,7 @@ static bool icl_calc_tbt_pll(struct intel_crtc_state *crtc_state, switch (dev_priv->dpll.ref_clks.nssc) { default: MISSING_CASE(dev_priv->dpll.ref_clks.nssc); - /* fall-through */ + fallthrough; 
case 19200: case 38400: *pll_params = icl_tbt_pll_19_2MHz_values; @@ -3120,7 +3132,7 @@ static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, switch (div1) { default: MISSING_CASE(div1); - /* fall through */ + fallthrough; case 2: hsdiv = MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_2; break; @@ -3475,6 +3487,14 @@ static void icl_update_active_dpll(struct intel_atomic_state *state, icl_set_active_port_dpll(crtc_state, port_dpll_id); } +static u32 intel_get_hti_plls(struct drm_i915_private *i915) +{ + if (!(i915->hti_state & HDPORT_ENABLED)) + return 0; + + return REG_FIELD_GET(HDPORT_DPLL_USED_MASK, i915->hti_state); +} + static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, struct intel_crtc *crtc, struct intel_encoder *encoder) @@ -3504,13 +3524,22 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, icl_calc_dpll_state(dev_priv, &pll_params, &port_dpll->hw_state); - if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A) + if (IS_ROCKETLAKE(dev_priv)) { dpll_mask = BIT(DPLL_ID_EHL_DPLL4) | BIT(DPLL_ID_ICL_DPLL1) | BIT(DPLL_ID_ICL_DPLL0); - else + } else if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A) { + dpll_mask = + BIT(DPLL_ID_EHL_DPLL4) | + BIT(DPLL_ID_ICL_DPLL1) | + BIT(DPLL_ID_ICL_DPLL0); + } else { dpll_mask = BIT(DPLL_ID_ICL_DPLL1) | BIT(DPLL_ID_ICL_DPLL0); + } + + /* Eliminate DPLLs from consideration if reserved by HTI */ + dpll_mask &= ~intel_get_hti_plls(dev_priv); port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, @@ -3791,7 +3820,12 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, if (!(val & PLL_ENABLE)) goto out; - if (INTEL_GEN(dev_priv) >= 12) { + if (IS_ROCKETLAKE(dev_priv)) { + hw_state->cfgcr0 = intel_de_read(dev_priv, + RKL_DPLL_CFGCR0(id)); + hw_state->cfgcr1 = intel_de_read(dev_priv, + RKL_DPLL_CFGCR1(id)); + } else if (INTEL_GEN(dev_priv) >= 12) { hw_state->cfgcr0 = intel_de_read(dev_priv, TGL_DPLL_CFGCR0(id)); hw_state->cfgcr1 = intel_de_read(dev_priv, 
@@ -3820,12 +3854,7 @@ static bool combo_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); - - if (IS_ELKHARTLAKE(dev_priv) && - pll->info->id == DPLL_ID_EHL_DPLL4) { - enable_reg = MG_PLL_ENABLE(0); - } + i915_reg_t enable_reg = intel_combo_pll_enable_reg(dev_priv, pll); return icl_pll_get_hw_state(dev_priv, pll, hw_state, enable_reg); } @@ -3844,7 +3873,10 @@ static void icl_dpll_write(struct drm_i915_private *dev_priv, const enum intel_dpll_id id = pll->info->id; i915_reg_t cfgcr0_reg, cfgcr1_reg; - if (INTEL_GEN(dev_priv) >= 12) { + if (IS_ROCKETLAKE(dev_priv)) { + cfgcr0_reg = RKL_DPLL_CFGCR0(id); + cfgcr1_reg = RKL_DPLL_CFGCR1(id); + } else if (INTEL_GEN(dev_priv) >= 12) { cfgcr0_reg = TGL_DPLL_CFGCR0(id); cfgcr1_reg = TGL_DPLL_CFGCR1(id); } else { @@ -4020,11 +4052,10 @@ static void icl_pll_enable(struct drm_i915_private *dev_priv, static void combo_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + i915_reg_t enable_reg = intel_combo_pll_enable_reg(dev_priv, pll); if (IS_ELKHARTLAKE(dev_priv) && pll->info->id == DPLL_ID_EHL_DPLL4) { - enable_reg = MG_PLL_ENABLE(0); /* * We need to disable DC states when this DPLL is enabled. 
@@ -4132,19 +4163,14 @@ static void icl_pll_disable(struct drm_i915_private *dev_priv, static void combo_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - i915_reg_t enable_reg = CNL_DPLL_ENABLE(pll->info->id); + i915_reg_t enable_reg = intel_combo_pll_enable_reg(dev_priv, pll); - if (IS_ELKHARTLAKE(dev_priv) && - pll->info->id == DPLL_ID_EHL_DPLL4) { - enable_reg = MG_PLL_ENABLE(0); - icl_pll_disable(dev_priv, pll, enable_reg); + icl_pll_disable(dev_priv, pll, enable_reg); + if (IS_ELKHARTLAKE(dev_priv) && + pll->info->id == DPLL_ID_EHL_DPLL4) intel_display_power_put(dev_priv, POWER_DOMAIN_DPLL_DC_OFF, pll->wakeref); - return; - } - - icl_pll_disable(dev_priv, pll, enable_reg); } static void tbt_pll_disable(struct drm_i915_private *dev_priv, @@ -4276,6 +4302,21 @@ static const struct intel_dpll_mgr tgl_pll_mgr = { .dump_hw_state = icl_dump_hw_state, }; +static const struct dpll_info rkl_plls[] = { + { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, + { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "DPLL 4", &combo_pll_funcs, DPLL_ID_EHL_DPLL4, 0 }, + { }, +}; + +static const struct intel_dpll_mgr rkl_pll_mgr = { + .dpll_info = rkl_plls, + .get_dplls = icl_get_dplls, + .put_dplls = icl_put_dplls, + .update_ref_clks = icl_update_dpll_ref_clks, + .dump_hw_state = icl_dump_hw_state, +}; + /** * intel_shared_dpll_init - Initialize shared DPLLs * @dev: drm device @@ -4289,7 +4330,9 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (INTEL_GEN(dev_priv) >= 12) + if (IS_ROCKETLAKE(dev_priv)) + dpll_mgr = &rkl_pll_mgr; + else if (INTEL_GEN(dev_priv) >= 12) dpll_mgr = &tgl_pll_mgr; else if (IS_ELKHARTLAKE(dev_priv)) dpll_mgr = &ehl_pll_mgr; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 307ed8ae9a19..237dbb1ba0ee 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -313,9 
+313,15 @@ static void intel_dvo_pre_enable(struct intel_atomic_state *state, static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector, bool force) { + struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_dvo *intel_dvo = intel_attached_dvo(to_intel_connector(connector)); + DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); + + if (!INTEL_DISPLAY_ENABLED(i915)) + return connector_status_disconnected; + return intel_dvo->dev.dev_ops->detect(&intel_dvo->dev); } diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 24c3a0f212c6..135f5e8a4d70 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -424,6 +424,14 @@ static void intel_fbc_deactivate(struct drm_i915_private *dev_priv, fbc->no_fbc_reason = reason; } +static u64 intel_fbc_cfb_base_max(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 5 || IS_G4X(i915)) + return BIT_ULL(28); + else + return BIT_ULL(32); +} + static int find_compression_threshold(struct drm_i915_private *dev_priv, struct drm_mm_node *node, unsigned int size, @@ -442,6 +450,8 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, else end = U64_MAX; + end = min(end, intel_fbc_cfb_base_max(dev_priv)); + /* HACK: This code depends on what we will do in *_enable_fbc. If that * code changes, this code needs to change as well. * @@ -1416,6 +1426,13 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) if (!HAS_FBC(dev_priv)) return 0; + /* + * Fbc is causing random underruns in CI execution on TGL platforms. + * Disabling the same while the problem is being debugged and analyzed. 
+ */ + if (IS_TIGERLAKE(dev_priv)) + return 0; + if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index bd39eb6a21b8..842c04e63214 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -451,8 +451,7 @@ int intel_fbdev_init(struct drm_device *dev) struct intel_fbdev *ifbdev; int ret; - if (drm_WARN_ON(dev, !HAS_DISPLAY(dev_priv) || - !INTEL_DISPLAY_ENABLED(dev_priv))) + if (drm_WARN_ON(dev, !HAS_DISPLAY(dev_priv))) return -ENODEV; ifbdev = kzalloc(sizeof(struct intel_fbdev), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 2979ed2588eb..d898b370d7a4 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -232,6 +232,8 @@ static void frontbuffer_release(struct kref *ref) RCU_INIT_POINTER(obj->frontbuffer, NULL); spin_unlock(&to_i915(obj->base.dev)->fb_tracking.lock); + i915_active_fini(&front->write); + i915_gem_object_put(obj); kfree_rcu(front, rcu); } diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index a8d119b6b45c..e6b8d6dfb598 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -834,7 +834,7 @@ int intel_gmbus_setup(struct drm_i915_private *dev_priv) unsigned int pin; int ret; - if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) + if (!HAS_DISPLAY(dev_priv)) return 0; if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 89a4d294822d..5492076d1ae0 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -148,9 +148,8 @@ static int intel_hdcp_poll_ksv_fifo(struct 
intel_digital_port *dig_port, static bool hdcp_key_loadable(struct drm_i915_private *dev_priv) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; - struct i915_power_well *power_well; enum i915_power_well_id id; + intel_wakeref_t wakeref; bool enabled = false; /* @@ -162,17 +161,9 @@ static bool hdcp_key_loadable(struct drm_i915_private *dev_priv) else id = SKL_DISP_PW_1; - mutex_lock(&power_domains->lock); - /* PG1 (power well #1) needs to be enabled */ - for_each_power_well(dev_priv, power_well) { - if (power_well->desc->id == id) { - enabled = power_well->desc->ops->is_enabled(dev_priv, - power_well); - break; - } - } - mutex_unlock(&power_domains->lock); + with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) + enabled = intel_display_power_well_is_enabled(dev_priv, id); /* * Another req for hdcp key loadability is enabled state of pll for @@ -336,8 +327,10 @@ int intel_hdcp_validate_v_prime(struct intel_connector *connector, /* Fill up the empty slots in sha_text and write it out */ sha_empty = sizeof(sha_text) - sha_leftovers; - for (j = 0; j < sha_empty; j++) - sha_text |= ksv[j] << ((sizeof(sha_text) - j - 1) * 8); + for (j = 0; j < sha_empty; j++) { + u8 off = ((sizeof(sha_text) - j - 1 - sha_leftovers) * 8); + sha_text |= ksv[j] << off; + } ret = intel_write_sha_text(dev_priv, sha_text); if (ret < 0) @@ -435,7 +428,7 @@ int intel_hdcp_validate_v_prime(struct intel_connector *connector, /* Write 32 bits of text */ intel_de_write(dev_priv, HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); - sha_text |= bstatus[0] << 24 | bstatus[1] << 16; + sha_text |= bstatus[0] << 8 | bstatus[1]; ret = intel_write_sha_text(dev_priv, sha_text); if (ret < 0) return ret; @@ -450,17 +443,29 @@ int intel_hdcp_validate_v_prime(struct intel_connector *connector, return ret; sha_idx += sizeof(sha_text); } + + /* + * Terminate the SHA-1 stream by hand. For the other leftover + * cases this is appended by the hardware. 
+ */ + intel_de_write(dev_priv, HDCP_REP_CTL, + rep_ctl | HDCP_SHA1_TEXT_32); + sha_text = DRM_HDCP_SHA1_TERMINATOR << 24; + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); } else if (sha_leftovers == 3) { - /* Write 32 bits of text */ + /* Write 32 bits of text (filled from LSB) */ intel_de_write(dev_priv, HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); - sha_text |= bstatus[0] << 24; + sha_text |= bstatus[0]; ret = intel_write_sha_text(dev_priv, sha_text); if (ret < 0) return ret; sha_idx += sizeof(sha_text); - /* Write 8 bits of text, 24 bits of M0 */ + /* Write 8 bits of text (filled from LSB), 24 bits of M0 */ intel_de_write(dev_priv, HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_8); ret = intel_write_sha_text(dev_priv, bstatus[1]); @@ -699,7 +704,7 @@ static int intel_hdcp_auth(struct intel_connector *connector) intel_de_write(dev_priv, HDCP_REP_CTL, intel_hdcp_get_repeater_ctl(dev_priv, cpu_transcoder, port)); - ret = shim->toggle_signalling(dig_port, true); + ret = shim->toggle_signalling(dig_port, cpu_transcoder, true); if (ret) return ret; @@ -781,11 +786,25 @@ static int _intel_hdcp_disable(struct intel_connector *connector) struct intel_hdcp *hdcp = &connector->hdcp; enum port port = dig_port->base.port; enum transcoder cpu_transcoder = hdcp->cpu_transcoder; + u32 repeater_ctl; int ret; drm_dbg_kms(&dev_priv->drm, "[%s:%d] HDCP is being disabled...\n", connector->base.name, connector->base.base.id); + /* + * If there are other connectors on this port using HDCP, don't disable + * it. Instead, toggle the HDCP signalling off on that particular + * connector/pipe and exit. 
+ */ + if (dig_port->num_hdcp_streams > 0) { + ret = hdcp->shim->toggle_signalling(dig_port, + cpu_transcoder, false); + if (ret) + DRM_ERROR("Failed to disable HDCP signalling\n"); + return ret; + } + hdcp->hdcp_encrypted = false; intel_de_write(dev_priv, HDCP_CONF(dev_priv, cpu_transcoder, port), 0); if (intel_de_wait_for_clear(dev_priv, @@ -796,7 +815,12 @@ static int _intel_hdcp_disable(struct intel_connector *connector) return -ETIMEDOUT; } - ret = hdcp->shim->toggle_signalling(dig_port, false); + repeater_ctl = intel_hdcp_get_repeater_ctl(dev_priv, cpu_transcoder, + port); + intel_de_write(dev_priv, HDCP_REP_CTL, + intel_de_read(dev_priv, HDCP_REP_CTL) & ~repeater_ctl); + + ret = hdcp->shim->toggle_signalling(dig_port, cpu_transcoder, false); if (ret) { drm_err(&dev_priv->drm, "Failed to disable HDCP signalling\n"); return ret; @@ -856,6 +880,34 @@ static struct intel_connector *intel_hdcp_to_connector(struct intel_hdcp *hdcp) return container_of(hdcp, struct intel_connector, hdcp); } +static void intel_hdcp_update_value(struct intel_connector *connector, + u64 value, bool update_property) +{ + struct drm_device *dev = connector->base.dev; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_hdcp *hdcp = &connector->hdcp; + + drm_WARN_ON(connector->base.dev, !mutex_is_locked(&hdcp->mutex)); + + if (hdcp->value == value) + return; + + drm_WARN_ON(dev, !mutex_is_locked(&dig_port->hdcp_mutex)); + + if (hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED) { + if (!drm_WARN_ON(dev, dig_port->num_hdcp_streams == 0)) + dig_port->num_hdcp_streams--; + } else if (value == DRM_MODE_CONTENT_PROTECTION_ENABLED) { + dig_port->num_hdcp_streams++; + } + + hdcp->value = value; + if (update_property) { + drm_connector_get(&connector->base); + schedule_work(&hdcp->prop_work); + } +} + /* Implements Part 3 of the HDCP authorization procedure */ static int intel_hdcp_check_link(struct intel_connector *connector) { @@ -867,6 +919,8 @@ static 
int intel_hdcp_check_link(struct intel_connector *connector) int ret = 0; mutex_lock(&hdcp->mutex); + mutex_lock(&dig_port->hdcp_mutex); + cpu_transcoder = hdcp->cpu_transcoder; /* Check_link valid only when HDCP1.4 is enabled */ @@ -883,15 +937,16 @@ static int intel_hdcp_check_link(struct intel_connector *connector) connector->base.name, connector->base.base.id, intel_de_read(dev_priv, HDCP_STATUS(dev_priv, cpu_transcoder, port))); ret = -ENXIO; - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, + true); goto out; } - if (hdcp->shim->check_link(dig_port)) { + if (hdcp->shim->check_link(dig_port, connector)) { if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { - hdcp->value = DRM_MODE_CONTENT_PROTECTION_ENABLED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED, true); } goto out; } @@ -903,20 +958,23 @@ static int intel_hdcp_check_link(struct intel_connector *connector) ret = _intel_hdcp_disable(connector); if (ret) { drm_err(&dev_priv->drm, "Failed to disable hdcp (%d)\n", ret); - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, + true); goto out; } ret = _intel_hdcp_enable(connector); if (ret) { drm_err(&dev_priv->drm, "Failed to enable hdcp (%d)\n", ret); - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, + true); goto out; } out: + mutex_unlock(&dig_port->hdcp_mutex); mutex_unlock(&hdcp->mutex); return ret; } @@ -942,6 +1000,8 @@ static void intel_hdcp_prop_work(struct work_struct *work) mutex_unlock(&hdcp->mutex); drm_modeset_unlock(&dev_priv->drm.mode_config.connection_mutex); + + drm_connector_put(&connector->base); } bool is_hdcp_supported(struct 
drm_i915_private *dev_priv, enum port port) @@ -1580,7 +1640,8 @@ static int hdcp2_enable_encryption(struct intel_connector *connector) intel_de_read(dev_priv, HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & LINK_ENCRYPTION_STATUS); if (hdcp->shim->toggle_signalling) { - ret = hdcp->shim->toggle_signalling(dig_port, true); + ret = hdcp->shim->toggle_signalling(dig_port, cpu_transcoder, + true); if (ret) { drm_err(&dev_priv->drm, "Failed to enable HDCP signalling. %d\n", @@ -1630,7 +1691,8 @@ static int hdcp2_disable_encryption(struct intel_connector *connector) drm_dbg_kms(&dev_priv->drm, "Disable Encryption Timedout"); if (hdcp->shim->toggle_signalling) { - ret = hdcp->shim->toggle_signalling(dig_port, false); + ret = hdcp->shim->toggle_signalling(dig_port, cpu_transcoder, + false); if (ret) { drm_err(&dev_priv->drm, "Failed to disable HDCP signalling. %d\n", @@ -1746,16 +1808,18 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) "HDCP2.2 link stopped the encryption, %x\n", intel_de_read(dev_priv, HDCP2_STATUS(dev_priv, cpu_transcoder, port))); ret = -ENXIO; - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, + true); goto out; } ret = hdcp->shim->check_2_2_link(dig_port); if (ret == HDCP_LINK_PROTECTED) { if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { - hdcp->value = DRM_MODE_CONTENT_PROTECTION_ENABLED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED, + true); } goto out; } @@ -1768,8 +1832,9 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) "HDCP2.2 Downstream topology change\n"); ret = hdcp2_authenticate_repeater_topology(connector); if (!ret) { - hdcp->value = DRM_MODE_CONTENT_PROTECTION_ENABLED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED, + true); goto out; } 
drm_dbg_kms(&dev_priv->drm, @@ -1787,8 +1852,8 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) drm_err(&dev_priv->drm, "[%s:%d] Failed to disable hdcp2.2 (%d)\n", connector->base.name, connector->base.base.id, ret); - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, true); goto out; } @@ -1798,8 +1863,9 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) "[%s:%d] Failed to enable hdcp2.2 (%d)\n", connector->base.name, connector->base.base.id, ret); - hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_DESIRED, + true); goto out; } @@ -1815,6 +1881,9 @@ static void intel_hdcp_check_work(struct work_struct *work) check_work); struct intel_connector *connector = intel_hdcp_to_connector(hdcp); + if (drm_connector_is_unregistered(&connector->base)) + return; + if (!intel_hdcp2_check_link(connector)) schedule_delayed_work(&hdcp->check_work, DRM_HDCP2_CHECK_PERIOD_MS); @@ -1876,6 +1945,7 @@ static enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder) } static int initialize_hdcp_port_data(struct intel_connector *connector, + enum port port, const struct intel_hdcp_shim *shim) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); @@ -1883,8 +1953,7 @@ static int initialize_hdcp_port_data(struct intel_connector *connector, struct hdcp_port_data *data = &hdcp->port_data; if (INTEL_GEN(dev_priv) < 12) - data->fw_ddi = - intel_get_mei_fw_ddi_index(intel_attached_encoder(connector)->port); + data->fw_ddi = intel_get_mei_fw_ddi_index(port); else /* * As per ME FW API expectation, for GEN 12+, fw_ddi is filled @@ -1954,14 +2023,14 @@ void intel_hdcp_component_init(struct drm_i915_private *dev_priv) } } -static void intel_hdcp2_init(struct intel_connector *connector, +static void 
intel_hdcp2_init(struct intel_connector *connector, enum port port, const struct intel_hdcp_shim *shim) { struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; int ret; - ret = initialize_hdcp_port_data(connector, shim); + ret = initialize_hdcp_port_data(connector, port, shim); if (ret) { drm_dbg_kms(&i915->drm, "Mei hdcp data init failed\n"); return; @@ -1971,6 +2040,7 @@ static void intel_hdcp2_init(struct intel_connector *connector, } int intel_hdcp_init(struct intel_connector *connector, + enum port port, const struct intel_hdcp_shim *shim) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); @@ -1980,8 +2050,8 @@ int intel_hdcp_init(struct intel_connector *connector, if (!shim) return -EINVAL; - if (is_hdcp2_supported(dev_priv)) - intel_hdcp2_init(connector, shim); + if (is_hdcp2_supported(dev_priv) && !connector->mst_port) + intel_hdcp2_init(connector, port, shim); ret = drm_connector_attach_content_protection_property(&connector->base, @@ -2005,6 +2075,7 @@ int intel_hdcp_enable(struct intel_connector *connector, enum transcoder cpu_transcoder, u8 content_type) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); struct intel_hdcp *hdcp = &connector->hdcp; unsigned long check_link_interval = DRM_HDCP_CHECK_PERIOD_MS; int ret = -EINVAL; @@ -2013,14 +2084,14 @@ int intel_hdcp_enable(struct intel_connector *connector, return -ENOENT; mutex_lock(&hdcp->mutex); + mutex_lock(&dig_port->hdcp_mutex); drm_WARN_ON(&dev_priv->drm, hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED); hdcp->content_type = content_type; + hdcp->cpu_transcoder = cpu_transcoder; - if (INTEL_GEN(dev_priv) >= 12) { - hdcp->cpu_transcoder = cpu_transcoder; + if (INTEL_GEN(dev_priv) >= 12) hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder); - } /* * Considering that HDCP2.2 is more secure than HDCP1.4, If the setup @@ -2043,16 
+2114,19 @@ int intel_hdcp_enable(struct intel_connector *connector, if (!ret) { schedule_delayed_work(&hdcp->check_work, check_link_interval); - hdcp->value = DRM_MODE_CONTENT_PROTECTION_ENABLED; - schedule_work(&hdcp->prop_work); + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED, + true); } + mutex_unlock(&dig_port->hdcp_mutex); mutex_unlock(&hdcp->mutex); return ret; } int intel_hdcp_disable(struct intel_connector *connector) { + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); struct intel_hdcp *hdcp = &connector->hdcp; int ret = 0; @@ -2060,15 +2134,20 @@ int intel_hdcp_disable(struct intel_connector *connector) return -ENOENT; mutex_lock(&hdcp->mutex); + mutex_lock(&dig_port->hdcp_mutex); - if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { - hdcp->value = DRM_MODE_CONTENT_PROTECTION_UNDESIRED; - if (hdcp->hdcp2_encrypted) - ret = _intel_hdcp2_disable(connector); - else if (hdcp->hdcp_encrypted) - ret = _intel_hdcp_disable(connector); - } + if (hdcp->value == DRM_MODE_CONTENT_PROTECTION_UNDESIRED) + goto out; + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_UNDESIRED, false); + if (hdcp->hdcp2_encrypted) + ret = _intel_hdcp2_disable(connector); + else if (hdcp->hdcp_encrypted) + ret = _intel_hdcp_disable(connector); + +out: + mutex_unlock(&dig_port->hdcp_mutex); mutex_unlock(&hdcp->mutex); cancel_delayed_work_sync(&hdcp->check_work); return ret; @@ -2082,11 +2161,15 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_hdcp *hdcp = &connector->hdcp; - bool content_protection_type_changed = + bool content_protection_type_changed, desired_and_not_enabled = false; + + if (!connector->hdcp.shim) + return; + + content_protection_type_changed = (conn_state->hdcp_content_type != hdcp->content_type && conn_state->content_protection != DRM_MODE_CONTENT_PROTECTION_UNDESIRED); - bool 
desired_and_not_enabled = false; /* * During the HDCP encryption session if Type change is requested, @@ -2139,12 +2222,39 @@ void intel_hdcp_component_fini(struct drm_i915_private *dev_priv) void intel_hdcp_cleanup(struct intel_connector *connector) { - if (!connector->hdcp.shim) + struct intel_hdcp *hdcp = &connector->hdcp; + + if (!hdcp->shim) return; - mutex_lock(&connector->hdcp.mutex); - kfree(connector->hdcp.port_data.streams); - mutex_unlock(&connector->hdcp.mutex); + /* + * If the connector is registered, it's possible userspace could kick + * off another HDCP enable, which would re-spawn the workers. + */ + drm_WARN_ON(connector->base.dev, + connector->base.registration_state == DRM_CONNECTOR_REGISTERED); + + /* + * Now that the connector is not registered, check_work won't be run, + * but cancel any outstanding instances of it + */ + cancel_delayed_work_sync(&hdcp->check_work); + + /* + * We don't cancel prop_work in the same way as check_work since it + * requires connection_mutex which could be held while calling this + * function. Instead, we rely on the connector references grabbed before + * scheduling prop_work to ensure the connector is alive when prop_work + * is run. So if we're in the destroy path (which is where this + * function should be called), we're "guaranteed" that prop_work is not + * active (tl;dr This Should Never Happen). 
+ */ + drm_WARN_ON(connector->base.dev, work_pending(&hdcp->prop_work)); + + mutex_lock(&hdcp->mutex); + kfree(hdcp->port_data.streams); + hdcp->shim = NULL; + mutex_unlock(&hdcp->mutex); } void intel_hdcp_atomic_check(struct drm_connector *connector, diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h index 86bbaec120cc..1bbf5b67ed0a 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp.h @@ -22,7 +22,7 @@ enum transcoder; void intel_hdcp_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, struct drm_connector_state *new_state); -int intel_hdcp_init(struct intel_connector *connector, +int intel_hdcp_init(struct intel_connector *connector, enum port port, const struct intel_hdcp_shim *hdcp_shim); int intel_hdcp_enable(struct intel_connector *connector, enum transcoder cpu_transcoder, u8 content_type); diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index de2ce5632b94..3f2008d845c2 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1477,7 +1477,8 @@ int intel_hdmi_hdcp_read_v_prime_part(struct intel_digital_port *dig_port, return ret; } -static int kbl_repositioning_enc_en_signal(struct intel_connector *connector) +static int kbl_repositioning_enc_en_signal(struct intel_connector *connector, + enum transcoder cpu_transcoder) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_digital_port *dig_port = intel_attached_dig_port(connector); @@ -1494,13 +1495,15 @@ static int kbl_repositioning_enc_en_signal(struct intel_connector *connector) usleep_range(25, 50); } - ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, false); + ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, cpu_transcoder, + false); if (ret) { drm_err(&dev_priv->drm, "Disable HDCP signalling failed (%d)\n", ret); return ret; } - ret = 
intel_ddi_toggle_hdcp_signalling(&dig_port->base, true); + ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, cpu_transcoder, + true); if (ret) { drm_err(&dev_priv->drm, "Enable HDCP signalling failed (%d)\n", ret); @@ -1512,6 +1515,7 @@ static int kbl_repositioning_enc_en_signal(struct intel_connector *connector) static int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *dig_port, + enum transcoder cpu_transcoder, bool enable) { struct intel_hdmi *hdmi = &dig_port->hdmi; @@ -1522,7 +1526,8 @@ int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *dig_port, if (!enable) usleep_range(6, 60); /* Bspec says >= 6us */ - ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, enable); + ret = intel_ddi_toggle_hdcp_signalling(&dig_port->base, cpu_transcoder, + enable); if (ret) { drm_err(&dev_priv->drm, "%s HDCP signalling failed (%d)\n", enable ? "Enable" : "Disable", ret); @@ -1534,17 +1539,17 @@ int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *dig_port, * opportunity and enc_en signalling in KABYLAKE. 
*/ if (IS_KABYLAKE(dev_priv) && enable) - return kbl_repositioning_enc_en_signal(connector); + return kbl_repositioning_enc_en_signal(connector, + cpu_transcoder); return 0; } static -bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *dig_port) +bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *dig_port, + struct intel_connector *connector) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - struct intel_connector *connector = - dig_port->hdmi.attached_connector; enum port port = dig_port->base.port; enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; int ret; @@ -1572,13 +1577,14 @@ bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *dig_port) } static -bool intel_hdmi_hdcp_check_link(struct intel_digital_port *dig_port) +bool intel_hdmi_hdcp_check_link(struct intel_digital_port *dig_port, + struct intel_connector *connector) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); int retry; for (retry = 0; retry < 3; retry++) - if (intel_hdmi_hdcp_check_link_once(dig_port)) + if (intel_hdmi_hdcp_check_link_once(dig_port, connector)) return true; drm_err(&i915->drm, "Link check failed\n"); @@ -2271,35 +2277,18 @@ intel_hdmi_mode_valid(struct drm_connector *connector, return intel_mode_valid_max_plane_size(dev_priv, mode); } -static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, - int bpc) +bool intel_hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, + int bpc, bool has_hdmi_sink, bool ycbcr420_output) { - struct drm_i915_private *dev_priv = - to_i915(crtc_state->uapi.crtc->dev); struct drm_atomic_state *state = crtc_state->uapi.state; struct drm_connector_state *connector_state; struct drm_connector *connector; - const struct drm_display_mode *adjusted_mode = - &crtc_state->hw.adjusted_mode; int i; - if (HAS_GMCH(dev_priv)) - return false; - - if (bpc == 10 && INTEL_GEN(dev_priv) < 11) - return false; - if (crtc_state->pipe_bpp < bpc * 3) 
return false; - if (!crtc_state->has_hdmi_sink) - return false; - - /* - * HDMI deep color affects the clocks, so it's only possible - * when not cloning with other encoder types. - */ - if (crtc_state->output_types != 1 << INTEL_OUTPUT_HDMI) + if (!has_hdmi_sink) return false; for_each_new_connector_in_state(state, connector, connector_state, i) { @@ -2308,7 +2297,7 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, if (connector_state->crtc != crtc_state->uapi.crtc) continue; - if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) { + if (ycbcr420_output) { const struct drm_hdmi_info *hdmi = &info->hdmi; if (bpc == 12 && !(hdmi->y420_dc_modes & @@ -2327,6 +2316,30 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, } } + return true; +} + +static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, + int bpc) +{ + struct drm_i915_private *dev_priv = + to_i915(crtc_state->uapi.crtc->dev); + const struct drm_display_mode *adjusted_mode = + &crtc_state->hw.adjusted_mode; + + if (HAS_GMCH(dev_priv)) + return false; + + if (bpc == 10 && INTEL_GEN(dev_priv) < 11) + return false; + + /* + * HDMI deep color affects the clocks, so it's only possible + * when not cloning with other encoder types. 
+ */ + if (crtc_state->output_types != BIT(INTEL_OUTPUT_HDMI)) + return false; + /* Display Wa_1405510057:icl,ehl */ if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 && bpc == 10 && IS_GEN(dev_priv, 11) && @@ -2334,7 +2347,10 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, adjusted_mode->crtc_hblank_start) % 8 == 2) return false; - return true; + return intel_hdmi_deep_color_possible(crtc_state, bpc, + crtc_state->has_hdmi_sink, + crtc_state->output_format == + INTEL_OUTPUT_FORMAT_YCBCR420); } static int @@ -2459,6 +2475,23 @@ bool intel_hdmi_limited_color_range(const struct intel_crtc_state *crtc_state, } } +static bool intel_hdmi_has_audio(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + const struct intel_digital_connector_state *intel_conn_state = + to_intel_digital_connector_state(conn_state); + + if (!crtc_state->has_hdmi_sink) + return false; + + if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) + return intel_hdmi->has_audio; + else + return intel_conn_state->force_audio == HDMI_AUDIO_ON; +} + int intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -2468,8 +2501,6 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; struct drm_connector *connector = conn_state->connector; struct drm_scdc *scdc = &connector->display_info.hdmi.scdc; - struct intel_digital_connector_state *intel_conn_state = - to_intel_digital_connector_state(conn_state); int ret; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) @@ -2495,13 +2526,8 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv)) pipe_config->has_pch_encoder = true; - if (pipe_config->has_hdmi_sink) { - 
if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) - pipe_config->has_audio = intel_hdmi->has_audio; - else - pipe_config->has_audio = - intel_conn_state->force_audio == HDMI_AUDIO_ON; - } + pipe_config->has_audio = + intel_hdmi_has_audio(encoder, pipe_config, conn_state); ret = intel_hdmi_compute_clock(encoder, pipe_config); if (ret) @@ -2667,6 +2693,9 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) drm_dbg_kms(&dev_priv->drm, "[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); + if (!INTEL_DISPLAY_ENABLED(dev_priv)) + return connector_status_disconnected; + wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); if (INTEL_GEN(dev_priv) >= 11 && @@ -3250,7 +3279,6 @@ void intel_hdmi_init_connector(struct intel_digital_port *dig_port, if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) connector->ycbcr_420_allowed = true; - intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); intel_connector->polled = DRM_CONNECTOR_POLL_HPD; if (HAS_DDI(dev_priv)) @@ -3264,7 +3292,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *dig_port, intel_hdmi->attached_connector = intel_connector; if (is_hdcp_supported(dev_priv, port)) { - int ret = intel_hdcp_init(intel_connector, + int ret = intel_hdcp_init(intel_connector, port, &intel_hdmi_hdcp_shim); if (ret) drm_dbg_kms(&dev_priv->drm, @@ -3335,6 +3363,8 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, intel_encoder = &dig_port->base; + mutex_init(&dig_port->hdcp_mutex); + drm_encoder_init(&dev_priv->drm, &intel_encoder->base, &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, "HDMI %c", port_name(port)); @@ -3382,6 +3412,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, intel_encoder->pipe_mask = ~0; } intel_encoder->cloneable = 1 << INTEL_OUTPUT_ANALOG; + intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); /* * BSpec is unclear about HDMI+HDMI cloning on g4x, but it seems * to work on real hardware. 
And since g4x can send infoframes to diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h index 5b348dcab77a..15eb0ccde76e 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.h +++ b/drivers/gpu/drm/i915/display/intel_hdmi.h @@ -48,5 +48,7 @@ void intel_read_infoframe(struct intel_encoder *encoder, union hdmi_infoframe *frame); bool intel_hdmi_limited_color_range(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); +bool intel_hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, int bpc, + bool has_hdmi_sink, bool ycbcr420_output); #endif /* __INTEL_HDMI_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 3f1d7b804a66..5c58c1ed6493 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -81,33 +81,12 @@ * * It is only valid and used by digital port encoder. * - * Return pin that is associatade with @port and HDP_NONE if no pin is - * hard associated with that @port. + * Return pin that is associatade with @port. */ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, enum port port) { - enum phy phy = intel_port_to_phy(dev_priv, port); - - /* - * RKL + TGP PCH is a special case; we effectively choose the hpd_pin - * based on the DDI rather than the PHY (i.e., the last two outputs - * shold be HPD_PORT_{D,E} rather than {C,D}. Note that this differs - * from the behavior of both TGL+TGP and RKL+CMP. - */ - if (IS_ROCKETLAKE(dev_priv) && HAS_PCH_TGP(dev_priv)) - return HPD_PORT_A + port - PORT_A; - - switch (phy) { - case PHY_F: - return IS_CNL_WITH_PORT_F(dev_priv) ? HPD_PORT_E : HPD_PORT_F; - case PHY_A ... PHY_E: - case PHY_G ... 
PHY_I: - return HPD_PORT_A + phy - PHY_A; - default: - MISSING_CASE(phy); - return HPD_NONE; - } + return HPD_PORT_A + port - PORT_A; } #define HPD_STORM_DETECT_PERIOD 1000 @@ -503,7 +482,6 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, * only the one of them (DP) will have ->hpd_pulse(). */ for_each_intel_encoder(&dev_priv->drm, encoder) { - bool has_hpd_pulse = intel_encoder_has_hpd_pulse(encoder); enum port port = encoder->port; bool long_hpd; @@ -511,7 +489,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, if (!(BIT(pin) & pin_mask)) continue; - if (!has_hpd_pulse) + if (!intel_encoder_has_hpd_pulse(encoder)) continue; long_hpd = long_mask & BIT(pin); diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index b781bf469644..dc1b35559afd 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -571,7 +571,7 @@ bool lspcon_init(struct intel_digital_port *dig_port) return false; } - if (!intel_dp_read_dpcd(dp)) { + if (drm_dp_read_dpcd_caps(&dp->aux, dp->dpcd) != 0) { DRM_ERROR("LSPCON DPCD read failed\n"); return false; } diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 1888611244db..e65c2de522c3 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -456,12 +456,6 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder, return 0; } -static enum drm_connector_status -intel_lvds_detect(struct drm_connector *connector, bool force) -{ - return connector_status_connected; -} - /* * Return the list of DDC modes if available, or the BIOS fixed mode otherwise. 
*/ @@ -490,7 +484,7 @@ static const struct drm_connector_helper_funcs intel_lvds_connector_helper_funcs }; static const struct drm_connector_funcs intel_lvds_connector_funcs = { - .detect = intel_lvds_detect, + .detect = intel_panel_detect, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_get_property = intel_digital_connector_atomic_get_property, .atomic_set_property = intel_digital_connector_atomic_set_property, diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index bbde3b12c311..9f23bac0d792 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -40,8 +40,6 @@ #include "intel_dsi_dcs_backlight.h" #include "intel_panel.h" -#define CRC_PMIC_PWM_PERIOD_NS 21333 - void intel_fixed_panel_mode(const struct drm_display_mode *fixed_mode, struct drm_display_mode *adjusted_mode) @@ -229,7 +227,7 @@ int intel_pch_panel_fitting(struct intel_crtc_state *crtc_state, case DRM_MODE_SCALE_NONE: WARN_ON(adjusted_mode->crtc_hdisplay != crtc_state->pipe_src_w); WARN_ON(adjusted_mode->crtc_vdisplay != crtc_state->pipe_src_h); - /* fall through */ + fallthrough; case DRM_MODE_SCALE_FULLSCREEN: x = y = 0; width = adjusted_mode->crtc_hdisplay; @@ -594,10 +592,10 @@ static u32 bxt_get_backlight(struct intel_connector *connector) static u32 pwm_get_backlight(struct intel_connector *connector) { struct intel_panel *panel = &connector->panel; - int duty_ns; + struct pwm_state state; - duty_ns = pwm_get_duty_cycle(panel->backlight.pwm); - return DIV_ROUND_UP(duty_ns * 100, CRC_PMIC_PWM_PERIOD_NS); + pwm_get_state(panel->backlight.pwm, &state); + return pwm_get_relative_duty_cycle(&state, 100); } static void lpt_set_backlight(const struct drm_connector_state *conn_state, u32 level) @@ -671,9 +669,9 @@ static void bxt_set_backlight(const struct drm_connector_state *conn_state, u32 static void pwm_set_backlight(const struct drm_connector_state *conn_state, u32 level) { struct 
intel_panel *panel = &to_intel_connector(conn_state->connector)->panel; - int duty_ns = DIV_ROUND_UP(level * CRC_PMIC_PWM_PERIOD_NS, 100); - pwm_config(panel->backlight.pwm, duty_ns, CRC_PMIC_PWM_PERIOD_NS); + pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100); + pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state); } static void @@ -842,10 +840,8 @@ static void pwm_disable_backlight(const struct drm_connector_state *old_conn_sta struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct intel_panel *panel = &connector->panel; - /* Disable the backlight */ - intel_panel_actually_set_backlight(old_conn_state, 0); - usleep_range(2000, 3000); - pwm_disable(panel->backlight.pwm); + panel->backlight.pwm_state.enabled = false; + pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state); } void intel_panel_disable_backlight(const struct drm_connector_state *old_conn_state) @@ -1177,9 +1173,12 @@ static void pwm_enable_backlight(const struct intel_crtc_state *crtc_state, { struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_panel *panel = &connector->panel; + int level = panel->backlight.level; - pwm_enable(panel->backlight.pwm); - intel_panel_actually_set_backlight(conn_state, panel->backlight.level); + level = intel_panel_compute_brightness(connector, level); + pwm_set_relative_duty_cycle(&panel->backlight.pwm_state, level, 100); + panel->backlight.pwm_state.enabled = true; + pwm_apply_state(panel->backlight.pwm, &panel->backlight.pwm_state); } static void __intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, @@ -1543,18 +1542,9 @@ static u32 vlv_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * mul); } -static u32 get_backlight_max_vbt(struct intel_connector *connector) +static u16 get_vbt_pwm_freq(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = 
to_i915(connector->base.dev); - struct intel_panel *panel = &connector->panel; u16 pwm_freq_hz = dev_priv->vbt.backlight.pwm_freq_hz; - u32 pwm; - - if (!panel->backlight.hz_to_pwm) { - drm_dbg_kms(&dev_priv->drm, - "backlight frequency conversion not supported\n"); - return 0; - } if (pwm_freq_hz) { drm_dbg_kms(&dev_priv->drm, @@ -1567,6 +1557,22 @@ static u32 get_backlight_max_vbt(struct intel_connector *connector) pwm_freq_hz); } + return pwm_freq_hz; +} + +static u32 get_backlight_max_vbt(struct intel_connector *connector) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + u16 pwm_freq_hz = get_vbt_pwm_freq(dev_priv); + u32 pwm; + + if (!panel->backlight.hz_to_pwm) { + drm_dbg_kms(&dev_priv->drm, + "backlight frequency conversion not supported\n"); + return 0; + } + pwm = panel->backlight.hz_to_pwm(connector, pwm_freq_hz); if (!pwm) { drm_dbg_kms(&dev_priv->drm, @@ -1891,8 +1897,7 @@ static int pwm_setup_backlight(struct intel_connector *connector, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_panel *panel = &connector->panel; const char *desc; - u32 level, ns; - int retval; + u32 level; /* Get the right PWM chip for DSI backlight according to VBT */ if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) { @@ -1910,30 +1915,28 @@ static int pwm_setup_backlight(struct intel_connector *connector, return -ENODEV; } - /* - * FIXME: pwm_apply_args() should be removed when switching to - * the atomic PWM API. 
- */ - pwm_apply_args(panel->backlight.pwm); - - panel->backlight.min = 0; /* 0% */ panel->backlight.max = 100; /* 100% */ - level = intel_panel_compute_brightness(connector, 100); - ns = DIV_ROUND_UP(level * CRC_PMIC_PWM_PERIOD_NS, 100); + panel->backlight.min = get_backlight_min_vbt(connector); - retval = pwm_config(panel->backlight.pwm, ns, CRC_PMIC_PWM_PERIOD_NS); - if (retval < 0) { - drm_err(&dev_priv->drm, "Failed to configure the pwm chip\n"); - pwm_put(panel->backlight.pwm); - panel->backlight.pwm = NULL; - return retval; - } + if (pwm_is_enabled(panel->backlight.pwm)) { + /* PWM is already enabled, use existing settings */ + pwm_get_state(panel->backlight.pwm, &panel->backlight.pwm_state); + + level = pwm_get_relative_duty_cycle(&panel->backlight.pwm_state, + 100); + level = intel_panel_compute_brightness(connector, level); + panel->backlight.level = clamp(level, panel->backlight.min, + panel->backlight.max); + panel->backlight.enabled = true; - level = DIV_ROUND_UP_ULL(pwm_get_duty_cycle(panel->backlight.pwm) * 100, - CRC_PMIC_PWM_PERIOD_NS); - panel->backlight.level = - intel_panel_compute_brightness(connector, level); - panel->backlight.enabled = panel->backlight.level != 0; + drm_dbg_kms(&dev_priv->drm, "PWM already enabled at freq %ld, VBT freq %d, level %d\n", + NSEC_PER_SEC / (unsigned long)panel->backlight.pwm_state.period, + get_vbt_pwm_freq(dev_priv), level); + } else { + /* Set period from VBT frequency, leave other settings at 0. 
*/ + panel->backlight.pwm_state.period = + NSEC_PER_SEC / get_vbt_pwm_freq(dev_priv); + } drm_info(&dev_priv->drm, "Using %s PWM for LCD backlight control\n", desc); @@ -2092,6 +2095,17 @@ intel_panel_init_backlight_funcs(struct intel_panel *panel) } } +enum drm_connector_status +intel_panel_detect(struct drm_connector *connector, bool force) +{ + struct drm_i915_private *i915 = to_i915(connector->dev); + + if (!INTEL_DISPLAY_ENABLED(i915)) + return connector_status_disconnected; + + return connector_status_connected; +} + int intel_panel_init(struct intel_panel *panel, struct drm_display_mode *fixed_mode, struct drm_display_mode *downclock_mode) diff --git a/drivers/gpu/drm/i915/display/intel_panel.h b/drivers/gpu/drm/i915/display/intel_panel.h index 968b95281cb4..5b813fe90557 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.h +++ b/drivers/gpu/drm/i915/display/intel_panel.h @@ -23,6 +23,8 @@ int intel_panel_init(struct intel_panel *panel, struct drm_display_mode *fixed_mode, struct drm_display_mode *downclock_mode); void intel_panel_fini(struct intel_panel *panel); +enum drm_connector_status +intel_panel_detect(struct drm_connector *connector, bool force); void intel_fixed_panel_mode(const struct drm_display_mode *fixed_mode, struct drm_display_mode *adjusted_mode); int intel_pch_panel_fitting(struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index bf9e320c547d..40e9cb29233d 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -553,6 +553,22 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) val |= EDP_PSR2_FAST_WAKE(7); } + if (dev_priv->psr.psr2_sel_fetch_enabled) { + /* WA 1408330847 */ + if (IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0) || + IS_RKL_REVID(dev_priv, RKL_REVID_A0, RKL_REVID_A0)) + intel_de_rmw(dev_priv, CHICKEN_PAR1_1, + DIS_RAM_BYPASS_PSR2_MAN_TRACK, + DIS_RAM_BYPASS_PSR2_MAN_TRACK); + + 
intel_de_write(dev_priv, + PSR2_MAN_TRK_CTL(dev_priv->psr.transcoder), + PSR2_MAN_TRK_CTL_ENABLE); + } else if (HAS_PSR2_SEL_FETCH(dev_priv)) { + intel_de_write(dev_priv, + PSR2_MAN_TRK_CTL(dev_priv->psr.transcoder), 0); + } + /* * PSR2 HW is incorrectly using EDP_PSR_TP1_TP3_SEL and BSpec is * recommending keep this bit unset while PSR2 is enabled. @@ -663,6 +679,38 @@ tgl_dc3co_exitline_compute_config(struct intel_dp *intel_dp, crtc_state->dc3co_exitline = crtc_vdisplay - exit_scanlines; } +static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct intel_plane_state *plane_state; + struct intel_plane *plane; + int i; + + if (!dev_priv->params.enable_psr2_sel_fetch) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 sel fetch not enabled, disabled by parameter\n"); + return false; + } + + if (crtc_state->uapi.async_flip) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 sel fetch not enabled, async flip enabled\n"); + return false; + } + + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + if (plane_state->uapi.rotation != DRM_MODE_ROTATE_0) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 sel fetch not enabled, plane rotated\n"); + return false; + } + } + + return crtc_state->enable_psr2_sel_fetch = true; +} + static bool intel_psr2_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -732,22 +780,17 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } - /* - * Some platforms lack PSR2 HW tracking and instead require manual - * tracking by software. In this case, the driver is required to track - * the areas that need updates and program hardware to send selective - * updates. - * - * So until the software tracking is implemented, PSR2 needs to be - * disabled for platforms without PSR2 HW tracking. 
- */ - if (!HAS_PSR_HW_TRACKING(dev_priv)) { - drm_dbg_kms(&dev_priv->drm, - "No PSR2 HW tracking in the platform\n"); - return false; + if (HAS_PSR2_SEL_FETCH(dev_priv)) { + if (!intel_psr2_sel_fetch_config_valid(intel_dp, crtc_state) && + !HAS_PSR_HW_TRACKING(dev_priv)) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 not enabled, selective fetch not valid and no HW tracking available\n"); + return false; + } } - if (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v) { + if (!crtc_state->enable_psr2_sel_fetch && + (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v)) { drm_dbg_kms(&dev_priv->drm, "PSR2 not enabled, resolution %dx%d > max supported %dx%d\n", crtc_hdisplay, crtc_vdisplay, @@ -898,6 +941,11 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, val |= EXITLINE_ENABLE; intel_de_write(dev_priv, EXITLINE(cpu_transcoder), val); } + + if (HAS_PSR_HW_TRACKING(dev_priv)) + intel_de_rmw(dev_priv, CHICKEN_PAR1_1, IGNORE_PSR2_HW_TRACKING, + dev_priv->psr.psr2_sel_fetch_enabled ? 
+ IGNORE_PSR2_HW_TRACKING : 0); } static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, @@ -919,6 +967,7 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, /* DC5/DC6 requires at least 6 idle frames */ val = usecs_to_jiffies(intel_get_frame_time_us(crtc_state) * 6); dev_priv->psr.dc3co_exit_delay = val; + dev_priv->psr.psr2_sel_fetch_enabled = crtc_state->enable_psr2_sel_fetch; /* * If a PSR error happened and the driver is reloaded, the EDP_PSR_IIR @@ -1058,6 +1107,13 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) psr_status_mask, 2000)) drm_err(&dev_priv->drm, "Timed out waiting PSR idle state\n"); + /* WA 1408330847 */ + if (dev_priv->psr.psr2_sel_fetch_enabled && + (IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0) || + IS_RKL_REVID(dev_priv, RKL_REVID_A0, RKL_REVID_A0))) + intel_de_rmw(dev_priv, CHICKEN_PAR1_1, + DIS_RAM_BYPASS_PSR2_MAN_TRACK, 0); + /* Disable PSR on Sink */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); @@ -1115,6 +1171,32 @@ static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv) intel_psr_exit(dev_priv); } +void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct i915_psr *psr = &dev_priv->psr; + + if (!HAS_PSR2_SEL_FETCH(dev_priv) || + !crtc_state->enable_psr2_sel_fetch) + return; + + intel_de_write(dev_priv, PSR2_MAN_TRK_CTL(psr->transcoder), + crtc_state->psr2_man_track_ctl); +} + +void intel_psr2_sel_fetch_update(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + + if (!crtc_state->enable_psr2_sel_fetch) + return; + + crtc_state->psr2_man_track_ctl = PSR2_MAN_TRK_CTL_ENABLE | + PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME; +} + /** * intel_psr_update - Update PSR state * @intel_dp: Intel 
DP @@ -1672,7 +1754,7 @@ void intel_psr_atomic_check(struct drm_connector *connector, return; intel_connector = to_intel_connector(connector); - dig_port = enc_to_dig_port(intel_attached_encoder(intel_connector)); + dig_port = enc_to_dig_port(to_intel_encoder(new_state->best_encoder)); if (dev_priv->psr.dp != &dig_port->dp) return; diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index b4515186d5f4..6a83c8e682e6 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -13,6 +13,8 @@ struct drm_connector_state; struct drm_i915_private; struct intel_crtc_state; struct intel_dp; +struct intel_crtc; +struct intel_atomic_state; #define CAN_PSR(dev_priv) (HAS_PSR(dev_priv) && dev_priv->psr.sink_support) void intel_psr_init_dpcd(struct intel_dp *intel_dp); @@ -43,5 +45,8 @@ void intel_psr_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, struct drm_connector_state *new_state); void intel_psr_set_force_mode_changed(struct intel_dp *intel_dp); +void intel_psr2_sel_fetch_update(struct intel_atomic_state *state, + struct intel_crtc *crtc); +void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state); #endif /* __INTEL_PSR_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 2da4388e1540..4eaa4aa86ecd 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1531,7 +1531,7 @@ static void intel_sdvo_pre_enable(struct intel_atomic_state *state, default: drm_WARN(&dev_priv->drm, 1, "unknown pixel multiplier specified\n"); - /* fall through */ + fallthrough; case 1: rate = SDVO_CLOCK_RATE_MULT_1X; break; case 2: rate = SDVO_CLOCK_RATE_MULT_2X; break; case 4: rate = SDVO_CLOCK_RATE_MULT_4X; break; @@ -2084,14 +2084,18 @@ intel_sdvo_connector_matches_edid(struct intel_sdvo_connector *sdvo, static enum drm_connector_status 
intel_sdvo_detect(struct drm_connector *connector, bool force) { - u16 response; + struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); enum drm_connector_status ret; + u16 response; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); + if (!INTEL_DISPLAY_ENABLED(i915)) + return connector_status_disconnected; + if (!intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_ATTACHED_DISPLAYS, &response, 2)) @@ -2549,19 +2553,19 @@ intel_sdvo_guess_ddc_bus(struct intel_sdvo *sdvo) switch (sdvo->controlled_output) { case SDVO_OUTPUT_LVDS1: mask |= SDVO_OUTPUT_LVDS1; - /* fall through */ + fallthrough; case SDVO_OUTPUT_LVDS0: mask |= SDVO_OUTPUT_LVDS0; - /* fall through */ + fallthrough; case SDVO_OUTPUT_TMDS1: mask |= SDVO_OUTPUT_TMDS1; - /* fall through */ + fallthrough; case SDVO_OUTPUT_TMDS0: mask |= SDVO_OUTPUT_TMDS0; - /* fall through */ + fallthrough; case SDVO_OUTPUT_RGB1: mask |= SDVO_OUTPUT_RGB1; - /* fall through */ + fallthrough; case SDVO_OUTPUT_RGB0: mask |= SDVO_OUTPUT_RGB0; break; diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index d03860fef2d7..63040cb0d4e1 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -1626,8 +1626,7 @@ static int g4x_sprite_min_cdclk(const struct intel_crtc_state *crtc_state, hscale = drm_rect_calc_hscale(&plane_state->uapi.src, &plane_state->uapi.dst, 0, INT_MAX); - if (hscale < 0x10000) - return pixel_rate; + hscale = max(hscale, 0x10000u); /* Decimation steps at 2x,4x,8x,16x */ decimate = ilog2(hscale >> 16); @@ -1640,8 +1639,8 @@ static int g4x_sprite_min_cdclk(const struct intel_crtc_state *crtc_state, limit -= decimate; /* -10% for RGB */ - if (fb->format->cpp[0] >= 4) - limit--; /* -10% for RGB */ + if (!fb->format->is_yuv) 
+ limit--; /* * We should also do -10% if sprite scaling is enabled @@ -2147,7 +2146,7 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state, case DRM_FORMAT_RGB565: if (INTEL_GEN(dev_priv) >= 11) break; - /* fall through */ + fallthrough; case DRM_FORMAT_C8: case DRM_FORMAT_XRGB16161616F: case DRM_FORMAT_XBGR16161616F: @@ -2702,7 +2701,7 @@ static bool g4x_sprite_format_mod_supported(struct drm_plane *_plane, if (modifier == DRM_FORMAT_MOD_LINEAR || modifier == I915_FORMAT_MOD_X_TILED) return true; - /* fall through */ + fallthrough; default: return false; } @@ -2733,7 +2732,7 @@ static bool snb_sprite_format_mod_supported(struct drm_plane *_plane, if (modifier == DRM_FORMAT_MOD_LINEAR || modifier == I915_FORMAT_MOD_X_TILED) return true; - /* fall through */ + fallthrough; default: return false; } @@ -2768,7 +2767,7 @@ static bool vlv_sprite_format_mod_supported(struct drm_plane *_plane, if (modifier == DRM_FORMAT_MOD_LINEAR || modifier == I915_FORMAT_MOD_X_TILED) return true; - /* fall through */ + fallthrough; default: return false; } @@ -2801,7 +2800,7 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_ABGR8888: if (is_ccs_modifier(modifier)) return true; - /* fall through */ + fallthrough; case DRM_FORMAT_RGB565: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: @@ -2819,7 +2818,7 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_XVYU2101010: if (modifier == I915_FORMAT_MOD_Yf_TILED) return true; - /* fall through */ + fallthrough; case DRM_FORMAT_C8: case DRM_FORMAT_XBGR16161616F: case DRM_FORMAT_ABGR16161616F: @@ -2834,7 +2833,7 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, modifier == I915_FORMAT_MOD_X_TILED || modifier == I915_FORMAT_MOD_Y_TILED) return true; - /* fall through */ + fallthrough; default: return false; } @@ -2843,8 +2842,9 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, static bool 
gen12_plane_supports_mc_ccs(struct drm_i915_private *dev_priv, enum plane_id plane_id) { - /* Wa_14010477008:tgl[a0..c0] */ - if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_C0)) + /* Wa_14010477008:tgl[a0..c0],rkl[all] */ + if (IS_ROCKETLAKE(dev_priv) || + IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_C0)) return false; return plane_id < PLANE_SPRITE4; @@ -2860,7 +2860,7 @@ static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: if (!gen12_plane_supports_mc_ccs(dev_priv, plane->id)) return false; - /* fall through */ + fallthrough; case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: case I915_FORMAT_MOD_Y_TILED: @@ -2877,7 +2877,7 @@ static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_ABGR8888: if (is_ccs_modifier(modifier)) return true; - /* fall through */ + fallthrough; case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -2889,7 +2889,7 @@ static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_P016: if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS) return true; - /* fall through */ + fallthrough; case DRM_FORMAT_RGB565: case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_XBGR2101010: @@ -2910,7 +2910,7 @@ static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, modifier == I915_FORMAT_MOD_X_TILED || modifier == I915_FORMAT_MOD_Y_TILED) return true; - /* fall through */ + fallthrough; default: return false; } diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 5b5dc86a5737..8f67aef18b2d 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -159,7 +159,7 @@ int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port) switch (lane_mask) { default: MISSING_CASE(lane_mask); - /* fall-through */ + fallthrough; case 0x1: case 0x2: case 0x4: diff --git a/drivers/gpu/drm/i915/display/intel_tv.c 
b/drivers/gpu/drm/i915/display/intel_tv.c index 777032d9697b..7a7b99b015a5 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1706,6 +1706,9 @@ intel_tv_detect(struct drm_connector *connector, drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] force=%d\n", connector->base.id, connector->name, force); + if (!INTEL_DISPLAY_ENABLED(i915)) + return connector_status_disconnected; + if (force) { struct intel_load_detect_pipe tmp; int ret; diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 6faabd4f6d49..54bcc6a6947c 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -293,8 +293,12 @@ struct bdb_general_features { #define DVO_PORT_HDMIE 12 /* 193 */ #define DVO_PORT_DPF 13 /* N/A */ #define DVO_PORT_HDMIF 14 /* N/A */ -#define DVO_PORT_DPG 15 -#define DVO_PORT_HDMIG 16 +#define DVO_PORT_DPG 15 /* 217 */ +#define DVO_PORT_HDMIG 16 /* 217 */ +#define DVO_PORT_DPH 17 /* 217 */ +#define DVO_PORT_HDMIH 18 /* 217 */ +#define DVO_PORT_DPI 19 /* 217 */ +#define DVO_PORT_HDMII 20 /* 217 */ #define DVO_PORT_MIPIA 21 /* 171 */ #define DVO_PORT_MIPIB 22 /* 171 */ #define DVO_PORT_MIPIC 23 /* 171 */ @@ -330,6 +334,8 @@ enum vbt_gmbus_ddi { #define DP_AUX_E 0x50 #define DP_AUX_F 0x60 #define DP_AUX_G 0x70 +#define DP_AUX_H 0x80 +#define DP_AUX_I 0x90 #define VBT_DP_MAX_LINK_RATE_HBR3 0 #define VBT_DP_MAX_LINK_RATE_HBR2 1 diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 052e0b31a2da..5e5522923b1e 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1585,6 +1585,7 @@ static const struct drm_connector_helper_funcs intel_dsi_connector_helper_funcs }; static const struct drm_connector_funcs intel_dsi_connector_funcs = { + .detect = intel_panel_detect, .late_register = intel_connector_register, .early_unregister = 
intel_connector_unregister, .destroy = intel_connector_destroy, diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c index d0a514301575..4070b00c3690 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c @@ -483,7 +483,7 @@ int bxt_dsi_pll_compute(struct intel_encoder *encoder, if (dsi_ratio < dsi_ratio_min || dsi_ratio > dsi_ratio_max) { drm_err(&dev_priv->drm, - "Cant get a suitable ratio from DSI PLL ratios\n"); + "Can't get a suitable ratio from DSI PLL ratios\n"); return -ECHRNG; } else drm_dbg_kms(&dev_priv->drm, "DSI PLL calculation is Done!!\n"); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 278664f831e7..272cf3ea68d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -32,12 +32,13 @@ static void vma_clear_pages(struct i915_vma *vma) vma->pages = NULL; } -static int vma_bind(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static void vma_bind(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { - return vm->vma_ops.bind_vma(vm, vma, cache_level, flags); + vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags); } static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) @@ -157,6 +158,7 @@ static void clear_pages_worker(struct work_struct *work) struct clear_pages_work *w = container_of(work, typeof(*w), work); struct drm_i915_gem_object *obj = w->sleeve->vma->obj; struct i915_vma *vma = w->sleeve->vma; + struct i915_gem_ww_ctx ww; struct i915_request *rq; struct i915_vma *batch; int err = w->dma.error; @@ -172,17 +174,20 @@ static void clear_pages_worker(struct work_struct *work) obj->read_domains = I915_GEM_GPU_DOMAINS; obj->write_domain = 0; - err = 
i915_vma_pin(vma, 0, 0, PIN_USER); - if (unlikely(err)) + i915_gem_ww_ctx_init(&ww, false); + intel_engine_pm_get(w->ce->engine); +retry: + err = intel_context_pin_ww(w->ce, &ww); + if (err) goto out_signal; - batch = intel_emit_vma_fill_blt(w->ce, vma, w->value); + batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value); if (IS_ERR(batch)) { err = PTR_ERR(batch); - goto out_unpin; + goto out_ctx; } - rq = intel_context_create_request(w->ce); + rq = i915_request_create(w->ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -224,9 +229,19 @@ out_request: i915_request_add(rq); out_batch: intel_emit_vma_release(w->ce, batch); -out_unpin: - i915_vma_unpin(vma); +out_ctx: + intel_context_unpin(w->ce); out_signal: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + i915_vma_unpin(w->sleeve->vma); + intel_engine_pm_put(w->ce->engine); + if (unlikely(err)) { dma_fence_set_error(&w->dma, err); dma_fence_signal(&w->dma); @@ -234,6 +249,44 @@ out_signal: } } +static int pin_wait_clear_pages_work(struct clear_pages_work *w, + struct intel_context *ce) +{ + struct i915_vma *vma = w->sleeve->vma; + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(vma->obj, &ww); + if (err) + goto out; + + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); + if (unlikely(err)) + goto out; + + err = i915_sw_fence_await_reservation(&w->wait, + vma->obj->base.resv, NULL, + true, 0, I915_FENCE_GFP); + if (err) + goto err_unpin_vma; + + dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma); + +err_unpin_vma: + if (err) + i915_vma_unpin(vma); +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + return err; +} + static int __i915_sw_fence_call clear_pages_work_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) @@ -287,17 +340,9 @@ int 
i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0); i915_sw_fence_init(&work->wait, clear_pages_work_notify); - i915_gem_object_lock(obj); - err = i915_sw_fence_await_reservation(&work->wait, - obj->base.resv, NULL, true, 0, - I915_FENCE_GFP); - if (err < 0) { + err = pin_wait_clear_pages_work(work, ce); + if (err < 0) dma_fence_set_error(&work->dma, err); - } else { - dma_resv_add_excl_fence(obj->base.resv, &work->dma); - err = 0; - } - i915_gem_object_unlock(obj); dma_fence_get(&work->dma); i915_sw_fence_commit(&work->wait); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index d0bdb6d447ed..4fd38101bb56 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -390,24 +390,6 @@ __context_engines_static(const struct i915_gem_context *ctx) return rcu_dereference_protected(ctx->engines, true); } -static bool __reset_engine(struct intel_engine_cs *engine) -{ - struct intel_gt *gt = engine->gt; - bool success = false; - - if (!intel_has_reset_engine(gt)) - return false; - - if (!test_and_set_bit(I915_RESET_ENGINE + engine->id, - &gt->reset.flags)) { - success = intel_engine_reset(engine, NULL) == 0; - clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, - &gt->reset.flags); - } - - return success; -} - static void __reset_context(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -431,37 +413,39 @@ static bool __cancel_engine(struct intel_engine_cs *engine) * kill the banned context, we fallback to doing a local reset * instead. */ - if (IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT) && - !intel_engine_pulse(engine)) - return true; - - /* If we are unable to send a pulse, try resetting this engine.
*/ - return __reset_engine(engine); + return intel_engine_pulse(engine) == 0; } -static struct intel_engine_cs *__active_engine(struct i915_request *rq) +static bool +__active_engine(struct i915_request *rq, struct intel_engine_cs **active) { struct intel_engine_cs *engine, *locked; + bool ret = false; /* * Serialise with __i915_request_submit() so that it sees * is-banned?, or we know the request is already inflight. + * + * Note that rq->engine is unstable, and so we double + * check that we have acquired the lock on the final engine. */ locked = READ_ONCE(rq->engine); spin_lock_irq(&locked->active.lock); while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { spin_unlock(&locked->active.lock); - spin_lock(&engine->active.lock); locked = engine; + spin_lock(&locked->active.lock); } - engine = NULL; - if (i915_request_is_active(rq) && rq->fence.error != -EIO) - engine = rq->engine; + if (i915_request_is_active(rq)) { + if (!i915_request_completed(rq)) + *active = locked; + ret = true; + } spin_unlock_irq(&locked->active.lock); - return engine; + return ret; } static struct intel_engine_cs *active_engine(struct intel_context *ce) @@ -472,22 +456,34 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) if (!ce->timeline) return NULL; - mutex_lock(&ce->timeline->mutex); + /* + * rq->link is only SLAB_TYPESAFE_BY_RCU, we need to hold a reference + * to the request to prevent it being transferred to a new timeline + * (and onto a new timeline->requests list). + */ + rcu_read_lock(); list_for_each_entry_reverse(rq, &ce->timeline->requests, link) { - if (i915_request_completed(rq)) + bool found; + + /* timeline is already completed upto this point? 
*/ + if (!i915_request_get_rcu(rq)) break; /* Check with the backend if the request is inflight */ - engine = __active_engine(rq); - if (engine) + found = true; + if (likely(rcu_access_pointer(rq->timeline) == ce->timeline)) + found = __active_engine(rq, &engine); + + i915_request_put(rq); + if (found) break; } - mutex_unlock(&ce->timeline->mutex); + rcu_read_unlock(); return engine; } -static void kill_engines(struct i915_gem_engines *engines) +static void kill_engines(struct i915_gem_engines *engines, bool ban) { struct i915_gem_engines_iter it; struct intel_context *ce; @@ -502,7 +498,7 @@ static void kill_engines(struct i915_gem_engines *engines) for_each_gem_engine(ce, engines, it) { struct intel_engine_cs *engine; - if (intel_context_set_banned(ce)) + if (ban && intel_context_set_banned(ce)) continue; /* @@ -515,7 +511,7 @@ static void kill_engines(struct i915_gem_engines *engines) engine = active_engine(ce); /* First attempt to gracefully cancel the context */ - if (engine && !__cancel_engine(engine)) + if (engine && !__cancel_engine(engine) && ban) /* * If we are unable to send a preemptive pulse to bump * the context from the GPU, we have to resort to a full @@ -525,8 +521,10 @@ static void kill_engines(struct i915_gem_engines *engines) } } -static void kill_stale_engines(struct i915_gem_context *ctx) +static void kill_context(struct i915_gem_context *ctx) { + bool ban = (!i915_gem_context_is_persistent(ctx) || + !ctx->i915->params.enable_hangcheck); struct i915_gem_engines *pos, *next; spin_lock_irq(&ctx->stale.lock); @@ -539,7 +537,7 @@ static void kill_stale_engines(struct i915_gem_context *ctx) spin_unlock_irq(&ctx->stale.lock); - kill_engines(pos); + kill_engines(pos, ban); spin_lock_irq(&ctx->stale.lock); GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence)); @@ -551,11 +549,6 @@ static void kill_stale_engines(struct i915_gem_context *ctx) spin_unlock_irq(&ctx->stale.lock); } -static void kill_context(struct i915_gem_context *ctx) -{ - 
kill_stale_engines(ctx); -} - static void engines_idle_release(struct i915_gem_context *ctx, struct i915_gem_engines *engines) { @@ -590,7 +583,7 @@ static void engines_idle_release(struct i915_gem_context *ctx, kill: if (list_empty(&engines->link)) /* raced, already closed */ - kill_engines(engines); + kill_engines(engines, true); i915_sw_fence_commit(&engines->fence); } @@ -648,9 +641,7 @@ static void context_close(struct i915_gem_context *ctx) * case we opt to forcibly kill off all remaining requests on * context close. */ - if (!i915_gem_context_is_persistent(ctx) || - !ctx->i915->params.enable_hangcheck) - kill_context(ctx); + kill_context(ctx); i915_gem_context_put(ctx); } @@ -713,6 +704,7 @@ __create_context(struct drm_i915_private *i915) ctx->i915 = i915; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->link); spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); @@ -740,10 +732,6 @@ __create_context(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; - spin_lock(&i915->gem.contexts.lock); - list_add_tail(&ctx->link, &i915->gem.contexts.list); - spin_unlock(&i915->gem.contexts.lock); - return ctx; err_free: @@ -889,7 +877,7 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { struct intel_timeline *timeline; - timeline = intel_timeline_create(&i915->gt, NULL); + timeline = intel_timeline_create(&i915->gt); if (IS_ERR(timeline)) { context_close(ctx); return ERR_CAST(timeline); @@ -931,6 +919,7 @@ static int gem_context_register(struct i915_gem_context *ctx, struct drm_i915_file_private *fpriv, u32 *id) { + struct drm_i915_private *i915 = ctx->i915; struct i915_address_space *vm; int ret; @@ -949,8 +938,16 @@ static int gem_context_register(struct i915_gem_context *ctx, /* And finally expose ourselves to 
userspace via the idr */ ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL); if (ret) - put_pid(fetch_and_zero(&ctx->pid)); + goto err_pid; + spin_lock(&i915->gem.contexts.lock); + list_add_tail(&ctx->link, &i915->gem.contexts.list); + spin_unlock(&i915->gem.contexts.lock); + + return 0; + +err_pid: + put_pid(fetch_and_zero(&ctx->pid)); return ret; } @@ -1094,6 +1091,7 @@ I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); static int context_barrier_task(struct i915_gem_context *ctx, intel_engine_mask_t engines, bool (*skip)(struct intel_context *ce, void *data), + int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data), int (*emit)(struct i915_request *rq, void *data), void (*task)(void *data), void *data) @@ -1101,6 +1099,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, struct context_barrier_task *cb; struct i915_gem_engines_iter it; struct i915_gem_engines *e; + struct i915_gem_ww_ctx ww; struct intel_context *ce; int err = 0; @@ -1138,10 +1137,21 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (skip && skip(ce, data)) continue; - rq = intel_context_create_request(ce); + i915_gem_ww_ctx_init(&ww, true); +retry: + err = intel_context_pin_ww(ce, &ww); + if (err) + goto err; + + if (pin) + err = pin(ce, &ww, data); + if (err) + goto err_unpin; + + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - break; + goto err_unpin; } err = 0; @@ -1151,6 +1161,16 @@ static int context_barrier_task(struct i915_gem_context *ctx, err = i915_active_add_request(&cb->base, rq); i915_request_add(rq); +err_unpin: + intel_context_unpin(ce); +err: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + if (err) break; } @@ -1206,6 +1226,17 @@ static void set_ppgtt_barrier(void *data) i915_vm_close(old); } +static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void 
*data) +{ + struct i915_address_space *vm = ce->vm; + + if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915)) + /* ppGTT is not part of the legacy context image */ + return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww); + + return 0; +} + static int emit_ppgtt_update(struct i915_request *rq, void *data) { struct i915_address_space *vm = rq->context->vm; @@ -1262,20 +1293,10 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) static bool skip_ppgtt_update(struct intel_context *ce, void *data) { - if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) - return true; - if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915)) - return false; - - if (!atomic_read(&ce->pin_count)) - return true; - - /* ppGTT is not part of the legacy context image */ - if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm))) - return true; - - return false; + return !ce->state; + else + return !atomic_read(&ce->pin_count); } static int set_ppgtt(struct drm_i915_file_private *file_priv, @@ -1326,6 +1347,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, */ err = context_barrier_task(ctx, ALL_ENGINES, skip_ppgtt_update, + pin_ppgtt_update, emit_ppgtt_update, set_ppgtt_barrier, old); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 2679380159fc..8dd295dbe241 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -48,12 +48,9 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme src = sg_next(src); } - if (!dma_map_sg_attrs(attachment->dev, - st->sgl, st->nents, dir, - DMA_ATTR_SKIP_CPU_SYNC)) { - ret = -ENOMEM; + ret = dma_map_sgtable(attachment->dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC); + if (ret) goto err_free_sg; - } return st; @@ -73,9 +70,7 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, { struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - dma_unmap_sg_attrs(attachment->dev, - sg->sgl, sg->nents, dir, - 
DMA_ATTR_SKIP_CPU_SYNC); + dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC); sg_free_table(sg); kfree(sg); @@ -128,7 +123,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire if (err) return err; - err = i915_gem_object_lock_interruptible(obj); + err = i915_gem_object_lock_interruptible(obj, NULL); if (err) goto out; @@ -149,7 +144,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct if (err) return err; - err = i915_gem_object_lock_interruptible(obj); + err = i915_gem_object_lock_interruptible(obj, NULL); if (err) goto out; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 7f76fc68f498..fcce6909f201 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -32,11 +32,17 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) if (!i915_gem_object_is_framebuffer(obj)) return; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); __i915_gem_object_flush_for_display(obj); i915_gem_object_unlock(obj); } +void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj) +{ + if (i915_gem_object_is_framebuffer(obj)) + __i915_gem_object_flush_for_display(obj); +} + /** * Moves a single object to the WC read, and possibly write domain. * @obj: object to act on @@ -197,18 +203,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, if (ret) return ret; - ret = i915_gem_object_lock_interruptible(obj); - if (ret) - return ret; - /* Always invalidate stale cachelines */ if (obj->cache_level != cache_level) { i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; } - i915_gem_object_unlock(obj); - /* The cache-level will be applied when each vma is rebound. 
*/ return i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE | @@ -293,7 +293,12 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, goto out; } + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + goto out; + ret = i915_gem_object_set_cache_level(obj, level); + i915_gem_object_unlock(obj); out: i915_gem_object_put(obj); @@ -313,6 +318,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, unsigned int flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_gem_ww_ctx ww; struct i915_vma *vma; int ret; @@ -320,6 +326,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) return ERR_PTR(-EINVAL); + i915_gem_ww_ctx_init(&ww, true); +retry: + ret = i915_gem_object_lock(obj, &ww); + if (ret) + goto err; /* * The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is @@ -334,7 +345,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, HAS_WT(i915) ? 
I915_CACHE_WT : I915_CACHE_NONE); if (ret) - return ERR_PTR(ret); + goto err; /* * As the user may map the buffer once pinned in the display plane @@ -347,18 +358,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma = ERR_PTR(-ENOSPC); if ((flags & PIN_MAPPABLE) == 0 && (!view || view->type == I915_GGTT_VIEW_NORMAL)) - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, - flags | - PIN_MAPPABLE | - PIN_NONBLOCK); - if (IS_ERR(vma)) - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); - if (IS_ERR(vma)) - return vma; + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment, + flags | PIN_MAPPABLE | + PIN_NONBLOCK); + if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, + alignment, flags); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; + } vma->display_alignment = max_t(u64, vma->display_alignment, alignment); - i915_gem_object_flush_if_display(obj); + i915_gem_object_flush_if_display_locked(obj); + +err: + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + if (ret) + return ERR_PTR(ret); return vma; } @@ -485,21 +509,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, return -ENOENT; /* - * Already in the desired write domain? Nothing for us to do! - * - * We apply a little bit of cunning here to catch a broader set of - * no-ops. If obj->write_domain is set, we must be in the same - * obj->read_domains, and only that domain. Therefore, if that - * obj->write_domain matches the request read_domains, we are - * already in the same read/write domain and can skip the operation, - * without having to further check the requested write_domain. - */ - if (READ_ONCE(obj->write_domain) == read_domains) { - err = 0; - goto out; - } - - /* * Try to flush the object off the GPU without holding the lock. 
* We will repeat the flush holding the lock in the normal manner * to catch cases where we are gazumped. @@ -536,7 +545,20 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out; - err = i915_gem_object_lock_interruptible(obj); + /* + * Already in the desired write domain? Nothing for us to do! + * + * We apply a little bit of cunning here to catch a broader set of + * no-ops. If obj->write_domain is set, we must be in the same + * obj->read_domains, and only that domain. Therefore, if that + * obj->write_domain matches the request read_domains, we are + * already in the same read/write domain and can skip the operation, + * without having to further check the requested write_domain. + */ + if (READ_ONCE(obj->write_domain) == read_domains) + goto out_unpin; + + err = i915_gem_object_lock_interruptible(obj, NULL); if (err) goto out_unpin; @@ -576,19 +598,17 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj, if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; - ret = i915_gem_object_lock_interruptible(obj); - if (ret) - return ret; + assert_object_held(obj); ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); if (ret) - goto err_unlock; + return ret; ret = i915_gem_object_pin_pages(obj); if (ret) - goto err_unlock; + return ret; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ || !static_cpu_has(X86_FEATURE_CLFLUSH)) { @@ -616,8 +636,6 @@ out: err_unpin: i915_gem_object_unpin_pages(obj); -err_unlock: - i915_gem_object_unlock(obj); return ret; } @@ -630,20 +648,18 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj, if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; - ret = i915_gem_object_lock_interruptible(obj); - if (ret) - return ret; + assert_object_held(obj); ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT); if (ret) - goto err_unlock; + return ret; ret = i915_gem_object_pin_pages(obj); if (ret) - goto 
err_unlock; + return ret; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE || !static_cpu_has(X86_FEATURE_CLFLUSH)) { @@ -680,7 +696,5 @@ out: err_unpin: i915_gem_object_unpin_pages(obj); -err_unlock: - i915_gem_object_unlock(obj); return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 6b4ec66cb558..1904e6e5ea64 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -26,6 +26,7 @@ #include "i915_gem_ioctls.h" #include "i915_sw_fence_work.h" #include "i915_trace.h" +#include "i915_user_extensions.h" struct eb_vma { struct i915_vma *vma; @@ -40,9 +41,11 @@ struct eb_vma { u32 handle; }; -struct eb_vma_array { - struct kref kref; - struct eb_vma vma[]; +enum { + FORCE_CPU_RELOC = 1, + FORCE_GTT_RELOC, + FORCE_GPU_RELOC, +#define DBG_FORCE_RELOC 0 /* choose one of the above! */ }; #define __EXEC_OBJECT_HAS_PIN BIT(31) @@ -50,9 +53,11 @@ struct eb_vma_array { #define __EXEC_OBJECT_NEEDS_MAP BIT(29) #define __EXEC_OBJECT_NEEDS_BIAS BIT(28) #define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */ +#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) #define __EXEC_HAS_RELOC BIT(31) -#define __EXEC_INTERNAL_FLAGS (~0u << 31) +#define __EXEC_ENGINE_PINNED BIT(30) +#define __EXEC_INTERNAL_FLAGS (~0u << 30) #define UPDATE PIN_OFFSET_FIXED #define BATCH_OFFSET_BIAS (256*1024) @@ -222,6 +227,13 @@ struct eb_vma_array { * the batchbuffer in trusted mode, otherwise the ioctl is rejected. 
*/ +struct eb_fence { + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ + struct dma_fence *dma_fence; + u64 value; + struct dma_fence_chain *chain_fence; +}; + struct i915_execbuffer { struct drm_i915_private *i915; /** i915 backpointer */ struct drm_file *file; /** per-file lookup tables and limits */ @@ -246,6 +258,8 @@ struct i915_execbuffer { /** list of vma that have execobj.relocation_count */ struct list_head relocs; + struct i915_gem_ww_ctx ww; + /** * Track the most recently used object for relocations, as we * frequently have to perform multiple relocations within the same @@ -253,25 +267,30 @@ struct i915_execbuffer { */ struct reloc_cache { struct drm_mm_node node; /** temporary GTT binding */ + unsigned long vaddr; /** Current kmap address */ + unsigned long page; /** Currently mapped page index */ unsigned int gen; /** Cached value of INTEL_GEN */ bool use_64bit_reloc : 1; bool has_llc : 1; bool has_fence : 1; bool needs_unfenced : 1; - struct i915_vma *target; struct i915_request *rq; - struct i915_vma *rq_vma; u32 *rq_cmd; unsigned int rq_size; + struct intel_gt_buffer_pool_node *pool; } reloc_cache; + struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */ + struct intel_context *reloc_context; + u64 invalid_flags; /** Set of execobj.flags that are invalid */ u32 context_flags; /** Set of execobj.flags to insert from the ctx */ + u64 batch_len; /** Length of batch within object */ u32 batch_start_offset; /** Location within object of batch */ - u32 batch_len; /** Length of batch within object */ u32 batch_flags; /** Flags composed for emit_bb_start() */ + struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */ /** * Indicate either the size of the hastable used to resolve @@ -280,9 +299,16 @@ struct i915_execbuffer { */ int lut_size; struct hlist_head *buckets; /** ht for relocation handles */ - struct eb_vma_array *array; + + struct eb_fence *fences; + unsigned long 
num_fences; }; +static int eb_parse(struct i915_execbuffer *eb); +static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, + bool throttle); +static void eb_unpin_engine(struct i915_execbuffer *eb); + static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { return intel_engine_requires_cmd_parser(eb->engine) || @@ -290,62 +316,8 @@ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) eb->args->batch_len); } -static struct eb_vma_array *eb_vma_array_create(unsigned int count) -{ - struct eb_vma_array *arr; - - arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN); - if (!arr) - return NULL; - - kref_init(&arr->kref); - arr->vma[0].vma = NULL; - - return arr; -} - -static inline void eb_unreserve_vma(struct eb_vma *ev) -{ - struct i915_vma *vma = ev->vma; - - if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) - __i915_vma_unpin_fence(vma); - - if (ev->flags & __EXEC_OBJECT_HAS_PIN) - __i915_vma_unpin(vma); - - ev->flags &= ~(__EXEC_OBJECT_HAS_PIN | - __EXEC_OBJECT_HAS_FENCE); -} - -static void eb_vma_array_destroy(struct kref *kref) -{ - struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref); - struct eb_vma *ev = arr->vma; - - while (ev->vma) { - eb_unreserve_vma(ev); - i915_vma_put(ev->vma); - ev++; - } - - kvfree(arr); -} - -static void eb_vma_array_put(struct eb_vma_array *arr) -{ - kref_put(&arr->kref, eb_vma_array_destroy); -} - static int eb_create(struct i915_execbuffer *eb) { - /* Allocate an extra slot for use by the command parser + sentinel */ - eb->array = eb_vma_array_create(eb->buffer_count + 2); - if (!eb->array) - return -ENOMEM; - - eb->vma = eb->array->vma; - if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { unsigned int size = 1 + ilog2(eb->buffer_count); @@ -379,10 +351,8 @@ static int eb_create(struct i915_execbuffer *eb) break; } while (--size); - if (unlikely(!size)) { - eb_vma_array_put(eb->array); + if (unlikely(!size)) return -ENOMEM; - } eb->lut_size = size; } else { 
@@ -466,16 +436,17 @@ eb_pin_vma(struct i915_execbuffer *eb, pin_flags |= PIN_GLOBAL; /* Attempt to reuse the current location if available */ - if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) { + /* TODO: Add -EDEADLK handling here */ + if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) { if (entry->flags & EXEC_OBJECT_PINNED) return false; /* Failing that pick any _free_ space if suitable */ - if (unlikely(i915_vma_pin(vma, - entry->pad_to_size, - entry->alignment, - eb_pin_flags(entry, ev->flags) | - PIN_USER | PIN_NOEVICT))) + if (unlikely(i915_vma_pin_ww(vma, &eb->ww, + entry->pad_to_size, + entry->alignment, + eb_pin_flags(entry, ev->flags) | + PIN_USER | PIN_NOEVICT))) return false; } @@ -493,6 +464,19 @@ eb_pin_vma(struct i915_execbuffer *eb, return !eb_vma_misplaced(entry, vma, ev->flags); } +static inline void +eb_unreserve_vma(struct eb_vma *ev) +{ + if (!(ev->flags & __EXEC_OBJECT_HAS_PIN)) + return; + + if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)) + __i915_vma_unpin_fence(ev->vma); + + __i915_vma_unpin(ev->vma); + ev->flags &= ~__EXEC_OBJECT_RESERVED; +} + static int eb_validate_vma(struct i915_execbuffer *eb, struct drm_i915_gem_exec_object2 *entry, @@ -584,19 +568,26 @@ eb_add_vma(struct i915_execbuffer *eb, eb->batch = ev; } +} - if (eb_pin_vma(eb, entry, ev)) { - if (entry->offset != vma->node.start) { - entry->offset = vma->node.start | UPDATE; - eb->args->flags |= __EXEC_HAS_RELOC; - } - } else { - eb_unreserve_vma(ev); - list_add_tail(&ev->bind_link, &eb->unbound); - } +static inline int use_cpu_reloc(const struct reloc_cache *cache, + const struct drm_i915_gem_object *obj) +{ + if (!i915_gem_object_has_struct_page(obj)) + return false; + + if (DBG_FORCE_RELOC == FORCE_CPU_RELOC) + return true; + + if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) + return false; + + return (cache->has_llc || + obj->cache_dirty || + obj->cache_level != I915_CACHE_NONE); } -static int eb_reserve_vma(const struct i915_execbuffer *eb, +static int 
eb_reserve_vma(struct i915_execbuffer *eb, struct eb_vma *ev, u64 pin_flags) { @@ -611,7 +602,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, return err; } - err = i915_vma_pin(vma, + err = i915_vma_pin_ww(vma, &eb->ww, entry->pad_to_size, entry->alignment, eb_pin_flags(entry, ev->flags) | pin_flags); if (err) @@ -661,10 +652,6 @@ static int eb_reserve(struct i915_execbuffer *eb) * This avoid unnecessary unbinding of later objects in order to make * room for the earlier objects *unless* we need to defragment. */ - - if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex)) - return -EINTR; - pass = 0; do { list_for_each_entry(ev, &eb->unbound, bind_link) { @@ -672,8 +659,8 @@ static int eb_reserve(struct i915_execbuffer *eb) if (err) break; } - if (!(err == -ENOSPC || err == -EAGAIN)) - break; + if (err != -ENOSPC) + return err; /* Resort *all* the objects into priority order */ INIT_LIST_HEAD(&eb->unbound); @@ -703,13 +690,6 @@ static int eb_reserve(struct i915_execbuffer *eb) } list_splice_tail(&last, &eb->unbound); - if (err == -EAGAIN) { - mutex_unlock(&eb->i915->drm.struct_mutex); - flush_workqueue(eb->i915->mm.userptr_wq); - mutex_lock(&eb->i915->drm.struct_mutex); - continue; - } - switch (pass++) { case 0: break; @@ -720,20 +700,15 @@ static int eb_reserve(struct i915_execbuffer *eb) err = i915_gem_evict_vm(eb->context->vm); mutex_unlock(&eb->context->vm->mutex); if (err) - goto unlock; + return err; break; default: - err = -ENOSPC; - goto unlock; + return -ENOSPC; } pin_flags = PIN_USER; } while (1); - -unlock: - mutex_unlock(&eb->i915->drm.struct_mutex); - return err; } static unsigned int eb_batch_index(const struct i915_execbuffer *eb) @@ -856,12 +831,12 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle) static int eb_lookup_vmas(struct i915_execbuffer *eb) { + struct drm_i915_private *i915 = eb->i915; unsigned int batch = eb_batch_index(eb); unsigned int i; int err = 0; INIT_LIST_HEAD(&eb->relocs); - 
INIT_LIST_HEAD(&eb->unbound); for (i = 0; i < eb->buffer_count; i++) { struct i915_vma *vma; @@ -869,22 +844,87 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) vma = eb_lookup_vma(eb, eb->exec[i].handle); if (IS_ERR(vma)) { err = PTR_ERR(vma); - break; + goto err; } err = eb_validate_vma(eb, &eb->exec[i], vma); if (unlikely(err)) { i915_vma_put(vma); - break; + goto err; } eb_add_vma(eb, i, batch, vma); } + if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) { + drm_dbg(&i915->drm, + "Attempting to use self-modifying batch buffer\n"); + return -EINVAL; + } + + if (range_overflows_t(u64, + eb->batch_start_offset, eb->batch_len, + eb->batch->vma->size)) { + drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); + return -EINVAL; + } + + if (eb->batch_len == 0) + eb->batch_len = eb->batch->vma->size - eb->batch_start_offset; + if (unlikely(eb->batch_len == 0)) { /* impossible! */ + drm_dbg(&i915->drm, "Invalid batch length\n"); + return -EINVAL; + } + + return 0; + +err: eb->vma[i].vma = NULL; return err; } +static int eb_validate_vmas(struct i915_execbuffer *eb) +{ + unsigned int i; + int err; + + INIT_LIST_HEAD(&eb->unbound); + + for (i = 0; i < eb->buffer_count; i++) { + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; + + err = i915_gem_object_lock(vma->obj, &eb->ww); + if (err) + return err; + + if (eb_pin_vma(eb, entry, ev)) { + if (entry->offset != vma->node.start) { + entry->offset = vma->node.start | UPDATE; + eb->args->flags |= __EXEC_HAS_RELOC; + } + } else { + eb_unreserve_vma(ev); + + list_add_tail(&ev->bind_link, &eb->unbound); + if (drm_mm_node_allocated(&vma->node)) { + err = i915_vma_unbind(vma); + if (err) + return err; + } + } + + GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && + eb_vma_misplaced(&eb->exec[i], vma, ev->flags)); + } + + if (!list_empty(&eb->unbound)) + return eb_reserve(eb); + + return 0; +} + static struct eb_vma * eb_get_vma(const struct 
i915_execbuffer *eb, unsigned long handle) { @@ -905,13 +945,31 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) } } +static void eb_release_vmas(struct i915_execbuffer *eb, bool final) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; + + if (!vma) + break; + + eb_unreserve_vma(ev); + + if (final) + i915_vma_put(vma); + } + + eb_unpin_engine(eb); +} + static void eb_destroy(const struct i915_execbuffer *eb) { GEM_BUG_ON(eb->reloc_cache.rq); - if (eb->array) - eb_vma_array_put(eb->array); - if (eb->lut_size > 0) kfree(eb->buckets); } @@ -923,9 +981,19 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, return gen8_canonical_addr((int)reloc->delta + target->node.start); } +static void reloc_cache_clear(struct reloc_cache *cache) +{ + cache->rq = NULL; + cache->rq_cmd = NULL; + cache->pool = NULL; + cache->rq_size = 0; +} + static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { + cache->page = -1; + cache->vaddr = 0; /* Must be a variable in the struct to allow GCC to unroll. 
*/ cache->gen = INTEL_GEN(i915); cache->has_llc = HAS_LLC(i915); @@ -933,120 +1001,249 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->gen < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; - cache->rq = NULL; - cache->target = NULL; + reloc_cache_clear(cache); } -#define RELOC_TAIL 4 +static inline void *unmask_page(unsigned long p) +{ + return (void *)(uintptr_t)(p & PAGE_MASK); +} -static int reloc_gpu_chain(struct reloc_cache *cache) +static inline unsigned int unmask_flags(unsigned long p) { - struct intel_gt_buffer_pool_node *pool; - struct i915_request *rq = cache->rq; - struct i915_vma *batch; - u32 *cmd; - int err; + return p & ~PAGE_MASK; +} - pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE); - if (IS_ERR(pool)) - return PTR_ERR(pool); +#define KMAP 0x4 /* after CLFLUSH_FLAGS */ - batch = i915_vma_instance(pool->obj, rq->context->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_pool; - } +static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) +{ + struct drm_i915_private *i915 = + container_of(cache, struct i915_execbuffer, reloc_cache)->i915; + return &i915->ggtt; +} - err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); - if (err) - goto out_pool; +static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache) +{ + if (!cache->pool) + return; - GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32)); - cmd = cache->rq_cmd + cache->rq_size; - *cmd++ = MI_ARB_CHECK; - if (cache->gen >= 8) - *cmd++ = MI_BATCH_BUFFER_START_GEN8; - else if (cache->gen >= 6) - *cmd++ = MI_BATCH_BUFFER_START; - else - *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; - *cmd++ = lower_32_bits(batch->node.start); - *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */ - i915_gem_object_flush_map(cache->rq_vma->obj); - i915_gem_object_unpin_map(cache->rq_vma->obj); - cache->rq_vma = NULL; + /* + * This 
is a bit nasty, normally we keep objects locked until the end + * of execbuffer, but we already submit this, and have to unlock before + * dropping the reference. Fortunately we can only hold 1 pool node at + * a time, so this should be harmless. + */ + i915_gem_ww_unlock_single(cache->pool->obj); + intel_gt_buffer_pool_put(cache->pool); + cache->pool = NULL; +} - err = intel_gt_buffer_pool_mark_active(pool, rq); - if (err == 0) { - i915_vma_lock(batch); - err = i915_request_await_object(rq, batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); - } - i915_vma_unpin(batch); - if (err) - goto out_pool; +static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache) +{ + struct drm_i915_gem_object *obj = cache->rq->batch->obj; - cmd = i915_gem_object_pin_map(batch->obj, - cache->has_llc ? - I915_MAP_FORCE_WB : - I915_MAP_FORCE_WC); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out_pool; - } + GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); + cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - /* Return with batch mapping (cmd) still pinned */ - cache->rq_cmd = cmd; - cache->rq_size = 0; - cache->rq_vma = batch; + __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); + i915_gem_object_unpin_map(obj); -out_pool: - intel_gt_buffer_pool_put(pool); - return err; + intel_gt_chipset_flush(cache->rq->engine->gt); + + i915_request_add(cache->rq); + reloc_cache_put_pool(eb, cache); + reloc_cache_clear(cache); + + eb->reloc_pool = NULL; } -static unsigned int reloc_bb_flags(const struct reloc_cache *cache) +static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) { - return cache->gen > 5 ? 
0 : I915_DISPATCH_SECURE; + void *vaddr; + + if (cache->rq) + reloc_gpu_flush(eb, cache); + + if (!cache->vaddr) + return; + + vaddr = unmask_page(cache->vaddr); + if (cache->vaddr & KMAP) { + struct drm_i915_gem_object *obj = + (struct drm_i915_gem_object *)cache->node.mm; + if (cache->vaddr & CLFLUSH_AFTER) + mb(); + + kunmap_atomic(vaddr); + i915_gem_object_finish_access(obj); + } else { + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + + intel_gt_flush_ggtt_writes(ggtt->vm.gt); + io_mapping_unmap_atomic((void __iomem *)vaddr); + + if (drm_mm_node_allocated(&cache->node)) { + ggtt->vm.clear_range(&ggtt->vm, + cache->node.start, + cache->node.size); + mutex_lock(&ggtt->vm.mutex); + drm_mm_remove_node(&cache->node); + mutex_unlock(&ggtt->vm.mutex); + } else { + i915_vma_unpin((struct i915_vma *)cache->node.mm); + } + } + + cache->vaddr = 0; + cache->page = -1; } -static int reloc_gpu_flush(struct reloc_cache *cache) +static void *reloc_kmap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + unsigned long pageno) { - struct i915_request *rq; - int err; + void *vaddr; + struct page *page; - rq = fetch_and_zero(&cache->rq); - if (!rq) - return 0; + if (cache->vaddr) { + kunmap_atomic(unmask_page(cache->vaddr)); + } else { + unsigned int flushes; + int err; - if (cache->rq_vma) { - struct drm_i915_gem_object *obj = cache->rq_vma->obj; + err = i915_gem_object_prepare_write(obj, &flushes); + if (err) + return ERR_PTR(err); - GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END; + BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); + BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); - __i915_gem_object_flush_map(obj, - 0, sizeof(u32) * cache->rq_size); - i915_gem_object_unpin_map(obj); + cache->vaddr = flushes | KMAP; + cache->node.mm = (void *)obj; + if (flushes) + mb(); } - err = 0; - if (rq->engine->emit_init_breadcrumb) - err = rq->engine->emit_init_breadcrumb(rq); - if (!err) - err = 
rq->engine->emit_bb_start(rq, - rq->batch->node.start, - PAGE_SIZE, - reloc_bb_flags(cache)); - if (err) - i915_request_set_error_once(rq, err); + page = i915_gem_object_get_page(obj, pageno); + if (!obj->mm.dirty) + set_page_dirty(page); - intel_gt_chipset_flush(rq->engine->gt); - i915_request_add(rq); + vaddr = kmap_atomic(page); + cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; + cache->page = pageno; - return err; + return vaddr; +} + +static void *reloc_iomap(struct drm_i915_gem_object *obj, + struct i915_execbuffer *eb, + unsigned long page) +{ + struct reloc_cache *cache = &eb->reloc_cache; + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + unsigned long offset; + void *vaddr; + + if (cache->vaddr) { + intel_gt_flush_ggtt_writes(ggtt->vm.gt); + io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); + } else { + struct i915_vma *vma; + int err; + + if (i915_gem_object_is_tiled(obj)) + return ERR_PTR(-EINVAL); + + if (use_cpu_reloc(cache, obj)) + return NULL; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return ERR_PTR(err); + + vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); + if (vma == ERR_PTR(-EDEADLK)) + return vma; + + if (IS_ERR(vma)) { + memset(&cache->node, 0, sizeof(cache->node)); + mutex_lock(&ggtt->vm.mutex); + err = drm_mm_insert_node_in_range + (&ggtt->vm.mm, &cache->node, + PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + mutex_unlock(&ggtt->vm.mutex); + if (err) /* no inactive aperture space, use cpu reloc */ + return NULL; + } else { + cache->node.start = vma->node.start; + cache->node.mm = (void *)vma; + } + } + + offset = cache->node.start; + if (drm_mm_node_allocated(&cache->node)) { + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, page), + offset, I915_CACHE_NONE, 0); + } else { + offset += page << PAGE_SHIFT; + } + + vaddr = (void __force 
*)io_mapping_map_atomic_wc(&ggtt->iomap, + offset); + cache->page = page; + cache->vaddr = (unsigned long)vaddr; + + return vaddr; +} + +static void *reloc_vaddr(struct drm_i915_gem_object *obj, + struct i915_execbuffer *eb, + unsigned long page) +{ + struct reloc_cache *cache = &eb->reloc_cache; + void *vaddr; + + if (cache->page == page) { + vaddr = unmask_page(cache->vaddr); + } else { + vaddr = NULL; + if ((cache->vaddr & KMAP) == 0) + vaddr = reloc_iomap(obj, eb, page); + if (!vaddr) + vaddr = reloc_kmap(obj, cache, page); + } + + return vaddr; +} + +static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) +{ + if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { + if (flushes & CLFLUSH_BEFORE) { + clflushopt(addr); + mb(); + } + + *addr = value; + + /* + * Writes to the same cacheline are serialised by the CPU + * (including clflush). On the write path, we only require + * that it hits memory in an orderly fashion and place + * mb barriers at the start and end of the relocation phase + * to ensure ordering of clflush wrt to the system. 
+ */ + if (flushes & CLFLUSH_AFTER) + clflushopt(addr); + } else + *addr = value; } static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) @@ -1054,7 +1251,7 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) struct drm_i915_gem_object *obj = vma->obj; int err; - i915_vma_lock(vma); + assert_vma_held(vma); if (obj->cache_dirty & ~obj->cache_coherent) i915_gem_clflush_object(obj, 0); @@ -1064,25 +1261,31 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - return err; } static int __reloc_gpu_alloc(struct i915_execbuffer *eb, struct intel_engine_cs *engine, + struct i915_vma *vma, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; - struct intel_gt_buffer_pool_node *pool; + struct intel_gt_buffer_pool_node *pool = eb->reloc_pool; struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; - pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); - if (IS_ERR(pool)) - return PTR_ERR(pool); + if (!pool) { + pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); + if (IS_ERR(pool)) + return PTR_ERR(pool); + } + eb->reloc_pool = NULL; + + err = i915_gem_object_lock(pool->obj, &eb->ww); + if (err) + goto err_pool; cmd = i915_gem_object_pin_map(pool->obj, cache->has_llc ? 
@@ -1090,35 +1293,42 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, I915_MAP_FORCE_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out_pool; + goto err_pool; } - batch = i915_vma_instance(pool->obj, eb->context->vm, NULL); + batch = i915_vma_instance(pool->obj, vma->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto err_unmap; } - err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); + err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK); if (err) goto err_unmap; if (engine == eb->context->engine) { rq = i915_request_create(eb->context); } else { - struct intel_context *ce; + struct intel_context *ce = eb->reloc_context; - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto err_unpin; + if (!ce) { + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err_unpin; + } + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(eb->context->vm); + eb->reloc_context = ce; } - i915_vm_put(ce->vm); - ce->vm = i915_vm_get(eb->context->vm); + err = intel_context_pin_ww(ce, &eb->ww); + if (err) + goto err_unpin; - rq = intel_context_create_request(ce); - intel_context_put(ce); + rq = i915_request_create(ce); + intel_context_unpin(ce); } if (IS_ERR(rq)) { err = PTR_ERR(rq); @@ -1129,11 +1339,20 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_request; - i915_vma_lock(batch); + err = reloc_move_to_gpu(rq, vma); + if (err) + goto err_request; + + err = eb->engine->emit_bb_start(rq, + batch->node.start, PAGE_SIZE, + cache->gen > 5 ? 
0 : I915_DISPATCH_SECURE); + if (err) + goto skip_request; + + assert_vma_held(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); if (err) goto skip_request; @@ -1143,10 +1362,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, cache->rq = rq; cache->rq_cmd = cmd; cache->rq_size = 0; - cache->rq_vma = batch; + cache->pool = pool; /* Return with batch mapping (cmd) still pinned */ - goto out_pool; + return 0; skip_request: i915_request_set_error_once(rq, err); @@ -1156,8 +1375,8 @@ err_unpin: i915_vma_unpin(batch); err_unmap: i915_gem_object_unpin_map(pool->obj); -out_pool: - intel_gt_buffer_pool_put(pool); +err_pool: + eb->reloc_pool = pool; return err; } @@ -1172,9 +1391,12 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, { struct reloc_cache *cache = &eb->reloc_cache; u32 *cmd; - int err; + + if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) + reloc_gpu_flush(eb, cache); if (unlikely(!cache->rq)) { + int err; struct intel_engine_cs *engine = eb->engine; if (!reloc_can_use_engine(engine)) { @@ -1183,37 +1405,28 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, return ERR_PTR(-ENODEV); } - err = __reloc_gpu_alloc(eb, engine, len); + err = __reloc_gpu_alloc(eb, engine, vma, len); if (unlikely(err)) return ERR_PTR(err); } - if (vma != cache->target) { - err = reloc_move_to_gpu(cache->rq, vma); - if (unlikely(err)) { - i915_request_set_error_once(cache->rq, err); - return ERR_PTR(err); - } - - cache->target = vma; - } - - if (unlikely(cache->rq_size + len > - PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) { - err = reloc_gpu_chain(cache); - if (unlikely(err)) { - i915_request_set_error_once(cache->rq, err); - return ERR_PTR(err); - } - } - - GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32)); cmd = cache->rq_cmd + cache->rq_size; cache->rq_size += len; return cmd; } +static inline bool use_reloc_gpu(struct i915_vma *vma) +{ + if (DBG_FORCE_RELOC 
== FORCE_GPU_RELOC) + return true; + + if (DBG_FORCE_RELOC) + return false; + + return !dma_resv_test_signaled_rcu(vma->resv, true); +} + static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) { struct page *page; @@ -1229,9 +1442,9 @@ static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) } static int __reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 offset, - u64 target_addr) + struct i915_vma *vma, + u64 offset, + u64 target_addr) { const unsigned int gen = eb->reloc_cache.gen; unsigned int len; @@ -1246,8 +1459,10 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb, len = 3; batch = reloc_gpu(eb, vma, len); - if (IS_ERR(batch)) - return PTR_ERR(batch); + if (batch == ERR_PTR(-EDEADLK)) + return -EDEADLK; + else if (IS_ERR(batch)) + return false; addr = gen8_canonical_addr(vma->node.start + offset); if (gen >= 8) { @@ -1296,21 +1511,58 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb, *batch++ = target_addr; } - return 0; + return true; +} + +static int reloc_entry_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + u64 offset, + u64 target_addr) +{ + if (eb->reloc_cache.vaddr) + return false; + + if (!use_reloc_gpu(vma)) + return false; + + return __reloc_entry_gpu(eb, vma, offset, target_addr); } static u64 -relocate_entry(struct i915_execbuffer *eb, - struct i915_vma *vma, +relocate_entry(struct i915_vma *vma, const struct drm_i915_gem_relocation_entry *reloc, + struct i915_execbuffer *eb, const struct i915_vma *target) { u64 target_addr = relocation_target(reloc, target); - int err; - - err = __reloc_entry_gpu(eb, vma, reloc->offset, target_addr); - if (err) - return err; + u64 offset = reloc->offset; + int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr); + + if (reloc_gpu < 0) + return reloc_gpu; + + if (!reloc_gpu) { + bool wide = eb->reloc_cache.use_64bit_reloc; + void *vaddr; + +repeat: + vaddr = reloc_vaddr(vma->obj, eb, + offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + 
return PTR_ERR(vaddr); + + GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_addr), + eb->reloc_cache.vaddr); + + if (wide) { + offset += sizeof(u32); + target_addr >>= 32; + wide = false; + goto repeat; + } + } return target->node.start | UPDATE; } @@ -1375,7 +1627,8 @@ eb_relocate_entry(struct i915_execbuffer *eb, * If the relocation already has the right value in it, no * more work needs to be done. */ - if (gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset) + if (!DBG_FORCE_RELOC && + gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset) return 0; /* Check that the relocation address is valid... */ @@ -1407,7 +1660,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, ev->flags &= ~EXEC_OBJECT_ASYNC; /* and update the user's relocation entry */ - return relocate_entry(eb, ev->vma, reloc, target->vma); + return relocate_entry(ev->vma, reloc, eb, target->vma); } static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) @@ -1444,9 +1697,13 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) * we would try to acquire the struct mutex again. Obviously * this is bad and so lockdep complains vehemently. 
*/ - copied = __copy_from_user(r, urelocs, count * sizeof(r[0])); - if (unlikely(copied)) - return -EFAULT; + pagefault_disable(); + copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); + pagefault_enable(); + if (unlikely(copied)) { + remain = -EFAULT; + goto out; + } remain -= count; do { @@ -1454,7 +1711,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) if (likely(offset == 0)) { } else if ((s64)offset < 0) { - return (int)offset; + remain = (int)offset; + goto out; } else { /* * Note that reporting an error now @@ -1484,74 +1742,401 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) } while (r++, --count); urelocs += ARRAY_SIZE(stack); } while (remain); +out: + reloc_cache_reset(&eb->reloc_cache, eb); + return remain; +} - return 0; +static int +eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev) +{ + const struct drm_i915_gem_exec_object2 *entry = ev->exec; + struct drm_i915_gem_relocation_entry *relocs = + u64_to_ptr(typeof(*relocs), entry->relocs_ptr); + unsigned int i; + int err; + + for (i = 0; i < entry->relocation_count; i++) { + u64 offset = eb_relocate_entry(eb, ev, &relocs[i]); + + if ((s64)offset < 0) { + err = (int)offset; + goto err; + } + } + err = 0; +err: + reloc_cache_reset(&eb->reloc_cache, eb); + return err; } -static int eb_relocate(struct i915_execbuffer *eb) +static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) { + const char __user *addr, *end; + unsigned long size; + char __maybe_unused c; + + size = entry->relocation_count; + if (size == 0) + return 0; + + if (size > N_RELOC(ULONG_MAX)) + return -EINVAL; + + addr = u64_to_user_ptr(entry->relocs_ptr); + size *= sizeof(struct drm_i915_gem_relocation_entry); + if (!access_ok(addr, size)) + return -EFAULT; + + end = addr + size; + for (; addr < end; addr += PAGE_SIZE) { + int err = __get_user(c, addr); + if (err) + return err; + } + return __get_user(c, end - 1); +} + +static int 
eb_copy_relocations(const struct i915_execbuffer *eb) +{ + struct drm_i915_gem_relocation_entry *relocs; + const unsigned int count = eb->buffer_count; + unsigned int i; int err; - err = eb_lookup_vmas(eb); - if (err) - return err; + for (i = 0; i < count; i++) { + const unsigned int nreloc = eb->exec[i].relocation_count; + struct drm_i915_gem_relocation_entry __user *urelocs; + unsigned long size; + unsigned long copied; + + if (nreloc == 0) + continue; - if (!list_empty(&eb->unbound)) { - err = eb_reserve(eb); + err = check_relocations(&eb->exec[i]); + if (err) + goto err; + + urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); + size = nreloc * sizeof(*relocs); + + relocs = kvmalloc_array(size, 1, GFP_KERNEL); + if (!relocs) { + err = -ENOMEM; + goto err; + } + + /* copy_from_user is limited to < 4GiB */ + copied = 0; + do { + unsigned int len = + min_t(u64, BIT_ULL(31), size - copied); + + if (__copy_from_user((char *)relocs + copied, + (char __user *)urelocs + copied, + len)) + goto end; + + copied += len; + } while (copied < size); + + /* + * As we do not update the known relocation offsets after + * relocating (due to the complexities in lock handling), + * we need to mark them as invalid now so that we force the + * relocation processing next time. Just in case the target + * object is evicted and then rebound into its old + * presumed_offset before the next execbuffer - if that + * happened we would make the mistake of assuming that the + * relocations were valid. 
+ */ + if (!user_access_begin(urelocs, size)) + goto end; + + for (copied = 0; copied < nreloc; copied++) + unsafe_put_user(-1, + &urelocs[copied].presumed_offset, + end_user); + user_access_end(); + + eb->exec[i].relocs_ptr = (uintptr_t)relocs; + } + + return 0; + +end_user: + user_access_end(); +end: + kvfree(relocs); + err = -EFAULT; +err: + while (i--) { + relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); + if (eb->exec[i].relocation_count) + kvfree(relocs); + } + return err; +} + +static int eb_prefault_relocations(const struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + int err; + + err = check_relocations(&eb->exec[i]); if (err) return err; } - /* The objects are in their final locations, apply the relocations. */ - if (eb->args->flags & __EXEC_HAS_RELOC) { - struct eb_vma *ev; - int flush; + return 0; +} - list_for_each_entry(ev, &eb->relocs, reloc_link) { +static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb, + struct i915_request *rq) +{ + bool have_copy = false; + struct eb_vma *ev; + int err = 0; + +repeat: + if (signal_pending(current)) { + err = -ERESTARTSYS; + goto out; + } + + /* We may process another execbuffer during the unlock... */ + eb_release_vmas(eb, false); + i915_gem_ww_ctx_fini(&eb->ww); + + if (rq) { + /* nonblocking is always false */ + if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) { + i915_request_put(rq); + rq = NULL; + + err = -EINTR; + goto err_relock; + } + + i915_request_put(rq); + rq = NULL; + } + + /* + * We take 3 passes through the slowpatch. + * + * 1 - we try to just prefault all the user relocation entries and + * then attempt to reuse the atomic pagefault disabled fast path again. 
+ * + * 2 - we copy the user entries to a local buffer here outside of the + * local and allow ourselves to wait upon any rendering before + * relocations + * + * 3 - we already have a local copy of the relocation entries, but + * were interrupted (EAGAIN) whilst waiting for the objects, try again. + */ + if (!err) { + err = eb_prefault_relocations(eb); + } else if (!have_copy) { + err = eb_copy_relocations(eb); + have_copy = err == 0; + } else { + cond_resched(); + err = 0; + } + + if (!err) + flush_workqueue(eb->i915->mm.userptr_wq); + +err_relock: + i915_gem_ww_ctx_init(&eb->ww, true); + if (err) + goto out; + + /* reacquire the objects */ +repeat_validate: + rq = eb_pin_engine(eb, false); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + rq = NULL; + goto err; + } + + /* We didn't throttle, should be NULL */ + GEM_WARN_ON(rq); + + err = eb_validate_vmas(eb); + if (err) + goto err; + + GEM_BUG_ON(!eb->batch); + + list_for_each_entry(ev, &eb->relocs, reloc_link) { + if (!have_copy) { + pagefault_disable(); err = eb_relocate_vma(eb, ev); + pagefault_enable(); + if (err) + break; + } else { + err = eb_relocate_vma_slow(eb, ev); if (err) break; } + } + + if (err == -EDEADLK) + goto err; + + if (err && !have_copy) + goto repeat; + + if (err) + goto err; - flush = reloc_gpu_flush(&eb->reloc_cache); + /* as last step, parse the command buffer */ + err = eb_parse(eb); + if (err) + goto err; + + /* + * Leave the user relocations as are, this is the painfully slow path, + * and we want to avoid the complication of dropping the lock whilst + * having buffers reserved in the aperture and so causing spurious + * ENOSPC for random operations. 
+ */ + +err: + if (err == -EDEADLK) { + eb_release_vmas(eb, false); + err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) - err = flush; + goto repeat_validate; + } + + if (err == -EAGAIN) + goto repeat; + +out: + if (have_copy) { + const unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + const struct drm_i915_gem_exec_object2 *entry = + &eb->exec[i]; + struct drm_i915_gem_relocation_entry *relocs; + + if (!entry->relocation_count) + continue; + + relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr); + kvfree(relocs); + } } + if (rq) + i915_request_put(rq); + return err; } -static int eb_move_to_gpu(struct i915_execbuffer *eb) +static int eb_relocate_parse(struct i915_execbuffer *eb) { - const unsigned int count = eb->buffer_count; - struct ww_acquire_ctx acquire; - unsigned int i; - int err = 0; + int err; + struct i915_request *rq = NULL; + bool throttle = true; - ww_acquire_init(&acquire, &reservation_ww_class); +retry: + rq = eb_pin_engine(eb, throttle); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + rq = NULL; + if (err != -EDEADLK) + return err; - for (i = 0; i < count; i++) { - struct eb_vma *ev = &eb->vma[i]; - struct i915_vma *vma = ev->vma; + goto err; + } - err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire); - if (err == -EDEADLK) { - GEM_BUG_ON(i == 0); - do { - int j = i - 1; + if (rq) { + bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; - ww_mutex_unlock(&eb->vma[j].vma->resv->lock); + /* Need to drop all locks now for throttling, take slowpath */ + err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0); + if (err == -ETIME) { + if (nonblock) { + err = -EWOULDBLOCK; + i915_request_put(rq); + goto err; + } + goto slow; + } + i915_request_put(rq); + rq = NULL; + } + + /* only throttle once, even if we didn't need to throttle */ + throttle = false; + + err = eb_validate_vmas(eb); + if (err == -EAGAIN) + goto slow; + else if (err) + goto err; - swap(eb->vma[i], eb->vma[j]); - } while (--i); + /* The 
objects are in their final locations, apply the relocations. */ + if (eb->args->flags & __EXEC_HAS_RELOC) { + struct eb_vma *ev; - err = ww_mutex_lock_slow_interruptible(&vma->resv->lock, - &acquire); + list_for_each_entry(ev, &eb->relocs, reloc_link) { + err = eb_relocate_vma(eb, ev); + if (err) + break; } - if (err) - break; + + if (err == -EDEADLK) + goto err; + else if (err) + goto slow; + } + + if (!err) + err = eb_parse(eb); + +err: + if (err == -EDEADLK) { + eb_release_vmas(eb, false); + err = i915_gem_ww_ctx_backoff(&eb->ww); + if (!err) + goto retry; } - ww_acquire_done(&acquire); + + return err; + +slow: + err = eb_relocate_parse_slow(eb, rq); + if (err) + /* + * If the user expects the execobject.offset and + * reloc.presumed_offset to be an exact match, + * as for using NO_RELOC, then we cannot update + * the execobject.offset until we have completed + * relocation. + */ + eb->args->flags &= ~__EXEC_HAS_RELOC; + + return err; +} + +static int eb_move_to_gpu(struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i = count; + int err = 0; while (i--) { struct eb_vma *ev = &eb->vma[i]; @@ -1596,13 +2181,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) if (err == 0) err = i915_vma_move_to_active(vma, eb->request, flags); - - i915_vma_unlock(vma); - eb_unreserve_vma(ev); } - ww_acquire_fini(&acquire); - - eb_vma_array_put(fetch_and_zero(&eb->array)); if (unlikely(err)) goto err_skip; @@ -1622,7 +2201,8 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) return -EINVAL; /* Kernel clipping was a DRI1 misfeature */ - if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { + if (!(exec->flags & (I915_EXEC_FENCE_ARRAY | + I915_EXEC_USE_EXTENSIONS))) { if (exec->num_cliprects || exec->cliprects_ptr) return -EINVAL; } @@ -1666,7 +2246,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) } static struct i915_vma * -shadow_batch_pin(struct drm_i915_gem_object *obj, 
+shadow_batch_pin(struct i915_execbuffer *eb, + struct drm_i915_gem_object *obj, struct i915_address_space *vm, unsigned int flags) { @@ -1677,7 +2258,7 @@ shadow_batch_pin(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return vma; - err = i915_vma_pin(vma, 0, 0, flags); + err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags); if (err) return ERR_PTR(err); @@ -1690,8 +2271,8 @@ struct eb_parse_work { struct i915_vma *batch; struct i915_vma *shadow; struct i915_vma *trampoline; - unsigned int batch_offset; - unsigned int batch_length; + unsigned long batch_offset; + unsigned long batch_length; }; static int __eb_parse(struct dma_fence_work *work) @@ -1729,7 +2310,7 @@ __parser_mark_active(struct i915_vma *vma, { struct intel_gt_buffer_pool_node *node = vma->private; - return i915_active_ref(&node->active, tl, fence); + return i915_active_ref(&node->active, tl->fence_context, fence); } static int @@ -1761,6 +2342,9 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, struct eb_parse_work *pw; int err; + GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset)); + GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length)); + pw = kzalloc(sizeof(*pw), GFP_KERNEL); if (!pw) return -ENOMEM; @@ -1793,36 +2377,26 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, if (err) goto err_commit; - err = dma_resv_lock_interruptible(pw->batch->resv, NULL); - if (err) - goto err_commit; - err = dma_resv_reserve_shared(pw->batch->resv, 1); if (err) - goto err_commit_unlock; + goto err_commit; /* Wait for all writes (and relocs) into the batch to complete */ err = i915_sw_fence_await_reservation(&pw->base.chain, pw->batch->resv, NULL, false, 0, I915_FENCE_GFP); if (err < 0) - goto err_commit_unlock; + goto err_commit; /* Keep the batch alive and unwritten as we parse */ dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); - dma_resv_unlock(pw->batch->resv); - /* Force execution to wait for completion of the parser */ - dma_resv_lock(shadow->resv, NULL); 
dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); - dma_resv_unlock(shadow->resv); dma_fence_work_commit_imm(&pw->base); return 0; -err_commit_unlock: - dma_resv_unlock(pw->batch->resv); err_commit: i915_sw_fence_set_error_once(&pw->base.chain, err); dma_fence_work_commit_imm(&pw->base); @@ -1837,16 +2411,33 @@ err_free: return err; } +static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) +{ + /* + * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure + * batch" bit. Hence we need to pin secure batches into the global gtt. + * hsw should have this fixed, but bdw mucks it up again. */ + if (eb->batch_flags & I915_DISPATCH_SECURE) + return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0); + + return NULL; +} + static int eb_parse(struct i915_execbuffer *eb) { struct drm_i915_private *i915 = eb->i915; - struct intel_gt_buffer_pool_node *pool; - struct i915_vma *shadow, *trampoline; - unsigned int len; + struct intel_gt_buffer_pool_node *pool = eb->batch_pool; + struct i915_vma *shadow, *trampoline, *batch; + unsigned long len; int err; - if (!eb_use_cmdparser(eb)) - return 0; + if (!eb_use_cmdparser(eb)) { + batch = eb_dispatch_secure(eb, eb->batch->vma); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + goto secure_batch; + } len = eb->batch_len; if (!CMDPARSER_USES_GGTT(eb->i915)) { @@ -1862,12 +2453,21 @@ static int eb_parse(struct i915_execbuffer *eb) } else { len += I915_CMD_PARSER_TRAMPOLINE_SIZE; } + if (unlikely(len < eb->batch_len)) /* last paranoid check of overflow */ + return -EINVAL; - pool = intel_gt_get_buffer_pool(eb->engine->gt, len); - if (IS_ERR(pool)) - return PTR_ERR(pool); + if (!pool) { + pool = intel_gt_get_buffer_pool(eb->engine->gt, len); + if (IS_ERR(pool)) + return PTR_ERR(pool); + eb->batch_pool = pool; + } - shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER); + err = i915_gem_object_lock(pool->obj, &eb->ww); + if (err) + goto err; + + shadow = 
shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER); if (IS_ERR(shadow)) { err = PTR_ERR(shadow); goto err; @@ -1879,7 +2479,7 @@ static int eb_parse(struct i915_execbuffer *eb) if (CMDPARSER_USES_GGTT(eb->i915)) { trampoline = shadow; - shadow = shadow_batch_pin(pool->obj, + shadow = shadow_batch_pin(eb, pool->obj, &eb->engine->gt->ggtt->vm, PIN_GLOBAL); if (IS_ERR(shadow)) { @@ -1892,42 +2492,43 @@ static int eb_parse(struct i915_execbuffer *eb) eb->batch_flags |= I915_DISPATCH_SECURE; } + batch = eb_dispatch_secure(eb, shadow); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_trampoline; + } + err = eb_parse_pipeline(eb, shadow, trampoline); if (err) - goto err_trampoline; + goto err_unpin_batch; - eb->vma[eb->buffer_count].vma = i915_vma_get(shadow); - eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN; eb->batch = &eb->vma[eb->buffer_count++]; - eb->vma[eb->buffer_count].vma = NULL; + eb->batch->vma = i915_vma_get(shadow); + eb->batch->flags = __EXEC_OBJECT_HAS_PIN; eb->trampoline = trampoline; eb->batch_start_offset = 0; +secure_batch: + if (batch) { + eb->batch = &eb->vma[eb->buffer_count++]; + eb->batch->flags = __EXEC_OBJECT_HAS_PIN; + eb->batch->vma = i915_vma_get(batch); + } return 0; +err_unpin_batch: + if (batch) + i915_vma_unpin(batch); err_trampoline: if (trampoline) i915_vma_unpin(trampoline); err_shadow: i915_vma_unpin(shadow); err: - intel_gt_buffer_pool_put(pool); return err; } -static void -add_to_client(struct i915_request *rq, struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - - rq->file_priv = file_priv; - - spin_lock(&file_priv->mm.lock); - list_add_tail(&rq->client_link, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); -} - static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch) { int err; @@ -2009,7 +2610,7 @@ static const enum intel_engine_id user_ring_map[] = { [I915_EXEC_VEBOX] = VECS0 }; -static struct i915_request *eb_throttle(struct 
intel_context *ce) +static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce) { struct intel_ring *ring = ce->ring; struct intel_timeline *tl = ce->timeline; @@ -2043,31 +2644,26 @@ static struct i915_request *eb_throttle(struct intel_context *ce) return i915_request_get(rq); } -static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) +static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle) { + struct intel_context *ce = eb->context; struct intel_timeline *tl; - struct i915_request *rq; + struct i915_request *rq = NULL; int err; - /* - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged. - */ - err = intel_gt_terminally_wedged(ce->engine->gt); - if (err) - return err; + GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED); if (unlikely(intel_context_is_banned(ce))) - return -EIO; + return ERR_PTR(-EIO); /* * Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - err = intel_context_pin(ce); + err = intel_context_pin_ww(ce, &eb->ww); if (err) - return err; + return ERR_PTR(err); /* * Take a local wakeref for preparing to dispatch the execbuf as @@ -2079,45 +2675,17 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) */ tl = intel_context_timeline_lock(ce); if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto err_unpin; + intel_context_unpin(ce); + return ERR_CAST(tl); } intel_context_enter(ce); - rq = eb_throttle(ce); - + if (throttle) + rq = eb_throttle(eb, ce); intel_context_timeline_unlock(tl); - if (rq) { - bool nonblock = eb->file->filp->f_flags & O_NONBLOCK; - long timeout; - - timeout = MAX_SCHEDULE_TIMEOUT; - if (nonblock) - timeout = 0; - - timeout = i915_request_wait(rq, - I915_WAIT_INTERRUPTIBLE, - timeout); - i915_request_put(rq); - - if (timeout < 0) { - err = nonblock ? 
-EWOULDBLOCK : timeout; - goto err_exit; - } - } - - eb->engine = ce->engine; - eb->context = ce; - return 0; - -err_exit: - mutex_lock(&tl->mutex); - intel_context_exit(ce); - intel_context_timeline_unlock(tl); -err_unpin: - intel_context_unpin(ce); - return err; + eb->args->flags |= __EXEC_ENGINE_PINNED; + return rq; } static void eb_unpin_engine(struct i915_execbuffer *eb) @@ -2125,6 +2693,11 @@ static void eb_unpin_engine(struct i915_execbuffer *eb) struct intel_context *ce = eb->context; struct intel_timeline *tl = ce->timeline; + if (!(eb->args->flags & __EXEC_ENGINE_PINNED)) + return; + + eb->args->flags &= ~__EXEC_ENGINE_PINNED; + mutex_lock(&tl->mutex); intel_context_exit(ce); mutex_unlock(&tl->mutex); @@ -2133,11 +2706,10 @@ static void eb_unpin_engine(struct i915_execbuffer *eb) } static unsigned int -eb_select_legacy_ring(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +eb_select_legacy_ring(struct i915_execbuffer *eb) { struct drm_i915_private *i915 = eb->i915; + struct drm_i915_gem_execbuffer2 *args = eb->args; unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; if (user_ring_id != I915_EXEC_BSD && @@ -2152,7 +2724,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_engine(i915, file); + bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file); } else if (bsd_idx >= I915_EXEC_BSD_RING1 && bsd_idx <= I915_EXEC_BSD_RING2) { bsd_idx >>= I915_EXEC_BSD_SHIFT; @@ -2177,131 +2749,297 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, } static int -eb_pin_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +eb_select_engine(struct i915_execbuffer *eb) { struct intel_context *ce; unsigned int idx; int err; if (i915_gem_context_user_engines(eb->gem_context)) - idx = args->flags & I915_EXEC_RING_MASK; + idx = eb->args->flags & 
I915_EXEC_RING_MASK; else - idx = eb_select_legacy_ring(eb, file, args); + idx = eb_select_legacy_ring(eb); ce = i915_gem_context_get_engine(eb->gem_context, idx); if (IS_ERR(ce)) return PTR_ERR(ce); - err = __eb_pin_engine(eb, ce); - intel_context_put(ce); + intel_gt_pm_get(ce->engine->gt); + + if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { + err = intel_context_alloc_state(ce); + if (err) + goto err; + } + + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged. + */ + err = intel_gt_terminally_wedged(ce->engine->gt); + if (err) + goto err; + + eb->context = ce; + eb->engine = ce->engine; + /* + * Make sure engine pool stays alive even if we call intel_context_put + * during ww handling. The pool is destroyed when last pm reference + * is dropped, which breaks our -EDEADLK handling. + */ + return err; + +err: + intel_gt_pm_put(ce->engine->gt); + intel_context_put(ce); return err; } static void -__free_fence_array(struct drm_syncobj **fences, unsigned int n) +eb_put_engine(struct i915_execbuffer *eb) { - while (n--) - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); + intel_gt_pm_put(eb->engine->gt); + intel_context_put(eb->context); +} + +static void +__free_fence_array(struct eb_fence *fences, unsigned int n) +{ + while (n--) { + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); + dma_fence_put(fences[n].dma_fence); + kfree(fences[n].chain_fence); + } kvfree(fences); } -static struct drm_syncobj ** -get_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file) +static int +add_timeline_fence_array(struct i915_execbuffer *eb, + const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences) { - const unsigned long nfences = args->num_cliprects; - struct drm_i915_gem_exec_fence __user *user; - struct drm_syncobj **fences; - unsigned long n; - int err; + struct drm_i915_gem_exec_fence __user *user_fences; + u64 __user *user_values; + struct eb_fence *f; + u64 nfences; + int err = 
0; - if (!(args->flags & I915_EXEC_FENCE_ARRAY)) - return NULL; + nfences = timeline_fences->fence_count; + if (!nfences) + return 0; /* Check multiplication overflow for access_ok() and kvmalloc_array() */ BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); if (nfences > min_t(unsigned long, - ULONG_MAX / sizeof(*user), - SIZE_MAX / sizeof(*fences))) - return ERR_PTR(-EINVAL); + ULONG_MAX / sizeof(*user_fences), + SIZE_MAX / sizeof(*f)) - eb->num_fences) + return -EINVAL; - user = u64_to_user_ptr(args->cliprects_ptr); - if (!access_ok(user, nfences * sizeof(*user))) - return ERR_PTR(-EFAULT); + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); + if (!access_ok(user_fences, nfences * sizeof(*user_fences))) + return -EFAULT; - fences = kvmalloc_array(nfences, sizeof(*fences), - __GFP_NOWARN | GFP_KERNEL); - if (!fences) - return ERR_PTR(-ENOMEM); + user_values = u64_to_user_ptr(timeline_fences->values_ptr); + if (!access_ok(user_values, nfences * sizeof(*user_values))) + return -EFAULT; + + f = krealloc(eb->fences, + (eb->num_fences + nfences) * sizeof(*f), + __GFP_NOWARN | GFP_KERNEL); + if (!f) + return -ENOMEM; - for (n = 0; n < nfences; n++) { - struct drm_i915_gem_exec_fence fence; + eb->fences = f; + f += eb->num_fences; + + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + + while (nfences--) { + struct drm_i915_gem_exec_fence user_fence; struct drm_syncobj *syncobj; + struct dma_fence *fence = NULL; + u64 point; - if (__copy_from_user(&fence, user++, sizeof(fence))) { - err = -EFAULT; - goto err; + if (__copy_from_user(&user_fence, + user_fences++, + sizeof(user_fence))) + return -EFAULT; + + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) + return -EINVAL; + + if (__get_user(point, user_values++)) + return -EFAULT; + + syncobj = drm_syncobj_find(eb->file, user_fence.handle); + if (!syncobj) { + DRM_DEBUG("Invalid syncobj handle provided\n"); + return -ENOENT; } - if (fence.flags & 
__I915_EXEC_FENCE_UNKNOWN_FLAGS) { - err = -EINVAL; - goto err; + fence = drm_syncobj_fence_get(syncobj); + + if (!fence && user_fence.flags && + !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { + DRM_DEBUG("Syncobj handle has no fence\n"); + drm_syncobj_put(syncobj); + return -EINVAL; + } + + if (fence) + err = dma_fence_chain_find_seqno(&fence, point); + + if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); + dma_fence_put(fence); + drm_syncobj_put(syncobj); + return err; + } + + /* + * A point might have been signaled already and + * garbage collected from the timeline. In this case + * just ignore the point and carry on. + */ + if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { + drm_syncobj_put(syncobj); + continue; + } + + /* + * For timeline syncobjs we need to preallocate chains for + * later signaling. + */ + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { + /* + * Waiting and signaling the same point (when point != + * 0) would break the timeline. 
+ */ + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { + DRM_DEBUG("Trying to wait & signal the same timeline point.\n"); + dma_fence_put(fence); + drm_syncobj_put(syncobj); + return -EINVAL; + } + + f->chain_fence = + kmalloc(sizeof(*f->chain_fence), + GFP_KERNEL); + if (!f->chain_fence) { + drm_syncobj_put(syncobj); + dma_fence_put(fence); + return -ENOMEM; + } + } else { + f->chain_fence = NULL; } - syncobj = drm_syncobj_find(file, fence.handle); + f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); + f->dma_fence = fence; + f->value = point; + f++; + eb->num_fences++; + } + + return 0; +} + +static int add_fence_array(struct i915_execbuffer *eb) +{ + struct drm_i915_gem_execbuffer2 *args = eb->args; + struct drm_i915_gem_exec_fence __user *user; + unsigned long num_fences = args->num_cliprects; + struct eb_fence *f; + + if (!(args->flags & I915_EXEC_FENCE_ARRAY)) + return 0; + + if (!num_fences) + return 0; + + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (num_fences > min_t(unsigned long, + ULONG_MAX / sizeof(*user), + SIZE_MAX / sizeof(*f) - eb->num_fences)) + return -EINVAL; + + user = u64_to_user_ptr(args->cliprects_ptr); + if (!access_ok(user, num_fences * sizeof(*user))) + return -EFAULT; + + f = krealloc(eb->fences, + (eb->num_fences + num_fences) * sizeof(*f), + __GFP_NOWARN | GFP_KERNEL); + if (!f) + return -ENOMEM; + + eb->fences = f; + f += eb->num_fences; + while (num_fences--) { + struct drm_i915_gem_exec_fence user_fence; + struct drm_syncobj *syncobj; + struct dma_fence *fence = NULL; + + if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) + return -EFAULT; + + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) + return -EINVAL; + + syncobj = drm_syncobj_find(eb->file, user_fence.handle); if (!syncobj) { DRM_DEBUG("Invalid syncobj handle provided\n"); - err = -ENOENT; - goto err; + return -ENOENT; + } + + if (user_fence.flags & 
I915_EXEC_FENCE_WAIT) { + fence = drm_syncobj_fence_get(syncobj); + if (!fence) { + DRM_DEBUG("Syncobj handle has no fence\n"); + drm_syncobj_put(syncobj); + return -EINVAL; + } } BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); + f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); + f->dma_fence = fence; + f->value = 0; + f->chain_fence = NULL; + f++; + eb->num_fences++; } - return fences; - -err: - __free_fence_array(fences, n); - return ERR_PTR(err); + return 0; } -static void -put_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_syncobj **fences) +static void put_fence_array(struct eb_fence *fences, int num_fences) { if (fences) - __free_fence_array(fences, args->num_cliprects); + __free_fence_array(fences, num_fences); } static int -await_fence_array(struct i915_execbuffer *eb, - struct drm_syncobj **fences) +await_fence_array(struct i915_execbuffer *eb) { - const unsigned int nfences = eb->args->num_cliprects; unsigned int n; int err; - for (n = 0; n < nfences; n++) { + for (n = 0; n < eb->num_fences; n++) { struct drm_syncobj *syncobj; - struct dma_fence *fence; unsigned int flags; - syncobj = ptr_unpack_bits(fences[n], &flags, 2); - if (!(flags & I915_EXEC_FENCE_WAIT)) - continue; + syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); - fence = drm_syncobj_fence_get(syncobj); - if (!fence) - return -EINVAL; + if (!eb->fences[n].dma_fence) + continue; - err = i915_request_await_dma_fence(eb->request, fence); - dma_fence_put(fence); + err = i915_request_await_dma_fence(eb->request, + eb->fences[n].dma_fence); if (err < 0) return err; } @@ -2309,26 +3047,47 @@ await_fence_array(struct i915_execbuffer *eb, return 0; } -static void -signal_fence_array(struct i915_execbuffer *eb, - struct drm_syncobj **fences) +static void signal_fence_array(const struct i915_execbuffer *eb) { - const unsigned int nfences = eb->args->num_cliprects; struct dma_fence * 
const fence = &eb->request->fence; unsigned int n; - for (n = 0; n < nfences; n++) { + for (n = 0; n < eb->num_fences; n++) { struct drm_syncobj *syncobj; unsigned int flags; - syncobj = ptr_unpack_bits(fences[n], &flags, 2); + syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); if (!(flags & I915_EXEC_FENCE_SIGNAL)) continue; - drm_syncobj_replace_fence(syncobj, fence); + if (eb->fences[n].chain_fence) { + drm_syncobj_add_point(syncobj, + eb->fences[n].chain_fence, + fence, + eb->fences[n].value); + /* + * The chain's ownership is transferred to the + * timeline. + */ + eb->fences[n].chain_fence = NULL; + } else { + drm_syncobj_replace_fence(syncobj, fence); + } } } +static int +parse_timeline_fences(struct i915_user_extension __user *ext, void *data) +{ + struct i915_execbuffer *eb = data; + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; + + if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences))) + return -EFAULT; + + return add_timeline_fence_array(eb, &timeline_fences); +} + static void retire_requests(struct intel_timeline *tl, struct i915_request *end) { struct i915_request *rq, *rn; @@ -2370,12 +3129,37 @@ static void eb_request_add(struct i915_execbuffer *eb) mutex_unlock(&tl->mutex); } +static const i915_user_extension_fn execbuf_extensions[] = { + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, +}; + +static int +parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args, + struct i915_execbuffer *eb) +{ + if (!(args->flags & I915_EXEC_USE_EXTENSIONS)) + return 0; + + /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot + * have another flag also using it at the same time. 
+ */ + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) + return -EINVAL; + + if (args->num_cliprects != 0) + return -EINVAL; + + return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr), + execbuf_extensions, + ARRAY_SIZE(execbuf_extensions), + eb); +} + static int i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, - struct drm_i915_gem_exec_object2 *exec, - struct drm_syncobj **fences) + struct drm_i915_gem_exec_object2 *exec) { struct drm_i915_private *i915 = to_i915(dev); struct i915_execbuffer eb; @@ -2392,10 +3176,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.i915 = i915; eb.file = file; eb.args = args; - if (!(args->flags & I915_EXEC_NO_RELOC)) + if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) args->flags |= __EXEC_HAS_RELOC; eb.exec = exec; + eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); + eb.vma[0].vma = NULL; + eb.reloc_pool = eb.batch_pool = NULL; + eb.reloc_context = NULL; eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; reloc_cache_init(&eb.reloc_cache, eb.i915); @@ -2405,6 +3193,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.batch_len = args->batch_len; eb.trampoline = NULL; + eb.fences = NULL; + eb.num_fences = 0; + eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { if (INTEL_GEN(i915) >= 11) @@ -2422,14 +3213,24 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (args->flags & I915_EXEC_IS_PINNED) eb.batch_flags |= I915_DISPATCH_PINNED; + err = parse_execbuf2_extensions(args, &eb); + if (err) + goto err_ext; + + err = add_fence_array(&eb); + if (err) + goto err_ext; + #define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT) if (args->flags & IN_FENCES) { if ((args->flags & IN_FENCES) == IN_FENCES) return -EINVAL; in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!in_fence) - return -EINVAL; + if (!in_fence) { + err = -EINVAL; + goto err_ext; + } } #undef IN_FENCES @@ -2451,11 +3252,19 @@ 
i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; - err = eb_pin_engine(&eb, file, args); + err = eb_select_engine(&eb); if (unlikely(err)) goto err_context; - err = eb_relocate(&eb); + err = eb_lookup_vmas(&eb); + if (err) { + eb_release_vmas(&eb, true); + goto err_engine; + } + + i915_gem_ww_ctx_init(&eb.ww, true); + + err = eb_relocate_parse(&eb); if (err) { /* * If the user expects the execobject.offset and @@ -2468,54 +3277,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } - if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) { - drm_dbg(&i915->drm, - "Attempting to use self-modifying batch buffer\n"); - err = -EINVAL; - goto err_vma; - } - - if (range_overflows_t(u64, - eb.batch_start_offset, eb.batch_len, - eb.batch->vma->size)) { - drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n"); - err = -EINVAL; - goto err_vma; - } - - if (eb.batch_len == 0) - eb.batch_len = eb.batch->vma->size - eb.batch_start_offset; + ww_acquire_done(&eb.ww.ctx); - err = eb_parse(&eb); - if (err) - goto err_vma; - - /* - * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure - * batch" bit. Hence we need to pin secure batches into the global gtt. - * hsw should have this fixed, but bdw mucks it up again. */ batch = eb.batch->vma; - if (eb.batch_flags & I915_DISPATCH_SECURE) { - struct i915_vma *vma; - - /* - * So on first glance it looks freaky that we pin the batch here - * outside of the reservation loop. But: - * - The batch is already pinned into the relevant ppgtt, so we - * already have the backing storage fully allocated. - * - No other BO uses the global gtt (well contexts, but meh), - * so we don't really have issues with multiple objects not - * fitting due to fragmentation. - * So this is actually safe. 
- */ - vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_parse; - } - - batch = vma; - } /* All GPU relocation batches must be submitted prior to the user rq */ GEM_BUG_ON(eb.reloc_cache.rq); @@ -2524,7 +3288,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.request = i915_request_create(eb.context); if (IS_ERR(eb.request)) { err = PTR_ERR(eb.request); - goto err_batch_unpin; + goto err_vma; } if (in_fence) { @@ -2539,8 +3303,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_request; } - if (fences) { - err = await_fence_array(&eb, fences); + if (eb.fences) { + err = await_fence_array(&eb); if (err) goto err_request; } @@ -2561,18 +3325,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, * to explicitly hold another reference here. */ eb.request->batch = batch; - if (batch->private) - intel_gt_buffer_pool_mark_active(batch->private, eb.request); + if (eb.batch_pool) + intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request); trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb, batch); err_request: - add_to_client(eb.request, file); i915_request_get(eb.request); eb_request_add(&eb); - if (fences) - signal_fence_array(&eb, fences); + if (eb.fences) + signal_fence_array(&eb); if (out_fence) { if (err == 0) { @@ -2586,16 +3349,21 @@ err_request: } i915_request_put(eb.request); -err_batch_unpin: - if (eb.batch_flags & I915_DISPATCH_SECURE) - i915_vma_unpin(batch); -err_parse: - if (batch->private) - intel_gt_buffer_pool_put(batch->private); err_vma: + eb_release_vmas(&eb, true); if (eb.trampoline) i915_vma_unpin(eb.trampoline); - eb_unpin_engine(&eb); + WARN_ON(err == -EDEADLK); + i915_gem_ww_ctx_fini(&eb.ww); + + if (eb.batch_pool) + intel_gt_buffer_pool_put(eb.batch_pool); + if (eb.reloc_pool) + intel_gt_buffer_pool_put(eb.reloc_pool); + if (eb.reloc_context) + intel_context_put(eb.reloc_context); +err_engine: + eb_put_engine(&eb); err_context: 
i915_gem_context_put(eb.gem_context); err_destroy: @@ -2605,12 +3373,14 @@ err_out_fence: put_unused_fd(out_fence_fd); err_in_fence: dma_fence_put(in_fence); +err_ext: + put_fence_array(eb.fences, eb.num_fences); return err; } static size_t eb_element_size(void) { - return sizeof(struct drm_i915_gem_exec_object2); + return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma); } static bool check_buffer_count(size_t count) @@ -2666,7 +3436,9 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, /* Copy in the exec list from userland */ exec_list = kvmalloc_array(count, sizeof(*exec_list), __GFP_NOWARN | GFP_KERNEL); - exec2_list = kvmalloc_array(count, eb_element_size(), + + /* Allocate extra slots for use by the command parser */ + exec2_list = kvmalloc_array(count + 2, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec_list == NULL || exec2_list == NULL) { drm_dbg(&i915->drm, @@ -2699,7 +3471,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, exec2_list[i].flags = 0; } - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); if (exec2.flags & __EXEC_HAS_RELOC) { struct drm_i915_gem_exec_object __user *user_exec_list = u64_to_user_ptr(args->buffers_ptr); @@ -2731,7 +3503,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list; - struct drm_syncobj **fences = NULL; const size_t count = args->buffer_count; int err; @@ -2744,7 +3515,8 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, if (err) return err; - exec2_list = kvmalloc_array(count, eb_element_size(), + /* Allocate extra slots for use by the command parser */ + exec2_list = kvmalloc_array(count + 2, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec2_list == NULL) { drm_dbg(&i915->drm, "Failed to allocate exec list for %zd 
buffers\n", @@ -2759,15 +3531,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, return -EFAULT; } - if (args->flags & I915_EXEC_FENCE_ARRAY) { - fences = get_fence_array(args, file); - if (IS_ERR(fences)) { - kvfree(exec2_list); - return PTR_ERR(fences); - } - } - - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); + err = i915_gem_do_execbuffer(dev, file, args, exec2_list); /* * Now that we have begun execution of the batchbuffer, we ignore @@ -2808,7 +3572,6 @@ end:; } args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; - put_fence_array(args, fences); kvfree(exec2_list); return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index b23368529a40..3d69e51f3e4d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -209,7 +209,7 @@ static vm_fault_t i915_error_to_vmf_fault(int err) switch (err) { default: WARN_ONCE(err, "unhandled error in %s: %i\n", __func__, err); - /* fallthrough */ + fallthrough; case -EIO: /* shmemfs failure from swap device */ case -EFAULT: /* purged object */ case -ENODEV: /* bad object, how did you get here! 
*/ @@ -283,37 +283,46 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) struct intel_runtime_pm *rpm = &i915->runtime_pm; struct i915_ggtt *ggtt = &i915->ggtt; bool write = area->vm_flags & VM_WRITE; + struct i915_gem_ww_ctx ww; intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; int srcu; int ret; - /* Sanity check that we allow writing into this object */ - if (i915_gem_object_is_readonly(obj) && write) - return VM_FAULT_SIGBUS; - /* We don't use vmf->pgoff since that has the fake offset */ page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; trace_i915_gem_object_fault(obj, page_offset, true, write); - ret = i915_gem_object_pin_pages(obj); + wakeref = intel_runtime_pm_get(rpm); + + i915_gem_ww_ctx_init(&ww, true); +retry: + ret = i915_gem_object_lock(obj, &ww); if (ret) - goto err; + goto err_rpm; - wakeref = intel_runtime_pm_get(rpm); + /* Sanity check that we allow writing into this object */ + if (i915_gem_object_is_readonly(obj) && write) { + ret = -EFAULT; + goto err_rpm; + } - ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); + ret = i915_gem_object_pin_pages(obj); if (ret) goto err_rpm; + ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); + if (ret) + goto err_pages; + /* Now pin it into the GTT as needed */ - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONBLOCK /* NOWARN */ | - PIN_NOEVICT); - if (IS_ERR(vma)) { + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); + if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) { /* Use a partial view if it is bigger than available space */ struct i915_ggtt_view view = compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); @@ -328,11 +337,11 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) * all hope that the hardware is able to track future writes. 
*/ - vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); - if (IS_ERR(vma)) { + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); + if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) { flags = PIN_MAPPABLE; view.type = I915_GGTT_VIEW_PARTIAL; - vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); } /* The entire mappable GGTT is pinned? Unexpected! */ @@ -389,10 +398,16 @@ err_unpin: __i915_vma_unpin(vma); err_reset: intel_gt_reset_unlock(ggtt->vm.gt, srcu); +err_pages: + i915_gem_object_unpin_pages(obj); err_rpm: + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); intel_runtime_pm_put(rpm, wakeref); - i915_gem_object_unpin_pages(obj); -err: return i915_error_to_vmf_fault(ret); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index e5b9276d254c..d46db8d8f38e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -110,20 +110,44 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv) -static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) +static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + bool intr) { - dma_resv_lock(obj->base.resv, NULL); + int ret; + + if (intr) + ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL); + else + ret = dma_resv_lock(obj->base.resv, ww ? 
&ww->ctx : NULL); + + if (!ret && ww) + list_add_tail(&obj->obj_link, &ww->obj_list); + if (ret == -EALREADY) + ret = 0; + + if (ret == -EDEADLK) + ww->contended = obj; + + return ret; } -static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww) { - return dma_resv_trylock(obj->base.resv); + return __i915_gem_object_lock(obj, ww, ww && ww->intr); } -static inline int -i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj) +static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww) { - return dma_resv_lock_interruptible(obj->base.resv, NULL); + WARN_ON(ww && !ww->intr); + return __i915_gem_object_lock(obj, ww, true); +} + +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +{ + return dma_resv_trylock(obj->base.resv); } static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) @@ -258,6 +282,10 @@ struct page * i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n); +struct page * +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, + unsigned int n); + dma_addr_t i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, unsigned long n, @@ -408,7 +436,6 @@ static inline void i915_gem_object_finish_access(struct drm_i915_gem_object *obj) { i915_gem_object_unpin_pages(obj); - i915_gem_object_unlock(obj); } static inline struct intel_engine_cs * @@ -431,6 +458,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); +void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); diff --git 
a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index bfdb32d46877..aee7ad3cc3c6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -14,6 +14,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, struct i915_vma *vma, + struct i915_gem_ww_ctx *ww, u32 value) { struct drm_i915_private *i915 = ce->vm->i915; @@ -39,10 +40,24 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, goto out_pm; } + err = i915_gem_object_lock(pool->obj, ww); + if (err) + goto out_put; + + batch = i915_vma_instance(pool->obj, ce->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put; + } + + err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_put; + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out_put; + goto out_unpin; } rem = vma->size; @@ -84,19 +99,11 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, intel_gt_chipset_flush(ce->vm->gt); - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - batch->private = pool; return batch; +out_unpin: + i915_vma_unpin(batch); out_put: intel_gt_buffer_pool_put(pool); out_pm: @@ -108,11 +115,9 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) { int err; - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (err == 0) err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); if (unlikely(err)) return err; @@ -141,6 +146,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, struct intel_context *ce, u32 value) { + struct i915_gem_ww_ctx ww; struct i915_request *rq; struct i915_vma *batch; struct i915_vma *vma; @@ -150,17 +156,28 @@ int 
i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (unlikely(err)) - return err; + i915_gem_ww_ctx_init(&ww, true); + intel_engine_pm_get(ce->engine); +retry: + err = i915_gem_object_lock(obj, &ww); + if (err) + goto out; - batch = intel_emit_vma_fill_blt(ce, vma, value); + err = intel_context_pin_ww(ce, &ww); + if (err) + goto out; + + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); + if (err) + goto out_ctx; + + batch = intel_emit_vma_fill_blt(ce, vma, &ww, value); if (IS_ERR(batch)) { err = PTR_ERR(batch); - goto out_unpin; + goto out_vma; } - rq = intel_context_create_request(ce); + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -170,11 +187,9 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, if (unlikely(err)) goto out_request; - i915_vma_lock(vma); err = move_obj_to_gpu(vma->obj, rq, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); if (unlikely(err)) goto out_request; @@ -193,8 +208,18 @@ out_request: i915_request_add(rq); out_batch: intel_emit_vma_release(ce, batch); -out_unpin: +out_vma: i915_vma_unpin(vma); +out_ctx: + intel_context_unpin(ce); +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + intel_engine_pm_put(ce->engine); return err; } @@ -210,6 +235,7 @@ static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size) } struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, struct i915_vma *src, struct i915_vma *dst) { @@ -236,10 +262,24 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, goto out_pm; } + err = i915_gem_object_lock(pool->obj, ww); + if (err) + goto out_put; + + batch = i915_vma_instance(pool->obj, ce->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put; + } + + err = 
i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); + if (unlikely(err)) + goto out_put; + cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto out_put; + goto out_unpin; } rem = src->size; @@ -296,20 +336,11 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, i915_gem_object_unpin_map(pool->obj); intel_gt_chipset_flush(ce->vm->gt); - - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - batch->private = pool; return batch; +out_unpin: + i915_vma_unpin(batch); out_put: intel_gt_buffer_pool_put(pool); out_pm: @@ -321,10 +352,9 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, struct drm_i915_gem_object *dst, struct intel_context *ce) { - struct drm_gem_object *objs[] = { &src->base, &dst->base }; struct i915_address_space *vm = ce->vm; struct i915_vma *vma[2], *batch; - struct ww_acquire_ctx acquire; + struct i915_gem_ww_ctx ww; struct i915_request *rq; int err, i; @@ -332,25 +362,36 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, if (IS_ERR(vma[0])) return PTR_ERR(vma[0]); - err = i915_vma_pin(vma[0], 0, 0, PIN_USER); - if (unlikely(err)) - return err; - vma[1] = i915_vma_instance(dst, vm, NULL); if (IS_ERR(vma[1])) - goto out_unpin_src; + return PTR_ERR(vma[1]); - err = i915_vma_pin(vma[1], 0, 0, PIN_USER); + i915_gem_ww_ctx_init(&ww, true); + intel_engine_pm_get(ce->engine); +retry: + err = i915_gem_object_lock(src, &ww); + if (!err) + err = i915_gem_object_lock(dst, &ww); + if (!err) + err = intel_context_pin_ww(ce, &ww); + if (err) + goto out; + + err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER); + if (err) + goto out_ctx; + + err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER); if (unlikely(err)) goto out_unpin_src; - batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]); + batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], 
vma[1]); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto out_unpin_dst; } - rq = intel_context_create_request(ce); + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -360,14 +401,10 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, if (unlikely(err)) goto out_request; - err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire); - if (unlikely(err)) - goto out_request; - for (i = 0; i < ARRAY_SIZE(vma); i++) { err = move_obj_to_gpu(vma[i]->obj, rq, i); if (unlikely(err)) - goto out_unlock; + goto out_request; } for (i = 0; i < ARRAY_SIZE(vma); i++) { @@ -375,20 +412,19 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, err = i915_vma_move_to_active(vma[i], rq, flags); if (unlikely(err)) - goto out_unlock; + goto out_request; } if (rq->engine->emit_init_breadcrumb) { err = rq->engine->emit_init_breadcrumb(rq); if (unlikely(err)) - goto out_unlock; + goto out_request; } err = rq->engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0); -out_unlock: - drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire); + out_request: if (unlikely(err)) i915_request_set_error_once(rq, err); @@ -400,6 +436,16 @@ out_unpin_dst: i915_vma_unpin(vma[1]); out_unpin_src: i915_vma_unpin(vma[0]); +out_ctx: + intel_context_unpin(ce); +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + intel_engine_pm_put(ce->engine); return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h index 8bcd336a90dc..2409fdcccf0e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h @@ -13,12 +13,15 @@ #include "i915_vma.h" struct drm_i915_gem_object; +struct i915_gem_ww_ctx; struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, struct i915_vma *vma, + struct i915_gem_ww_ctx *ww, u32 value); struct i915_vma 
*intel_emit_vma_copy_blt(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, struct i915_vma *src, struct i915_vma *dst); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 5335f799b548..d6711caa7f39 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -56,6 +56,8 @@ struct drm_i915_gem_object_ops { void (*truncate)(struct drm_i915_gem_object *obj); void (*writeback)(struct drm_i915_gem_object *obj); + int (*pread)(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *arg); int (*pwrite)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *arg); @@ -123,6 +125,15 @@ struct drm_i915_gem_object { struct list_head lut_list; spinlock_t lut_lock; /* guards lut_list */ + /** + * @obj_link: Link into @i915_gem_ww_ctx.obj_list + * + * When we lock this object through i915_gem_object_lock() with a + * context, we add it to the list to ensure we can unlock everything + * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called. + */ + struct list_head obj_link; + /** Stolen memory for this object, instead of being backed by shmem. 
*/ struct drm_mm_node *stolen; union { @@ -282,6 +293,7 @@ struct drm_i915_gem_object { } userptr; unsigned long scratch; + u64 encode; void *gvt_info; }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 7050519c87a4..f60ca6dc911f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -162,8 +162,6 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr) { if (is_vmalloc_addr(ptr)) vunmap(ptr); - else - kunmap(kmap_to_page(ptr)); } struct sg_table * @@ -234,50 +232,40 @@ unlock: return err; } -static inline pte_t iomap_pte(resource_size_t base, - dma_addr_t offset, - pgprot_t prot) -{ - return pte_mkspecial(pfn_pte((base + offset) >> PAGE_SHIFT, prot)); -} - /* The 'mapping' part of i915_gem_object_pin_map() below */ -static void *i915_gem_object_map(struct drm_i915_gem_object *obj, - enum i915_map_type type) +static void *i915_gem_object_map_page(struct drm_i915_gem_object *obj, + enum i915_map_type type) { - unsigned long n_pte = obj->base.size >> PAGE_SHIFT; - struct sg_table *sgt = obj->mm.pages; - pte_t *stack[32], **mem; - struct vm_struct *area; + unsigned long n_pages = obj->base.size >> PAGE_SHIFT, i; + struct page *stack[32], **pages = stack, *page; + struct sgt_iter iter; pgprot_t pgprot; - - if (!i915_gem_object_has_struct_page(obj) && type != I915_MAP_WC) - return NULL; - - /* A single page can always be kmapped */ - if (n_pte == 1 && type == I915_MAP_WB) - return kmap(sg_page(sgt->sgl)); - - mem = stack; - if (n_pte > ARRAY_SIZE(stack)) { - /* Too big for stack -- allocate temporary array instead */ - mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL); - if (!mem) - return NULL; - } - - area = alloc_vm_area(obj->base.size, mem); - if (!area) { - if (mem != stack) - kvfree(mem); - return NULL; - } + void *vaddr; switch (type) { default: MISSING_CASE(type); - /* fallthrough - to use PAGE_KERNEL anyway */ + fallthrough; /* to use 
PAGE_KERNEL anyway */ case I915_MAP_WB: + /* + * On 32b, highmem using a finite set of indirect PTE (i.e. + * vmap) to provide virtual mappings of the high pages. + * As these are finite, map_new_virtual() must wait for some + * other kmap() to finish when it runs out. If we map a large + * number of objects, there is no method for it to tell us + * to release the mappings, and we deadlock. + * + * However, if we make an explicit vmap of the page, that + * uses a larger vmalloc arena, and also has the ability + * to tell us to release unwanted mappings. Most importantly, + * it will fail and propagate an error instead of waiting + * forever. + * + * So if the page is beyond the 32b boundary, make an explicit + * vmap. + */ + if (n_pages == 1 && !PageHighMem(sg_page(obj->mm.pages->sgl))) + return page_address(sg_page(obj->mm.pages->sgl)); pgprot = PAGE_KERNEL; break; case I915_MAP_WC: @@ -285,30 +273,50 @@ static void *i915_gem_object_map(struct drm_i915_gem_object *obj, break; } - if (i915_gem_object_has_struct_page(obj)) { - struct sgt_iter iter; - struct page *page; - pte_t **ptes = mem; + if (n_pages > ARRAY_SIZE(stack)) { + /* Too big for stack -- allocate temporary array instead */ + pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return NULL; + } - for_each_sgt_page(page, iter, sgt) - **ptes++ = mk_pte(page, pgprot); - } else { - resource_size_t iomap; - struct sgt_iter iter; - pte_t **ptes = mem; - dma_addr_t addr; + i = 0; + for_each_sgt_page(page, iter, obj->mm.pages) + pages[i++] = page; + vaddr = vmap(pages, n_pages, 0, pgprot); + if (pages != stack) + kvfree(pages); + return vaddr; +} - iomap = obj->mm.region->iomap.base; - iomap -= obj->mm.region->region.start; +static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj, + enum i915_map_type type) +{ + resource_size_t iomap = obj->mm.region->iomap.base - + obj->mm.region->region.start; + unsigned long n_pfn = obj->base.size >> PAGE_SHIFT; + unsigned long 
stack[32], *pfns = stack, i; + struct sgt_iter iter; + dma_addr_t addr; + void *vaddr; + + if (type != I915_MAP_WC) + return NULL; - for_each_sgt_daddr(addr, iter, sgt) - **ptes++ = iomap_pte(iomap, addr, pgprot); + if (n_pfn > ARRAY_SIZE(stack)) { + /* Too big for stack -- allocate temporary array instead */ + pfns = kvmalloc_array(n_pfn, sizeof(*pfns), GFP_KERNEL); + if (!pfns) + return NULL; } - if (mem != stack) - kvfree(mem); - - return area->addr; + i = 0; + for_each_sgt_daddr(addr, iter, obj->mm.pages) + pfns[i++] = (iomap + addr) >> PAGE_SHIFT; + vaddr = vmap_pfn(pfns, n_pfn, pgprot_writecombine(PAGE_KERNEL_IO)); + if (pfns != stack) + kvfree(pfns); + return vaddr; } /* get, pin, and map the pages of the object into kernel space */ @@ -360,7 +368,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } if (!ptr) { - ptr = i915_gem_object_map(obj, type); + if (GEM_WARN_ON(type == I915_MAP_WC && + !static_cpu_has(X86_FEATURE_PAT))) + ptr = NULL; + else if (i915_gem_object_has_struct_page(obj)) + ptr = i915_gem_object_map_page(obj, type); + else + ptr = i915_gem_object_map_pfn(obj, type); if (!ptr) { err = -ENOMEM; goto err_unpin; @@ -548,6 +562,20 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) return nth_page(sg_page(sg), offset); } +/* Like i915_gem_object_get_page(), but mark the returned page dirty */ +struct page * +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, + unsigned int n) +{ + struct page *page; + + page = i915_gem_object_get_page(obj, n); + if (!obj->mm.dirty) + set_page_dirty(page); + + return page; +} + dma_addr_t i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, unsigned long n, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 28147aab47b9..3a4dfe2ef1da 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -134,6 +134,58 @@ i915_gem_object_put_pages_phys(struct 
drm_i915_gem_object *obj, vaddr, dma); } +static int +phys_pwrite(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) +{ + void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; + char __user *user_data = u64_to_user_ptr(args->data_ptr); + int err; + + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + /* + * We manually control the domain here and pretend that it + * remains coherent i.e. in the GTT domain, like shmem_pwrite. + */ + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); + + if (copy_from_user(vaddr, user_data, args->size)) + return -EFAULT; + + drm_clflush_virt_range(vaddr, args->size); + intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); + + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); + return 0; +} + +static int +phys_pread(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args) +{ + void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; + char __user *user_data = u64_to_user_ptr(args->data_ptr); + int err; + + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + drm_clflush_virt_range(vaddr, args->size); + if (copy_to_user(user_data, vaddr, args->size)) + return -EFAULT; + + return 0; +} + static void phys_release(struct drm_i915_gem_object *obj) { fput(obj->base.filp); @@ -144,6 +196,9 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .get_pages = i915_gem_object_get_pages_phys, .put_pages = i915_gem_object_put_pages_phys, + .pread = phys_pread, + .pwrite = phys_pwrite, + .release = phys_release, }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 3d215164dd5a..40d3e40500fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -84,7 +84,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) 
spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); drm_WARN_ON(&i915->drm, i915_gem_object_set_to_gtt_domain(obj, false)); i915_gem_object_unlock(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 38113d3c0138..75e8b71c18b9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -258,8 +258,8 @@ shmem_writeback(struct drm_i915_gem_object *obj) for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) { struct page *page; - page = find_lock_entry(mapping, i); - if (!page || xa_is_value(page)) + page = find_lock_page(mapping, i); + if (!page) continue; if (!page_mapped(page) && clear_page_dirty_for_io(page)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index e0f21f12d3ce..84b2707d8b17 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -53,8 +53,10 @@ int i915_gem_stolen_insert_node(struct drm_i915_private *i915, struct drm_mm_node *node, u64 size, unsigned alignment) { - return i915_gem_stolen_insert_node_in_range(i915, node, size, - alignment, 0, U64_MAX); + return i915_gem_stolen_insert_node_in_range(i915, node, + size, alignment, + I915_GEM_STOLEN_BIAS, + U64_MAX); } void i915_gem_stolen_remove_node(struct drm_i915_private *i915, @@ -249,7 +251,7 @@ static void vlv_get_stolen_reserved(struct drm_i915_private *i915, switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { default: MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); - /* fall through */ + fallthrough; case GEN7_STOLEN_RESERVED_1M: *size = 1024 * 1024; break; @@ -416,7 +418,7 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915) case 4: if (!IS_G4X(i915)) break; - /* fall through */ + fallthrough; case 5: g4x_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); @@ -445,7 +447,7 @@ static int 
i915_gem_init_stolen(struct drm_i915_private *i915) break; default: MISSING_CASE(INTEL_GEN(i915)); - /* fall-through */ + fallthrough; case 11: case 12: icl_get_stolen_reserved(i915, uncore, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index e15c0adad8af..61e028063f9f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -30,4 +30,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv resource_size_t stolen_offset, resource_size_t size); +#define I915_GEM_STOLEN_BIAS SZ_128K + #endif /* __I915_GEM_STOLEN_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index 540ef0551789..1929d6cf4150 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -9,6 +9,7 @@ #include <drm/drm_file.h> #include "i915_drv.h" +#include "i915_gem_context.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" @@ -35,9 +36,10 @@ int i915_gem_throttle_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; struct drm_i915_file_private *file_priv = file->driver_priv; - unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; - struct i915_request *request, *target = NULL; + struct i915_gem_context *ctx; + unsigned long idx; long ret; /* ABI: return -EIO if already wedged */ @@ -45,27 +47,54 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_link) { - if (time_after_eq(request->emitted_jiffies, recent_enough)) - break; + rcu_read_lock(); + xa_for_each(&file_priv->context_xa, idx, ctx) { + struct i915_gem_engines_iter it; + struct intel_context *ce; - if (target && xchg(&target->file_priv, NULL)) - 
list_del(&target->client_link); + if (!kref_get_unless_zero(&ctx->ref)) + continue; + rcu_read_unlock(); - target = request; - } - if (target) - i915_request_get(target); - spin_unlock(&file_priv->mm.lock); + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), + it) { + struct i915_request *rq, *target = NULL; + + if (!ce->timeline) + continue; + + mutex_lock(&ce->timeline->mutex); + list_for_each_entry_reverse(rq, + &ce->timeline->requests, + link) { + if (i915_request_completed(rq)) + break; - if (!target) - return 0; + if (time_after(rq->emitted_jiffies, + recent_enough)) + continue; - ret = i915_request_wait(target, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - i915_request_put(target); + target = i915_request_get(rq); + break; + } + mutex_unlock(&ce->timeline->mutex); + if (!target) + continue; + + ret = i915_request_wait(target, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + i915_request_put(target); + if (ret < 0) + break; + } + i915_gem_context_unlock_engines(ctx); + i915_gem_context_put(ctx); + + rcu_read_lock(); + } + rcu_read_unlock(); return ret < 0 ? ret : 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ff72ee2fd9cd..ffcaee74a249 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -249,7 +249,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * whilst executing a fenced command for an untiled object. 
*/ - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); if (i915_gem_object_is_framebuffer(obj)) { i915_gem_object_unlock(obj); return -EBUSY; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 2c2bf24140c9..f2eaed6aca3d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -403,6 +403,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; unsigned int sg_page_sizes; + struct scatterlist *sg; int ret; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -410,13 +411,12 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOMEM); alloc_table: - ret = __sg_alloc_table_from_pages(st, pvec, num_pages, - 0, num_pages << PAGE_SHIFT, - max_segment, - GFP_KERNEL); - if (ret) { + sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0, + num_pages << PAGE_SHIFT, max_segment, + NULL, 0, GFP_KERNEL); + if (IS_ERR(sg)) { kfree(st); - return ERR_PTR(ret); + return ERR_CAST(sg); } ret = i915_gem_gtt_prepare_pages(obj, st); @@ -596,14 +596,6 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); - /* - * Using __get_user_pages_fast() with a read-only - * access is questionable. A read-only page may be - * COW-broken, and then this might end up giving - * the wrong side of the COW.. - * - * We may or may not care. 
- */ if (pvec) { /* defer to worker if malloc fails */ if (!i915_gem_object_is_readonly(obj)) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 8291ede6902c..1f35e71429b4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -393,7 +393,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg) */ for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { - unsigned int combination = 0; + unsigned int combination = SZ_4K; /* Required for ppGTT */ for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { if (i & BIT(j)) @@ -947,7 +947,7 @@ static int gpu_write(struct intel_context *ce, { int err; - i915_gem_object_lock(vma->obj); + i915_gem_object_lock(vma->obj, NULL); err = i915_gem_object_set_to_gtt_domain(vma->obj, true); i915_gem_object_unlock(vma->obj); if (err) @@ -964,9 +964,10 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) unsigned long n; int err; + i915_gem_object_lock(obj, NULL); err = i915_gem_object_prepare_read(obj, &needs_flush); if (err) - return err; + goto err_unlock; for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); @@ -986,6 +987,8 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) } i915_gem_object_finish_access(obj); +err_unlock: + i915_gem_object_unlock(obj); return err; } @@ -1614,7 +1617,7 @@ int i915_gem_huge_page_mock_selftests(void) out_put: i915_vm_put(&ppgtt->vm); out_unlock: - drm_dev_put(&dev_priv->drm); + mock_destroy_device(dev_priv); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 299c29e9ad86..4e36d4897ea6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -75,7 +75,7 @@ static int __igt_client_fill(struct 
intel_engine_cs *engine) if (err) goto err_unpin; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 87d7d8aa080f..7049a6bbc03d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -27,9 +27,10 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) u32 *cpu; int err; + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush); if (err) - return err; + goto out; page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); @@ -46,7 +47,9 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v) kunmap_atomic(map); i915_gem_object_finish_access(ctx->obj); - return 0; +out: + i915_gem_object_unlock(ctx->obj); + return err; } static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) @@ -57,9 +60,10 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) u32 *cpu; int err; + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush); if (err) - return err; + goto out; page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); @@ -73,7 +77,9 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) kunmap_atomic(map); i915_gem_object_finish_access(ctx->obj); - return 0; +out: + i915_gem_object_unlock(ctx->obj); + return err; } static int gtt_set(struct context *ctx, unsigned long offset, u32 v) @@ -82,7 +88,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v) u32 __iomem *map; int err = 0; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); 
i915_gem_object_unlock(ctx->obj); if (err) @@ -115,7 +121,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) u32 __iomem *map; int err = 0; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_gtt_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj); if (err) @@ -147,7 +153,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) u32 *map; int err; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_wc_domain(ctx->obj, true); i915_gem_object_unlock(ctx->obj); if (err) @@ -170,7 +176,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v) u32 *map; int err; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_wc_domain(ctx->obj, false); i915_gem_object_unlock(ctx->obj); if (err) @@ -193,27 +199,27 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) u32 *cs; int err; - i915_gem_object_lock(ctx->obj); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); - i915_gem_object_unlock(ctx->obj); if (err) - return err; + goto out_unlock; vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - return PTR_ERR(vma); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unlock; + } rq = intel_engine_create_kernel_request(ctx->engine); if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); + err = PTR_ERR(rq); + goto out_unpin; } cs = intel_ring_begin(rq, 4); if (IS_ERR(cs)) { - i915_request_add(rq); - i915_vma_unpin(vma); - return PTR_ERR(cs); + err = PTR_ERR(cs); + goto out_rq; } if (INTEL_GEN(ctx->engine->i915) >= 8) { @@ -234,14 +240,16 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) } intel_ring_advance(rq, cs); - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, 
EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - i915_vma_unpin(vma); +out_rq: i915_request_add(rq); +out_unpin: + i915_vma_unpin(vma); +out_unlock: + i915_gem_object_unlock(ctx->obj); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 7ffc3c751432..d3f87dc4eda3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -461,9 +461,10 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) unsigned int n, m, need_flush; int err; + i915_gem_object_lock(obj, NULL); err = i915_gem_object_prepare_write(obj, &need_flush); if (err) - return err; + goto out; for (n = 0; n < real_page_count(obj); n++) { u32 *map; @@ -479,7 +480,9 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) i915_gem_object_finish_access(obj); obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; obj->write_domain = 0; - return 0; +out: + i915_gem_object_unlock(obj); + return err; } static noinline int cpu_check(struct drm_i915_gem_object *obj, @@ -488,9 +491,10 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj, unsigned int n, m, needs_flush; int err; + i915_gem_object_lock(obj, NULL); err = i915_gem_object_prepare_read(obj, &needs_flush); if (err) - return err; + goto out_unlock; for (n = 0; n < real_page_count(obj); n++) { u32 *map; @@ -527,6 +531,8 @@ out_unmap: } i915_gem_object_finish_access(obj); +out_unlock: + i915_gem_object_unlock(obj); return err; } @@ -887,24 +893,15 @@ out_file: return err; } -static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) +static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma) { - struct drm_i915_gem_object *obj; u32 *cmd; - int err; - if (INTEL_GEN(vma->vm->i915) < 8) - return ERR_PTR(-EINVAL); + GEM_BUG_ON(INTEL_GEN(vma->vm->i915) < 8); - obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE); - if 
(IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } + cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); *cmd++ = MI_STORE_REGISTER_MEM_GEN8; *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); @@ -912,26 +909,12 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) *cmd++ = upper_32_bits(vma->node.start); *cmd = MI_BATCH_BUFFER_END; - __i915_gem_object_flush_map(obj, 0, 64); - i915_gem_object_unpin_map(obj); + __i915_gem_object_flush_map(rpcs, 0, 64); + i915_gem_object_unpin_map(rpcs); intel_gt_chipset_flush(vma->vm->gt); - vma = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); + return 0; } static int @@ -939,52 +922,68 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, struct intel_context *ce, struct i915_request **rq_out) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_request *rq; + struct i915_gem_ww_ctx ww; struct i915_vma *batch; struct i915_vma *vma; + struct drm_i915_gem_object *rpcs; int err; GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); + if (INTEL_GEN(i915) < 8) + return -EINVAL; + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - return err; - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; + rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(rpcs)) + return PTR_ERR(rpcs); - batch = rpcs_query_batch(vma); + batch = i915_vma_instance(rpcs, ce->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); - goto err_vma; + goto err_put; } + i915_gem_ww_ctx_init(&ww, false); +retry: + err = 
i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_gem_object_lock(rpcs, &ww); + if (!err) + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (!err) + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); + if (err) + goto err_put; + + err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER); + if (err) + goto err_vma; + + err = rpcs_query_batch(rpcs, vma); + if (err) + goto err_batch; + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_batch; } - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); if (err) goto skip_request; - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); if (err) goto skip_request; @@ -1000,23 +999,24 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto skip_request; - i915_vma_unpin_and_release(&batch, 0); - i915_vma_unpin(vma); - *rq_out = i915_request_get(rq); - i915_request_add(rq); - - return 0; - skip_request: - i915_request_set_error_once(rq, err); + if (err) + i915_request_set_error_once(rq, err); i915_request_add(rq); err_batch: - i915_vma_unpin_and_release(&batch, 0); + i915_vma_unpin(batch); err_vma: i915_vma_unpin(vma); - +err_put: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + i915_gem_object_put(rpcs); return err; } @@ -1709,7 +1709,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_request_add(rq); - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) @@ -1748,7 +1748,7 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) if (!vm) return -ENODEV; - page = vm->scratch[0].base.page; + page = __px_page(vm->scratch[0]); if (!page) { pr_err("No scratch 
page!\n"); return -EINVAL; @@ -1914,8 +1914,8 @@ static int mock_context_barrier(void *arg) return -ENOMEM; counter = 0; - err = context_barrier_task(ctx, 0, - NULL, NULL, mock_barrier_task, &counter); + err = context_barrier_task(ctx, 0, NULL, NULL, NULL, + mock_barrier_task, &counter); if (err) { pr_err("Failed at line %d, err=%d\n", __LINE__, err); goto out; @@ -1927,11 +1927,8 @@ static int mock_context_barrier(void *arg) } counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, - skip_unused_engines, - NULL, - mock_barrier_task, - &counter); + err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, + NULL, NULL, mock_barrier_task, &counter); if (err) { pr_err("Failed at line %d, err=%d\n", __LINE__, err); goto out; @@ -1951,8 +1948,8 @@ static int mock_context_barrier(void *arg) counter = 0; context_barrier_inject_fault = BIT(RCS0); - err = context_barrier_task(ctx, ALL_ENGINES, - NULL, NULL, mock_barrier_task, &counter); + err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL, + mock_barrier_task, &counter); context_barrier_inject_fault = 0; if (err == -ENXIO) err = 0; @@ -1966,11 +1963,8 @@ static int mock_context_barrier(void *arg) goto out; counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, - skip_unused_engines, - NULL, - mock_barrier_task, - &counter); + err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, + NULL, NULL, mock_barrier_task, &counter); if (err) { pr_err("Failed at line %d, err=%d\n", __LINE__, err); goto out; @@ -2003,7 +1997,7 @@ int i915_gem_context_mock_selftests(void) err = i915_subtests(tests, i915); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index 2a52b92586b9..0845ce1ae37c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -272,7 +272,7 @@ int 
i915_gem_dmabuf_mock_selftests(void) err = i915_subtests(tests, i915); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index 57c14d3340cd..e1d50a5a1477 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -32,37 +32,39 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb, if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + err = i915_gem_object_lock(obj, &eb->ww); + if (err) + return err; + + err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH); if (err) return err; /* 8-Byte aligned */ err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0); - if (err) - goto unpin_vma; + if (err <= 0) + goto reloc_err; /* !8-Byte aligned */ err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1); - if (err) - goto unpin_vma; + if (err <= 0) + goto reloc_err; /* Skip to the end of the cmd page */ - i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1; + i = PAGE_SIZE / sizeof(u32) - 1; i -= eb->reloc_cache.rq_size; memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size, MI_NOOP, i); eb->reloc_cache.rq_size += i; - /* Force batch chaining */ + /* Force next batch */ err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2); - if (err) - goto unpin_vma; + if (err <= 0) + goto reloc_err; GEM_BUG_ON(!eb->reloc_cache.rq); rq = i915_request_get(eb->reloc_cache.rq); - err = reloc_gpu_flush(&eb->reloc_cache); - if (err) - goto put_rq; + reloc_gpu_flush(eb, &eb->reloc_cache); GEM_BUG_ON(eb->reloc_cache.rq); err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2); @@ -94,6 +96,11 @@ put_rq: unpin_vma: i915_vma_unpin(vma); return err; + +reloc_err: + if (!err) + err = -EIO; + goto unpin_vma; } static int igt_gpu_reloc(void *arg) @@ -115,6 +122,8 @@ static int igt_gpu_reloc(void *arg) 
goto err_scratch; } + intel_gt_pm_get(&eb.i915->gt); + for_each_uabi_engine(eb.engine, eb.i915) { reloc_cache_init(&eb.reloc_cache, eb.i915); memset(map, POISON_INUSE, 4096); @@ -125,15 +134,29 @@ static int igt_gpu_reloc(void *arg) err = PTR_ERR(eb.context); goto err_pm; } + eb.reloc_pool = NULL; + eb.reloc_context = NULL; - err = intel_context_pin(eb.context); - if (err) - goto err_put; + i915_gem_ww_ctx_init(&eb.ww, false); +retry: + err = intel_context_pin_ww(eb.context, &eb.ww); + if (!err) { + err = __igt_gpu_reloc(&eb, scratch); + + intel_context_unpin(eb.context); + } + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&eb.ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&eb.ww); - err = __igt_gpu_reloc(&eb, scratch); + if (eb.reloc_pool) + intel_gt_buffer_pool_put(eb.reloc_pool); + if (eb.reloc_context) + intel_context_put(eb.reloc_context); - intel_context_unpin(eb.context); -err_put: intel_context_put(eb.context); err_pm: intel_engine_pm_put(eb.engine); @@ -144,6 +167,7 @@ err_pm: if (igt_flush_test(eb.i915)) err = -EIO; + intel_gt_pm_put(&eb.i915->gt); err_scratch: i915_gem_object_put(scratch); return err; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 9c7402ce5bf9..d27d87a678c8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -103,7 +103,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) { @@ -188,7 +188,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); - 
i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) { @@ -528,31 +528,42 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) for_each_uabi_engine(engine, i915) { struct i915_request *rq; struct i915_vma *vma; + struct i915_gem_ww_ctx ww; int err; vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_pin(vma, 0, 0, PIN_USER); + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); if (err) - return err; + goto err; rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); + err = PTR_ERR(rq); + goto err_unpin; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); i915_request_add(rq); +err_unpin: i915_vma_unpin(vma); +err: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); if (err) return err; } @@ -1123,6 +1134,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, for_each_uabi_engine(engine, i915) { struct i915_request *rq; struct i915_vma *vma; + struct i915_gem_ww_ctx ww; vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL); if (IS_ERR(vma)) { @@ -1130,9 +1142,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, goto out_unmap; } - err = i915_vma_pin(vma, 0, 0, PIN_USER); + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); if (err) - goto out_unmap; + goto out_ww; rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { @@ -1140,11 +1156,9 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, goto out_unpin; } - 
i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (err == 0) err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); err = engine->emit_bb_start(rq, vma->node.start, 0, 0); i915_request_get(rq); @@ -1166,6 +1180,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, out_unpin: i915_vma_unpin(vma); +out_ww: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); if (err) goto out_unmap; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c index faa5b6d91795..bf853c40ec65 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c @@ -85,7 +85,7 @@ int i915_gem_object_mock_selftests(void) err = i915_subtests(tests, i915); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 34932871b3a5..8cee68c6a6dc 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -44,7 +44,7 @@ static int mock_phys_object(void *arg) } /* Make the object dirty so that put_pages must do copy back the data */ - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) { @@ -73,6 +73,6 @@ int i915_gem_phys_mock_selftests(void) err = i915_subtests(tests, i915); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c index debaf7b18ab5..be30b27e2926 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c @@ -28,10 +28,9 @@ static struct sg_table *mock_map_dma_buf(struct 
dma_buf_attachment *attachment, sg = sg_next(sg); } - if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { - err = -ENOMEM; + err = dma_map_sgtable(attachment->dev, st, dir, 0); + if (err) goto err_st; - } return st; @@ -46,7 +45,7 @@ static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *st, enum dma_data_direction dir) { - dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir); + dma_unmap_sgtable(attachment->dev, st, dir, 0); sg_free_table(st); kfree(st); } diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index cdc0b9c54305..c30adc05fa98 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -16,8 +16,10 @@ static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt, const unsigned int pde, const struct i915_page_table *pt) { + dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]); + /* Caller needs to make sure the write completes if necessary */ - iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, + iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID, ppgtt->pd_addr + pde); } @@ -79,7 +81,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, { struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); const unsigned int first_entry = start / I915_GTT_PAGE_SIZE; - const gen6_pte_t scratch_pte = vm->scratch[0].encode; + const gen6_pte_t scratch_pte = vm->scratch[0]->encode; unsigned int pde = first_entry / GEN6_PTES; unsigned int pte = first_entry % GEN6_PTES; unsigned int num_entries = length / I915_GTT_PAGE_SIZE; @@ -90,8 +92,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, const unsigned int count = min(num_entries, GEN6_PTES - pte); gen6_pte_t *vaddr; - GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1])); - num_entries -= count; GEM_BUG_ON(count > atomic_read(&pt->used)); @@ -127,7 +127,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space 
*vm, struct sgt_dma iter = sgt_dma(vma); gen6_pte_t *vaddr; - GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]); + GEM_BUG_ON(!pd->entry[act_pt]); vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); do { @@ -177,39 +177,36 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) mutex_unlock(&ppgtt->flush); } -static int gen6_alloc_va_range(struct i915_address_space *vm, - u64 start, u64 length) +static void gen6_alloc_va_range(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + u64 start, u64 length) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_table *pt, *alloc = NULL; + struct i915_page_table *pt; bool flush = false; u64 from = start; unsigned int pde; - int ret = 0; spin_lock(&pd->lock); gen6_for_each_pde(pt, pd, start, length, pde) { const unsigned int count = gen6_pte_count(start, length); - if (px_base(pt) == px_base(&vm->scratch[1])) { + if (!pt) { spin_unlock(&pd->lock); - pt = fetch_and_zero(&alloc); - if (!pt) - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind_out; - } + pt = stash->pt[0]; + __i915_gem_object_pin_pages(pt->base); + i915_gem_object_make_unshrinkable(pt->base); - fill32_px(pt, vm->scratch[0].encode); + fill32_px(pt, vm->scratch[0]->encode); spin_lock(&pd->lock); - if (pd->entry[pde] == &vm->scratch[1]) { + if (!pd->entry[pde]) { + stash->pt[0] = pt->stash; + atomic_set(&pt->used, 0); pd->entry[pde] = pt; } else { - alloc = pt; pt = pd->entry[pde]; } @@ -226,53 +223,51 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref) gen6_flush_pd(ppgtt, from, start); } - - goto out; - -unwind_out: - gen6_ppgtt_clear_range(vm, from, start - from); -out: - if (alloc) - free_px(vm, alloc); - return ret; } static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) { struct i915_address_space * const vm = &ppgtt->base.vm; - struct 
i915_page_directory * const pd = ppgtt->base.pd; int ret; - ret = setup_scratch_page(vm, __GFP_HIGHMEM); + ret = setup_scratch_page(vm); if (ret) return ret; - vm->scratch[0].encode = - vm->pte_encode(px_dma(&vm->scratch[0]), + vm->scratch[0]->encode = + vm->pte_encode(px_dma(vm->scratch[0]), I915_CACHE_NONE, PTE_READ_ONLY); - if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) { - cleanup_scratch_page(vm); - return -ENOMEM; + vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); + if (IS_ERR(vm->scratch[1])) { + ret = PTR_ERR(vm->scratch[1]); + goto err_scratch0; } - fill32_px(&vm->scratch[1], vm->scratch[0].encode); - memset_p(pd->entry, &vm->scratch[1], I915_PDES); + ret = pin_pt_dma(vm, vm->scratch[1]); + if (ret) + goto err_scratch1; + + fill32_px(vm->scratch[1], vm->scratch[0]->encode); return 0; + +err_scratch1: + i915_gem_object_put(vm->scratch[1]); +err_scratch0: + i915_gem_object_put(vm->scratch[0]); + return ret; } static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) { struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_dma * const scratch = - px_base(&ppgtt->base.vm.scratch[1]); struct i915_page_table *pt; u32 pde; gen6_for_all_pdes(pt, pd, pde) - if (px_base(pt) != scratch) - free_px(&ppgtt->base.vm, pt); + if (pt) + free_pt(&ppgtt->base.vm, pt); } static void gen6_ppgtt_cleanup(struct i915_address_space *vm) @@ -286,7 +281,8 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) mutex_destroy(&ppgtt->flush); mutex_destroy(&ppgtt->pin_mutex); - kfree(ppgtt->base.pd); + + free_pd(&ppgtt->base.vm, ppgtt->base.pd); } static int pd_vma_set_pages(struct i915_vma *vma) @@ -302,28 +298,26 @@ static void pd_vma_clear_pages(struct i915_vma *vma) vma->pages = NULL; } -static int pd_vma_bind(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 unused) +static void pd_vma_bind(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + 
enum i915_cache_level cache_level, + u32 unused) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct gen6_ppgtt *ppgtt = vma->private; u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE; - px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); + ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10; ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total); - return 0; } static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) { struct gen6_ppgtt *ppgtt = vma->private; struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_dma * const scratch = - px_base(&ppgtt->base.vm.scratch[1]); struct i915_page_table *pt; unsigned int pde; @@ -332,11 +326,11 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) /* Free all no longer used page tables */ gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { - if (px_base(pt) == scratch || atomic_read(&pt->used)) + if (!pt || atomic_read(&pt->used)) continue; - free_px(&ppgtt->base.vm, pt); - pd->entry[pde] = scratch; + free_pt(&ppgtt->base.vm, pt); + pd->entry[pde] = NULL; } ppgtt->scan_for_unused_pt = false; @@ -380,7 +374,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) return vma; } -int gen6_ppgtt_pin(struct i915_ppgtt *base) +int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); int err; @@ -406,7 +400,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base) */ err = 0; if (!atomic_read(&ppgtt->pin_count)) - err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH); + err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH); if (!err) atomic_inc(&ppgtt->pin_count); mutex_unlock(&ppgtt->pin_mutex); @@ -448,6 +442,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) mutex_init(&ppgtt->pin_mutex); ppgtt_init(&ppgtt->base, gt); + ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / 
sizeof(gen6_pte_t)); ppgtt->base.vm.top = 1; ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; @@ -456,9 +451,10 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; + ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma; ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; - ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd)); + ppgtt->base.pd = __alloc_pd(I915_PDES); if (!ppgtt->base.pd) { err = -ENOMEM; goto err_free; @@ -479,7 +475,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) err_scratch: free_scratch(&ppgtt->base.vm); err_pd: - kfree(ppgtt->base.pd); + free_pd(&ppgtt->base.vm, ppgtt->base.pd); err_free: mutex_destroy(&ppgtt->pin_mutex); kfree(ppgtt); diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h index 72e481806c96..3357228f3304 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h @@ -8,12 +8,15 @@ #include "intel_gtt.h" +struct i915_gem_ww_ctx; + struct gen6_ppgtt { struct i915_ppgtt base; struct mutex flush; struct i915_vma *vma; gen6_pte_t __iomem *pd_addr; + u32 pp_dir; atomic_t pin_count; struct mutex pin_mutex; @@ -66,7 +69,7 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) (pt = i915_pt_entry(pd, iter), true); \ ++iter) -int gen6_ppgtt_pin(struct i915_ppgtt *base); +int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww); void gen6_ppgtt_unpin(struct i915_ppgtt *base); void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); void gen6_ppgtt_enable(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 699125928272..38c7069b7749 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -181,7 +181,7 @@ static void __gen8_ppgtt_cleanup(struct i915_address_space *vm, } while (pde++, --count); } - free_px(vm, pd); + 
free_px(vm, &pd->pt, lvl); } static void gen8_ppgtt_cleanup(struct i915_address_space *vm) @@ -199,7 +199,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, struct i915_page_directory * const pd, u64 start, const u64 end, int lvl) { - const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; + const struct drm_i915_gem_object * const scratch = vm->scratch[lvl]; unsigned int idx, len; GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); @@ -239,7 +239,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, vaddr = kmap_atomic_px(pt); memset64(vaddr + gen8_pd_index(start, 0), - vm->scratch[0].encode, + vm->scratch[0]->encode, count); kunmap_atomic(vaddr); @@ -248,7 +248,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, } if (release_pd_entry(pd, idx, pt, scratch)) - free_px(vm, pt); + free_px(vm, pt, lvl); } while (idx++, --len); return start; @@ -269,14 +269,12 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm, start, start + length, vm->top); } -static int __gen8_ppgtt_alloc(struct i915_address_space * const vm, - struct i915_page_directory * const pd, - u64 * const start, const u64 end, int lvl) +static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, + struct i915_vm_pt_stash *stash, + struct i915_page_directory * const pd, + u64 * const start, const u64 end, int lvl) { - const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; - struct i915_page_table *alloc = NULL; unsigned int idx, len; - int ret = 0; GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); @@ -297,49 +295,31 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm, DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", __func__, vm, lvl + 1, idx); - pt = fetch_and_zero(&alloc); - if (lvl) { - if (!pt) { - pt = &alloc_pd(vm)->pt; - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto out; - } - } - - fill_px(pt, vm->scratch[lvl].encode); - } else { - if (!pt) { - pt = alloc_pt(vm); - if 
(IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto out; - } - } - - if (intel_vgpu_active(vm->i915) || - gen8_pt_count(*start, end) < I915_PDES) - fill_px(pt, vm->scratch[lvl].encode); - } + pt = stash->pt[!!lvl]; + __i915_gem_object_pin_pages(pt->base); + i915_gem_object_make_unshrinkable(pt->base); + + if (lvl || + gen8_pt_count(*start, end) < I915_PDES || + intel_vgpu_active(vm->i915)) + fill_px(pt, vm->scratch[lvl]->encode); spin_lock(&pd->lock); - if (likely(!pd->entry[idx])) + if (likely(!pd->entry[idx])) { + stash->pt[!!lvl] = pt->stash; + atomic_set(&pt->used, 0); set_pd_entry(pd, idx, pt); - else - alloc = pt, pt = pd->entry[idx]; + } else { + pt = pd->entry[idx]; + } } if (lvl) { atomic_inc(&pt->used); spin_unlock(&pd->lock); - ret = __gen8_ppgtt_alloc(vm, as_pd(pt), - start, end, lvl); - if (unlikely(ret)) { - if (release_pd_entry(pd, idx, pt, scratch)) - free_px(vm, pt); - goto out; - } + __gen8_ppgtt_alloc(vm, stash, + as_pd(pt), start, end, lvl); spin_lock(&pd->lock); atomic_dec(&pt->used); @@ -359,18 +339,12 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm, } } while (idx++, --len); spin_unlock(&pd->lock); -out: - if (alloc) - free_px(vm, alloc); - return ret; } -static int gen8_ppgtt_alloc(struct i915_address_space *vm, - u64 start, u64 length) +static void gen8_ppgtt_alloc(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + u64 start, u64 length) { - u64 from; - int err; - GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); GEM_BUG_ON(range_overflows(start, length, vm->total)); @@ -378,25 +352,9 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm, start >>= GEN8_PTE_SHIFT; length >>= GEN8_PTE_SHIFT; GEM_BUG_ON(length == 0); - from = start; - - err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd, - &start, start + length, vm->top); - if (unlikely(err && from != start)) - __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, - from, start, vm->top); - - 
return err; -} -static __always_inline void -write_pte(gen8_pte_t *pte, const gen8_pte_t val) -{ - /* Magic delays? Or can we refine these to flush all in one pass? */ - *pte = val; - wmb(); /* cpu to cache */ - clflush(pte); /* cache to memory */ - wmb(); /* visible to all */ + __gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd, + &start, start + length, vm->top); } static __always_inline u64 @@ -415,8 +373,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); do { GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE); - write_pte(&vaddr[gen8_pd_index(idx, 0)], - pte_encode | iter->dma); + vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; iter->dma += I915_GTT_PAGE_SIZE; if (iter->dma >= iter->max) { @@ -439,10 +396,12 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, pd = pdp->entry[gen8_pd_index(idx, 2)]; } + clflush_cache_range(vaddr, PAGE_SIZE); kunmap_atomic(vaddr); vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); } } while (1); + clflush_cache_range(vaddr, PAGE_SIZE); kunmap_atomic(vaddr); return idx; @@ -498,7 +457,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, do { GEM_BUG_ON(iter->sg->length < page_size); - write_pte(&vaddr[index++], encode | iter->dma); + vaddr[index++] = encode | iter->dma; start += page_size; iter->dma += page_size; @@ -523,6 +482,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, } } while (rem >= page_size && index < I915_PDES); + clflush_cache_range(vaddr, PAGE_SIZE); kunmap_atomic(vaddr); /* @@ -554,7 +514,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { u16 i; - encode = vma->vm->scratch[0].encode; + encode = vma->vm->scratch[0]->encode; vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K)); for (i = 1; i < index; i += 16) @@ -608,33 +568,44 @@ static int gen8_init_scratch(struct i915_address_space *vm) GEM_BUG_ON(!clone->has_read_only); vm->scratch_order = 
clone->scratch_order; - memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch)); - px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */ + for (i = 0; i <= vm->top; i++) + vm->scratch[i] = i915_gem_object_get(clone->scratch[i]); + return 0; } - ret = setup_scratch_page(vm, __GFP_HIGHMEM); + ret = setup_scratch_page(vm); if (ret) return ret; - vm->scratch[0].encode = - gen8_pte_encode(px_dma(&vm->scratch[0]), + vm->scratch[0]->encode = + gen8_pte_encode(px_dma(vm->scratch[0]), I915_CACHE_LLC, vm->has_read_only); for (i = 1; i <= vm->top; i++) { - if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i])))) + struct drm_i915_gem_object *obj; + + obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); + if (IS_ERR(obj)) goto free_scratch; - fill_px(&vm->scratch[i], vm->scratch[i - 1].encode); - vm->scratch[i].encode = - gen8_pde_encode(px_dma(&vm->scratch[i]), - I915_CACHE_LLC); + ret = pin_pt_dma(vm, obj); + if (ret) { + i915_gem_object_put(obj); + goto free_scratch; + } + + fill_px(obj, vm->scratch[i - 1]->encode); + obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC); + + vm->scratch[i] = obj; } return 0; free_scratch: - free_scratch(vm); + while (i--) + i915_gem_object_put(vm->scratch[i]); return -ENOMEM; } @@ -649,12 +620,20 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) { struct i915_page_directory *pde; + int err; pde = alloc_pd(vm); if (IS_ERR(pde)) return PTR_ERR(pde); - fill_px(pde, vm->scratch[1].encode); + err = pin_pt_dma(vm, pde->pt.base); + if (err) { + i915_gem_object_put(pde->pt.base); + free_pd(vm, pde); + return err; + } + + fill_px(pde, vm->scratch[1]->encode); set_pd_entry(pd, idx, pde); atomic_inc(px_used(pde)); /* keep pinned */ } @@ -668,21 +647,32 @@ gen8_alloc_top_pd(struct i915_address_space *vm) { const unsigned int count = gen8_pd_top_count(vm); struct i915_page_directory *pd; + int err; - GEM_BUG_ON(count > ARRAY_SIZE(pd->entry)); + GEM_BUG_ON(count > I915_PDES); - pd 
= __alloc_pd(offsetof(typeof(*pd), entry[count])); + pd = __alloc_pd(count); if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - if (unlikely(setup_page_dma(vm, px_base(pd)))) { - kfree(pd); - return ERR_PTR(-ENOMEM); + pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); + if (IS_ERR(pd->pt.base)) { + err = PTR_ERR(pd->pt.base); + pd->pt.base = NULL; + goto err_pd; } - fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); + err = pin_pt_dma(vm, pd->pt.base); + if (err) + goto err_pd; + + fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count); atomic_inc(px_used(pd)); /* mark as pinned */ return pd; + +err_pd: + free_pd(vm, pd); + return ERR_PTR(err); } /* @@ -703,6 +693,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) ppgtt_init(ppgtt, gt); ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2; + ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t)); /* * From bdw, there is hw support for read-only pages in the PPGTT. @@ -714,12 +705,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) */ ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12); - /* - * There are only few exceptions for gen >=6. chv and bxt. - * And we are not sure about the latter so play safe for now. 
- */ - if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915)) - ppgtt->vm.pt_kmap_wc = true; + ppgtt->vm.alloc_pt_dma = alloc_pt_dma; err = gen8_init_scratch(&ppgtt->vm); if (err) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 91786310c114..d8b206e53660 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -28,6 +28,8 @@ #include "i915_drv.h" #include "i915_trace.h" +#include "intel_breadcrumbs.h" +#include "intel_context.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" @@ -53,33 +55,65 @@ static void irq_disable(struct intel_engine_cs *engine) spin_unlock(&engine->gt->irq_lock); } -static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) +static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { - struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); + lockdep_assert_held(&b->irq_lock); + + if (!b->irq_engine || b->irq_armed) + return; + + if (!intel_gt_pm_get_if_awake(b->irq_engine->gt)) + return; + + /* + * The breadcrumb irq will be disarmed on the interrupt after the + * waiters are signaled. This gives us a single interrupt window in + * which we can add a new waiter and avoid the cost of re-enabling + * the irq. + */ + WRITE_ONCE(b->irq_armed, true); + + /* + * Since we are waiting on a request, the GPU should be busy + * and should have its own rpm reference. This is tracked + * by i915->gt.awake, we can forgo holding our own wakref + * for the interrupt as before i915->gt.awake is released (when + * the driver is idle) we disarm the breadcrumbs. 
+ */ + if (!b->irq_enabled++) + irq_enable(b->irq_engine); +} + +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) +{ lockdep_assert_held(&b->irq_lock); + if (!b->irq_engine || !b->irq_armed) + return; + GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) - irq_disable(engine); + irq_disable(b->irq_engine); WRITE_ONCE(b->irq_armed, false); - intel_gt_pm_put_async(engine->gt); + intel_gt_pm_put_async(b->irq_engine->gt); } -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +static void add_signaling_context(struct intel_breadcrumbs *b, + struct intel_context *ce) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; - - if (!READ_ONCE(b->irq_armed)) - return; + intel_context_get(ce); + list_add_tail(&ce->signal_link, &b->signalers); + if (list_is_first(&ce->signal_link, &b->signalers)) + __intel_breadcrumbs_arm_irq(b); +} - spin_lock_irqsave(&b->irq_lock, flags); - if (b->irq_armed) - __intel_breadcrumbs_disarm_irq(b); - spin_unlock_irqrestore(&b->irq_lock, flags); +static void remove_signaling_context(struct intel_breadcrumbs *b, + struct intel_context *ce) +{ + list_del(&ce->signal_link); + intel_context_put(ce); } static inline bool __request_completed(const struct i915_request *rq) @@ -90,6 +124,9 @@ static inline bool __request_completed(const struct i915_request *rq) __maybe_unused static bool check_signal_order(struct intel_context *ce, struct i915_request *rq) { + if (rq->context != ce) + return false; + if (!list_is_last(&rq->signal_link, &ce->signals) && i915_seqno_passed(rq->fence.seqno, list_next_entry(rq, signal_link)->fence.seqno)) @@ -133,25 +170,21 @@ __dma_fence_signal__notify(struct dma_fence *fence, static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) { - struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - - if (unlikely(intel_engine_is_virtual(engine))) - engine = intel_virtual_engine_get_sibling(engine, 0); - - 
intel_engine_add_retire(engine, tl); + if (b->irq_engine) + intel_engine_add_retire(b->irq_engine, tl); } -static void __signal_request(struct i915_request *rq, struct list_head *signals) +static bool __signal_request(struct i915_request *rq, struct list_head *signals) { - GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - if (!__dma_fence_signal(&rq->fence)) - return; + if (!__dma_fence_signal(&rq->fence)) { + i915_request_put(rq); + return false; + } - i915_request_get(rq); list_add_tail(&rq->signal_link, signals); + return true; } static void signal_irq_work(struct irq_work *work) @@ -164,7 +197,7 @@ static void signal_irq_work(struct irq_work *work) spin_lock(&b->irq_lock); - if (b->irq_armed && list_empty(&b->signalers)) + if (list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); list_splice_init(&b->signaled_requests, &signal); @@ -197,8 +230,8 @@ static void signal_irq_work(struct irq_work *work) /* Advance the list to the first incomplete request */ __list_del_many(&ce->signals, pos); if (&ce->signals == pos) { /* now empty */ - list_del_init(&ce->signal_link); add_retire(b, ce->timeline); + remove_signaling_context(b, ce); } } } @@ -220,116 +253,89 @@ static void signal_irq_work(struct irq_work *work) } } -static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) +struct intel_breadcrumbs * +intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) { - struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - - lockdep_assert_held(&b->irq_lock); - if (b->irq_armed) - return true; - - if (!intel_gt_pm_get_if_awake(engine->gt)) - return false; - - /* - * The breadcrumb irq will be disarmed on the interrupt after the - * waiters are signaled. This gives us a single interrupt window in - * which we can add a new waiter and avoid the cost of re-enabling - * the irq. 
- */ - WRITE_ONCE(b->irq_armed, true); - - /* - * Since we are waiting on a request, the GPU should be busy - * and should have its own rpm reference. This is tracked - * by i915->gt.awake, we can forgo holding our own wakref - * for the interrupt as before i915->gt.awake is released (when - * the driver is idle) we disarm the breadcrumbs. - */ - - if (!b->irq_enabled++) - irq_enable(engine); + struct intel_breadcrumbs *b; - return true; -} - -void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; + b = kzalloc(sizeof(*b), GFP_KERNEL); + if (!b) + return NULL; spin_lock_init(&b->irq_lock); INIT_LIST_HEAD(&b->signalers); INIT_LIST_HEAD(&b->signaled_requests); init_irq_work(&b->irq_work, signal_irq_work); + + b->irq_engine = irq_engine; + + return b; } -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) +void intel_breadcrumbs_reset(struct intel_breadcrumbs *b) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; unsigned long flags; + if (!b->irq_engine) + return; + spin_lock_irqsave(&b->irq_lock, flags); if (b->irq_enabled) - irq_enable(engine); + irq_enable(b->irq_engine); else - irq_disable(engine); + irq_disable(b->irq_engine); spin_unlock_irqrestore(&b->irq_lock, flags); } -void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine, - struct intel_context *ce) +void intel_breadcrumbs_park(struct intel_breadcrumbs *b) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; unsigned long flags; - spin_lock_irqsave(&b->irq_lock, flags); - if (!list_empty(&ce->signals)) { - struct i915_request *rq, *next; - - /* Queue for executing the signal callbacks in the irq_work */ - list_for_each_entry_safe(rq, next, &ce->signals, signal_link) { - GEM_BUG_ON(rq->engine != engine); - GEM_BUG_ON(!__request_completed(rq)); - - __signal_request(rq, &b->signaled_requests); - } + if (!READ_ONCE(b->irq_armed)) + return; - INIT_LIST_HEAD(&ce->signals); - 
list_del_init(&ce->signal_link); + spin_lock_irqsave(&b->irq_lock, flags); + __intel_breadcrumbs_disarm_irq(b); + spin_unlock_irqrestore(&b->irq_lock, flags); + if (!list_empty(&b->signalers)) irq_work_queue(&b->irq_work); - } - spin_unlock_irqrestore(&b->irq_lock, flags); } -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) +void intel_breadcrumbs_free(struct intel_breadcrumbs *b) { + kfree(b); } -bool i915_request_enable_breadcrumb(struct i915_request *rq) +static void insert_breadcrumb(struct i915_request *rq, + struct intel_breadcrumbs *b) { - lockdep_assert_held(&rq->lock); - - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) - return true; + struct intel_context *ce = rq->context; + struct list_head *pos; - if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { - struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - struct intel_context *ce = rq->context; - struct list_head *pos; - - spin_lock(&b->irq_lock); + if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) + return; - if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) - goto unlock; + i915_request_get(rq); - if (!__intel_breadcrumbs_arm_irq(b)) - goto unlock; + /* + * If the request is already completed, we can transfer it + * straight onto a signaled list, and queue the irq worker for + * its signal completion. 
+ */ + if (__request_completed(rq)) { + if (__signal_request(rq, &b->signaled_requests)) + irq_work_queue(&b->irq_work); + return; + } + if (list_empty(&ce->signals)) { + add_signaling_context(b, ce); + pos = &ce->signals; + } else { /* * We keep the seqno in retirement order, so we can break * inside intel_engine_signal_breadcrumbs as soon as we've @@ -351,24 +357,75 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno)) break; } - list_add(&rq->signal_link, pos); - if (pos == &ce->signals) /* catch transitions from empty list */ - list_move_tail(&ce->signal_link, &b->signalers); - GEM_BUG_ON(!check_signal_order(ce, rq)); + } + list_add(&rq->signal_link, pos); + GEM_BUG_ON(!check_signal_order(ce, rq)); + set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + + /* Check after attaching to irq, interrupt may have already fired. */ + if (__request_completed(rq)) + irq_work_queue(&b->irq_work); +} - set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); -unlock: +bool i915_request_enable_breadcrumb(struct i915_request *rq) +{ + struct intel_breadcrumbs *b; + + /* Serialises with i915_request_retire() using rq->lock */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + return true; + + /* + * Peek at i915_request_submit()/i915_request_unsubmit() status. + * + * If the request is not yet active (and not signaled), we will + * attach the breadcrumb later. + */ + if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) + return true; + + /* + * rq->engine is locked by rq->engine->active.lock. That however + * is not known until after rq->engine has been dereferenced and + * the lock acquired. Hence we acquire the lock and then validate + * that rq->engine still matches the lock we hold for it. 
+ * + * Here, we are using the breadcrumb lock as a proxy for the + * rq->engine->active.lock, and we know that since the breadcrumb + * will be serialised within i915_request_submit/i915_request_unsubmit, + * the engine cannot change while active as long as we hold the + * breadcrumb lock on that engine. + * + * From the dma_fence_enable_signaling() path, we are outside of the + * request submit/unsubmit path, and so we must be more careful to + * acquire the right lock. + */ + b = READ_ONCE(rq->engine)->breadcrumbs; + spin_lock(&b->irq_lock); + while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) { spin_unlock(&b->irq_lock); + b = READ_ONCE(rq->engine)->breadcrumbs; + spin_lock(&b->irq_lock); } - return !__request_completed(rq); + /* + * Now that we are finally serialised with request submit/unsubmit, + * [with b->irq_lock] and with i915_request_retire() [via checking + * SIGNALED with rq->lock] confirm the request is indeed active. If + * it is no longer active, the breadcrumb will be attached upon + * i915_request_submit(). 
+ */ + if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) + insert_breadcrumb(rq, b); + + spin_unlock(&b->irq_lock); + + return true; } void i915_request_cancel_breadcrumb(struct i915_request *rq) { - struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; - - lockdep_assert_held(&rq->lock); + struct intel_breadcrumbs *b = rq->engine->breadcrumbs; /* * We must wait for b->irq_lock so that we know the interrupt handler @@ -382,23 +439,19 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) list_del(&rq->signal_link); if (list_empty(&ce->signals)) - list_del_init(&ce->signal_link); + remove_signaling_context(b, ce); clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + i915_request_put(rq); } spin_unlock(&b->irq_lock); } -void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, - struct drm_printer *p) +static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; struct intel_context *ce; struct i915_request *rq; - if (list_empty(&b->signalers)) - return; - drm_printf(p, "Signals:\n"); spin_lock_irq(&b->irq_lock); @@ -414,3 +467,17 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, } spin_unlock_irq(&b->irq_lock); } + +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p) +{ + struct intel_breadcrumbs *b; + + b = engine->breadcrumbs; + if (!b) + return; + + drm_printf(p, "IRQ: %s\n", enableddisabled(b->irq_armed)); + if (!list_empty(&b->signalers)) + print_signals(b, p); +} diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h new file mode 100644 index 000000000000..ed3d1deabfbd --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_BREADCRUMBS__ +#define __INTEL_BREADCRUMBS__ + +#include <linux/irq_work.h> + +#include 
"intel_engine_types.h" + +struct drm_printer; +struct i915_request; +struct intel_breadcrumbs; + +struct intel_breadcrumbs * +intel_breadcrumbs_create(struct intel_engine_cs *irq_engine); +void intel_breadcrumbs_free(struct intel_breadcrumbs *b); + +void intel_breadcrumbs_reset(struct intel_breadcrumbs *b); +void intel_breadcrumbs_park(struct intel_breadcrumbs *b); + +static inline void +intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) +{ + irq_work_queue(&engine->breadcrumbs->irq_work); +} + +void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, + struct drm_printer *p); + +bool i915_request_enable_breadcrumb(struct i915_request *request); +void i915_request_cancel_breadcrumb(struct i915_request *request); + +#endif /* __INTEL_BREADCRUMBS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h new file mode 100644 index 000000000000..8e53b9942695 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_BREADCRUMBS_TYPES__ +#define __INTEL_BREADCRUMBS_TYPES__ + +#include <linux/irq_work.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +/* + * Rather than have every client wait upon all user interrupts, + * with the herd waking after every interrupt and each doing the + * heavyweight seqno dance, we delegate the task (of being the + * bottom-half of the user interrupt) to the first client. After + * every interrupt, we wake up one client, who does the heavyweight + * coherent seqno read and either goes back to sleep (if incomplete), + * or wakes up all the completed clients in parallel, before then + * transferring the bottom-half status to the next client in the queue. 
+ * + * Compared to walking the entire list of waiters in a single dedicated + * bottom-half, we reduce the latency of the first waiter by avoiding + * a context switch, but incur additional coherent seqno reads when + * following the chain of request breadcrumbs. Since it is most likely + * that we have a single client waiting on each seqno, then reducing + * the overhead of waking that client is much preferred. + */ +struct intel_breadcrumbs { + spinlock_t irq_lock; /* protects the lists used in hardirq context */ + + /* Not all breadcrumbs are attached to physical HW */ + struct intel_engine_cs *irq_engine; + + struct list_head signalers; + struct list_head signaled_requests; + + struct irq_work irq_work; /* for use from inside irq_lock */ + + unsigned int irq_enabled; + + bool irq_armed; +}; + +#endif /* __INTEL_BREADCRUMBS_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 52db2bde44a3..92a3f25c4006 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -93,57 +93,210 @@ static void intel_context_active_release(struct intel_context *ce) i915_active_release(&ce->active); } -int __intel_context_do_pin(struct intel_context *ce) +static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww) +{ + unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; + int err; + + err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH); + if (err) + return err; + + err = i915_active_acquire(&vma->active); + if (err) + goto err_unpin; + + /* + * And mark it as a globally pinned object to let the shrinker know + * it cannot reclaim the object until we release it. 
+ */ + i915_vma_make_unshrinkable(vma); + vma->obj->mm.dirty = true; + + return 0; + +err_unpin: + i915_vma_unpin(vma); + return err; +} + +static void __context_unpin_state(struct i915_vma *vma) +{ + i915_vma_make_shrinkable(vma); + i915_active_release(&vma->active); + __i915_vma_unpin(vma); +} + +static int __ring_active(struct intel_ring *ring, + struct i915_gem_ww_ctx *ww) +{ + int err; + + err = intel_ring_pin(ring, ww); + if (err) + return err; + + err = i915_active_acquire(&ring->vma->active); + if (err) + goto err_pin; + + return 0; + +err_pin: + intel_ring_unpin(ring); + return err; +} + +static void __ring_retire(struct intel_ring *ring) +{ + i915_active_release(&ring->vma->active); + intel_ring_unpin(ring); +} + +static int intel_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww) { int err; + CE_TRACE(ce, "active\n"); + + err = __ring_active(ce->ring, ww); + if (err) + return err; + + err = intel_timeline_pin(ce->timeline, ww); + if (err) + goto err_ring; + + if (!ce->state) + return 0; + + err = __context_pin_state(ce->state, ww); + if (err) + goto err_timeline; + + + return 0; + +err_timeline: + intel_timeline_unpin(ce->timeline); +err_ring: + __ring_retire(ce->ring); + return err; +} + +static void intel_context_post_unpin(struct intel_context *ce) +{ + if (ce->state) + __context_unpin_state(ce->state); + + intel_timeline_unpin(ce->timeline); + __ring_retire(ce->ring); +} + +int __intel_context_do_pin_ww(struct intel_context *ce, + struct i915_gem_ww_ctx *ww) +{ + bool handoff = false; + void *vaddr; + int err = 0; + if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { err = intel_context_alloc_state(ce); if (err) return err; } - err = i915_active_acquire(&ce->active); + /* + * We always pin the context/ring/timeline here, to ensure a pin + * refcount for __intel_context_active(), which prevent a lock + * inversion of ce->pin_mutex vs dma_resv_lock(). 
+ */ + + err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww); + if (!err && ce->ring->vma->obj) + err = i915_gem_object_lock(ce->ring->vma->obj, ww); + if (!err && ce->state) + err = i915_gem_object_lock(ce->state->obj, ww); + if (!err) + err = intel_context_pre_pin(ce, ww); if (err) return err; - if (mutex_lock_interruptible(&ce->pin_mutex)) { - err = -EINTR; - goto out_release; - } + err = i915_active_acquire(&ce->active); + if (err) + goto err_ctx_unpin; + + err = ce->ops->pre_pin(ce, ww, &vaddr); + if (err) + goto err_release; + + err = mutex_lock_interruptible(&ce->pin_mutex); + if (err) + goto err_post_unpin; if (unlikely(intel_context_is_closed(ce))) { err = -ENOENT; - goto out_unlock; + goto err_unlock; } if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) { err = intel_context_active_acquire(ce); if (unlikely(err)) - goto out_unlock; + goto err_unlock; - err = ce->ops->pin(ce); - if (unlikely(err)) - goto err_active; + err = ce->ops->pin(ce, vaddr); + if (err) { + intel_context_active_release(ce); + goto err_unlock; + } CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n", i915_ggtt_offset(ce->ring->vma), ce->ring->head, ce->ring->tail); + handoff = true; smp_mb__before_atomic(); /* flush pin before it is visible */ atomic_inc(&ce->pin_count); } GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */ - GEM_BUG_ON(i915_active_is_idle(&ce->active)); - goto out_unlock; -err_active: - intel_context_active_release(ce); -out_unlock: +err_unlock: mutex_unlock(&ce->pin_mutex); -out_release: +err_post_unpin: + if (!handoff) + ce->ops->post_unpin(ce); +err_release: i915_active_release(&ce->active); +err_ctx_unpin: + intel_context_post_unpin(ce); + + /* + * Unlock the hwsp_ggtt object since it's shared. + * In principle we can unlock all the global state locked above + * since it's pinned and doesn't need fencing, and will + * thus remain resident until it is explicitly unpinned. 
+ */ + i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj); + + return err; +} + +int __intel_context_do_pin(struct intel_context *ce) +{ + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, true); +retry: + err = __intel_context_do_pin_ww(ce, &ww); + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); return err; } @@ -154,6 +307,7 @@ void intel_context_unpin(struct intel_context *ce) CE_TRACE(ce, "unpin\n"); ce->ops->unpin(ce); + ce->ops->post_unpin(ce); /* * Once released, we may asynchronously drop the active reference. @@ -166,65 +320,6 @@ void intel_context_unpin(struct intel_context *ce) intel_context_put(ce); } -static int __context_pin_state(struct i915_vma *vma) -{ - unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; - int err; - - err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH); - if (err) - return err; - - err = i915_active_acquire(&vma->active); - if (err) - goto err_unpin; - - /* - * And mark it as a globally pinned object to let the shrinker know - * it cannot reclaim the object until we release it. 
- */ - i915_vma_make_unshrinkable(vma); - vma->obj->mm.dirty = true; - - return 0; - -err_unpin: - i915_vma_unpin(vma); - return err; -} - -static void __context_unpin_state(struct i915_vma *vma) -{ - i915_vma_make_shrinkable(vma); - i915_active_release(&vma->active); - __i915_vma_unpin(vma); -} - -static int __ring_active(struct intel_ring *ring) -{ - int err; - - err = intel_ring_pin(ring); - if (err) - return err; - - err = i915_active_acquire(&ring->vma->active); - if (err) - goto err_pin; - - return 0; - -err_pin: - intel_ring_unpin(ring); - return err; -} - -static void __ring_retire(struct intel_ring *ring) -{ - i915_active_release(&ring->vma->active); - intel_ring_unpin(ring); -} - __i915_active_call static void __intel_context_retire(struct i915_active *active) { @@ -235,48 +330,29 @@ static void __intel_context_retire(struct i915_active *active) intel_context_get_avg_runtime_ns(ce)); set_bit(CONTEXT_VALID_BIT, &ce->flags); - if (ce->state) - __context_unpin_state(ce->state); - - intel_timeline_unpin(ce->timeline); - __ring_retire(ce->ring); - + intel_context_post_unpin(ce); intel_context_put(ce); } static int __intel_context_active(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); - int err; - - CE_TRACE(ce, "active\n"); intel_context_get(ce); - err = __ring_active(ce->ring); - if (err) - goto err_put; + /* everything should already be activated by intel_context_pre_pin() */ + GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active)); + __intel_ring_pin(ce->ring); - err = intel_timeline_pin(ce->timeline); - if (err) - goto err_ring; + __intel_timeline_pin(ce->timeline); - if (!ce->state) - return 0; - - err = __context_pin_state(ce->state); - if (err) - goto err_timeline; + if (ce->state) { + GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active)); + __i915_vma_pin(ce->state); + i915_vma_make_unshrinkable(ce->state); + } return 0; - -err_timeline: - intel_timeline_unpin(ce->timeline); 
-err_ring: - __ring_retire(ce->ring); -err_put: - intel_context_put(ce); - return err; } void @@ -382,15 +458,38 @@ int intel_context_prepare_remote_request(struct intel_context *ce, struct i915_request *intel_context_create_request(struct intel_context *ce) { + struct i915_gem_ww_ctx ww; struct i915_request *rq; int err; - err = intel_context_pin(ce); - if (unlikely(err)) - return ERR_PTR(err); + i915_gem_ww_ctx_init(&ww, true); +retry: + err = intel_context_pin_ww(ce, &ww); + if (!err) { + rq = i915_request_create(ce); + intel_context_unpin(ce); + } else if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + rq = ERR_PTR(err); + } else { + rq = ERR_PTR(err); + } + + i915_gem_ww_ctx_fini(&ww); - rq = i915_request_create(ce); - intel_context_unpin(ce); + if (IS_ERR(rq)) + return rq; + + /* + * timeline->mutex should be the inner lock, but is used as outer lock. + * Hack around this to shut up lockdep in selftests.. + */ + lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie); + mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_); + mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_); + rq->cookie = lockdep_pin_lock(&ce->timeline->mutex); return rq; } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 07be021882cc..fda2eba81e22 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -25,6 +25,8 @@ ##__VA_ARGS__); \ } while (0) +struct i915_gem_ww_ctx; + void intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine); void intel_context_fini(struct intel_context *ce); @@ -81,6 +83,8 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce) } int __intel_context_do_pin(struct intel_context *ce); +int __intel_context_do_pin_ww(struct intel_context *ce, + struct i915_gem_ww_ctx *ww); static inline bool intel_context_pin_if_active(struct intel_context *ce) { @@ -95,6 +99,15 @@ 
static inline int intel_context_pin(struct intel_context *ce) return __intel_context_do_pin(ce); } +static inline int intel_context_pin_ww(struct intel_context *ce, + struct i915_gem_ww_ctx *ww) +{ + if (likely(intel_context_pin_if_active(ce))) + return 0; + + return __intel_context_do_pin_ww(ce, ww); +} + static inline void __intel_context_pin(struct intel_context *ce) { GEM_BUG_ON(!intel_context_is_pinned(ce)); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 4954b0df4864..552cb57a2e8c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -23,6 +23,7 @@ DECLARE_EWMA(runtime, 3, 8); struct i915_gem_context; +struct i915_gem_ww_ctx; struct i915_vma; struct intel_context; struct intel_ring; @@ -30,8 +31,10 @@ struct intel_ring; struct intel_context_ops { int (*alloc)(struct intel_context *ce); - int (*pin)(struct intel_context *ce); + int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); + int (*pin)(struct intel_context *ce, void *vaddr); void (*unpin)(struct intel_context *ce); + void (*post_unpin)(struct intel_context *ce); void (*enter)(struct intel_context *ce); void (*exit)(struct intel_context *ce); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index a9249a23903a..760fefdfe392 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -223,26 +223,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, void intel_engine_init_execlists(struct intel_engine_cs *engine); -void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); - -void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); - -static inline void -intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) -{ - 
irq_work_queue(&engine->breadcrumbs.irq_work); -} - -void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); - -void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine, - struct intel_context *ce); - -void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, - struct drm_printer *p); - static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); @@ -265,22 +245,14 @@ static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u } static inline u32 * -__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) +__gen8_emit_write_rcs(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1) { - /* We're using qword write, offset should be aligned to 8 bytes. */ - GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - - /* w/a for post sync ops following a GPGPU operation we - * need a prior CS_STALL, which is emitted by the flush - * following the batch. - */ *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0; - *cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; - *cs++ = gtt_offset; + *cs++ = flags1 | PIPE_CONTROL_QW_WRITE; + *cs++ = offset; *cs++ = 0; *cs++ = value; - /* We're thrashing one dword of HWS. */ - *cs++ = 0; + *cs++ = 0; /* We're thrashing one extra dword. */ return cs; } @@ -288,13 +260,38 @@ __gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 f static inline u32* gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) { - return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags); + /* We're using qword write, offset should be aligned to 8 bytes. 
*/ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + return __gen8_emit_write_rcs(cs, + value, + gtt_offset, + 0, + flags | PIPE_CONTROL_GLOBAL_GTT_IVB); } static inline u32* gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) { - return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1); + /* We're using qword write, offset should be aligned to 8 bytes. */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + return __gen8_emit_write_rcs(cs, + value, + gtt_offset, + flags0, + flags1 | PIPE_CONTROL_GLOBAL_GTT_IVB); +} + +static inline u32 * +__gen8_emit_flush_dw(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + *cs++ = (MI_FLUSH_DW + 1) | flags; + *cs++ = gtt_offset; + *cs++ = 0; + *cs++ = value; + + return cs; } static inline u32 * @@ -305,12 +302,10 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags; - *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; - *cs++ = 0; - *cs++ = value; - - return cs; + return __gen8_emit_flush_dw(cs, + value, + gtt_offset | MI_FLUSH_DW_USE_GTT, + flags | MI_FLUSH_DW_OP_STOREDW); } static inline void __intel_engine_reset(struct intel_engine_cs *engine, @@ -357,4 +352,13 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) return intel_engine_has_preemption(engine); } +static inline bool +intel_engine_has_heartbeat(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) + return false; + + return READ_ONCE(engine->props.heartbeat_interval_ms); +} + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index dd1a42c4d344..efdeb7b7b2a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -28,6 +28,7 @@ #include "i915_drv.h" +#include 
"intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" @@ -213,7 +214,7 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) break; default: MISSING_CASE(class); - /* fall through */ + fallthrough; case VIDEO_DECODE_CLASS: case VIDEO_ENHANCEMENT_CLASS: case COPY_ENGINE_CLASS: @@ -370,7 +371,8 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) * instances. */ if ((INTEL_GEN(i915) >= 11 && - engine->gt->info.vdbox_sfc_access & engine->mask) || + (engine->gt->info.vdbox_sfc_access & + BIT(engine->instance))) || (INTEL_GEN(i915) >= 9 && engine->instance == 0)) engine->uabi_capabilities |= I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; @@ -634,7 +636,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, else flags = PIN_HIGH; - return i915_ggtt_pin(vma, 0, flags); + return i915_ggtt_pin(vma, NULL, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) @@ -700,8 +702,13 @@ static int engine_setup_common(struct intel_engine_cs *engine) if (err) return err; + engine->breadcrumbs = intel_breadcrumbs_create(engine); + if (!engine->breadcrumbs) { + err = -ENOMEM; + goto err_status; + } + intel_engine_init_active(engine, ENGINE_PHYSICAL); - intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); @@ -716,6 +723,10 @@ static int engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_ctx_wa(engine); return 0; + +err_status: + cleanup_status_page(engine); + return err; } struct measure_breadcrumb { @@ -785,9 +796,11 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) } static struct intel_context * -create_kernel_context(struct intel_engine_cs *engine) +create_pinned_context(struct intel_engine_cs *engine, + unsigned int hwsp, + struct lock_class_key *key, + const char *name) { - static struct lock_class_key kernel; struct 
intel_context *ce; int err; @@ -796,6 +809,7 @@ create_kernel_context(struct intel_engine_cs *engine) return ce; __set_bit(CONTEXT_BARRIER_BIT, &ce->flags); + ce->timeline = page_pack_bits(NULL, hwsp); err = intel_context_pin(ce); /* perma-pin so it is always available */ if (err) { @@ -809,11 +823,20 @@ create_kernel_context(struct intel_engine_cs *engine) * should we need to inject GPU operations during their request * construction. */ - lockdep_set_class(&ce->timeline->mutex, &kernel); + lockdep_set_class_and_name(&ce->timeline->mutex, key, name); return ce; } +static struct intel_context * +create_kernel_context(struct intel_engine_cs *engine) +{ + static struct lock_class_key kernel; + + return create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR, + &kernel, "kernel_context"); +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -902,9 +925,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ cleanup_status_page(engine); + intel_breadcrumbs_free(engine->breadcrumbs); intel_engine_fini_retire(engine); - intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); if (engine->default_state) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 8ffdf676c0a0..5067d0524d4b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -177,36 +177,82 @@ void intel_engine_init_heartbeat(struct intel_engine_cs *engine) INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat); } +static int __intel_engine_pulse(struct intel_engine_cs *engine) +{ + struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; + struct intel_context *ce = engine->kernel_context; + struct i915_request *rq; + + lockdep_assert_held(&ce->timeline->mutex); + 
GEM_BUG_ON(!intel_engine_has_preemption(engine)); + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); + intel_context_exit(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); + idle_pulse(engine, rq); + + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER); + + return 0; +} + +static unsigned long set_heartbeat(struct intel_engine_cs *engine, + unsigned long delay) +{ + unsigned long old; + + old = xchg(&engine->props.heartbeat_interval_ms, delay); + if (delay) + intel_engine_unpark_heartbeat(engine); + else + intel_engine_park_heartbeat(engine); + + return old; +} + int intel_engine_set_heartbeat(struct intel_engine_cs *engine, unsigned long delay) { - int err; + struct intel_context *ce = engine->kernel_context; + int err = 0; - /* Send one last pulse before to cleanup persistent hogs */ - if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) { - err = intel_engine_pulse(engine); - if (err) - return err; - } + if (!delay && !intel_engine_has_preempt_reset(engine)) + return -ENODEV; + + intel_engine_pm_get(engine); + + err = mutex_lock_interruptible(&ce->timeline->mutex); + if (err) + goto out_rpm; - WRITE_ONCE(engine->props.heartbeat_interval_ms, delay); + if (delay != engine->props.heartbeat_interval_ms) { + unsigned long saved = set_heartbeat(engine, delay); - if (intel_engine_pm_get_if_awake(engine)) { - if (delay) - intel_engine_unpark_heartbeat(engine); - else - intel_engine_park_heartbeat(engine); - intel_engine_pm_put(engine); + /* recheck current execution */ + if (intel_engine_has_preemption(engine)) { + err = __intel_engine_pulse(engine); + if (err) + set_heartbeat(engine, saved); + } } - return 0; + mutex_unlock(&ce->timeline->mutex); + +out_rpm: + intel_engine_pm_put(engine); + return err; } int intel_engine_pulse(struct intel_engine_cs 
*engine) { - struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; struct intel_context *ce = engine->kernel_context; - struct i915_request *rq; int err; if (!intel_engine_has_preemption(engine)) @@ -215,30 +261,12 @@ int intel_engine_pulse(struct intel_engine_cs *engine) if (!intel_engine_pm_get_if_awake(engine)) return 0; - if (mutex_lock_interruptible(&ce->timeline->mutex)) { - err = -EINTR; - goto out_rpm; - } - - intel_context_enter(ce); - rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); - intel_context_exit(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_unlock; + err = -EINTR; + if (!mutex_lock_interruptible(&ce->timeline->mutex)) { + err = __intel_engine_pulse(engine); + mutex_unlock(&ce->timeline->mutex); } - __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); - idle_pulse(engine, rq); - - __i915_request_commit(rq); - __i915_request_queue(rq, &attr); - GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER); - err = 0; - -out_unlock: - mutex_unlock(&ce->timeline->mutex); -out_rpm: intel_engine_pm_put(engine); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 8ec3eecf3e39..f7b2e07e2229 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -6,6 +6,7 @@ #include "i915_drv.h" +#include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_heartbeat.h" @@ -247,7 +248,7 @@ static int __engine_park(struct intel_wakeref *wf) call_idle_barriers(engine); /* cleanup after wedging */ intel_engine_park_heartbeat(engine); - intel_engine_disarm_breadcrumbs(engine); + intel_breadcrumbs_park(engine->breadcrumbs); /* Must be reset upon idling, or we may miss the busy wakeup. 
*/ GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 8de92fd7d392..ee6312601c56 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -22,6 +22,7 @@ #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" +#include "intel_breadcrumbs_types.h" #include "intel_sseu.h" #include "intel_timeline_types.h" #include "intel_uncore.h" @@ -277,7 +278,7 @@ struct intel_engine_execlists { * * Note these register may be either mmio or HWSP shadow. */ - u32 *csb_status; + u64 *csb_status; /** * @csb_size: context status buffer FIFO size @@ -373,34 +374,8 @@ struct intel_engine_cs { */ struct ewma__engine_latency latency; - /* Rather than have every client wait upon all user interrupts, - * with the herd waking after every interrupt and each doing the - * heavyweight seqno dance, we delegate the task (of being the - * bottom-half of the user interrupt) to the first client. After - * every interrupt, we wake up one client, who does the heavyweight - * coherent seqno read and either goes back to sleep (if incomplete), - * or wakes up all the completed clients in parallel, before then - * transferring the bottom-half status to the next client in the queue. - * - * Compared to walking the entire list of waiters in a single dedicated - * bottom-half, we reduce the latency of the first waiter by avoiding - * a context switch, but incur additional coherent seqno reads when - * following the chain of request breadcrumbs. Since it is most likely - * that we have a single client waiting on each seqno, then reducing - * the overhead of waking that client is much preferred. 
- */ - struct intel_breadcrumbs { - spinlock_t irq_lock; - struct list_head signalers; - - struct list_head signaled_requests; - - struct irq_work irq_work; /* for use from inside irq_lock */ - - unsigned int irq_enabled; - - bool irq_armed; - } breadcrumbs; + /* Keep track of all the seqno used, a trail of breadcrumbs */ + struct intel_breadcrumbs *breadcrumbs; struct intel_engine_pmu { /** diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 62979ea591f0..81c05f551b9c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -78,8 +78,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915) { int ret; - stash_init(&i915->mm.wc_stash); - /* * Note that we use page colouring to enforce a guard page at the * end of the address space. This is required as the CS may prefetch @@ -232,7 +230,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, /* Fill the allocated but "unused" space beyond the end of the buffer */ while (gte < end) - gen8_set_pte(gte++, vm->scratch[0].encode); + gen8_set_pte(gte++, vm->scratch[0]->encode); /* * We want to flush the TLBs only after we're certain all the PTE @@ -283,7 +281,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, /* Fill the allocated but "unused" space beyond the end of the buffer */ while (gte < end) - iowrite32(vm->scratch[0].encode, gte++); + iowrite32(vm->scratch[0]->encode, gte++); /* * We want to flush the TLBs only after we're certain all the PTE @@ -303,7 +301,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned int first_entry = start / I915_GTT_PAGE_SIZE; unsigned int num_entries = length / I915_GTT_PAGE_SIZE; - const gen8_pte_t scratch_pte = vm->scratch[0].encode; + const gen8_pte_t scratch_pte = vm->scratch[0]->encode; gen8_pte_t __iomem *gtt_base = (gen8_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = 
ggtt_total_entries(ggtt) - first_entry; @@ -401,7 +399,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = vm->scratch[0].encode; + scratch_pte = vm->scratch[0]->encode; for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, >t_base[i]); } @@ -436,16 +434,17 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm, intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); } -static int ggtt_bind_vma(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static void ggtt_bind_vma(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK)) - return 0; + return; /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ pte_flags = 0; @@ -454,8 +453,6 @@ static int ggtt_bind_vma(struct i915_address_space *vm, vm->insert_entries(vm, vma, cache_level, pte_flags); vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; - - return 0; } static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) @@ -568,31 +565,25 @@ err: return ret; } -static int aliasing_gtt_bind_vma(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static void aliasing_gtt_bind_vma(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { u32 pte_flags; - int ret; /* Currently applicable only to VLV */ pte_flags = 0; if (i915_gem_object_is_readonly(vma->obj)) pte_flags |= PTE_READ_ONLY; - if (flags & I915_VMA_LOCAL_BIND) { - struct i915_ppgtt *alias = i915_vm_to_ggtt(vm)->alias; - - ret = ppgtt_bind_vma(&alias->vm, vma, cache_level, flags); - if (ret) - return ret; - } + if (flags & 
I915_VMA_LOCAL_BIND) + ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm, + stash, vma, cache_level, flags); if (flags & I915_VMA_GLOBAL_BIND) vm->insert_entries(vm, vma, cache_level, pte_flags); - - return 0; } static void aliasing_gtt_unbind_vma(struct i915_address_space *vm, @@ -607,6 +598,7 @@ static void aliasing_gtt_unbind_vma(struct i915_address_space *vm, static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) { + struct i915_vm_pt_stash stash = {}; struct i915_ppgtt *ppgtt; int err; @@ -619,15 +611,21 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) goto err_ppgtt; } + err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total); + if (err) + goto err_ppgtt; + + err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash); + if (err) + goto err_stash; + /* * Note we only pre-allocate as far as the end of the global * GTT. On 48b / 4-level page-tables, the difference is very, * very significant! We have to preallocate as GVT/vgpu does * not like the page directory disappearing. */ - err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total); - if (err) - goto err_ppgtt; + ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total); ggtt->alias = ppgtt; ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; @@ -638,8 +636,11 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; + i915_vm_free_pt_stash(&ppgtt->vm, &stash); return 0; +err_stash: + i915_vm_free_pt_stash(&ppgtt->vm, &stash); err_ppgtt: i915_vm_put(&ppgtt->vm); return err; @@ -715,18 +716,11 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) void i915_ggtt_driver_release(struct drm_i915_private *i915) { struct i915_ggtt *ggtt = &i915->ggtt; - struct pagevec *pvec; fini_aliasing_ppgtt(ggtt); intel_ggtt_fini_fences(ggtt); ggtt_cleanup_hw(ggtt); - - pvec = &i915->mm.wc_stash.pvec; - if (pvec->nr) { - set_pages_array_wb(pvec->pages, pvec->nr); - __pagevec_release(pvec); 
- } } static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -789,7 +783,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return -ENOMEM; } - ret = setup_scratch_page(&ggtt->vm, GFP_DMA32); + ret = setup_scratch_page(&ggtt->vm); if (ret) { drm_err(&i915->drm, "Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ @@ -797,8 +791,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return ret; } - ggtt->vm.scratch[0].encode = - ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]), + ggtt->vm.scratch[0]->encode = + ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]), I915_CACHE_NONE, 0); return 0; @@ -824,7 +818,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm) struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); iounmap(ggtt->gsm); - cleanup_scratch_page(vm); + free_scratch(vm); } static struct resource pci_resource(struct pci_dev *pdev, int bar) @@ -852,6 +846,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) else size = gen8_get_total_gtt_size(snb_gmch_ctl); + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; ggtt->vm.cleanup = gen6_gmch_remove; ggtt->vm.insert_page = gen8_ggtt_insert_page; @@ -1000,6 +996,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) size = gen6_get_total_gtt_size(snb_gmch_ctl); ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.clear_range = nop_clear_range; if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) ggtt->vm.clear_range = gen6_ggtt_clear_range; @@ -1050,6 +1048,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ggtt->gmadr = (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->do_idle_maps = needs_idle_maps(i915); ggtt->vm.insert_page = i915_ggtt_insert_page; ggtt->vm.insert_entries = i915_ggtt_insert_entries; @@ -1165,11 +1165,6 @@ void 
i915_ggtt_disable_guc(struct i915_ggtt *ggtt) ggtt->invalidate(ggtt); } -static unsigned int clear_bind(struct i915_vma *vma) -{ - return atomic_fetch_and(~I915_VMA_BIND_MASK, &vma->flags); -} - void i915_ggtt_resume(struct i915_ggtt *ggtt) { struct i915_vma *vma; @@ -1187,11 +1182,13 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt) /* clflush objects bound into the GGTT and rebind them. */ list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; - unsigned int was_bound = clear_bind(vma); + unsigned int was_bound = + atomic_read(&vma->flags) & I915_VMA_BIND_MASK; - WARN_ON(i915_vma_bind(vma, - obj ? obj->cache_level : 0, - was_bound, NULL)); + GEM_BUG_ON(!was_bound); + vma->ops->bind_vma(&ggtt->vm, NULL, vma, + obj ? obj->cache_level : 0, + was_bound); if (obj) { /* only used during resume => exclusive access */ flush |= fetch_and_zero(&obj->write_domain); obj->read_domains |= I915_GEM_DOMAIN_GTT; @@ -1437,7 +1434,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) switch (vma->ggtt_view.type) { default: GEM_BUG_ON(vma->ggtt_view.type); - /* fall through */ + fallthrough; case I915_GGTT_VIEW_NORMAL: vma->pages = vma->obj->mm.pages; return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index e0755f1a904b..39b428c5049c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -356,7 +356,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) goto err_unref; } - ret = i915_ggtt_pin(vma, 0, PIN_HIGH); + ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); if (ret) goto err_unref; @@ -406,21 +406,20 @@ static int __engines_record_defaults(struct intel_gt *gt) /* We must be able to switch to something! 
*/ GEM_BUG_ON(!engine->kernel_context); - err = intel_renderstate_init(&so, engine); - if (err) - goto out; - ce = intel_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; } - rq = intel_context_create_request(ce); + err = intel_renderstate_init(&so, ce); + if (err) + goto err; + + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - intel_context_put(ce); - goto out; + goto err_fini; } err = intel_engine_emit_ctx_wa(rq); @@ -434,9 +433,13 @@ static int __engines_record_defaults(struct intel_gt *gt) err_rq: requests[id] = i915_request_get(rq); i915_request_add(rq); - intel_renderstate_fini(&so); - if (err) +err_fini: + intel_renderstate_fini(&so, ce); +err: + if (err) { + intel_context_put(ce); goto out; + } } /* Flush the default context image to memory, and enable powersaving. */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 418ae184cecf..104cb30e8c13 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -35,39 +35,65 @@ static void node_free(struct intel_gt_buffer_pool_node *node) { i915_gem_object_put(node->obj); i915_active_fini(&node->active); - kfree(node); + kfree_rcu(node, rcu); } -static void pool_free_work(struct work_struct *wrk) +static bool pool_free_older_than(struct intel_gt_buffer_pool *pool, long keep) { - struct intel_gt_buffer_pool *pool = - container_of(wrk, typeof(*pool), work.work); - struct intel_gt_buffer_pool_node *node, *next; - unsigned long old = jiffies - HZ; + struct intel_gt_buffer_pool_node *node, *stale = NULL; bool active = false; - LIST_HEAD(stale); int n; /* Free buffers that have not been used in the past second */ - spin_lock_irq(&pool->lock); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { struct list_head *list = &pool->cache_list[n]; - /* Most recent at head; oldest at tail */ - list_for_each_entry_safe_reverse(node, next, list, link) { - if 
(time_before(node->age, old)) - break; + if (list_empty(list)) + continue; + + if (spin_trylock_irq(&pool->lock)) { + struct list_head *pos; + + /* Most recent at head; oldest at tail */ + list_for_each_prev(pos, list) { + unsigned long age; + + node = list_entry(pos, typeof(*node), link); + + age = READ_ONCE(node->age); + if (!age || jiffies - age < keep) + break; + + /* Check we are the first to claim this node */ + if (!xchg(&node->age, 0)) + break; - list_move(&node->link, &stale); + node->free = stale; + stale = node; + } + if (!list_is_last(pos, list)) + __list_del_many(pos, list); + + spin_unlock_irq(&pool->lock); } + active |= !list_empty(list); } - spin_unlock_irq(&pool->lock); - list_for_each_entry_safe(node, next, &stale, link) + while ((node = stale)) { + stale = stale->free; node_free(node); + } + + return active; +} + +static void pool_free_work(struct work_struct *wrk) +{ + struct intel_gt_buffer_pool *pool = + container_of(wrk, typeof(*pool), work.work); - if (active) + if (pool_free_older_than(pool, HZ)) schedule_delayed_work(&pool->work, round_jiffies_up_relative(HZ)); } @@ -108,9 +134,10 @@ static void pool_retire(struct i915_active *ref) /* Return this object to the shrinker pool */ i915_gem_object_make_purgeable(node->obj); + GEM_BUG_ON(node->age); spin_lock_irqsave(&pool->lock, flags); - node->age = jiffies; - list_add(&node->link, list); + list_add_rcu(&node->link, list); + WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */ spin_unlock_irqrestore(&pool->lock, flags); schedule_delayed_work(&pool->work, @@ -129,6 +156,7 @@ node_create(struct intel_gt_buffer_pool *pool, size_t sz) if (!node) return ERR_PTR(-ENOMEM); + node->age = 0; node->pool = pool; i915_active_init(&node->active, pool_active, pool_retire); @@ -151,20 +179,30 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size) struct intel_gt_buffer_pool *pool = >->buffer_pool; struct intel_gt_buffer_pool_node *node; struct list_head *list; - unsigned long flags; 
int ret; size = PAGE_ALIGN(size); list = bucket_for_size(pool, size); - spin_lock_irqsave(&pool->lock, flags); - list_for_each_entry(node, list, link) { + rcu_read_lock(); + list_for_each_entry_rcu(node, list, link) { + unsigned long age; + if (node->obj->base.size < size) continue; - list_del(&node->link); - break; + + age = READ_ONCE(node->age); + if (!age) + continue; + + if (cmpxchg(&node->age, age, 0) == age) { + spin_lock_irq(&pool->lock); + list_del_rcu(&node->link); + spin_unlock_irq(&pool->lock); + break; + } } - spin_unlock_irqrestore(&pool->lock, flags); + rcu_read_unlock(); if (&node->link == list) { node = node_create(pool, size); @@ -192,28 +230,13 @@ void intel_gt_init_buffer_pool(struct intel_gt *gt) INIT_DELAYED_WORK(&pool->work, pool_free_work); } -static void pool_free_imm(struct intel_gt_buffer_pool *pool) -{ - int n; - - spin_lock_irq(&pool->lock); - for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { - struct intel_gt_buffer_pool_node *node, *next; - struct list_head *list = &pool->cache_list[n]; - - list_for_each_entry_safe(node, next, list, link) - node_free(node); - INIT_LIST_HEAD(list); - } - spin_unlock_irq(&pool->lock); -} - void intel_gt_flush_buffer_pool(struct intel_gt *gt) { struct intel_gt_buffer_pool *pool = >->buffer_pool; do { - pool_free_imm(pool); + while (pool_free_older_than(pool, 0)) + ; } while (cancel_delayed_work_sync(&pool->work)); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h index e28bdda771ed..bcf1658c9633 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h @@ -25,7 +25,11 @@ struct intel_gt_buffer_pool_node { struct i915_active active; struct drm_i915_gem_object *obj; struct list_head link; - struct intel_gt_buffer_pool *pool; + union { + struct intel_gt_buffer_pool *pool; + struct intel_gt_buffer_pool_node *free; + struct rcu_head rcu; + }; unsigned long age; }; diff 
--git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index b05da68e52f4..257063a57101 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -8,6 +8,7 @@ #include "i915_drv.h" #include "i915_irq.h" +#include "intel_breadcrumbs.h" #include "intel_gt.h" #include "intel_gt_irq.h" #include "intel_uncore.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 2a72cce63fd9..3f1114b58b01 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -11,160 +11,24 @@ #include "intel_gt.h" #include "intel_gtt.h" -void stash_init(struct pagestash *stash) +struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz) { - pagevec_init(&stash->pvec); - spin_lock_init(&stash->lock); -} - -static struct page *stash_pop_page(struct pagestash *stash) -{ - struct page *page = NULL; - - spin_lock(&stash->lock); - if (likely(stash->pvec.nr)) - page = stash->pvec.pages[--stash->pvec.nr]; - spin_unlock(&stash->lock); - - return page; -} - -static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec) -{ - unsigned int nr; - - spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING); - - nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec)); - memcpy(stash->pvec.pages + stash->pvec.nr, - pvec->pages + pvec->nr - nr, - sizeof(pvec->pages[0]) * nr); - stash->pvec.nr += nr; - - spin_unlock(&stash->lock); - - pvec->nr -= nr; -} - -static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) -{ - struct pagevec stack; - struct page *page; - if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) i915_gem_shrink_all(vm->i915); - page = stash_pop_page(&vm->free_pages); - if (page) - return page; - - if (!vm->pt_kmap_wc) - return alloc_page(gfp); - - /* Look in our global stash of WC pages... 
*/ - page = stash_pop_page(&vm->i915->mm.wc_stash); - if (page) - return page; - - /* - * Otherwise batch allocate pages to amortize cost of set_pages_wc. - * - * We have to be careful as page allocation may trigger the shrinker - * (via direct reclaim) which will fill up the WC stash underneath us. - * So we add our WB pages into a temporary pvec on the stack and merge - * them into the WC stash after all the allocations are complete. - */ - pagevec_init(&stack); - do { - struct page *page; - - page = alloc_page(gfp); - if (unlikely(!page)) - break; - - stack.pages[stack.nr++] = page; - } while (pagevec_space(&stack)); - - if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) { - page = stack.pages[--stack.nr]; - - /* Merge spare WC pages to the global stash */ - if (stack.nr) - stash_push_pagevec(&vm->i915->mm.wc_stash, &stack); - - /* Push any surplus WC pages onto the local VM stash */ - if (stack.nr) - stash_push_pagevec(&vm->free_pages, &stack); - } - - /* Return unwanted leftovers */ - if (unlikely(stack.nr)) { - WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr)); - __pagevec_release(&stack); - } - - return page; + return i915_gem_object_create_internal(vm->i915, sz); } -static void vm_free_pages_release(struct i915_address_space *vm, - bool immediate) +int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj) { - struct pagevec *pvec = &vm->free_pages.pvec; - struct pagevec stack; - - lockdep_assert_held(&vm->free_pages.lock); - GEM_BUG_ON(!pagevec_count(pvec)); - - if (vm->pt_kmap_wc) { - /* - * When we use WC, first fill up the global stash and then - * only if full immediately free the overflow. - */ - stash_push_pagevec(&vm->i915->mm.wc_stash, pvec); - - /* - * As we have made some room in the VM's free_pages, - * we can wait for it to fill again. Unless we are - * inside i915_address_space_fini() and must - * immediately release the pages! - */ - if (pvec->nr <= (immediate ? 
0 : PAGEVEC_SIZE - 1)) - return; + int err; - /* - * We have to drop the lock to allow ourselves to sleep, - * so take a copy of the pvec and clear the stash for - * others to use it as we sleep. - */ - stack = *pvec; - pagevec_reinit(pvec); - spin_unlock(&vm->free_pages.lock); - - pvec = &stack; - set_pages_array_wb(pvec->pages, pvec->nr); - - spin_lock(&vm->free_pages.lock); - } + err = i915_gem_object_pin_pages(obj); + if (err) + return err; - __pagevec_release(pvec); -} - -static void vm_free_page(struct i915_address_space *vm, struct page *page) -{ - /* - * On !llc, we need to change the pages back to WB. We only do so - * in bulk, so we rarely need to change the page attributes here, - * but doing so requires a stop_machine() from deep inside arch/x86/mm. - * To make detection of the possible sleep more likely, use an - * unconditional might_sleep() for everybody. - */ - might_sleep(); - spin_lock(&vm->free_pages.lock); - while (!pagevec_space(&vm->free_pages.pvec)) - vm_free_pages_release(vm, false); - GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE); - pagevec_add(&vm->free_pages.pvec, page); - spin_unlock(&vm->free_pages.lock); + i915_gem_object_make_unshrinkable(obj); + return 0; } void __i915_vm_close(struct i915_address_space *vm) @@ -194,14 +58,7 @@ void __i915_vm_close(struct i915_address_space *vm) void i915_address_space_fini(struct i915_address_space *vm) { - spin_lock(&vm->free_pages.lock); - if (pagevec_count(&vm->free_pages.pvec)) - vm_free_pages_release(vm, true); - GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec)); - spin_unlock(&vm->free_pages.lock); - drm_mm_takedown(&vm->mm); - mutex_destroy(&vm->mutex); } @@ -246,8 +103,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) drm_mm_init(&vm->mm, 0, vm->total); vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; - stash_init(&vm->free_pages); - INIT_LIST_HEAD(&vm->bound_list); } @@ -264,64 +119,50 @@ void clear_pages(struct i915_vma *vma) 
memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); } -static int __setup_page_dma(struct i915_address_space *vm, - struct i915_page_dma *p, - gfp_t gfp) -{ - p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL); - if (unlikely(!p->page)) - return -ENOMEM; - - p->daddr = dma_map_page_attrs(vm->dma, - p->page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_NO_WARN); - if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { - vm_free_page(vm, p->page); - return -ENOMEM; - } - - return 0; -} - -int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) +dma_addr_t __px_dma(struct drm_i915_gem_object *p) { - return __setup_page_dma(vm, p, __GFP_HIGHMEM); + GEM_BUG_ON(!i915_gem_object_has_pages(p)); + return sg_dma_address(p->mm.pages->sgl); } -void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) +struct page *__px_page(struct drm_i915_gem_object *p) { - dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - vm_free_page(vm, p->page); + GEM_BUG_ON(!i915_gem_object_has_pages(p)); + return sg_page(p->mm.pages->sgl); } void -fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count) +fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count) { - kunmap_atomic(memset64(kmap_atomic(p->page), val, count)); + struct page *page = __px_page(p); + void *vaddr; + + vaddr = kmap(page); + memset64(vaddr, val, count); + clflush_cache_range(vaddr, PAGE_SIZE); + kunmap(page); } -static void poison_scratch_page(struct page *page, unsigned long size) +static void poison_scratch_page(struct drm_i915_gem_object *scratch) { - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - return; + struct sgt_iter sgt; + struct page *page; + u8 val; - GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + val = 0; + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + val = POISON_FREE; - do { + for_each_sgt_page(page, sgt, scratch->mm.pages) { void *vaddr; vaddr = kmap(page); - memset(vaddr, 
POISON_FREE, PAGE_SIZE); + memset(vaddr, val, PAGE_SIZE); kunmap(page); - - page = pfn_to_page(page_to_pfn(page) + 1); - size -= PAGE_SIZE; - } while (size); + } } -int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) +int setup_scratch_page(struct i915_address_space *vm) { unsigned long size; @@ -338,21 +179,27 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) */ size = I915_GTT_PAGE_SIZE_4K; if (i915_vm_is_4lvl(vm) && - HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) size = I915_GTT_PAGE_SIZE_64K; - gfp |= __GFP_NOWARN; - } - gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL; do { - unsigned int order = get_order(size); - struct page *page; - dma_addr_t addr; + struct drm_i915_gem_object *obj; - page = alloc_pages(gfp, order); - if (unlikely(!page)) + obj = vm->alloc_pt_dma(vm, size); + if (IS_ERR(obj)) goto skip; + if (pin_pt_dma(vm, obj)) + goto skip_obj; + + /* We need a single contiguous page for our scratch */ + if (obj->mm.page_sizes.sg < size) + goto skip_obj; + + /* And it needs to be correspondingly aligned */ + if (__px_dma(obj) & (size - 1)) + goto skip_obj; + /* * Use a non-zero scratch page for debugging. * @@ -362,61 +209,28 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) * should it ever be accidentally used, the effect should be * fairly benign. 
*/ - poison_scratch_page(page, size); - - addr = dma_map_page_attrs(vm->dma, - page, 0, size, - PCI_DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_NO_WARN); - if (unlikely(dma_mapping_error(vm->dma, addr))) - goto free_page; - - if (unlikely(!IS_ALIGNED(addr, size))) - goto unmap_page; - - vm->scratch[0].base.page = page; - vm->scratch[0].base.daddr = addr; - vm->scratch_order = order; + poison_scratch_page(obj); + + vm->scratch[0] = obj; + vm->scratch_order = get_order(size); return 0; -unmap_page: - dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL); -free_page: - __free_pages(page, order); +skip_obj: + i915_gem_object_put(obj); skip: if (size == I915_GTT_PAGE_SIZE_4K) return -ENOMEM; size = I915_GTT_PAGE_SIZE_4K; - gfp &= ~__GFP_NOWARN; } while (1); } -void cleanup_scratch_page(struct i915_address_space *vm) -{ - struct i915_page_dma *p = px_base(&vm->scratch[0]); - unsigned int order = vm->scratch_order; - - dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT, - PCI_DMA_BIDIRECTIONAL); - __free_pages(p->page, order); -} - void free_scratch(struct i915_address_space *vm) { int i; - if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */ - return; - - for (i = 1; i <= vm->top; i++) { - if (!px_dma(&vm->scratch[i])) - break; - cleanup_page_dma(vm, px_base(&vm->scratch[i])); - } - - cleanup_scratch_page(vm); + for (i = 0; i <= vm->top; i++) + i915_gem_object_put(vm->scratch[i]); } void gtt_write_workarounds(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index f2b75078e05f..c13c650ced22 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -134,38 +134,29 @@ typedef u64 gen8_pte_t; #define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) +enum i915_cache_level; + +struct drm_i915_file_private; +struct drm_i915_gem_object; struct i915_fence_reg; +struct i915_vma; +struct intel_gt; #define for_each_sgt_daddr(__dp, __iter, __sgt) \ 
__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE) -struct i915_page_dma { - struct page *page; +struct i915_page_table { + struct drm_i915_gem_object *base; union { - dma_addr_t daddr; - - /* - * For gen6/gen7 only. This is the offset in the GGTT - * where the page directory entries for PPGTT begin - */ - u32 ggtt_offset; + atomic_t used; + struct i915_page_table *stash; }; }; -struct i915_page_scratch { - struct i915_page_dma base; - u64 encode; -}; - -struct i915_page_table { - struct i915_page_dma base; - atomic_t used; -}; - struct i915_page_directory { struct i915_page_table pt; spinlock_t lock; - void *entry[512]; + void **entry; }; #define __px_choose_expr(x, type, expr, other) \ @@ -176,12 +167,14 @@ struct i915_page_directory { other) #define px_base(px) \ - __px_choose_expr(px, struct i915_page_dma *, __x, \ - __px_choose_expr(px, struct i915_page_scratch *, &__x->base, \ - __px_choose_expr(px, struct i915_page_table *, &__x->base, \ - __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \ - (void)0)))) -#define px_dma(px) (px_base(px)->daddr) + __px_choose_expr(px, struct drm_i915_gem_object *, __x, \ + __px_choose_expr(px, struct i915_page_table *, __x->base, \ + __px_choose_expr(px, struct i915_page_directory *, __x->pt.base, \ + (void)0))) + +struct page *__px_page(struct drm_i915_gem_object *p); +dma_addr_t __px_dma(struct drm_i915_gem_object *p); +#define px_dma(px) (__px_dma(px_base(px))) #define px_pt(px) \ __px_choose_expr(px, struct i915_page_table *, __x, \ @@ -189,19 +182,18 @@ struct i915_page_directory { (void)0)) #define px_used(px) (&px_pt(px)->used) -enum i915_cache_level; - -struct drm_i915_file_private; -struct drm_i915_gem_object; -struct i915_vma; -struct intel_gt; +struct i915_vm_pt_stash { + /* preallocated chains of page tables/directories */ + struct i915_page_table *pt[2]; +}; struct i915_vma_ops { /* Map an object into an address space with the given cache flags. 
*/ - int (*bind_vma)(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags); + void (*bind_vma)(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); /* * Unmap an object from an address space. This usually consists of * setting the valid PTE entries to a reserved scratch page. @@ -213,13 +205,6 @@ struct i915_vma_ops { void (*clear_pages)(struct i915_vma *vma); }; -struct pagestash { - spinlock_t lock; - struct pagevec pvec; -}; - -void stash_init(struct pagestash *stash); - struct i915_address_space { struct kref ref; struct rcu_work rcu; @@ -256,33 +241,33 @@ struct i915_address_space { #define VM_CLASS_GGTT 0 #define VM_CLASS_PPGTT 1 - struct i915_page_scratch scratch[4]; - unsigned int scratch_order; - unsigned int top; - + struct drm_i915_gem_object *scratch[4]; /** * List of vma currently bound. */ struct list_head bound_list; - struct pagestash free_pages; - /* Global GTT */ bool is_ggtt:1; - /* Some systems require uncached updates of the page directories */ - bool pt_kmap_wc:1; - /* Some systems support read-only mappings for GGTT and/or PPGTT */ bool has_read_only:1; + u8 top; + u8 pd_shift; + u8 scratch_order; + + struct drm_i915_gem_object * + (*alloc_pt_dma)(struct i915_address_space *vm, int sz); + u64 (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, u32 flags); /* Create a valid PTE */ #define PTE_READ_ONLY BIT(0) - int (*allocate_va_range)(struct i915_address_space *vm, - u64 start, u64 length); + void (*allocate_va_range)(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + u64 start, u64 length); void (*clear_range)(struct i915_address_space *vm, u64 start, u64 length); void (*insert_page)(struct i915_address_space *vm, @@ -490,9 +475,9 @@ i915_pd_entry(const struct i915_page_directory * const pdp, static inline dma_addr_t i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const 
unsigned int n) { - struct i915_page_dma *pt = ppgtt->pd->entry[n]; + struct i915_page_table *pt = ppgtt->pd->entry[n]; - return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top])); + return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]); } void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt); @@ -517,13 +502,10 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt); void i915_ggtt_suspend(struct i915_ggtt *gtt); void i915_ggtt_resume(struct i915_ggtt *ggtt); -int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); -void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p); - -#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) +#define kmap_atomic_px(px) kmap_atomic(__px_page(px_base(px))) void -fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count); +fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count); #define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64)) #define fill32_px(px, v) do { \ @@ -531,47 +513,51 @@ fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count); fill_px((px), v__ << 32 | v__); \ } while (0) -int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp); -void cleanup_scratch_page(struct i915_address_space *vm); +int setup_scratch_page(struct i915_address_space *vm); void free_scratch(struct i915_address_space *vm); +struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz); struct i915_page_table *alloc_pt(struct i915_address_space *vm); struct i915_page_directory *alloc_pd(struct i915_address_space *vm); -struct i915_page_directory *__alloc_pd(size_t sz); +struct i915_page_directory *__alloc_pd(int npde); -void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd); +int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj); -#define free_px(vm, px) free_pd(vm, px_base(px)) +void free_px(struct 
i915_address_space *vm, + struct i915_page_table *pt, int lvl); +#define free_pt(vm, px) free_px(vm, px, 0) +#define free_pd(vm, px) free_px(vm, px_pt(px), 1) void __set_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, - struct i915_page_dma * const to, + struct i915_page_table *pt, u64 (*encode)(const dma_addr_t, const enum i915_cache_level)); #define set_pd_entry(pd, idx, to) \ - __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode) + __set_pd_entry((pd), (idx), px_pt(to), gen8_pde_encode) void clear_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, - const struct i915_page_scratch * const scratch); + const struct drm_i915_gem_object * const scratch); bool release_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, struct i915_page_table * const pt, - const struct i915_page_scratch * const scratch); + const struct drm_i915_gem_object * const scratch); void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); int ggtt_set_pages(struct i915_vma *vma); int ppgtt_set_pages(struct i915_vma *vma); void clear_pages(struct i915_vma *vma); -int ppgtt_bind_vma(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags); +void ppgtt_bind_vma(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma); @@ -579,6 +565,14 @@ void gtt_write_workarounds(struct intel_gt *gt); void setup_private_pat(struct intel_uncore *uncore); +int i915_vm_alloc_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + u64 size); +int i915_vm_pin_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash); +void i915_vm_free_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash); + static inline struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; diff --git 
a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 24322ef08aa4..9bb16bdf93cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -137,6 +137,7 @@ #include "i915_perf.h" #include "i915_trace.h" #include "i915_vgpu.h" +#include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine_pm.h" #include "intel_gt.h" @@ -1139,29 +1140,14 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) /* Check in case we rollback so far we wrap [size/2] */ if (intel_ring_direction(rq->ring, - intel_ring_wrap(rq->ring, - rq->tail), - rq->ring->tail) > 0) + rq->tail, + rq->ring->tail + 8) > 0) rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; active = rq; } else { struct intel_engine_cs *owner = rq->context->engine; - /* - * Decouple the virtual breadcrumb before moving it - * back to the virtual engine -- we don't want the - * request to complete in the background and try - * and cancel the breadcrumb on the virtual engine - * (instead of the old engine where it is linked)! - */ - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &rq->fence.flags)) { - spin_lock_nested(&rq->lock, - SINGLE_DEPTH_NESTING); - i915_request_cancel_breadcrumb(rq); - spin_unlock(&rq->lock); - } WRITE_ONCE(rq->engine, owner); owner->submit_request(rq); active = NULL; @@ -1819,16 +1805,31 @@ static bool virtual_matches(const struct virtual_engine *ve, return true; } -static void virtual_xfer_breadcrumbs(struct virtual_engine *ve) +static void virtual_xfer_context(struct virtual_engine *ve, + struct intel_engine_cs *engine) { + unsigned int n; + + if (likely(engine == ve->siblings[0])) + return; + + GEM_BUG_ON(READ_ONCE(ve->context.inflight)); + if (!intel_engine_has_relative_mmio(engine)) + virtual_update_register_offsets(ve->context.lrc_reg_state, + engine); + /* - * All the outstanding signals on ve->siblings[0] must have - * been completed, just pending the interrupt handler. 
As those - * signals still refer to the old sibling (via rq->engine), we must - * transfer those to the old irq_worker to keep our locking - * consistent. + * Move the bound engine to the top of the list for + * future execution. We then kick this tasklet first + * before checking others, so that we preferentially + * reuse this set of bound registers. */ - intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context); + for (n = 1; n < ve->num_siblings; n++) { + if (ve->siblings[n] == engine) { + swap(ve->siblings[n], ve->siblings[0]); + break; + } + } } #define for_each_waiter(p__, rq__) \ @@ -2060,6 +2061,14 @@ static inline void clear_ports(struct i915_request **ports, int count) memset_p((void **)ports, NULL, count); } +static inline void +copy_ports(struct i915_request **dst, struct i915_request **src, int count) +{ + /* A memcpy_p() would be very useful here! */ + while (count--) + WRITE_ONCE(*dst++, *src++); /* avoid write tearing */ +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -2271,38 +2280,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(!(rq->execution_mask & engine->mask)); WRITE_ONCE(rq->engine, engine); - if (engine != ve->siblings[0]) { - u32 *regs = ve->context.lrc_reg_state; - unsigned int n; - - GEM_BUG_ON(READ_ONCE(ve->context.inflight)); - - if (!intel_engine_has_relative_mmio(engine)) - virtual_update_register_offsets(regs, - engine); - - if (!list_empty(&ve->context.signals)) - virtual_xfer_breadcrumbs(ve); - + if (__i915_request_submit(rq)) { /* - * Move the bound engine to the top of the list - * for future execution. We then kick this - * tasklet first before checking others, so that - * we preferentially reuse this set of bound - * registers. + * Only after we confirm that we will submit + * this request (i.e. it has not already + * completed), do we want to update the context. 
+ * + * This serves two purposes. It avoids + * unnecessary work if we are resubmitting an + * already completed request after timeslicing. + * But more importantly, it prevents us altering + * ve->siblings[] on an idle context, where + * we may be using ve->siblings[] in + * virtual_context_enter / virtual_context_exit. */ - for (n = 1; n < ve->num_siblings; n++) { - if (ve->siblings[n] == engine) { - swap(ve->siblings[n], - ve->siblings[0]); - break; - } - } - + virtual_xfer_context(ve, engine); GEM_BUG_ON(ve->siblings[0] != engine); - } - if (__i915_request_submit(rq)) { submit = true; last = rq; } @@ -2469,7 +2463,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) } static inline void -invalidate_csb_entries(const u32 *first, const u32 *last) +invalidate_csb_entries(const u64 *first, const u64 *last) { clflush((void *)first); clflush((void *)last); @@ -2501,14 +2495,25 @@ invalidate_csb_entries(const u32 *first, const u32 *last) * bits 47-57: sw context id of the lrc the GT switched away from * bits 58-63: sw counter of the lrc the GT switched away from */ -static inline bool -gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) -{ - u32 lower_dw = csb[0]; - u32 upper_dw = csb[1]; - bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw); - bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw); - bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; +static inline bool gen12_csb_parse(const u64 *csb) +{ + bool ctx_away_valid; + bool new_queue; + u64 entry; + + /* HSD#22011248461 */ + entry = READ_ONCE(*csb); + if (unlikely(entry == -1)) { + preempt_disable(); + if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50)) + GEM_WARN_ON("50us CSB timeout"); + preempt_enable(); + } + WRITE_ONCE(*(u64 *)csb, -1); + + ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry)); + new_queue = + lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; /* * The context switch detail is not guaranteed to be 5 
when a preemption @@ -2518,7 +2523,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) * would require some extra handling, but we don't support that. */ if (!ctx_away_valid || new_queue) { - GEM_BUG_ON(!ctx_to_valid); + GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry))); return true; } @@ -2527,12 +2532,11 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) * context switch on an unsuccessful wait instruction since we always * use polling mode. */ - GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw)); + GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry))); return false; } -static inline bool -gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) +static inline bool gen8_csb_parse(const u64 *csb) { return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); } @@ -2540,7 +2544,7 @@ gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - const u32 * const buf = execlists->csb_status; + const u64 * const buf = execlists->csb_status; const u8 num_entries = execlists->csb_size; u8 head, tail; @@ -2621,12 +2625,14 @@ static void process_csb(struct intel_engine_cs *engine) */ ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", - head, buf[2 * head + 0], buf[2 * head + 1]); + head, + upper_32_bits(buf[head]), + lower_32_bits(buf[head])); if (INTEL_GEN(engine->i915) >= 12) - promote = gen12_csb_parse(execlists, buf + 2 * head); + promote = gen12_csb_parse(buf + head); else - promote = gen8_csb_parse(execlists, buf + 2 * head); + promote = gen8_csb_parse(buf + head); if (promote) { struct i915_request * const *old = execlists->active; @@ -2648,13 +2654,15 @@ static void process_csb(struct intel_engine_cs *engine) /* switch pending to inflight */ GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - 
memcpy(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists) * - sizeof(*execlists->pending)); + copy_ports(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists)); smp_wmb(); /* complete the seqlock */ WRITE_ONCE(execlists->active, execlists->inflight); + /* XXX Magic delay for tgl */ + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + WRITE_ONCE(execlists->pending[0], NULL); } else { if (GEM_WARN_ON(!*execlists->active)) { @@ -3309,7 +3317,10 @@ static void execlists_context_unpin(struct intel_context *ce) { check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, ce->engine); +} +static void execlists_context_post_unpin(struct intel_context *ce) +{ i915_gem_object_unpin_map(ce->state->obj); } @@ -3471,20 +3482,24 @@ __execlists_update_reg_state(const struct intel_context *ce, } static int -__execlists_context_pin(struct intel_context *ce, - struct intel_engine_cs *engine) +execlists_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, void **vaddr) { - void *vaddr; - GEM_BUG_ON(!ce->state); GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - vaddr = i915_gem_object_pin_map(ce->state->obj, - i915_coherent_map_type(engine->i915) | + *vaddr = i915_gem_object_pin_map(ce->state->obj, + i915_coherent_map_type(ce->engine->i915) | I915_MAP_OVERRIDE); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); + return PTR_ERR_OR_ZERO(*vaddr); +} + +static int +__execlists_context_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + void *vaddr) +{ ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET; __execlists_update_reg_state(ce, engine, ce->ring->tail); @@ -3492,9 +3507,9 @@ __execlists_context_pin(struct intel_context *ce, return 0; } -static int execlists_context_pin(struct intel_context *ce) +static int execlists_context_pin(struct intel_context *ce, void *vaddr) { - return __execlists_context_pin(ce, ce->engine); + return 
__execlists_context_pin(ce, ce->engine, vaddr); } static int execlists_context_alloc(struct intel_context *ce) @@ -3520,8 +3535,10 @@ static void execlists_context_reset(struct intel_context *ce) static const struct intel_context_ops execlists_context_ops = { .alloc = execlists_context_alloc, + .pre_pin = execlists_context_pre_pin, .pin = execlists_context_pin, .unpin = execlists_context_unpin, + .post_unpin = execlists_context_post_unpin, .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, @@ -3530,6 +3547,19 @@ static const struct intel_context_ops execlists_context_ops = { .destroy = execlists_context_destroy, }; +static u32 hwsp_offset(const struct i915_request *rq) +{ + const struct intel_timeline_cacheline *cl; + + /* Before the request is executed, the timeline/cachline is fixed */ + + cl = rcu_dereference_protected(rq->hwsp_cacheline, 1); + if (cl) + return cl->ggtt_offset; + + return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset; +} + static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; @@ -3552,7 +3582,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) *cs++ = MI_NOOP; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = i915_request_timeline(rq)->hwsp_offset; + *cs++ = hwsp_offset(rq); *cs++ = 0; *cs++ = rq->fence.seqno - 1; @@ -3885,7 +3915,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) goto err; } - err = i915_ggtt_pin(vma, 0, PIN_HIGH); + err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); if (err) goto err; @@ -4002,6 +4032,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) WRITE_ONCE(*execlists->csb_write, reset_value); wmb(); /* Make sure this is visible to HW (paranoia?) */ + /* Check that the GPU does indeed update the CSB entries! 
*/ + memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64)); invalidate_csb_entries(&execlists->csb_status[0], &execlists->csb_status[reset_value]); @@ -4126,7 +4158,7 @@ static int execlists_resume(struct intel_engine_cs *engine) { intel_mocs_init_engine(engine); - intel_engine_reset_breadcrumbs(engine); + intel_breadcrumbs_reset(engine->breadcrumbs); if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) { struct drm_printer p = drm_debug_printer(__func__); @@ -4555,7 +4587,7 @@ static int gen8_emit_flush_render(struct i915_request *request, vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_REVID(request->engine->i915, 0, KBL_REVID_B0)) + if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0)) dc_flush_wa = true; } @@ -4757,14 +4789,21 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode) intel_engine_mask_t aux_inv = 0; u32 cmd, *cs; + cmd = 4; + if (mode & EMIT_INVALIDATE) + cmd += 2; if (mode & EMIT_INVALIDATE) aux_inv = request->engine->mask & ~BIT(BCS0); + if (aux_inv) + cmd += 2 * hweight8(aux_inv) + 2; - cs = intel_ring_begin(request, - 4 + (aux_inv ? 
2 * hweight8(aux_inv) + 2 : 0)); + cs = intel_ring_begin(request, cmd); if (IS_ERR(cs)) return PTR_ERR(cs); + if (mode & EMIT_INVALIDATE) + *cs++ = preparser_disable(true); + cmd = MI_FLUSH_DW + 1; /* We always require a command barrier so that subsequent @@ -4797,6 +4836,10 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode) } *cs++ = MI_NOOP; } + + if (mode & EMIT_INVALIDATE) + *cs++ = preparser_disable(false); + intel_ring_advance(request, cs); return 0; @@ -4856,11 +4899,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs) { - u32 addr = i915_request_active_timeline(request)->hwsp_offset; - - return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0); + return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0); } static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) @@ -4879,7 +4920,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, + hwsp_offset(request), PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); @@ -4891,7 +4932,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, + hwsp_offset(request), PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -4953,7 +4994,9 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) { - return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); + /* XXX Stalling flush before 
seqno write; post-sync not */ + cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0)); + return gen12_emit_fini_breadcrumb_tail(rq, cs); } static u32 * @@ -4961,7 +5004,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { cs = gen12_emit_ggtt_write_rcs(cs, request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, + hwsp_offset(request), PIPE_CONTROL0_HDC_PIPELINE_FLUSH, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TILE_CACHE_FLUSH | @@ -5143,7 +5186,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) } execlists->csb_status = - &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; + (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; execlists->csb_write = &engine->status_page.addr[intel_hws_csb_write_index(i915)]; @@ -5295,6 +5338,14 @@ populate_lr_context(struct intel_context *ce, return 0; } +static struct intel_timeline *pinned_timeline(struct intel_context *ce) +{ + struct intel_timeline *tl = fetch_and_zero(&ce->timeline); + + return intel_timeline_create_from_engine(ce->engine, + page_unmask_bits(tl)); +} + static int __execlists_context_alloc(struct intel_context *ce, struct intel_engine_cs *engine) { @@ -5325,19 +5376,17 @@ static int __execlists_context_alloc(struct intel_context *ce, goto error_deref_obj; } - if (!ce->timeline) { + if (!page_mask_bits(ce->timeline)) { struct intel_timeline *tl; - struct i915_vma *hwsp; /* * Use the static global HWSP for the kernel context, and * a dynamically allocated cacheline for everyone else. 
*/ - hwsp = NULL; - if (unlikely(intel_context_is_barrier(ce))) - hwsp = engine->status_page.vma; - - tl = intel_timeline_create(engine->gt, hwsp); + if (unlikely(ce->timeline)) + tl = pinned_timeline(ce); + else + tl = intel_timeline_create(engine->gt); if (IS_ERR(tl)) { ret = PTR_ERR(tl); goto error_deref_obj; @@ -5408,6 +5457,7 @@ static void virtual_context_destroy(struct kref *kref) __execlists_context_fini(&ve->context); intel_context_fini(&ve->context); + intel_breadcrumbs_free(ve->base.breadcrumbs); intel_engine_free_request_pool(&ve->base); kfree(ve->bonds); @@ -5443,12 +5493,12 @@ static int virtual_context_alloc(struct intel_context *ce) return __execlists_context_alloc(ce, ve->siblings[0]); } -static int virtual_context_pin(struct intel_context *ce) +static int virtual_context_pin(struct intel_context *ce, void *vaddr) { struct virtual_engine *ve = container_of(ce, typeof(*ve), context); /* Note: we must use a real engine class for setting up reg state */ - return __execlists_context_pin(ce, ve->siblings[0]); + return __execlists_context_pin(ce, ve->siblings[0], vaddr); } static void virtual_context_enter(struct intel_context *ce) @@ -5476,8 +5526,10 @@ static void virtual_context_exit(struct intel_context *ce) static const struct intel_context_ops virtual_context_ops = { .alloc = virtual_context_alloc, + .pre_pin = execlists_context_pre_pin, .pin = virtual_context_pin, .unpin = execlists_context_unpin, + .post_unpin = execlists_context_post_unpin, .enter = virtual_context_enter, .exit = virtual_context_exit, @@ -5711,9 +5763,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); - intel_engine_init_breadcrumbs(&ve->base); intel_engine_init_execlists(&ve->base); - ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */ ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; @@ 
-5730,6 +5780,12 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, intel_context_init(&ve->context, &ve->base); + ve->base.breadcrumbs = intel_breadcrumbs_create(NULL); + if (!ve->base.breadcrumbs) { + err = -ENOMEM; + goto err_put; + } + for (n = 0; n < count; n++) { struct intel_engine_cs *sibling = siblings[n]; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 632e08a4592b..313e51e7d4f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -234,11 +234,18 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { L3_1_UC) static const struct drm_i915_mocs_entry tgl_mocs_table[] = { - /* Base - Error (Reserved for Non-Use) */ - MOCS_ENTRY(0, 0x0, 0x0), - /* Base - Reserved */ - MOCS_ENTRY(1, 0x0, 0x0), - + /* + * NOTE: + * Reserved and unspecified MOCS indices have been set to (L3 + LCC). + * These reserved entries should never be used, they may be changed + * to low performant variants with better coherency in the future if + * more entries are needed. We are programming index I915_MOCS_PTE(1) + * only, __init_mocs_table() take care to program unused index with + * this entry. 
+ */ + MOCS_ENTRY(I915_MOCS_PTE, + LE_0_PAGETABLE | LE_TC_0_PAGETABLE, + L3_1_UC), GEN11_MOCS_ENTRIES, /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index f0862e924d11..46d9aceda64c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -18,7 +18,8 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm) if (unlikely(!pt)) return ERR_PTR(-ENOMEM); - if (unlikely(setup_page_dma(vm, &pt->base))) { + pt->base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); + if (IS_ERR(pt->base)) { kfree(pt); return ERR_PTR(-ENOMEM); } @@ -27,14 +28,20 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm) return pt; } -struct i915_page_directory *__alloc_pd(size_t sz) +struct i915_page_directory *__alloc_pd(int count) { struct i915_page_directory *pd; - pd = kzalloc(sz, I915_GFP_ALLOW_FAIL); + pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL); if (unlikely(!pd)) return NULL; + pd->entry = kcalloc(count, sizeof(*pd->entry), I915_GFP_ALLOW_FAIL); + if (unlikely(!pd->entry)) { + kfree(pd); + return NULL; + } + spin_lock_init(&pd->lock); return pd; } @@ -43,11 +50,13 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm) { struct i915_page_directory *pd; - pd = __alloc_pd(sizeof(*pd)); + pd = __alloc_pd(I915_PDES); if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - if (unlikely(setup_page_dma(vm, px_base(pd)))) { + pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); + if (IS_ERR(pd->pt.base)) { + kfree(pd->entry); kfree(pd); return ERR_PTR(-ENOMEM); } @@ -55,41 +64,52 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm) return pd; } -void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd) +void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl) { - cleanup_page_dma(vm, pd); - kfree(pd); + BUILD_BUG_ON(offsetof(struct i915_page_directory, pt)); + + if (lvl) { + 
struct i915_page_directory *pd = + container_of(pt, typeof(*pd), pt); + kfree(pd->entry); + } + + if (pt->base) + i915_gem_object_put(pt->base); + + kfree(pt); } static inline void -write_dma_entry(struct i915_page_dma * const pdma, +write_dma_entry(struct drm_i915_gem_object * const pdma, const unsigned short idx, const u64 encoded_entry) { - u64 * const vaddr = kmap_atomic(pdma->page); + u64 * const vaddr = kmap_atomic(__px_page(pdma)); vaddr[idx] = encoded_entry; + clflush_cache_range(&vaddr[idx], sizeof(u64)); kunmap_atomic(vaddr); } void __set_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, - struct i915_page_dma * const to, + struct i915_page_table * const to, u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) { /* Each thread pre-pins the pd, and we may have a thread per pde. */ - GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry)); + GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * I915_PDES); atomic_inc(px_used(pd)); pd->entry[idx] = to; - write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC)); + write_dma_entry(px_base(pd), idx, encode(px_dma(to), I915_CACHE_LLC)); } void clear_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, - const struct i915_page_scratch * const scratch) + const struct drm_i915_gem_object * const scratch) { GEM_BUG_ON(atomic_read(px_used(pd)) == 0); @@ -102,7 +122,7 @@ bool release_pd_entry(struct i915_page_directory * const pd, const unsigned short idx, struct i915_page_table * const pt, - const struct i915_page_scratch * const scratch) + const struct drm_i915_gem_object * const scratch) { bool free = false; @@ -155,19 +175,16 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt) return ppgtt; } -int ppgtt_bind_vma(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +void ppgtt_bind_vma(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum 
i915_cache_level cache_level, + u32 flags) { u32 pte_flags; - int err; if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { - err = vm->allocate_va_range(vm, vma->node.start, vma->size); - if (err) - return err; - + vm->allocate_va_range(vm, stash, vma->node.start, vma->size); set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); } @@ -178,8 +195,6 @@ int ppgtt_bind_vma(struct i915_address_space *vm, vm->insert_entries(vm, vma, cache_level, pte_flags); wmb(); - - return 0; } void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) @@ -188,12 +203,93 @@ void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma) vm->clear_range(vm, vma->node.start, vma->size); } +static unsigned long pd_count(u64 size, int shift) +{ + /* Beware later misalignment */ + return (size + 2 * (BIT_ULL(shift) - 1)) >> shift; +} + +int i915_vm_alloc_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + u64 size) +{ + unsigned long count; + int shift, n; + + shift = vm->pd_shift; + if (!shift) + return 0; + + count = pd_count(size, shift); + while (count--) { + struct i915_page_table *pt; + + pt = alloc_pt(vm); + if (IS_ERR(pt)) { + i915_vm_free_pt_stash(vm, stash); + return PTR_ERR(pt); + } + + pt->stash = stash->pt[0]; + stash->pt[0] = pt; + } + + for (n = 1; n < vm->top; n++) { + shift += ilog2(I915_PDES); /* Each PD holds 512 entries */ + count = pd_count(size, shift); + while (count--) { + struct i915_page_directory *pd; + + pd = alloc_pd(vm); + if (IS_ERR(pd)) { + i915_vm_free_pt_stash(vm, stash); + return PTR_ERR(pd); + } + + pd->pt.stash = stash->pt[1]; + stash->pt[1] = &pd->pt; + } + } + + return 0; +} + +int i915_vm_pin_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash) +{ + struct i915_page_table *pt; + int n, err; + + for (n = 0; n < ARRAY_SIZE(stash->pt); n++) { + for (pt = stash->pt[n]; pt; pt = pt->stash) { + err = pin_pt_dma(vm, pt->base); + if (err) + return err; + } + } + + return 0; +} + 
+void i915_vm_free_pt_stash(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash) +{ + struct i915_page_table *pt; + int n; + + for (n = 0; n < ARRAY_SIZE(stash->pt); n++) { + while ((pt = stash->pt[n])) { + stash->pt[n] = pt->stash; + free_px(vm, pt, n); + } + } +} + int ppgtt_set_pages(struct i915_vma *vma) { GEM_BUG_ON(vma->pages); vma->pages = vma->obj->mm.pages; - vma->page_sizes = vma->obj->mm.page_sizes; return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index ab675d35030d..d7b8e4457fc2 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -56,9 +56,12 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) static void gen11_rc6_enable(struct intel_rc6 *rc6) { - struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_gt *gt = rc6_to_gt(rc6); + struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; enum intel_engine_id id; + u32 pg_enable; + int i; /* 2b: Program RC6 thresholds.*/ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); @@ -102,10 +105,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - set(uncore, GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE); + pg_enable = + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; + + if (INTEL_GEN(gt->i915) >= 12) { + for (i = 0; i < I915_MAX_VCS; i++) + if (HAS_ENGINE(gt, _VCS(i))) + pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) | + VDN_MFX_POWERGATE_ENABLE(i)); + } + + set(uncore, GEN9_PG_ENABLE, pg_enable); } static void gen9_rc6_enable(struct intel_rc6 *rc6) diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 1bfad589c63b..ea2a77c7b469 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -27,6 +27,7 @@ #include 
"i915_drv.h" #include "intel_renderstate.h" +#include "gt/intel_context.h" #include "intel_ring.h" static const struct intel_renderstate_rodata * @@ -157,33 +158,47 @@ out: #undef OUT_BATCH int intel_renderstate_init(struct intel_renderstate *so, - struct intel_engine_cs *engine) + struct intel_context *ce) { - struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine = ce->engine; + struct drm_i915_gem_object *obj = NULL; int err; memset(so, 0, sizeof(*so)); so->rodata = render_state_get_rodata(engine); - if (!so->rodata) - return 0; + if (so->rodata) { + if (so->rodata->batch_items * 4 > PAGE_SIZE) + return -EINVAL; + + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(so->vma)) { + err = PTR_ERR(so->vma); + goto err_obj; + } + } - if (so->rodata->batch_items * 4 > PAGE_SIZE) - return -EINVAL; + i915_gem_ww_ctx_init(&so->ww, true); +retry: + err = intel_context_pin_ww(ce, &so->ww); + if (err) + goto err_fini; - obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); + /* return early if there's nothing to setup */ + if (!err && !so->rodata) + return 0; - so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); - if (IS_ERR(so->vma)) { - err = PTR_ERR(so->vma); - goto err_obj; - } + err = i915_gem_object_lock(so->vma->obj, &so->ww); + if (err) + goto err_context; err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) - goto err_obj; + goto err_context; err = render_state_setup(so, engine->i915); if (err) @@ -193,8 +208,18 @@ int intel_renderstate_init(struct intel_renderstate *so, err_unpin: i915_vma_unpin(so->vma); +err_context: + intel_context_unpin(ce); +err_fini: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&so->ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&so->ww); err_obj: - i915_gem_object_put(obj); + if (obj) + 
i915_gem_object_put(obj); so->vma = NULL; return err; } @@ -208,11 +233,9 @@ int intel_renderstate_emit(struct intel_renderstate *so, if (!so->vma) return 0; - i915_vma_lock(so->vma); err = i915_request_await_object(rq, so->vma->obj, false); if (err == 0) err = i915_vma_move_to_active(so->vma, rq, 0); - i915_vma_unlock(so->vma); if (err) return err; @@ -233,7 +256,17 @@ int intel_renderstate_emit(struct intel_renderstate *so, return 0; } -void intel_renderstate_fini(struct intel_renderstate *so) +void intel_renderstate_fini(struct intel_renderstate *so, + struct intel_context *ce) { - i915_vma_unpin_and_release(&so->vma, 0); + if (so->vma) { + i915_vma_unpin(so->vma); + i915_vma_close(so->vma); + } + + intel_context_unpin(ce); + i915_gem_ww_ctx_fini(&so->ww); + + if (so->vma) + i915_gem_object_put(so->vma->obj); } diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h index 5700be69a05a..713aa1e86c80 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.h +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h @@ -25,9 +25,10 @@ #define _INTEL_RENDERSTATE_H_ #include <linux/types.h> +#include "i915_gem.h" struct i915_request; -struct intel_engine_cs; +struct intel_context; struct i915_vma; struct intel_renderstate_rodata { @@ -49,6 +50,7 @@ extern const struct intel_renderstate_rodata gen8_null_state; extern const struct intel_renderstate_rodata gen9_null_state; struct intel_renderstate { + struct i915_gem_ww_ctx ww; const struct intel_renderstate_rodata *rodata; struct i915_vma *vma; u32 batch_offset; @@ -58,9 +60,10 @@ struct intel_renderstate { }; int intel_renderstate_init(struct intel_renderstate *so, - struct intel_engine_cs *engine); + struct intel_context *ce); int intel_renderstate_emit(struct intel_renderstate *so, struct i915_request *rq); -void intel_renderstate_fini(struct intel_renderstate *so); +void intel_renderstate_fini(struct intel_renderstate *so, + struct intel_context *ce); #endif /* 
_INTEL_RENDERSTATE_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 46a5ceffc22f..ac36b67fb46b 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -15,6 +15,7 @@ #include "i915_drv.h" #include "i915_gpu_error.h" #include "i915_irq.h" +#include "intel_breadcrumbs.h" #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index bdb324167ef3..4034a4bac7f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -21,7 +21,13 @@ unsigned int intel_ring_update_space(struct intel_ring *ring) return space; } -int intel_ring_pin(struct intel_ring *ring) +void __intel_ring_pin(struct intel_ring *ring) +{ + GEM_BUG_ON(!atomic_read(&ring->pin_count)); + atomic_inc(&ring->pin_count); +} + +int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww) { struct i915_vma *vma = ring->vma; unsigned int flags; @@ -39,7 +45,7 @@ int intel_ring_pin(struct intel_ring *ring) else flags |= PIN_HIGH; - ret = i915_ggtt_pin(vma, 0, flags); + ret = i915_ggtt_pin(vma, ww, 0, flags); if (unlikely(ret)) goto err_unpin; diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index cc0ebca65167..1700579bdc93 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -21,7 +21,8 @@ int intel_ring_cacheline_align(struct i915_request *rq); unsigned int intel_ring_update_space(struct intel_ring *ring); -int intel_ring_pin(struct intel_ring *ring); +void __intel_ring_pin(struct intel_ring *ring); +int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_reset(struct intel_ring *ring, u32 tail); diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c 
b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 94915f668715..16b48e72c369 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -32,6 +32,7 @@ #include "gen6_ppgtt.h" #include "gen7_renderclear.h" #include "i915_drv.h" +#include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_gt.h" #include "intel_reset.h" @@ -100,7 +101,7 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset) */ default: GEM_BUG_ON(engine->id); - /* fallthrough */ + fallthrough; case RCS0: hwsp = RENDER_HWS_PGA_GEN7; break; @@ -201,16 +202,18 @@ static struct i915_address_space *vm_alias(struct i915_address_space *vm) return vm; } +static u32 pp_dir(struct i915_address_space *vm) +{ + return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir; +} + static void set_pp_dir(struct intel_engine_cs *engine) { struct i915_address_space *vm = vm_alias(engine->gt->vm); if (vm) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G); - ENGINE_WRITE(engine, RING_PP_DIR_BASE, - px_base(ppgtt->pd)->ggtt_offset << 10); + ENGINE_WRITE(engine, RING_PP_DIR_BASE, pp_dir(vm)); } } @@ -255,7 +258,7 @@ static int xcs_resume(struct intel_engine_cs *engine) else ring_setup_status_page(engine); - intel_engine_reset_breadcrumbs(engine); + intel_breadcrumbs_reset(engine->breadcrumbs); /* Enforce ordering by reading HEAD register back */ ENGINE_POSTING_READ(engine, RING_HEAD); @@ -474,14 +477,16 @@ static void ring_context_destroy(struct kref *ref) intel_context_free(ce); } -static int __context_pin_ppgtt(struct intel_context *ce) +static int ring_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **unused) { struct i915_address_space *vm; int err = 0; vm = vm_alias(ce->vm); if (vm) - err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm))); + err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww); return err; } @@ -497,6 +502,10 @@ static void 
__context_unpin_ppgtt(struct intel_context *ce) static void ring_context_unpin(struct intel_context *ce) { +} + +static void ring_context_post_unpin(struct intel_context *ce) +{ __context_unpin_ppgtt(ce); } @@ -584,9 +593,9 @@ static int ring_context_alloc(struct intel_context *ce) return 0; } -static int ring_context_pin(struct intel_context *ce) +static int ring_context_pin(struct intel_context *ce, void *unused) { - return __context_pin_ppgtt(ce); + return 0; } static void ring_context_reset(struct intel_context *ce) @@ -597,8 +606,10 @@ static void ring_context_reset(struct intel_context *ce) static const struct intel_context_ops ring_context_ops = { .alloc = ring_context_alloc, + .pre_pin = ring_context_pre_pin, .pin = ring_context_pin, .unpin = ring_context_unpin, + .post_unpin = ring_context_post_unpin, .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, @@ -608,7 +619,7 @@ static const struct intel_context_ops ring_context_ops = { }; static int load_pd_dir(struct i915_request *rq, - const struct i915_ppgtt *ppgtt, + struct i915_address_space *vm, u32 valid) { const struct intel_engine_cs * const engine = rq->engine; @@ -624,7 +635,7 @@ static int load_pd_dir(struct i915_request *rq, *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10; + *cs++ = pp_dir(vm); /* Stall until the page table load is complete? */ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; @@ -826,7 +837,7 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) * post-sync op, this extra pass appears vital before a * mm switch! 
*/ - ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G); + ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G); if (ret) return ret; @@ -1250,14 +1261,15 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) return -ENODEV; } - timeline = intel_timeline_create(engine->gt, engine->status_page.vma); + timeline = intel_timeline_create_from_engine(engine, + I915_GEM_HWS_SEQNO_ADDR); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); goto err; } GEM_BUG_ON(timeline->has_initial_breadcrumb); - err = intel_timeline_pin(timeline); + err = intel_timeline_pin(timeline, NULL); if (err) goto err_timeline; @@ -1267,7 +1279,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) goto err_timeline_unpin; } - err = intel_ring_pin(ring); + err = intel_ring_pin(ring, NULL); if (err) goto err_ring; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 97ba14ad52e4..e6a00eea0631 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -7,6 +7,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" +#include "intel_breadcrumbs.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" #include "intel_gt_irq.h" diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 46d20f5f3ddc..7ea94d201fe6 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -188,10 +188,14 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) return cl; } -static void cacheline_acquire(struct intel_timeline_cacheline *cl) +static void cacheline_acquire(struct intel_timeline_cacheline *cl, + u32 ggtt_offset) { - if (cl) - i915_active_acquire(&cl->active); + if (!cl) + return; + + cl->ggtt_offset = ggtt_offset; + i915_active_acquire(&cl->active); } static void cacheline_release(struct intel_timeline_cacheline *cl) @@ -215,7 +219,8 @@ static void cacheline_free(struct intel_timeline_cacheline *cl) static 
int intel_timeline_init(struct intel_timeline *timeline, struct intel_gt *gt, - struct i915_vma *hwsp) + struct i915_vma *hwsp, + unsigned int offset) { void *vaddr; @@ -246,8 +251,7 @@ static int intel_timeline_init(struct intel_timeline *timeline, vaddr = page_mask_bits(cl->vaddr); } else { - timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; - + timeline->hwsp_offset = offset; vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -297,7 +301,9 @@ static void intel_timeline_fini(struct intel_timeline *timeline) } struct intel_timeline * -intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) +__intel_timeline_create(struct intel_gt *gt, + struct i915_vma *global_hwsp, + unsigned int offset) { struct intel_timeline *timeline; int err; @@ -306,7 +312,7 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) if (!timeline) return ERR_PTR(-ENOMEM); - err = intel_timeline_init(timeline, gt, global_hwsp); + err = intel_timeline_init(timeline, gt, global_hwsp, offset); if (err) { kfree(timeline); return ERR_PTR(err); @@ -315,14 +321,20 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) return timeline; } -int intel_timeline_pin(struct intel_timeline *tl) +void __intel_timeline_pin(struct intel_timeline *tl) +{ + GEM_BUG_ON(!atomic_read(&tl->pin_count)); + atomic_inc(&tl->pin_count); +} + +int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww) { int err; if (atomic_add_unless(&tl->pin_count, 1, 0)) return 0; - err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH); + err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH); if (err) return err; @@ -332,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl) GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n", tl->fence_context, tl->hwsp_offset); - cacheline_acquire(tl->hwsp_cacheline); + cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset); if (atomic_fetch_inc(&tl->pin_count)) { 
cacheline_release(tl->hwsp_cacheline); __i915_vma_unpin(tl->hwsp_ggtt); @@ -465,7 +477,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, goto err_rollback; } - err = i915_ggtt_pin(vma, 0, PIN_HIGH); + err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); if (err) { __idle_hwsp_free(vma->private, cacheline); goto err_rollback; @@ -484,7 +496,9 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, * free it after the current request is retired, which ensures that * all writes into the cacheline from previous requests are complete. */ - err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence); + err = i915_active_ref(&tl->hwsp_cacheline->active, + tl->fence_context, + &rq->fence); if (err) goto err_cacheline; @@ -505,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n", tl->fence_context, tl->hwsp_offset); - cacheline_acquire(cl); + cacheline_acquire(cl, tl->hwsp_offset); tl->hwsp_cacheline = cl; *seqno = timeline_advance(tl); @@ -563,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from, if (err) goto out; - *hwsp = i915_ggtt_offset(cl->hwsp->vma) + - ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES; - + *hwsp = cl->ggtt_offset; out: i915_active_release(&cl->active); return err; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index 4298b9ac7327..9882cd911d8e 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -29,10 +29,27 @@ #include "i915_active.h" #include "i915_syncmap.h" -#include "gt/intel_timeline_types.h" +#include "intel_timeline_types.h" struct intel_timeline * -intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp); +__intel_timeline_create(struct intel_gt *gt, + struct i915_vma *global_hwsp, + unsigned int offset); + +static inline struct intel_timeline * +intel_timeline_create(struct intel_gt *gt) +{ + return 
__intel_timeline_create(gt, NULL, 0); +} + +static inline struct intel_timeline * +intel_timeline_create_from_engine(struct intel_engine_cs *engine, + unsigned int offset) +{ + return __intel_timeline_create(engine->gt, + engine->status_page.vma, + offset); +} static inline struct intel_timeline * intel_timeline_get(struct intel_timeline *timeline) @@ -71,7 +88,8 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); } -int intel_timeline_pin(struct intel_timeline *tl); +void __intel_timeline_pin(struct intel_timeline *tl); +int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww); void intel_timeline_enter(struct intel_timeline *tl); int intel_timeline_get_seqno(struct intel_timeline *tl, struct i915_request *rq, diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 02181c5020db..4474f487f589 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -94,6 +94,8 @@ struct intel_timeline_cacheline { struct intel_timeline_hwsp *hwsp; void *vaddr; + u32 ggtt_offset; + struct rcu_head rcu; }; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 5726cd0a37e0..4a3bde7c9f21 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -52,6 +52,37 @@ * - Public functions to init or apply the given workaround type. */ +/* + * KBL revision ID ordering is bizarre; higher revision ID's map to lower + * steppings in some cases. So rather than test against the revision ID + * directly, let's map that into our own range of increasing ID's that we + * can test against in a regular manner. 
+ */ + +const struct i915_rev_steppings kbl_revids[] = { + [0] = { .gt_stepping = KBL_REVID_A0, .disp_stepping = KBL_REVID_A0 }, + [1] = { .gt_stepping = KBL_REVID_B0, .disp_stepping = KBL_REVID_B0 }, + [2] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B0 }, + [3] = { .gt_stepping = KBL_REVID_D0, .disp_stepping = KBL_REVID_B0 }, + [4] = { .gt_stepping = KBL_REVID_F0, .disp_stepping = KBL_REVID_C0 }, + [5] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B1 }, + [6] = { .gt_stepping = KBL_REVID_D1, .disp_stepping = KBL_REVID_B1 }, + [7] = { .gt_stepping = KBL_REVID_G0, .disp_stepping = KBL_REVID_C0 }, +}; + +const struct i915_rev_steppings tgl_uy_revids[] = { + [0] = { .gt_stepping = TGL_REVID_A0, .disp_stepping = TGL_REVID_A0 }, + [1] = { .gt_stepping = TGL_REVID_B0, .disp_stepping = TGL_REVID_C0 }, + [2] = { .gt_stepping = TGL_REVID_B1, .disp_stepping = TGL_REVID_C0 }, + [3] = { .gt_stepping = TGL_REVID_C0, .disp_stepping = TGL_REVID_D0 }, +}; + +/* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ +const struct i915_rev_steppings tgl_revids[] = { + [0] = { .gt_stepping = TGL_REVID_A0, .disp_stepping = TGL_REVID_B0 }, + [1] = { .gt_stepping = TGL_REVID_B0, .disp_stepping = TGL_REVID_D0 }, +}; + static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name) { wal->name = name; @@ -100,8 +131,10 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) return; } - if (wal->list) + if (wal->list) { memcpy(list, wal->list, sizeof(*wa) * wal->count); + kfree(wal->list); + } wal->list = list; } @@ -470,7 +503,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, gen9_ctx_workarounds_init(engine, wal); /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) + if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -596,8 
+629,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); } -static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { /* * Wa_1409142259:tgl @@ -607,12 +640,28 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, * Wa_1409207793:tgl * Wa_1409178076:tgl * Wa_1408979724:tgl + * Wa_14010443199:rkl + * Wa_14010698770:rkl */ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); + /* WaDisableGPGPUMidThreadPreemption:gen12 */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); +} + +static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + gen12_ctx_workarounds_init(engine, wal); + /* - * Wa_1604555607:gen12 and Wa_1608008084:gen12 + * Wa_1604555607:tgl,rkl + * + * Note that the implementation of this workaround is further modified + * according to the FF_MODE2 guidance given by Wa_1608008084:gen12. * FF_MODE2 register will return the wrong value when read. The default * value for this register is zero for all fields and there are no bit * masks. 
So instead of doing a RMW we should just write the GS Timer @@ -623,11 +672,6 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK, FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128, 0); - - /* WaDisableGPGPUMidThreadPreemption:tgl */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, - GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); } static void @@ -642,8 +686,10 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, wa_init_start(wal, name, engine->name); - if (IS_GEN(i915, 12)) + if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) tgl_ctx_workarounds_init(engine, wal); + else if (IS_GEN(i915, 12)) + gen12_ctx_workarounds_init(engine, wal); else if (IS_GEN(i915, 11)) icl_ctx_workarounds_init(engine, wal); else if (IS_CANNONLAKE(i915)) @@ -995,7 +1041,7 @@ kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) gen9_gt_workarounds_init(i915, wal); /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_REVID(i915, 0, KBL_REVID_B0)) + if (IS_KBL_GT_REVID(i915, 0, KBL_REVID_B0)) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); @@ -1176,18 +1222,25 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void -tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +gen12_gt_workarounds_init(struct drm_i915_private *i915, + struct i915_wa_list *wal) { wa_init_mcr(i915, wal); +} + +static void +tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + gen12_gt_workarounds_init(i915, wal); /* Wa_1409420604:tgl */ - if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS); /* Wa_1607087056:tgl also know as BUG:1409180338 */ - if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, 
TGL_REVID_A0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1196,8 +1249,10 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) static void gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) { - if (IS_GEN(i915, 12)) + if (IS_TIGERLAKE(i915)) tgl_gt_workarounds_init(i915, wal); + else if (IS_GEN(i915, 12)) + gen12_gt_workarounds_init(i915, wal); else if (IS_GEN(i915, 11)) icl_gt_workarounds_init(i915, wal); else if (IS_CANNONLAKE(i915)) @@ -1620,7 +1675,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) { + if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) { /* * Wa_1607138336:tgl * Wa_1607063988:tgl @@ -1630,18 +1685,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); /* - * Wa_1607030317:tgl - * Wa_1607186500:tgl - * Wa_1607297627:tgl there is 3 entries for this WA on BSpec, 2 - * of then says it is fixed on B0 the other one says it is - * permanent - */ - wa_masked_en(wal, - GEN6_RC_SLEEP_PSMI_CONTROL, - GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | - GEN8_RC_SEMA_IDLE_MSG_DISABLE); - - /* * Wa_1606679103:tgl * (see also Wa_1606682166:icl) */ @@ -1654,22 +1697,17 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) VSUNIT_CLKGATE_DIS_TGL); } - if (IS_TIGERLAKE(i915)) { - /* Wa_1606931601:tgl */ + if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { + /* Wa_1606931601:tgl,rkl */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); - /* Wa_1409804808:tgl */ + /* Wa_1409804808:tgl,rkl */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS); - /* Wa_1606700617:tgl */ - wa_masked_en(wal, - GEN9_CS_DEBUG_MODE1, - FF_DOP_CLOCK_GATE_DISABLE); - /* * Wa_1409085225:tgl - * Wa_14010229206:tgl + * Wa_14010229206:tgl,rkl */ 
wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); @@ -1677,9 +1715,37 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1407928979:tgl A* * Wa_18011464164:tgl B0+ * Wa_22010931296:tgl B0+ + * Wa_14010919138:rkl,tgl */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); + + /* + * Wa_1607030317:tgl + * Wa_1607186500:tgl + * Wa_1607297627:tgl,rkl there are multiple entries for this + * WA in the BSpec; some indicate this is an A0-only WA, + * others indicate it applies to all steppings. + */ + wa_masked_en(wal, + GEN6_RC_SLEEP_PSMI_CONTROL, + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE); + + /* + * Wa_1606700617:tgl + * Wa_22010271021:tgl,rkl + */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + } + + if (IS_GEN(i915, 12)) { + /* Wa_1406941453:gen12 */ + wa_masked_en(wal, + GEN10_SAMPLER_MODE, + ENABLE_SMALLPL); } if (IS_GEN(i915, 11)) { @@ -1898,7 +1964,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; /* WaKBLVECSSemaphoreWaitPoll:kbl */ - if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) { + if (IS_KBL_GT_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) { wa_write(wal, RING_SEMA_WAIT_POLL(engine->mmio_base), 1); @@ -2045,6 +2111,7 @@ static int engine_wa_list_verify(struct intel_context *ce, const struct i915_wa *wa; struct i915_request *rq; struct i915_vma *vma; + struct i915_gem_ww_ctx ww; unsigned int i; u32 *results; int err; @@ -2057,29 +2124,34 @@ static int engine_wa_list_verify(struct intel_context *ce, return PTR_ERR(vma); intel_engine_pm_get(ce->engine); - rq = intel_context_create_request(ce); - intel_engine_pm_put(ce->engine); + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(vma->obj, &ww); + if (err == 0) + err = intel_context_pin_ww(ce, &ww); + if (err) + goto err_pm; + + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = 
PTR_ERR(rq); - goto err_vma; + goto err_unpin; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(vma); - if (err) { - i915_request_add(rq); - goto err_vma; - } - - err = wa_list_srm(rq, wal, vma); - if (err) - goto err_vma; + if (err == 0) + err = wa_list_srm(rq, wal, vma); i915_request_get(rq); + if (err) + i915_request_set_error_once(rq, err); i915_request_add(rq); + + if (err) + goto err_rq; + if (i915_request_wait(rq, 0, HZ / 5) < 0) { err = -ETIME; goto err_rq; @@ -2104,7 +2176,16 @@ static int engine_wa_list_verify(struct intel_context *ce, err_rq: i915_request_put(rq); -err_vma: +err_unpin: + intel_context_unpin(ce); +err_pm: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + intel_engine_pm_put(ce->engine); i915_vma_unpin(vma); i915_vma_put(vma); return err; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index b8dd3cbc8696..dfd1cfb8a7ec 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -131,6 +131,10 @@ static void mock_context_unpin(struct intel_context *ce) { } +static void mock_context_post_unpin(struct intel_context *ce) +{ +} + static void mock_context_destroy(struct kref *ref) { struct intel_context *ce = container_of(ref, typeof(*ce), ref); @@ -152,8 +156,7 @@ static int mock_context_alloc(struct intel_context *ce) if (!ce->ring) return -ENOMEM; - GEM_BUG_ON(ce->timeline); - ce->timeline = intel_timeline_create(ce->engine->gt, NULL); + ce->timeline = intel_timeline_create(ce->engine->gt); if (IS_ERR(ce->timeline)) { kfree(ce->engine); return PTR_ERR(ce->timeline); @@ -164,7 +167,13 @@ static int mock_context_alloc(struct intel_context *ce) return 0; } -static int mock_context_pin(struct intel_context *ce) +static int mock_context_pre_pin(struct intel_context *ce, + struct 
i915_gem_ww_ctx *ww, void **unused) +{ + return 0; +} + +static int mock_context_pin(struct intel_context *ce, void *unused) { return 0; } @@ -176,8 +185,10 @@ static void mock_context_reset(struct intel_context *ce) static const struct intel_context_ops mock_context_ops = { .alloc = mock_context_alloc, + .pre_pin = mock_context_pre_pin, .pin = mock_context_pin, .unpin = mock_context_unpin, + .post_unpin = mock_context_post_unpin, .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, @@ -261,11 +272,12 @@ static void mock_engine_release(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); + intel_breadcrumbs_free(engine->breadcrumbs); + intel_context_unpin(engine->kernel_context); intel_context_put(engine->kernel_context); intel_engine_fini_retire(engine); - intel_engine_fini_breadcrumbs(engine); } struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, @@ -323,20 +335,26 @@ int mock_engine_init(struct intel_engine_cs *engine) struct intel_context *ce; intel_engine_init_active(engine, ENGINE_MOCK); - intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); + engine->breadcrumbs = intel_breadcrumbs_create(NULL); + if (!engine->breadcrumbs) + return -ENOMEM; + ce = create_kernel_context(engine); if (IS_ERR(ce)) goto err_breadcrumbs; + /* We insist the kernel context is using the status_page */ + engine->status_page.vma = ce->timeline->hwsp_ggtt; + engine->kernel_context = ce; return 0; err_breadcrumbs: - intel_engine_fini_breadcrumbs(engine); + intel_breadcrumbs_free(engine->breadcrumbs); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 52af1cee9a94..1f4020e906a8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -68,6 +68,8 @@ static int context_sync(struct intel_context *ce) } while (!err); 
mutex_unlock(&tl->mutex); + /* Wait for all barriers to complete (remote CPU) before we check */ + i915_active_unlock_wait(&ce->active); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 73243ba59c7d..e73854dd2fe0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -47,7 +47,10 @@ static int pulse_active(struct i915_active *active) static void pulse_free(struct kref *kref) { - kfree(container_of(kref, struct pulse, kref)); + struct pulse *p = container_of(kref, typeof(*p), kref); + + i915_active_fini(&p->active); + kfree(p); } static void pulse_put(struct pulse *p) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 3fc5de961280..95d41c01d0e0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -2729,7 +2729,7 @@ static int create_gang(struct intel_engine_cs *engine, i915_gem_object_put(obj); intel_context_put(ce); - rq->client_link.next = &(*prev)->client_link; + rq->mock.link.next = &(*prev)->mock.link; *prev = rq; return 0; @@ -2970,8 +2970,7 @@ static int live_preempt_gang(void *arg) } while (rq) { /* wait for each rq from highest to lowest prio */ - struct i915_request *n = - list_next_entry(rq, client_link); + struct i915_request *n = list_next_entry(rq, mock.link); if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = @@ -3090,7 +3089,7 @@ static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) return vma; } - err = i915_ggtt_pin(vma, 0, 0); + err = i915_ggtt_pin(vma, NULL, 0, 0); if (err) { i915_vma_put(vma); return ERR_PTR(err); @@ -4997,6 +4996,7 @@ static int __live_lrc_state(struct intel_engine_cs *engine, { struct intel_context *ce; struct i915_request *rq; + struct i915_gem_ww_ctx ww; enum { RING_START_IDX = 0, RING_TAIL_IDX, @@ -5011,7 +5011,11 @@ static int 
__live_lrc_state(struct intel_engine_cs *engine, if (IS_ERR(ce)) return PTR_ERR(ce); - err = intel_context_pin(ce); + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(scratch->obj, &ww); + if (!err) + err = intel_context_pin_ww(ce, &ww); if (err) goto err_put; @@ -5040,11 +5044,9 @@ static int __live_lrc_state(struct intel_engine_cs *engine, *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); *cs++ = 0; - i915_vma_lock(scratch); err = i915_request_await_object(rq, scratch->obj, true); if (!err) err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(scratch); i915_request_get(rq); i915_request_add(rq); @@ -5081,6 +5083,12 @@ err_rq: err_unpin: intel_context_unpin(ce); err_put: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); intel_context_put(ce); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 35406ecdf0b2..ef5aeebbeeb0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -3,9 +3,203 @@ * Copyright © 2018 Intel Corporation */ +#include <linux/crc32.h> + +#include "gem/i915_gem_stolen.h" + +#include "i915_memcpy.h" #include "i915_selftest.h" #include "selftests/igt_reset.h" #include "selftests/igt_atomic.h" +#include "selftests/igt_spinner.h" + +static int +__igt_reset_stolen(struct intel_gt *gt, + intel_engine_mask_t mask, + const char *msg) +{ + struct i915_ggtt *ggtt = &gt->i915->ggtt; + const struct resource *dsm = &gt->i915->dsm; + resource_size_t num_pages, page; + struct intel_engine_cs *engine; + intel_wakeref_t wakeref; + enum intel_engine_id id; + struct igt_spinner spin; + long max, count; + void *tmp; + u32 *crc; + int err; + + if (!drm_mm_node_allocated(&ggtt->error_capture)) + return 0; + + num_pages = resource_size(dsm) >> PAGE_SHIFT; + if (!num_pages) + return 0; + + crc = kmalloc_array(num_pages,
sizeof(u32), GFP_KERNEL); + if (!crc) + return -ENOMEM; + + tmp = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!tmp) { + err = -ENOMEM; + goto err_crc; + } + + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + err = igt_spinner_init(&spin, gt); + if (err) + goto err_lock; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + struct i915_request *rq; + + if (!(mask & engine->mask)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err_spin; + } + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_spin; + } + i915_request_add(rq); + } + + for (page = 0; page < num_pages; page++) { + dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT); + void __iomem *s; + void *in; + + ggtt->vm.insert_page(&ggtt->vm, dma, + ggtt->error_capture.start, + I915_CACHE_NONE, 0); + mb(); + + s = io_mapping_map_wc(&ggtt->iomap, + ggtt->error_capture.start, + PAGE_SIZE); + + if (!__drm_mm_interval_first(&gt->i915->mm.stolen, + page << PAGE_SHIFT, + ((page + 1) << PAGE_SHIFT) - 1)) + memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); + + in = s; + if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE)) + in = tmp; + crc[page] = crc32_le(0, in, PAGE_SIZE); + + io_mapping_unmap(s); + } + mb(); + ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE); + + if (mask == ALL_ENGINES) { + intel_gt_reset(gt, mask, NULL); + } else { + for_each_engine(engine, gt, id) { + if (mask & engine->mask) + intel_engine_reset(engine, NULL); + } + } + + max = -1; + count = 0; + for (page = 0; page < num_pages; page++) { + dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT); + void __iomem *s; + void *in; + u32 x; + + ggtt->vm.insert_page(&ggtt->vm, dma, + ggtt->error_capture.start, + I915_CACHE_NONE, 0); + mb(); + + s = io_mapping_map_wc(&ggtt->iomap, +
ggtt->error_capture.start, + PAGE_SIZE); + + in = s; + if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE)) + in = tmp; + x = crc32_le(0, in, PAGE_SIZE); + + if (x != crc[page] && + !__drm_mm_interval_first(&gt->i915->mm.stolen, + page << PAGE_SHIFT, + ((page + 1) << PAGE_SHIFT) - 1)) { + pr_debug("unused stolen page %pa modified by GPU reset\n", + &page); + if (count++ == 0) + igt_hexdump(in, PAGE_SIZE); + max = page; + } + + io_mapping_unmap(s); + } + mb(); + ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE); + + if (count > 0) { + pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n", + msg, count, max); + } + if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) { + pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n", + msg, I915_GEM_STOLEN_BIAS); + err = -EINVAL; + } + +err_spin: + igt_spinner_fini(&spin); + +err_lock: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); + + kfree(tmp); +err_crc: + kfree(crc); + return err; +} + +static int igt_reset_device_stolen(void *arg) +{ + return __igt_reset_stolen(arg, ALL_ENGINES, "device"); +} + +static int igt_reset_engines_stolen(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + if (!intel_has_reset_engine(gt)) + return 0; + + for_each_engine(engine, gt, id) { + err = __igt_reset_stolen(gt, engine->mask, engine->name); + if (err) + return err; + } + + return 0; +} static int igt_global_reset(void *arg) { @@ -164,6 +358,8 @@ int intel_reset_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_global_reset), /* attempt to recover GPU first */ + SUBTEST(igt_reset_device_stolen), + SUBTEST(igt_reset_engines_stolen), SUBTEST(igt_wedged_reset), SUBTEST(igt_atomic_reset), SUBTEST(igt_atomic_engine_reset), diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index
8624f5d2a1f3..3540ba9bd459 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -77,20 +77,20 @@ create_spin_counter(struct intel_engine_cs *engine, vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { - i915_gem_object_put(obj); - return vma; + err = PTR_ERR(vma); + goto err_put; } err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) { - i915_vma_put(vma); - return ERR_PTR(err); - } + if (err) + goto err_unlock; + + i915_vma_lock(vma); base = i915_gem_object_pin_map(obj, I915_MAP_WC); if (IS_ERR(base)) { - i915_gem_object_put(obj); - return ERR_CAST(base); + err = PTR_ERR(base); + goto err_unpin; } cs = base; @@ -134,6 +134,14 @@ create_spin_counter(struct intel_engine_cs *engine, *cancel = base + loop; *counter = srm ? memset32(base + end, 0, 1) : NULL; return vma; + +err_unpin: + i915_vma_unpin(vma); +err_unlock: + i915_vma_unlock(vma); +err_put: + i915_gem_object_put(obj); + return ERR_PTR(err); } static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms) @@ -639,7 +647,6 @@ int live_rps_frequency_cs(void *arg) goto err_vma; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (!err) err = i915_vma_move_to_active(vma, rq, 0); @@ -647,7 +654,6 @@ int live_rps_frequency_cs(void *arg) err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0); - i915_vma_unlock(vma); i915_request_add(rq); if (err) goto err_vma; @@ -700,7 +706,7 @@ int live_rps_frequency_cs(void *arg) f = act; /* may skip ahead [pcu granularity] */ } - err = -EINVAL; + err = -EINTR; /* ignore error, continue on with test */ } err_vma: @@ -708,6 +714,7 @@ err_vma: i915_gem_object_flush_map(vma->obj); i915_gem_object_unpin_map(vma->obj); i915_vma_unpin(vma); + i915_vma_unlock(vma); i915_vma_put(vma); st_engine_heartbeat_enable(engine); @@ -781,7 +788,6 @@ int live_rps_frequency_srm(void *arg) goto err_vma; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (!err) err = 
i915_vma_move_to_active(vma, rq, 0); @@ -789,7 +795,6 @@ int live_rps_frequency_srm(void *arg) err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0); - i915_vma_unlock(vma); i915_request_add(rq); if (err) goto err_vma; @@ -841,7 +846,7 @@ int live_rps_frequency_srm(void *arg) f = act; /* may skip ahead [pcu granularity] */ } - err = -EINVAL; + err = -EINTR; /* ignore error, continue on with test */ } err_vma: @@ -849,6 +854,7 @@ err_vma: i915_gem_object_flush_map(vma->obj); i915_gem_object_unpin_map(vma->obj); i915_vma_unpin(vma); + i915_vma_unlock(vma); i915_vma_put(vma); st_engine_heartbeat_enable(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index fb5b7d3498a6..19c2cb166e7c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -72,7 +72,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, unsigned long cacheline; int err; - tl = intel_timeline_create(state->gt, NULL); + tl = intel_timeline_create(state->gt); if (IS_ERR(tl)) return PTR_ERR(tl); @@ -158,7 +158,7 @@ out: __mock_hwsp_record(&state, na, NULL); kfree(state.history); err_put: - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } @@ -455,7 +455,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) struct i915_request *rq; int err; - err = intel_timeline_pin(tl); + err = intel_timeline_pin(tl, NULL); if (err) { rq = ERR_PTR(err); goto out; @@ -487,11 +487,11 @@ checked_intel_timeline_create(struct intel_gt *gt) { struct intel_timeline *tl; - tl = intel_timeline_create(gt, NULL); + tl = intel_timeline_create(gt); if (IS_ERR(tl)) return tl; - if (*tl->hwsp_seqno != tl->seqno) { + if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) { pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", *tl->hwsp_seqno, tl->seqno); intel_timeline_put(tl); @@ -561,9 +561,9 @@ static int live_hwsp_engine(void 
*arg) for (n = 0; n < count; n++) { struct intel_timeline *tl = timelines[n]; - if (!err && *tl->hwsp_seqno != n) { - pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n", - n, tl->hwsp_offset, *tl->hwsp_seqno); + if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { + GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", + n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); GEM_TRACE_DUMP(); err = -EINVAL; } @@ -633,9 +633,9 @@ out: for (n = 0; n < count; n++) { struct intel_timeline *tl = timelines[n]; - if (!err && *tl->hwsp_seqno != n) { - pr_err("Invalid seqno stored in timeline %lu @ %x, found 0x%x\n", - n, tl->hwsp_offset, *tl->hwsp_seqno); + if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { + GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", + n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); GEM_TRACE_DUMP(); err = -EINVAL; } @@ -660,14 +660,14 @@ static int live_hwsp_wrap(void *arg) * foreign GPU references. */ - tl = intel_timeline_create(gt, NULL); + tl = intel_timeline_create(gt); if (IS_ERR(tl)) return PTR_ERR(tl); if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) goto out_free; - err = intel_timeline_pin(tl); + err = intel_timeline_pin(tl, NULL); if (err) goto out_free; @@ -733,7 +733,8 @@ static int live_hwsp_wrap(void *arg) goto out; } - if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) { + if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] || + READ_ONCE(*hwsp_seqno[1]) != seqno[1]) { pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n", *hwsp_seqno[0], *hwsp_seqno[1], seqno[0], seqno[1]); @@ -966,9 +967,10 @@ static int live_hwsp_recycle(void *arg) break; } - if (*tl->hwsp_seqno != count) { - pr_err("Invalid seqno stored in timeline %lu @ tl->hwsp_offset, found 0x%x\n", - count, *tl->hwsp_seqno); + if (READ_ONCE(*tl->hwsp_seqno) != count) { + GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n", + count, tl->fence_context, + 
tl->hwsp_offset, *tl->hwsp_seqno); GEM_TRACE_DUMP(); err = -EINVAL; } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index febc9e6692ba..61a0532d0f3d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -214,7 +214,7 @@ static int check_whitelist(struct i915_gem_context *ctx, return PTR_ERR(results); err = 0; - i915_gem_object_lock(results); + i915_gem_object_lock(results, NULL); intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */ err = i915_gem_object_set_to_cpu_domain(results, false); i915_gem_object_unlock(results); diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index 43c7acbdc79d..f011ea42487e 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -49,80 +49,40 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) return file; } -static size_t shmem_npte(struct file *file) -{ - return file->f_mapping->host->i_size >> PAGE_SHIFT; -} - -static void __shmem_unpin_map(struct file *file, void *ptr, size_t n_pte) -{ - unsigned long pfn; - - vunmap(ptr); - - for (pfn = 0; pfn < n_pte; pfn++) { - struct page *page; - - page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, - GFP_KERNEL); - if (!WARN_ON(IS_ERR(page))) { - put_page(page); - put_page(page); - } - } -} - void *shmem_pin_map(struct file *file) { - const size_t n_pte = shmem_npte(file); - pte_t *stack[32], **ptes, **mem; - struct vm_struct *area; - unsigned long pfn; - - mem = stack; - if (n_pte > ARRAY_SIZE(stack)) { - mem = kvmalloc_array(n_pte, sizeof(*mem), GFP_KERNEL); - if (!mem) - return NULL; - } + struct page **pages; + size_t n_pages, i; + void *vaddr; - area = alloc_vm_area(n_pte << PAGE_SHIFT, mem); - if (!area) { - if (mem != stack) - kvfree(mem); + n_pages = file->f_mapping->host->i_size >> PAGE_SHIFT; + pages = kvmalloc_array(n_pages, 
sizeof(*pages), GFP_KERNEL); + if (!pages) return NULL; - } - ptes = mem; - for (pfn = 0; pfn < n_pte; pfn++) { - struct page *page; - - page = shmem_read_mapping_page_gfp(file->f_mapping, pfn, - GFP_KERNEL); - if (IS_ERR(page)) + for (i = 0; i < n_pages; i++) { + pages[i] = shmem_read_mapping_page_gfp(file->f_mapping, i, + GFP_KERNEL); + if (IS_ERR(pages[i])) goto err_page; - - **ptes++ = mk_pte(page, PAGE_KERNEL); } - if (mem != stack) - kvfree(mem); - + vaddr = vmap(pages, n_pages, VM_MAP_PUT_PAGES, PAGE_KERNEL); + if (!vaddr) + goto err_page; mapping_set_unevictable(file->f_mapping); - return area->addr; - + return vaddr; err_page: - if (mem != stack) - kvfree(mem); - - __shmem_unpin_map(file, area->addr, pfn); + while (--i >= 0) + put_page(pages[i]); + kvfree(pages); return NULL; } void shmem_unpin_map(struct file *file, void *ptr) { mapping_clear_unevictable(file->f_mapping); - __shmem_unpin_map(file, ptr, shmem_npte(file)); + vfree(ptr); } static int __shmem_rw(struct file *file, loff_t off, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 861657897c0f..942c7c187adb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -677,7 +677,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) goto err; flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - ret = i915_ggtt_pin(vma, 0, flags); + ret = i915_ggtt_pin(vma, NULL, 0, flags); if (ret) { vma = ERR_PTR(ret); goto err; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 59b27aba15c6..80e8b6c3bc8c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -51,8 +51,8 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * Note that RKL uses the same firmware as TGL. 
*/ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 12)) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ fw_def(COMETLAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 072725a448db..ad86c5eb5bba 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -70,6 +70,7 @@ static void vgpu_pci_cfg_mem_write(struct intel_vgpu *vgpu, unsigned int off, { u8 *cfg_base = vgpu_cfg_space(vgpu); u8 mask, new, old; + pci_power_t pwr; int i = 0; for (; i < bytes && (off + i < sizeof(pci_cfg_space_rw_bmp)); i++) { @@ -91,6 +92,15 @@ static void vgpu_pci_cfg_mem_write(struct intel_vgpu *vgpu, unsigned int off, /* For other configuration space directly copy as it is. 
*/ if (i < bytes) memcpy(cfg_base + off + i, src + i, bytes - i); + + if (off == vgpu->cfg_space.pmcsr_off && vgpu->cfg_space.pmcsr_off) { + pwr = (pci_power_t __force)(*(u16*)(&vgpu_cfg_space(vgpu)[off]) + & PCI_PM_CTRL_STATE_MASK); + if (pwr == PCI_D3hot) + vgpu->d3_entered = true; + gvt_dbg_core("vgpu-%d power status changed to %d\n", + vgpu->id, pwr); + } } /** @@ -366,6 +376,7 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, struct intel_gvt *gvt = vgpu->gvt; const struct intel_gvt_device_info *info = &gvt->device_info; u16 *gmch_ctl; + u8 next; memcpy(vgpu_cfg_space(vgpu), gvt->firmware.cfg_space, info->cfg_space_size); @@ -401,6 +412,19 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, pci_resource_len(gvt->gt->i915->drm.pdev, 2); memset(vgpu_cfg_space(vgpu) + PCI_ROM_ADDRESS, 0, 4); + + /* PM Support */ + vgpu->cfg_space.pmcsr_off = 0; + if (vgpu_cfg_space(vgpu)[PCI_STATUS] & PCI_STATUS_CAP_LIST) { + next = vgpu_cfg_space(vgpu)[PCI_CAPABILITY_LIST]; + do { + if (vgpu_cfg_space(vgpu)[next + PCI_CAP_LIST_ID] == PCI_CAP_ID_PM) { + vgpu->cfg_space.pmcsr_off = next + PCI_PM_CTRL; + break; + } + next = vgpu_cfg_space(vgpu)[next + PCI_CAP_LIST_NEXT]; + } while (next); + } } /** diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index f1940939260a..16b582cb97ed 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -936,7 +936,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, return -EFAULT; } - if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) { + if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) { gvt_vgpu_err("%s access to non-render register (%x)\n", cmd, offset); return -EBADRQC; @@ -976,7 +976,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, * inhibit context will restore with correct values */ if (IS_GEN(s->engine->i915, 9) && - intel_gvt_mmio_is_in_ctx(gvt, offset) && + intel_gvt_mmio_is_sr_in_ctx(gvt, offset) && !strncmp(cmd, "lri", 
3)) { intel_gvt_hypervisor_read_gpa(s->vgpu, s->workload->ring_context_gpa + 12, &ctx_sr_ctl, 4); @@ -992,8 +992,6 @@ static int cmd_reg_handler(struct parser_exec_state *s, } } - /* TODO: Update the global mask if this MMIO is a masked-MMIO */ - intel_gvt_mmio_set_cmd_accessed(gvt, offset); return 0; } @@ -1923,6 +1921,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) if (ret) goto err_unmap; + i915_gem_object_unlock(bb->obj); INIT_LIST_HEAD(&bb->list); list_add(&bb->list, &s->workload->shadow_bb); @@ -2982,7 +2981,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) goto put_obj; } - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); ret = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (ret) { diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 7ba16ddfe75f..d7898e87791f 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -164,7 +164,7 @@ static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = { /* let the virtual display supports DP1.2 */ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { - 0x12, 0x014, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + 0x12, 0x014, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static void emulate_monitor_status_change(struct intel_vgpu *vgpu) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 210016192ce7..a3a4305eda01 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2501,7 +2501,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) return create_scratch_page_tree(vgpu); } -static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) +void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) { struct list_head *pos, *n; struct intel_vgpu_mm *mm; diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 320b8d6ad92f..52d0d88abd86 100644 --- 
a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -279,4 +279,6 @@ int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); +void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu); + #endif /* _GVT_GTT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index a4a6db6b7f90..9831361f181e 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -106,6 +106,7 @@ struct intel_vgpu_pci_bar { struct intel_vgpu_cfg_space { unsigned char virtual_cfg_space[PCI_CFG_SPACE_EXP_SIZE]; struct intel_vgpu_pci_bar bar[INTEL_GVT_MAX_BAR_NUM]; + u32 pmcsr_off; }; #define vgpu_cfg_space(vgpu) ((vgpu)->cfg_space.virtual_cfg_space) @@ -198,6 +199,8 @@ struct intel_vgpu { struct intel_vgpu_submission submission; struct radix_tree_root page_track_tree; u32 hws_pga[I915_NUM_ENGINES]; + /* Set on PCI_D3, reset on DMLR, not reflecting the actual PM state */ + bool d3_entered; struct dentry *debugfs; @@ -253,11 +256,11 @@ struct intel_gvt_mmio { /* This reg has been accessed by a VM */ #define F_ACCESSED (1 << 4) /* This reg has been accessed through GPU commands */ -#define F_CMD_ACCESSED (1 << 5) -/* This reg could be accessed by unaligned address */ #define F_UNALIGN (1 << 6) -/* This reg is saved/restored in context */ -#define F_IN_CTX (1 << 7) +/* This reg is in GVT's mmio save-restor list and in hardware + * logical context image + */ +#define F_SR_IN_CTX (1 << 7) struct gvt_mmio_block *mmio_block; unsigned int num_mmio_block; @@ -594,39 +597,42 @@ static inline void intel_gvt_mmio_set_accessed( } /** - * intel_gvt_mmio_is_cmd_accessed - mark a MMIO could be accessed by command + * intel_gvt_mmio_is_cmd_accessible - if a MMIO could be accessed by command * @gvt: a GVT device * @offset: register offset * + * Returns: + * True if an MMIO is able to be accessed by GPU commands */ 
-static inline bool intel_gvt_mmio_is_cmd_access( +static inline bool intel_gvt_mmio_is_cmd_accessible( struct intel_gvt *gvt, unsigned int offset) { return gvt->mmio.mmio_attribute[offset >> 2] & F_CMD_ACCESS; } /** - * intel_gvt_mmio_is_unalign - mark a MMIO could be accessed unaligned + * intel_gvt_mmio_set_cmd_accessible - + * mark a MMIO could be accessible by command * @gvt: a GVT device * @offset: register offset * */ -static inline bool intel_gvt_mmio_is_unalign( +static inline void intel_gvt_mmio_set_cmd_accessible( struct intel_gvt *gvt, unsigned int offset) { - return gvt->mmio.mmio_attribute[offset >> 2] & F_UNALIGN; + gvt->mmio.mmio_attribute[offset >> 2] |= F_CMD_ACCESS; } /** - * intel_gvt_mmio_set_cmd_accessed - mark a MMIO has been accessed by command + * intel_gvt_mmio_is_unalign - mark a MMIO could be accessed unaligned * @gvt: a GVT device * @offset: register offset * */ -static inline void intel_gvt_mmio_set_cmd_accessed( +static inline bool intel_gvt_mmio_is_unalign( struct intel_gvt *gvt, unsigned int offset) { - gvt->mmio.mmio_attribute[offset >> 2] |= F_CMD_ACCESSED; + return gvt->mmio.mmio_attribute[offset >> 2] & F_UNALIGN; } /** @@ -645,30 +651,33 @@ static inline bool intel_gvt_mmio_has_mode_mask( } /** - * intel_gvt_mmio_is_in_ctx - check if a MMIO has in-ctx mask + * intel_gvt_mmio_is_sr_in_ctx - + * check if an MMIO has F_SR_IN_CTX mask * @gvt: a GVT device * @offset: register offset * * Returns: - * True if a MMIO has a in-context mask, false if it isn't. + * True if an MMIO has an F_SR_IN_CTX mask, false if it isn't. 
* */ -static inline bool intel_gvt_mmio_is_in_ctx( +static inline bool intel_gvt_mmio_is_sr_in_ctx( struct intel_gvt *gvt, unsigned int offset) { - return gvt->mmio.mmio_attribute[offset >> 2] & F_IN_CTX; + return gvt->mmio.mmio_attribute[offset >> 2] & F_SR_IN_CTX; } /** - * intel_gvt_mmio_set_in_ctx - mask a MMIO in logical context + * intel_gvt_mmio_set_sr_in_ctx - + * mask an MMIO in GVT's mmio save-restore list and also + * in hardware logical context image * @gvt: a GVT device * @offset: register offset * */ -static inline void intel_gvt_mmio_set_in_ctx( +static inline void intel_gvt_mmio_set_sr_in_ctx( struct intel_gvt *gvt, unsigned int offset) { - gvt->mmio.mmio_attribute[offset >> 2] |= F_IN_CTX; + gvt->mmio.mmio_attribute[offset >> 2] |= F_SR_IN_CTX; } void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 63bba7b4bb2f..eb342a759943 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1226,7 +1226,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) switch (notification) { case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE: root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; - /* fall through */ + fallthrough; case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE: mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps); return PTR_ERR_OR_ZERO(mm); @@ -1489,7 +1489,8 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, const struct intel_engine_cs *engine = intel_gvt_render_mmio_to_engine(vgpu->gvt, offset); - if (!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) { + if (value != 0 && + !intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) { gvt_vgpu_err("write invalid HWSP address, reg:0x%x, value:0x%x\n", offset, value); return -EINVAL; @@ -1650,6 +1651,34 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu, return 0; } +/** + * FixMe: + * If guest fills 
non-priv batch buffer on ApolloLake/Broxton as Mesa i965 did: + * 717e7539124d (i965: Use a WC map and memcpy for the batch instead of pwrite.) + * Due to the missing flush of bb filled by VM vCPU, host GPU hangs on executing + * these MI_BATCH_BUFFER. + * Temporarily workaround this by setting SNOOP bit for PAT3 used by PPGTT + * PML4 PTE: PAT(0) PCD(1) PWT(1). + * The performance is still expected to be low, will need further improvement. + */ +static int bxt_ppat_low_write(struct intel_vgpu *vgpu, unsigned int offset, + void *p_data, unsigned int bytes) +{ + u64 pat = + GEN8_PPAT(0, CHV_PPAT_SNOOP) | + GEN8_PPAT(1, 0) | + GEN8_PPAT(2, 0) | + GEN8_PPAT(3, CHV_PPAT_SNOOP) | + GEN8_PPAT(4, CHV_PPAT_SNOOP) | + GEN8_PPAT(5, CHV_PPAT_SNOOP) | + GEN8_PPAT(6, CHV_PPAT_SNOOP) | + GEN8_PPAT(7, CHV_PPAT_SNOOP); + + vgpu_vreg(vgpu, offset) = lower_32_bits(pat); + + return 0; +} + static int guc_status_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) @@ -1892,7 +1921,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->gt->i915; int ret; - MMIO_RING_DFH(RING_IMR, D_ALL, F_CMD_ACCESS, NULL, + MMIO_RING_DFH(RING_IMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); MMIO_DFH(SDEIMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); @@ -1900,7 +1929,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(SDEIIR, D_ALL, 0, NULL, intel_vgpu_reg_iir_handler); MMIO_D(SDEISR, D_ALL); - MMIO_RING_DFH(RING_HWSTAM, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_HWSTAM, D_ALL, 0, NULL, NULL); + MMIO_DH(GEN8_GAMW_ECO_DEV_RW_IA, D_BDW_PLUS, NULL, gamw_echo_dev_rw_ia_write); @@ -1927,11 +1957,11 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_GM_RDR(_MMIO(0x12198), D_ALL, NULL, NULL); MMIO_D(GEN7_CXT_SIZE, D_ALL); - MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_RING_DFH(RING_CTL, D_ALL, 
F_CMD_ACCESS, NULL, NULL); - MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, mmio_read_from_hw, NULL); - MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); + MMIO_RING_DFH(RING_TAIL, D_ALL, 0, NULL, NULL); + MMIO_RING_DFH(RING_HEAD, D_ALL, 0, NULL, NULL); + MMIO_RING_DFH(RING_CTL, D_ALL, 0, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD, D_ALL, 0, mmio_read_from_hw, NULL); + MMIO_RING_GM(RING_START, D_ALL, NULL, NULL); /* RING MODE */ #define RING_REG(base) _MMIO((base) + 0x29c) @@ -2686,7 +2716,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(_MMIO(0x4094), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL); + MMIO_RING_GM(RING_BBADDR, D_ALL, NULL, NULL); MMIO_DFH(_MMIO(0x2220), D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0x12220), D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0x22220), D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2771,7 +2801,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, + MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, 0, mmio_read_from_hw, NULL); #define RING_REG(base) _MMIO((base) + 0xd0) @@ -2785,7 +2815,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) #undef RING_REG #define RING_REG(base) _MMIO((base) + 0x234) - MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, + MMIO_RING_F(RING_REG, 8, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); #undef RING_REG @@ -2811,7 +2841,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write); - MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS); + MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS & ~D_BXT); MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS); MMIO_D(GAMTARBMODE, D_BDW_PLUS); @@ -2820,7 +2850,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) MMIO_RING_F(RING_REG, 32, F_CMD_ACCESS, 0, 0, 
D_BDW_PLUS, NULL, NULL); #undef RING_REG - MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, hws_pga_write); + MMIO_RING_GM(RING_HWS_PGA, D_BDW_PLUS, NULL, hws_pga_write); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2921,7 +2951,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(MMCD_MISC_CTRL, D_SKL_PLUS, NULL, NULL); + MMIO_DFH(MMCD_MISC_CTRL, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DH(CHICKEN_PAR1_1, D_SKL_PLUS, NULL, NULL); MMIO_D(DC_STATE_EN, D_SKL_PLUS); MMIO_D(DC_STATE_DEBUG, D_SKL_PLUS); @@ -3137,8 +3167,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GEN9_WM_CHICKEN3, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(GAMT_CHKN_BIT_REG, D_KBL | D_CFL); - MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS); + MMIO_DFH(GAMT_CHKN_BIT_REG, D_KBL | D_CFL, F_CMD_ACCESS, NULL, NULL); + MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS & ~D_BXT); return 0; } @@ -3312,9 +3342,21 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN8_PUSHBUS_SHIFT, D_BXT); MMIO_D(GEN6_GFXPAUSE, D_BXT); MMIO_DFH(GEN8_L3SQCREG1, D_BXT, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GEN8_L3CNTLREG, D_BXT, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x20D8), D_BXT, F_CMD_ACCESS, NULL, NULL); + MMIO_F(GEN8_RING_CS_GPR(RENDER_RING_BASE, 0), 0x40, F_CMD_ACCESS, + 0, 0, D_BXT, NULL, NULL); + MMIO_F(GEN8_RING_CS_GPR(GEN6_BSD_RING_BASE, 0), 0x40, F_CMD_ACCESS, + 0, 0, D_BXT, NULL, NULL); + MMIO_F(GEN8_RING_CS_GPR(BLT_RING_BASE, 0), 0x40, F_CMD_ACCESS, + 0, 0, D_BXT, NULL, NULL); + MMIO_F(GEN8_RING_CS_GPR(VEBOX_RING_BASE, 0), 0x40, F_CMD_ACCESS, + 0, 0, D_BXT, NULL, NULL); MMIO_DFH(GEN9_CTX_PREEMPT_REG, D_BXT, F_CMD_ACCESS, NULL, NULL); + MMIO_DH(GEN8_PRIVATE_PAT_LO, D_BXT, NULL, bxt_ppat_low_write); + return 0; } @@ -3357,7 +3399,10 @@ void 
intel_gvt_clean_mmio_info(struct intel_gvt *gvt) gvt->mmio.mmio_attribute = NULL; } -/* Special MMIO blocks. */ +/* Special MMIO blocks. registers in MMIO block ranges should not be command + * accessible (should have no F_CMD_ACCESS flag). + * otherwise, need to update cmd_reg_handler in cmd_parser.c + */ static struct gvt_mmio_block mmio_blocks[] = { {D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL}, {D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL}, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index ad8a9df49f29..778eb8cab610 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -829,8 +829,10 @@ static int intel_vgpu_open(struct mdev_device *mdev) /* Take a module reference as mdev core doesn't take * a reference for vendor driver. */ - if (!try_module_get(THIS_MODULE)) + if (!try_module_get(THIS_MODULE)) { + ret = -ENODEV; goto undo_group; + } ret = kvmgt_guest_init(mdev); if (ret) diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 291993615af9..b6811f6a230d 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -251,6 +251,9 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr) /* set the bit 0:2(Core C-State ) to C0 */ vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0; + /* uc reset hw expect GS_MIA_IN_RESET */ + vgpu_vreg_t(vgpu, GUC_STATUS) |= GS_MIA_IN_RESET; + if (IS_BROXTON(vgpu->gvt->gt->i915)) { vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) &= ~(BIT(0) | BIT(1)); diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 86a60bdf0818..afe574d6b3b5 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -595,7 +595,7 @@ void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) i915_mmio_reg_valid(mmio->reg); mmio++) { if (mmio->in_context) { gvt->engine_mmio_list.ctx_mmio_count[mmio->id]++; - 
intel_gvt_mmio_set_in_ctx(gvt, mmio->reg.reg); + intel_gvt_mmio_set_sr_in_ctx(gvt, mmio->reg.reg); } } } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 3c3b9842bbbd..aed2ef6466a2 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -403,6 +403,14 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx->indirect_ctx.shadow_va = NULL; } +static void set_dma_address(struct i915_page_directory *pd, dma_addr_t addr) +{ + struct scatterlist *sg = pd->pt.base->mm.pages->sgl; + + /* This is not a good idea */ + sg->dma_address = addr; +} + static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, struct intel_context *ce) { @@ -411,7 +419,7 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, int i = 0; if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { - px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0]; + set_dma_address(ppgtt->pd, mm->ppgtt_mm.shadow_pdps[0]); } else { for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) { struct i915_page_directory * const pd = @@ -421,7 +429,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, shadow ppgtt. 
*/ if (!pd) break; - px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; + + set_dma_address(pd, mm->ppgtt_mm.shadow_pdps[i]); } } } @@ -1240,13 +1249,13 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s, int i; if (i915_vm_is_4lvl(&ppgtt->vm)) { - px_dma(ppgtt->pd) = s->i915_context_pml4; + set_dma_address(ppgtt->pd, s->i915_context_pml4); } else { for (i = 0; i < GEN8_3LVL_PDPES; i++) { struct i915_page_directory * const pd = i915_pd_entry(ppgtt->pd, i); - px_dma(pd) = s->i915_context_pdps[i]; + set_dma_address(pd, s->i915_context_pdps[i]); } } } @@ -1268,7 +1277,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm)); for_each_engine(engine, vgpu->gvt->gt, id) - intel_context_unpin(s->shadow[id]); + intel_context_put(s->shadow[id]); kmem_cache_destroy(s->workloads); } @@ -1360,11 +1369,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) ce->ring = __intel_context_ring_size(ring_size); } - ret = intel_context_pin(ce); - intel_context_put(ce); - if (ret) - goto out_shadow_ctx; - s->shadow[i] = ce; } @@ -1396,7 +1400,6 @@ out_shadow_ctx: if (IS_ERR(s->shadow[i])) break; - intel_context_unpin(s->shadow[i]); intel_context_put(s->shadow[i]); } i915_vm_put(&ppgtt->vm); @@ -1470,6 +1473,7 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu_submission *s = &workload->vgpu->submission; + intel_context_unpin(s->shadow[workload->engine->id]); release_shadow_batch_buffer(workload); release_shadow_wa_ctx(&workload->wa_ctx); @@ -1715,6 +1719,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, return ERR_PTR(ret); } + ret = intel_context_pin(s->shadow[engine->id]); + if (ret) { + intel_vgpu_destroy_workload(workload); + return ERR_PTR(ret); + } + return workload; } diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 7d361623ff67..399582aeeefb 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ 
b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -257,6 +257,7 @@ void intel_gvt_release_vgpu(struct intel_vgpu *vgpu) intel_gvt_deactivate_vgpu(vgpu); mutex_lock(&vgpu->vgpu_lock); + vgpu->d3_entered = false; intel_vgpu_clean_workloads(vgpu, ALL_ENGINES); intel_vgpu_dmabuf_cleanup(vgpu); mutex_unlock(&vgpu->vgpu_lock); @@ -367,6 +368,7 @@ void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu) static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_creation_params *param) { + struct drm_i915_private *dev_priv = gvt->gt->i915; struct intel_vgpu *vgpu; int ret; @@ -393,6 +395,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, INIT_RADIX_TREE(&vgpu->page_track_tree, GFP_KERNEL); idr_init(&vgpu->object_idr); intel_vgpu_init_cfg_space(vgpu, param->primary); + vgpu->d3_entered = false; ret = intel_vgpu_init_mmio(vgpu); if (ret) @@ -434,7 +437,11 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; - ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); + if (IS_BROADWELL(dev_priv)) + ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_B); + /* FixMe: Re-enable APL/BXT once vfio_edid enabled */ + else if (!IS_BROXTON(dev_priv)) + ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); if (ret) goto out_clean_sched_policy; @@ -557,10 +564,15 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, /* full GPU reset or device model level reset */ if (engine_mask == ALL_ENGINES || dmlr) { intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0); - intel_vgpu_invalidate_ppgtt(vgpu); + if (engine_mask == ALL_ENGINES) + intel_vgpu_invalidate_ppgtt(vgpu); /*fence will not be reset during virtual reset */ if (dmlr) { - intel_vgpu_reset_gtt(vgpu); + if(!vgpu->d3_entered) { + intel_vgpu_invalidate_ppgtt(vgpu); + intel_vgpu_destroy_all_ppgtt_mm(vgpu); + } + intel_vgpu_reset_ggtt(vgpu, true); intel_vgpu_reset_resource(vgpu); } @@ -572,7 +584,14 @@ void 
intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, intel_vgpu_reset_cfg_space(vgpu); /* only reset the failsafe mode when dmlr reset */ vgpu->failsafe = false; - vgpu->pv_notified = false; + /* + * PCI_D0 is set before dmlr, so reset d3_entered here + * after done using. + */ + if(vgpu->d3_entered) + vgpu->d3_entered = false; + else + vgpu->pv_notified = false; } } diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index d960d0be5bd2..10a865f3dc09 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -28,12 +28,14 @@ static struct i915_global_active { } global; struct active_node { + struct rb_node node; struct i915_active_fence base; struct i915_active *ref; - struct rb_node node; u64 timeline; }; +#define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node) + static inline struct active_node * node_from_active(struct i915_active_fence *active) { @@ -81,7 +83,7 @@ static void *active_debug_hint(void *addr) return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref; } -static struct debug_obj_descr active_debug_desc = { +static const struct debug_obj_descr active_debug_desc = { .name = "i915_active", .debug_hint = active_debug_hint, }; @@ -128,8 +130,8 @@ static inline void debug_active_assert(struct i915_active *ref) { } static void __active_retire(struct i915_active *ref) { + struct rb_root root = RB_ROOT; struct active_node *it, *n; - struct rb_root root; unsigned long flags; GEM_BUG_ON(i915_active_is_idle(ref)); @@ -141,9 +143,25 @@ __active_retire(struct i915_active *ref) GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); debug_active_deactivate(ref); - root = ref->tree; - ref->tree = RB_ROOT; - ref->cache = NULL; + /* Even if we have not used the cache, we may still have a barrier */ + if (!ref->cache) + ref->cache = fetch_node(ref->tree.rb_node); + + /* Keep the MRU cached node for reuse */ + if (ref->cache) { + /* Discard all other nodes in the tree */ 
+ rb_erase(&ref->cache->node, &ref->tree); + root = ref->tree; + + /* Rebuild the tree with only the cached node */ + rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node); + rb_insert_color(&ref->cache->node, &ref->tree); + GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node); + + /* Make the cached node available for reuse with any timeline */ + if (IS_ENABLED(CONFIG_64BIT)) + ref->cache->timeline = 0; /* needs cmpxchg(u64) */ + } spin_unlock_irqrestore(&ref->tree_lock, flags); @@ -154,6 +172,7 @@ __active_retire(struct i915_active *ref) /* ... except if you wait on it, you must manage your own references! */ wake_up_var(ref); + /* Finally free the discarded timeline tree */ rbtree_postorder_for_each_entry_safe(it, n, &root, node) { GEM_BUG_ON(i915_active_fence_isset(&it->base)); kmem_cache_free(global.slab_cache, it); @@ -216,12 +235,11 @@ excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) active_retire(container_of(cb, struct i915_active, excl.cb)); } -static struct i915_active_fence * -active_instance(struct i915_active *ref, struct intel_timeline *tl) +static struct active_node *__active_lookup(struct i915_active *ref, u64 idx) { - struct active_node *node, *prealloc; - struct rb_node **p, *parent; - u64 idx = tl->fence_context; + struct active_node *it; + + GEM_BUG_ON(idx == 0); /* 0 is the unordered timeline, rsvd for cache */ /* * We track the most recently used timeline to skip a rbtree search @@ -230,8 +248,59 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) * after the previous activity has been retired, or if it matches the * current timeline. */ - node = READ_ONCE(ref->cache); - if (node && node->timeline == idx) + it = READ_ONCE(ref->cache); + if (it) { + u64 cached = READ_ONCE(it->timeline); + + /* Once claimed, this slot will only belong to this idx */ + if (cached == idx) + return it; + +#ifdef CONFIG_64BIT /* for cmpxchg(u64) */ + /* + * An unclaimed cache [.timeline=0] can only be claimed once. 
+ * + * If the value is already non-zero, some other thread has + * claimed the cache and we know that is does not match our + * idx. If, and only if, the timeline is currently zero is it + * worth competing to claim it atomically for ourselves (for + * only the winner of that race will cmpxchg return the old + * value of 0). + */ + if (!cached && !cmpxchg(&it->timeline, 0, idx)) + return it; +#endif + } + + BUILD_BUG_ON(offsetof(typeof(*it), node)); + + /* While active, the tree can only be built; not destroyed */ + GEM_BUG_ON(i915_active_is_idle(ref)); + + it = fetch_node(ref->tree.rb_node); + while (it) { + if (it->timeline < idx) { + it = fetch_node(it->node.rb_right); + } else if (it->timeline > idx) { + it = fetch_node(it->node.rb_left); + } else { + WRITE_ONCE(ref->cache, it); + break; + } + } + + /* NB: If the tree rotated beneath us, we may miss our target. */ + return it; +} + +static struct i915_active_fence * +active_instance(struct i915_active *ref, u64 idx) +{ + struct active_node *node, *prealloc; + struct rb_node **p, *parent; + + node = __active_lookup(ref, idx); + if (likely(node)) return &node->base; /* Preallocate a replacement, just in case */ @@ -268,10 +337,9 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) rb_insert_color(&node->node, &ref->tree); out: - ref->cache = node; + WRITE_ONCE(ref->cache, node); spin_unlock_irq(&ref->tree_lock); - BUILD_BUG_ON(offsetof(typeof(*node), base)); return &node->base; } @@ -353,69 +421,116 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node) return ____active_del_barrier(ref, node, barrier_to_engine(node)); } -int i915_active_ref(struct i915_active *ref, - struct intel_timeline *tl, - struct dma_fence *fence) +static bool +replace_barrier(struct i915_active *ref, struct i915_active_fence *active) +{ + if (!is_barrier(active)) /* proto-node used by our idle barrier? 
*/ + return false; + + /* + * This request is on the kernel_context timeline, and so + * we can use it to substitute for the pending idle-barrier + * request that we want to emit on the kernel_context. + */ + __active_del_barrier(ref, node_from_active(active)); + return true; +} + +int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence) { struct i915_active_fence *active; int err; - lockdep_assert_held(&tl->mutex); - /* Prevent reaping in case we malloc/wait while building the tree */ err = i915_active_acquire(ref); if (err) return err; - active = active_instance(ref, tl); + active = active_instance(ref, idx); if (!active) { err = -ENOMEM; goto out; } - if (is_barrier(active)) { /* proto-node used by our idle barrier */ - /* - * This request is on the kernel_context timeline, and so - * we can use it to substitute for the pending idle-barrer - * request that we want to emit on the kernel_context. - */ - __active_del_barrier(ref, node_from_active(active)); + if (replace_barrier(ref, active)) { RCU_INIT_POINTER(active->fence, NULL); atomic_dec(&ref->count); } if (!__i915_active_fence_set(active, fence)) - atomic_inc(&ref->count); + __i915_active_acquire(ref); out: i915_active_release(ref); return err; } -struct dma_fence * -i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) +static struct dma_fence * +__i915_active_set_fence(struct i915_active *ref, + struct i915_active_fence *active, + struct dma_fence *fence) { struct dma_fence *prev; - /* We expect the caller to manage the exclusive timeline ordering */ - GEM_BUG_ON(i915_active_is_idle(ref)); + if (replace_barrier(ref, active)) { + RCU_INIT_POINTER(active->fence, fence); + return NULL; + } rcu_read_lock(); - prev = __i915_active_fence_set(&ref->excl, f); + prev = __i915_active_fence_set(active, fence); if (prev) prev = dma_fence_get_rcu(prev); else - atomic_inc(&ref->count); + __i915_active_acquire(ref); rcu_read_unlock(); return prev; } +static struct i915_active_fence *
+__active_fence(struct i915_active *ref, u64 idx) +{ + struct active_node *it; + + it = __active_lookup(ref, idx); + if (unlikely(!it)) { /* Contention with parallel tree builders! */ + spin_lock_irq(&ref->tree_lock); + it = __active_lookup(ref, idx); + spin_unlock_irq(&ref->tree_lock); + } + GEM_BUG_ON(!it); /* slot must be preallocated */ + + return &it->base; +} + +struct dma_fence * +__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence) +{ + /* Only valid while active, see i915_active_acquire_for_context() */ + return __i915_active_set_fence(ref, __active_fence(ref, idx), fence); +} + +struct dma_fence * +i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) +{ + /* We expect the caller to manage the exclusive timeline ordering */ + return __i915_active_set_fence(ref, &ref->excl, f); +} + bool i915_active_acquire_if_busy(struct i915_active *ref) { debug_active_assert(ref); return atomic_add_unless(&ref->count, 1, 0); } +static void __i915_active_activate(struct i915_active *ref) +{ + spin_lock_irq(&ref->tree_lock); /* __active_retire() */ + if (!atomic_fetch_inc(&ref->count)) + debug_active_activate(ref); + spin_unlock_irq(&ref->tree_lock); +} + int i915_active_acquire(struct i915_active *ref) { int err; @@ -423,19 +538,19 @@ int i915_active_acquire(struct i915_active *ref) if (i915_active_acquire_if_busy(ref)) return 0; + if (!ref->active) { + __i915_active_activate(ref); + return 0; + } + err = mutex_lock_interruptible(&ref->mutex); if (err) return err; if (likely(!i915_active_acquire_if_busy(ref))) { - if (ref->active) - err = ref->active(ref); - if (!err) { - spin_lock_irq(&ref->tree_lock); /* __active_retire() */ - debug_active_activate(ref); - atomic_inc(&ref->count); - spin_unlock_irq(&ref->tree_lock); - } + err = ref->active(ref); + if (!err) + __i915_active_activate(ref); } mutex_unlock(&ref->mutex); @@ -443,6 +558,24 @@ int i915_active_acquire(struct i915_active *ref) return err; } +int 
i915_active_acquire_for_context(struct i915_active *ref, u64 idx) +{ + struct i915_active_fence *active; + int err; + + err = i915_active_acquire(ref); + if (err) + return err; + + active = active_instance(ref, idx); + if (!active) { + i915_active_release(ref); + return -ENOMEM; + } + + return 0; /* return with active ref */ +} + void i915_active_release(struct i915_active *ref) { debug_active_assert(ref); @@ -651,16 +784,16 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence, return await_active(ref, flags, sw_await_fence, fence, fence); } -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void i915_active_fini(struct i915_active *ref) { debug_active_fini(ref); GEM_BUG_ON(atomic_read(&ref->count)); GEM_BUG_ON(work_pending(&ref->work)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); mutex_destroy(&ref->mutex); + + if (ref->cache) + kmem_cache_free(global.slab_cache, ref->cache); } -#endif static inline bool is_idle_barrier(struct active_node *node, u64 idx) { @@ -674,7 +807,6 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) if (RB_EMPTY_ROOT(&ref->tree)) return NULL; - spin_lock_irq(&ref->tree_lock); GEM_BUG_ON(i915_active_is_idle(ref)); /* @@ -700,9 +832,9 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) prev = p; if (node->timeline < idx) - p = p->rb_right; + p = READ_ONCE(p->rb_right); else - p = p->rb_left; + p = READ_ONCE(p->rb_left); } /* @@ -739,14 +871,13 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) goto match; } - spin_unlock_irq(&ref->tree_lock); - return NULL; match: + spin_lock_irq(&ref->tree_lock); rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ if (p == &ref->cache->node) - ref->cache = NULL; + WRITE_ONCE(ref->cache, NULL); spin_unlock_irq(&ref->tree_lock); return rb_entry(p, struct active_node, node); @@ -758,7 +889,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, intel_engine_mask_t tmp, mask = 
engine->mask; struct llist_node *first = NULL, *last = NULL; struct intel_gt *gt = engine->gt; - int err; GEM_BUG_ON(i915_active_is_idle(ref)); @@ -778,13 +908,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct llist_node *prev = first; struct active_node *node; + rcu_read_lock(); node = reuse_idle_barrier(ref, idx); + rcu_read_unlock(); if (!node) { node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); - if (!node) { - err = ENOMEM; + if (!node) goto unwind; - } RCU_INIT_POINTER(node->base.fence, NULL); node->base.cb.func = node_retire; @@ -804,7 +934,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, */ RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN)); node->base.cb.node.prev = (void *)engine; - atomic_inc(&ref->count); + __i915_active_acquire(ref); } GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN)); @@ -832,7 +962,7 @@ unwind: kmem_cache_free(global.slab_cache, node); } - return err; + return -ENOMEM; } void i915_active_acquire_barrier(struct i915_active *ref) diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index cf4058150966..fb165d3f01cf 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -163,14 +163,16 @@ void __i915_active_init(struct i915_active *ref, __i915_active_init(ref, active, retire, &__mkey, &__wkey); \ } while (0) -int i915_active_ref(struct i915_active *ref, - struct intel_timeline *tl, - struct dma_fence *fence); +struct dma_fence * +__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence); +int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence); static inline int i915_active_add_request(struct i915_active *ref, struct i915_request *rq) { - return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence); + return i915_active_ref(ref, + i915_request_timeline(rq)->fence_context, + &rq->fence); } struct dma_fence * @@ -198,7 +200,9 @@ int 
i915_request_await_active(struct i915_request *rq, #define I915_ACTIVE_AWAIT_BARRIER BIT(2) int i915_active_acquire(struct i915_active *ref); +int i915_active_acquire_for_context(struct i915_active *ref, u64 idx); bool i915_active_acquire_if_busy(struct i915_active *ref); + void i915_active_release(struct i915_active *ref); static inline void __i915_active_acquire(struct i915_active *ref) @@ -213,11 +217,7 @@ i915_active_is_idle(const struct i915_active *ref) return !atomic_read(&ref->count); } -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void i915_active_fini(struct i915_active *ref); -#else -static inline void i915_active_fini(struct i915_active *ref) { } -#endif int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct intel_engine_cs *engine); @@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void); struct i915_active *i915_active_get(struct i915_active *ref); void i915_active_put(struct i915_active *ref); +static inline int __i915_request_await_exclusive(struct i915_request *rq, + struct i915_active *active) +{ + struct dma_fence *fence; + int err = 0; + + fence = i915_active_fence_get(&active->excl); + if (fence) { + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } + + return err; +} + #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 372354d33f55..e88970256e8e 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1136,7 +1136,7 @@ find_reg(const struct intel_engine_cs *engine, u32 addr) /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, struct drm_i915_gem_object *src_obj, - u32 offset, u32 length) + unsigned long offset, unsigned long length) { bool needs_clflush; void *dst, *src; @@ -1166,8 +1166,8 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, } } if (IS_ERR(src)) { + unsigned long x, n; 
void *ptr; - int x, n; /* * We can avoid clflushing partial cachelines before the write @@ -1184,7 +1184,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, ptr = dst; x = offset_in_page(offset); for (n = offset >> PAGE_SHIFT; length; n++) { - int len = min_t(int, length, PAGE_SIZE - x); + int len = min(length, PAGE_SIZE - x); src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); if (needs_clflush) @@ -1204,6 +1204,12 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, return dst; } +static inline bool cmd_desc_is(const struct drm_i915_cmd_descriptor * const desc, + const u32 cmd) +{ + return desc->cmd.value == (cmd & desc->cmd.mask); +} + static bool check_cmd(const struct intel_engine_cs *engine, const struct drm_i915_cmd_descriptor *desc, const u32 *cmd, u32 length) @@ -1242,19 +1248,19 @@ static bool check_cmd(const struct intel_engine_cs *engine, * allowed mask/value pair given in the whitelist entry. */ if (reg->mask) { - if (desc->cmd.value == MI_LOAD_REGISTER_MEM) { + if (cmd_desc_is(desc, MI_LOAD_REGISTER_MEM)) { DRM_DEBUG("CMD: Rejected LRM to masked register 0x%08X\n", reg_addr); return false; } - if (desc->cmd.value == MI_LOAD_REGISTER_REG) { + if (cmd_desc_is(desc, MI_LOAD_REGISTER_REG)) { DRM_DEBUG("CMD: Rejected LRR to masked register 0x%08X\n", reg_addr); return false; } - if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) && + if (cmd_desc_is(desc, MI_LOAD_REGISTER_IMM(1)) && (offset + 2 > length || (cmd[offset + 1] & reg->mask) != reg->value)) { DRM_DEBUG("CMD: Rejected LRI to masked register 0x%08X\n", @@ -1408,8 +1414,8 @@ static bool shadow_needs_clflush(struct drm_i915_gem_object *obj) */ int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, - u32 batch_offset, - u32 batch_length, + unsigned long batch_offset, + unsigned long batch_length, struct i915_vma *shadow, bool trampoline) { @@ -1478,7 +1484,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, break; } - if 
(desc->cmd.value == MI_BATCH_BUFFER_START) { + if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) { ret = check_bbstart(cmd, offset, length, batch_length, batch_addr, shadow_addr, jump_whitelist); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 784219962193..ea469168cd44 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -326,6 +326,7 @@ static void print_context_stats(struct seq_file *m, } i915_gem_context_unlock_engines(ctx); + mutex_lock(&ctx->mutex); if (!IS_ERR_OR_NULL(ctx->file_priv)) { struct file_stats stats = { .vm = rcu_access_pointer(ctx->vm), @@ -346,6 +347,7 @@ static void print_context_stats(struct seq_file *m, print_file_stats(m, name, stats); } + mutex_unlock(&ctx->mutex); spin_lock(&i915->gem.contexts.lock); list_safe_reset_next(ctx, cn, link); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5fd5af4bc855..acc32066cec3 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -58,7 +58,6 @@ #include "display/intel_hotplug.h" #include "display/intel_overlay.h" #include "display/intel_pipe_crc.h" -#include "display/intel_psr.h" #include "display/intel_sprite.h" #include "display/intel_vga.h" @@ -216,125 +215,6 @@ intel_teardown_mchbar(struct drm_i915_private *dev_priv) release_resource(&dev_priv->mch_res); } -/* part #1: call before irq install */ -static int i915_driver_modeset_probe_noirq(struct drm_i915_private *i915) -{ - int ret; - - if (i915_inject_probe_failure(i915)) - return -ENODEV; - - if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) { - ret = drm_vblank_init(&i915->drm, - INTEL_NUM_PIPES(i915)); - if (ret) - return ret; - } - - intel_bios_init(i915); - - ret = intel_vga_register(i915); - if (ret) - goto cleanup_bios; - - intel_power_domains_init_hw(i915, false); - - intel_csr_ucode_init(i915); - - ret = intel_modeset_init_noirq(i915); - if (ret) - goto cleanup_vga_client_pw_domain_csr; - - 
return 0; - -cleanup_vga_client_pw_domain_csr: - intel_csr_ucode_fini(i915); - intel_power_domains_driver_remove(i915); - intel_vga_unregister(i915); -cleanup_bios: - intel_bios_driver_remove(i915); - return ret; -} - -/* part #2: call after irq install */ -static int i915_driver_modeset_probe(struct drm_i915_private *i915) -{ - int ret; - - /* Important: The output setup functions called by modeset_init need - * working irqs for e.g. gmbus and dp aux transfers. */ - ret = intel_modeset_init(i915); - if (ret) - goto out; - - ret = i915_gem_init(i915); - if (ret) - goto cleanup_modeset; - - intel_overlay_setup(i915); - - if (!HAS_DISPLAY(i915) || !INTEL_DISPLAY_ENABLED(i915)) - return 0; - - ret = intel_fbdev_init(&i915->drm); - if (ret) - goto cleanup_gem; - - /* Only enable hotplug handling once the fbdev is fully set up. */ - intel_hpd_init(i915); - - intel_init_ipc(i915); - - intel_psr_set_force_mode_changed(i915->psr.dp); - - return 0; - -cleanup_gem: - i915_gem_suspend(i915); - i915_gem_driver_remove(i915); - i915_gem_driver_release(i915); -cleanup_modeset: - /* FIXME */ - intel_modeset_driver_remove(i915); - intel_irq_uninstall(i915); - intel_modeset_driver_remove_noirq(i915); -out: - return ret; -} - -/* part #1: call before irq uninstall */ -static void i915_driver_modeset_remove(struct drm_i915_private *i915) -{ - intel_modeset_driver_remove(i915); -} - -/* part #2: call after irq uninstall */ -static void i915_driver_modeset_remove_noirq(struct drm_i915_private *i915) -{ - intel_csr_ucode_fini(i915); - - intel_power_domains_driver_remove(i915); - - intel_vga_unregister(i915); - - intel_bios_driver_remove(i915); -} - -static void intel_init_dpio(struct drm_i915_private *dev_priv) -{ - /* - * IOSF_PORT_DPIO is used for VLV x2 PHY (DP/HDMI B and C), - * CHV x1 PHY (DP/HDMI D) - * IOSF_PORT_DPIO_2 is used for CHV x2 PHY (DP/HDMI B and C) - */ - if (IS_CHERRYVIEW(dev_priv)) { - DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO_2; - DPIO_PHY_IOSF_PORT(DPIO_PHY1) = 
IOSF_PORT_DPIO; - } else if (IS_VALLEYVIEW(dev_priv)) { - DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO; - } -} - static int i915_workqueues_init(struct drm_i915_private *dev_priv) { /* @@ -392,7 +272,7 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) pre |= IS_HSW_EARLY_SDV(dev_priv); pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST); - pre |= IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0); + pre |= IS_KBL_GT_REVID(dev_priv, 0, KBL_REVID_A0); pre |= IS_GLK_REVID(dev_priv, 0, GLK_REVID_A2); if (pre) { @@ -463,7 +343,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_detect_pch(dev_priv); intel_pm_setup(dev_priv); - intel_init_dpio(dev_priv); ret = intel_power_domains_init(dev_priv); if (ret < 0) goto err_gem; @@ -798,7 +677,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) drm_err(&dev_priv->drm, "Failed to register driver for userspace access!\n"); - if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) { + if (HAS_DISPLAY(dev_priv)) { /* Must be done after probing outputs */ intel_opregion_register(dev_priv); acpi_video_register(); @@ -821,7 +700,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) * We need to coordinate the hotplugs with the asynchronous fbdev * configuration, for which we use the fbdev->async_cookie. 
*/ - if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) + if (HAS_DISPLAY(dev_priv)) drm_kms_helper_poll_init(dev); intel_power_domains_enable(dev_priv); @@ -988,7 +867,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret < 0) goto out_cleanup_mmio; - ret = i915_driver_modeset_probe_noirq(i915); + ret = intel_modeset_init_noirq(i915); if (ret < 0) goto out_cleanup_hw; @@ -996,10 +875,18 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_cleanup_modeset; - ret = i915_driver_modeset_probe(i915); - if (ret < 0) + ret = intel_modeset_init_nogem(i915); + if (ret) goto out_cleanup_irq; + ret = i915_gem_init(i915); + if (ret) + goto out_cleanup_modeset2; + + ret = intel_modeset_init(i915); + if (ret) + goto out_cleanup_gem; + i915_driver_register(i915); enable_rpm_wakeref_asserts(&i915->runtime_pm); @@ -1010,10 +897,20 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; +out_cleanup_gem: + i915_gem_suspend(i915); + i915_gem_driver_remove(i915); + i915_gem_driver_release(i915); +out_cleanup_modeset2: + /* FIXME clean up the error path */ + intel_modeset_driver_remove(i915); + intel_irq_uninstall(i915); + intel_modeset_driver_remove_noirq(i915); + goto out_cleanup_modeset; out_cleanup_irq: intel_irq_uninstall(i915); out_cleanup_modeset: - i915_driver_modeset_remove_noirq(i915); + intel_modeset_driver_remove_nogem(i915); out_cleanup_hw: i915_driver_hw_remove(i915); intel_memory_regions_driver_release(i915); @@ -1045,7 +942,7 @@ void i915_driver_remove(struct drm_i915_private *i915) intel_gvt_driver_remove(i915); - i915_driver_modeset_remove(i915); + intel_modeset_driver_remove(i915); intel_irq_uninstall(i915); @@ -1054,7 +951,7 @@ void i915_driver_remove(struct drm_i915_private *i915) i915_reset_error_state(i915); i915_gem_driver_remove(i915); - i915_driver_modeset_remove_noirq(i915); + intel_modeset_driver_remove_nogem(i915); 
i915_driver_hw_remove(i915); @@ -1075,6 +972,7 @@ static void i915_driver_release(struct drm_device *dev) intel_memory_regions_driver_release(dev_priv); i915_ggtt_driver_release(dev_priv); + i915_gem_drain_freed_objects(dev_priv); i915_driver_mmio_release(dev_priv); @@ -1119,7 +1017,6 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) struct drm_i915_file_private *file_priv = file->driver_priv; i915_gem_context_close(file); - i915_gem_release(dev, file); kfree_rcu(file_priv, rcu); @@ -1846,7 +1743,8 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_GEM | - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e4f7f6518945..8426d5974669 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -33,6 +33,8 @@ #include <uapi/drm/i915_drm.h> #include <uapi/drm/drm_fourcc.h> +#include <asm/hypervisor.h> + #include <linux/io-mapping.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> @@ -108,18 +110,11 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20200715" -#define DRIVER_TIMESTAMP 1594811881 +#define DRIVER_DATE "20200917" +#define DRIVER_TIMESTAMP 1600375437 struct drm_i915_gem_object; -/* - * The code assumes that the hpd_pins below have consecutive values and - * starting with HPD_PORT_A, the HPD pin associated with any port can be - * retrieved by adding the corresponding port (or phy) enum value to - * HPD_PORT_A in most cases. 
For example: - * HPD_PORT_C = HPD_PORT_A + PHY_C - PHY_A - */ enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ @@ -131,10 +126,12 @@ enum hpd_pin { HPD_PORT_C, HPD_PORT_D, HPD_PORT_E, - HPD_PORT_F, - HPD_PORT_G, - HPD_PORT_H, - HPD_PORT_I, + HPD_PORT_TC1, + HPD_PORT_TC2, + HPD_PORT_TC3, + HPD_PORT_TC4, + HPD_PORT_TC5, + HPD_PORT_TC6, HPD_NUM_PINS }; @@ -203,11 +200,6 @@ struct drm_i915_file_private { struct rcu_head rcu; }; - struct { - spinlock_t lock; - struct list_head request_list; - } mm; - struct xarray context_xa; struct xarray vm_xa; @@ -506,6 +498,7 @@ struct i915_psr { bool link_standby; bool colorimetry_support; bool psr2_enabled; + bool psr2_sel_fetch_enabled; u8 sink_sync_latency; ktime_t last_entry_attempt; ktime_t last_exit; @@ -541,13 +534,9 @@ struct intel_gmbus { struct i915_suspend_saved_registers { u32 saveDSPARB; - u32 saveFBC_CONTROL; - u32 saveCACHE_MODE_0; - u32 saveMI_ARB_STATE; u32 saveSWF0[16]; u32 saveSWF1[16]; u32 saveSWF3[3]; - u32 savePCH_PORT_HOTPLUG; u16 saveGCDGMBUS; }; @@ -592,11 +581,6 @@ struct i915_gem_mm { atomic_t free_count; /** - * Small stash of WC pages - */ - struct pagestash wc_stash; - - /** * tmpfs instance used for shmem backed objects */ struct vfsmount *gemfs; @@ -1029,8 +1013,6 @@ struct drm_i915_private { */ u8 active_pipes; - int dpio_phy_iosf_port[I915_NUM_PHYS_VLV]; - struct i915_wa_list gt_wa_list; struct i915_frontbuffer_tracking fb_tracking; @@ -1045,6 +1027,14 @@ struct drm_i915_private { struct intel_l3_parity l3_parity; /* + * HTI (aka HDPORT) state read during initial hw readout. Most + * platforms don't have HTI, so this will just stay 0. Those that do + * will use this later to figure out which PLLs and PHYs are unavailable + * for driver usage. + */ + u32 hti_state; + + /* * edram size in MB. * Cannot be determined by PCIID. You must always read a register. 
*/ @@ -1489,6 +1479,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ICL_WITH_PORT_F(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_ICELAKE, INTEL_SUBPLATFORM_PORTF) +#define IS_TGL_U(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULT) + +#define IS_TGL_Y(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX) + #define SKL_REVID_A0 0x0 #define SKL_REVID_B0 0x1 #define SKL_REVID_C0 0x2 @@ -1509,14 +1505,34 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_BXT_REVID(dev_priv, since, until) \ (IS_BROXTON(dev_priv) && IS_REVID(dev_priv, since, until)) -#define KBL_REVID_A0 0x0 -#define KBL_REVID_B0 0x1 -#define KBL_REVID_C0 0x2 -#define KBL_REVID_D0 0x3 -#define KBL_REVID_E0 0x4 +enum { + KBL_REVID_A0, + KBL_REVID_B0, + KBL_REVID_B1, + KBL_REVID_C0, + KBL_REVID_D0, + KBL_REVID_D1, + KBL_REVID_E0, + KBL_REVID_F0, + KBL_REVID_G0, +}; -#define IS_KBL_REVID(dev_priv, since, until) \ - (IS_KABYLAKE(dev_priv) && IS_REVID(dev_priv, since, until)) +struct i915_rev_steppings { + u8 gt_stepping; + u8 disp_stepping; +}; + +/* Defined in intel_workarounds.c */ +extern const struct i915_rev_steppings kbl_revids[]; + +#define IS_KBL_GT_REVID(dev_priv, since, until) \ + (IS_KABYLAKE(dev_priv) && \ + kbl_revids[INTEL_REVID(dev_priv)].gt_stepping >= since && \ + kbl_revids[INTEL_REVID(dev_priv)].gt_stepping <= until) +#define IS_KBL_DISP_REVID(dev_priv, since, until) \ + (IS_KABYLAKE(dev_priv) && \ + kbl_revids[INTEL_REVID(dev_priv)].disp_stepping >= since && \ + kbl_revids[INTEL_REVID(dev_priv)].disp_stepping <= until) #define GLK_REVID_A0 0x0 #define GLK_REVID_A1 0x1 @@ -1547,12 +1563,41 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_EHL_REVID(p, since, until) \ (IS_ELKHARTLAKE(p) && IS_REVID(p, since, until)) -#define TGL_REVID_A0 0x0 -#define TGL_REVID_B0 0x1 -#define TGL_REVID_C0 0x2 +enum { + TGL_REVID_A0, + TGL_REVID_B0, + TGL_REVID_B1, + TGL_REVID_C0, + TGL_REVID_D0, +}; + 
+extern const struct i915_rev_steppings tgl_uy_revids[]; +extern const struct i915_rev_steppings tgl_revids[]; + +static inline const struct i915_rev_steppings * +tgl_revids_get(struct drm_i915_private *dev_priv) +{ + if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) + return tgl_uy_revids; + else + return tgl_revids; +} -#define IS_TGL_REVID(p, since, until) \ - (IS_TIGERLAKE(p) && IS_REVID(p, since, until)) +#define IS_TGL_DISP_REVID(p, since, until) \ + (IS_TIGERLAKE(p) && \ + tgl_revids_get(p)->disp_stepping >= (since) && \ + tgl_revids_get(p)->disp_stepping <= (until)) + +#define IS_TGL_UY_GT_REVID(p, since, until) \ + ((IS_TGL_U(p) || IS_TGL_Y(p)) && \ + tgl_uy_revids->gt_stepping >= (since) && \ + tgl_uy_revids->gt_stepping <= (until)) + +#define IS_TGL_GT_REVID(p, since, until) \ + (IS_TIGERLAKE(p) && \ + !(IS_TGL_U(p) || IS_TGL_Y(p)) && \ + tgl_revids->gt_stepping >= (since) && \ + tgl_revids->gt_stepping <= (until)) #define RKL_REVID_A0 0x0 #define RKL_REVID_B0 0x1 @@ -1665,6 +1710,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) #define HAS_PSR_HW_TRACKING(dev_priv) \ (INTEL_INFO(dev_priv)->display.has_psr_hw_tracking) +#define HAS_PSR2_SEL_FETCH(dev_priv) (INTEL_GEN(dev_priv) >= 12) #define HAS_TRANSCODER(dev_priv, trans) ((INTEL_INFO(dev_priv)->cpu_transcoder_mask & BIT(trans)) != 0) #define HAS_RC6(dev_priv) (INTEL_INFO(dev_priv)->has_rc6) @@ -1716,7 +1762,9 @@ static inline bool intel_vtd_active(void) if (intel_iommu_gfx_mapped) return true; #endif - return false; + + /* Running as a guest, we assume the host is enforcing VT'd */ + return !hypervisor_is_type(X86_HYPER_NATIVE); } static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) @@ -1790,11 +1838,18 @@ static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915) } struct i915_vma * __must_check +i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + 
const struct i915_ggtt_view *view, + u64 size, u64 alignment, u64 flags); + +static inline struct i915_vma * __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, - u64 size, - u64 alignment, - u64 flags); + u64 size, u64 alignment, u64 flags) +{ + return i915_gem_object_ggtt_pin_ww(obj, NULL, view, size, alignment, flags); +} int i915_gem_object_unbind(struct drm_i915_gem_object *obj, unsigned long flags); @@ -1831,7 +1886,6 @@ void i915_gem_suspend_late(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file); -void i915_gem_release(struct drm_device *dev, struct drm_file *file); int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level); @@ -1899,8 +1953,8 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, - u32 batch_offset, - u32 batch_length, + unsigned long batch_offset, + unsigned long batch_length, struct i915_vma *shadow, bool trampoline); #define I915_CMD_PARSER_TRAMPOLINE_SIZE 8 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9aa3066cb75d..58276694c848 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -180,30 +180,6 @@ try_again: } static int -i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, - struct drm_i915_gem_pwrite *args, - struct drm_file *file) -{ - void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; - char __user *user_data = u64_to_user_ptr(args->data_ptr); - - /* - * We manually control the domain here and pretend that it - * remains coherent i.e. in the GTT domain, like shmem_pwrite. 
- */ - i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); - - if (copy_from_user(vaddr, user_data, args->size)) - return -EFAULT; - - drm_clflush_virt_range(vaddr, args->size); - intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); - - i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); - return 0; -} - -static int i915_gem_create(struct drm_file *file, struct intel_memory_region *mr, u64 *size_p, @@ -335,12 +311,20 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj, u64 remain; int ret; - ret = i915_gem_object_prepare_read(obj, &needs_clflush); + ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) return ret; + ret = i915_gem_object_prepare_read(obj, &needs_clflush); + if (ret) { + i915_gem_object_unlock(obj); + return ret; + } + fence = i915_gem_object_lock_fence(obj); i915_gem_object_finish_access(obj); + i915_gem_object_unlock(obj); + if (!fence) return -ENOMEM; @@ -420,7 +404,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - ret = i915_gem_object_lock_interruptible(obj); + ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) goto out_unpin; @@ -519,6 +503,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, trace_i915_gem_object_pread(obj, args->offset, args->size); + ret = -ENODEV; + if (obj->ops->pread) + ret = obj->ops->pread(obj, args); + if (ret != -ENODEV) + goto out; + ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); @@ -619,7 +609,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - ret = i915_gem_object_lock_interruptible(obj); + ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) goto out_unpin; @@ -734,12 +724,20 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, u64 remain; int ret; - ret = i915_gem_object_prepare_write(obj, &needs_clflush); + ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) return ret; + ret = 
i915_gem_object_prepare_write(obj, &needs_clflush); + if (ret) { + i915_gem_object_unlock(obj); + return ret; + } + fence = i915_gem_object_lock_fence(obj); i915_gem_object_finish_access(obj); + i915_gem_object_unlock(obj); + if (!fence) return -ENOMEM; @@ -850,8 +848,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (ret == -EFAULT || ret == -ENOSPC) { if (i915_gem_object_has_struct_page(obj)) ret = i915_gem_shmem_pwrite(obj, args); - else - ret = i915_gem_phys_pwrite(obj, args, file); } i915_gem_object_unpin_pages(obj); @@ -946,11 +942,10 @@ static void discard_ggtt_vma(struct i915_vma *vma) } struct i915_vma * -i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view, - u64 size, - u64 alignment, - u64 flags) +i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + const struct i915_ggtt_view *view, + u64 size, u64 alignment, u64 flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt *ggtt = &i915->ggtt; @@ -1016,7 +1011,7 @@ new_vma: return ERR_PTR(ret); } - ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL); if (ret) return ERR_PTR(ret); @@ -1290,7 +1285,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915) i915_gem_drain_freed_objects(i915); list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) { - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); drm_WARN_ON(&i915->drm, i915_gem_object_set_to_cpu_domain(obj, true)); i915_gem_object_unlock(obj); @@ -1301,21 +1296,6 @@ int i915_gem_freeze_late(struct drm_i915_private *i915) return 0; } -void i915_gem_release(struct drm_device *dev, struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - struct i915_request *request; - - /* Clean up our request list when the client is going away, so that - * later retire_requests won't dereference our soon-to-be-gone - * 
file_priv. - */ - spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_link) - request->file_priv = NULL; - spin_unlock(&file_priv->mm.lock); -} - int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) { struct drm_i915_file_private *file_priv; @@ -1331,9 +1311,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) file_priv->dev_priv = i915; file_priv->file = file; - spin_lock_init(&file_priv->mm.lock); - INIT_LIST_HEAD(&file_priv->mm.request_list); - file_priv->bsd_engine = -1; file_priv->hang_timestamp = jiffies; @@ -1344,6 +1321,58 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) return ret; } +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr) +{ + ww_acquire_init(&ww->ctx, &reservation_ww_class); + INIT_LIST_HEAD(&ww->obj_list); + ww->intr = intr; + ww->contended = NULL; +} + +static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww) +{ + struct drm_i915_gem_object *obj; + + while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) { + list_del(&obj->obj_link); + i915_gem_object_unlock(obj); + } +} + +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) +{ + list_del(&obj->obj_link); + i915_gem_object_unlock(obj); +} + +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) +{ + i915_gem_ww_ctx_unlock_all(ww); + WARN_ON(ww->contended); + ww_acquire_fini(&ww->ctx); +} + +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww) +{ + int ret = 0; + + if (WARN_ON(!ww->contended)) + return -EINVAL; + + i915_gem_ww_ctx_unlock_all(ww); + if (ww->intr) + ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx); + else + dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx); + + if (!ret) + list_add_tail(&ww->contended->obj_link, &ww->obj_list); + + ww->contended = NULL; + + return ret; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include 
"selftests/mock_gem_device.c" #include "selftests/i915_gem.c" diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index f333e88a2b6e..a4cad3f154ca 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -116,4 +116,16 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t) return test_bit(TASKLET_STATE_SCHED, &t->state); } +struct i915_gem_ww_ctx { + struct ww_acquire_ctx ctx; + struct list_head obj_list; + bool intr; + struct drm_i915_gem_object *contended; +}; + +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 421613219ae9..f96032c60a12 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_BATCH_FIRST: case I915_PARAM_HAS_EXEC_FENCE_ARRAY: case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 6a3a2ce0b394..cf6e47adfde6 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -311,6 +311,8 @@ static int compress_page(struct i915_vma_compress *c, if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) return -EIO; + + cond_resched(); } while (zstream->avail_in); /* Fallback to uncompressed if we increase size? 
*/ @@ -397,6 +399,7 @@ static int compress_page(struct i915_vma_compress *c, if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE))) memcpy(ptr, src, PAGE_SIZE); dst->pages[dst->page_count++] = ptr; + cond_resched(); return 0; } @@ -1159,7 +1162,7 @@ static void engine_record_registers(struct intel_engine_coredump *ee) switch (engine->id) { default: MISSING_CASE(engine->id); - /* fall through */ + fallthrough; case RCS0: mmio = RENDER_HWS_PGA_GEN7; break; @@ -1309,7 +1312,7 @@ capture_vma(struct intel_engine_capture_vma *next, } strcpy(c->name, name); - c->vma = i915_vma_get(vma); + c->vma = vma; /* reference held while active */ c->next = next; return c; @@ -1399,7 +1402,6 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, compress)); i915_active_release(&vma->active); - i915_vma_put(vma); capture = this->next; kfree(this); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1fa67700d8f4..759f523c6a6b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -41,6 +41,7 @@ #include "display/intel_lpe_audio.h" #include "display/intel_psr.h" +#include "gt/intel_breadcrumbs.h" #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" @@ -131,40 +132,24 @@ static const u32 hpd_bxt[HPD_NUM_PINS] = { }; static const u32 hpd_gen11[HPD_NUM_PINS] = { - [HPD_PORT_C] = GEN11_TC1_HOTPLUG | GEN11_TBT1_HOTPLUG, - [HPD_PORT_D] = GEN11_TC2_HOTPLUG | GEN11_TBT2_HOTPLUG, - [HPD_PORT_E] = GEN11_TC3_HOTPLUG | GEN11_TBT3_HOTPLUG, - [HPD_PORT_F] = GEN11_TC4_HOTPLUG | GEN11_TBT4_HOTPLUG, -}; - -static const u32 hpd_gen12[HPD_NUM_PINS] = { - [HPD_PORT_D] = GEN11_TC1_HOTPLUG | GEN11_TBT1_HOTPLUG, - [HPD_PORT_E] = GEN11_TC2_HOTPLUG | GEN11_TBT2_HOTPLUG, - [HPD_PORT_F] = GEN11_TC3_HOTPLUG | GEN11_TBT3_HOTPLUG, - [HPD_PORT_G] = GEN11_TC4_HOTPLUG | GEN11_TBT4_HOTPLUG, - [HPD_PORT_H] = GEN12_TC5_HOTPLUG | GEN12_TBT5_HOTPLUG, - [HPD_PORT_I] = GEN12_TC6_HOTPLUG | GEN12_TBT6_HOTPLUG, + 
[HPD_PORT_TC1] = GEN11_TC_HOTPLUG(PORT_TC1) | GEN11_TBT_HOTPLUG(PORT_TC1), + [HPD_PORT_TC2] = GEN11_TC_HOTPLUG(PORT_TC2) | GEN11_TBT_HOTPLUG(PORT_TC2), + [HPD_PORT_TC3] = GEN11_TC_HOTPLUG(PORT_TC3) | GEN11_TBT_HOTPLUG(PORT_TC3), + [HPD_PORT_TC4] = GEN11_TC_HOTPLUG(PORT_TC4) | GEN11_TBT_HOTPLUG(PORT_TC4), + [HPD_PORT_TC5] = GEN11_TC_HOTPLUG(PORT_TC5) | GEN11_TBT_HOTPLUG(PORT_TC5), + [HPD_PORT_TC6] = GEN11_TC_HOTPLUG(PORT_TC6) | GEN11_TBT_HOTPLUG(PORT_TC6), }; static const u32 hpd_icp[HPD_NUM_PINS] = { [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), - [HPD_PORT_C] = SDE_TC_HOTPLUG_ICP(PORT_TC1), - [HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC2), - [HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC3), - [HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC4), -}; - -static const u32 hpd_tgp[HPD_NUM_PINS] = { - [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), - [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), [HPD_PORT_C] = SDE_DDI_HOTPLUG_ICP(PORT_C), - [HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC1), - [HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC2), - [HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC3), - [HPD_PORT_G] = SDE_TC_HOTPLUG_ICP(PORT_TC4), - [HPD_PORT_H] = SDE_TC_HOTPLUG_ICP(PORT_TC5), - [HPD_PORT_I] = SDE_TC_HOTPLUG_ICP(PORT_TC6), + [HPD_PORT_TC1] = SDE_TC_HOTPLUG_ICP(PORT_TC1), + [HPD_PORT_TC2] = SDE_TC_HOTPLUG_ICP(PORT_TC2), + [HPD_PORT_TC3] = SDE_TC_HOTPLUG_ICP(PORT_TC3), + [HPD_PORT_TC4] = SDE_TC_HOTPLUG_ICP(PORT_TC4), + [HPD_PORT_TC5] = SDE_TC_HOTPLUG_ICP(PORT_TC5), + [HPD_PORT_TC6] = SDE_TC_HOTPLUG_ICP(PORT_TC6), }; static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) @@ -180,9 +165,7 @@ static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) return; } - if (INTEL_GEN(dev_priv) >= 12) - hpd->hpd = hpd_gen12; - else if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 11) hpd->hpd = hpd_gen11; else if (IS_GEN9_LP(dev_priv)) hpd->hpd = hpd_bxt; @@ -196,9 +179,8 @@ static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) 
if (!HAS_PCH_SPLIT(dev_priv) || HAS_PCH_NOP(dev_priv)) return; - if (HAS_PCH_TGP(dev_priv) || HAS_PCH_JSP(dev_priv)) - hpd->pch_hpd = hpd_tgp; - else if (HAS_PCH_ICP(dev_priv) || HAS_PCH_MCC(dev_priv)) + if (HAS_PCH_TGP(dev_priv) || HAS_PCH_JSP(dev_priv) || + HAS_PCH_ICP(dev_priv) || HAS_PCH_MCC(dev_priv)) hpd->pch_hpd = hpd_icp; else if (HAS_PCH_CNP(dev_priv) || HAS_PCH_SPT(dev_priv)) hpd->pch_hpd = hpd_spt; @@ -1048,33 +1030,17 @@ out: static bool gen11_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { - case HPD_PORT_C: - return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC1); - case HPD_PORT_D: - return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC2); - case HPD_PORT_E: - return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC3); - case HPD_PORT_F: - return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC4); - default: - return false; - } -} - -static bool gen12_port_hotplug_long_detect(enum hpd_pin pin, u32 val) -{ - switch (pin) { - case HPD_PORT_D: + case HPD_PORT_TC1: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC1); - case HPD_PORT_E: + case HPD_PORT_TC2: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC2); - case HPD_PORT_F: + case HPD_PORT_TC3: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC3); - case HPD_PORT_G: + case HPD_PORT_TC4: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC4); - case HPD_PORT_H: + case HPD_PORT_TC5: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC5); - case HPD_PORT_I: + case HPD_PORT_TC6: return val & GEN11_HOTPLUG_CTL_LONG_DETECT(PORT_TC6); default: return false; @@ -1112,33 +1078,17 @@ static bool icp_ddi_port_hotplug_long_detect(enum hpd_pin pin, u32 val) static bool icp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { - case HPD_PORT_C: - return val & ICP_TC_HPD_LONG_DETECT(PORT_TC1); - case HPD_PORT_D: - return val & ICP_TC_HPD_LONG_DETECT(PORT_TC2); - case HPD_PORT_E: - return val & ICP_TC_HPD_LONG_DETECT(PORT_TC3); - case HPD_PORT_F: - return val & ICP_TC_HPD_LONG_DETECT(PORT_TC4); 
- default: - return false; - } -} - -static bool tgp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val) -{ - switch (pin) { - case HPD_PORT_D: + case HPD_PORT_TC1: return val & ICP_TC_HPD_LONG_DETECT(PORT_TC1); - case HPD_PORT_E: + case HPD_PORT_TC2: return val & ICP_TC_HPD_LONG_DETECT(PORT_TC2); - case HPD_PORT_F: + case HPD_PORT_TC3: return val & ICP_TC_HPD_LONG_DETECT(PORT_TC3); - case HPD_PORT_G: + case HPD_PORT_TC4: return val & ICP_TC_HPD_LONG_DETECT(PORT_TC4); - case HPD_PORT_H: + case HPD_PORT_TC5: return val & ICP_TC_HPD_LONG_DETECT(PORT_TC5); - case HPD_PORT_I: + case HPD_PORT_TC6: return val & ICP_TC_HPD_LONG_DETECT(PORT_TC6); default: return false; @@ -1892,19 +1842,16 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { u32 ddi_hotplug_trigger, tc_hotplug_trigger; u32 pin_mask = 0, long_mask = 0; - bool (*tc_port_hotplug_long_detect)(enum hpd_pin pin, u32 val); if (HAS_PCH_TGP(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP; - tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect; } else if (HAS_PCH_JSP(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; tc_hotplug_trigger = 0; } else if (HAS_PCH_MCC(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1); - tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; } else { drm_WARN(&dev_priv->drm, !HAS_PCH_ICP(dev_priv), "Unrecognized PCH type 0x%x\n", @@ -1912,7 +1859,6 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; - tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; } if (ddi_hotplug_trigger) { @@ -1936,7 +1882,7 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, tc_hotplug_trigger, dig_hotplug_reg, 
dev_priv->hotplug.pch_hpd, - tc_port_hotplug_long_detect); + icp_tc_port_hotplug_long_detect); } if (pin_mask) @@ -2184,12 +2130,6 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) u32 pin_mask = 0, long_mask = 0; u32 trigger_tc = iir & GEN11_DE_TC_HOTPLUG_MASK; u32 trigger_tbt = iir & GEN11_DE_TBT_HOTPLUG_MASK; - long_pulse_detect_func long_pulse_detect; - - if (INTEL_GEN(dev_priv) >= 12) - long_pulse_detect = gen12_port_hotplug_long_detect; - else - long_pulse_detect = gen11_port_hotplug_long_detect; if (trigger_tc) { u32 dig_hotplug_reg; @@ -2200,7 +2140,7 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tc, dig_hotplug_reg, dev_priv->hotplug.hpd, - long_pulse_detect); + gen11_port_hotplug_long_detect); } if (trigger_tbt) { @@ -2212,7 +2152,7 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tbt, dig_hotplug_reg, dev_priv->hotplug.hpd, - long_pulse_detect); + gen11_port_hotplug_long_detect); } if (pin_mask) @@ -3047,6 +2987,18 @@ static u32 intel_hpd_enabled_irqs(struct drm_i915_private *dev_priv, return enabled_irqs; } +static u32 intel_hpd_hotplug_irqs(struct drm_i915_private *dev_priv, + const u32 hpd[HPD_NUM_PINS]) +{ + struct intel_encoder *encoder; + u32 hotplug_irqs = 0; + + for_each_intel_encoder(&dev_priv->drm, encoder) + hotplug_irqs |= hpd[encoder->hpd_pin]; + + return hotplug_irqs; +} + static void ibx_hpd_detection_setup(struct drm_i915_private *dev_priv) { u32 hotplug; @@ -3076,50 +3028,50 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; - if (HAS_PCH_IBX(dev_priv)) - hotplug_irqs = SDE_HOTPLUG_MASK; - else - hotplug_irqs = SDE_HOTPLUG_MASK_CPT; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, 
dev_priv->hotplug.pch_hpd); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); ibx_hpd_detection_setup(dev_priv); } -static void icp_hpd_detection_setup(struct drm_i915_private *dev_priv, - u32 ddi_hotplug_enable_mask, - u32 tc_hotplug_enable_mask) +static void icp_ddi_hpd_detection_setup(struct drm_i915_private *dev_priv, + u32 enable_mask) { u32 hotplug; hotplug = I915_READ(SHOTPLUG_CTL_DDI); - hotplug |= ddi_hotplug_enable_mask; + hotplug |= enable_mask; I915_WRITE(SHOTPLUG_CTL_DDI, hotplug); +} - if (tc_hotplug_enable_mask) { - hotplug = I915_READ(SHOTPLUG_CTL_TC); - hotplug |= tc_hotplug_enable_mask; - I915_WRITE(SHOTPLUG_CTL_TC, hotplug); - } +static void icp_tc_hpd_detection_setup(struct drm_i915_private *dev_priv, + u32 enable_mask) +{ + u32 hotplug; + + hotplug = I915_READ(SHOTPLUG_CTL_TC); + hotplug |= enable_mask; + I915_WRITE(SHOTPLUG_CTL_TC, hotplug); } static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, - u32 sde_ddi_mask, u32 sde_tc_mask, u32 ddi_enable_mask, u32 tc_enable_mask) { u32 hotplug_irqs, enabled_irqs; - hotplug_irqs = sde_ddi_mask | sde_tc_mask; enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, dev_priv->hotplug.pch_hpd); if (INTEL_PCH_TYPE(dev_priv) <= PCH_TGP) I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); - icp_hpd_detection_setup(dev_priv, ddi_enable_mask, tc_enable_mask); + icp_ddi_hpd_detection_setup(dev_priv, ddi_enable_mask); + if (tc_enable_mask) + icp_tc_hpd_detection_setup(dev_priv, tc_enable_mask); } /* @@ -3129,7 +3081,6 @@ static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) { icp_hpd_irq_setup(dev_priv, - SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1), ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1)); } @@ -3141,7 +3092,6 @@ static void mcc_hpd_irq_setup(struct 
drm_i915_private *dev_priv) static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv) { icp_hpd_irq_setup(dev_priv, - SDE_DDI_MASK_TGP, 0, TGP_DDI_HPD_ENABLE_MASK, 0); } @@ -3153,14 +3103,18 @@ static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) hotplug |= GEN11_HOTPLUG_CTL_ENABLE(PORT_TC1) | GEN11_HOTPLUG_CTL_ENABLE(PORT_TC2) | GEN11_HOTPLUG_CTL_ENABLE(PORT_TC3) | - GEN11_HOTPLUG_CTL_ENABLE(PORT_TC4); + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC4) | + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC5) | + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC6); I915_WRITE(GEN11_TC_HOTPLUG_CTL, hotplug); hotplug = I915_READ(GEN11_TBT_HOTPLUG_CTL); hotplug |= GEN11_HOTPLUG_CTL_ENABLE(PORT_TC1) | GEN11_HOTPLUG_CTL_ENABLE(PORT_TC2) | GEN11_HOTPLUG_CTL_ENABLE(PORT_TC3) | - GEN11_HOTPLUG_CTL_ENABLE(PORT_TC4); + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC4) | + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC5) | + GEN11_HOTPLUG_CTL_ENABLE(PORT_TC6); I915_WRITE(GEN11_TBT_HOTPLUG_CTL, hotplug); } @@ -3170,7 +3124,7 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) u32 val; enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); - hotplug_irqs = GEN11_DE_TC_HOTPLUG_MASK | GEN11_DE_TBT_HOTPLUG_MASK; + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, dev_priv->hotplug.hpd); val = I915_READ(GEN11_DE_HPD_IMR); val &= ~hotplug_irqs; @@ -3181,10 +3135,10 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) gen11_hpd_detection_setup(dev_priv); if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP) - icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, SDE_TC_MASK_TGP, + icp_hpd_irq_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, TGP_TC_HPD_ENABLE_MASK); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) - icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_ICP, SDE_TC_MASK_ICP, + icp_hpd_irq_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE_MASK); } @@ -3220,8 +3174,8 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP) I915_WRITE(SHPD_FILTER_CNT, 
SHPD_FILTER_CNT_500_ADJ); - hotplug_irqs = SDE_HOTPLUG_MASK_SPT; enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, dev_priv->hotplug.pch_hpd); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); @@ -3248,22 +3202,13 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; - if (INTEL_GEN(dev_priv) >= 8) { - hotplug_irqs = GEN8_PORT_DP_A_HOTPLUG; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, dev_priv->hotplug.hpd); + if (INTEL_GEN(dev_priv) >= 8) bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); - } else if (INTEL_GEN(dev_priv) >= 7) { - hotplug_irqs = DE_DP_A_HOTPLUG_IVB; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); - - ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); - } else { - hotplug_irqs = DE_DP_A_HOTPLUG; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); - + else ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); - } ilk_hpd_detection_setup(dev_priv); @@ -3312,7 +3257,7 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) u32 hotplug_irqs, enabled_irqs; enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); - hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK; + hotplug_irqs = intel_hpd_hotplug_irqs(dev_priv, dev_priv->hotplug.hpd); bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); @@ -3533,17 +3478,18 @@ static void icp_irq_postinstall(struct drm_i915_private *dev_priv) gen3_assert_iir_is_zero(&dev_priv->uncore, SDEIIR); I915_WRITE(SDEIMR, ~mask); - if (HAS_PCH_TGP(dev_priv)) - icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, - TGP_TC_HPD_ENABLE_MASK); - else if (HAS_PCH_JSP(dev_priv)) - icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0); 
- else if (HAS_PCH_MCC(dev_priv)) - icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, - ICP_TC_HPD_ENABLE(PORT_TC1)); - else - icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, - ICP_TC_HPD_ENABLE_MASK); + if (HAS_PCH_TGP(dev_priv)) { + icp_ddi_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK); + icp_tc_hpd_detection_setup(dev_priv, TGP_TC_HPD_ENABLE_MASK); + } else if (HAS_PCH_JSP(dev_priv)) { + icp_ddi_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK); + } else if (HAS_PCH_MCC(dev_priv)) { + icp_ddi_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK); + icp_tc_hpd_detection_setup(dev_priv, ICP_TC_HPD_ENABLE(PORT_TC1)); + } else { + icp_ddi_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK); + icp_tc_hpd_detection_setup(dev_priv, ICP_TC_HPD_ENABLE_MASK); + } } static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 8d8db9ff0a48..7f139ea4a90b 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -102,6 +102,11 @@ i915_param_named(psr_safest_params, bool, 0400, "is helpful to detect if PSR issues are related to bad values set in " " VBT. (0=use VBT parameters, 1=use safest parameters)"); +i915_param_named_unsafe(enable_psr2_sel_fetch, bool, 0400, + "Enable PSR2 selective fetch " + "(0=disabled, 1=enabled) " + "Default: 0"); + i915_param_named_unsafe(force_probe, charp, 0400, "Force probe the driver for specified devices. 
" "See CONFIG_DRM_I915_FORCE_PROBE for details."); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 53fb5ba8fbed..330c03e2b4f7 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -54,6 +54,7 @@ struct drm_printer; param(int, enable_fbc, -1, 0600) \ param(int, enable_psr, -1, 0600) \ param(bool, psr_safest_params, false, 0600) \ + param(bool, enable_psr2_sel_fetch, false, 0600) \ param(int, disable_power_well, -1, 0400) \ param(int, enable_ips, 1, 0600) \ param(int, invert_brightness, 0, 0600) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 2338f92ce490..fb5e30de78c2 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -389,6 +389,7 @@ static const struct intel_device_info ilk_m_info = { GEN5_FEATURES, PLATFORM(INTEL_IRONLAKE), .is_mobile = 1, + .has_rps = true, .display.has_fbc = 1, }; @@ -890,6 +891,7 @@ static const struct intel_device_info rkl_info = { .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), .require_force_probe = 1, + .display.has_hti = 1, .display.has_psr_hw_tracking = 0, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0), diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index c6f6370283cf..e94976976571 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1195,24 +1195,39 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) struct i915_gem_engines_iter it; struct i915_gem_context *ctx = stream->ctx; struct intel_context *ce; - int err; + struct i915_gem_ww_ctx ww; + int err = -ENODEV; for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { if (ce->engine != stream->engine) /* first match! */ continue; - /* - * As the ID is the gtt offset of the context's vma we - * pin the vma to ensure the ID remains fixed. 
- */ - err = intel_context_pin(ce); - if (err == 0) { - stream->pinned_ctx = ce; - break; - } + err = 0; + break; } i915_gem_context_unlock_engines(ctx); + if (err) + return ERR_PTR(err); + + i915_gem_ww_ctx_init(&ww, true); +retry: + /* + * As the ID is the gtt offset of the context's vma we + * pin the vma to ensure the ID remains fixed. + */ + err = intel_context_pin_ww(ce, &ww); + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + if (err) + return ERR_PTR(err); + + stream->pinned_ctx = ce; return stream->pinned_ctx; } @@ -1923,15 +1938,22 @@ emit_oa_config(struct i915_perf_stream *stream, { struct i915_request *rq; struct i915_vma *vma; + struct i915_gem_ww_ctx ww; int err; vma = get_oa_vma(stream, oa_config); if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(vma->obj, &ww); + if (err) + goto err; + + err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) - goto err_vma_put; + goto err; intel_engine_pm_get(ce->engine); rq = i915_request_create(ce); @@ -1953,11 +1975,9 @@ emit_oa_config(struct i915_perf_stream *stream, goto err_add_request; } - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, 0); if (!err) err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); if (err) goto err_add_request; @@ -1971,7 +1991,14 @@ err_add_request: i915_request_add(rq); err_vma_unpin: i915_vma_unpin(vma); -err_vma_put: +err: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + + i915_gem_ww_ctx_fini(&ww); i915_vma_put(vma); return err; } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 28bc5f13ae52..69c0fa20eba1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -445,8 +445,6 @@ static void i915_pmu_event_destroy(struct perf_event *event) 
container_of(event->pmu, typeof(*i915), pmu.base); drm_WARN_ON(&i915->drm, event->parent); - - module_put(THIS_MODULE); } static int @@ -476,7 +474,7 @@ config_status(struct drm_i915_private *i915, u64 config) if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) /* Requires a mutex for sampling! */ return -ENODEV; - /* Fall-through. */ + fallthrough; case I915_PMU_REQUESTED_FREQUENCY: if (INTEL_GEN(i915) < 6) return -ENODEV; @@ -538,10 +536,8 @@ static int i915_pmu_event_init(struct perf_event *event) if (ret) return ret; - if (!event->parent) { - __module_get(THIS_MODULE); + if (!event->parent) event->destroy = i915_pmu_event_destroy; - } return 0; } @@ -1130,6 +1126,7 @@ void i915_pmu_register(struct drm_i915_private *i915) if (!pmu->base.attr_groups) goto err_attr; + pmu->base.module = THIS_MODULE; pmu->base.task_ctx_nr = perf_invalid_context; pmu->base.event_init = i915_pmu_event_init; pmu->base.add = i915_pmu_event_add; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4e796ff4d7d0..664f3bf9af03 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1382,7 +1382,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define DPIO_CMNRST (1 << 0) #define DPIO_PHY(pipe) ((pipe) >> 1) -#define DPIO_PHY_IOSF_PORT(phy) (dev_priv->dpio_phy_iosf_port[phy]) /* * Per pipe/PLL DPIO regs @@ -1898,6 +1897,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define PWR_DOWN_LN_3_1_0 (0xb << 4) #define PWR_DOWN_LN_MASK (0xf << 4) #define PWR_DOWN_LN_SHIFT 4 +#define EDP4K2K_MODE_OVRD_EN (1 << 3) +#define EDP4K2K_MODE_OVRD_OPTIMIZED (1 << 2) #define ICL_PORT_CL_DW12(phy) _MMIO(_ICL_PORT_CL_DW(12, phy)) #define ICL_LANE_ENABLE_AUX (1 << 0) @@ -2919,6 +2920,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MBUS_BBOX_CTL_S1 _MMIO(0x45040) #define MBUS_BBOX_CTL_S2 _MMIO(0x45044) +#define HDPORT_STATE _MMIO(0x45050) +#define HDPORT_DPLL_USED_MASK REG_GENMASK(14, 12) +#define 
HDPORT_PHY_USED_DP(phy) REG_BIT(2 * (phy) + 2) +#define HDPORT_PHY_USED_HDMI(phy) REG_BIT(2 * (phy) + 1) +#define HDPORT_ENABLED REG_BIT(0) + /* Make render/texture TLB fetches lower priorty than associated data * fetches. This is not turned on by default */ @@ -7752,32 +7759,20 @@ enum { #define GEN11_DE_HPD_IMR _MMIO(0x44474) #define GEN11_DE_HPD_IIR _MMIO(0x44478) #define GEN11_DE_HPD_IER _MMIO(0x4447c) -#define GEN12_TC6_HOTPLUG (1 << 21) -#define GEN12_TC5_HOTPLUG (1 << 20) -#define GEN11_TC4_HOTPLUG (1 << 19) -#define GEN11_TC3_HOTPLUG (1 << 18) -#define GEN11_TC2_HOTPLUG (1 << 17) -#define GEN11_TC1_HOTPLUG (1 << 16) #define GEN11_TC_HOTPLUG(tc_port) (1 << ((tc_port) + 16)) -#define GEN11_DE_TC_HOTPLUG_MASK (GEN12_TC6_HOTPLUG | \ - GEN12_TC5_HOTPLUG | \ - GEN11_TC4_HOTPLUG | \ - GEN11_TC3_HOTPLUG | \ - GEN11_TC2_HOTPLUG | \ - GEN11_TC1_HOTPLUG) -#define GEN12_TBT6_HOTPLUG (1 << 5) -#define GEN12_TBT5_HOTPLUG (1 << 4) -#define GEN11_TBT4_HOTPLUG (1 << 3) -#define GEN11_TBT3_HOTPLUG (1 << 2) -#define GEN11_TBT2_HOTPLUG (1 << 1) -#define GEN11_TBT1_HOTPLUG (1 << 0) +#define GEN11_DE_TC_HOTPLUG_MASK (GEN11_TC_HOTPLUG(PORT_TC6) | \ + GEN11_TC_HOTPLUG(PORT_TC5) | \ + GEN11_TC_HOTPLUG(PORT_TC4) | \ + GEN11_TC_HOTPLUG(PORT_TC3) | \ + GEN11_TC_HOTPLUG(PORT_TC2) | \ + GEN11_TC_HOTPLUG(PORT_TC1)) #define GEN11_TBT_HOTPLUG(tc_port) (1 << (tc_port)) -#define GEN11_DE_TBT_HOTPLUG_MASK (GEN12_TBT6_HOTPLUG | \ - GEN12_TBT5_HOTPLUG | \ - GEN11_TBT4_HOTPLUG | \ - GEN11_TBT3_HOTPLUG | \ - GEN11_TBT2_HOTPLUG | \ - GEN11_TBT1_HOTPLUG) +#define GEN11_DE_TBT_HOTPLUG_MASK (GEN11_TBT_HOTPLUG(PORT_TC6) | \ + GEN11_TBT_HOTPLUG(PORT_TC5) | \ + GEN11_TBT_HOTPLUG(PORT_TC4) | \ + GEN11_TBT_HOTPLUG(PORT_TC3) | \ + GEN11_TBT_HOTPLUG(PORT_TC2) | \ + GEN11_TBT_HOTPLUG(PORT_TC1)) #define GEN11_TBT_HOTPLUG_CTL _MMIO(0x44030) #define GEN11_TC_HOTPLUG_CTL _MMIO(0x44038) @@ -7870,6 +7865,7 @@ enum { # define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2) #define CHICKEN_PAR1_1 _MMIO(0x42080) +#define 
DIS_RAM_BYPASS_PSR2_MAN_TRACK (1 << 16) #define SKL_DE_COMPRESSED_HASH_MODE (1 << 15) #define DPA_MASK_VBLANK_SRD (1 << 15) #define FORCE_ARB_IDLE_PLANES (1 << 14) @@ -8711,6 +8707,7 @@ enum { #define PCH_GMBUSUNIT_CLOCK_GATE_DISABLE (1 << 31) #define PCH_DPLUNIT_CLOCK_GATE_DISABLE (1 << 30) #define PCH_DPLSUNIT_CLOCK_GATE_DISABLE (1 << 29) +#define PCH_DPMGUNIT_CLOCK_GATE_DISABLE (1 << 15) #define PCH_CPUNIT_CLOCK_GATE_DISABLE (1 << 14) #define CNP_PWM_CGE_GATING_DISABLE (1 << 13) #define PCH_LP_PARTITION_LEVEL_DISABLE (1 << 12) @@ -8974,10 +8971,6 @@ enum { #define GEN9_PWRGT_MEDIA_STATUS_MASK (1 << 0) #define GEN9_PWRGT_RENDER_STATUS_MASK (1 << 1) -#define POWERGATE_ENABLE _MMIO(0xa210) -#define VDN_HCP_POWERGATE_ENABLE(n) BIT(((n) * 2) + 3) -#define VDN_MFX_POWERGATE_ENABLE(n) BIT(((n) * 2) + 4) - #define GTFIFODBG _MMIO(0x120000) #define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20) #define GT_FIFO_FREE_ENTRIES_CHV (0x7f << 13) @@ -9117,9 +9110,11 @@ enum { #define GEN9_MEDIA_PG_IDLE_HYSTERESIS _MMIO(0xA0C4) #define GEN9_RENDER_PG_IDLE_HYSTERESIS _MMIO(0xA0C8) #define GEN9_PG_ENABLE _MMIO(0xA210) -#define GEN9_RENDER_PG_ENABLE REG_BIT(0) -#define GEN9_MEDIA_PG_ENABLE REG_BIT(1) -#define GEN11_MEDIA_SAMPLER_PG_ENABLE REG_BIT(2) +#define GEN9_RENDER_PG_ENABLE REG_BIT(0) +#define GEN9_MEDIA_PG_ENABLE REG_BIT(1) +#define GEN11_MEDIA_SAMPLER_PG_ENABLE REG_BIT(2) +#define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n)) +#define VDN_MFX_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n)) #define GEN8_PUSHBUS_CONTROL _MMIO(0xA248) #define GEN8_PUSHBUS_ENABLE _MMIO(0xA250) #define GEN8_PUSHBUS_SHIFT _MMIO(0xA25C) @@ -9217,8 +9212,8 @@ enum { #define DISPLAY_IPS_CONTROL 0x19 #define TGL_PCODE_TCCOLD 0x26 #define TGL_PCODE_EXIT_TCCOLD_DATA_L_EXIT_FAILED REG_BIT(0) -#define TGL_PCODE_EXIT_TCCOLD_DATA_H_BLOCK_REQ 0 -#define TGL_PCODE_EXIT_TCCOLD_DATA_H_UNBLOCK_REQ REG_BIT(0) +#define TGL_PCODE_EXIT_TCCOLD_DATA_L_BLOCK_REQ 0 +#define TGL_PCODE_EXIT_TCCOLD_DATA_L_UNBLOCK_REQ 
REG_BIT(0) /* See also IPS_CTL */ #define IPS_PCODE_CONTROL (1 << 30) #define HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL 0x1A @@ -9305,6 +9300,7 @@ enum { #define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7) #define GEN10_SAMPLER_MODE _MMIO(0xE18C) +#define ENABLE_SMALLPL REG_BIT(15) #define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) /* IVYBRIDGE DPF */ @@ -10277,12 +10273,18 @@ enum skl_power_gate { #define ICL_DPCLKA_CFGCR0 _MMIO(0x164280) #define ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy) (1 << _PICK(phy, 10, 11, 24)) +#define RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy) REG_BIT((phy) + 10) #define ICL_DPCLKA_CFGCR0_TC_CLK_OFF(tc_port) (1 << ((tc_port) < PORT_TC4 ? \ (tc_port) + 12 : \ (tc_port) - PORT_TC4 + 21)) #define ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy) ((phy) * 2) #define ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy) (3 << ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) #define ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll, phy) ((pll) << ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) +#define RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy) _PICK(phy, 0, 2, 4, 27) +#define RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy) \ + (3 << RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) +#define RKL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll, phy) \ + ((pll) << RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) /* CNL PLL */ #define DPLL0_ENABLE 0x46010 @@ -10503,19 +10505,21 @@ enum skl_power_gate { #define _TGL_DPLL0_CFGCR0 0x164284 #define _TGL_DPLL1_CFGCR0 0x16428C -/* TODO: add DPLL4 */ #define _TGL_TBTPLL_CFGCR0 0x16429C #define TGL_DPLL_CFGCR0(pll) _MMIO_PLL3(pll, _TGL_DPLL0_CFGCR0, \ _TGL_DPLL1_CFGCR0, \ _TGL_TBTPLL_CFGCR0) +#define RKL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _TGL_DPLL0_CFGCR0, \ + _TGL_DPLL1_CFGCR0) #define _TGL_DPLL0_CFGCR1 0x164288 #define _TGL_DPLL1_CFGCR1 0x164290 -/* TODO: add DPLL4 */ #define _TGL_TBTPLL_CFGCR1 0x1642A0 #define TGL_DPLL_CFGCR1(pll) _MMIO_PLL3(pll, _TGL_DPLL0_CFGCR1, \ _TGL_DPLL1_CFGCR1, \ _TGL_TBTPLL_CFGCR1) +#define RKL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _TGL_DPLL0_CFGCR1, \ + _TGL_DPLL1_CFGCR1) #define 
_DKL_PHY1_BASE 0x168000 #define _DKL_PHY2_BASE 0x169000 @@ -12336,4 +12340,10 @@ enum skl_power_gate { #define DSB_ENABLE (1 << 31) #define DSB_STATUS (1 << 0) +#define TGL_ROOT_DEVICE_ID 0x9A00 +#define TGL_ROOT_DEVICE_MASK 0xFF00 +#define TGL_ROOT_DEVICE_SKU_MASK 0xF +#define TGL_ROOT_DEVICE_SKU_ULX 0x2 +#define TGL_ROOT_DEVICE_SKU_ULT 0x4 + #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0b2fe55e6194..0e813819b041 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -31,6 +31,7 @@ #include <linux/sched/signal.h> #include "gem/i915_gem_context.h" +#include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" #include "gt/intel_ring.h" #include "gt/intel_rps.h" @@ -186,48 +187,34 @@ static void irq_execute_cb_hook(struct irq_work *wrk) irq_execute_cb(wrk); } -static void __notify_execute_cb(struct i915_request *rq) +static __always_inline void +__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk)) { struct execute_cb *cb, *cn; - lockdep_assert_held(&rq->lock); - - GEM_BUG_ON(!i915_request_is_active(rq)); if (llist_empty(&rq->execute_cb)) return; - llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode) - irq_work_queue(&cb->work); - - /* - * XXX Rollback on __i915_request_unsubmit() - * - * In the future, perhaps when we have an active time-slicing scheduler, - * it will be interesting to unsubmit parallel execution and remove - * busywaits from the GPU until their master is restarted. This is - * quite hairy, we have to carefully rollback the fence and do a - * preempt-to-idle cycle on the target engine, all the while the - * master execute_cb may refire. 
- */ - init_llist_head(&rq->execute_cb); + llist_for_each_entry_safe(cb, cn, + llist_del_all(&rq->execute_cb), + work.llnode) + fn(&cb->work); } -static inline void -remove_from_client(struct i915_request *request) +static void __notify_execute_cb_irq(struct i915_request *rq) { - struct drm_i915_file_private *file_priv; + __notify_execute_cb(rq, irq_work_queue); +} - if (!READ_ONCE(request->file_priv)) - return; +static bool irq_work_imm(struct irq_work *wrk) +{ + wrk->func(wrk); + return false; +} - rcu_read_lock(); - file_priv = xchg(&request->file_priv, NULL); - if (file_priv) { - spin_lock(&file_priv->mm.lock); - list_del(&request->client_link); - spin_unlock(&file_priv->mm.lock); - } - rcu_read_unlock(); +static void __notify_execute_cb_imm(struct i915_request *rq) +{ + __notify_execute_cb(rq, irq_work_imm); } static void free_capture_list(struct i915_request *request) @@ -274,9 +261,16 @@ static void remove_from_engine(struct i915_request *rq) locked = engine; } list_del_init(&rq->sched.link); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + spin_unlock_irq(&locked->active.lock); + + __notify_execute_cb_imm(rq); } bool i915_request_retire(struct i915_request *rq) @@ -288,6 +282,7 @@ bool i915_request_retire(struct i915_request *rq) GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); trace_i915_request_retire(rq); + i915_request_mark_complete(rq); /* * We know the GPU must have read the request to have @@ -305,32 +300,30 @@ bool i915_request_retire(struct i915_request *rq) __i915_request_fill(rq, POISON_FREE); rq->ring->head = rq->postfix; + if (!i915_request_signaled(rq)) { + spin_lock_irq(&rq->lock); + dma_fence_signal_locked(&rq->fence); + spin_unlock_irq(&rq->lock); + } + + if (i915_request_has_waitboost(rq)) { + GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); + 
atomic_dec(&rq->engine->gt->rps.num_waiters); + } + /* * We only loosely track inflight requests across preemption, * and so we may find ourselves attempting to retire a _completed_ * request that we have removed from the HW and put back on a run * queue. + * + * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be + * after removing the breadcrumb and signaling it, so that we do not + * inadvertently attach the breadcrumb to a completed request. */ remove_from_engine(rq); - - spin_lock_irq(&rq->lock); - i915_request_mark_complete(rq); - if (!i915_request_signaled(rq)) - dma_fence_signal_locked(&rq->fence); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) - i915_request_cancel_breadcrumb(rq); - if (i915_request_has_waitboost(rq)) { - GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); - atomic_dec(&rq->engine->gt->rps.num_waiters); - } - if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { - set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); - __notify_execute_cb(rq); - } GEM_BUG_ON(!llist_empty(&rq->execute_cb)); - spin_unlock_irq(&rq->lock); - remove_from_client(rq); __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ intel_context_exit(rq->context); @@ -357,12 +350,6 @@ void i915_request_retire_upto(struct i915_request *rq) } while (i915_request_retire(tmp) && tmp != rq); } -static void __llist_add(struct llist_node *node, struct llist_head *head) -{ - node->next = head->first; - head->first = node; -} - static struct i915_request * const * __engine_active(struct intel_engine_cs *engine) { @@ -388,17 +375,38 @@ static bool __request_in_flight(const struct i915_request *signal) * As we know that there are always preemption points between * requests, we know that only the currently executing request * may be still active even though we have cleared the flag. 
- * However, we can't rely on our tracking of ELSP[0] to known + * However, we can't rely on our tracking of ELSP[0] to know * which request is currently active and so maybe stuck, as * the tracking maybe an event behind. Instead assume that * if the context is still inflight, then it is still active * even if the active flag has been cleared. + * + * To further complicate matters, if there is a pending promotion, the HW + * may either perform a context switch to the second inflight execlists, + * or it may switch to the pending set of execlists. In the case of the + * latter, it may send the ACK and we process the event copying the + * pending[] over top of inflight[], _overwriting_ our *active. Since + * this implies the HW is arbitrating and not stuck in *active, we do + * not worry about complete accuracy, but we do require no read/write + * tearing of the pointer [the read of the pointer must be valid, even + * as the array is being overwritten, for which we require the writes + * to avoid tearing.] + * + * Note that the read of *execlists->active may race with the promotion + * of execlists->pending[] to execlists->inflight[], overwriting + * the value at *execlists->active. This is fine. The promotion implies + * that we received an ACK from the HW, and so the context is not + * stuck -- if we do not see ourselves in *active, the inflight status + * is valid. If instead we see ourselves being copied into *active, + * we are inflight and may signal the callback. 
*/ if (!intel_context_inflight(signal->context)) return false; rcu_read_lock(); - for (port = __engine_active(signal->engine); (rq = *port); port++) { + for (port = __engine_active(signal->engine); + (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */ + port++) { if (rq->context == signal->context) { inflight = i915_seqno_passed(rq->fence.seqno, signal->fence.seqno); @@ -439,18 +447,24 @@ __await_execution(struct i915_request *rq, cb->work.func = irq_execute_cb_hook; } - spin_lock_irq(&signal->lock); - if (i915_request_is_active(signal) || __request_in_flight(signal)) { - if (hook) { - hook(rq, &signal->fence); - i915_request_put(signal); - } - i915_sw_fence_complete(cb->fence); - kmem_cache_free(global.slab_execute_cbs, cb); - } else { - __llist_add(&cb->work.llnode, &signal->execute_cb); + /* + * Register the callback first, then see if the signaler is already + * active. This ensures that if we race with the + * __notify_execute_cb from i915_request_submit() and we are not + * included in that list, we get a second bite of the cherry and + * execute it ourselves. After this point, a future + * i915_request_submit() will notify us. + * + * In i915_request_retire() we set the ACTIVE bit on a completed + * request (then flush the execute_cb). So by registering the + * callback first, then checking the ACTIVE bit, we serialise with + * the completed/retired request. 
+ */ + if (llist_add(&cb->work.llnode, &signal->execute_cb)) { + if (i915_request_is_active(signal) || + __request_in_flight(signal)) + __notify_execute_cb_imm(signal); } - spin_unlock_irq(&signal->lock); return 0; } @@ -528,8 +542,13 @@ bool __i915_request_submit(struct i915_request *request) if (i915_request_completed(request)) goto xfer; + if (unlikely(intel_context_is_closed(request->context) && + !intel_engine_has_heartbeat(engine))) + intel_context_set_banned(request->context); + if (unlikely(intel_context_is_banned(request->context))) i915_request_set_error_once(request, -EIO); + if (unlikely(fatal_error(request->fence.error))) __i915_request_skip(request); @@ -566,19 +585,21 @@ xfer: clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); } - /* We may be recursing from the signal callback of another i915 fence */ - if (!i915_request_signaled(request)) { - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - - __notify_execute_cb(request); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &request->fence.flags) && - !i915_request_enable_breadcrumb(request)) - intel_engine_signal_breadcrumbs(engine); + /* + * XXX Rollback bonded-execution on __i915_request_unsubmit()? + * + * In the future, perhaps when we have an active time-slicing scheduler, + * it will be interesting to unsubmit parallel execution and remove + * busywaits from the GPU until their master is restarted. This is + * quite hairy, we have to carefully rollback the fence and do a + * preempt-to-idle cycle on the target engine, all the while the + * master execute_cb may refire. 
+ */ + __notify_execute_cb_irq(request); - spin_unlock(&request->lock); - GEM_BUG_ON(!llist_empty(&request->execute_cb)); - } + /* We may be recursing from the signal callback of another i915 fence */ + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + i915_request_enable_breadcrumb(request); return result; } @@ -600,27 +621,27 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; + /* + * Only unwind in reverse order, required so that the per-context list + * is kept in seqno/ring order. + */ RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); /* - * Only unwind in reverse order, required so that the per-context list - * is kept in seqno/ring order. + * Before we remove this breadcrumb from the signal list, we have + * to ensure that a concurrent dma_fence_enable_signaling() does not + * attach itself. We first mark the request as no longer active and + * make sure that is visible to other cores, and then remove the + * breadcrumb if attached. */ - - /* We may be recursing from the signal callback of another i915 fence */ - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) i915_request_cancel_breadcrumb(request); - GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); - clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); - - spin_unlock(&request->lock); - /* We've already spun, don't charge on resubmitting. 
*/ if (request->sched.semaphores && i915_request_started(request)) request->sched.semaphores = 0; @@ -757,7 +778,6 @@ static void __i915_request_ctor(void *arg) dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0); - rq->file_priv = NULL; rq->capture_list = NULL; init_llist_head(&rq->execute_cb); @@ -847,7 +867,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) /* No zalloc, everything must be cleared after use */ rq->batch = NULL; - GEM_BUG_ON(rq->file_priv); GEM_BUG_ON(rq->capture_list); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); @@ -1640,7 +1659,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) return this_cpu != cpu; } -static bool __i915_spin_request(const struct i915_request * const rq, int state) +static bool __i915_spin_request(struct i915_request * const rq, int state) { unsigned long timeout_ns; unsigned int cpu; @@ -1673,7 +1692,7 @@ static bool __i915_spin_request(const struct i915_request * const rq, int state) timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns); timeout_ns += local_clock_ns(&cpu); do { - if (i915_request_completed(rq)) + if (dma_fence_is_signaled(&rq->fence)) return true; if (signal_pending_state(state, current)) @@ -1697,7 +1716,7 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) { struct request_wait *wait = container_of(cb, typeof(*wait), cb); - wake_up_process(wait->tsk); + wake_up_process(fetch_and_zero(&wait->tsk)); } /** @@ -1766,10 +1785,8 @@ long i915_request_wait(struct i915_request *rq, * duration, which we currently lack. */ if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) && - __i915_spin_request(rq, state)) { - dma_fence_signal(&rq->fence); + __i915_spin_request(rq, state)) goto out; - } /* * This client is about to stall waiting for the GPU. In many cases @@ -1783,25 +1800,36 @@ long i915_request_wait(struct i915_request *rq, * but at a cost of spending more power processing the workload * (bad for battery). 
*/ - if (flags & I915_WAIT_PRIORITY) { - if (!i915_request_started(rq) && - INTEL_GEN(rq->engine->i915) >= 6) - intel_rps_boost(rq); - } + if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq)) + intel_rps_boost(rq); wait.tsk = current; if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) goto out; + /* + * Flush the submission tasklet, but only if it may help this request. + * + * We sometimes experience some latency between the HW interrupts and + * tasklet execution (mostly due to ksoftirqd latency, but it can also + * be due to lazy CS events), so lets run the tasklet manually if there + * is a chance it may submit this request. If the request is not ready + * to run, as it is waiting for other fences to be signaled, flushing + * the tasklet is busy work without any advantage for this client. + * + * If the HW is being lazy, this is the last chance before we go to + * sleep to catch any pending events. We will check periodically in + * the heartbeat to flush the submission tasklets as a last resort + * for unhappy HW. 
+ */ + if (i915_request_is_ready(rq)) + intel_engine_flush_submission(rq->engine); + for (;;) { set_current_state(state); - if (i915_request_completed(rq)) { - dma_fence_signal(&rq->fence); + if (dma_fence_is_signaled(&rq->fence)) break; - } - - intel_engine_flush_submission(rq->engine); if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; @@ -1817,7 +1845,9 @@ long i915_request_wait(struct i915_request *rq, } __set_current_state(TASK_RUNNING); - dma_fence_remove_callback(&rq->fence, &wait.cb); + if (READ_ONCE(wait.tsk)) + dma_fence_remove_callback(&rq->fence, &wait.cb); + GEM_BUG_ON(!list_empty(&wait.cb.node)); out: mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 590762820761..16b721080195 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -284,10 +284,6 @@ struct i915_request { /** timeline->request entry for this request */ struct list_head link; - struct drm_i915_file_private *file_priv; - /** file_priv list entry for this request */ - struct list_head client_link; - I915_SELFTEST_DECLARE(struct { struct list_head link; unsigned long delay; @@ -365,10 +361,6 @@ void i915_request_submit(struct i915_request *request); void __i915_request_unsubmit(struct i915_request *request); void i915_request_unsubmit(struct i915_request *request); -/* Note: part of the intel_breadcrumbs family */ -bool i915_request_enable_breadcrumb(struct i915_request *request); -void i915_request_cancel_breadcrumb(struct i915_request *request); - long i915_request_wait(struct i915_request *rq, unsigned int flags, long timeout) diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index ed2be3489f8e..7b64e7137270 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -34,17 +34,25 @@ static void i915_save_display(struct drm_i915_private *dev_priv) { + struct 
pci_dev *pdev = dev_priv->drm.pdev; + /* Display arbitration control */ if (INTEL_GEN(dev_priv) <= 4) dev_priv->regfile.saveDSPARB = I915_READ(DSPARB); - /* save FBC interval */ - if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv)) - dev_priv->regfile.saveFBC_CONTROL = I915_READ(FBC_CONTROL); + if (IS_GEN(dev_priv, 4)) + pci_read_config_word(pdev, GCDGMBUS, + &dev_priv->regfile.saveGCDGMBUS); } static void i915_restore_display(struct drm_i915_private *dev_priv) { + struct pci_dev *pdev = dev_priv->drm.pdev; + + if (IS_GEN(dev_priv, 4)) + pci_write_config_word(pdev, GCDGMBUS, + dev_priv->regfile.saveGCDGMBUS); + /* Display arbitration */ if (INTEL_GEN(dev_priv) <= 4) I915_WRITE(DSPARB, dev_priv->regfile.saveDSPARB); @@ -52,31 +60,17 @@ static void i915_restore_display(struct drm_i915_private *dev_priv) /* only restore FBC info on the platform that supports FBC*/ intel_fbc_global_disable(dev_priv); - /* restore FBC interval */ - if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv)) - I915_WRITE(FBC_CONTROL, dev_priv->regfile.saveFBC_CONTROL); - intel_vga_redisable(dev_priv); + + intel_gmbus_reset(dev_priv); } int i915_save_state(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev_priv->drm.pdev; int i; i915_save_display(dev_priv); - if (IS_GEN(dev_priv, 4)) - pci_read_config_word(pdev, GCDGMBUS, - &dev_priv->regfile.saveGCDGMBUS); - - /* Cache mode state */ - if (INTEL_GEN(dev_priv) < 7) - dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0); - - /* Memory Arbitration state */ - dev_priv->regfile.saveMI_ARB_STATE = I915_READ(MI_ARB_STATE); - /* Scratch space */ if (IS_GEN(dev_priv, 2) && IS_MOBILE(dev_priv)) { for (i = 0; i < 7; i++) { @@ -102,22 +96,10 @@ int i915_save_state(struct drm_i915_private *dev_priv) int i915_restore_state(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev_priv->drm.pdev; int i; - if (IS_GEN(dev_priv, 4)) - pci_write_config_word(pdev, GCDGMBUS, - 
dev_priv->regfile.saveGCDGMBUS); i915_restore_display(dev_priv); - /* Cache mode state */ - if (INTEL_GEN(dev_priv) < 7) - I915_WRITE(CACHE_MODE_0, dev_priv->regfile.saveCACHE_MODE_0 | - 0xffff0000); - - /* Memory arbitration state */ - I915_WRITE(MI_ARB_STATE, dev_priv->regfile.saveMI_ARB_STATE | 0xffff0000); - /* Scratch space */ if (IS_GEN(dev_priv, 2) && IS_MOBILE(dev_priv)) { for (i = 0; i < 7; i++) { @@ -138,7 +120,5 @@ int i915_restore_state(struct drm_i915_private *dev_priv) I915_WRITE(SWF3(i), dev_priv->regfile.saveSWF3[i]); } - intel_gmbus_reset(dev_priv); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 295b9829e2da..038d4c6884c5 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -34,7 +34,7 @@ static void *i915_sw_fence_debug_hint(void *addr) #ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS -static struct debug_obj_descr i915_sw_fence_debug_descr = { +static const struct debug_obj_descr i915_sw_fence_debug_descr = { .name = "i915_sw_fence", .debug_hint = i915_sw_fence_debug_hint, }; @@ -164,9 +164,13 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, do { list_for_each_entry_safe(pos, next, &x->head, entry) { - pos->func(pos, - TASK_NORMAL, fence->error, - &extra); + int wake_flags; + + wake_flags = fence->error; + if (pos->func == autoremove_wake_function) + wake_flags = 0; + + pos->func(pos, TASK_NORMAL, wake_flags, &extra); } if (list_empty(&extra)) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index bc64f773dcdb..caa9b041616b 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -291,6 +291,8 @@ i915_vma_instance(struct drm_i915_gem_object *obj, struct i915_vma_work { struct dma_fence_work base; + struct i915_address_space *vm; + struct i915_vm_pt_stash stash; struct i915_vma *vma; struct drm_i915_gem_object *pinned; struct i915_sw_dma_fence_cb cb; @@ -302,21 
+304,23 @@ static int __vma_bind(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); struct i915_vma *vma = vw->vma; - int err; - - err = vma->ops->bind_vma(vma->vm, vma, vw->cache_level, vw->flags); - if (err) - atomic_or(I915_VMA_ERROR, &vma->flags); - return err; + vma->ops->bind_vma(vw->vm, &vw->stash, + vma, vw->cache_level, vw->flags); + return 0; } static void __vma_release(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); - if (vw->pinned) + if (vw->pinned) { __i915_gem_object_unpin_pages(vw->pinned); + i915_gem_object_put(vw->pinned); + } + + i915_vm_free_pt_stash(vw->vm, &vw->stash); + i915_vm_put(vw->vm); } static const struct dma_fence_work_ops bind_ops = { @@ -376,7 +380,6 @@ int i915_vma_bind(struct i915_vma *vma, { u32 bind_flags; u32 vma_flags; - int ret; GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(vma->size > vma->node.size); @@ -430,12 +433,10 @@ int i915_vma_bind(struct i915_vma *vma, if (vma->obj) { __i915_gem_object_pin_pages(vma->obj); - work->pinned = vma->obj; + work->pinned = i915_gem_object_get(vma->obj); } } else { - ret = vma->ops->bind_vma(vma->vm, vma, cache_level, bind_flags); - if (ret) - return ret; + vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); } atomic_or(bind_flags, &vma->flags); @@ -853,13 +854,19 @@ static void vma_unbind_pages(struct i915_vma *vma) __vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS); } -int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, + u64 size, u64 alignment, u64 flags) { struct i915_vma_work *work = NULL; intel_wakeref_t wakeref = 0; unsigned int bound; int err; +#ifdef CONFIG_PROVE_LOCKING + if (debug_locks && lockdep_is_held(&vma->vm->i915->drm.struct_mutex)) + WARN_ON(!ww); +#endif + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); BUILD_BUG_ON(PIN_USER != 
I915_VMA_LOCAL_BIND); @@ -873,16 +880,32 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (err) return err; + if (flags & PIN_GLOBAL) + wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + if (flags & vma->vm->bind_async_flags) { work = i915_vma_work(); if (!work) { err = -ENOMEM; - goto err_pages; + goto err_rpm; } - } - if (flags & PIN_GLOBAL) - wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); + work->vm = i915_vm_get(vma->vm); + + /* Allocate enough page directories to used PTE */ + if (vma->vm->allocate_va_range) { + err = i915_vm_alloc_pt_stash(vma->vm, + &work->stash, + vma->size); + if (err) + goto err_fence; + + err = i915_vm_pin_pt_stash(vma->vm, + &work->stash); + if (err) + goto err_fence; + } + } /* * Differentiate between user/kernel vma inside the aliasing-ppgtt. @@ -971,9 +994,9 @@ err_unlock: err_fence: if (work) dma_fence_work_commit_imm(&work->base); +err_rpm: if (wakeref) intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); -err_pages: vma_put_pages(vma); return err; } @@ -989,7 +1012,8 @@ static void flush_idle_contexts(struct intel_gt *gt) intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); } -int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags) +int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, + u32 align, unsigned int flags) { struct i915_address_space *vm = vma->vm; int err; @@ -997,7 +1021,7 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags) GEM_BUG_ON(!i915_vma_is_ggtt(vma)); do { - err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL); + err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL); if (err != -ENOSPC) { if (!err) { err = i915_vma_wait_for_bind(vma); @@ -1167,6 +1191,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) list_del(&vma->obj->userfault_link); } +static int +__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma) +{ + return __i915_request_await_exclusive(rq, &vma->active); +} + 
int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) { int err; @@ -1174,8 +1204,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) GEM_BUG_ON(!i915_vma_is_pinned(vma)); /* Wait for the vma to be bound before we start! */ - err = i915_request_await_active(rq, &vma->active, - I915_ACTIVE_AWAIT_EXCL); + err = __i915_request_await_bind(rq, vma); if (err) return err; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index d0d01f909548..5b3a3c653454 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -237,8 +237,17 @@ static inline void i915_vma_unlock(struct i915_vma *vma) } int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); -int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags); +i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, + u64 size, u64 alignment, u64 flags); + +static inline int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + return i915_vma_pin_ww(vma, NULL, size, alignment, flags); +} + +int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, + u32 align, unsigned int flags); static inline int i915_vma_pin_count(const struct i915_vma *vma) { diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 40c590db3c76..adc836f15fde 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -346,6 +346,25 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915) mask = BIT(INTEL_SUBPLATFORM_PORTF); } + if (IS_TIGERLAKE(i915)) { + struct pci_dev *root, *pdev = i915->drm.pdev; + + root = list_first_entry(&pdev->bus->devices, typeof(*root), bus_list); + + drm_WARN_ON(&i915->drm, mask); + drm_WARN_ON(&i915->drm, (root->device & TGL_ROOT_DEVICE_MASK) != + TGL_ROOT_DEVICE_ID); + + switch (root->device & 
TGL_ROOT_DEVICE_SKU_MASK) { + case TGL_ROOT_DEVICE_SKU_ULX: + mask = BIT(INTEL_SUBPLATFORM_ULX); + break; + case TGL_ROOT_DEVICE_SKU_ULT: + mask = BIT(INTEL_SUBPLATFORM_ULT); + break; + } + } + GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_BITS); RUNTIME_INFO(i915)->platform_mask[pi] |= mask; @@ -497,6 +516,14 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) S32_MAX), USEC_PER_SEC)); } + + if (!HAS_DISPLAY(dev_priv)) { + dev_priv->drm.driver_features &= ~(DRIVER_MODESET | + DRIVER_ATOMIC); + memset(&info->display, 0, sizeof(info->display)); + memset(runtime->num_sprites, 0, sizeof(runtime->num_sprites)); + memset(runtime->num_scalers, 0, sizeof(runtime->num_scalers)); + } } void intel_driver_caps_print(const struct intel_driver_caps *caps, diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index fd2385457ab6..6a3d607218aa 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -146,6 +146,7 @@ enum intel_ppgtt_type { func(has_gmch); \ func(has_hdcp); \ func(has_hotplug); \ + func(has_hti); \ func(has_ipc); \ func(has_modular_fia); \ func(has_overlay); \ diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 6b5e9d88646d..180e1078ef7c 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -87,7 +87,7 @@ __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, min_order = ilog2(size) - ilog2(mem->mm.chunk_size); } - if (size > BIT(mem->mm.max_order) * mem->mm.chunk_size) + if (size > mem->mm.size) return -E2BIG; n_pages = size >> ilog2(mem->mm.chunk_size); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cfabbe0481ab..cfb806767fc5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -100,12 +100,6 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) */ 
I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | DISP_FBC_MEMORY_WAKE); - - if (IS_SKYLAKE(dev_priv)) { - /* WaDisableDopClockGating */ - I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL) - & ~GEN7_DOP_CLOCK_GATE_ENABLE); - } } static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) @@ -7124,25 +7118,12 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) static void tgl_init_clock_gating(struct drm_i915_private *dev_priv) { - u32 vd_pg_enable = 0; - unsigned int i; - /* Wa_1409120013:tgl */ I915_WRITE(ILK_DPFC_CHICKEN, ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); - /* This is not a WA. Enable VD HCP & MFX_ENC powergate */ - for (i = 0; i < I915_MAX_VCS; i++) { - if (HAS_ENGINE(&dev_priv->gt, _VCS(i))) - vd_pg_enable |= VDN_HCP_POWERGATE_ENABLE(i) | - VDN_MFX_POWERGATE_ENABLE(i); - } - - I915_WRITE(POWERGATE_ENABLE, - I915_READ(POWERGATE_ENABLE) | vd_pg_enable); - /* Wa_1409825376:tgl (pre-prod)*/ - if (IS_TGL_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_A0)) + if (IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_B1)) I915_WRITE(GEN9_CLKGATE_DIS_3, I915_READ(GEN9_CLKGATE_DIS_3) | TGL_VRH_GATING_DIS); @@ -7223,12 +7204,12 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv) gen9_init_clock_gating(dev_priv); /* WaDisableSDEUnitClockGating:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + if (IS_KBL_GT_REVID(dev_priv, 0, KBL_REVID_B0)) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* WaDisableGamClockGating:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + if (IS_KBL_GT_REVID(dev_priv, 0, KBL_REVID_B0)) I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | GEN6_GAMUNIT_CLOCK_GATE_DISABLE); @@ -7251,6 +7232,10 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv) { gen9_init_clock_gating(dev_priv); + /* WaDisableDopClockGating:skl */ + I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL) & + ~GEN7_DOP_CLOCK_GATE_ENABLE); + /* WAC6entrylatency:skl */ 
I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) | FBC_LLC_FULLY_OPEN); diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 916ccd1c0e96..5b3279262123 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -231,9 +231,21 @@ void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val) SB_CRWRDA_NP, reg, &val); } +static u32 vlv_dpio_phy_iosf_port(struct drm_i915_private *i915, enum dpio_phy phy) +{ + /* + * IOSF_PORT_DPIO: VLV x2 PHY (DP/HDMI B and C), CHV x1 PHY (DP/HDMI D) + * IOSF_PORT_DPIO_2: CHV x2 PHY (DP/HDMI B and C) + */ + if (IS_CHERRYVIEW(i915)) + return phy == DPIO_PHY0 ? IOSF_PORT_DPIO_2 : IOSF_PORT_DPIO; + else + return IOSF_PORT_DPIO; +} + u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg) { - int port = i915->dpio_phy_iosf_port[DPIO_PHY(pipe)]; + u32 port = vlv_dpio_phy_iosf_port(i915, DPIO_PHY(pipe)); u32 val = 0; vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MRD_NP, reg, &val); @@ -252,7 +264,7 @@ u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg) void vlv_dpio_write(struct drm_i915_private *i915, enum pipe pipe, int reg, u32 val) { - int port = i915->dpio_phy_iosf_port[DPIO_PHY(pipe)]; + u32 port = vlv_dpio_phy_iosf_port(i915, DPIO_PHY(pipe)); vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MWR_NP, reg, &val); } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 8d5a933e6af6..97ded2a59cf4 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1209,6 +1209,18 @@ unclaimed_reg_debug(struct intel_uncore *uncore, spin_unlock(&uncore->debug->lock); } +#define __vgpu_read(x) \ +static u##x \ +vgpu_read##x(struct intel_uncore *uncore, i915_reg_t reg, bool trace) { \ + u##x val = __raw_uncore_read##x(uncore, reg); \ + trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \ + return val; \ +} +__vgpu_read(8) +__vgpu_read(16) 
+__vgpu_read(32) +__vgpu_read(64) + #define GEN2_READ_HEADER(x) \ u##x val = 0; \ assert_rpm_wakelock_held(uncore->rpm); @@ -1414,6 +1426,16 @@ __gen_reg_write_funcs(gen8); #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER +#define __vgpu_write(x) \ +static void \ +vgpu_write##x(struct intel_uncore *uncore, i915_reg_t reg, u##x val, bool trace) { \ + trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ + __raw_uncore_write##x(uncore, reg, val); \ +} +__vgpu_write(8) +__vgpu_write(16) +__vgpu_write(32) + #define ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, x) \ do { \ (uncore)->funcs.mmio_writeb = x##_write8; \ @@ -1735,7 +1757,10 @@ static void uncore_raw_init(struct intel_uncore *uncore) { GEM_BUG_ON(intel_uncore_has_forcewake(uncore)); - if (IS_GEN(uncore->i915, 5)) { + if (intel_vgpu_active(uncore->i915)) { + ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, vgpu); + ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, vgpu); + } else if (IS_GEN(uncore->i915, 5)) { ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, gen5); ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, gen5); } else { @@ -1993,13 +2018,14 @@ int __intel_wait_for_register_fw(struct intel_uncore *uncore, unsigned int slow_timeout_ms, u32 *out_value) { - u32 reg_value; + u32 reg_value = 0; #define done (((reg_value = intel_uncore_read_fw(uncore, reg)) & mask) == value) int ret; /* Catch any overuse of this function */ might_sleep_if(slow_timeout_ms); GEM_BUG_ON(fast_timeout_us > 20000); + GEM_BUG_ON(!fast_timeout_us && !slow_timeout_ms); ret = -ETIMEDOUT; if (fast_timeout_us && fast_timeout_us <= 20000) diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c index 939a6caebb03..632b912b0bc9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_buddy.c +++ b/drivers/gpu/drm/i915/selftests/i915_buddy.c @@ -8,8 +8,6 @@ #include "../i915_selftest.h" #include "i915_random.h" -#define SZ_8G (1ULL << 33) - static void __igt_dump_block(struct i915_buddy_mm *mm, struct i915_buddy_block *block, bool buddy) @@ -281,18 +279,22 
@@ static int igt_check_mm(struct i915_buddy_mm *mm) static void igt_mm_config(u64 *size, u64 *chunk_size) { I915_RND_STATE(prng); - u64 s, ms; + u32 s, ms; /* Nothing fancy, just try to get an interesting bit pattern */ prandom_seed_state(&prng, i915_selftest.random_seed); - s = i915_prandom_u64_state(&prng) & (SZ_8G - 1); - ms = BIT_ULL(12 + (prandom_u32_state(&prng) % ilog2(s >> 12))); - s = max(s & -ms, ms); + /* Let size be a random number of pages up to 8 GB (2M pages) */ + s = 1 + i915_prandom_u32_max_state((BIT(33 - 12)) - 1, &prng); + /* Let the chunk size be a random power of 2 less than size */ + ms = BIT(i915_prandom_u32_max_state(ilog2(s), &prng)); + /* Round size down to the chunk size */ + s &= -ms; - *chunk_size = ms; - *size = s; + /* Convert from pages to bytes */ + *chunk_size = (u64)ms << 12; + *size = (u64)s << 12; } static int igt_buddy_alloc_smoke(void *arg) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 88d400b9df88..23a6132c5f4e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -199,11 +199,52 @@ out: return err; } +static int igt_gem_ww_ctx(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *obj2; + struct i915_gem_ww_ctx ww; + int err = 0; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + obj2 = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto put1; + } + + i915_gem_ww_ctx_init(&ww, true); +retry: + /* Lock the objects, twice for good measure (-EALREADY handling) */ + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_gem_object_lock_interruptible(obj, &ww); + if (!err) + err = i915_gem_object_lock_interruptible(obj2, &ww); + if (!err) + err = i915_gem_object_lock(obj2, &ww); + + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + 
i915_gem_ww_ctx_fini(&ww); + i915_gem_object_put(obj2); +put1: + i915_gem_object_put(obj); + return err; +} + int i915_gem_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_gem_suspend), SUBTEST(igt_gem_hibernate), + SUBTEST(igt_gem_ww_ctx), }; if (intel_gt_is_wedged(&i915->gt)) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 028baae9631f..f88473d396f4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -536,7 +536,7 @@ int i915_gem_evict_mock_selftests(void) with_intel_runtime_pm(&i915->runtime_pm, wakeref) err = i915_subtests(tests, &i915->gt); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 0016ffc7d914..c53a222e3dec 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -172,35 +172,45 @@ static int igt_ppgtt_alloc(void *arg) /* Check we can allocate the entire range */ for (size = 4096; size <= limit; size <<= 2) { - err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, size); + struct i915_vm_pt_stash stash = {}; + + err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size); + if (err) + goto err_ppgtt_cleanup; + + err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash); if (err) { - if (err == -ENOMEM) { - pr_info("[1] Ran out of memory for va_range [0 + %llx] [bit %d]\n", - size, ilog2(size)); - err = 0; /* virtual space too large! 
*/ - } + i915_vm_free_pt_stash(&ppgtt->vm, &stash); goto err_ppgtt_cleanup; } + ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, size); cond_resched(); ppgtt->vm.clear_range(&ppgtt->vm, 0, size); + + i915_vm_free_pt_stash(&ppgtt->vm, &stash); } /* Check we can incrementally allocate the entire range */ for (last = 0, size = 4096; size <= limit; last = size, size <<= 2) { - err = ppgtt->vm.allocate_va_range(&ppgtt->vm, - last, size - last); + struct i915_vm_pt_stash stash = {}; + + err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size - last); + if (err) + goto err_ppgtt_cleanup; + + err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash); if (err) { - if (err == -ENOMEM) { - pr_info("[2] Ran out of memory for va_range [%llx + %llx] [bit %d]\n", - last, size - last, ilog2(size)); - err = 0; /* virtual space too large! */ - } + i915_vm_free_pt_stash(&ppgtt->vm, &stash); goto err_ppgtt_cleanup; } + ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, + last, size - last); cond_resched(); + + i915_vm_free_pt_stash(&ppgtt->vm, &stash); } err_ppgtt_cleanup: @@ -284,9 +294,23 @@ static int lowlevel_hole(struct i915_address_space *vm, break; } - if (vm->allocate_va_range && - vm->allocate_va_range(vm, addr, BIT_ULL(size))) - break; + if (vm->allocate_va_range) { + struct i915_vm_pt_stash stash = {}; + + if (i915_vm_alloc_pt_stash(vm, &stash, + BIT_ULL(size))) + break; + + if (i915_vm_pin_pt_stash(vm, &stash)) { + i915_vm_free_pt_stash(vm, &stash); + break; + } + + vm->allocate_va_range(vm, &stash, + addr, BIT_ULL(size)); + + i915_vm_free_pt_stash(vm, &stash); + } mock_vma->pages = obj->mm.pages; mock_vma->node.size = BIT_ULL(size); @@ -1703,7 +1727,7 @@ int i915_gem_gtt_mock_selftests(void) mock_fini_ggtt(ggtt); kfree(ggtt); out_put: - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } @@ -1881,6 +1905,7 @@ static int igt_cs_tlb(void *arg) continue; while (!__igt_timeout(end_time, NULL)) { + struct i915_vm_pt_stash stash = {}; struct i915_request *rq; u64 offset; @@ 
-1888,10 +1913,6 @@ static int igt_cs_tlb(void *arg) 0, vm->total - PAGE_SIZE, chunk_size, PAGE_SIZE); - err = vm->allocate_va_range(vm, offset, chunk_size); - if (err) - goto end; - memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); vma = i915_vma_instance(bbe, vm, NULL); @@ -1904,6 +1925,20 @@ static int igt_cs_tlb(void *arg) if (err) goto end; + err = i915_vm_alloc_pt_stash(vm, &stash, chunk_size); + if (err) + goto end; + + err = i915_vm_pin_pt_stash(vm, &stash); + if (err) { + i915_vm_free_pt_stash(vm, &stash); + goto end; + } + + vm->allocate_va_range(vm, &stash, offset, chunk_size); + + i915_vm_free_pt_stash(vm, &stash); + /* Prime the TLB with the dummy pages */ for (i = 0; i < count; i++) { vma->node.start = offset + i * PAGE_SIZE; diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index c2d001d9c0ec..debbac660519 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -307,7 +307,7 @@ static int live_noa_gpr(void *arg) } /* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */ - scratch = kmap(ce->vm->scratch[0].base.page); + scratch = kmap(__px_page(ce->vm->scratch[0])); memset(scratch, POISON_FREE, PAGE_SIZE); rq = intel_context_create_request(ce); @@ -405,7 +405,7 @@ static int live_noa_gpr(void *arg) out_rq: i915_request_put(rq); out_ce: - kunmap(ce->vm->scratch[0].base.page); + kunmap(__px_page(ce->vm->scratch[0])); intel_context_put(ce); out: stream_destroy(stream); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 57dd6f5122ee..e424a6d1a68c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -331,7 +331,7 @@ static int __igt_breadcrumbs_smoketest(void *arg) if (!wait) { i915_sw_fence_commit(submit); heap_fence_put(submit); - err = ENOMEM; + err = -ENOMEM; break; } @@ -527,7 +527,7 @@ int 
i915_request_mock_selftests(void) with_intel_runtime_pm(&i915->runtime_pm, wakeref) err = i915_subtests(tests, i915); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } @@ -862,6 +862,8 @@ static int live_all_engines(void *arg) goto out_free; } + i915_vma_lock(batch); + idx = 0; for_each_uabi_engine(engine, i915) { request[idx] = intel_engine_create_kernel_request(engine); @@ -872,11 +874,9 @@ static int live_all_engines(void *arg) goto out_request; } - i915_vma_lock(batch); err = i915_request_await_object(request[idx], batch->obj, 0); if (err == 0) err = i915_vma_move_to_active(batch, request[idx], 0); - i915_vma_unlock(batch); GEM_BUG_ON(err); err = engine->emit_bb_start(request[idx], @@ -891,6 +891,8 @@ static int live_all_engines(void *arg) idx++; } + i915_vma_unlock(batch); + idx = 0; for_each_uabi_engine(engine, i915) { if (i915_request_completed(request[idx])) { @@ -981,12 +983,13 @@ static int live_sequential_engines(void *arg) goto out_free; } + i915_vma_lock(batch); request[idx] = intel_engine_create_kernel_request(engine); if (IS_ERR(request[idx])) { err = PTR_ERR(request[idx]); pr_err("%s: Request allocation failed for %s with err=%d\n", __func__, engine->name, err); - goto out_request; + goto out_unlock; } if (prev) { @@ -996,16 +999,14 @@ static int live_sequential_engines(void *arg) i915_request_add(request[idx]); pr_err("%s: Request await failed for %s with err=%d\n", __func__, engine->name, err); - goto out_request; + goto out_unlock; } } - i915_vma_lock(batch); err = i915_request_await_object(request[idx], batch->obj, false); if (err == 0) err = i915_vma_move_to_active(batch, request[idx], 0); - i915_vma_unlock(batch); GEM_BUG_ON(err); err = engine->emit_bb_start(request[idx], @@ -1020,6 +1021,11 @@ static int live_sequential_engines(void *arg) prev = request[idx]; idx++; + +out_unlock: + i915_vma_unlock(batch); + if (err) + goto out_request; } idx = 0; @@ -2287,8 +2293,10 @@ static int perf_request_latency(void *arg) struct 
intel_context *ce; ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + err = PTR_ERR(ce); goto out; + } err = intel_context_pin(ce); if (err) { @@ -2461,8 +2469,10 @@ static int perf_series_engines(void *arg) struct intel_context *ce; ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + err = PTR_ERR(ce); goto out; + } err = intel_context_pin(ce); if (err) { diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index af89c7fc8f59..1b6125e4c1ac 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -841,7 +841,7 @@ int i915_vma_mock_selftests(void) mock_fini_ggtt(ggtt); kfree(ggtt); out_put: - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } @@ -892,7 +892,7 @@ static int igt_vma_remapped_gtt(void *arg) unsigned int x, y; int err; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_gtt_domain(obj, true); i915_gem_object_unlock(obj); if (err) diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 6e80d99048e4..0aeba8e3af28 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -261,6 +261,82 @@ err_close_objects: return err; } +static int igt_mock_splintered_region(void *arg) +{ + struct intel_memory_region *mem = arg; + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj; + unsigned int expected_order; + LIST_HEAD(objects); + u64 size; + int err = 0; + + /* + * Sanity check we can still allocate everything even if the + * mm.max_order != mm.size. i.e our starting address space size is not a + * power-of-two. 
+ */ + + size = (SZ_4G - 1) & PAGE_MASK; + mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0); + if (IS_ERR(mem)) + return PTR_ERR(mem); + + if (mem->mm.size != size) { + pr_err("%s size mismatch(%llu != %llu)\n", + __func__, mem->mm.size, size); + err = -EINVAL; + goto out_put; + } + + expected_order = get_order(rounddown_pow_of_two(size)); + if (mem->mm.max_order != expected_order) { + pr_err("%s order mismatch(%u != %u)\n", + __func__, mem->mm.max_order, expected_order); + err = -EINVAL; + goto out_put; + } + + obj = igt_object_create(mem, &objects, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_close; + } + + close_objects(mem, &objects); + + /* + * While we should be able allocate everything without any flag + * restrictions, if we consider I915_BO_ALLOC_CONTIGUOUS then we are + * actually limited to the largest power-of-two for the region size i.e + * max_order, due to the inner workings of the buddy allocator. So make + * sure that does indeed hold true. + */ + + obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); + if (!IS_ERR(obj)) { + pr_err("%s too large contiguous allocation was not rejected\n", + __func__); + err = -EINVAL; + goto out_close; + } + + obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size), + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) { + pr_err("%s largest possible contiguous allocation failed\n", + __func__); + err = PTR_ERR(obj); + goto out_close; + } + +out_close: + close_objects(mem, &objects); +out_put: + intel_memory_region_put(mem); + return err; +} + static int igt_gpu_write_dw(struct intel_context *ce, struct i915_vma *vma, u32 dword, @@ -509,7 +585,7 @@ static int igt_lmem_write_cpu(void *arg) if (err) goto out_unpin; - i915_gem_object_lock(obj); + i915_gem_object_lock(obj, NULL); err = i915_gem_object_set_to_wc_domain(obj, true); i915_gem_object_unlock(obj); if (err) @@ -522,9 +598,9 @@ static int igt_lmem_write_cpu(void *arg) goto out_unpin; } - /* We want to throw in a 
random width/align */ - bytes[0] = igt_random_offset(&prng, 0, PAGE_SIZE, sizeof(u32), - sizeof(u32)); + /* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */ + bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32)); + GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32))); i = 0; do { @@ -771,6 +847,7 @@ int intel_memory_region_mock_selftests(void) static const struct i915_subtest tests[] = { SUBTEST(igt_mock_fill), SUBTEST(igt_mock_contiguous), + SUBTEST(igt_mock_splintered_region), }; struct intel_memory_region *mem; struct drm_i915_private *i915; @@ -791,7 +868,7 @@ int intel_memory_region_mock_selftests(void) intel_memory_region_put(mem); out_unref: - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return err; } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index b9810bf156c3..b6c42fd872ad 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -78,8 +78,7 @@ static void mock_device_release(struct drm_device *dev) drm_mode_config_cleanup(&i915->drm); out: - put_device(&i915->drm.pdev->dev); - i915->drm.pdev = NULL; + i915_params_free(&i915->params); } static struct drm_driver mock_driver = { @@ -117,22 +116,15 @@ static struct dev_pm_domain pm_domain = { struct drm_i915_private *mock_gem_device(void) { - struct drm_i915_private *i915; - struct pci_dev *pdev; #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) - struct dev_iommu iommu; + static struct dev_iommu fake_iommu = { .priv = (void *)-1 }; #endif - int err; + struct drm_i915_private *i915; + struct pci_dev *pdev; pdev = kzalloc(sizeof(*pdev), GFP_KERNEL); if (!pdev) return NULL; - i915 = kzalloc(sizeof(*i915), GFP_KERNEL); - if (!i915) { - kfree(pdev); - return NULL; - } - device_initialize(&pdev->dev); pdev->class = PCI_BASE_CLASS_DISPLAY << 16; pdev->dev.release = release_dev; @@ -140,13 +132,26 @@ struct drm_i915_private 
*mock_gem_device(void) dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) - /* HACK HACK HACK to disable iommu for the fake device; force identity mapping */ - memset(&iommu, 0, sizeof(iommu)); - iommu.priv = (void *)-1; - pdev->dev.iommu = &iommu; + /* HACK to disable iommu for the fake device; force identity mapping */ + pdev->dev.iommu = &fake_iommu; #endif + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + put_device(&pdev->dev); + return NULL; + } + + i915 = devm_drm_dev_alloc(&pdev->dev, &mock_driver, + struct drm_i915_private, drm); + if (IS_ERR(i915)) { + pr_err("Failed to allocate mock GEM device: err=%ld\n", PTR_ERR(i915)); + devres_release_group(&pdev->dev, NULL); + put_device(&pdev->dev); + + return NULL; + } pci_set_drvdata(pdev, i915); + i915->drm.pdev = pdev; dev_pm_domain_set(&pdev->dev, &pm_domain); pm_runtime_enable(&pdev->dev); @@ -154,16 +159,8 @@ struct drm_i915_private *mock_gem_device(void) if (pm_runtime_enabled(&pdev->dev)) WARN_ON(pm_runtime_get_sync(&pdev->dev)); - err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev); - if (err) { - pr_err("Failed to initialise mock GEM device: err=%d\n", err); - put_device(&pdev->dev); - kfree(i915); - return NULL; - } - i915->drm.pdev = pdev; - drmm_add_final_kfree(&i915->drm, i915); + i915_params_copy(&i915->params, &i915_modparams); intel_runtime_pm_init_early(&i915->runtime_pm); @@ -221,7 +218,15 @@ err_drv: intel_gt_driver_late_release(&i915->gt); intel_memory_regions_driver_release(i915); drm_mode_config_cleanup(&i915->drm); - drm_dev_put(&i915->drm); + mock_destroy_device(i915); return NULL; } + +void mock_destroy_device(struct drm_i915_private *i915) +{ + struct device *dev = i915->drm.dev; + + devres_release_group(dev, NULL); + put_device(dev); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.h b/drivers/gpu/drm/i915/selftests/mock_gem_device.h index b5dc4e394555..953cfe4fab34 100644 --- 
a/drivers/gpu/drm/i915/selftests/mock_gem_device.h +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.h @@ -7,4 +7,6 @@ struct drm_i915_private; struct drm_i915_private *mock_gem_device(void); void mock_device_flush(struct drm_i915_private *i915); +void mock_destroy_device(struct drm_i915_private *i915); + #endif /* !__MOCK_GEM_DEVICE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index b173086411ef..7270fc8ca801 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -38,14 +38,14 @@ static void mock_insert_entries(struct i915_address_space *vm, { } -static int mock_bind_ppgtt(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static void mock_bind_ppgtt(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma)); - return 0; } static void mock_unbind_ppgtt(struct i915_address_space *vm, @@ -74,9 +74,12 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) ppgtt->vm.i915 = i915; ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); ppgtt->vm.file = ERR_PTR(-ENODEV); + ppgtt->vm.dma = &i915->drm.pdev->dev; i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); + ppgtt->vm.alloc_pt_dma = alloc_pt_dma; + ppgtt->vm.clear_range = mock_clear_range; ppgtt->vm.insert_page = mock_insert_page; ppgtt->vm.insert_entries = mock_insert_entries; @@ -90,13 +93,12 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) return ppgtt; } -static int mock_bind_ggtt(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static void mock_bind_ggtt(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum 
i915_cache_level cache_level, + u32 flags) { - atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); - return 0; } static void mock_unbind_ggtt(struct i915_address_space *vm, @@ -116,6 +118,8 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) ggtt->mappable_end = resource_size(&ggtt->gmadr); ggtt->vm.total = 4096 * PAGE_SIZE; + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.clear_range = mock_clear_range; ggtt->vm.insert_page = mock_insert_page; ggtt->vm.insert_entries = mock_insert_entries; diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 09660f5a0a4c..979d96f27c43 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -24,7 +24,7 @@ mock_object_create(struct intel_memory_region *mem, struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; - if (size > BIT(mem->mm.max_order) * mem->mm.chunk_size) + if (size > mem->mm.size) return ERR_PTR(-E2BIG); obj = i915_gem_object_alloc(); |