Diffstat (limited to 'drivers/gpu/drm/i915')
203 files changed, 26410 insertions, 14328 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index e9e64e8e9765..dfd95889f4b7 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -12,6 +12,7 @@ config DRM_I915
 	select DRM_PANEL
 	select DRM_MIPI_DSI
 	select RELAY
+	select IRQ_WORK
 	# i915 depends on ACPI_VIDEO when ACPI is enabled
 	# but for select to work, need to select ACPI_VIDEO's dependencies, ick
 	select BACKLIGHT_LCD_SUPPORT if ACPI
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index aed7d207ea84..108d21f34777 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -18,6 +18,7 @@ config DRM_I915_WERROR
 config DRM_I915_DEBUG
 	bool "Enable additional driver debugging"
 	depends on DRM_I915
+	select DEBUG_FS
 	select PREEMPT_COUNT
 	select I2C_CHARDEV
 	select DRM_DP_AUX_CHARDEV
@@ -49,6 +50,20 @@ config DRM_I915_DEBUG_GEM

 	  If in doubt, say "N".

+config DRM_I915_TRACE_GEM
+	bool "Insert extra ftrace output from the GEM internals"
+	depends on DRM_I915_DEBUG_GEM
+	select TRACING
+	default n
+	help
+	  Enable additional and verbose debugging output that will spam
+	  ordinary tests, but may be vital for post-mortem debugging when
+	  used with /proc/sys/kernel/ftrace_dump_on_oops.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
 config DRM_I915_SW_FENCE_DEBUG_OBJECTS
 	bool "Enable additional driver debugging for fence objects"
 	depends on DRM_I915
@@ -90,6 +105,20 @@ config DRM_I915_SELFTEST

 	  If in doubt, say "N".

+config DRM_I915_SELFTEST_BROKEN
+	bool "Enable broken and dangerous selftests"
+	depends on DRM_I915_SELFTEST
+	depends on BROKEN
+	default n
+	help
+	  This option enables the execution of selftests that are "dangerous"
+	  and may trigger unintended HW side-effects as they break strict
+	  rules given in the HW specification. For science.
+
+	  Recommended for masochistic driver developers only.
+
+	  If in doubt, say "N".
+
 config DRM_I915_LOW_LEVEL_TRACEPOINTS
 	bool "Enable low level request tracing events"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 2e034efc4d6d..091aef281963 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -3,7 +3,26 @@
 # Makefile for the drm device driver. This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.

-subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror
+# Add a set of useful warning flags and enable -Werror for CI to prevent
+# trivial mistakes from creeping in. We have to do this piecemeal as we reject
+# any patch that isn't warning clean, so when turning on -Wall -Wextra (or
+# W=1) we need to filter out dubious warnings. Still, it is in our interest
+# to keep running locally with W=1 C=1 until we are completely clean.
+#
+# Note the danger in using -Wall -Wextra is that when CI updates gcc we
+# will most likely get a sudden build breakage... Hopefully we will fix
+# new warnings before CI updates!
+subdir-ccflags-y := -Wall -Wextra +subdir-ccflags-y += $(call cc-disable-warning, unused-parameter) +subdir-ccflags-y += $(call cc-disable-warning, type-limits) +subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) +subdir-ccflags-y += $(call cc-disable-warning, implicit-fallthrough) +subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror + +# Fine grained warnings disable +CFLAGS_i915_pci.o = $(call cc-disable-warning, override-init) +CFLAGS_intel_fbdev.o = $(call cc-disable-warning, override-init) + subdir-ccflags-y += \ $(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA) @@ -27,6 +46,7 @@ i915-y := i915_drv.o \ i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o +i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # GEM code i915-y += i915_cmd_parser.o \ @@ -48,6 +68,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_tiling.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ + i915_gemfs.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ @@ -60,11 +81,13 @@ i915-y += i915_cmd_parser.o \ # general-purpose microcontroller (GuC) support i915-y += intel_uc.o \ + intel_uc_fw.o \ + intel_guc.o \ intel_guc_ct.o \ + intel_guc_fw.o \ intel_guc_log.o \ - intel_guc_loader.o \ - intel_huc.o \ - i915_guc_submission.o + intel_guc_submission.o \ + intel_huc.o # autogenerated null render state i915-y += intel_renderstate_gen6.o \ @@ -140,7 +163,10 @@ i915-y += i915_perf.o \ i915_oa_bxt.o \ i915_oa_kblgt2.o \ i915_oa_kblgt3.o \ - i915_oa_glk.o + i915_oa_glk.o \ + i915_oa_cflgt2.o \ + i915_oa_cflgt3.o \ + i915_oa_cnl.o ifeq ($(CONFIG_DRM_I915_GVT),y) i915-y += intel_gvt.o @@ -151,5 +177,3 @@ endif i915-y += intel_lpe_audio.o obj-$(CONFIG_DRM_I915) += i915.o - -CFLAGS_i915_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index 2641ba510a61..347116faa558 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -2,7 +2,8 @@ GVT_DIR := gvt GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ - execlist.o scheduler.o sched_policy.o render.o cmd_parser.o + execlist.o scheduler.o sched_policy.o mmio_context.o cmd_parser.o debugfs.o \ + fb_decoder.o dmabuf.o ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index ca3d1925beda..7c9ec4f4f36c 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -173,8 +173,8 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu) _clear_vgpu_fence(vgpu); for (i = 0; i < vgpu_fence_sz(vgpu); i++) { reg = vgpu->fence.regs[i]; - list_add_tail(®->link, - &dev_priv->mm.fence_list); + i915_unreserve_fence(reg); + vgpu->fence.regs[i] = NULL; } mutex_unlock(&dev_priv->drm.struct_mutex); @@ -187,24 +187,19 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) struct drm_i915_private *dev_priv = gvt->dev_priv; struct drm_i915_fence_reg *reg; int i; - struct list_head *pos, *q; intel_runtime_pm_get(dev_priv); /* Request fences from host */ mutex_lock(&dev_priv->drm.struct_mutex); - i = 0; - list_for_each_safe(pos, q, &dev_priv->mm.fence_list) { - reg = list_entry(pos, struct drm_i915_fence_reg, link); - if (reg->pin_count || reg->vma) - continue; - list_del(pos); + + for (i = 0; i < vgpu_fence_sz(vgpu); i++) { + reg = i915_reserve_fence(dev_priv); + if 
(IS_ERR(reg)) + goto out_free_fence; + vgpu->fence.regs[i] = reg; - if (++i == vgpu_fence_sz(vgpu)) - break; } - if (i != vgpu_fence_sz(vgpu)) - goto out_free_fence; _clear_vgpu_fence(vgpu); @@ -212,13 +207,14 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) intel_runtime_pm_put(dev_priv); return 0; out_free_fence: + gvt_vgpu_err("Failed to alloc fences\n"); /* Return fences to host, if fail */ for (i = 0; i < vgpu_fence_sz(vgpu); i++) { reg = vgpu->fence.regs[i]; if (!reg) continue; - list_add_tail(®->link, - &dev_priv->mm.fence_list); + i915_unreserve_fence(reg); + vgpu->fence.regs[i] = NULL; } mutex_unlock(&dev_priv->drm.struct_mutex); intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index ff3154fe6588..c62346fdc05d 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -101,7 +101,7 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, if (WARN_ON(bytes > 4)) return -EINVAL; - if (WARN_ON(offset + bytes > INTEL_GVT_MAX_CFG_SPACE_SZ)) + if (WARN_ON(offset + bytes > vgpu->gvt->device_info.cfg_space_size)) return -EINVAL; memcpy(p_data, vgpu_cfg_space(vgpu) + offset, bytes); @@ -110,7 +110,9 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, static int map_aperture(struct intel_vgpu *vgpu, bool map) { - u64 first_gfn, first_mfn; + phys_addr_t aperture_pa = vgpu_aperture_pa_base(vgpu); + unsigned long aperture_sz = vgpu_aperture_sz(vgpu); + u64 first_gfn; u64 val; int ret; @@ -124,12 +126,11 @@ static int map_aperture(struct intel_vgpu *vgpu, bool map) val = *(u32 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_2); first_gfn = (val + vgpu_aperture_offset(vgpu)) >> PAGE_SHIFT; - first_mfn = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT; ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, first_gfn, - first_mfn, - vgpu_aperture_sz(vgpu) >> - PAGE_SHIFT, map); + aperture_pa >> PAGE_SHIFT, + aperture_sz >> PAGE_SHIFT, + map); if (ret) return ret; @@ -194,6 +195,20 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu, return 0; } +static int emulate_pci_rom_bar_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 *pval = (u32 *)(vgpu_cfg_space(vgpu) + offset); + u32 new = *(u32 *)(p_data); + + if ((new & PCI_ROM_ADDRESS_MASK) == PCI_ROM_ADDRESS_MASK) + /* We don't have rom, return size of 0. */ + *pval = 0; + else + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); + return 0; +} + static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -275,7 +290,7 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, if (WARN_ON(bytes > 4)) return -EINVAL; - if (WARN_ON(offset + bytes > INTEL_GVT_MAX_CFG_SPACE_SZ)) + if (WARN_ON(offset + bytes > vgpu->gvt->device_info.cfg_space_size)) return -EINVAL; /* First check if it's PCI_COMMAND */ @@ -286,6 +301,11 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, } switch (rounddown(offset, 4)) { + case PCI_ROM_ADDRESS: + if (WARN_ON(!IS_ALIGNED(offset, 4))) + return -EINVAL; + return emulate_pci_rom_bar_write(vgpu, offset, p_data, bytes); + case PCI_BASE_ADDRESS_0 ... 
PCI_BASE_ADDRESS_5: if (WARN_ON(!IS_ALIGNED(offset, 4))) return -EINVAL; @@ -302,7 +322,8 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, case INTEL_GVT_PCI_OPREGION: if (WARN_ON(!IS_ALIGNED(offset, 4))) return -EINVAL; - ret = intel_vgpu_init_opregion(vgpu, *(u32 *)p_data); + ret = intel_vgpu_opregion_base_write_handler(vgpu, + *(u32 *)p_data); if (ret) return ret; @@ -361,6 +382,8 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, pci_resource_len(gvt->dev_priv->drm.pdev, 0); vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size = pci_resource_len(gvt->dev_priv->drm.pdev, 2); + + memset(vgpu_cfg_space(vgpu) + PCI_ROM_ADDRESS, 0, 4); } /** diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index d4726a3358a4..c8454ac43fae 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -709,18 +709,13 @@ static void parser_exec_state_dump(struct parser_exec_state *s) print_opcode(cmd_val(s, 0), s->ring_id); - /* print the whole page to trace */ - pr_err(" ip_va=%p: %08x %08x %08x %08x\n", - s->ip_va, cmd_val(s, 0), cmd_val(s, 1), - cmd_val(s, 2), cmd_val(s, 3)); - s->ip_va = (u32 *)((((u64)s->ip_va) >> 12) << 12); while (cnt < 1024) { - pr_err("ip_va=%p: ", s->ip_va); + gvt_dbg_cmd("ip_va=%p: ", s->ip_va); for (i = 0; i < 8; i++) - pr_err("%08x ", cmd_val(s, i)); - pr_err("\n"); + gvt_dbg_cmd("%08x ", cmd_val(s, i)); + gvt_dbg_cmd("\n"); s->ip_va += 8 * sizeof(u32); cnt += 8; @@ -825,11 +820,26 @@ static int force_nonpriv_reg_handler(struct parser_exec_state *s, if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) { gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", offset, data); - return -EINVAL; + return -EPERM; } return 0; } +static inline bool is_mocs_mmio(unsigned int offset) +{ + return ((offset >= 0xc800) && (offset <= 0xcff8)) || + ((offset >= 0xb020) && (offset <= 0xb0a0)); +} + +static int mocs_cmd_reg_handler(struct parser_exec_state *s, + unsigned int offset, unsigned int index) +{ + if (!is_mocs_mmio(offset)) + return -EINVAL; + vgpu_vreg(s->vgpu, offset) = cmd_val(s, index + 1); + return 0; +} + static int cmd_reg_handler(struct parser_exec_state *s, unsigned int offset, unsigned int index, char *cmd) { @@ -839,7 +849,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, if (offset + 4 > gvt->device_info.mmio_size) { gvt_vgpu_err("%s access to (%x) outside of MMIO range\n", cmd, offset); - return -EINVAL; + return -EFAULT; } if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) { @@ -853,10 +863,14 @@ static int cmd_reg_handler(struct parser_exec_state *s, return 0; } - if (is_force_nonpriv_mmio(offset) && - force_nonpriv_reg_handler(s, offset, index)) + if (is_mocs_mmio(offset) && + mocs_cmd_reg_handler(s, offset, index)) return -EINVAL; + if (is_force_nonpriv_mmio(offset) && + force_nonpriv_reg_handler(s, offset, index)) + return -EPERM; + if (offset == i915_mmio_reg_offset(DERRMR) || offset == i915_mmio_reg_offset(FORCEWAKE_MT)) { /* Writing to HW VGT_PVINFO_PAGE offset will be discarded */ @@ -894,11 +908,14 @@ static int cmd_handler_lri(struct parser_exec_state *s) i915_mmio_reg_offset(DERRMR)) ret |= 0; else - ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0; + ret |= (cmd_reg_inhibit(s, i)) ? 
+ -EBADRQC : 0; } if (ret) break; ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lri"); + if (ret) + break; } return ret; } @@ -912,11 +929,15 @@ static int cmd_handler_lrr(struct parser_exec_state *s) if (IS_BROADWELL(s->vgpu->gvt->dev_priv)) ret |= ((cmd_reg_inhibit(s, i) || (cmd_reg_inhibit(s, i + 1)))) ? - -EINVAL : 0; + -EBADRQC : 0; if (ret) break; ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrr-src"); + if (ret) + break; ret |= cmd_reg_handler(s, cmd_reg(s, i + 1), i, "lrr-dst"); + if (ret) + break; } return ret; } @@ -934,15 +955,19 @@ static int cmd_handler_lrm(struct parser_exec_state *s) for (i = 1; i < cmd_len;) { if (IS_BROADWELL(gvt->dev_priv)) - ret |= (cmd_reg_inhibit(s, i)) ? -EINVAL : 0; + ret |= (cmd_reg_inhibit(s, i)) ? -EBADRQC : 0; if (ret) break; ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrm"); + if (ret) + break; if (cmd_val(s, 0) & (1 << 22)) { gma = cmd_gma(s, i + 1); if (gmadr_bytes == 8) gma |= (cmd_gma_hi(s, i + 2)) << 32; ret |= cmd_address_audit(s, gma, sizeof(u32), false); + if (ret) + break; } i += gmadr_dw_number(s) + 1; } @@ -958,11 +983,15 @@ static int cmd_handler_srm(struct parser_exec_state *s) for (i = 1; i < cmd_len;) { ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "srm"); + if (ret) + break; if (cmd_val(s, 0) & (1 << 22)) { gma = cmd_gma(s, i + 1); if (gmadr_bytes == 8) gma |= (cmd_gma_hi(s, i + 2)) << 32; ret |= cmd_address_audit(s, gma, sizeof(u32), false); + if (ret) + break; } i += gmadr_dw_number(s) + 1; } @@ -1116,7 +1145,7 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s, v = (dword0 & GENMASK(21, 19)) >> 19; if (WARN_ON(v >= ARRAY_SIZE(gen8_plane_code))) - return -EINVAL; + return -EBADRQC; info->pipe = gen8_plane_code[v].pipe; info->plane = gen8_plane_code[v].plane; @@ -1136,7 +1165,7 @@ static int gen8_decode_mi_display_flip(struct parser_exec_state *s, info->surf_reg = SPRSURF(info->pipe); } else { WARN_ON(1); - return -EINVAL; + return -EBADRQC; } return 0; } @@ -1185,7 +1214,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, default: gvt_vgpu_err("unknown plane code %d\n", plane); - return -EINVAL; + return -EBADRQC; } info->stride_val = (dword1 & GENMASK(15, 6)) >> 6; @@ -1210,13 +1239,13 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s, return 0; if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - stride = vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0); - tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & + stride = vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(9, 0); + tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & GENMASK(12, 10)) >> 10; } else { - stride = (vgpu_vreg(s->vgpu, info->stride_reg) & + stride = (vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(15, 6)) >> 6; - tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10; + tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10; } if (stride != info->stride_val) @@ -1235,21 +1264,21 @@ static int gen8_update_plane_mmio_from_mi_display_flip( struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; struct intel_vgpu *vgpu = s->vgpu; - set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12), + set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12), info->surf_val << 12); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0), + set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(9, 0), info->stride_val); - set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10), + set_mask_bits(&vgpu_vreg_t(vgpu, 
info->ctrl_reg), GENMASK(12, 10), info->tile_val << 10); } else { - set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(15, 6), + set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(15, 6), info->stride_val << 6); - set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(10, 10), + set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(10, 10), info->tile_val << 10); } - vgpu_vreg(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++; + vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++; intel_vgpu_trigger_virtual_event(vgpu, info->event); return 0; } @@ -1348,10 +1377,13 @@ static unsigned long get_gma_bb_from_cmd(struct parser_exec_state *s, int index) { unsigned long addr; unsigned long gma_high, gma_low; - int gmadr_bytes = s->vgpu->gvt->device_info.gmadr_bytes_in_cmd; + struct intel_vgpu *vgpu = s->vgpu; + int gmadr_bytes = vgpu->gvt->device_info.gmadr_bytes_in_cmd; - if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8)) + if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8)) { + gvt_vgpu_err("invalid gma bytes %d\n", gmadr_bytes); return INTEL_GVT_INVALID_ADDR; + } gma_low = cmd_val(s, index) & BATCH_BUFFER_ADDR_MASK; if (gmadr_bytes == 4) { @@ -1374,16 +1406,16 @@ static inline int cmd_address_audit(struct parser_exec_state *s, if (op_size > max_surface_size) { gvt_vgpu_err("command address audit fail name %s\n", s->info->name); - return -EINVAL; + return -EFAULT; } if (index_mode) { - if (guest_gma >= GTT_PAGE_SIZE / sizeof(u64)) { - ret = -EINVAL; + if (guest_gma >= I915_GTT_PAGE_SIZE / sizeof(u64)) { + ret = -EFAULT; goto err; } } else if (!intel_gvt_ggtt_validate_range(vgpu, guest_gma, op_size)) { - ret = -EINVAL; + ret = -EFAULT; goto err; } @@ -1439,7 +1471,7 @@ static inline int unexpected_cmd(struct parser_exec_state *s) gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name); - return -EINVAL; + return -EBADRQC; } static int cmd_handler_mi_semaphore_wait(struct parser_exec_state *s) @@ -1545,10 +1577,10 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, return -EFAULT; } - offset = gma & (GTT_PAGE_SIZE - 1); + offset = gma & (I915_GTT_PAGE_SIZE - 1); - copy_len = (end_gma - gma) >= (GTT_PAGE_SIZE - offset) ? - GTT_PAGE_SIZE - offset : end_gma - gma; + copy_len = (end_gma - gma) >= (I915_GTT_PAGE_SIZE - offset) ? 
+ I915_GTT_PAGE_SIZE - offset : end_gma - gma; intel_gvt_hypervisor_read_gpa(vgpu, gpa, va + len, copy_len); @@ -1576,108 +1608,113 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s) return 1; } -static uint32_t find_bb_size(struct parser_exec_state *s) +static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) { unsigned long gma = 0; struct cmd_info *info; - uint32_t bb_size = 0; uint32_t cmd_len = 0; - bool met_bb_end = false; + bool bb_end = false; struct intel_vgpu *vgpu = s->vgpu; u32 cmd; + *bb_size = 0; + /* get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); - cmd = cmd_val(s, 0); + if (gma == INTEL_GVT_INVALID_ADDR) + return -EFAULT; + cmd = cmd_val(s, 0); info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); - return -EINVAL; + return -EBADRQC; } do { - copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, - gma, gma + 4, &cmd); + if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, + gma, gma + 4, &cmd) < 0) + return -EFAULT; info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); - return -EINVAL; + return -EBADRQC; } if (info->opcode == OP_MI_BATCH_BUFFER_END) { - met_bb_end = true; + bb_end = true; } else if (info->opcode == OP_MI_BATCH_BUFFER_START) { - if (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0) { + if (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0) /* chained batch buffer */ - met_bb_end = true; - } + bb_end = true; } cmd_len = get_cmd_length(info, cmd) << 2; - bb_size += cmd_len; + *bb_size += cmd_len; gma += cmd_len; + } while (!bb_end); - } while (!met_bb_end); - - return bb_size; + return 0; } static int perform_bb_shadow(struct parser_exec_state *s) { - struct intel_shadow_bb_entry *entry_obj; struct intel_vgpu *vgpu = s->vgpu; + struct intel_vgpu_shadow_bb *bb; unsigned long gma = 0; - uint32_t bb_size; - void *dst = NULL; + unsigned long bb_size; int ret = 0; /* get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); + if (gma == INTEL_GVT_INVALID_ADDR) + return -EFAULT; - /* get the size of the batch buffer */ - bb_size = find_bb_size(s); + ret = find_bb_size(s, &bb_size); + if (ret) + return ret; - /* allocate shadow batch buffer */ - entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL); - if (entry_obj == NULL) + bb = kzalloc(sizeof(*bb), GFP_KERNEL); + if (!bb) return -ENOMEM; - entry_obj->obj = - i915_gem_object_create(s->vgpu->gvt->dev_priv, - roundup(bb_size, PAGE_SIZE)); - if (IS_ERR(entry_obj->obj)) { - ret = PTR_ERR(entry_obj->obj); - goto free_entry; + bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv, + roundup(bb_size, PAGE_SIZE)); + if (IS_ERR(bb->obj)) { + ret = PTR_ERR(bb->obj); + goto err_free_bb; } - entry_obj->len = bb_size; - INIT_LIST_HEAD(&entry_obj->list); - dst = i915_gem_object_pin_map(entry_obj->obj, I915_MAP_WB); - if (IS_ERR(dst)) { - ret = PTR_ERR(dst); - goto put_obj; - } + ret = i915_gem_obj_prepare_shmem_write(bb->obj, &bb->clflush); + if (ret) + goto err_free_obj; - ret = i915_gem_object_set_to_cpu_domain(entry_obj->obj, false); - if (ret) { - gvt_vgpu_err("failed to set shadow batch to CPU\n"); - goto unmap_src; + bb->va = i915_gem_object_pin_map(bb->obj, I915_MAP_WB); + if (IS_ERR(bb->va)) { + ret = PTR_ERR(bb->va); + goto err_finish_shmem_access; } - entry_obj->va = dst; - entry_obj->bb_start_cmd_va = s->ip_va; + if (bb->clflush & CLFLUSH_BEFORE) { + 
drm_clflush_virt_range(bb->va, bb->obj->base.size); + bb->clflush &= ~CLFLUSH_BEFORE; + } - /* copy batch buffer to shadow batch buffer*/ ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, gma, gma + bb_size, - dst); + bb->va); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); - goto unmap_src; + ret = -EFAULT; + goto err_unmap; } - list_add(&entry_obj->list, &s->workload->shadow_bb); + INIT_LIST_HEAD(&bb->list); + list_add(&bb->list, &s->workload->shadow_bb); + + bb->accessing = true; + bb->bb_start_cmd_va = s->ip_va; + /* * ip_va saves the virtual address of the shadow batch buffer, while * ip_gma saves the graphics address of the original batch buffer. @@ -1686,17 +1723,17 @@ static int perform_bb_shadow(struct parser_exec_state *s) * buffer's gma in pair. After all, we don't want to pin the shadow * buffer here (too early). */ - s->ip_va = dst; + s->ip_va = bb->va; s->ip_gma = gma; - return 0; - -unmap_src: - i915_gem_object_unpin_map(entry_obj->obj); -put_obj: - i915_gem_object_put(entry_obj->obj); -free_entry: - kfree(entry_obj); +err_unmap: + i915_gem_object_unpin_map(bb->obj); +err_finish_shmem_access: + i915_gem_obj_finish_shmem_access(bb->obj); +err_free_obj: + i915_gem_object_put(bb->obj); +err_free_bb: + kfree(bb); return ret; } @@ -1708,13 +1745,13 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) { gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n"); - return -EINVAL; + return -EFAULT; } second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1; if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) { gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n"); - return -EINVAL; + return -EFAULT; } s->saved_buf_addr_type = s->buf_addr_type; @@ -1738,7 +1775,6 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) if (ret < 0) return ret; } - return ret; } @@ -2428,7 +2464,7 @@ static int cmd_parser_exec(struct parser_exec_state *s) if (info == NULL) { gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); - return -EINVAL; + return -EBADRQC; } s->info = info; @@ -2463,6 +2499,10 @@ static inline bool gma_out_of_range(unsigned long gma, return (gma > gma_tail) && (gma < gma_head); } +/* Keep the consistent return type, e.g EBADRQC for unknown + * cmd, EFAULT for invalid address, EPERM for nonpriv. later + * works as the input of VM healthy status. + */ static int command_scan(struct parser_exec_state *s, unsigned long rb_head, unsigned long rb_tail, unsigned long rb_start, unsigned long rb_len) @@ -2485,7 +2525,7 @@ static int command_scan(struct parser_exec_state *s, s->ip_gma, rb_start, gma_bottom); parser_exec_state_dump(s); - return -EINVAL; + return -EFAULT; } if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) { gvt_vgpu_err("ip_gma %lx out of range." 
@@ -2514,7 +2554,7 @@ static int scan_workload(struct intel_vgpu_workload *workload) int ret = 0; /* ring base is page aligned */ - if (WARN_ON(!IS_ALIGNED(workload->rb_start, GTT_PAGE_SIZE))) + if (WARN_ON(!IS_ALIGNED(workload->rb_start, I915_GTT_PAGE_SIZE))) return -EINVAL; gma_head = workload->rb_start + workload->rb_head; @@ -2563,7 +2603,8 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx); /* ring base is page aligned */ - if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, GTT_PAGE_SIZE))) + if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, + I915_GTT_PAGE_SIZE))) return -EINVAL; ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(uint32_t); @@ -2602,8 +2643,10 @@ out: static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; unsigned long gma_head, gma_tail, gma_top, guest_rb_size; - u32 *cs; + void *shadow_ring_buffer_va; + int ring_id = workload->ring_id; int ret; guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl); @@ -2616,34 +2659,44 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_tail = workload->rb_start + workload->rb_tail; gma_top = workload->rb_start + guest_rb_size; - /* allocate shadow ring buffer */ - cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); - if (IS_ERR(cs)) - return PTR_ERR(cs); + if (workload->rb_len > s->ring_scan_buffer_size[ring_id]) { + void *p; + + /* realloc the new ring buffer if needed */ + p = krealloc(s->ring_scan_buffer[ring_id], workload->rb_len, + GFP_KERNEL); + if (!p) { + gvt_vgpu_err("fail to re-alloc ring scan buffer\n"); + return -ENOMEM; + } + s->ring_scan_buffer[ring_id] = p; + s->ring_scan_buffer_size[ring_id] = workload->rb_len; + } + + shadow_ring_buffer_va = s->ring_scan_buffer[ring_id]; /* get shadow ring buffer va */ - workload->shadow_ring_buffer_va = cs; + workload->shadow_ring_buffer_va = shadow_ring_buffer_va; /* head > tail --> copy head <-> top */ if (gma_head > gma_tail) { ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_top, cs); + gma_head, gma_top, shadow_ring_buffer_va); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } - cs += ret / sizeof(u32); + shadow_ring_buffer_va += ret; gma_head = workload->rb_start; } /* copy head or start <-> tail */ - ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs); + ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, + shadow_ring_buffer_va); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } - cs += ret / sizeof(u32); - intel_ring_advance(workload->req, cs); return 0; } @@ -2766,12 +2819,12 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) } static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt, - unsigned int opcode, int rings) + unsigned int opcode, unsigned long rings) { struct cmd_info *info = NULL; unsigned int ring; - for_each_set_bit(ring, (unsigned long *)&rings, I915_NUM_ENGINES) { + for_each_set_bit(ring, &rings, I915_NUM_ENGINES) { info = find_cmd_entry(gvt, opcode, ring); if (info) break; diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h index b0cff4dc2684..c6027125c1ec 100644 --- a/drivers/gpu/drm/i915/gvt/debug.h +++ b/drivers/gpu/drm/i915/gvt/debug.h @@ -25,41 +25,41 @@ #define __GVT_DEBUG_H__ #define gvt_err(fmt, args...) 
\ - DRM_ERROR("gvt: "fmt, ##args) + pr_err("gvt: "fmt, ##args) #define gvt_vgpu_err(fmt, args...) \ do { \ if (IS_ERR_OR_NULL(vgpu)) \ - DRM_DEBUG_DRIVER("gvt: "fmt, ##args); \ + pr_err("gvt: "fmt, ##args); \ else \ - DRM_DEBUG_DRIVER("gvt: vgpu %d: "fmt, vgpu->id, ##args);\ + pr_err("gvt: vgpu %d: "fmt, vgpu->id, ##args);\ } while (0) #define gvt_dbg_core(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: core: "fmt, ##args) + pr_debug("gvt: core: "fmt, ##args) #define gvt_dbg_irq(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: irq: "fmt, ##args) + pr_debug("gvt: irq: "fmt, ##args) #define gvt_dbg_mm(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: mm: "fmt, ##args) + pr_debug("gvt: mm: "fmt, ##args) #define gvt_dbg_mmio(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: mmio: "fmt, ##args) + pr_debug("gvt: mmio: "fmt, ##args) #define gvt_dbg_dpy(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: dpy: "fmt, ##args) + pr_debug("gvt: dpy: "fmt, ##args) #define gvt_dbg_el(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: el: "fmt, ##args) + pr_debug("gvt: el: "fmt, ##args) #define gvt_dbg_sched(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: sched: "fmt, ##args) + pr_debug("gvt: sched: "fmt, ##args) #define gvt_dbg_render(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: render: "fmt, ##args) + pr_debug("gvt: render: "fmt, ##args) #define gvt_dbg_cmd(fmt, args...) \ - DRM_DEBUG_DRIVER("gvt: cmd: "fmt, ##args) + pr_debug("gvt: cmd: "fmt, ##args) #endif diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c new file mode 100644 index 000000000000..32a66dfdf112 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -0,0 +1,212 @@ +/* + * Copyright(c) 2011-2017 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/debugfs.h> +#include <linux/list_sort.h> +#include "i915_drv.h" +#include "gvt.h" + +struct mmio_diff_param { + struct intel_vgpu *vgpu; + int total; + int diff; + struct list_head diff_mmio_list; +}; + +struct diff_mmio { + struct list_head node; + u32 offset; + u32 preg; + u32 vreg; +}; + +/* Compare two diff_mmio items. 
*/ +static int mmio_offset_compare(void *priv, + struct list_head *a, struct list_head *b) +{ + struct diff_mmio *ma; + struct diff_mmio *mb; + + ma = container_of(a, struct diff_mmio, node); + mb = container_of(b, struct diff_mmio, node); + if (ma->offset < mb->offset) + return -1; + else if (ma->offset > mb->offset) + return 1; + return 0; +} + +static inline int mmio_diff_handler(struct intel_gvt *gvt, + u32 offset, void *data) +{ + struct drm_i915_private *dev_priv = gvt->dev_priv; + struct mmio_diff_param *param = data; + struct diff_mmio *node; + u32 preg, vreg; + + preg = I915_READ_NOTRACE(_MMIO(offset)); + vreg = vgpu_vreg(param->vgpu, offset); + + if (preg != vreg) { + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + node->offset = offset; + node->preg = preg; + node->vreg = vreg; + list_add(&node->node, ¶m->diff_mmio_list); + param->diff++; + } + param->total++; + return 0; +} + +/* Show the all the different values of tracked mmio. */ +static int vgpu_mmio_diff_show(struct seq_file *s, void *unused) +{ + struct intel_vgpu *vgpu = s->private; + struct intel_gvt *gvt = vgpu->gvt; + struct mmio_diff_param param = { + .vgpu = vgpu, + .total = 0, + .diff = 0, + }; + struct diff_mmio *node, *next; + + INIT_LIST_HEAD(¶m.diff_mmio_list); + + mutex_lock(&gvt->lock); + spin_lock_bh(&gvt->scheduler.mmio_context_lock); + + mmio_hw_access_pre(gvt->dev_priv); + /* Recognize all the diff mmios to list. */ + intel_gvt_for_each_tracked_mmio(gvt, mmio_diff_handler, ¶m); + mmio_hw_access_post(gvt->dev_priv); + + spin_unlock_bh(&gvt->scheduler.mmio_context_lock); + mutex_unlock(&gvt->lock); + + /* In an ascending order by mmio offset. */ + list_sort(NULL, ¶m.diff_mmio_list, mmio_offset_compare); + + seq_printf(s, "%-8s %-8s %-8s %-8s\n", "Offset", "HW", "vGPU", "Diff"); + list_for_each_entry_safe(node, next, ¶m.diff_mmio_list, node) { + u32 diff = node->preg ^ node->vreg; + + seq_printf(s, "%08x %08x %08x %*pbl\n", + node->offset, node->preg, node->vreg, + 32, &diff); + list_del(&node->node); + kfree(node); + } + seq_printf(s, "Total: %d, Diff: %d\n", param.total, param.diff); + return 0; +} + +static int vgpu_mmio_diff_open(struct inode *inode, struct file *file) +{ + return single_open(file, vgpu_mmio_diff_show, inode->i_private); +} + +static const struct file_operations vgpu_mmio_diff_fops = { + .open = vgpu_mmio_diff_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/** + * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed. + */ +int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) +{ + struct dentry *ent; + char name[10] = ""; + + sprintf(name, "vgpu%d", vgpu->id); + vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root); + if (!vgpu->debugfs) + return -ENOMEM; + + ent = debugfs_create_bool("active", 0444, vgpu->debugfs, + &vgpu->active); + if (!ent) + return -ENOMEM; + + ent = debugfs_create_file("mmio_diff", 0444, vgpu->debugfs, + vgpu, &vgpu_mmio_diff_fops); + if (!ent) + return -ENOMEM; + + return 0; +} + +/** + * intel_gvt_debugfs_remove_vgpu - remove debugfs entries of a vGPU + * @vgpu: a vGPU + */ +void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu) +{ + debugfs_remove_recursive(vgpu->debugfs); + vgpu->debugfs = NULL; +} + +/** + * intel_gvt_debugfs_init - register gvt debugfs root entry + * @gvt: GVT device + * + * Returns: + * zero on success, negative if failed. 
+ */ +int intel_gvt_debugfs_init(struct intel_gvt *gvt) +{ + struct drm_minor *minor = gvt->dev_priv->drm.primary; + struct dentry *ent; + + gvt->debugfs_root = debugfs_create_dir("gvt", minor->debugfs_root); + if (!gvt->debugfs_root) { + gvt_err("Cannot create debugfs dir\n"); + return -ENOMEM; + } + + ent = debugfs_create_ulong("num_tracked_mmio", 0444, gvt->debugfs_root, + &gvt->mmio.num_tracked_mmio); + if (!ent) + return -ENOMEM; + + return 0; +} + +/** + * intel_gvt_debugfs_clean - remove debugfs entries + * @gvt: GVT device + */ +void intel_gvt_debugfs_clean(struct intel_gvt *gvt) +{ + debugfs_remove_recursive(gvt->debugfs_root); + gvt->debugfs_root = NULL; +} diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 3c318439a659..dd96ffc878ac 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -59,7 +59,7 @@ static int edp_pipe_is_enabled(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - if (!(vgpu_vreg(vgpu, PIPECONF(_PIPE_EDP)) & PIPECONF_ENABLE)) + if (!(vgpu_vreg_t(vgpu, PIPECONF(_PIPE_EDP)) & PIPECONF_ENABLE)) return 0; if (!(vgpu_vreg(vgpu, _TRANS_DDI_FUNC_CTL_EDP) & TRANS_DDI_FUNC_ENABLE)) @@ -67,14 +67,14 @@ static int edp_pipe_is_enabled(struct intel_vgpu *vgpu) return 1; } -static int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) +int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; if (WARN_ON(pipe < PIPE_A || pipe >= I915_MAX_PIPES)) return -EINVAL; - if (vgpu_vreg(vgpu, PIPECONF(pipe)) & PIPECONF_ENABLE) + if (vgpu_vreg_t(vgpu, PIPECONF(pipe)) & PIPECONF_ENABLE) return 1; if (edp_pipe_is_enabled(vgpu) && @@ -169,103 +169,105 @@ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { static void emulate_monitor_status_change(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT | + vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT | SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | + vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); - vgpu_vreg(vgpu, SKL_FUSE_STATUS) |= + vgpu_vreg_t(vgpu, SKL_FUSE_STATUS) |= SKL_FUSE_DOWNLOAD_STATUS | SKL_FUSE_PG_DIST_STATUS(SKL_PG0) | SKL_FUSE_PG_DIST_STATUS(SKL_PG1) | SKL_FUSE_PG_DIST_STATUS(SKL_PG2); - vgpu_vreg(vgpu, LCPLL1_CTL) |= + vgpu_vreg_t(vgpu, LCPLL1_CTL) |= LCPLL_PLL_ENABLE | LCPLL_PLL_LOCK; - vgpu_vreg(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE; + vgpu_vreg_t(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { - vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) &= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_B)) &= ~PORT_CLK_SEL_MASK; - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) |= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_B)) |= PORT_CLK_SEL_LCPLL_810; } - vgpu_vreg(vgpu, 
DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE; - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE; + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_C << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) &= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_C)) &= ~PORT_CLK_SEL_MASK; - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) |= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_C)) |= PORT_CLK_SEL_LCPLL_810; } - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE; - vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE; + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) { - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_D << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) &= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_D)) &= ~PORT_CLK_SEL_MASK; - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) |= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_D)) |= PORT_CLK_SEL_LCPLL_810; } - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE; - vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE; + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; } if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT; + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { if (IS_BROADWELL(dev_priv)) - vgpu_vreg(vgpu, GEN8_DE_PORT_ISR) |= + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_PORT_DP_A_HOTPLUG; else - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT; + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED; } /* Clear host CRT status, so guest couldn't detect this host CRT. 
*/ if (IS_BROADWELL(dev_priv)) - vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + vgpu_vreg_t(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + + vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; } static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) @@ -307,6 +309,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->type = type; emulate_monitor_status_change(vgpu); + return 0; } @@ -366,12 +369,12 @@ static void emulate_vblank_on_pipe(struct intel_vgpu *vgpu, int pipe) if (!pipe_is_enabled(vgpu, pipe)) continue; - vgpu_vreg(vgpu, PIPE_FLIPCOUNT_G4X(pipe))++; + vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(pipe))++; intel_vgpu_trigger_virtual_event(vgpu, event); } if (pipe_is_enabled(vgpu, pipe)) { - vgpu_vreg(vgpu, PIPE_FRMCOUNT_G4X(pipe))++; + vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(pipe))++; intel_vgpu_trigger_virtual_event(vgpu, vblank_event[pipe]); } } diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index d73de22102e2..b46b86892d58 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -179,4 +179,6 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution); void intel_vgpu_reset_display(struct intel_vgpu *vgpu); void intel_vgpu_clean_display(struct intel_vgpu *vgpu); +int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe); + #endif diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c new file mode 100644 index 000000000000..2fb7b34ef561 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -0,0 +1,536 @@ +/* + * Copyright 2017 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Zhiyuan Lv <zhiyuan.lv@intel.com> + * + * Contributors: + * Xiaoguang Chen + * Tina Zhang <tina.zhang@intel.com> + */ + +#include <linux/dma-buf.h> +#include <drm/drmP.h> +#include <linux/vfio.h> + +#include "i915_drv.h" +#include "gvt.h" + +#define GEN8_DECODE_PTE(pte) (pte & GENMASK_ULL(63, 12)) + +static int vgpu_gem_get_pages( + struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + int i, ret; + gen8_pte_t __iomem *gtt_entries; + struct intel_vgpu_fb_info *fb_info; + + fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info; + if (WARN_ON(!fb_info)) + return -ENODEV; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (unlikely(!st)) + return -ENOMEM; + + ret = sg_alloc_table(st, fb_info->size, GFP_KERNEL); + if (ret) { + kfree(st); + return ret; + } + gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + + (fb_info->start >> PAGE_SHIFT); + for_each_sg(st->sgl, sg, fb_info->size, i) { + sg->offset = 0; + sg->length = PAGE_SIZE; + sg_dma_address(sg) = + GEN8_DECODE_PTE(readq(>t_entries[i])); + sg_dma_len(sg) = PAGE_SIZE; + } + + __i915_gem_object_set_pages(obj, st, PAGE_SIZE); + + return 0; +} + +static void vgpu_gem_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static void dmabuf_gem_object_free(struct kref *kref) +{ + struct intel_vgpu_dmabuf_obj *obj = + container_of(kref, struct intel_vgpu_dmabuf_obj, kref); + struct intel_vgpu *vgpu = obj->vgpu; + struct list_head *pos; + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + + if (vgpu && vgpu->active && !list_empty(&vgpu->dmabuf_obj_list_head)) { + list_for_each(pos, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, + struct intel_vgpu_dmabuf_obj, list); + if (dmabuf_obj == obj) { + intel_gvt_hypervisor_put_vfio_device(vgpu); + idr_remove(&vgpu->object_idr, + dmabuf_obj->dmabuf_id); + kfree(dmabuf_obj->info); + kfree(dmabuf_obj); + list_del(pos); + break; + } + } + } else { + /* Free the orphan dmabuf_objs here */ + kfree(obj->info); + kfree(obj); + } +} + + +static inline void dmabuf_obj_get(struct intel_vgpu_dmabuf_obj *obj) +{ + kref_get(&obj->kref); +} + +static inline void dmabuf_obj_put(struct intel_vgpu_dmabuf_obj *obj) +{ + kref_put(&obj->kref, dmabuf_gem_object_free); +} + +static void vgpu_gem_release(struct drm_i915_gem_object *gem_obj) +{ + + struct intel_vgpu_fb_info *fb_info = gem_obj->gvt_info; + struct intel_vgpu_dmabuf_obj *obj = fb_info->obj; + struct intel_vgpu *vgpu = obj->vgpu; + + if (vgpu) { + mutex_lock(&vgpu->dmabuf_lock); + gem_obj->base.dma_buf = NULL; + dmabuf_obj_put(obj); + mutex_unlock(&vgpu->dmabuf_lock); + } else { + /* vgpu is NULL, as it has been removed already */ + gem_obj->base.dma_buf = NULL; + dmabuf_obj_put(obj); + } +} + +static const struct drm_i915_gem_object_ops intel_vgpu_gem_ops = { + .flags = I915_GEM_OBJECT_IS_PROXY, + .get_pages = vgpu_gem_get_pages, + .put_pages = vgpu_gem_put_pages, + .release = vgpu_gem_release, +}; + +static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, + struct intel_vgpu_fb_info *info) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_alloc(dev_priv); + if (obj == NULL) + return NULL; + + drm_gem_private_object_init(dev, &obj->base, + info->size << PAGE_SHIFT); + i915_gem_object_init(obj, &intel_vgpu_gem_ops); + + obj->base.read_domains = I915_GEM_DOMAIN_GTT; + obj->base.write_domain = 0; 
+ if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + unsigned int tiling_mode = 0; + unsigned int stride = 0; + + switch (info->drm_format_mod << 10) { + case PLANE_CTL_TILED_LINEAR: + tiling_mode = I915_TILING_NONE; + break; + case PLANE_CTL_TILED_X: + tiling_mode = I915_TILING_X; + stride = info->stride; + break; + case PLANE_CTL_TILED_Y: + tiling_mode = I915_TILING_Y; + stride = info->stride; + break; + default: + gvt_dbg_core("not supported tiling mode\n"); + } + obj->tiling_and_stride = tiling_mode | stride; + } else { + obj->tiling_and_stride = info->drm_format_mod ? + I915_TILING_X : 0; + } + + return obj; +} + +static int vgpu_get_plane_info(struct drm_device *dev, + struct intel_vgpu *vgpu, + struct intel_vgpu_fb_info *info, + int plane_id) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_vgpu_primary_plane_format p; + struct intel_vgpu_cursor_plane_format c; + int ret; + + if (plane_id == DRM_PLANE_TYPE_PRIMARY) { + ret = intel_vgpu_decode_primary_plane(vgpu, &p); + if (ret) + return ret; + info->start = p.base; + info->start_gpa = p.base_gpa; + info->width = p.width; + info->height = p.height; + info->stride = p.stride; + info->drm_format = p.drm_format; + info->drm_format_mod = p.tiled; + info->size = (((p.stride * p.height * p.bpp) / 8) + + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + } else if (plane_id == DRM_PLANE_TYPE_CURSOR) { + ret = intel_vgpu_decode_cursor_plane(vgpu, &c); + if (ret) + return ret; + info->start = c.base; + info->start_gpa = c.base_gpa; + info->width = c.width; + info->height = c.height; + info->stride = c.width * (c.bpp / 8); + info->drm_format = c.drm_format; + info->drm_format_mod = 0; + info->x_pos = c.x_pos; + info->y_pos = c.y_pos; + + /* The invalid cursor hotspot value is delivered to host + * until we find a way to get the cursor hotspot info of + * guest OS. 
+ */ + info->x_hot = UINT_MAX; + info->y_hot = UINT_MAX; + info->size = (((info->stride * c.height * c.bpp) / 8) + + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + } else { + gvt_vgpu_err("invalid plane id:%d\n", plane_id); + return -EINVAL; + } + + if (info->size == 0) { + gvt_vgpu_err("fb size is zero\n"); + return -EINVAL; + } + + if (info->start & (PAGE_SIZE - 1)) { + gvt_vgpu_err("Not aligned fb address:0x%llx\n", info->start); + return -EFAULT; + } + if (((info->start >> PAGE_SHIFT) + info->size) > + ggtt_total_entries(&dev_priv->ggtt)) { + gvt_vgpu_err("Invalid GTT offset or size\n"); + return -EFAULT; + } + + if (!intel_gvt_ggtt_validate_range(vgpu, info->start, info->size)) { + gvt_vgpu_err("invalid gma addr\n"); + return -EFAULT; + } + + return 0; +} + +static struct intel_vgpu_dmabuf_obj * +pick_dmabuf_by_info(struct intel_vgpu *vgpu, + struct intel_vgpu_fb_info *latest_info) +{ + struct list_head *pos; + struct intel_vgpu_fb_info *fb_info; + struct intel_vgpu_dmabuf_obj *dmabuf_obj = NULL; + struct intel_vgpu_dmabuf_obj *ret = NULL; + + list_for_each(pos, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, + list); + if ((dmabuf_obj == NULL) || + (dmabuf_obj->info == NULL)) + continue; + + fb_info = (struct intel_vgpu_fb_info *)dmabuf_obj->info; + if ((fb_info->start == latest_info->start) && + (fb_info->start_gpa == latest_info->start_gpa) && + (fb_info->size == latest_info->size) && + (fb_info->drm_format_mod == latest_info->drm_format_mod) && + (fb_info->drm_format == latest_info->drm_format) && + (fb_info->width == latest_info->width) && + (fb_info->height == latest_info->height)) { + ret = dmabuf_obj; + break; + } + } + + return ret; +} + +static struct intel_vgpu_dmabuf_obj * +pick_dmabuf_by_num(struct intel_vgpu *vgpu, u32 id) +{ + struct list_head *pos; + struct intel_vgpu_dmabuf_obj *dmabuf_obj = NULL; + struct intel_vgpu_dmabuf_obj *ret = NULL; + + list_for_each(pos, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, + list); + if (!dmabuf_obj) + continue; + + if (dmabuf_obj->dmabuf_id == id) { + ret = dmabuf_obj; + break; + } + } + + return ret; +} + +static void update_fb_info(struct vfio_device_gfx_plane_info *gvt_dmabuf, + struct intel_vgpu_fb_info *fb_info) +{ + gvt_dmabuf->drm_format = fb_info->drm_format; + gvt_dmabuf->width = fb_info->width; + gvt_dmabuf->height = fb_info->height; + gvt_dmabuf->stride = fb_info->stride; + gvt_dmabuf->size = fb_info->size; + gvt_dmabuf->x_pos = fb_info->x_pos; + gvt_dmabuf->y_pos = fb_info->y_pos; + gvt_dmabuf->x_hot = fb_info->x_hot; + gvt_dmabuf->y_hot = fb_info->y_hot; +} + +int intel_vgpu_query_plane(struct intel_vgpu *vgpu, void *args) +{ + struct drm_device *dev = &vgpu->gvt->dev_priv->drm; + struct vfio_device_gfx_plane_info *gfx_plane_info = args; + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + struct intel_vgpu_fb_info fb_info; + int ret = 0; + + if (gfx_plane_info->flags == (VFIO_GFX_PLANE_TYPE_DMABUF | + VFIO_GFX_PLANE_TYPE_PROBE)) + return ret; + else if ((gfx_plane_info->flags & ~VFIO_GFX_PLANE_TYPE_DMABUF) || + (!gfx_plane_info->flags)) + return -EINVAL; + + ret = vgpu_get_plane_info(dev, vgpu, &fb_info, + gfx_plane_info->drm_plane_type); + if (ret != 0) + goto out; + + mutex_lock(&vgpu->dmabuf_lock); + /* If exists, pick up the exposed dmabuf_obj */ + dmabuf_obj = pick_dmabuf_by_info(vgpu, &fb_info); + if (dmabuf_obj) { + update_fb_info(gfx_plane_info, &fb_info); + gfx_plane_info->dmabuf_id = dmabuf_obj->dmabuf_id; + + /* This buffer may be 
released between query_plane ioctl and + * get_dmabuf ioctl. Add the refcount to make sure it won't + * be released between the two ioctls. + */ + if (!dmabuf_obj->initref) { + dmabuf_obj->initref = true; + dmabuf_obj_get(dmabuf_obj); + } + ret = 0; + gvt_dbg_dpy("vgpu%d: re-use dmabuf_obj ref %d, id %d\n", + vgpu->id, kref_read(&dmabuf_obj->kref), + gfx_plane_info->dmabuf_id); + mutex_unlock(&vgpu->dmabuf_lock); + goto out; + } + + mutex_unlock(&vgpu->dmabuf_lock); + + /* Need to allocate a new one*/ + dmabuf_obj = kmalloc(sizeof(struct intel_vgpu_dmabuf_obj), GFP_KERNEL); + if (unlikely(!dmabuf_obj)) { + gvt_vgpu_err("alloc dmabuf_obj failed\n"); + ret = -ENOMEM; + goto out; + } + + dmabuf_obj->info = kmalloc(sizeof(struct intel_vgpu_fb_info), + GFP_KERNEL); + if (unlikely(!dmabuf_obj->info)) { + gvt_vgpu_err("allocate intel vgpu fb info failed\n"); + ret = -ENOMEM; + goto out_free_dmabuf; + } + memcpy(dmabuf_obj->info, &fb_info, sizeof(struct intel_vgpu_fb_info)); + + ((struct intel_vgpu_fb_info *)dmabuf_obj->info)->obj = dmabuf_obj; + + dmabuf_obj->vgpu = vgpu; + + ret = idr_alloc(&vgpu->object_idr, dmabuf_obj, 1, 0, GFP_NOWAIT); + if (ret < 0) + goto out_free_info; + gfx_plane_info->dmabuf_id = ret; + dmabuf_obj->dmabuf_id = ret; + + dmabuf_obj->initref = true; + + kref_init(&dmabuf_obj->kref); + + mutex_lock(&vgpu->dmabuf_lock); + if (intel_gvt_hypervisor_get_vfio_device(vgpu)) { + gvt_vgpu_err("get vfio device failed\n"); + mutex_unlock(&vgpu->dmabuf_lock); + goto out_free_info; + } + mutex_unlock(&vgpu->dmabuf_lock); + + update_fb_info(gfx_plane_info, &fb_info); + + INIT_LIST_HEAD(&dmabuf_obj->list); + mutex_lock(&vgpu->dmabuf_lock); + list_add_tail(&dmabuf_obj->list, &vgpu->dmabuf_obj_list_head); + mutex_unlock(&vgpu->dmabuf_lock); + + gvt_dbg_dpy("vgpu%d: %s new dmabuf_obj ref %d, id %d\n", vgpu->id, + __func__, kref_read(&dmabuf_obj->kref), ret); + + return 0; + +out_free_info: + kfree(dmabuf_obj->info); +out_free_dmabuf: + kfree(dmabuf_obj); +out: + /* ENODEV means plane isn't ready, which might be a normal case. */ + return (ret == -ENODEV) ? 
0 : ret; +} + +/* To associate an exposed dmabuf with the dmabuf_obj */ +int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id) +{ + struct drm_device *dev = &vgpu->gvt->dev_priv->drm; + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + int dmabuf_fd; + int ret = 0; + + mutex_lock(&vgpu->dmabuf_lock); + + dmabuf_obj = pick_dmabuf_by_num(vgpu, dmabuf_id); + if (dmabuf_obj == NULL) { + gvt_vgpu_err("invalid dmabuf id:%d\n", dmabuf_id); + ret = -EINVAL; + goto out; + } + + obj = vgpu_create_gem(dev, dmabuf_obj->info); + if (obj == NULL) { + gvt_vgpu_err("create gvt gem obj failed:%d\n", vgpu->id); + ret = -ENOMEM; + goto out; + } + + obj->gvt_info = dmabuf_obj->info; + + dmabuf = i915_gem_prime_export(dev, &obj->base, DRM_CLOEXEC | DRM_RDWR); + if (IS_ERR(dmabuf)) { + gvt_vgpu_err("export dma-buf failed\n"); + ret = PTR_ERR(dmabuf); + goto out_free_gem; + } + + i915_gem_object_put(obj); + + ret = dma_buf_fd(dmabuf, DRM_CLOEXEC | DRM_RDWR); + if (ret < 0) { + gvt_vgpu_err("create dma-buf fd failed ret:%d\n", ret); + goto out_free_dmabuf; + } + dmabuf_fd = ret; + + dmabuf_obj_get(dmabuf_obj); + + if (dmabuf_obj->initref) { + dmabuf_obj->initref = false; + dmabuf_obj_put(dmabuf_obj); + } + + mutex_unlock(&vgpu->dmabuf_lock); + + gvt_dbg_dpy("vgpu%d: dmabuf:%d, dmabuf ref %d, fd:%d\n" + " file count: %ld, GEM ref: %d\n", + vgpu->id, dmabuf_obj->dmabuf_id, + kref_read(&dmabuf_obj->kref), + dmabuf_fd, + file_count(dmabuf->file), + kref_read(&obj->base.refcount)); + + return dmabuf_fd; + +out_free_dmabuf: + dma_buf_put(dmabuf); +out_free_gem: + i915_gem_object_put(obj); +out: + mutex_unlock(&vgpu->dmabuf_lock); + return ret; +} + +void intel_vgpu_dmabuf_cleanup(struct intel_vgpu *vgpu) +{ + struct list_head *pos, *n; + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + + mutex_lock(&vgpu->dmabuf_lock); + list_for_each_safe(pos, n, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, + list); + dmabuf_obj->vgpu = NULL; + + idr_remove(&vgpu->object_idr, dmabuf_obj->dmabuf_id); + intel_gvt_hypervisor_put_vfio_device(vgpu); + list_del(pos); + + /* dmabuf_obj might be freed in dmabuf_obj_put */ + if (dmabuf_obj->initref) { + dmabuf_obj->initref = false; + dmabuf_obj_put(dmabuf_obj); + } + + } + mutex_unlock(&vgpu->dmabuf_lock); +} diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.h b/drivers/gpu/drm/i915/gvt/dmabuf.h new file mode 100644 index 000000000000..5f8f03fb1d1b --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/dmabuf.h @@ -0,0 +1,67 @@ +/* + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Zhiyuan Lv <zhiyuan.lv@intel.com> + * + * Contributors: + * Xiaoguang Chen + * Tina Zhang <tina.zhang@intel.com> + */ + +#ifndef _GVT_DMABUF_H_ +#define _GVT_DMABUF_H_ +#include <linux/vfio.h> + +struct intel_vgpu_fb_info { + __u64 start; + __u64 start_gpa; + __u64 drm_format_mod; + __u32 drm_format; /* drm format of plane */ + __u32 width; /* width of plane */ + __u32 height; /* height of plane */ + __u32 stride; /* stride of plane */ + __u32 size; /* size of plane in bytes, align on page */ + __u32 x_pos; /* horizontal position of cursor plane */ + __u32 y_pos; /* vertical position of cursor plane */ + __u32 x_hot; /* horizontal position of cursor hotspot */ + __u32 y_hot; /* vertical position of cursor hotspot */ + struct intel_vgpu_dmabuf_obj *obj; +}; + +/** + * struct intel_vgpu_dmabuf_obj- Intel vGPU device buffer object + */ +struct intel_vgpu_dmabuf_obj { + struct intel_vgpu *vgpu; + struct intel_vgpu_fb_info *info; + __u32 dmabuf_id; + struct kref kref; + bool initref; + struct list_head list; +}; + +int intel_vgpu_query_plane(struct intel_vgpu *vgpu, void *args); +int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id); +void intel_vgpu_dmabuf_cleanup(struct intel_vgpu *vgpu); + +#endif diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index 42cd09ec63fa..f61337632969 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -95,9 +95,9 @@ static inline int get_port_from_gmbus0(u32 gmbus0) static void reset_gmbus_controller(struct intel_vgpu *vgpu) { - vgpu_vreg(vgpu, PCH_GMBUS2) = GMBUS_HW_RDY; + vgpu_vreg_t(vgpu, PCH_GMBUS2) = GMBUS_HW_RDY; if (!vgpu->display.i2c_edid.edid_available) - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; vgpu->display.i2c_edid.gmbus.phase = GMBUS_IDLE_PHASE; } @@ -123,16 +123,16 @@ static int gmbus0_mmio_write(struct intel_vgpu *vgpu, vgpu->display.i2c_edid.state = I2C_GMBUS; vgpu->display.i2c_edid.gmbus.phase = GMBUS_IDLE_PHASE; - vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE; - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY | GMBUS_HW_WAIT_PHASE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY | GMBUS_HW_WAIT_PHASE; if (intel_vgpu_has_monitor_on_port(vgpu, port) && !intel_vgpu_port_is_dp(vgpu, port)) { vgpu->display.i2c_edid.port = port; vgpu->display.i2c_edid.edid_available = true; - vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_SATOER; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_SATOER; } else - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; return 0; } @@ -159,8 +159,8 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, * 2) HW_RDY bit asserted */ if (wvalue & GMBUS_SW_CLR_INT) { - vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_INT; - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_INT; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY; } /* For virtualization, we suppose that HW is always ready, @@ -208,7 +208,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, * visible in gmbus interface) */ i2c_edid->gmbus.phase = GMBUS_IDLE_PHASE; - vgpu_vreg(vgpu, PCH_GMBUS2) &= 
~GMBUS_ACTIVE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE; } break; case NIDX_NS_W: @@ -220,7 +220,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, * START (-->INDEX) -->DATA */ i2c_edid->gmbus.phase = GMBUS_DATA_PHASE; - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE; break; default: gvt_vgpu_err("Unknown/reserved GMBUS cycle detected!\n"); @@ -256,7 +256,7 @@ static int gmbus3_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, u32 reg_data = 0; /* Data can only be received if previous settings correct */ - if (vgpu_vreg(vgpu, PCH_GMBUS1) & GMBUS_SLAVE_READ) { + if (vgpu_vreg_t(vgpu, PCH_GMBUS1) & GMBUS_SLAVE_READ) { if (byte_left <= 0) { memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes); return 0; diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index e5320b4eb698..70494e394d2c 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -46,8 +46,6 @@ #define same_context(a, b) (((a)->context_id == (b)->context_id) && \ ((a)->lrca == (b)->lrca)) -static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask); - static int context_switch_events[] = { [RCS] = RCS_AS_CONTEXT_SWITCH, [BCS] = BCS_AS_CONTEXT_SWITCH, @@ -135,6 +133,8 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist, struct execlist_context_status_pointer_format ctx_status_ptr; u32 write_pointer; u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset; + unsigned long hwsp_gpa; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id, _EL_OFFSET_STATUS_PTR); @@ -160,6 +160,20 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist, ctx_status_ptr.write_ptr = write_pointer; vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; + /* Update the CSB and CSB write pointer in HWSP */ + hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, + vgpu->hws_pga[ring_id]); + if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) { + intel_gvt_hypervisor_write_gpa(vgpu, + hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 + + write_pointer * 8, + status, 8); + intel_gvt_hypervisor_write_gpa(vgpu, + hwsp_gpa + + intel_hws_csb_write_index(dev_priv) * 4, + &write_pointer, 4); + } + gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n", vgpu->id, write_pointer, offset, status->ldw, status->udw); @@ -358,179 +372,47 @@ static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist, return 0; } -static void free_workload(struct intel_vgpu_workload *workload) -{ - intel_vgpu_unpin_mm(workload->shadow_mm); - intel_gvt_mm_unreference(workload->shadow_mm); - kmem_cache_free(workload->vgpu->workloads, workload); -} - #define get_desc_from_elsp_dwords(ed, i) \ ((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2])) -static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) -{ - const int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd; - struct intel_shadow_bb_entry *entry_obj; - - /* pin the gem object to ggtt */ - list_for_each_entry(entry_obj, &workload->shadow_bb, list) { - struct i915_vma *vma; - - vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0); - if (IS_ERR(vma)) { - return; - } - - /* FIXME: we are not tracking our pinned VMA leaving it - * up to the core to fix up the stray pin_count upon - * free.
- */ - - /* update the relocate gma with shadow batch buffer*/ - entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma); - if (gmadr_bytes == 8) - entry_obj->bb_start_cmd_va[2] = 0; - } -} - -static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) -{ - struct intel_vgpu_workload *workload = container_of(wa_ctx, - struct intel_vgpu_workload, - wa_ctx); - int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; - struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; - struct execlist_ring_context *shadow_ring_context; - struct page *page; - - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap_atomic(page); - - shadow_ring_context->bb_per_ctx_ptr.val = - (shadow_ring_context->bb_per_ctx_ptr.val & - (~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma; - shadow_ring_context->rcs_indirect_ctx.val = - (shadow_ring_context->rcs_indirect_ctx.val & - (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma; - - kunmap_atomic(shadow_ring_context); - return 0; -} - -static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) -{ - struct i915_vma *vma; - unsigned char *per_ctx_va = - (unsigned char *)wa_ctx->indirect_ctx.shadow_va + - wa_ctx->indirect_ctx.size; - - if (wa_ctx->indirect_ctx.size == 0) - return; - - vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL, - 0, CACHELINE_BYTES, 0); - if (IS_ERR(vma)) { - return; - } - - /* FIXME: we are not tracking our pinned VMA leaving it - * up to the core to fix up the stray pin_count upon - * free. - */ - - wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma); - - wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1); - memset(per_ctx_va, 0, CACHELINE_BYTES); - - update_wa_ctx_2_shadow_ctx(wa_ctx); -} - static int prepare_execlist_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; struct execlist_ctx_descriptor_format ctx[2]; int ring_id = workload->ring_id; + int ret; - intel_vgpu_pin_mm(workload->shadow_mm); - intel_vgpu_sync_oos_pages(workload->vgpu); - intel_vgpu_flush_post_shadow(workload->vgpu); - prepare_shadow_batch_buffer(workload); - prepare_shadow_wa_ctx(&workload->wa_ctx); if (!workload->emulate_schedule_in) return 0; - ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1); - ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0); - - return emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx); -} + ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0); + ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1); -static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) -{ - /* release all the shadow batch buffer */ - if (!list_empty(&workload->shadow_bb)) { - struct intel_shadow_bb_entry *entry_obj = - list_first_entry(&workload->shadow_bb, - struct intel_shadow_bb_entry, - list); - struct intel_shadow_bb_entry *temp; - - list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb, - list) { - i915_gem_object_unpin_map(entry_obj->obj); - i915_gem_object_put(entry_obj->obj); - list_del(&entry_obj->list); - kfree(entry_obj); - } + ret = emulate_execlist_schedule_in(&s->execlist[ring_id], ctx); + if (ret) { + gvt_vgpu_err("fail to emulate execlist schedule in\n"); + return ret; } -} - -static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) -{ - if (!wa_ctx->indirect_ctx.obj) - return; - - 
i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj); - i915_gem_object_put(wa_ctx->indirect_ctx.obj); + return 0; } static int complete_execlist_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; int ring_id = workload->ring_id; - struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id]; + struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_vgpu_execlist *execlist = &s->execlist[ring_id]; struct intel_vgpu_workload *next_workload; struct list_head *next = workload_q_head(vgpu, ring_id)->next; bool lite_restore = false; - int ret; + int ret = 0; gvt_dbg_el("complete workload %p status %d\n", workload, workload->status); - release_shadow_batch_buffer(workload); - release_shadow_wa_ctx(&workload->wa_ctx); - - if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { - /* if workload->status is not successful means HW GPU - * has occurred GPU hang or something wrong with i915/GVT, - * and GVT won't inject context switch interrupt to guest. - * So this error is a vGPU hang actually to the guest. - * According to this we should emunlate a vGPU hang. If - * there are pending workloads which are already submitted - * from guest, we should clean them up like HW GPU does. - * - * if it is in middle of engine resetting, the pending - * workloads won't be submitted to HW GPU and will be - * cleaned up during the resetting process later, so doing - * the workload clean up here doesn't have any impact. - **/ - clean_workloads(vgpu, ENGINE_MASK(ring_id)); + if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) goto out; - } if (!list_empty(workload_q_head(vgpu, ring_id))) { struct execlist_ctx_descriptor_format *this_desc, *next_desc; @@ -545,213 +427,60 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) if (lite_restore) { gvt_dbg_el("next context == current - no schedule-out\n"); - free_workload(workload); - return 0; + goto out; } ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc); - if (ret) - goto err; out: - free_workload(workload); - return 0; -err: - free_workload(workload); + intel_vgpu_unpin_mm(workload->shadow_mm); + intel_vgpu_destroy_workload(workload); return ret; } -#define RING_CTX_OFF(x) \ - offsetof(struct execlist_ring_context, x) - -static void read_guest_pdps(struct intel_vgpu *vgpu, - u64 ring_context_gpa, u32 pdp[8]) -{ - u64 gpa; - int i; - - gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val); - - for (i = 0; i < 8; i++) - intel_gvt_hypervisor_read_gpa(vgpu, - gpa + i * 8, &pdp[7 - i], 4); -} - -static int prepare_mm(struct intel_vgpu_workload *workload) -{ - struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; - struct intel_vgpu_mm *mm; - struct intel_vgpu *vgpu = workload->vgpu; - int page_table_level; - u32 pdp[8]; - - if (desc->addressing_mode == 1) { /* legacy 32-bit */ - page_table_level = 3; - } else if (desc->addressing_mode == 3) { /* legacy 64 bit */ - page_table_level = 4; - } else { - gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n"); - return -EINVAL; - } - - read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp); - - mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp); - if (mm) { - intel_gvt_mm_reference(mm); - } else { - - mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT, - pdp, page_table_level, 0); - if (IS_ERR(mm)) { - gvt_vgpu_err("fail to create mm object.\n"); - return PTR_ERR(mm); - } - } - workload->shadow_mm = mm; - return 0; -} - -#define 
get_last_workload(q) \ - (list_empty(q) ? NULL : container_of(q->prev, \ - struct intel_vgpu_workload, list)) - static int submit_context(struct intel_vgpu *vgpu, int ring_id, struct execlist_ctx_descriptor_format *desc, bool emulate_schedule_in) { - struct list_head *q = workload_q_head(vgpu, ring_id); - struct intel_vgpu_workload *last_workload = get_last_workload(q); + struct intel_vgpu_submission *s = &vgpu->submission; struct intel_vgpu_workload *workload = NULL; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - u64 ring_context_gpa; - u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx; - int ret; - - ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, - (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT)); - if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca); - return -EINVAL; - } - - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(ring_header.val), &head, 4); - - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(ring_tail.val), &tail, 4); - head &= RB_HEAD_OFF_MASK; - tail &= RB_TAIL_OFF_MASK; + workload = intel_vgpu_create_workload(vgpu, ring_id, desc); + if (IS_ERR(workload)) + return PTR_ERR(workload); - if (last_workload && same_context(&last_workload->ctx_desc, desc)) { - gvt_dbg_el("ring id %d cur workload == last\n", ring_id); - gvt_dbg_el("ctx head %x real head %lx\n", head, - last_workload->rb_tail); - /* - * cannot use guest context head pointer here, - * as it might not be updated at this time - */ - head = last_workload->rb_tail; - } - - gvt_dbg_el("ring id %d begin a new workload\n", ring_id); - - workload = kmem_cache_zalloc(vgpu->workloads, GFP_KERNEL); - if (!workload) - return -ENOMEM; - - /* record some ring buffer register values for scan and shadow */ - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(rb_start.val), &start, 4); - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(rb_ctrl.val), &ctl, 4); - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); - - INIT_LIST_HEAD(&workload->list); - INIT_LIST_HEAD(&workload->shadow_bb); - - init_waitqueue_head(&workload->shadow_ctx_status_wq); - atomic_set(&workload->shadow_ctx_active, 0); - - workload->vgpu = vgpu; - workload->ring_id = ring_id; - workload->ctx_desc = *desc; - workload->ring_context_gpa = ring_context_gpa; - workload->rb_head = head; - workload->rb_tail = tail; - workload->rb_start = start; - workload->rb_ctl = ctl; workload->prepare = prepare_execlist_workload; workload->complete = complete_execlist_workload; - workload->status = -EINPROGRESS; workload->emulate_schedule_in = emulate_schedule_in; - workload->shadowed = false; - - if (ring_id == RCS) { - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4); - intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + - RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4); - - workload->wa_ctx.indirect_ctx.guest_gma = - indirect_ctx & INDIRECT_CTX_ADDR_MASK; - workload->wa_ctx.indirect_ctx.size = - (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * - CACHELINE_BYTES; - workload->wa_ctx.per_ctx.guest_gma = - per_ctx & PER_CTX_ADDR_MASK; - workload->wa_ctx.per_ctx.valid = per_ctx & 1; - } if (emulate_schedule_in) - workload->elsp_dwords = vgpu->execlist[ring_id].elsp_dwords; - - gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n", - workload, ring_id, head, tail, start, ctl); + 
workload->elsp_dwords = s->execlist[ring_id].elsp_dwords; gvt_dbg_el("workload %p emulate schedule_in %d\n", workload, emulate_schedule_in); - ret = prepare_mm(workload); - if (ret) { - kmem_cache_free(vgpu->workloads, workload); - return ret; - } - - /* Only scan and shadow the first workload in the queue - * as there is only one pre-allocated buf-obj for shadow. - */ - if (list_empty(workload_q_head(vgpu, ring_id))) { - intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); - intel_gvt_scan_and_shadow_workload(workload); - mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); - } - - queue_workload(workload); + intel_vgpu_queue_workload(workload); return 0; } int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) { - struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id]; - struct execlist_ctx_descriptor_format desc[2]; + struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_vgpu_execlist *execlist = &s->execlist[ring_id]; + struct execlist_ctx_descriptor_format *desc[2]; int i, ret; - desc[0] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1); - desc[1] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0); + desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0); + desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1); - if (!desc[0].valid) { + if (!desc[0]->valid) { gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n"); goto inv_desc; } for (i = 0; i < ARRAY_SIZE(desc); i++) { - if (!desc[i].valid) + if (!desc[i]->valid) continue; - if (!desc[i].privilege_access) { + if (!desc[i]->privilege_access) { gvt_vgpu_err("unexpected GGTT elsp submission\n"); goto inv_desc; } @@ -759,9 +488,9 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) /* submit workload */ for (i = 0; i < ARRAY_SIZE(desc); i++) { - if (!desc[i].valid) + if (!desc[i]->valid) continue; - ret = submit_context(vgpu, ring_id, &desc[i], i == 0); + ret = submit_context(vgpu, ring_id, desc[i], i == 0); if (ret) { gvt_vgpu_err("failed to submit desc %d\n", i); return ret; @@ -772,13 +501,14 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) inv_desc: gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n", - desc[0].udw, desc[0].ldw, desc[1].udw, desc[1].ldw); + desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw); return -EINVAL; } static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) { - struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id]; + struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_vgpu_execlist *execlist = &s->execlist[ring_id]; struct execlist_context_status_pointer_format ctx_status_ptr; u32 ctx_status_ptr_reg; @@ -791,68 +521,47 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id, _EL_OFFSET_STATUS_PTR); - ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg); ctx_status_ptr.read_ptr = 0; ctx_status_ptr.write_ptr = 0x7; vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; } -static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) +static void clean_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask) { + unsigned int tmp; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_engine_cs *engine; - struct intel_vgpu_workload *pos, *n; - unsigned int tmp; + struct intel_vgpu_submission *s = &vgpu->submission; - /* free the unsubmited workloads in the queues. 
*/ for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { - list_for_each_entry_safe(pos, n, - &vgpu->workload_q_head[engine->id], list) { - list_del_init(&pos->list); - free_workload(pos); - } - - clear_bit(engine->id, vgpu->shadow_ctx_desc_updated); - } -} - -void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu) -{ - clean_workloads(vgpu, ALL_ENGINES); - kmem_cache_destroy(vgpu->workloads); -} - -int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) -{ - enum intel_engine_id i; - struct intel_engine_cs *engine; - - /* each ring has a virtual execlist engine */ - for_each_engine(engine, vgpu->gvt->dev_priv, i) { - init_vgpu_execlist(vgpu, i); - INIT_LIST_HEAD(&vgpu->workload_q_head[i]); + kfree(s->ring_scan_buffer[engine->id]); + s->ring_scan_buffer[engine->id] = NULL; + s->ring_scan_buffer_size[engine->id] = 0; } - - vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload", - sizeof(struct intel_vgpu_workload), 0, - SLAB_HWCACHE_ALIGN, - NULL); - - if (!vgpu->workloads) - return -ENOMEM; - - return 0; } -void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu, +static void reset_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_engine_cs *engine; unsigned int tmp; - clean_workloads(vgpu, engine_mask); for_each_engine_masked(engine, dev_priv, engine_mask, tmp) init_vgpu_execlist(vgpu, engine->id); } + +static int init_execlist(struct intel_vgpu *vgpu, + unsigned long engine_mask) +{ + reset_execlist(vgpu, engine_mask); + return 0; +} + +const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = { + .name = "execlist", + .init = init_execlist, + .reset = reset_execlist, + .clean = clean_execlist, +}; diff --git a/drivers/gpu/drm/i915/gvt/execlist.h b/drivers/gpu/drm/i915/gvt/execlist.h index 7eced40a1e30..427e40e64d41 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.h +++ b/drivers/gpu/drm/i915/gvt/execlist.h @@ -37,10 +37,6 @@ struct execlist_ctx_descriptor_format { union { - u32 udw; - u32 context_id; - }; - union { u32 ldw; struct { u32 valid : 1; @@ -54,6 +50,10 @@ struct execlist_ctx_descriptor_format { u32 lrca : 20; }; }; + union { + u32 udw; + u32 context_id; + }; }; struct execlist_status_format { diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c new file mode 100644 index 000000000000..6b50fe78dc1b --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -0,0 +1,514 @@ +/* + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Kevin Tian <kevin.tian@intel.com> + * + * Contributors: + * Bing Niu <bing.niu@intel.com> + * Xu Han <xu.han@intel.com> + * Ping Gao <ping.a.gao@intel.com> + * Xiaoguang Chen <xiaoguang.chen@intel.com> + * Yang Liu <yang2.liu@intel.com> + * Tina Zhang <tina.zhang@intel.com> + * + */ + +#include <uapi/drm/drm_fourcc.h> +#include "i915_drv.h" +#include "gvt.h" + +#define PRIMARY_FORMAT_NUM 16 +struct pixel_format { + int drm_format; /* Pixel format in DRM definition */ + int bpp; /* Bits per pixel, 0 indicates invalid */ + char *desc; /* The description */ +}; + +static struct pixel_format bdw_pixel_formats[] = { + {DRM_FORMAT_C8, 8, "8-bit Indexed"}, + {DRM_FORMAT_RGB565, 16, "16-bit BGRX (5:6:5 MSB-R:G:B)"}, + {DRM_FORMAT_XRGB8888, 32, "32-bit BGRX (8:8:8:8 MSB-X:R:G:B)"}, + {DRM_FORMAT_XBGR2101010, 32, "32-bit RGBX (2:10:10:10 MSB-X:B:G:R)"}, + + {DRM_FORMAT_XRGB2101010, 32, "32-bit BGRX (2:10:10:10 MSB-X:R:G:B)"}, + {DRM_FORMAT_XBGR8888, 32, "32-bit RGBX (8:8:8:8 MSB-X:B:G:R)"}, + + /* non-supported format has bpp default to 0 */ + {0, 0, NULL}, +}; + +static struct pixel_format skl_pixel_formats[] = { + {DRM_FORMAT_YUYV, 16, "16-bit packed YUYV (8:8:8:8 MSB-V:Y2:U:Y1)"}, + {DRM_FORMAT_UYVY, 16, "16-bit packed UYVY (8:8:8:8 MSB-Y2:V:Y1:U)"}, + {DRM_FORMAT_YVYU, 16, "16-bit packed YVYU (8:8:8:8 MSB-U:Y2:V:Y1)"}, + {DRM_FORMAT_VYUY, 16, "16-bit packed VYUY (8:8:8:8 MSB-Y2:U:Y1:V)"}, + + {DRM_FORMAT_C8, 8, "8-bit Indexed"}, + {DRM_FORMAT_RGB565, 16, "16-bit BGRX (5:6:5 MSB-R:G:B)"}, + {DRM_FORMAT_ABGR8888, 32, "32-bit RGBA (8:8:8:8 MSB-A:B:G:R)"}, + {DRM_FORMAT_XBGR8888, 32, "32-bit RGBX (8:8:8:8 MSB-X:B:G:R)"}, + + {DRM_FORMAT_ARGB8888, 32, "32-bit BGRA (8:8:8:8 MSB-A:R:G:B)"}, + {DRM_FORMAT_XRGB8888, 32, "32-bit BGRX (8:8:8:8 MSB-X:R:G:B)"}, + {DRM_FORMAT_XBGR2101010, 32, "32-bit RGBX (2:10:10:10 MSB-X:B:G:R)"}, + {DRM_FORMAT_XRGB2101010, 32, "32-bit BGRX (2:10:10:10 MSB-X:R:G:B)"}, + + /* non-supported format has bpp default to 0 */ + {0, 0, NULL}, +}; + +static int bdw_format_to_drm(int format) +{ + int bdw_pixel_formats_index = 6; + + switch (format) { + case DISPPLANE_8BPP: + bdw_pixel_formats_index = 0; + break; + case DISPPLANE_BGRX565: + bdw_pixel_formats_index = 1; + break; + case DISPPLANE_BGRX888: + bdw_pixel_formats_index = 2; + break; + case DISPPLANE_RGBX101010: + bdw_pixel_formats_index = 3; + break; + case DISPPLANE_BGRX101010: + bdw_pixel_formats_index = 4; + break; + case DISPPLANE_RGBX888: + bdw_pixel_formats_index = 5; + break; + + default: + break; + } + + return bdw_pixel_formats_index; +} + +static int skl_format_to_drm(int format, bool rgb_order, bool alpha, + int yuv_order) +{ + int skl_pixel_formats_index = 12; + + switch (format) { + case PLANE_CTL_FORMAT_INDEXED: + skl_pixel_formats_index = 4; + break; + case PLANE_CTL_FORMAT_RGB_565: + skl_pixel_formats_index = 5; + break; + case PLANE_CTL_FORMAT_XRGB_8888: + if (rgb_order) + skl_pixel_formats_index = alpha ? 6 : 7; + else + skl_pixel_formats_index = alpha ? 8 : 9; + break; + case PLANE_CTL_FORMAT_XRGB_2101010: + skl_pixel_formats_index = rgb_order ? 
10 : 11; + break; + case PLANE_CTL_FORMAT_YUV422: + skl_pixel_formats_index = yuv_order >> 16; + if (skl_pixel_formats_index > 3) + return -EINVAL; + break; + + default: + break; + } + + return skl_pixel_formats_index; +} + +static u32 intel_vgpu_get_stride(struct intel_vgpu *vgpu, int pipe, + u32 tiled, int stride_mask, int bpp) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + + u32 stride_reg = vgpu_vreg_t(vgpu, DSPSTRIDE(pipe)) & stride_mask; + u32 stride = stride_reg; + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + switch (tiled) { + case PLANE_CTL_TILED_LINEAR: + stride = stride_reg * 64; + break; + case PLANE_CTL_TILED_X: + stride = stride_reg * 512; + break; + case PLANE_CTL_TILED_Y: + stride = stride_reg * 128; + break; + case PLANE_CTL_TILED_YF: + if (bpp == 8) + stride = stride_reg * 64; + else if (bpp == 16 || bpp == 32 || bpp == 64) + stride = stride_reg * 128; + else + gvt_dbg_core("skl: unsupported bpp:%d\n", bpp); + break; + default: + gvt_dbg_core("skl: unsupported tile format:%x\n", + tiled); + } + } + + return stride; +} + +static int get_active_pipe(struct intel_vgpu *vgpu) +{ + int i; + + for (i = 0; i < I915_MAX_PIPES; i++) + if (pipe_is_enabled(vgpu, i)) + break; + + return i; +} + +/** + * intel_vgpu_decode_primary_plane - Decode primary plane + * @vgpu: input vgpu + * @plane: primary plane to save decoded info + * This function is called for decoding plane + * + * Returns: + * 0 on success, non-zero if failed. + */ +int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_primary_plane_format *plane) +{ + u32 val, fmt; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + int pipe; + + pipe = get_active_pipe(vgpu); + if (pipe >= I915_MAX_PIPES) + return -ENODEV; + + val = vgpu_vreg_t(vgpu, DSPCNTR(pipe)); + plane->enabled = !!(val & DISPLAY_PLANE_ENABLE); + if (!plane->enabled) + return -ENODEV; + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + plane->tiled = (val & PLANE_CTL_TILED_MASK) >> + _PLANE_CTL_TILED_SHIFT; + fmt = skl_format_to_drm( + val & PLANE_CTL_FORMAT_MASK, + val & PLANE_CTL_ORDER_RGBX, + val & PLANE_CTL_ALPHA_MASK, + val & PLANE_CTL_YUV422_ORDER_MASK); + + if (fmt >= ARRAY_SIZE(skl_pixel_formats)) { + gvt_vgpu_err("Out-of-bounds pixel format index\n"); + return -EINVAL; + } + + plane->bpp = skl_pixel_formats[fmt].bpp; + plane->drm_format = skl_pixel_formats[fmt].drm_format; + } else { + plane->tiled = !!(val & DISPPLANE_TILED); + fmt = bdw_format_to_drm(val & DISPPLANE_PIXFORMAT_MASK); + plane->bpp = bdw_pixel_formats[fmt].bpp; + plane->drm_format = bdw_pixel_formats[fmt].drm_format; + } + + if (!plane->bpp) { + gvt_vgpu_err("Non-supported pixel format (0x%x)\n", fmt); + return -EINVAL; + } + + plane->hw_format = fmt; + + plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK; + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); + if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->stride = intel_vgpu_get_stride(vgpu, pipe, (plane->tiled << 10), + (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) ? 
+ (_PRI_PLANE_STRIDE_MASK >> 6) : + _PRI_PLANE_STRIDE_MASK, plane->bpp); + + plane->width = (vgpu_vreg_t(vgpu, PIPESRC(pipe)) & _PIPE_H_SRCSZ_MASK) >> + _PIPE_H_SRCSZ_SHIFT; + plane->width += 1; + plane->height = (vgpu_vreg_t(vgpu, PIPESRC(pipe)) & + _PIPE_V_SRCSZ_MASK) >> _PIPE_V_SRCSZ_SHIFT; + plane->height += 1; /* raw height is one minus the real value */ + + val = vgpu_vreg_t(vgpu, DSPTILEOFF(pipe)); + plane->x_offset = (val & _PRI_PLANE_X_OFF_MASK) >> + _PRI_PLANE_X_OFF_SHIFT; + plane->y_offset = (val & _PRI_PLANE_Y_OFF_MASK) >> + _PRI_PLANE_Y_OFF_SHIFT; + + return 0; +} + +#define CURSOR_FORMAT_NUM (1 << 6) +struct cursor_mode_format { + int drm_format; /* Pixel format in DRM definition */ + u8 bpp; /* Bits per pixel; 0 indicates invalid */ + u32 width; /* In pixel */ + u32 height; /* In lines */ + char *desc; /* The description */ +}; + +static struct cursor_mode_format cursor_pixel_formats[] = { + {DRM_FORMAT_ARGB8888, 32, 128, 128, "128x128 32bpp ARGB"}, + {DRM_FORMAT_ARGB8888, 32, 256, 256, "256x256 32bpp ARGB"}, + {DRM_FORMAT_ARGB8888, 32, 64, 64, "64x64 32bpp ARGB"}, + {DRM_FORMAT_ARGB8888, 32, 64, 64, "64x64 32bpp ARGB"}, + + /* non-supported format has bpp default to 0 */ + {0, 0, 0, 0, NULL}, +}; + +static int cursor_mode_to_drm(int mode) +{ + int cursor_pixel_formats_index = 4; + + switch (mode) { + case CURSOR_MODE_128_ARGB_AX: + cursor_pixel_formats_index = 0; + break; + case CURSOR_MODE_256_ARGB_AX: + cursor_pixel_formats_index = 1; + break; + case CURSOR_MODE_64_ARGB_AX: + cursor_pixel_formats_index = 2; + break; + case CURSOR_MODE_64_32B_AX: + cursor_pixel_formats_index = 3; + break; + + default: + break; + } + + return cursor_pixel_formats_index; +} + +/** + * intel_vgpu_decode_cursor_plane - Decode cursor plane + * @vgpu: input vgpu + * @plane: cursor plane to save decoded info + * This function is called for decoding plane + * + * Returns: + * 0 on success, non-zero if failed.
+ */ +int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_cursor_plane_format *plane) +{ + u32 val, mode, index; + u32 alpha_plane, alpha_force; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + int pipe; + + pipe = get_active_pipe(vgpu); + if (pipe >= I915_MAX_PIPES) + return -ENODEV; + + val = vgpu_vreg_t(vgpu, CURCNTR(pipe)); + mode = val & CURSOR_MODE; + plane->enabled = (mode != CURSOR_MODE_DISABLE); + if (!plane->enabled) + return -ENODEV; + + index = cursor_mode_to_drm(mode); + + if (!cursor_pixel_formats[index].bpp) { + gvt_vgpu_err("Non-supported cursor mode (0x%x)\n", mode); + return -EINVAL; + } + plane->mode = mode; + plane->bpp = cursor_pixel_formats[index].bpp; + plane->drm_format = cursor_pixel_formats[index].drm_format; + plane->width = cursor_pixel_formats[index].width; + plane->height = cursor_pixel_formats[index].height; + + alpha_plane = (val & _CURSOR_ALPHA_PLANE_MASK) >> + _CURSOR_ALPHA_PLANE_SHIFT; + alpha_force = (val & _CURSOR_ALPHA_FORCE_MASK) >> + _CURSOR_ALPHA_FORCE_SHIFT; + if (alpha_plane || alpha_force) + gvt_dbg_core("alpha_plane=0x%x, alpha_force=0x%x\n", + alpha_plane, alpha_force); + + plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK; + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); + if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + val = vgpu_vreg_t(vgpu, CURPOS(pipe)); + plane->x_pos = (val & _CURSOR_POS_X_MASK) >> _CURSOR_POS_X_SHIFT; + plane->x_sign = (val & _CURSOR_SIGN_X_MASK) >> _CURSOR_SIGN_X_SHIFT; + plane->y_pos = (val & _CURSOR_POS_Y_MASK) >> _CURSOR_POS_Y_SHIFT; + plane->y_sign = (val & _CURSOR_SIGN_Y_MASK) >> _CURSOR_SIGN_Y_SHIFT; + + return 0; +} + +#define SPRITE_FORMAT_NUM (1 << 3) + +static struct pixel_format sprite_pixel_formats[SPRITE_FORMAT_NUM] = { + [0x0] = {DRM_FORMAT_YUV422, 16, "YUV 16-bit 4:2:2 packed"}, + [0x1] = {DRM_FORMAT_XRGB2101010, 32, "RGB 32-bit 2:10:10:10"}, + [0x2] = {DRM_FORMAT_XRGB8888, 32, "RGB 32-bit 8:8:8:8"}, + [0x4] = {DRM_FORMAT_AYUV, 32, + "YUV 32-bit 4:4:4 packed (8:8:8:8 MSB-X:Y:U:V)"}, +}; + +/** + * intel_vgpu_decode_sprite_plane - Decode sprite plane + * @vgpu: input vgpu + * @plane: sprite plane to save decoded info + * This function is called for decoding plane + * + * Returns: + * 0 on success, non-zero if failed. 
+ */ +int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_sprite_plane_format *plane) +{ + u32 val, fmt; + u32 color_order, yuv_order; + int drm_format; + int pipe; + + pipe = get_active_pipe(vgpu); + if (pipe >= I915_MAX_PIPES) + return -ENODEV; + + val = vgpu_vreg_t(vgpu, SPRCTL(pipe)); + plane->enabled = !!(val & SPRITE_ENABLE); + if (!plane->enabled) + return -ENODEV; + + plane->tiled = !!(val & SPRITE_TILED); + color_order = !!(val & SPRITE_RGB_ORDER_RGBX); + yuv_order = (val & SPRITE_YUV_BYTE_ORDER_MASK) >> + _SPRITE_YUV_ORDER_SHIFT; + + fmt = (val & SPRITE_PIXFORMAT_MASK) >> _SPRITE_FMT_SHIFT; + if (!sprite_pixel_formats[fmt].bpp) { + gvt_vgpu_err("Non-supported pixel format (0x%x)\n", fmt); + return -EINVAL; + } + plane->hw_format = fmt; + plane->bpp = sprite_pixel_formats[fmt].bpp; + drm_format = sprite_pixel_formats[fmt].drm_format; + + /* Order of RGB values in an RGBxxx buffer may be ordered RGB or + * BGR depending on the state of the color_order field + */ + if (!color_order) { + if (drm_format == DRM_FORMAT_XRGB2101010) + drm_format = DRM_FORMAT_XBGR2101010; + else if (drm_format == DRM_FORMAT_XRGB8888) + drm_format = DRM_FORMAT_XBGR8888; + } + + if (drm_format == DRM_FORMAT_YUV422) { + switch (yuv_order) { + case 0: + drm_format = DRM_FORMAT_YUYV; + break; + case 1: + drm_format = DRM_FORMAT_UYVY; + break; + case 2: + drm_format = DRM_FORMAT_YVYU; + break; + case 3: + drm_format = DRM_FORMAT_VYUY; + break; + default: + /* yuv_order has only 2 bits */ + break; + } + } + + plane->drm_format = drm_format; + + plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK; + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); + if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->stride = vgpu_vreg_t(vgpu, SPRSTRIDE(pipe)) & + _SPRITE_STRIDE_MASK; + + val = vgpu_vreg_t(vgpu, SPRSIZE(pipe)); + plane->height = (val & _SPRITE_SIZE_HEIGHT_MASK) >> + _SPRITE_SIZE_HEIGHT_SHIFT; + plane->width = (val & _SPRITE_SIZE_WIDTH_MASK) >> + _SPRITE_SIZE_WIDTH_SHIFT; + plane->height += 1; /* raw height is one minus the real value */ + plane->width += 1; /* raw width is one minus the real value */ + + val = vgpu_vreg_t(vgpu, SPRPOS(pipe)); + plane->x_pos = (val & _SPRITE_POS_X_MASK) >> _SPRITE_POS_X_SHIFT; + plane->y_pos = (val & _SPRITE_POS_Y_MASK) >> _SPRITE_POS_Y_SHIFT; + + val = vgpu_vreg_t(vgpu, SPROFFSET(pipe)); + plane->x_offset = (val & _SPRITE_OFFSET_START_X_MASK) >> + _SPRITE_OFFSET_START_X_SHIFT; + plane->y_offset = (val & _SPRITE_OFFSET_START_Y_MASK) >> + _SPRITE_OFFSET_START_Y_SHIFT; + + return 0; +} diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.h b/drivers/gpu/drm/i915/gvt/fb_decoder.h new file mode 100644 index 000000000000..cb055f3c81a2 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.h @@ -0,0 +1,169 @@ +/* + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Kevin Tian <kevin.tian@intel.com> + * + * Contributors: + * Bing Niu <bing.niu@intel.com> + * Xu Han <xu.han@intel.com> + * Ping Gao <ping.a.gao@intel.com> + * Xiaoguang Chen <xiaoguang.chen@intel.com> + * Yang Liu <yang2.liu@intel.com> + * Tina Zhang <tina.zhang@intel.com> + * + */ + +#ifndef _GVT_FB_DECODER_H_ +#define _GVT_FB_DECODER_H_ + +#define _PLANE_CTL_FORMAT_SHIFT 24 +#define _PLANE_CTL_TILED_SHIFT 10 +#define _PIPE_V_SRCSZ_SHIFT 0 +#define _PIPE_V_SRCSZ_MASK (0xfff << _PIPE_V_SRCSZ_SHIFT) +#define _PIPE_H_SRCSZ_SHIFT 16 +#define _PIPE_H_SRCSZ_MASK (0x1fff << _PIPE_H_SRCSZ_SHIFT) + +#define _PRI_PLANE_FMT_SHIFT 26 +#define _PRI_PLANE_STRIDE_MASK (0x3ff << 6) +#define _PRI_PLANE_X_OFF_SHIFT 0 +#define _PRI_PLANE_X_OFF_MASK (0x1fff << _PRI_PLANE_X_OFF_SHIFT) +#define _PRI_PLANE_Y_OFF_SHIFT 16 +#define _PRI_PLANE_Y_OFF_MASK (0xfff << _PRI_PLANE_Y_OFF_SHIFT) + +#define _CURSOR_MODE 0x3f +#define _CURSOR_ALPHA_FORCE_SHIFT 8 +#define _CURSOR_ALPHA_FORCE_MASK (0x3 << _CURSOR_ALPHA_FORCE_SHIFT) +#define _CURSOR_ALPHA_PLANE_SHIFT 10 +#define _CURSOR_ALPHA_PLANE_MASK (0x3 << _CURSOR_ALPHA_PLANE_SHIFT) +#define _CURSOR_POS_X_SHIFT 0 +#define _CURSOR_POS_X_MASK (0x1fff << _CURSOR_POS_X_SHIFT) +#define _CURSOR_SIGN_X_SHIFT 15 +#define _CURSOR_SIGN_X_MASK (1 << _CURSOR_SIGN_X_SHIFT) +#define _CURSOR_POS_Y_SHIFT 16 +#define _CURSOR_POS_Y_MASK (0xfff << _CURSOR_POS_Y_SHIFT) +#define _CURSOR_SIGN_Y_SHIFT 31 +#define _CURSOR_SIGN_Y_MASK (1 << _CURSOR_SIGN_Y_SHIFT) + +#define _SPRITE_FMT_SHIFT 25 +#define _SPRITE_COLOR_ORDER_SHIFT 20 +#define _SPRITE_YUV_ORDER_SHIFT 16 +#define _SPRITE_STRIDE_SHIFT 6 +#define _SPRITE_STRIDE_MASK (0x1ff << _SPRITE_STRIDE_SHIFT) +#define _SPRITE_SIZE_WIDTH_SHIFT 0 +#define _SPRITE_SIZE_HEIGHT_SHIFT 16 +#define _SPRITE_SIZE_WIDTH_MASK (0x1fff << _SPRITE_SIZE_WIDTH_SHIFT) +#define _SPRITE_SIZE_HEIGHT_MASK (0xfff << _SPRITE_SIZE_HEIGHT_SHIFT) +#define _SPRITE_POS_X_SHIFT 0 +#define _SPRITE_POS_Y_SHIFT 16 +#define _SPRITE_POS_X_MASK (0x1fff << _SPRITE_POS_X_SHIFT) +#define _SPRITE_POS_Y_MASK (0xfff << _SPRITE_POS_Y_SHIFT) +#define _SPRITE_OFFSET_START_X_SHIFT 0 +#define _SPRITE_OFFSET_START_Y_SHIFT 16 +#define _SPRITE_OFFSET_START_X_MASK (0x1fff << _SPRITE_OFFSET_START_X_SHIFT) +#define _SPRITE_OFFSET_START_Y_MASK (0xfff << _SPRITE_OFFSET_START_Y_SHIFT) + +enum GVT_FB_EVENT { + FB_MODE_SET_START = 1, + FB_MODE_SET_END, + FB_DISPLAY_FLIP, +}; + 
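[Editor's aside on the stride decode added in fb_decoder.c above: the DSPSTRIDE register holds a count of tiling-specific units, not bytes, which is why intel_vgpu_get_stride() multiplies by a per-tiling factor. A minimal standalone sketch of the same mapping follows; the enum and helper names are hypothetical and the multipliers are simply the ones the patch itself applies for SKL/KBL, not a definitive statement of the hardware spec.]

/* Sketch only: mirrors the SKL/KBL stride units used by
 * intel_vgpu_get_stride(). The register value counts units whose
 * size depends on the tiling mode; for YF it also depends on bpp.
 */
enum sketch_tiling {
	SKETCH_LINEAR,
	SKETCH_TILE_X,
	SKETCH_TILE_Y,
	SKETCH_TILE_YF,
};

static u32 sketch_stride_to_bytes(enum sketch_tiling tiling,
				  u32 stride_reg, int bpp)
{
	switch (tiling) {
	case SKETCH_LINEAR:
		return stride_reg * 64;		/* 64-byte units */
	case SKETCH_TILE_X:
		return stride_reg * 512;	/* 512-byte tile rows */
	case SKETCH_TILE_Y:
		return stride_reg * 128;	/* 128-byte units */
	case SKETCH_TILE_YF:
		/* 8 bpp uses 64-byte units; 16/32/64 bpp use 128 */
		return stride_reg * (bpp == 8 ? 64 : 128);
	}
	return stride_reg;			/* unknown mode: raw value */
}
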
+enum DDI_PORT { + DDI_PORT_NONE = 0, + DDI_PORT_B = 1, + DDI_PORT_C = 2, + DDI_PORT_D = 3, + DDI_PORT_E = 4 +}; + +struct intel_gvt; + +/* color space conversion and gamma correction are not included */ +struct intel_vgpu_primary_plane_format { + u8 enabled; /* plane is enabled */ + u8 tiled; /* X-tiled */ + u8 bpp; /* bits per pixel */ + u32 hw_format; /* format field in the PRI_CTL register */ + u32 drm_format; /* format in DRM definition */ + u32 base; /* framebuffer base in graphics memory */ + u64 base_gpa; + u32 x_offset; /* in pixels */ + u32 y_offset; /* in lines */ + u32 width; /* in pixels */ + u32 height; /* in lines */ + u32 stride; /* in bytes */ +}; + +struct intel_vgpu_sprite_plane_format { + u8 enabled; /* plane is enabled */ + u8 tiled; /* X-tiled */ + u8 bpp; /* bits per pixel */ + u32 hw_format; /* format field in the SPR_CTL register */ + u32 drm_format; /* format in DRM definition */ + u32 base; /* sprite base in graphics memory */ + u64 base_gpa; + u32 x_pos; /* in pixels */ + u32 y_pos; /* in lines */ + u32 x_offset; /* in pixels */ + u32 y_offset; /* in lines */ + u32 width; /* in pixels */ + u32 height; /* in lines */ + u32 stride; /* in bytes */ +}; + +struct intel_vgpu_cursor_plane_format { + u8 enabled; + u8 mode; /* cursor mode select */ + u8 bpp; /* bits per pixel */ + u32 drm_format; /* format in DRM definition */ + u32 base; /* cursor base in graphics memory */ + u64 base_gpa; + u32 x_pos; /* in pixels */ + u32 y_pos; /* in lines */ + u8 x_sign; /* X Position Sign */ + u8 y_sign; /* Y Position Sign */ + u32 width; /* in pixels */ + u32 height; /* in lines */ + u32 x_hot; /* in pixels */ + u32 y_hot; /* in pixels */ +}; + +struct intel_vgpu_pipe_format { + struct intel_vgpu_primary_plane_format primary; + struct intel_vgpu_sprite_plane_format sprite; + struct intel_vgpu_cursor_plane_format cursor; + enum DDI_PORT ddi_port; /* the DDI port that pipe is connected to */ +}; + +struct intel_vgpu_fb_format { + struct intel_vgpu_pipe_format pipes[I915_MAX_PIPES]; +}; + +int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_primary_plane_format *plane); +int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_cursor_plane_format *plane); +int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_sprite_plane_format *plane); + +#endif diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index a26c1705430e..a73e1d418c22 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -66,20 +66,23 @@ static struct bin_attribute firmware_attr = { .mmap = NULL, }; -static int expose_firmware_sysfs(struct intel_gvt *gvt) +static int mmio_snapshot_handler(struct intel_gvt *gvt, u32 offset, void *data) { struct drm_i915_private *dev_priv = gvt->dev_priv; + + *(u32 *)(data + offset) = I915_READ_NOTRACE(_MMIO(offset)); + return 0; +} + +static int expose_firmware_sysfs(struct intel_gvt *gvt) +{ struct intel_gvt_device_info *info = &gvt->device_info; struct pci_dev *pdev = gvt->dev_priv->drm.pdev; - struct intel_gvt_mmio_info *e; - struct gvt_mmio_block *block = gvt->mmio.mmio_block; - int num = gvt->mmio.num_mmio_block; struct gvt_firmware_header *h; void *firmware; void *p; unsigned long size, crc32_start; - int i, j; - int ret; + int i, ret; size = sizeof(*h) + info->mmio_size + info->cfg_space_size; firmware = vzalloc(size); @@ -104,15 +107,8 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) p = firmware + h->mmio_offset; - 
hash_for_each(gvt->mmio.mmio_info_table, i, e, node) - *(u32 *)(p + e->offset) = I915_READ_NOTRACE(_MMIO(e->offset)); - - for (i = 0; i < num; i++, block++) { - for (j = 0; j < block->size; j += 4) - *(u32 *)(p + INTEL_GVT_MMIO_OFFSET(block->offset) + j) = - I915_READ_NOTRACE(_MMIO(INTEL_GVT_MMIO_OFFSET( - block->offset) + j)); - } + /* Take a snapshot of hw mmio registers. */ + intel_gvt_for_each_tracked_mmio(gvt, mmio_snapshot_handler, p); memcpy(gvt->firmware.mmio, p, info->mmio_size); diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index e6dfc3331f4b..8d5317d0122d 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -94,12 +94,12 @@ int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index, u64 h_addr; int ret; - ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << GTT_PAGE_SHIFT, + ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT, &h_addr); if (ret) return ret; - *h_index = h_addr >> GTT_PAGE_SHIFT; + *h_index = h_addr >> I915_GTT_PAGE_SHIFT; return 0; } @@ -109,12 +109,12 @@ int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, u64 g_addr; int ret; - ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << GTT_PAGE_SHIFT, + ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT, &g_addr); if (ret) return ret; - *g_index = g_addr >> GTT_PAGE_SHIFT; + *g_index = g_addr >> I915_GTT_PAGE_SHIFT; return 0; } @@ -156,13 +156,15 @@ int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, struct gtt_type_table_entry { int entry_type; + int pt_type; int next_pt_type; int pse_entry_type; }; -#define GTT_TYPE_TABLE_ENTRY(type, e_type, npt_type, pse_type) \ +#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \ [type] = { \ .entry_type = e_type, \ + .pt_type = cpt_type, \ .next_pt_type = npt_type, \ .pse_entry_type = pse_type, \ } @@ -170,55 +172,68 @@ struct gtt_type_table_entry { static struct gtt_type_table_entry gtt_type_table[] = { GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY, GTT_TYPE_PPGTT_ROOT_L4_ENTRY, + GTT_TYPE_INVALID, GTT_TYPE_PPGTT_PML4_PT, GTT_TYPE_INVALID), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT, GTT_TYPE_PPGTT_PML4_ENTRY, + GTT_TYPE_PPGTT_PML4_PT, GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_INVALID), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY, GTT_TYPE_PPGTT_PML4_ENTRY, + GTT_TYPE_PPGTT_PML4_PT, GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_INVALID), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_PPGTT_PDP_ENTRY, + GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_1G_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY, GTT_TYPE_PPGTT_ROOT_L3_ENTRY, + GTT_TYPE_INVALID, GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_1G_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY, GTT_TYPE_PPGTT_PDP_ENTRY, + GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_1G_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PDE_ENTRY, + GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_PPGTT_PTE_2M_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY, GTT_TYPE_PPGTT_PDE_ENTRY, + GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_PPGTT_PTE_2M_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_PPGTT_PTE_4K_ENTRY, + GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_INVALID, GTT_TYPE_INVALID), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY, GTT_TYPE_PPGTT_PTE_4K_ENTRY, + GTT_TYPE_PPGTT_PTE_PT, GTT_TYPE_INVALID, GTT_TYPE_INVALID), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY, GTT_TYPE_PPGTT_PDE_ENTRY, + 
GTT_TYPE_PPGTT_PDE_PT, GTT_TYPE_INVALID, GTT_TYPE_PPGTT_PTE_2M_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY, GTT_TYPE_PPGTT_PDP_ENTRY, + GTT_TYPE_PPGTT_PDP_PT, GTT_TYPE_INVALID, GTT_TYPE_PPGTT_PTE_1G_ENTRY), GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE, GTT_TYPE_GGTT_PTE, GTT_TYPE_INVALID, + GTT_TYPE_INVALID, GTT_TYPE_INVALID), }; @@ -227,6 +242,11 @@ static inline int get_next_pt_type(int type) return gtt_type_table[type].next_pt_type; } +static inline int get_pt_type(int type) +{ + return gtt_type_table[type].pt_type; +} + static inline int get_entry_type(int type) { return gtt_type_table[type].entry_type; @@ -311,9 +331,9 @@ static inline int gtt_set_entry64(void *pt, #define GTT_HAW 46 -#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30 + 1)) - 1) << 30) -#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21 + 1)) - 1) << 21) -#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12 + 1)) - 1) << 12) +#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30)) - 1) << 30) +#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21)) - 1) << 21) +#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12)) - 1) << 12) static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) { @@ -351,7 +371,7 @@ static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) return false; e->type = get_entry_type(e->type); - if (!(e->val64 & (1 << 7))) + if (!(e->val64 & BIT(7))) return false; e->type = get_pse_type(e->type); @@ -369,12 +389,17 @@ static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) return (e->val64 != 0); else - return (e->val64 & (1 << 0)); + return (e->val64 & BIT(0)); } static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) { - e->val64 &= ~(1 << 0); + e->val64 &= ~BIT(0); +} + +static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) +{ + e->val64 |= BIT(0); } /* @@ -382,7 +407,7 @@ static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) */ static unsigned long gma_to_ggtt_pte_index(unsigned long gma) { - unsigned long x = (gma >> GTT_PAGE_SHIFT); + unsigned long x = (gma >> I915_GTT_PAGE_SHIFT); trace_gma_index(__func__, gma, x); return x; @@ -406,6 +431,7 @@ static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = { .get_entry = gtt_get_entry64, .set_entry = gtt_set_entry64, .clear_present = gtt_entry_clear_present, + .set_present = gtt_entry_set_present, .test_present = gen8_gtt_test_present, .test_pse = gen8_gtt_test_pse, .get_pfn = gen8_gtt_get_pfn, @@ -494,7 +520,7 @@ static inline int ppgtt_spt_get_entry( return -EINVAL; ret = ops->get_entry(page_table, e, index, guest, - spt->guest_page.gfn << GTT_PAGE_SHIFT, + spt->guest_page.track.gfn << I915_GTT_PAGE_SHIFT, spt->vgpu); if (ret) return ret; @@ -516,7 +542,7 @@ static inline int ppgtt_spt_set_entry( return -EINVAL; return ops->set_entry(page_table, e, index, guest, - spt->guest_page.gfn << GTT_PAGE_SHIFT, + spt->guest_page.track.gfn << I915_GTT_PAGE_SHIFT, spt->vgpu); } @@ -537,88 +563,103 @@ static inline int ppgtt_spt_set_entry( spt->shadow_page.type, e, index, false) /** - * intel_vgpu_init_guest_page - init a guest page data structure + * intel_vgpu_init_page_track - init a page track data structure * @vgpu: a vGPU - * @p: a guest page data structure + * @t: a page track data structure * @gfn: guest memory page frame number - * @handler: function will be called when target guest memory page has + * @handler: the function will be called when target guest memory page has * been modified. * - * This function is called when user wants to track a guest memory page. 
+ * This function is called when a user wants to prepare a page track data + * structure to track a guest memory page. * * Returns: * Zero on success, negative error code if failed. */ -int intel_vgpu_init_guest_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *p, +int intel_vgpu_init_page_track(struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t, unsigned long gfn, int (*handler)(void *, u64, void *, int), void *data) { - INIT_HLIST_NODE(&p->node); + INIT_HLIST_NODE(&t->node); - p->writeprotection = false; - p->gfn = gfn; - p->handler = handler; - p->data = data; - p->oos_page = NULL; - p->write_cnt = 0; + t->tracked = false; + t->gfn = gfn; + t->handler = handler; + t->data = data; - hash_add(vgpu->gtt.guest_page_hash_table, &p->node, p->gfn); + hash_add(vgpu->gtt.tracked_guest_page_hash_table, &t->node, t->gfn); return 0; } -static int detach_oos_page(struct intel_vgpu *vgpu, - struct intel_vgpu_oos_page *oos_page); - /** - * intel_vgpu_clean_guest_page - release the resource owned by guest page data - * structure + * intel_vgpu_clean_page_track - release a page track data structure * @vgpu: a vGPU - * @p: a tracked guest page + * @t: a page track data structure * - * This function is called when user tries to stop tracking a guest memory - * page. + * This function is called before a user frees a page track data structure. */ -void intel_vgpu_clean_guest_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *p) +void intel_vgpu_clean_page_track(struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t) { - if (!hlist_unhashed(&p->node)) - hash_del(&p->node); - - if (p->oos_page) - detach_oos_page(vgpu, p->oos_page); + if (!hlist_unhashed(&t->node)) + hash_del(&t->node); - if (p->writeprotection) - intel_gvt_hypervisor_unset_wp_page(vgpu, p); + if (t->tracked) + intel_gvt_hypervisor_disable_page_track(vgpu, t); } /** - * intel_vgpu_find_guest_page - find a guest page data structure by GFN. + * intel_vgpu_find_tracked_page - find a tracked guest page * @vgpu: a vGPU * @gfn: guest memory page frame number * - * This function is called when emulation logic wants to know if a trapped GFN - * is a tracked guest page. + * This function is called when the emulation layer wants to figure out if a + * trapped GFN is a tracked guest page. * * Returns: - * Pointer to guest page data structure, NULL if failed. + * Pointer to page track data structure, NULL if not found.
*/ -struct intel_vgpu_guest_page *intel_vgpu_find_guest_page( +struct intel_vgpu_page_track *intel_vgpu_find_tracked_page( struct intel_vgpu *vgpu, unsigned long gfn) { - struct intel_vgpu_guest_page *p; + struct intel_vgpu_page_track *t; - hash_for_each_possible(vgpu->gtt.guest_page_hash_table, - p, node, gfn) { - if (p->gfn == gfn) - return p; + hash_for_each_possible(vgpu->gtt.tracked_guest_page_hash_table, + t, node, gfn) { + if (t->gfn == gfn) + return t; } return NULL; } +static int init_guest_page(struct intel_vgpu *vgpu, + struct intel_vgpu_guest_page *p, + unsigned long gfn, + int (*handler)(void *, u64, void *, int), + void *data) +{ + p->oos_page = NULL; + p->write_cnt = 0; + + return intel_vgpu_init_page_track(vgpu, &p->track, gfn, handler, data); +} + +static int detach_oos_page(struct intel_vgpu *vgpu, + struct intel_vgpu_oos_page *oos_page); + +static void clean_guest_page(struct intel_vgpu *vgpu, + struct intel_vgpu_guest_page *p) +{ + if (p->oos_page) + detach_oos_page(vgpu, p->oos_page); + + intel_vgpu_clean_page_track(vgpu, &p->track); +} + static inline int init_shadow_page(struct intel_vgpu *vgpu, - struct intel_vgpu_shadow_page *p, int type) + struct intel_vgpu_shadow_page *p, int type, bool hash) { struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; dma_addr_t daddr; @@ -634,8 +675,9 @@ static inline int init_shadow_page(struct intel_vgpu *vgpu, INIT_HLIST_NODE(&p->node); - p->mfn = daddr >> GTT_PAGE_SHIFT; - hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn); + p->mfn = daddr >> I915_GTT_PAGE_SHIFT; + if (hash) + hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn); return 0; } @@ -644,7 +686,7 @@ static inline void clean_shadow_page(struct intel_vgpu *vgpu, { struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; - dma_unmap_page(kdev, p->mfn << GTT_PAGE_SHIFT, 4096, + dma_unmap_page(kdev, p->mfn << I915_GTT_PAGE_SHIFT, 4096, PCI_DMA_BIDIRECTIONAL); if (!hlist_unhashed(&p->node)) @@ -664,6 +706,9 @@ static inline struct intel_vgpu_shadow_page *find_shadow_page( return NULL; } +#define page_track_to_guest_page(ptr) \ + container_of(ptr, struct intel_vgpu_guest_page, track) + #define guest_page_to_ppgtt_spt(ptr) \ container_of(ptr, struct intel_vgpu_ppgtt_spt, guest_page) @@ -697,7 +742,7 @@ static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt) trace_spt_free(spt->vgpu->id, spt, spt->shadow_page.type); clean_shadow_page(spt->vgpu, &spt->shadow_page); - intel_vgpu_clean_guest_page(spt->vgpu, &spt->guest_page); + clean_guest_page(spt->vgpu, &spt->guest_page); list_del_init(&spt->post_shadow_list); free_spt(spt); @@ -713,22 +758,24 @@ static void ppgtt_free_all_shadow_page(struct intel_vgpu *vgpu) ppgtt_free_shadow_page(shadow_page_to_ppgtt_spt(sp)); } -static int ppgtt_handle_guest_write_page_table_bytes(void *gp, +static int ppgtt_handle_guest_write_page_table_bytes( + struct intel_vgpu_guest_page *gpt, u64 pa, void *p_data, int bytes); -static int ppgtt_write_protection_handler(void *gp, u64 pa, +static int ppgtt_write_protection_handler(void *data, u64 pa, void *p_data, int bytes) { - struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp; + struct intel_vgpu_page_track *t = data; + struct intel_vgpu_guest_page *p = page_track_to_guest_page(t); int ret; if (bytes != 4 && bytes != 8) return -EINVAL; - if (!gpt->writeprotection) + if (!t->tracked) return -EINVAL; - ret = ppgtt_handle_guest_write_page_table_bytes(gp, + ret = ppgtt_handle_guest_write_page_table_bytes(p, pa, p_data, bytes); if (ret) return ret; 
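[Editor's aside on the page-track refactor above: intel_vgpu_guest_page is narrowed to PPGTT-specific state, while the gfn/handler bookkeeping moves into intel_vgpu_page_track. A minimal sketch of how a trapped guest write would be routed through the new structure; the dispatch helper below is hypothetical and not part of this patch (the real caller sits in the MMIO emulation path), with error handling elided.]

/* Sketch: dispatch an emulated guest write at guest physical address
 * pa to the handler registered for its page frame. Assumes the hash
 * table was populated by intel_vgpu_init_page_track().
 */
static int sketch_route_page_write(struct intel_vgpu *vgpu, u64 pa,
				   void *p_data, int bytes)
{
	struct intel_vgpu_page_track *t;

	t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT);
	if (!t || !t->tracked)
		return -ENXIO;	/* not tracked: fall back to default path */

	/* For shadowed PPGTT pages this lands in
	 * ppgtt_write_protection_handler() above.
	 */
	return t->handler(t, pa, p_data, bytes);
}
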
@@ -762,13 +809,13 @@ retry: * TODO: guest page type may be different from shadow page type, * when we support PSE page in the future. */ - ret = init_shadow_page(vgpu, &spt->shadow_page, type); + ret = init_shadow_page(vgpu, &spt->shadow_page, type, true); if (ret) { gvt_vgpu_err("fail to initialize shadow page for spt\n"); goto err; } - ret = intel_vgpu_init_guest_page(vgpu, &spt->guest_page, + ret = init_guest_page(vgpu, &spt->guest_page, gfn, ppgtt_write_protection_handler, NULL); if (ret) { gvt_vgpu_err("fail to initialize guest page for spt\n"); @@ -798,7 +845,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page( ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) #define pt_entries(spt) \ - (GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) + (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) #define for_each_present_guest_entry(spt, e, i) \ for (i = 0; i < pt_entries(spt); i++) \ @@ -856,7 +903,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) int v = atomic_read(&spt->refcount); trace_spt_change(spt->vgpu->id, "die", spt, - spt->guest_page.gfn, spt->shadow_page.type); + spt->guest_page.track.gfn, spt->shadow_page.type); trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); @@ -878,7 +925,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) } release: trace_spt_change(spt->vgpu->id, "release", spt, - spt->guest_page.gfn, spt->shadow_page.type); + spt->guest_page.track.gfn, spt->shadow_page.type); ppgtt_free_shadow_page(spt); return 0; fail: @@ -895,6 +942,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s = NULL; struct intel_vgpu_guest_page *g; + struct intel_vgpu_page_track *t; int ret; if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(we->type)))) { @@ -902,8 +950,9 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( goto fail; } - g = intel_vgpu_find_guest_page(vgpu, ops->get_pfn(we)); - if (g) { + t = intel_vgpu_find_tracked_page(vgpu, ops->get_pfn(we)); + if (t) { + g = page_track_to_guest_page(t); s = guest_page_to_ppgtt_spt(g); ppgtt_get_shadow_page(s); } else { @@ -915,7 +964,8 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( goto fail; } - ret = intel_gvt_hypervisor_set_wp_page(vgpu, &s->guest_page); + ret = intel_gvt_hypervisor_enable_page_track(vgpu, + &s->guest_page.track); if (ret) goto fail; @@ -923,7 +973,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( if (ret) goto fail; - trace_spt_change(vgpu->id, "new", s, s->guest_page.gfn, + trace_spt_change(vgpu->id, "new", s, s->guest_page.track.gfn, s->shadow_page.type); } return s; @@ -947,19 +997,22 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) { struct intel_vgpu *vgpu = spt->vgpu; + struct intel_gvt *gvt = vgpu->gvt; + struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s; struct intel_gvt_gtt_entry se, ge; - unsigned long i; + unsigned long gfn, i; int ret; trace_spt_change(spt->vgpu->id, "born", spt, - spt->guest_page.gfn, spt->shadow_page.type); + spt->guest_page.track.gfn, spt->shadow_page.type); if (gtt_type_is_pte_pt(spt->shadow_page.type)) { for_each_present_guest_entry(spt, &ge, i) { - ret = gtt_entry_p2m(vgpu, &ge, &se); - if (ret) - goto fail; + gfn = ops->get_pfn(&ge); + if 
(!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn) || + gtt_entry_p2m(vgpu, &ge, &se)) + ops->set_pfn(&se, gvt->gtt.scratch_mfn); ppgtt_set_shadow_entry(spt, &se, i); } return 0; @@ -1078,11 +1131,11 @@ static int sync_oos_page(struct intel_vgpu *vgpu, old.type = new.type = get_entry_type(spt->guest_page_type); old.val64 = new.val64 = 0; - for (index = 0; index < (GTT_PAGE_SIZE >> info->gtt_entry_size_shift); - index++) { + for (index = 0; index < (I915_GTT_PAGE_SIZE >> + info->gtt_entry_size_shift); index++) { ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu); ops->get_entry(NULL, &new, index, true, - oos_page->guest_page->gfn << PAGE_SHIFT, vgpu); + oos_page->guest_page->track.gfn << PAGE_SHIFT, vgpu); if (old.val64 == new.val64 && !test_and_clear_bit(index, spt->post_shadow_bitmap)) @@ -1132,8 +1185,9 @@ static int attach_oos_page(struct intel_vgpu *vgpu, struct intel_gvt *gvt = vgpu->gvt; int ret; - ret = intel_gvt_hypervisor_read_gpa(vgpu, gpt->gfn << GTT_PAGE_SHIFT, - oos_page->mem, GTT_PAGE_SIZE); + ret = intel_gvt_hypervisor_read_gpa(vgpu, + gpt->track.gfn << I915_GTT_PAGE_SHIFT, + oos_page->mem, I915_GTT_PAGE_SIZE); if (ret) return ret; @@ -1152,7 +1206,7 @@ static int ppgtt_set_guest_page_sync(struct intel_vgpu *vgpu, { int ret; - ret = intel_gvt_hypervisor_set_wp_page(vgpu, gpt); + ret = intel_gvt_hypervisor_enable_page_track(vgpu, &gpt->track); if (ret) return ret; @@ -1200,7 +1254,7 @@ static int ppgtt_set_guest_page_oos(struct intel_vgpu *vgpu, gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type); list_add_tail(&oos_page->vm_list, &vgpu->gtt.oos_page_list_head); - return intel_gvt_hypervisor_unset_wp_page(vgpu, gpt); + return intel_gvt_hypervisor_disable_page_track(vgpu, &gpt->track); } /** @@ -1335,10 +1389,10 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) return 0; } -static int ppgtt_handle_guest_write_page_table_bytes(void *gp, +static int ppgtt_handle_guest_write_page_table_bytes( + struct intel_vgpu_guest_page *gpt, u64 pa, void *p_data, int bytes) { - struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp; struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; @@ -1359,12 +1413,15 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp, return ret; } else { if (!test_bit(index, spt->post_shadow_bitmap)) { + int type = spt->shadow_page.type; + ppgtt_get_shadow_entry(spt, &se, index); ret = ppgtt_handle_guest_entry_removal(gpt, &se, index); if (ret) return ret; + ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); + ppgtt_set_shadow_entry(spt, &se, index); } - ppgtt_set_post_shadow(spt, index); } @@ -1415,7 +1472,7 @@ static int gen8_mm_alloc_page_table(struct intel_vgpu_mm *mm) mm->shadow_page_table = mem + mm->page_table_entry_size; } else if (mm->type == INTEL_GVT_MM_GGTT) { mm->page_table_entry_cnt = - (gvt_ggtt_gm_sz(gvt) >> GTT_PAGE_SHIFT); + (gvt_ggtt_gm_sz(gvt) >> I915_GTT_PAGE_SHIFT); mm->page_table_entry_size = mm->page_table_entry_cnt * info->gtt_entry_size; mem = vzalloc(mm->page_table_entry_size); @@ -1647,14 +1704,13 @@ int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm) if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT)) return 0; - atomic_inc(&mm->pincount); - if (!mm->shadowed) { ret = shadow_mm(mm); if (ret) return ret; } + atomic_inc(&mm->pincount); list_del_init(&mm->lru_list); list_add_tail(&mm->lru_list, &mm->vgpu->gvt->gtt.mm_lru_list_head); return 0; @@ -1738,8 +1794,8 @@ unsigned long 
intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) gma_ops->gma_to_ggtt_pte_index(gma)); if (ret) goto err; - gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT) - + (gma & ~GTT_PAGE_MASK); + gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + + (gma & ~I915_GTT_PAGE_MASK); trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa); return gpa; @@ -1791,8 +1847,8 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) } } - gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT) - + (gma & ~GTT_PAGE_MASK); + gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + + (gma & ~I915_GTT_PAGE_MASK); trace_gma_translate(vgpu->id, "ppgtt", 0, mm->page_table_level, gma, gpa); @@ -1853,14 +1909,14 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; unsigned long g_gtt_index = off >> info->gtt_entry_size_shift; - unsigned long gma; + unsigned long gma, gfn; struct intel_gvt_gtt_entry e, m; int ret; if (bytes != 4 && bytes != 8) return -EINVAL; - gma = g_gtt_index << GTT_PAGE_SHIFT; + gma = g_gtt_index << I915_GTT_PAGE_SHIFT; /* the VM may configure the whole GM space when ballooning is used */ if (!vgpu_gmadr_is_valid(vgpu, gma)) @@ -1872,6 +1928,16 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, bytes); if (ops->test_present(&e)) { + gfn = ops->get_pfn(&e); + + /* one PTE update may be issued in multiple writes and the + * first write may not construct a valid gfn + */ + if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) { + ops->set_pfn(&m, gvt->gtt.scratch_mfn); + goto out; + } + ret = gtt_entry_p2m(vgpu, &e, &m); if (ret) { gvt_vgpu_err("fail to translate guest gtt entry\n"); @@ -1879,13 +1945,14 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, * update the entry in this situation p2m will fail * setting the shadow entry to point to a scratch page */ - ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn); + ops->set_pfn(&m, gvt->gtt.scratch_mfn); } } else { m = e; - ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn); + ops->set_pfn(&m, gvt->gtt.scratch_mfn); } +out: ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index); gtt_invalidate(gvt->dev_priv); ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); @@ -1918,12 +1985,45 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, return ret; } +int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, + void *p_data, unsigned int bytes) +{ + struct intel_gvt *gvt = vgpu->gvt; + int ret = 0; + + if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { + struct intel_vgpu_page_track *t; + + mutex_lock(&gvt->lock); + + t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); + if (t) { + if (unlikely(vgpu->failsafe)) { + /* remove write protection to prevent future traps */ + intel_vgpu_clean_page_track(vgpu, t); + } else { + ret = t->handler(t, pa, p_data, bytes); + if (ret) { + gvt_err("guest page write error %d, " + "gfn 0x%lx, pa 0x%llx, " + "var 0x%x, len %d\n", + ret, t->gfn, pa, + *(u32 *)p_data, bytes); + } + } + } + mutex_unlock(&gvt->lock); + } + return ret; +} + + static int alloc_scratch_pages(struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) { struct intel_vgpu_gtt *gtt = &vgpu->gtt; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - int page_entry_num = GTT_PAGE_SIZE >> + int page_entry_num = I915_GTT_PAGE_SIZE >> vgpu->gvt->device_info.gtt_entry_size_shift; void *scratch_pt; int i; @@ -1947,7 
+2047,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, return -ENOMEM; } gtt->scratch_pt[type].page_mfn = - (unsigned long)(daddr >> GTT_PAGE_SHIFT); + (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); gtt->scratch_pt[type].page = virt_to_page(scratch_pt); gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n", vgpu->id, type, gtt->scratch_pt[type].page_mfn); @@ -1972,7 +2072,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, */ se.val64 |= _PAGE_PRESENT | _PAGE_RW; if (type == GTT_TYPE_PPGTT_PDE_PT) - se.val64 |= PPAT_CACHED_INDEX; + se.val64 |= PPAT_CACHED; for (i = 0; i < page_entry_num; i++) ops->set_entry(scratch_pt, &se, i, false, 0, vgpu); @@ -1990,7 +2090,7 @@ static int release_scratch_page_tree(struct intel_vgpu *vgpu) for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { if (vgpu->gtt.scratch_pt[i].page != NULL) { daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn << - GTT_PAGE_SHIFT); + I915_GTT_PAGE_SHIFT); dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); __free_page(vgpu->gtt.scratch_pt[i].page); vgpu->gtt.scratch_pt[i].page = NULL; @@ -2033,7 +2133,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) struct intel_vgpu_gtt *gtt = &vgpu->gtt; struct intel_vgpu_mm *ggtt_mm; - hash_init(gtt->guest_page_hash_table); + hash_init(gtt->tracked_guest_page_hash_table); hash_init(gtt->shadow_page_hash_table); INIT_LIST_HEAD(&gtt->mm_list_head); @@ -2194,7 +2294,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, int page_table_level) { - u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0])); + u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); struct intel_vgpu_mm *mm; if (WARN_ON((page_table_level != 4) && (page_table_level != 3))) @@ -2229,7 +2329,7 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, int page_table_level) { - u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0])); + u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); struct intel_vgpu_mm *mm; if (WARN_ON((page_table_level != 4) && (page_table_level != 3))) @@ -2286,15 +2386,16 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) __free_page(virt_to_page(page)); return -ENOMEM; } - gvt->gtt.scratch_ggtt_page = virt_to_page(page); - gvt->gtt.scratch_ggtt_mfn = (unsigned long)(daddr >> GTT_PAGE_SHIFT); + + gvt->gtt.scratch_page = virt_to_page(page); + gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); if (enable_out_of_sync) { ret = setup_spt_oos(gvt); if (ret) { gvt_err("fail to initialize SPT oos\n"); dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); - __free_page(gvt->gtt.scratch_ggtt_page); + __free_page(gvt->gtt.scratch_page); return ret; } } @@ -2313,12 +2414,12 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) void intel_gvt_clean_gtt(struct intel_gvt *gvt) { struct device *dev = &gvt->dev_priv->drm.pdev->dev; - dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_ggtt_mfn << - GTT_PAGE_SHIFT); + dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn << + I915_GTT_PAGE_SHIFT); dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); - __free_page(gvt->gtt.scratch_ggtt_page); + __free_page(gvt->gtt.scratch_page); if (enable_out_of_sync) clean_spt_oos(gvt); @@ -2344,7 +2445,7 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) memset(&e, 0, sizeof(struct intel_gvt_gtt_entry)); e.type = GTT_TYPE_GGTT_PTE; - ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn); + ops->set_pfn(&e, 
gvt->gtt.scratch_mfn); e.val64 |= _PAGE_PRESENT; index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; @@ -2370,8 +2471,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) */ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) { - int i; - ppgtt_free_all_shadow_page(vgpu); /* Shadow pages are only created when there is no page @@ -2381,11 +2480,4 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); intel_vgpu_reset_ggtt(vgpu); - - /* clear scratch page for security */ - for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { - if (vgpu->gtt.scratch_pt[i].page != NULL) - memset(page_address(vgpu->gtt.scratch_pt[i].page), - 0, PAGE_SIZE); - } } diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 30a4c8d16026..4cc13b5934f1 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -34,9 +34,8 @@ #ifndef _GVT_GTT_H_ #define _GVT_GTT_H_ -#define GTT_PAGE_SHIFT 12 -#define GTT_PAGE_SIZE (1UL << GTT_PAGE_SHIFT) -#define GTT_PAGE_MASK (~(GTT_PAGE_SIZE-1)) +#define I915_GTT_PAGE_SHIFT 12 +#define I915_GTT_PAGE_MASK (~(I915_GTT_PAGE_SIZE - 1)) struct intel_vgpu_mm; @@ -63,6 +62,7 @@ struct intel_gvt_gtt_pte_ops { struct intel_vgpu *vgpu); bool (*test_present)(struct intel_gvt_gtt_entry *e); void (*clear_present)(struct intel_gvt_gtt_entry *e); + void (*set_present)(struct intel_gvt_gtt_entry *e); bool (*test_pse)(struct intel_gvt_gtt_entry *e); void (*set_pfn)(struct intel_gvt_gtt_entry *e, unsigned long pfn); unsigned long (*get_pfn)(struct intel_gvt_gtt_entry *e); @@ -86,8 +86,8 @@ struct intel_gvt_gtt { struct list_head oos_page_free_list_head; struct list_head mm_lru_list_head; - struct page *scratch_ggtt_page; - unsigned long scratch_ggtt_mfn; + struct page *scratch_page; + unsigned long scratch_mfn; }; enum { @@ -193,18 +193,16 @@ struct intel_vgpu_scratch_pt { unsigned long page_mfn; }; - struct intel_vgpu_gtt { struct intel_vgpu_mm *ggtt_mm; unsigned long active_ppgtt_mm_bitmap; struct list_head mm_list_head; DECLARE_HASHTABLE(shadow_page_hash_table, INTEL_GVT_GTT_HASH_BITS); - DECLARE_HASHTABLE(guest_page_hash_table, INTEL_GVT_GTT_HASH_BITS); - atomic_t n_write_protected_guest_page; + DECLARE_HASHTABLE(tracked_guest_page_hash_table, INTEL_GVT_GTT_HASH_BITS); + atomic_t n_tracked_guest_page; struct list_head oos_page_list_head; struct list_head post_shadow_list_head; struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX]; - }; extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); @@ -228,12 +226,16 @@ struct intel_vgpu_shadow_page { unsigned long mfn; }; -struct intel_vgpu_guest_page { +struct intel_vgpu_page_track { struct hlist_node node; - bool writeprotection; + bool tracked; unsigned long gfn; int (*handler)(void *, u64, void *, int); void *data; +}; + +struct intel_vgpu_guest_page { + struct intel_vgpu_page_track track; unsigned long write_cnt; struct intel_vgpu_oos_page *oos_page; }; @@ -243,7 +245,7 @@ struct intel_vgpu_oos_page { struct list_head list; struct list_head vm_list; int id; - unsigned char mem[GTT_PAGE_SIZE]; + unsigned char mem[I915_GTT_PAGE_SIZE]; }; #define GTT_ENTRY_NUM_IN_ONE_PAGE 512 @@ -258,22 +260,16 @@ struct intel_vgpu_ppgtt_spt { struct list_head post_shadow_list; }; -int intel_vgpu_init_guest_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *guest_page, +int intel_vgpu_init_page_track(struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t, unsigned long gfn, int (*handler)(void *gp, u64, void *, int), void *data); -void 
intel_vgpu_clean_guest_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *guest_page); - -int intel_vgpu_set_guest_page_writeprotection(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *guest_page); +void intel_vgpu_clean_page_track(struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t); -void intel_vgpu_clear_guest_page_writeprotection(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *guest_page); - -struct intel_vgpu_guest_page *intel_vgpu_find_guest_page( +struct intel_vgpu_page_track *intel_vgpu_find_tracked_page( struct intel_vgpu *vgpu, unsigned long gfn); int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu); @@ -312,4 +308,7 @@ int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); +int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, + void *p_data, unsigned int bytes); + #endif /* _GVT_GTT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index c27c6838eaca..fac54f32d33f 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -36,6 +36,8 @@ #include "i915_drv.h" #include "gvt.h" +#include <linux/vfio.h> +#include <linux/mdev.h> struct intel_gvt_host intel_gvt_host; @@ -44,6 +46,129 @@ static const char * const supported_hypervisors[] = { [INTEL_GVT_HYPERVISOR_KVM] = "KVM", }; +static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt, + const char *name) +{ + int i; + struct intel_vgpu_type *t; + const char *driver_name = dev_driver_string( + &gvt->dev_priv->drm.pdev->dev); + + for (i = 0; i < gvt->num_types; i++) { + t = &gvt->types[i]; + if (!strncmp(t->name, name + strlen(driver_name) + 1, + sizeof(t->name))) + return t; + } + + return NULL; +} + +static ssize_t available_instances_show(struct kobject *kobj, + struct device *dev, char *buf) +{ + struct intel_vgpu_type *type; + unsigned int num = 0; + void *gvt = kdev_to_i915(dev)->gvt; + + type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); + if (!type) + num = 0; + else + num = type->avail_instance; + + return sprintf(buf, "%u\n", num); +} + +static ssize_t device_api_show(struct kobject *kobj, struct device *dev, + char *buf) +{ + return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); +} + +static ssize_t description_show(struct kobject *kobj, struct device *dev, + char *buf) +{ + struct intel_vgpu_type *type; + void *gvt = kdev_to_i915(dev)->gvt; + + type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); + if (!type) + return 0; + + return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" + "fence: %d\nresolution: %s\n" + "weight: %d\n", + BYTES_TO_MB(type->low_gm_size), + BYTES_TO_MB(type->high_gm_size), + type->fence, vgpu_edid_str(type->resolution), + type->weight); +} + +static MDEV_TYPE_ATTR_RO(available_instances); +static MDEV_TYPE_ATTR_RO(device_api); +static MDEV_TYPE_ATTR_RO(description); + +static struct attribute *gvt_type_attrs[] = { + &mdev_type_attr_available_instances.attr, + &mdev_type_attr_device_api.attr, + &mdev_type_attr_description.attr, + NULL, +}; + +static struct attribute_group *gvt_vgpu_type_groups[] = { + [0 ... 
NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, +}; + +static bool intel_get_gvt_attrs(struct attribute ***type_attrs, + struct attribute_group ***intel_vgpu_type_groups) +{ + *type_attrs = gvt_type_attrs; + *intel_vgpu_type_groups = gvt_vgpu_type_groups; + return true; +} + +static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i, j; + struct intel_vgpu_type *type; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + type = &gvt->types[i]; + + group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); + if (WARN_ON(!group)) + goto unwind; + + group->name = type->name; + group->attrs = gvt_type_attrs; + gvt_vgpu_type_groups[i] = group; + } + + return true; + +unwind: + for (j = 0; j < i; j++) { + group = gvt_vgpu_type_groups[j]; + kfree(group); + } + + return false; +} + +static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + group = gvt_vgpu_type_groups[i]; + gvt_vgpu_type_groups[i] = NULL; + kfree(group); + } +} + static const struct intel_gvt_ops intel_gvt_ops = { .emulate_cfg_read = intel_vgpu_emulate_cfg_read, .emulate_cfg_write = intel_vgpu_emulate_cfg_write, @@ -54,6 +179,11 @@ static const struct intel_gvt_ops intel_gvt_ops = { .vgpu_reset = intel_gvt_reset_vgpu, .vgpu_activate = intel_gvt_activate_vgpu, .vgpu_deactivate = intel_gvt_deactivate_vgpu, + .gvt_find_vgpu_type = intel_gvt_find_vgpu_type, + .get_gvt_attrs = intel_get_gvt_attrs, + .vgpu_query_plane = intel_vgpu_query_plane, + .vgpu_get_dmabuf = intel_vgpu_get_dmabuf, + .write_protect_handler = intel_vgpu_write_protect_handler, }; /** @@ -111,7 +241,7 @@ static void init_device_info(struct intel_gvt *gvt) if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) { info->max_support_vgpus = 8; - info->cfg_space_size = 256; + info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE; info->mmio_size = 2 * 1024 * 1024; info->mmio_bar = 0; info->gtt_start_offset = 8 * 1024 * 1024; @@ -191,17 +321,18 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) if (WARN_ON(!gvt)) return; + intel_gvt_debugfs_clean(gvt); clean_service_thread(gvt); intel_gvt_clean_cmd_parser(gvt); intel_gvt_clean_sched_policy(gvt); intel_gvt_clean_workload_scheduler(gvt); - intel_gvt_clean_opregion(gvt); intel_gvt_clean_gtt(gvt); intel_gvt_clean_irq(gvt); intel_gvt_clean_mmio_info(gvt); intel_gvt_free_firmware(gvt); intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt); + intel_gvt_cleanup_vgpu_type_groups(gvt); intel_gvt_clean_vgpu_types(gvt); idr_destroy(&gvt->vgpu_idr); @@ -256,6 +387,8 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) if (ret) goto out_clean_idr; + intel_gvt_init_engine_mmio_context(gvt); + ret = intel_gvt_load_firmware(gvt); if (ret) goto out_clean_mmio_info; @@ -268,13 +401,9 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) if (ret) goto out_clean_irq; - ret = intel_gvt_init_opregion(gvt); - if (ret) - goto out_clean_gtt; - ret = intel_gvt_init_workload_scheduler(gvt); if (ret) - goto out_clean_opregion; + goto out_clean_gtt; ret = intel_gvt_init_sched_policy(gvt); if (ret) @@ -292,6 +421,12 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) if (ret) goto out_clean_thread; + ret = intel_gvt_init_vgpu_type_groups(gvt); + if (ret == false) { + gvt_err("failed to init vgpu type groups: %d\n", ret); + goto out_clean_types; + } + ret = intel_gvt_hypervisor_host_init(&dev_priv->drm.pdev->dev, gvt, &intel_gvt_ops); if 
(ret) { @@ -307,6 +442,10 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) } gvt->idle_vgpu = vgpu; + ret = intel_gvt_debugfs_init(gvt); + if (ret) + gvt_err("debugfs registration failed, go on.\n"); + gvt_dbg_core("gvt device initialization is done\n"); dev_priv->gvt = gvt; return 0; @@ -321,8 +460,6 @@ out_clean_sched_policy: intel_gvt_clean_sched_policy(gvt); out_clean_workload_scheduler: intel_gvt_clean_workload_scheduler(gvt); -out_clean_opregion: - intel_gvt_clean_opregion(gvt); out_clean_gtt: intel_gvt_clean_gtt(gvt); out_clean_irq: diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 44b719eda8c4..c6197d990818 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -44,8 +44,10 @@ #include "execlist.h" #include "scheduler.h" #include "sched_policy.h" -#include "render.h" +#include "mmio_context.h" #include "cmd_parser.h" +#include "fb_decoder.h" +#include "dmabuf.h" #define GVT_MAX_VGPU 8 @@ -99,7 +101,6 @@ struct intel_vgpu_mmio { bool disable_warn_untrack; }; -#define INTEL_GVT_MAX_CFG_SPACE_SZ 256 #define INTEL_GVT_MAX_BAR_NUM 4 struct intel_vgpu_pci_bar { @@ -108,7 +109,7 @@ struct intel_vgpu_pci_bar { }; struct intel_vgpu_cfg_space { - unsigned char virtual_cfg_space[INTEL_GVT_MAX_CFG_SPACE_SZ]; + unsigned char virtual_cfg_space[PCI_CFG_SPACE_EXP_SIZE]; struct intel_vgpu_pci_bar bar[INTEL_GVT_MAX_BAR_NUM]; }; @@ -123,9 +124,9 @@ struct intel_vgpu_irq { }; struct intel_vgpu_opregion { + bool mapped; void *va; u32 gfn[INTEL_GVT_OPREGION_PAGES]; - struct page *pages[INTEL_GVT_OPREGION_PAGES]; }; #define vgpu_opregion(vgpu) (&(vgpu->opregion)) @@ -142,6 +143,33 @@ struct vgpu_sched_ctl { int weight; }; +enum { + INTEL_VGPU_EXECLIST_SUBMISSION = 1, + INTEL_VGPU_GUC_SUBMISSION, +}; + +struct intel_vgpu_submission_ops { + const char *name; + int (*init)(struct intel_vgpu *vgpu, unsigned long engine_mask); + void (*clean)(struct intel_vgpu *vgpu, unsigned long engine_mask); + void (*reset)(struct intel_vgpu *vgpu, unsigned long engine_mask); +}; + +struct intel_vgpu_submission { + struct intel_vgpu_execlist execlist[I915_NUM_ENGINES]; + struct list_head workload_q_head[I915_NUM_ENGINES]; + struct kmem_cache *workloads; + atomic_t running_workload_num; + struct i915_gem_context *shadow_ctx; + DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES); + DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); + void *ring_scan_buffer[I915_NUM_ENGINES]; + int ring_scan_buffer_size[I915_NUM_ENGINES]; + const struct intel_vgpu_submission_ops *ops; + int virtual_submission_interface; + bool active; +}; + struct intel_vgpu { struct intel_gvt *gvt; int id; @@ -161,13 +189,10 @@ struct intel_vgpu { struct intel_vgpu_gtt gtt; struct intel_vgpu_opregion opregion; struct intel_vgpu_display display; - struct intel_vgpu_execlist execlist[I915_NUM_ENGINES]; - struct list_head workload_q_head[I915_NUM_ENGINES]; - struct kmem_cache *workloads; - atomic_t running_workload_num; - DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); - struct i915_gem_context *shadow_ctx; - DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES); + struct intel_vgpu_submission submission; + u32 hws_pga[I915_NUM_ENGINES]; + + struct dentry *debugfs; #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) struct { @@ -183,10 +208,22 @@ struct intel_vgpu { struct kvm *kvm; struct work_struct release_work; atomic_t released; + struct vfio_device *vfio_device; } vdev; #endif + + struct list_head dmabuf_obj_list_head; + struct mutex dmabuf_lock; + struct idr 
object_idr; + + struct completion vblank_done; + }; +/* validating GM healthy status*/ +#define vgpu_is_vm_unhealthy(ret_val) \ + (((ret_val) == -EBADRQC) || ((ret_val) == -EFAULT)) + struct intel_gvt_gm { unsigned long vgpu_allocated_low_gm_size; unsigned long vgpu_allocated_high_gm_size; @@ -228,7 +265,7 @@ struct intel_gvt_mmio { unsigned int num_mmio_block; DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS); - unsigned int num_tracked_mmio; + unsigned long num_tracked_mmio; }; struct intel_gvt_firmware { @@ -237,11 +274,6 @@ struct intel_gvt_firmware { bool firmware_loaded; }; -struct intel_gvt_opregion { - void *opregion_va; - u32 opregion_pa; -}; - #define NR_MAX_INTEL_VGPU_TYPES 20 struct intel_vgpu_type { char name[16]; @@ -265,7 +297,6 @@ struct intel_gvt { struct intel_gvt_firmware firmware; struct intel_gvt_irq irq; struct intel_gvt_gtt gtt; - struct intel_gvt_opregion opregion; struct intel_gvt_workload_scheduler scheduler; struct notifier_block shadow_ctx_notifier_block[I915_NUM_ENGINES]; DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS); @@ -276,6 +307,10 @@ struct intel_gvt { struct task_struct *service_thread; wait_queue_head_t service_thread_wq; unsigned long service_request; + + struct engine_mmio *engine_mmio_list; + + struct dentry *debugfs_root; }; static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915) @@ -313,7 +348,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt); /* Aperture/GM space definitions for GVT device */ #define gvt_aperture_sz(gvt) (gvt->dev_priv->ggtt.mappable_end) -#define gvt_aperture_pa_base(gvt) (gvt->dev_priv->ggtt.mappable_base) +#define gvt_aperture_pa_base(gvt) (gvt->dev_priv->ggtt.gmadr.start) #define gvt_ggtt_gm_sz(gvt) (gvt->dev_priv->ggtt.base.total) #define gvt_ggtt_sz(gvt) \ @@ -375,23 +410,20 @@ void intel_vgpu_free_resource(struct intel_vgpu *vgpu); void intel_vgpu_write_fence(struct intel_vgpu *vgpu, u32 fence, u64 value); -/* Macros for easily accessing vGPU virtual/shadow register */ -#define vgpu_vreg(vgpu, reg) \ - (*(u32 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_vreg8(vgpu, reg) \ - (*(u8 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_vreg16(vgpu, reg) \ - (*(u16 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_vreg64(vgpu, reg) \ - (*(u64 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg(vgpu, reg) \ - (*(u32 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg8(vgpu, reg) \ - (*(u8 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg16(vgpu, reg) \ - (*(u16 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg64(vgpu, reg) \ - (*(u64 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) +/* Macros for easily accessing vGPU virtual/shadow register. 
+ Explicitly separate use for typed MMIO reg or real offset. */ +#define vgpu_vreg_t(vgpu, reg) \ + (*(u32 *)(vgpu->mmio.vreg + i915_mmio_reg_offset(reg))) +#define vgpu_vreg(vgpu, offset) \ + (*(u32 *)(vgpu->mmio.vreg + (offset))) +#define vgpu_vreg64_t(vgpu, reg) \ + (*(u64 *)(vgpu->mmio.vreg + i915_mmio_reg_offset(reg))) +#define vgpu_vreg64(vgpu, offset) \ + (*(u64 *)(vgpu->mmio.vreg + (offset))) +#define vgpu_sreg_t(vgpu, reg) \ + (*(u32 *)(vgpu->mmio.sreg + i915_mmio_reg_offset(reg))) +#define vgpu_sreg(vgpu, offset) \ + (*(u32 *)(vgpu->mmio.sreg + (offset))) #define for_each_active_vgpu(gvt, vgpu, id) \ idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) \ @@ -474,16 +506,22 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); -void intel_gvt_clean_opregion(struct intel_gvt *gvt); -int intel_gvt_init_opregion(struct intel_gvt *gvt); +static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar) +{ + /* We use 64-bit BARs. */ + return (*(u64 *)(vgpu->cfg_space.virtual_cfg_space + bar)) & + PCI_BASE_ADDRESS_MEM_MASK; +} void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu); -int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa); +int intel_vgpu_init_opregion(struct intel_vgpu *vgpu); +int intel_vgpu_opregion_base_write_handler(struct intel_vgpu *vgpu, u32 gpa); int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci); void populate_pvinfo_page(struct intel_vgpu *vgpu); int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload); +void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason); struct intel_gvt_ops { int (*emulate_cfg_read)(struct intel_vgpu *, unsigned int, void *, @@ -500,12 +538,21 @@ struct intel_gvt_ops { void (*vgpu_reset)(struct intel_vgpu *); void (*vgpu_activate)(struct intel_vgpu *); void (*vgpu_deactivate)(struct intel_vgpu *); + struct intel_vgpu_type *(*gvt_find_vgpu_type)(struct intel_gvt *gvt, + const char *name); + bool (*get_gvt_attrs)(struct attribute ***type_attrs, + struct attribute_group ***intel_vgpu_type_groups); + int (*vgpu_query_plane)(struct intel_vgpu *vgpu, void *); + int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int); + int (*write_protect_handler)(struct intel_vgpu *, u64, void *, + unsigned int); }; enum { GVT_FAILSAFE_UNSUPPORTED_GUEST, GVT_FAILSAFE_INSUFFICIENT_RESOURCE, + GVT_FAILSAFE_GUEST_ERR, }; static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv) @@ -581,6 +628,12 @@ static inline bool intel_gvt_mmio_has_mode_mask( return gvt->mmio.mmio_attribute[offset >> 2] & F_MODE_MASK; } +int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu); +void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu); +int intel_gvt_debugfs_init(struct intel_gvt *gvt); +void intel_gvt_debugfs_clean(struct intel_gvt *gvt); + + #include "trace.h" #include "mpt.h" diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a5bed2e71b92..9be639aa3b55 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -137,17 +137,26 @@ static int new_mmio_info(struct intel_gvt *gvt, return 0; } -static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg) +/** + * intel_gvt_render_mmio_to_ring_id - convert an MMIO offset into a ring id + * @gvt: a GVT device + * @offset: register offset + * + * Returns: + * Ring ID on success, negative error code on failure. 
+ */ +int intel_gvt_render_mmio_to_ring_id(struct intel_gvt *gvt, + unsigned int offset) { enum intel_engine_id id; struct intel_engine_cs *engine; - reg &= ~GENMASK(11, 0); + offset &= ~GENMASK(11, 0); for_each_engine(engine, gvt->dev_priv, id) { - if (engine->mmio_base == reg) + if (engine->mmio_base == offset) return id; } - return -1; + return -ENODEV; } #define offset_to_fence_num(offset) \ @@ -157,7 +166,7 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg) (num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0))) -static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) +void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) { switch (reason) { case GVT_FAILSAFE_UNSUPPORTED_GUEST: @@ -165,6 +174,10 @@ static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) break; case GVT_FAILSAFE_INSUFFICIENT_RESOURCE: pr_err("Graphics resource is not enough for the guest\n"); + break; + case GVT_FAILSAFE_GUEST_ERR: + pr_err("GVT Internal error for the guest\n"); + break; default: break; } @@ -330,13 +343,13 @@ static int pch_pp_control_mmio_write(struct intel_vgpu *vgpu, write_vreg(vgpu, offset, p_data, bytes); if (vgpu_vreg(vgpu, offset) & PANEL_POWER_ON) { - vgpu_vreg(vgpu, PCH_PP_STATUS) |= PP_ON; - vgpu_vreg(vgpu, PCH_PP_STATUS) |= PP_SEQUENCE_STATE_ON_IDLE; - vgpu_vreg(vgpu, PCH_PP_STATUS) &= ~PP_SEQUENCE_POWER_DOWN; - vgpu_vreg(vgpu, PCH_PP_STATUS) &= ~PP_CYCLE_DELAY_ACTIVE; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) |= PP_ON; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) |= PP_SEQUENCE_STATE_ON_IDLE; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~PP_SEQUENCE_POWER_DOWN; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~PP_CYCLE_DELAY_ACTIVE; } else - vgpu_vreg(vgpu, PCH_PP_STATUS) &= + vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~(PP_ON | PP_SEQUENCE_POWER_DOWN | PP_CYCLE_DELAY_ACTIVE); return 0; @@ -490,7 +503,7 @@ static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, } else { vgpu_vreg(vgpu, offset) |= DDI_BUF_IS_IDLE; if (offset == i915_mmio_reg_offset(DDI_BUF_CTL(PORT_E))) - vgpu_vreg(vgpu, DP_TP_STATUS(PORT_E)) + vgpu_vreg_t(vgpu, DP_TP_STATUS(PORT_E)) &= ~DP_TP_STATUS_AUTOTRAIN_DONE; } return 0; @@ -508,9 +521,9 @@ static int fdi_rx_iir_mmio_write(struct intel_vgpu *vgpu, static int fdi_auto_training_started(struct intel_vgpu *vgpu) { - u32 ddi_buf_ctl = vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_E)); + u32 ddi_buf_ctl = vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_E)); u32 rx_ctl = vgpu_vreg(vgpu, _FDI_RXA_CTL); - u32 tx_ctl = vgpu_vreg(vgpu, DP_TP_CTL(PORT_E)); + u32 tx_ctl = vgpu_vreg_t(vgpu, DP_TP_CTL(PORT_E)); if ((ddi_buf_ctl & DDI_BUF_CTL_ENABLE) && (rx_ctl & FDI_RX_ENABLE) && @@ -551,12 +564,12 @@ static int check_fdi_rx_train_status(struct intel_vgpu *vgpu, fdi_tx_check_bits = FDI_TX_ENABLE | fdi_tx_train_bits; /* If imr bit has been masked */ - if (vgpu_vreg(vgpu, fdi_rx_imr) & fdi_iir_check_bits) + if (vgpu_vreg_t(vgpu, fdi_rx_imr) & fdi_iir_check_bits) return 0; - if (((vgpu_vreg(vgpu, fdi_tx_ctl) & fdi_tx_check_bits) + if (((vgpu_vreg_t(vgpu, fdi_tx_ctl) & fdi_tx_check_bits) == fdi_tx_check_bits) - && ((vgpu_vreg(vgpu, fdi_rx_ctl) & fdi_rx_check_bits) + && ((vgpu_vreg_t(vgpu, fdi_rx_ctl) & fdi_rx_check_bits) == fdi_rx_check_bits)) return 1; else @@ -613,17 +626,17 @@ static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu, if (ret < 0) return ret; if (ret) - vgpu_vreg(vgpu, fdi_rx_iir) |= FDI_RX_BIT_LOCK; + vgpu_vreg_t(vgpu, fdi_rx_iir) |= FDI_RX_BIT_LOCK; ret = check_fdi_rx_train_status(vgpu, index, FDI_LINK_TRAIN_PATTERN2); if (ret < 0) return ret; if 
(ret) - vgpu_vreg(vgpu, fdi_rx_iir) |= FDI_RX_SYMBOL_LOCK; + vgpu_vreg_t(vgpu, fdi_rx_iir) |= FDI_RX_SYMBOL_LOCK; if (offset == _FDI_RXA_CTL) if (fdi_auto_training_started(vgpu)) - vgpu_vreg(vgpu, DP_TP_STATUS(PORT_E)) |= + vgpu_vreg_t(vgpu, DP_TP_STATUS(PORT_E)) |= DP_TP_STATUS_AUTOTRAIN_DONE; return 0; } @@ -644,7 +657,7 @@ static int dp_tp_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, data = (vgpu_vreg(vgpu, offset) & GENMASK(10, 8)) >> 8; if (data == 0x2) { status_reg = DP_TP_STATUS(index); - vgpu_vreg(vgpu, status_reg) |= (1 << 25); + vgpu_vreg_t(vgpu, status_reg) |= (1 << 25); } return 0; } @@ -708,7 +721,7 @@ static int pri_surf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, }; write_vreg(vgpu, offset, p_data, bytes); - vgpu_vreg(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); + vgpu_vreg_t(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); set_bit(flip_event[index], vgpu->irq.flip_done_event[index]); return 0; @@ -729,7 +742,7 @@ static int spr_surf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, }; write_vreg(vgpu, offset, p_data, bytes); - vgpu_vreg(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); + vgpu_vreg_t(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); set_bit(flip_event[index], vgpu->irq.flip_done_event[index]); return 0; @@ -1051,9 +1064,9 @@ static void write_virtual_sbi_register(struct intel_vgpu *vgpu, static int sbi_data_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - if (((vgpu_vreg(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >> + if (((vgpu_vreg_t(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >> SBI_OPCODE_SHIFT) == SBI_CMD_CRRD) { - unsigned int sbi_offset = (vgpu_vreg(vgpu, SBI_ADDR) & + unsigned int sbi_offset = (vgpu_vreg_t(vgpu, SBI_ADDR) & SBI_ADDR_OFFSET_MASK) >> SBI_ADDR_OFFSET_SHIFT; vgpu_vreg(vgpu, offset) = read_virtual_sbi_register(vgpu, sbi_offset); @@ -1078,13 +1091,13 @@ static int sbi_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, vgpu_vreg(vgpu, offset) = data; - if (((vgpu_vreg(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >> + if (((vgpu_vreg_t(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >> SBI_OPCODE_SHIFT) == SBI_CMD_CRWR) { - unsigned int sbi_offset = (vgpu_vreg(vgpu, SBI_ADDR) & + unsigned int sbi_offset = (vgpu_vreg_t(vgpu, SBI_ADDR) & SBI_ADDR_OFFSET_MASK) >> SBI_ADDR_OFFSET_SHIFT; write_virtual_sbi_register(vgpu, sbi_offset, - vgpu_vreg(vgpu, SBI_DATA)); + vgpu_vreg_t(vgpu, SBI_DATA)); } return 0; } @@ -1330,7 +1343,7 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, { u32 value = *(u32 *)p_data; u32 cmd = value & 0xff; - u32 *data0 = &vgpu_vreg(vgpu, GEN6_PCODE_DATA); + u32 *data0 = &vgpu_vreg_t(vgpu, GEN6_PCODE_DATA); switch (cmd) { case GEN9_PCODE_READ_MEM_LATENCY: @@ -1369,6 +1382,34 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes); } +static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, + void *p_data, unsigned int bytes) +{ + u32 value = *(u32 *)p_data; + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); + + if (!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) { + gvt_vgpu_err("VM(%d) write invalid HWSP address, reg:0x%x, value:0x%x\n", + vgpu->id, offset, value); + return -EINVAL; + } + /* + * Need to emulate all the HWSP register writes to ensure the host + * can update the VM CSB status correctly. The registers listed here + * cover BDW, SKL or other platforms with the same HWSP registers. 
+ */ + if (unlikely(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) { + gvt_vgpu_err("VM(%d) access unknown hardware status page register:0x%x\n", + vgpu->id, offset); + return -EINVAL; + } + vgpu->hws_pga[ring_id] = value; + gvt_dbg_mmio("VM(%d) write: 0x%x to HWSP: 0x%x\n", + vgpu->id, value, offset); + + return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes); +} + static int skl_power_well_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1381,40 +1422,6 @@ static int skl_power_well_ctl_write(struct intel_vgpu *vgpu, return intel_vgpu_default_mmio_write(vgpu, offset, &v, bytes); } -static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, - void *p_data, unsigned int bytes) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - u32 v = *(u32 *)p_data; - - if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) - return intel_vgpu_default_mmio_write(vgpu, - offset, p_data, bytes); - - switch (offset) { - case 0x4ddc: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 31); - break; - case 0x42080: - /* bypass WaCompressedResourceDisplayNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 15); - break; - case 0xe194: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 8); - break; - case 0x7014: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 13); - break; - default: - return -EINVAL; - } - - return 0; -} - static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1432,28 +1439,46 @@ static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_gvt *gvt = vgpu->gvt; + struct drm_i915_private *dev_priv = gvt->dev_priv; + int ring_id; + u32 ring_base; + + ring_id = intel_gvt_render_mmio_to_ring_id(gvt, offset); + /** + * Read the HW reg in the following cases: + * a. the offset isn't a ring mmio + * b. the offset's ring is running on hw. + * c. 
the offset is ring time stamp mmio + */ + if (ring_id >= 0) + ring_base = dev_priv->engine[ring_id]->mmio_base; + + if (ring_id < 0 || vgpu == gvt->scheduler.engine_owner[ring_id] || + offset == i915_mmio_reg_offset(RING_TIMESTAMP(ring_base)) || + offset == i915_mmio_reg_offset(RING_TIMESTAMP_UDW(ring_base))) { + mmio_hw_access_pre(dev_priv); + vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); + mmio_hw_access_post(dev_priv); + } - mmio_hw_access_pre(dev_priv); - vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); - mmio_hw_access_post(dev_priv); return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); } static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - int ring_id = render_mmio_to_ring_id(vgpu->gvt, offset); + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); struct intel_vgpu_execlist *execlist; u32 data = *(u32 *)p_data; int ret = 0; - if (WARN_ON(ring_id < 0 || ring_id > I915_NUM_ENGINES - 1)) + if (WARN_ON(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) return -EINVAL; - execlist = &vgpu->execlist[ring_id]; + execlist = &vgpu->submission.execlist[ring_id]; - execlist->elsp_dwords.data[execlist->elsp_dwords.index] = data; + execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data; if (execlist->elsp_dwords.index == 3) { ret = intel_vgpu_submit_execlist(vgpu, ring_id); if(ret) @@ -1470,8 +1495,9 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { u32 data = *(u32 *)p_data; - int ring_id = render_mmio_to_ring_id(vgpu->gvt, offset); + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); bool enable_execlist; + int ret; write_vreg(vgpu, offset, p_data, bytes); @@ -1493,8 +1519,16 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, (enable_execlist ? 
"enabling" : "disabling"), ring_id); - if (enable_execlist) - intel_vgpu_start_schedule(vgpu); + if (!enable_execlist) + return 0; + + ret = intel_vgpu_select_submission_ops(vgpu, + ENGINE_MASK(ring_id), + INTEL_VGPU_EXECLIST_SUBMISSION); + if (ret) + return ret; + + intel_vgpu_start_schedule(vgpu); } return 0; } @@ -1526,7 +1560,7 @@ static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu, default: return -EINVAL; } - set_bit(id, (void *)vgpu->tlb_handle_pending); + set_bit(id, (void *)vgpu->submission.tlb_handle_pending); return 0; } @@ -1549,7 +1583,7 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, } #define MMIO_F(reg, s, f, am, rm, d, r, w) do { \ - ret = new_mmio_info(gvt, INTEL_GVT_MMIO_OFFSET(reg), \ + ret = new_mmio_info(gvt, i915_mmio_reg_offset(reg), \ f, s, am, rm, d, r, w); \ if (ret) \ return ret; \ @@ -1617,22 +1651,22 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); -#define RING_REG(base) (base + 0x28) +#define RING_REG(base) _MMIO((base) + 0x28) MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x134) +#define RING_REG(base) _MMIO((base) + 0x134) MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x6c) +#define RING_REG(base) _MMIO((base) + 0x6c) MMIO_RING_DFH(RING_REG, D_ALL, 0, mmio_read_from_hw, NULL); #undef RING_REG MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, mmio_read_from_hw, NULL); - MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); + MMIO_GM_RDR(_MMIO(0x2148), D_ALL, NULL, NULL); MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); - MMIO_GM_RDR(0x12198, D_ALL, NULL, NULL); + MMIO_GM_RDR(_MMIO(0x12198), D_ALL, NULL, NULL); MMIO_D(GEN7_CXT_SIZE, D_ALL); MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -1642,7 +1676,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); /* RING MODE */ -#define RING_REG(base) (base + 0x29c) +#define RING_REG(base) _MMIO((base) + 0x29c) MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, ring_mode_mmio_write); #undef RING_REG @@ -1661,37 +1695,37 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) NULL, NULL); MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2124, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2124), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x20dc), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2088, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2470, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2088), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x20e4), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2470), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, - skl_misc_ctl_write); - MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, 
NULL, NULL); - MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2430, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2434, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2438, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x243c, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x7018, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(_MMIO(0x9030), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x20a0), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2420), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2430), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2434), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2438), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x243c), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x7018), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* display */ - MMIO_F(0x60220, 0x20, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(0x602a0, D_ALL); + MMIO_F(_MMIO(0x60220), 0x20, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_D(_MMIO(0x602a0), D_ALL); - MMIO_D(0x65050, D_ALL); - MMIO_D(0x650b4, D_ALL); + MMIO_D(_MMIO(0x65050), D_ALL); + MMIO_D(_MMIO(0x650b4), D_ALL); - MMIO_D(0xc4040, D_ALL); + MMIO_D(_MMIO(0xc4040), D_ALL); MMIO_D(DERRMR, D_ALL); MMIO_D(PIPEDSL(PIPE_A), D_ALL); @@ -1731,14 +1765,14 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(CURBASE(PIPE_B), D_ALL); MMIO_D(CURBASE(PIPE_C), D_ALL); - MMIO_D(0x700ac, D_ALL); - MMIO_D(0x710ac, D_ALL); - MMIO_D(0x720ac, D_ALL); + MMIO_D(_MMIO(0x700ac), D_ALL); + MMIO_D(_MMIO(0x710ac), D_ALL); + MMIO_D(_MMIO(0x720ac), D_ALL); - MMIO_D(0x70090, D_ALL); - MMIO_D(0x70094, D_ALL); - MMIO_D(0x70098, D_ALL); - MMIO_D(0x7009c, D_ALL); + MMIO_D(_MMIO(0x70090), D_ALL); + MMIO_D(_MMIO(0x70094), D_ALL); + MMIO_D(_MMIO(0x70098), D_ALL); + MMIO_D(_MMIO(0x7009c), D_ALL); MMIO_D(DSPCNTR(PIPE_A), D_ALL); MMIO_D(DSPADDR(PIPE_A), D_ALL); @@ -1914,24 +1948,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(BLC_PWM_PCH_CTL1, D_ALL); MMIO_D(BLC_PWM_PCH_CTL2, D_ALL); - MMIO_D(0x48268, D_ALL); + MMIO_D(_MMIO(0x48268), D_ALL); MMIO_F(PCH_GMBUS0, 4 * 4, 0, 0, 0, D_ALL, gmbus_mmio_read, gmbus_mmio_write); MMIO_F(PCH_GPIOA, 6 * 4, F_UNALIGN, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0xe4f00, 0x28, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0xe4f00), 0x28, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(_PCH_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, + MMIO_F(_MMIO(_PCH_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_PCH_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, + MMIO_F(_MMIO(_PCH_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_PCH_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, + MMIO_F(_MMIO(_PCH_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, dp_aux_ch_ctl_mmio_write); MMIO_DH(PCH_ADPA, D_PRE_SKL, NULL, pch_adpa_mmio_write); - MMIO_DH(_PCH_TRANSACONF, D_ALL, NULL, transconf_mmio_write); - MMIO_DH(_PCH_TRANSBCONF, D_ALL, NULL, transconf_mmio_write); + MMIO_DH(_MMIO(_PCH_TRANSACONF), D_ALL, NULL, transconf_mmio_write); + MMIO_DH(_MMIO(_PCH_TRANSBCONF), D_ALL, NULL, transconf_mmio_write); MMIO_DH(FDI_RX_IIR(PIPE_A), D_ALL, NULL, fdi_rx_iir_mmio_write); MMIO_DH(FDI_RX_IIR(PIPE_B), D_ALL, NULL, fdi_rx_iir_mmio_write); @@ -1943,30 +1977,30 @@ static int 
init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(FDI_RX_CTL(PIPE_B), D_ALL, NULL, update_fdi_rx_iir_status); MMIO_DH(FDI_RX_CTL(PIPE_C), D_ALL, NULL, update_fdi_rx_iir_status); - MMIO_D(_PCH_TRANS_HTOTAL_A, D_ALL); - MMIO_D(_PCH_TRANS_HBLANK_A, D_ALL); - MMIO_D(_PCH_TRANS_HSYNC_A, D_ALL); - MMIO_D(_PCH_TRANS_VTOTAL_A, D_ALL); - MMIO_D(_PCH_TRANS_VBLANK_A, D_ALL); - MMIO_D(_PCH_TRANS_VSYNC_A, D_ALL); - MMIO_D(_PCH_TRANS_VSYNCSHIFT_A, D_ALL); - - MMIO_D(_PCH_TRANS_HTOTAL_B, D_ALL); - MMIO_D(_PCH_TRANS_HBLANK_B, D_ALL); - MMIO_D(_PCH_TRANS_HSYNC_B, D_ALL); - MMIO_D(_PCH_TRANS_VTOTAL_B, D_ALL); - MMIO_D(_PCH_TRANS_VBLANK_B, D_ALL); - MMIO_D(_PCH_TRANS_VSYNC_B, D_ALL); - MMIO_D(_PCH_TRANS_VSYNCSHIFT_B, D_ALL); - - MMIO_D(_PCH_TRANSA_DATA_M1, D_ALL); - MMIO_D(_PCH_TRANSA_DATA_N1, D_ALL); - MMIO_D(_PCH_TRANSA_DATA_M2, D_ALL); - MMIO_D(_PCH_TRANSA_DATA_N2, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_M1, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_N1, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_M2, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_N2, D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HTOTAL_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HBLANK_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HSYNC_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VTOTAL_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VBLANK_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VSYNC_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VSYNCSHIFT_A), D_ALL); + + MMIO_D(_MMIO(_PCH_TRANS_HTOTAL_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HBLANK_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HSYNC_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VTOTAL_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VBLANK_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VSYNC_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VSYNCSHIFT_B), D_ALL); + + MMIO_D(_MMIO(_PCH_TRANSA_DATA_M1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_DATA_N1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_DATA_M2), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_DATA_N2), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_M1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_N1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_M2), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_N2), D_ALL); MMIO_D(TRANS_DP_CTL(PIPE_A), D_ALL); MMIO_D(TRANS_DP_CTL(PIPE_B), D_ALL); @@ -1984,38 +2018,38 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(TVIDEO_DIP_DATA(PIPE_C), D_ALL); MMIO_D(TVIDEO_DIP_GCP(PIPE_C), D_ALL); - MMIO_D(_FDI_RXA_MISC, D_ALL); - MMIO_D(_FDI_RXB_MISC, D_ALL); - MMIO_D(_FDI_RXA_TUSIZE1, D_ALL); - MMIO_D(_FDI_RXA_TUSIZE2, D_ALL); - MMIO_D(_FDI_RXB_TUSIZE1, D_ALL); - MMIO_D(_FDI_RXB_TUSIZE2, D_ALL); + MMIO_D(_MMIO(_FDI_RXA_MISC), D_ALL); + MMIO_D(_MMIO(_FDI_RXB_MISC), D_ALL); + MMIO_D(_MMIO(_FDI_RXA_TUSIZE1), D_ALL); + MMIO_D(_MMIO(_FDI_RXA_TUSIZE2), D_ALL); + MMIO_D(_MMIO(_FDI_RXB_TUSIZE1), D_ALL); + MMIO_D(_MMIO(_FDI_RXB_TUSIZE2), D_ALL); MMIO_DH(PCH_PP_CONTROL, D_ALL, NULL, pch_pp_control_mmio_write); MMIO_D(PCH_PP_DIVISOR, D_ALL); MMIO_D(PCH_PP_STATUS, D_ALL); MMIO_D(PCH_LVDS, D_ALL); - MMIO_D(_PCH_DPLL_A, D_ALL); - MMIO_D(_PCH_DPLL_B, D_ALL); - MMIO_D(_PCH_FPA0, D_ALL); - MMIO_D(_PCH_FPA1, D_ALL); - MMIO_D(_PCH_FPB0, D_ALL); - MMIO_D(_PCH_FPB1, D_ALL); + MMIO_D(_MMIO(_PCH_DPLL_A), D_ALL); + MMIO_D(_MMIO(_PCH_DPLL_B), D_ALL); + MMIO_D(_MMIO(_PCH_FPA0), D_ALL); + MMIO_D(_MMIO(_PCH_FPA1), D_ALL); + MMIO_D(_MMIO(_PCH_FPB0), D_ALL); + MMIO_D(_MMIO(_PCH_FPB1), D_ALL); MMIO_D(PCH_DREF_CONTROL, D_ALL); MMIO_D(PCH_RAWCLK_FREQ, D_ALL); MMIO_D(PCH_DPLL_SEL, D_ALL); - MMIO_D(0x61208, D_ALL); - MMIO_D(0x6120c, D_ALL); + MMIO_D(_MMIO(0x61208), D_ALL); + MMIO_D(_MMIO(0x6120c), D_ALL); MMIO_D(PCH_PP_ON_DELAYS, D_ALL); MMIO_D(PCH_PP_OFF_DELAYS, D_ALL); - MMIO_DH(0xe651c, D_ALL, 
dpy_reg_mmio_read, NULL); - MMIO_DH(0xe661c, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe671c, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe681c, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe6c04, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe6e1c, D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe651c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe661c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe671c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe681c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe6c04), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe6e1c), D_ALL, dpy_reg_mmio_read, NULL); MMIO_RO(PCH_PORT_HOTPLUG, D_ALL, 0, PORTA_HOTPLUG_STATUS_MASK @@ -2037,11 +2071,11 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(SOUTH_CHICKEN1, D_ALL); MMIO_DH(SOUTH_CHICKEN2, D_ALL, NULL, south_chicken2_mmio_write); - MMIO_D(_TRANSA_CHICKEN1, D_ALL); - MMIO_D(_TRANSB_CHICKEN1, D_ALL); + MMIO_D(_MMIO(_TRANSA_CHICKEN1), D_ALL); + MMIO_D(_MMIO(_TRANSB_CHICKEN1), D_ALL); MMIO_D(SOUTH_DSPCLK_GATE_D, D_ALL); - MMIO_D(_TRANSA_CHICKEN2, D_ALL); - MMIO_D(_TRANSB_CHICKEN2, D_ALL); + MMIO_D(_MMIO(_TRANSA_CHICKEN2), D_ALL); + MMIO_D(_MMIO(_TRANSB_CHICKEN2), D_ALL); MMIO_D(ILK_DPFC_CB_BASE, D_ALL); MMIO_D(ILK_DPFC_CONTROL, D_ALL); @@ -2107,24 +2141,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(PREC_PAL_DATA(PIPE_C), D_ALL); MMIO_F(PREC_PAL_GC_MAX(PIPE_C, 0), 4 * 3, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(0x60110, D_ALL); - MMIO_D(0x61110, D_ALL); - MMIO_F(0x70400, 0x40, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x71400, 0x40, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x72400, 0x40, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x70440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x71440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x72440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x7044c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x7144c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x7244c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_D(_MMIO(0x60110), D_ALL); + MMIO_D(_MMIO(0x61110), D_ALL); + MMIO_F(_MMIO(0x70400), 0x40, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x71400), 0x40, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x72400), 0x40, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x70440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x71440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x72440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x7044c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x7144c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x7244c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); MMIO_D(PIPE_WM_LINETIME(PIPE_A), D_ALL); MMIO_D(PIPE_WM_LINETIME(PIPE_B), D_ALL); MMIO_D(PIPE_WM_LINETIME(PIPE_C), D_ALL); MMIO_D(SPLL_CTL, D_ALL); - MMIO_D(_WRPLL_CTL1, D_ALL); - MMIO_D(_WRPLL_CTL2, D_ALL); + MMIO_D(_MMIO(_WRPLL_CTL1), D_ALL); + MMIO_D(_MMIO(_WRPLL_CTL2), D_ALL); MMIO_D(PORT_CLK_SEL(PORT_A), D_ALL); MMIO_D(PORT_CLK_SEL(PORT_B), D_ALL); MMIO_D(PORT_CLK_SEL(PORT_C), D_ALL); @@ -2135,15 +2169,15 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(TRANS_CLK_SEL(TRANSCODER_C), D_ALL); MMIO_D(HSW_NDE_RSTWRN_OPT, D_ALL); - MMIO_D(0x46508, D_ALL); + MMIO_D(_MMIO(0x46508), D_ALL); - MMIO_D(0x49080, D_ALL); - MMIO_D(0x49180, D_ALL); - MMIO_D(0x49280, D_ALL); + MMIO_D(_MMIO(0x49080), D_ALL); + MMIO_D(_MMIO(0x49180), D_ALL); + MMIO_D(_MMIO(0x49280), D_ALL); - MMIO_F(0x49090, 0x14, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x49190, 0x14, 0, 0, 0, D_ALL, NULL, NULL); - 
MMIO_F(0x49290, 0x14, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x49090), 0x14, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x49190), 0x14, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x49290), 0x14, 0, 0, 0, D_ALL, NULL, NULL); MMIO_D(GAMMA_MODE(PIPE_A), D_ALL); MMIO_D(GAMMA_MODE(PIPE_B), D_ALL); @@ -2163,7 +2197,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(SBI_CTL_STAT, D_ALL, NULL, sbi_ctl_mmio_write); MMIO_D(PIXCLK_GATE, D_ALL); - MMIO_F(_DPA_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_ALL, NULL, + MMIO_F(_MMIO(_DPA_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_ALL, NULL, dp_aux_ch_ctl_mmio_write); MMIO_DH(DDI_BUF_CTL(PORT_A), D_ALL, NULL, ddi_buf_ctl_mmio_write); @@ -2184,24 +2218,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(DP_TP_STATUS(PORT_D), D_ALL, NULL, dp_tp_status_mmio_write); MMIO_DH(DP_TP_STATUS(PORT_E), D_ALL, NULL, NULL); - MMIO_F(_DDI_BUF_TRANS_A, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64e60, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64eC0, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64f20, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64f80, 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(_DDI_BUF_TRANS_A), 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64e60), 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64eC0), 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64f20), 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64f80), 0x50, 0, 0, 0, D_ALL, NULL, NULL); MMIO_D(HSW_AUD_CFG(PIPE_A), D_ALL); MMIO_D(HSW_AUD_PIN_ELD_CP_VLD, D_ALL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_A, D_ALL, NULL, NULL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_B, D_ALL, NULL, NULL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_C, D_ALL, NULL, NULL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_EDP, D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_A), D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_B), D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_C), D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_EDP), D_ALL, NULL, NULL); - MMIO_D(_TRANSA_MSA_MISC, D_ALL); - MMIO_D(_TRANSB_MSA_MISC, D_ALL); - MMIO_D(_TRANSC_MSA_MISC, D_ALL); - MMIO_D(_TRANS_EDP_MSA_MISC, D_ALL); + MMIO_D(_MMIO(_TRANSA_MSA_MISC), D_ALL); + MMIO_D(_MMIO(_TRANSB_MSA_MISC), D_ALL); + MMIO_D(_MMIO(_TRANSC_MSA_MISC), D_ALL); + MMIO_D(_MMIO(_TRANS_EDP_MSA_MISC), D_ALL); MMIO_DH(FORCEWAKE, D_ALL, NULL, NULL); MMIO_D(FORCEWAKE_ACK, D_ALL); @@ -2267,101 +2301,101 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN6_UCGCTL1, D_ALL); MMIO_D(GEN6_UCGCTL2, D_ALL); - MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x4f000), 0x90, 0, 0, 0, D_ALL, NULL, NULL); MMIO_D(GEN6_PCODE_DATA, D_ALL); - MMIO_D(0x13812c, D_ALL); + MMIO_D(_MMIO(0x13812c), D_ALL); MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL); MMIO_D(HSW_EDRAM_CAP, D_ALL); MMIO_D(HSW_IDICR, D_ALL); MMIO_DH(GFX_FLSH_CNTL_GEN6, D_ALL, NULL, NULL); - MMIO_D(0x3c, D_ALL); - MMIO_D(0x860, D_ALL); + MMIO_D(_MMIO(0x3c), D_ALL); + MMIO_D(_MMIO(0x860), D_ALL); MMIO_D(ECOSKPD, D_ALL); - MMIO_D(0x121d0, D_ALL); + MMIO_D(_MMIO(0x121d0), D_ALL); MMIO_D(GEN6_BLITTER_ECOSKPD, D_ALL); - MMIO_D(0x41d0, D_ALL); + MMIO_D(_MMIO(0x41d0), D_ALL); MMIO_D(GAC_ECO_BITS, D_ALL); - MMIO_D(0x6200, D_ALL); - MMIO_D(0x6204, D_ALL); - MMIO_D(0x6208, D_ALL); - MMIO_D(0x7118, D_ALL); - MMIO_D(0x7180, D_ALL); - MMIO_D(0x7408, D_ALL); - MMIO_D(0x7c00, D_ALL); + MMIO_D(_MMIO(0x6200), D_ALL); + MMIO_D(_MMIO(0x6204), D_ALL); + MMIO_D(_MMIO(0x6208), D_ALL); + MMIO_D(_MMIO(0x7118), D_ALL); + MMIO_D(_MMIO(0x7180), D_ALL); + MMIO_D(_MMIO(0x7408), D_ALL); + 
MMIO_D(_MMIO(0x7c00), D_ALL); MMIO_DH(GEN6_MBCTL, D_ALL, NULL, mbctl_write); - MMIO_D(0x911c, D_ALL); - MMIO_D(0x9120, D_ALL); + MMIO_D(_MMIO(0x911c), D_ALL); + MMIO_D(_MMIO(0x9120), D_ALL); MMIO_DFH(GEN7_UCGCTL4, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_D(GAB_CTL, D_ALL); - MMIO_D(0x48800, D_ALL); - MMIO_D(0xce044, D_ALL); - MMIO_D(0xe6500, D_ALL); - MMIO_D(0xe6504, D_ALL); - MMIO_D(0xe6600, D_ALL); - MMIO_D(0xe6604, D_ALL); - MMIO_D(0xe6700, D_ALL); - MMIO_D(0xe6704, D_ALL); - MMIO_D(0xe6800, D_ALL); - MMIO_D(0xe6804, D_ALL); + MMIO_D(_MMIO(0x48800), D_ALL); + MMIO_D(_MMIO(0xce044), D_ALL); + MMIO_D(_MMIO(0xe6500), D_ALL); + MMIO_D(_MMIO(0xe6504), D_ALL); + MMIO_D(_MMIO(0xe6600), D_ALL); + MMIO_D(_MMIO(0xe6604), D_ALL); + MMIO_D(_MMIO(0xe6700), D_ALL); + MMIO_D(_MMIO(0xe6704), D_ALL); + MMIO_D(_MMIO(0xe6800), D_ALL); + MMIO_D(_MMIO(0xe6804), D_ALL); MMIO_D(PCH_GMBUS4, D_ALL); MMIO_D(PCH_GMBUS5, D_ALL); - MMIO_D(0x902c, D_ALL); - MMIO_D(0xec008, D_ALL); - MMIO_D(0xec00c, D_ALL); - MMIO_D(0xec008 + 0x18, D_ALL); - MMIO_D(0xec00c + 0x18, D_ALL); - MMIO_D(0xec008 + 0x18 * 2, D_ALL); - MMIO_D(0xec00c + 0x18 * 2, D_ALL); - MMIO_D(0xec008 + 0x18 * 3, D_ALL); - MMIO_D(0xec00c + 0x18 * 3, D_ALL); - MMIO_D(0xec408, D_ALL); - MMIO_D(0xec40c, D_ALL); - MMIO_D(0xec408 + 0x18, D_ALL); - MMIO_D(0xec40c + 0x18, D_ALL); - MMIO_D(0xec408 + 0x18 * 2, D_ALL); - MMIO_D(0xec40c + 0x18 * 2, D_ALL); - MMIO_D(0xec408 + 0x18 * 3, D_ALL); - MMIO_D(0xec40c + 0x18 * 3, D_ALL); - MMIO_D(0xfc810, D_ALL); - MMIO_D(0xfc81c, D_ALL); - MMIO_D(0xfc828, D_ALL); - MMIO_D(0xfc834, D_ALL); - MMIO_D(0xfcc00, D_ALL); - MMIO_D(0xfcc0c, D_ALL); - MMIO_D(0xfcc18, D_ALL); - MMIO_D(0xfcc24, D_ALL); - MMIO_D(0xfd000, D_ALL); - MMIO_D(0xfd00c, D_ALL); - MMIO_D(0xfd018, D_ALL); - MMIO_D(0xfd024, D_ALL); - MMIO_D(0xfd034, D_ALL); + MMIO_D(_MMIO(0x902c), D_ALL); + MMIO_D(_MMIO(0xec008), D_ALL); + MMIO_D(_MMIO(0xec00c), D_ALL); + MMIO_D(_MMIO(0xec008 + 0x18), D_ALL); + MMIO_D(_MMIO(0xec00c + 0x18), D_ALL); + MMIO_D(_MMIO(0xec008 + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec00c + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec008 + 0x18 * 3), D_ALL); + MMIO_D(_MMIO(0xec00c + 0x18 * 3), D_ALL); + MMIO_D(_MMIO(0xec408), D_ALL); + MMIO_D(_MMIO(0xec40c), D_ALL); + MMIO_D(_MMIO(0xec408 + 0x18), D_ALL); + MMIO_D(_MMIO(0xec40c + 0x18), D_ALL); + MMIO_D(_MMIO(0xec408 + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec40c + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec408 + 0x18 * 3), D_ALL); + MMIO_D(_MMIO(0xec40c + 0x18 * 3), D_ALL); + MMIO_D(_MMIO(0xfc810), D_ALL); + MMIO_D(_MMIO(0xfc81c), D_ALL); + MMIO_D(_MMIO(0xfc828), D_ALL); + MMIO_D(_MMIO(0xfc834), D_ALL); + MMIO_D(_MMIO(0xfcc00), D_ALL); + MMIO_D(_MMIO(0xfcc0c), D_ALL); + MMIO_D(_MMIO(0xfcc18), D_ALL); + MMIO_D(_MMIO(0xfcc24), D_ALL); + MMIO_D(_MMIO(0xfd000), D_ALL); + MMIO_D(_MMIO(0xfd00c), D_ALL); + MMIO_D(_MMIO(0xfd018), D_ALL); + MMIO_D(_MMIO(0xfd024), D_ALL); + MMIO_D(_MMIO(0xfd034), D_ALL); MMIO_DH(FPGA_DBG, D_ALL, NULL, fpga_dbg_mmio_write); - MMIO_D(0x2054, D_ALL); - MMIO_D(0x12054, D_ALL); - MMIO_D(0x22054, D_ALL); - MMIO_D(0x1a054, D_ALL); - - MMIO_D(0x44070, D_ALL); - MMIO_DFH(0x215c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL); - - MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL); - MMIO_D(0x2b00, D_BDW_PLUS); - MMIO_D(0x2360, D_BDW_PLUS); - MMIO_F(0x5200, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, 
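
For reference, the MMIO_* registration macros these tables are built from all funnel into one helper. Paraphrased from handlers.c as it stands after this patch (new_mmio_info() allocates the tracking entry; `s` is the byte size of the range, `f` the flags, `am`/`rm` the address/read-only masks, `r`/`w` the read and write hooks):

#define MMIO_F(reg, s, f, am, rm, d, r, w) do { \
	ret = new_mmio_info(gvt, i915_mmio_reg_offset(reg), \
		f, s, am, rm, d, r, w); \
	if (ret) \
		return ret; \
} while (0)

#define MMIO_D(reg, d) \
	MMIO_F(reg, 4, 0, 0, 0, d, NULL, NULL)

#define MMIO_DH(reg, d, r, w) \
	MMIO_F(reg, 4, 0, 0, 0, d, r, w)

#define MMIO_DFH(reg, d, f, r, w) \
	MMIO_F(reg, 4, f, 0, 0, d, r, w)

Since MMIO_F() now converts with i915_mmio_reg_offset(), it only accepts i915_reg_t arguments, hence the _MMIO() wrapping of raw hexadecimal offsets throughout these init functions.
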
NULL); - MMIO_F(0x5240, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5280, 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); - - MMIO_DFH(0x1c17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1c178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_D(_MMIO(0x2054), D_ALL); + MMIO_D(_MMIO(0x12054), D_ALL); + MMIO_D(_MMIO(0x22054), D_ALL); + MMIO_D(_MMIO(0x1a054), D_ALL); + + MMIO_D(_MMIO(0x44070), D_ALL); + MMIO_DFH(_MMIO(0x215c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2178), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x217c), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x12178), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1217c), D_ALL, F_CMD_ACCESS, NULL, NULL); + + MMIO_F(_MMIO(0x2290), 8, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL); + MMIO_D(_MMIO(0x2b00), D_BDW_PLUS); + MMIO_D(_MMIO(0x2360), D_BDW_PLUS); + MMIO_F(_MMIO(0x5200), 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x5240), 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x5280), 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + + MMIO_DFH(_MMIO(0x1c17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1c178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(BCS_SWCTRL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_F(HS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); @@ -2375,24 +2409,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_F(CL_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_F(PS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_F(PS_DEPTH_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); - MMIO_DH(0x4260, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x4264, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x4268, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x426c, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x4270, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DFH(0x4094, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DH(_MMIO(0x4260), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DH(_MMIO(0x4264), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DH(_MMIO(0x4268), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DH(_MMIO(0x426c), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DH(_MMIO(0x4270), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DFH(_MMIO(0x4094), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL); - MMIO_DFH(0x2220, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x12220, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x22220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2220), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x12220), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x22220), D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_SYNC_1, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_SYNC_0, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x22178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1a178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1a17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2217c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x22178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1a178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1a17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2217c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2466,40 
+2500,40 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, mmio_read_from_hw, NULL); -#define RING_REG(base) (base + 0xd0) +#define RING_REG(base) _MMIO((base) + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, ring_reset_ctl_write); #undef RING_REG -#define RING_REG(base) (base + 0x230) +#define RING_REG(base) _MMIO((base) + 0x230) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write); #undef RING_REG -#define RING_REG(base) (base + 0x234) +#define RING_REG(base) _MMIO((base) + 0x234) MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x244) +#define RING_REG(base) _MMIO((base) + 0x244) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x370) +#define RING_REG(base) _MMIO((base) + 0x370) MMIO_RING_F(RING_REG, 48, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x3a0) +#define RING_REG(base) _MMIO((base) + 0x3a0) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_MODE_MASK, NULL, NULL); #undef RING_REG MMIO_D(PIPEMISC(PIPE_A), D_BDW_PLUS); MMIO_D(PIPEMISC(PIPE_B), D_BDW_PLUS); MMIO_D(PIPEMISC(PIPE_C), D_BDW_PLUS); - MMIO_D(0x1c1d0, D_BDW_PLUS); + MMIO_D(_MMIO(0x1c1d0), D_BDW_PLUS); MMIO_D(GEN6_MBCUNIT_SNPCR, D_BDW_PLUS); MMIO_D(GEN7_MISCCPCTL, D_BDW_PLUS); - MMIO_D(0x1c054, D_BDW_PLUS); + MMIO_D(_MMIO(0x1c054), D_BDW_PLUS); MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write); @@ -2508,11 +2542,11 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GAMTARBMODE, D_BDW_PLUS); -#define RING_REG(base) (base + 0x270) +#define RING_REG(base) _MMIO((base) + 0x270) MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG - MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); + MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, hws_pga_write); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2521,10 +2555,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS); MMIO_D(WM_MISC, D_BDW); - MMIO_D(BDW_EDP_PSR_BASE, D_BDW); + MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW); - MMIO_D(0x66c00, D_BDW_PLUS); - MMIO_D(0x66c04, D_BDW_PLUS); + MMIO_D(_MMIO(0x66c00), D_BDW_PLUS); + MMIO_D(_MMIO(0x66c04), D_BDW_PLUS); MMIO_D(HSW_GTT_CACHE_EN, D_BDW_PLUS); @@ -2532,55 +2566,54 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN8_EU_DISABLE1, D_BDW_PLUS); MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS); - MMIO_D(0xfdc, D_BDW_PLUS); + MMIO_D(_MMIO(0xfdc), D_BDW_PLUS); MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_ROW_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_UCGCTL6, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb1f0, D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb1c0, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xb1f0), D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xb1c0), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_L3SQCREG4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb100, D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb10c, D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0xb110, D_BDW); + MMIO_DFH(_MMIO(0xb100), D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xb10c), D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_D(_MMIO(0xb110), D_BDW); - MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, 
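
The RING_REG(base) redefinitions above switch from producing a raw sum to producing an i915_reg_t because the MMIO_RING_* helpers instantiate RING_REG once per engine base. Roughly how that expansion works, paraphrased from handlers.c (the real macro also registers GEN8_BSD2_RING_BASE on platforms with a second BSD engine):

#define MMIO_RING_F(prefix, s, f, am, rm, d, r, w) do { \
	MMIO_F(prefix(RENDER_RING_BASE), s, f, am, rm, d, r, w); \
	MMIO_F(prefix(BLT_RING_BASE), s, f, am, rm, d, r, w); \
	MMIO_F(prefix(GEN6_BSD_RING_BASE), s, f, am, rm, d, r, w); \
	MMIO_F(prefix(VEBOX_RING_BASE), s, f, am, rm, d, r, w); \
} while (0)

With MMIO_F() taking an i915_reg_t, each RING_REG variant has to wrap its computed offset itself, which is what every `#define RING_REG(base) _MMIO((base) + ...)` hunk in this function does.
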
+ MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, force_nonpriv_write); - MMIO_D(0x44484, D_BDW_PLUS); - MMIO_D(0x4448c, D_BDW_PLUS); + MMIO_D(_MMIO(0x44484), D_BDW_PLUS); + MMIO_D(_MMIO(0x4448c), D_BDW_PLUS); - MMIO_DFH(0x83a4, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x83a4), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS); - MMIO_DFH(0x8430, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x8430), D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x110000, D_BDW_PLUS); + MMIO_D(_MMIO(0x110000), D_BDW_PLUS); - MMIO_D(0x48400, D_BDW_PLUS); + MMIO_D(_MMIO(0x48400), D_BDW_PLUS); - MMIO_D(0x6e570, D_BDW_PLUS); - MMIO_D(0x65f10, D_BDW_PLUS); + MMIO_D(_MMIO(0x6e570), D_BDW_PLUS); + MMIO_D(_MMIO(0x65f10), D_BDW_PLUS); - MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, - skl_misc_ctl_write); - MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe194), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe188), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - - MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL); - - MMIO_DFH(0xe220, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe230, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe240, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe260, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe270, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe280, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe2a0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe2b0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe2c0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2580), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(_MMIO(0x2248), D_BDW, F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(_MMIO(0xe220), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe230), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe240), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe260), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe270), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe280), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe2a0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe2b0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe2c0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2596,11 +2629,11 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL); - MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); /* @@ -2611,26 +2644,26 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(HSW_PWR_WELL_CTL_DRIVER(SKL_DISP_PW_MISC_IO), D_SKL_PLUS, NULL, skl_power_well_ctl_write); - MMIO_D(0xa210, D_SKL_PLUS); + MMIO_D(_MMIO(0xa210), D_SKL_PLUS); MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); 
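
Note the behavioral change buried in the 0xe194 hunk above (and in the 0x4ddc/0x42080 hunks below): the skl_misc_ctl_write hook is dropped, so those registers fall back to the default path, which simply mirrors the access into the vGPU's virtual register file. The default handlers are short; paraphrased from handlers.c:

static void read_vreg(struct intel_vgpu *vgpu, unsigned int offset,
		void *p_data, unsigned int bytes)
{
	memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes);
}

static void write_vreg(struct intel_vgpu *vgpu, unsigned int offset,
		void *p_data, unsigned int bytes)
{
	memcpy(&vgpu_vreg(vgpu, offset), p_data, bytes);
}

int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
		void *p_data, unsigned int bytes)
{
	read_vreg(vgpu, offset, p_data, bytes);
	return 0;
}

int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
		void *p_data, unsigned int bytes)
{
	write_vreg(vgpu, offset, p_data, bytes);
	return 0;
}

A NULL read or write hook in any MMIO_D*/MMIO_F entry means exactly this: on the MMIO trap path the guest only sees its own shadow copy, and the physical register is left alone.
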
MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, skl_misc_ctl_write); - MMIO_DH(0x42080, D_SKL_PLUS, NULL, skl_misc_ctl_write); - MMIO_D(0x45504, D_SKL_PLUS); - MMIO_D(0x45520, D_SKL_PLUS); - MMIO_D(0x46000, D_SKL_PLUS); - MMIO_DH(0x46010, D_SKL | D_KBL, NULL, skl_lcpll_write); - MMIO_DH(0x46014, D_SKL | D_KBL, NULL, skl_lcpll_write); - MMIO_D(0x6C040, D_SKL | D_KBL); - MMIO_D(0x6C048, D_SKL | D_KBL); - MMIO_D(0x6C050, D_SKL | D_KBL); - MMIO_D(0x6C044, D_SKL | D_KBL); - MMIO_D(0x6C04C, D_SKL | D_KBL); - MMIO_D(0x6C054, D_SKL | D_KBL); - MMIO_D(0x6c058, D_SKL | D_KBL); - MMIO_D(0x6c05c, D_SKL | D_KBL); - MMIO_DH(0X6c060, D_SKL | D_KBL, dpll_status_read, NULL); + MMIO_DH(_MMIO(0x4ddc), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(0x42080), D_SKL_PLUS, NULL, NULL); + MMIO_D(_MMIO(0x45504), D_SKL_PLUS); + MMIO_D(_MMIO(0x45520), D_SKL_PLUS); + MMIO_D(_MMIO(0x46000), D_SKL_PLUS); + MMIO_DH(_MMIO(0x46010), D_SKL | D_KBL, NULL, skl_lcpll_write); + MMIO_DH(_MMIO(0x46014), D_SKL | D_KBL, NULL, skl_lcpll_write); + MMIO_D(_MMIO(0x6C040), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C048), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C050), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C044), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C04C), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C054), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6c058), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6c05c), D_SKL | D_KBL); + MMIO_DH(_MMIO(0x6c060), D_SKL | D_KBL, dpll_status_read, NULL); MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write); MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); @@ -2719,105 +2752,108 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL_PLUS, NULL, 
NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_D(0x70380, D_SKL_PLUS); - MMIO_D(0x71380, D_SKL_PLUS); - MMIO_D(0x72380, D_SKL_PLUS); - MMIO_D(0x7039c, D_SKL_PLUS); + MMIO_D(_MMIO(0x70380), D_SKL_PLUS); + MMIO_D(_MMIO(0x71380), D_SKL_PLUS); + MMIO_D(_MMIO(0x72380), D_SKL_PLUS); + MMIO_D(_MMIO(0x7039c), D_SKL_PLUS); - MMIO_D(0x8f074, D_SKL | D_KBL); - MMIO_D(0x8f004, D_SKL | D_KBL); - MMIO_D(0x8f034, D_SKL | D_KBL); + MMIO_D(_MMIO(0x8f074), D_SKL | D_KBL); + MMIO_D(_MMIO(0x8f004), D_SKL | D_KBL); + MMIO_D(_MMIO(0x8f034), D_SKL | D_KBL); - MMIO_D(0xb11c, D_SKL | D_KBL); + MMIO_D(_MMIO(0xb11c), D_SKL | D_KBL); - MMIO_D(0x51000, D_SKL | D_KBL); - MMIO_D(0x6c00c, D_SKL_PLUS); + MMIO_D(_MMIO(0x51000), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6c00c), D_SKL_PLUS); - MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); - MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + MMIO_F(_MMIO(0xc800), 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + MMIO_F(_MMIO(0xb020), 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); - MMIO_D(0xd08, D_SKL_PLUS); - MMIO_DFH(0x20e0, D_SKL_PLUS, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x20ec, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_D(_MMIO(0xd08), D_SKL_PLUS); + MMIO_DFH(_MMIO(0x20e0), D_SKL_PLUS, F_MODE_MASK, NULL, NULL); + MMIO_DFH(_MMIO(0x20ec), D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ - MMIO_DFH(0x4de0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4de4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4de8, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4dec, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4df0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4df4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write); - MMIO_DH(0x4dfc, D_SKL | D_KBL, NULL, gen9_trtt_chicken_write); + MMIO_DFH(_MMIO(0x4de0), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4de4), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4de8), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4dec), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4df0), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4df4), D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write); + MMIO_DH(_MMIO(0x4dfc), D_SKL | D_KBL, NULL, gen9_trtt_chicken_write); - 
MMIO_D(0x45008, D_SKL | D_KBL); + MMIO_D(_MMIO(0x45008), D_SKL | D_KBL); - MMIO_D(0x46430, D_SKL | D_KBL); + MMIO_D(_MMIO(0x46430), D_SKL | D_KBL); - MMIO_D(0x46520, D_SKL | D_KBL); + MMIO_D(_MMIO(0x46520), D_SKL | D_KBL); - MMIO_D(0xc403c, D_SKL | D_KBL); - MMIO_D(0xb004, D_SKL_PLUS); + MMIO_D(_MMIO(0xc403c), D_SKL | D_KBL); + MMIO_D(_MMIO(0xb004), D_SKL_PLUS); MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write); - MMIO_D(0x65900, D_SKL_PLUS); - MMIO_D(0x1082c0, D_SKL | D_KBL); - MMIO_D(0x4068, D_SKL | D_KBL); - MMIO_D(0x67054, D_SKL | D_KBL); - MMIO_D(0x6e560, D_SKL | D_KBL); - MMIO_D(0x6e554, D_SKL | D_KBL); - MMIO_D(0x2b20, D_SKL | D_KBL); - MMIO_D(0x65f00, D_SKL | D_KBL); - MMIO_D(0x65f08, D_SKL | D_KBL); - MMIO_D(0x320f0, D_SKL | D_KBL); - - MMIO_D(0x70034, D_SKL_PLUS); - MMIO_D(0x71034, D_SKL_PLUS); - MMIO_D(0x72034, D_SKL_PLUS); - - MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL_PLUS); - MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL_PLUS); - MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL_PLUS); - MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL_PLUS); - MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL_PLUS); - MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL_PLUS); - - MMIO_D(0x44500, D_SKL_PLUS); + MMIO_D(_MMIO(0x65900), D_SKL_PLUS); + MMIO_D(_MMIO(0x1082c0), D_SKL | D_KBL); + MMIO_D(_MMIO(0x4068), D_SKL | D_KBL); + MMIO_D(_MMIO(0x67054), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6e560), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6e554), D_SKL | D_KBL); + MMIO_D(_MMIO(0x2b20), D_SKL | D_KBL); + MMIO_D(_MMIO(0x65f00), D_SKL | D_KBL); + MMIO_D(_MMIO(0x65f08), D_SKL | D_KBL); + MMIO_D(_MMIO(0x320f0), D_SKL | D_KBL); + + MMIO_D(_MMIO(0x70034), D_SKL_PLUS); + MMIO_D(_MMIO(0x71034), D_SKL_PLUS); + MMIO_D(_MMIO(0x72034), D_SKL_PLUS); + + MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_A)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_B)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_C)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYMAX_1(PIPE_A)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYMAX_1(PIPE_B)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYMAX_1(PIPE_C)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_A)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_B)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_C)), D_SKL_PLUS); + + MMIO_D(_MMIO(0x44500), D_SKL_PLUS); MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL | D_KBL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x4ab8, D_KBL); - MMIO_D(0x2248, D_SKL_PLUS | D_KBL); + MMIO_D(_MMIO(0x4ab8), D_KBL); + MMIO_D(_MMIO(0x2248), D_SKL_PLUS | D_KBL); return 0; } @@ -2833,8 +2869,8 @@ static struct gvt_mmio_block *find_mmio_block(struct intel_gvt *gvt, for (i = 0; i < num; i++, block++) { if (!(device & block->device)) continue; - if (offset >= INTEL_GVT_MMIO_OFFSET(block->offset) && - offset < INTEL_GVT_MMIO_OFFSET(block->offset) + block->size) + if (offset >= i915_mmio_reg_offset(block->offset) && + offset < i915_mmio_reg_offset(block->offset) + block->size) return block; } return NULL; @@ -2914,14 +2950,46 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) gvt->mmio.mmio_block = mmio_blocks; gvt->mmio.num_mmio_block = ARRAY_SIZE(mmio_blocks); - gvt_dbg_mmio("traced %u virtual mmio registers\n", - gvt->mmio.num_tracked_mmio); return 0; err: intel_gvt_clean_mmio_info(gvt); return ret; } +/** + * intel_gvt_for_each_tracked_mmio - iterate each tracked mmio + * @gvt: a GVT device + * @handler: the handler + * @data: private data given to handler + * + * Returns: + * Zero on success, negative error code if failed. 
+ */ +int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt, + int (*handler)(struct intel_gvt *gvt, u32 offset, void *data), + void *data) +{ + struct gvt_mmio_block *block = gvt->mmio.mmio_block; + struct intel_gvt_mmio_info *e; + int i, j, ret; + + hash_for_each(gvt->mmio.mmio_info_table, i, e, node) { + ret = handler(gvt, e->offset, data); + if (ret) + return ret; + } + + for (i = 0; i < gvt->mmio.num_mmio_block; i++, block++) { + for (j = 0; j < block->size; j += 4) { + ret = handler(gvt, + i915_mmio_reg_offset(block->offset) + j, + data); + if (ret) + return ret; + } + } + return 0; +} /** * intel_vgpu_default_mmio_read - default MMIO read handler diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index df7f33abd393..f8e77e166246 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -55,6 +55,10 @@ struct intel_gvt_mpt { unsigned long mfn, unsigned int nr, bool map); int (*set_trap_area)(unsigned long handle, u64 start, u64 end, bool map); + int (*set_opregion)(void *vgpu); + int (*get_vfio_device)(void *vgpu); + void (*put_vfio_device)(void *vgpu); + bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn); }; extern struct intel_gvt_mpt xengt_mpt; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 83e88c70272a..021f722e2481 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -53,11 +53,23 @@ static const struct intel_gvt_ops *intel_gvt_ops; #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) +#define OPREGION_SIGNATURE "IntelGraphicsMem" + +struct vfio_region; +struct intel_vgpu_regops { + size_t (*rw)(struct intel_vgpu *vgpu, char *buf, + size_t count, loff_t *ppos, bool iswrite); + void (*release)(struct intel_vgpu *vgpu, + struct vfio_region *region); +}; + struct vfio_region { u32 type; u32 subtype; size_t size; u32 flags; + const struct intel_vgpu_regops *ops; + void *data; }; struct kvmgt_pgfn { @@ -248,120 +260,6 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu) } } -static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt, - const char *name) -{ - int i; - struct intel_vgpu_type *t; - const char *driver_name = dev_driver_string( - &gvt->dev_priv->drm.pdev->dev); - - for (i = 0; i < gvt->num_types; i++) { - t = &gvt->types[i]; - if (!strncmp(t->name, name + strlen(driver_name) + 1, - sizeof(t->name))) - return t; - } - - return NULL; -} - -static ssize_t available_instances_show(struct kobject *kobj, - struct device *dev, char *buf) -{ - struct intel_vgpu_type *type; - unsigned int num = 0; - void *gvt = kdev_to_i915(dev)->gvt; - - type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); - if (!type) - num = 0; - else - num = type->avail_instance; - - return sprintf(buf, "%u\n", num); -} - -static ssize_t device_api_show(struct kobject *kobj, struct device *dev, - char *buf) -{ - return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); -} - -static ssize_t description_show(struct kobject *kobj, struct device *dev, - char *buf) -{ - struct intel_vgpu_type *type; - void *gvt = kdev_to_i915(dev)->gvt; - - type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); - if (!type) - return 0; - - return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" - "fence: %d\nresolution: %s\n" - "weight: %d\n", - BYTES_TO_MB(type->low_gm_size), - BYTES_TO_MB(type->high_gm_size), - type->fence, 
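
The removed gvt_dbg_mmio() print above only counted tracked registers; the new intel_gvt_for_each_tracked_mmio() generalizes that into a walk over both the hashed per-register entries and the raw MMIO blocks, invoking a callback per offset. A hypothetical caller in the spirit of the debugfs consumer added elsewhere in this series (the names below are illustrative, not from the patch):

/* Hypothetical callback: count every offset GVT-g tracks. */
static int count_tracked(struct intel_gvt *gvt, u32 offset, void *data)
{
	unsigned long *count = data;

	(*count)++;
	return 0;
}

static unsigned long num_tracked_mmio(struct intel_gvt *gvt)
{
	unsigned long count = 0;

	intel_gvt_for_each_tracked_mmio(gvt, count_tracked, &count);
	return count;
}

Returning non-zero from the callback aborts the walk and propagates the error, so per-offset work can bail out early.
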
vgpu_edid_str(type->resolution), - type->weight); -} - -static MDEV_TYPE_ATTR_RO(available_instances); -static MDEV_TYPE_ATTR_RO(device_api); -static MDEV_TYPE_ATTR_RO(description); - -static struct attribute *type_attrs[] = { - &mdev_type_attr_available_instances.attr, - &mdev_type_attr_device_api.attr, - &mdev_type_attr_description.attr, - NULL, -}; - -static struct attribute_group *intel_vgpu_type_groups[] = { - [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, -}; - -static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) -{ - int i, j; - struct intel_vgpu_type *type; - struct attribute_group *group; - - for (i = 0; i < gvt->num_types; i++) { - type = &gvt->types[i]; - - group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); - if (WARN_ON(!group)) - goto unwind; - - group->name = type->name; - group->attrs = type_attrs; - intel_vgpu_type_groups[i] = group; - } - - return true; - -unwind: - for (j = 0; j < i; j++) { - group = intel_vgpu_type_groups[j]; - kfree(group); - } - - return false; -} - -static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) -{ - int i; - struct attribute_group *group; - - for (i = 0; i < gvt->num_types; i++) { - group = intel_vgpu_type_groups[i]; - kfree(group); - } -} - static void kvmgt_protect_table_init(struct kvmgt_guest_info *info) { hash_init(info->ptable); @@ -430,6 +328,108 @@ static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, } } +static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - + VFIO_PCI_NUM_REGIONS; + void *base = vgpu->vdev.region[i].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (pos >= vgpu->vdev.region[i].size || iswrite) { + gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n"); + return -EINVAL; + } + count = min(count, (size_t)(vgpu->vdev.region[i].size - pos)); + memcpy(buf, base + pos, count); + + return count; +} + +static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu, + struct vfio_region *region) +{ +} + +static const struct intel_vgpu_regops intel_vgpu_regops_opregion = { + .rw = intel_vgpu_reg_rw_opregion, + .release = intel_vgpu_reg_release_opregion, +}; + +static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, + unsigned int type, unsigned int subtype, + const struct intel_vgpu_regops *ops, + size_t size, u32 flags, void *data) +{ + struct vfio_region *region; + + region = krealloc(vgpu->vdev.region, + (vgpu->vdev.num_regions + 1) * sizeof(*region), + GFP_KERNEL); + if (!region) + return -ENOMEM; + + vgpu->vdev.region = region; + vgpu->vdev.region[vgpu->vdev.num_regions].type = type; + vgpu->vdev.region[vgpu->vdev.num_regions].subtype = subtype; + vgpu->vdev.region[vgpu->vdev.num_regions].ops = ops; + vgpu->vdev.region[vgpu->vdev.num_regions].size = size; + vgpu->vdev.region[vgpu->vdev.num_regions].flags = flags; + vgpu->vdev.region[vgpu->vdev.num_regions].data = data; + vgpu->vdev.num_regions++; + return 0; +} + +static int kvmgt_get_vfio_device(void *p_vgpu) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + + vgpu->vdev.vfio_device = vfio_device_get_from_dev( + mdev_dev(vgpu->vdev.mdev)); + if (!vgpu->vdev.vfio_device) { + gvt_vgpu_err("failed to get vfio device\n"); + return -ENODEV; + } + return 0; +} + + +static int kvmgt_set_opregion(void *p_vgpu) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + void *base; + int ret; + + /* Each vgpu has its own opregion, although VFIO would create another + * 
one later. This one is used to expose opregion to VFIO. And the + * other one created by VFIO later, is used by guest actually. + */ + base = vgpu_opregion(vgpu)->va; + if (!base) + return -ENOMEM; + + if (memcmp(base, OPREGION_SIGNATURE, 16)) { + memunmap(base); + return -EINVAL; + } + + ret = intel_vgpu_register_reg(vgpu, + PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, + &intel_vgpu_regops_opregion, OPREGION_SIZE, + VFIO_REGION_INFO_FLAG_READ, base); + + return ret; +} + +static void kvmgt_put_vfio_device(void *vgpu) +{ + if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device)) + return; + + vfio_device_put(((struct intel_vgpu *)vgpu)->vdev.vfio_device); +} + static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) { struct intel_vgpu *vgpu = NULL; @@ -441,7 +441,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) pdev = mdev_parent_dev(mdev); gvt = kdev_to_i915(pdev)->gvt; - type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); + type = intel_gvt_ops->gvt_find_vgpu_type(gvt, kobject_name(kobj)); if (!type) { gvt_vgpu_err("failed to find type %s to create\n", kobject_name(kobj)); @@ -609,21 +609,20 @@ static void intel_vgpu_release_work(struct work_struct *work) __intel_vgpu_release(vgpu); } -static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu) +static uint64_t intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) { u32 start_lo, start_hi; u32 mem_type; - int pos = PCI_BASE_ADDRESS_0; - start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) & + start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) & PCI_BASE_ADDRESS_MEM_MASK; - mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) & + mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) & PCI_BASE_ADDRESS_MEM_TYPE_MASK; switch (mem_type) { case PCI_BASE_ADDRESS_MEM_TYPE_64: start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space - + pos + 4)); + + bar + 4)); break; case PCI_BASE_ADDRESS_MEM_TYPE_32: case PCI_BASE_ADDRESS_MEM_TYPE_1M: @@ -637,6 +636,54 @@ static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu) return ((u64)start_hi << 32) | start_lo; } +static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, uint64_t off, + void *buf, unsigned int count, bool is_write) +{ + uint64_t bar_start = intel_vgpu_get_bar_addr(vgpu, bar); + int ret; + + if (is_write) + ret = intel_gvt_ops->emulate_mmio_write(vgpu, + bar_start + off, buf, count); + else + ret = intel_gvt_ops->emulate_mmio_read(vgpu, + bar_start + off, buf, count); + return ret; +} + +static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, uint64_t off) +{ + return off >= vgpu_aperture_offset(vgpu) && + off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu); +} + +static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, uint64_t off, + void *buf, unsigned long count, bool is_write) +{ + void *aperture_va; + + if (!intel_vgpu_in_aperture(vgpu, off) || + !intel_vgpu_in_aperture(vgpu, off + count)) { + gvt_vgpu_err("Invalid aperture offset %llu\n", off); + return -EINVAL; + } + + aperture_va = io_mapping_map_wc(&vgpu->gvt->dev_priv->ggtt.iomap, + ALIGN_DOWN(off, PAGE_SIZE), + count + offset_in_page(off)); + if (!aperture_va) + return -EIO; + + if (is_write) + memcpy(aperture_va + offset_in_page(off), buf, count); + else + memcpy(buf, aperture_va + offset_in_page(off), count); + + io_mapping_unmap(aperture_va); + + return 0; +} + static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, 
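
Once kvmgt_set_opregion() has registered the OpRegion as a device-specific VFIO region, userspace reaches it through a region index past the fixed PCI set. A hypothetical userspace fragment (device_fd and the buffer size are assumptions of this sketch; QEMU discovers the index properly via the region-info type capability rather than assuming it comes first):

#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/vfio.h>

	struct vfio_region_info info = {
		.argsz = sizeof(info),
		.index = VFIO_PCI_NUM_REGIONS,	/* first device-specific region */
	};
	char buf[8192];

	if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info) == 0 &&
	    (info.flags & VFIO_REGION_INFO_FLAG_READ) &&
	    pread(device_fd, buf, sizeof(buf) < info.size ?
			sizeof(buf) : info.size, info.offset) > 0)
		printf("opregion signature: %.16s\n", buf);

The region is read-only by construction, since only VFIO_REGION_INFO_FLAG_READ is passed when it is registered, which matches intel_vgpu_reg_rw_opregion() above rejecting any iswrite access.
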
size_t count, loff_t *ppos, bool is_write) { @@ -646,7 +693,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, int ret = -EINVAL; - if (index >= VFIO_PCI_NUM_REGIONS) { + if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) { gvt_vgpu_err("invalid index: %u\n", index); return -EINVAL; } @@ -661,32 +708,50 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, buf, count); break; case VFIO_PCI_BAR0_REGION_INDEX: - case VFIO_PCI_BAR1_REGION_INDEX: - if (is_write) { - uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu); - - ret = intel_gvt_ops->emulate_mmio_write(vgpu, - bar0_start + pos, buf, count); - } else { - uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu); - - ret = intel_gvt_ops->emulate_mmio_read(vgpu, - bar0_start + pos, buf, count); - } + ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos, + buf, count, is_write); break; case VFIO_PCI_BAR2_REGION_INDEX: + ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write); + break; + case VFIO_PCI_BAR1_REGION_INDEX: case VFIO_PCI_BAR3_REGION_INDEX: case VFIO_PCI_BAR4_REGION_INDEX: case VFIO_PCI_BAR5_REGION_INDEX: case VFIO_PCI_VGA_REGION_INDEX: case VFIO_PCI_ROM_REGION_INDEX: + break; default: - gvt_vgpu_err("unsupported region: %u\n", index); + if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) + return -EINVAL; + + index -= VFIO_PCI_NUM_REGIONS; + return vgpu->vdev.region[index].ops->rw(vgpu, buf, count, + ppos, is_write); } return ret == 0 ? count : ret; } +static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos) +{ + struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + struct intel_gvt *gvt = vgpu->gvt; + int offset; + + /* Only allow MMIO GGTT entry access */ + if (index != PCI_BASE_ADDRESS_0) + return false; + + offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) - + intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0); + + return (offset >= gvt->device_info.gtt_start_offset && + offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ? 
+ true : false; +} + static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, size_t count, loff_t *ppos) { @@ -696,7 +761,21 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, while (count) { size_t filled; - if (count >= 4 && !(*ppos % 4)) { + /* Only support GGTT entry 8 bytes read */ + if (count >= 8 && !(*ppos % 8) && + gtt_entry(mdev, ppos)) { + u64 val; + + ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ppos, false); + if (ret <= 0) + goto read_err; + + if (copy_to_user(buf, &val, sizeof(val))) + goto read_err; + + filled = 8; + } else if (count >= 4 && !(*ppos % 4)) { u32 val; ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), @@ -756,7 +835,21 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev, while (count) { size_t filled; - if (count >= 4 && !(*ppos % 4)) { + /* Only support GGTT entry 8 bytes write */ + if (count >= 8 && !(*ppos % 8) && + gtt_entry(mdev, ppos)) { + u64 val; + + if (copy_from_user(&val, buf, sizeof(val))) + goto write_err; + + ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ppos, true); + if (ret <= 0) + goto write_err; + + filled = 8; + } else if (count >= 4 && !(*ppos % 4)) { u32 val; if (copy_from_user(&val, buf, sizeof(val))) @@ -944,7 +1037,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, info.flags = VFIO_DEVICE_FLAGS_PCI; info.flags |= VFIO_DEVICE_FLAGS_RESET; - info.num_regions = VFIO_PCI_NUM_REGIONS; + info.num_regions = VFIO_PCI_NUM_REGIONS + + vgpu->vdev.num_regions; info.num_irqs = VFIO_PCI_NUM_IRQS; return copy_to_user((void __user *)arg, &info, minsz) ? @@ -970,7 +1064,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, switch (info.index) { case VFIO_PCI_CONFIG_REGION_INDEX: info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); - info.size = INTEL_GVT_MAX_CFG_SPACE_SZ; + info.size = vgpu->gvt->device_info.cfg_space_size; info.flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE; break; @@ -1004,6 +1098,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, if (!sparse) return -ENOMEM; + sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; + sparse->header.version = 1; sparse->nr_areas = nr_areas; cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; sparse->areas[0].offset = @@ -1014,18 +1110,24 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, case VFIO_PCI_BAR3_REGION_INDEX ... 
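
The new 8-byte fast path exists because a gen8+ GGTT entry is a single 64-bit PTE: if a guest update were split into two 4-byte writes, the emulation could observe a transiently torn entry mixing old and new halves. gtt_entry() therefore gates the path to aligned 8-byte accesses that land inside the GGTT range of BAR0. From the other side of the boundary, a hypothetical userspace helper issuing such an update (bar0_region_offset comes from VFIO_DEVICE_GET_REGION_INFO for BAR0, gtt_start is the device-specific GGTT base within BAR0; both are assumptions of this sketch):

#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>

/* Write one 64-bit GGTT PTE with a single pwrite so the two halves
 * can never be seen torn by the emulation path. */
static int write_ggtt_pte(int device_fd, off_t bar0_region_offset,
			  uint64_t gtt_start, uint64_t index, uint64_t pte)
{
	off_t pos = bar0_region_offset + gtt_start + index * sizeof(pte);

	return pwrite(device_fd, &pte, sizeof(pte), pos) == sizeof(pte)
		? 0 : -1;
}
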
VFIO_PCI_BAR5_REGION_INDEX: info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.size = 0; - info.flags = 0; + gvt_dbg_core("get region info bar:%d\n", info.index); break; case VFIO_PCI_ROM_REGION_INDEX: case VFIO_PCI_VGA_REGION_INDEX: + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = 0; + info.flags = 0; + gvt_dbg_core("get region info index:%d\n", info.index); break; default: { - struct vfio_region_info_cap_type cap_type; + struct vfio_region_info_cap_type cap_type = { + .header.id = VFIO_REGION_INFO_CAP_TYPE, + .header.version = 1 }; if (info.index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) @@ -1042,8 +1144,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, cap_type.subtype = vgpu->vdev.region[i].subtype; ret = vfio_info_add_capability(&caps, - VFIO_REGION_INFO_CAP_TYPE, - &cap_type); + &cap_type.header, + sizeof(cap_type)); if (ret) return ret; } @@ -1053,8 +1155,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, switch (cap_type_id) { case VFIO_REGION_INFO_CAP_SPARSE_MMAP: ret = vfio_info_add_capability(&caps, - VFIO_REGION_INFO_CAP_SPARSE_MMAP, - sparse); + &sparse->header, sizeof(*sparse) + + (sparse->nr_areas * + sizeof(*sparse->areas))); kfree(sparse); if (ret) return ret; @@ -1065,6 +1168,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } if (caps.size) { + info.flags |= VFIO_REGION_INFO_FLAG_CAPS; if (info.argsz < sizeof(info) + caps.size) { info.argsz = sizeof(info) + caps.size; info.cap_offset = 0; @@ -1151,6 +1255,33 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } else if (cmd == VFIO_DEVICE_RESET) { intel_gvt_ops->vgpu_reset(vgpu); return 0; + } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) { + struct vfio_device_gfx_plane_info dmabuf; + int ret = 0; + + minsz = offsetofend(struct vfio_device_gfx_plane_info, + dmabuf_id); + if (copy_from_user(&dmabuf, (void __user *)arg, minsz)) + return -EFAULT; + if (dmabuf.argsz < minsz) + return -EINVAL; + + ret = intel_gvt_ops->vgpu_query_plane(vgpu, &dmabuf); + if (ret != 0) + return ret; + + return copy_to_user((void __user *)arg, &dmabuf, minsz) ? 
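
The capability hunks above adapt to a VFIO core API change: vfio_info_add_capability() no longer takes a capability id plus opaque data and fills in the header itself; the caller now passes a fully initialized header and the total capability size. The prototype implied by the callers here (treat the exact spelling as an inference):

int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap,
			     size_t size);

That is why sparse->header.id and sparse->header.version are now set explicitly, why cap_type is initialized with its header filled in, and why the sparse variant passes sizeof(*sparse) plus the size of the flexible areas[] tail.
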
+ -EFAULT : 0; + } else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) { + __u32 dmabuf_id; + __s32 dmabuf_fd; + + if (get_user(dmabuf_id, (__u32 __user *)arg)) + return -EFAULT; + + dmabuf_fd = intel_gvt_ops->vgpu_get_dmabuf(vgpu, dmabuf_id); + return dmabuf_fd; + } return 0; @@ -1180,7 +1311,7 @@ hw_id_show(struct device *dev, struct device_attribute *attr, struct intel_vgpu *vgpu = (struct intel_vgpu *) mdev_get_drvdata(mdev); return sprintf(buf, "%u\n", - vgpu->shadow_ctx->hw_id); + vgpu->submission.shadow_ctx->hw_id); } return sprintf(buf, "\n"); } @@ -1204,8 +1335,7 @@ static const struct attribute_group *intel_vgpu_groups[] = { NULL, }; -static const struct mdev_parent_ops intel_vgpu_ops = { - .supported_type_groups = intel_vgpu_type_groups, +static struct mdev_parent_ops intel_vgpu_ops = { .mdev_attr_groups = intel_vgpu_groups, .create = intel_vgpu_create, .remove = intel_vgpu_remove, @@ -1221,17 +1351,20 @@ static const struct mdev_parent_ops intel_vgpu_ops = { static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) { - if (!intel_gvt_init_vgpu_type_groups(gvt)) - return -EFAULT; + struct attribute **kvm_type_attrs; + struct attribute_group **kvm_vgpu_type_groups; intel_gvt_ops = ops; + if (!intel_gvt_ops->get_gvt_attrs(&kvm_type_attrs, + &kvm_vgpu_type_groups)) + return -EFAULT; + intel_vgpu_ops.supported_type_groups = kvm_vgpu_type_groups; return mdev_register_device(dev, &intel_vgpu_ops); } static void kvmgt_host_exit(struct device *dev, void *gvt) { - intel_gvt_cleanup_vgpu_type_groups(gvt); mdev_unregister_device(dev); } @@ -1311,8 +1444,8 @@ static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvmgt_guest_info, track_node); if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa))) - intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa, - (void *)val, len); + intel_gvt_ops->write_protect_handler(info->vgpu, gpa, + (void *)val, len); } static void kvmgt_page_track_flush_slot(struct kvm *kvm, @@ -1390,6 +1523,9 @@ static int kvmgt_guest_init(struct mdev_device *mdev) kvmgt_protect_table_init(info); gvt_cache_init(vgpu); + mutex_init(&vgpu->dmabuf_lock); + init_completion(&vgpu->vblank_done); + info->track_node.track_write = kvmgt_page_track_write; info->track_node.track_flush_slot = kvmgt_page_track_flush_slot; kvm_page_track_register_notifier(kvm, &info->track_node); @@ -1518,6 +1654,21 @@ static unsigned long kvmgt_virt_to_pfn(void *addr) return PFN_DOWN(__pa(addr)); } +static bool kvmgt_is_valid_gfn(unsigned long handle, unsigned long gfn) +{ + struct kvmgt_guest_info *info; + struct kvm *kvm; + + if (!handle_valid(handle)) + return false; + + info = (struct kvmgt_guest_info *)handle; + kvm = info->kvm; + + return kvm_is_visible_gfn(kvm, gfn); + +} + struct intel_gvt_mpt kvmgt_mpt = { .host_init = kvmgt_host_init, .host_exit = kvmgt_host_exit, @@ -1530,6 +1681,10 @@ struct intel_gvt_mpt kvmgt_mpt = { .read_gpa = kvmgt_read_gpa, .write_gpa = kvmgt_write_gpa, .gfn_to_mfn = kvmgt_gfn_to_pfn, + .set_opregion = kvmgt_set_opregion, + .get_vfio_device = kvmgt_get_vfio_device, + .put_vfio_device = kvmgt_put_vfio_device, + .is_valid_gfn = kvmgt_is_valid_gfn, }; EXPORT_SYMBOL_GPL(kvmgt_mpt); diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 980ec8906b1e..5c869e3fdf3b 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -45,8 +45,7 @@ */ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) { - u64 gttmmio_gpa = *(u64 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_0) & - 
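
These two ioctls are the new display UAPI for vGPUs without a local display: VFIO_DEVICE_QUERY_GFX_PLANE describes the guest's currently scanned-out plane and hands back an id, and VFIO_DEVICE_GET_GFX_DMABUF turns that id into a dma-buf fd the host compositor can sample. A hypothetical consumer fragment (device_fd is an assumption, error handling elided; the flow mirrors what QEMU's vfio display support does):

#include <sys/ioctl.h>
#include <linux/vfio.h>

	struct vfio_device_gfx_plane_info plane = {
		.argsz = sizeof(plane),
		.flags = VFIO_GFX_PLANE_TYPE_DMABUF,
		.drm_plane_type = 1,	/* DRM_PLANE_TYPE_PRIMARY */
	};
	int dmabuf_fd;

	if (ioctl(device_fd, VFIO_DEVICE_QUERY_GFX_PLANE, &plane) == 0) {
		dmabuf_fd = ioctl(device_fd, VFIO_DEVICE_GET_GFX_DMABUF,
				  &plane.dmabuf_id);
		/* dmabuf_fd can now be imported into EGL/DRM. */
	}

Note the asymmetry visible in the kernel code above: GET_GFX_DMABUF reads a bare __u32 id with get_user() and returns the new fd directly as the ioctl return value.
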
~GENMASK(3, 0); + u64 gttmmio_gpa = intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0); return gpa - gttmmio_gpa; } @@ -86,25 +85,6 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, else memcpy(pt, p_data, bytes); - } else if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { - struct intel_vgpu_guest_page *gp; - - /* Since we enter the failsafe mode early during guest boot, - * guest may not have chance to set up its ppgtt table, so - * there should not be any wp pages for guest. Keep the wp - * related code here in case we need to handle it in furture. - */ - gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT); - if (gp) { - /* remove write protection to prevent furture traps */ - intel_vgpu_clean_guest_page(vgpu, gp); - if (read) - intel_gvt_hypervisor_read_gpa(vgpu, pa, - p_data, bytes); - else - intel_gvt_hypervisor_write_gpa(vgpu, pa, - p_data, bytes); - } } mutex_unlock(&gvt->lock); } @@ -126,31 +106,12 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, unsigned int offset = 0; int ret = -EINVAL; - if (vgpu->failsafe) { failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true); return 0; } mutex_lock(&gvt->lock); - if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { - struct intel_vgpu_guest_page *gp; - - gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT); - if (gp) { - ret = intel_gvt_hypervisor_read_gpa(vgpu, pa, - p_data, bytes); - if (ret) { - gvt_vgpu_err("guest page read error %d, " - "gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n", - ret, gp->gfn, pa, *(u32 *)p_data, - bytes); - } - mutex_unlock(&gvt->lock); - return ret; - } - } - offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); if (WARN_ON(bytes > 8)) @@ -168,14 +129,12 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, p_data, bytes); if (ret) goto err; - mutex_unlock(&gvt->lock); - return ret; + goto out; } if (WARN_ON_ONCE(!reg_is_mmio(gvt, offset))) { ret = intel_gvt_hypervisor_read_gpa(vgpu, pa, p_data, bytes); - mutex_unlock(&gvt->lock); - return ret; + goto out; } if (WARN_ON(!reg_is_mmio(gvt, offset + bytes - 1))) @@ -191,11 +150,13 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, goto err; intel_gvt_mmio_set_accessed(gvt, offset); - mutex_unlock(&gvt->lock); - return 0; + ret = 0; + goto out; + err: gvt_vgpu_err("fail to emulate MMIO read %08x len %d\n", offset, bytes); +out: mutex_unlock(&gvt->lock); return ret; } @@ -224,24 +185,6 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, mutex_lock(&gvt->lock); - if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { - struct intel_vgpu_guest_page *gp; - - gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT); - if (gp) { - ret = gp->handler(gp, pa, p_data, bytes); - if (ret) { - gvt_err("guest page write error %d, " - "gfn 0x%lx, pa 0x%llx, " - "var 0x%x, len %d\n", - ret, gp->gfn, pa, - *(u32 *)p_data, bytes); - } - mutex_unlock(&gvt->lock); - return ret; - } - } - offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); if (WARN_ON(bytes > 8)) @@ -259,14 +202,12 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, p_data, bytes); if (ret) goto err; - mutex_unlock(&gvt->lock); - return ret; + goto out; } if (WARN_ON_ONCE(!reg_is_mmio(gvt, offset))) { ret = intel_gvt_hypervisor_write_gpa(vgpu, pa, p_data, bytes); - mutex_unlock(&gvt->lock); - return ret; + goto out; } ret = intel_vgpu_mmio_reg_rw(vgpu, offset, p_data, bytes, false); @@ -274,11 +215,12 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, 
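
intel_vgpu_gpa_to_mmio_offset() now leans on the same intel_vgpu_get_bar_gpa() helper that gtt_entry() uses, instead of open-coding the BAR parse with ~GENMASK(3, 0). The helper lives in gvt.h elsewhere in this series; paraphrased, under the assumption that the vGPU always exposes a 64-bit memory BAR:

static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar)
{
	/* We are a 64-bit BAR. */
	return (*(u64 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_MASK;
}

Reading the full u64 picks up both halves of the 64-bit BAR pair, and PCI_BASE_ADDRESS_MEM_MASK strips the low type bits, which is what the removed GENMASK(3, 0) expression was doing by hand.
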
uint64_t pa, goto err; intel_gvt_mmio_set_accessed(gvt, offset); - mutex_unlock(&gvt->lock); - return 0; + ret = 0; + goto out; err: gvt_vgpu_err("fail to emulate MMIO write %08x len %d\n", offset, bytes); +out: mutex_unlock(&gvt->lock); return ret; } @@ -299,10 +241,10 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr) memcpy(vgpu->mmio.vreg, mmio, info->mmio_size); memcpy(vgpu->mmio.sreg, mmio, info->mmio_size); - vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0; + vgpu_vreg_t(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0; /* set the bit 0:2(Core C-State ) to C0 */ - vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; + vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0; vgpu->mmio.disable_warn_untrack = false; } else { diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 32cd64ddad26..71b620875943 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -65,17 +65,16 @@ struct intel_gvt_mmio_info { struct hlist_node node; }; +int intel_gvt_render_mmio_to_ring_id(struct intel_gvt *gvt, + unsigned int reg); unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt); bool intel_gvt_match_device(struct intel_gvt *gvt, unsigned long device); int intel_gvt_setup_mmio_info(struct intel_gvt *gvt); void intel_gvt_clean_mmio_info(struct intel_gvt *gvt); - -#define INTEL_GVT_MMIO_OFFSET(reg) ({ \ - typeof(reg) __reg = reg; \ - u32 *offset = (u32 *)&__reg; \ - *offset; \ -}) +int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt, + int (*handler)(struct intel_gvt *gvt, u32 offset, void *data), + void *data); int intel_vgpu_init_mmio(struct intel_vgpu *vgpu); void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr); diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c new file mode 100644 index 000000000000..256f1bb522b7 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -0,0 +1,413 @@ +/* + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eddie Dong <eddie.dong@intel.com> + * Kevin Tian <kevin.tian@intel.com> + * + * Contributors: + * Zhi Wang <zhi.a.wang@intel.com> + * Changbin Du <changbin.du@intel.com> + * Zhenyu Wang <zhenyuw@linux.intel.com> + * Tina Zhang <tina.zhang@intel.com> + * Bing Niu <bing.niu@intel.com> + * + */ + +#include "i915_drv.h" +#include "gvt.h" +#include "trace.h" + +/** + * Defined in Intel Open Source PRM. 
+ * Ref: https://01.org/linuxgraphics/documentation/hardware-specification-prms + */ +#define TRVATTL3PTRDW(i) _MMIO(0x4de0 + (i)*4) +#define TRNULLDETCT _MMIO(0x4de8) +#define TRINVTILEDETCT _MMIO(0x4dec) +#define TRVADR _MMIO(0x4df0) +#define TRTTE _MMIO(0x4df4) +#define RING_EXCC(base) _MMIO((base) + 0x28) +#define RING_GFX_MODE(base) _MMIO((base) + 0x29c) +#define VF_GUARDBAND _MMIO(0x83a4) + +/* Raw offset is appened to each line for convenience. */ +static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = { + {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ + {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ + {RCS, INSTPM, 0xffff, true}, /* 0x20c0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ + {RCS, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ + {RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ + {RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ + {RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ + {RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ + {RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ + + {BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ + {BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ + {BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ + {BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ + {BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ + {RCS, INVALID_MMIO_REG, 0, false } /* Terminated */ +}; + +static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { + {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ + {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ + {RCS, INSTPM, 0xffff, true}, /* 0x20c0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ + {RCS, CACHE_MODE_1, 
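
Each entry in these tables names one register to save from the outgoing vGPU and restore for the incoming one on an engine switch. The entry layout, as mmio_context.h declares it elsewhere in this series (paraphrased; comments are interpretation):

struct engine_mmio {
	int ring_id;
	i915_reg_t reg;
	u32 mask;		/* non-zero: a masked register */
	bool in_context;	/* lives in the logical ring context image */
	u32 value;		/* host value cached while no vGPU owns HW */
};

A non-zero mask marks a masked register, where only bits selected by the high half of a write are updated, so the save path clears (mask << 16) from the captured value and the restore path writes the value back together with its mask half.
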
0xffff, true}, /* 0x7004 */ + {RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ + {RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ + {RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ + {RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ + {RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ + + {RCS, GEN8_PRIVATE_PAT_LO, 0, false}, /* 0x40e0 */ + {RCS, GEN8_PRIVATE_PAT_HI, 0, false}, /* 0x40e4 */ + {RCS, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */ + {RCS, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */ + {RCS, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */ + {RCS, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */ + {RCS, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */ + {RCS, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */ + {RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ + {RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */ + {RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */ + {RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */ + {RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */ + {RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */ + {RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */ + {RCS, TRINVTILEDETCT, 0, false}, /* 0x4dec */ + {RCS, TRVADR, 0, false}, /* 0x4df0 */ + {RCS, TRTTE, 0, false}, /* 0x4df4 */ + + {BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ + {BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ + {BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ + {BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ + {BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ + + {VCS2, RING_EXCC(GEN8_BSD2_RING_BASE), 0xffff, false}, /* 0x1c028 */ + + {VECS, RING_EXCC(VEBOX_RING_BASE), 0xffff, false}, /* 0x1a028 */ + + {RCS, GEN8_HDC_CHICKEN1, 0xffff, true}, /* 0x7304 */ + {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS, GEN7_UCGCTL4, 0x0, false}, /* 0x940c */ + {RCS, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */ + + {RCS, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */ + {RCS, GEN9_CSFE_CHICKEN1_RCS, 0x0, false}, /* 0x20d4 */ + + {RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ + {RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ + {RCS, FF_SLICE_CS_CHICKEN2, 0xffff, false}, /* 0x20e4 */ + {RCS, INVALID_MMIO_REG, 0, false } /* Terminated */ +}; + +static struct { + bool initialized; + u32 control_table[I915_NUM_ENGINES][64]; + u32 l3cc_table[32]; +} gen9_render_mocs; + +static void load_render_mocs(struct drm_i915_private *dev_priv) +{ + i915_reg_t offset; + u32 regs[] = { + [RCS] = 0xc800, + [VCS] = 0xc900, + [VCS2] = 0xca00, + [BCS] = 0xcc00, + [VECS] = 0xcb00, + }; + int ring_id, i; + + for (ring_id = 0; ring_id < ARRAY_SIZE(regs); ring_id++) { + offset.reg = regs[ring_id]; + for (i = 0; i < 64; i++) { + gen9_render_mocs.control_table[ring_id][i] = + I915_READ_FW(offset); + offset.reg += 4; + } + } + + offset.reg = 0xb020; + for (i = 0; i < 32; i++) { + gen9_render_mocs.l3cc_table[i] = + I915_READ_FW(offset); + offset.reg += 4; + } + gen9_render_mocs.initialized = true; +} + +static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_vgpu_submission *s = &vgpu->submission; + enum forcewake_domains fw; + i915_reg_t reg; + u32 regs[] = { + [RCS] = 0x4260, + [VCS] = 0x4264, + [VCS2] = 0x4268, + [BCS] = 0x426c, + [VECS] = 0x4270, + }; + + if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) + return; + + if (!test_and_clear_bit(ring_id, (void *)s->tlb_handle_pending)) + return; + + reg = _MMIO(regs[ring_id]); + + /* 
WaForceWakeRenderDuringMmioTLBInvalidate:skl + * we need to put a forcewake when invalidating RCS TLB caches, + * otherwise device can go to RC6 state and interrupt invalidation + * process + */ + fw = intel_uncore_forcewake_for_reg(dev_priv, reg, + FW_REG_READ | FW_REG_WRITE); + if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) + fw |= FORCEWAKE_RENDER; + + intel_uncore_forcewake_get(dev_priv, fw); + + I915_WRITE_FW(reg, 0x1); + + if (wait_for_atomic((I915_READ_FW(reg) == 0), 50)) + gvt_vgpu_err("timeout in invalidate ring (%d) tlb\n", ring_id); + else + vgpu_vreg_t(vgpu, reg) = 0; + + intel_uncore_forcewake_put(dev_priv, fw); + + gvt_dbg_core("invalidate TLB for ring %d\n", ring_id); +} + +static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, + int ring_id) +{ + struct drm_i915_private *dev_priv; + i915_reg_t offset, l3_offset; + u32 old_v, new_v; + + u32 regs[] = { + [RCS] = 0xc800, + [VCS] = 0xc900, + [VCS2] = 0xca00, + [BCS] = 0xcc00, + [VECS] = 0xcb00, + }; + int i; + + dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv; + if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) + return; + + if (!pre && !gen9_render_mocs.initialized) + load_render_mocs(dev_priv); + + offset.reg = regs[ring_id]; + for (i = 0; i < 64; i++) { + if (pre) + old_v = vgpu_vreg_t(pre, offset); + else + old_v = gen9_render_mocs.control_table[ring_id][i]; + if (next) + new_v = vgpu_vreg_t(next, offset); + else + new_v = gen9_render_mocs.control_table[ring_id][i]; + + if (old_v != new_v) + I915_WRITE_FW(offset, new_v); + + offset.reg += 4; + } + + if (ring_id == RCS) { + l3_offset.reg = 0xb020; + for (i = 0; i < 32; i++) { + if (pre) + old_v = vgpu_vreg_t(pre, l3_offset); + else + old_v = gen9_render_mocs.l3cc_table[i]; + if (next) + new_v = vgpu_vreg_t(next, l3_offset); + else + new_v = gen9_render_mocs.l3cc_table[i]; + + if (old_v != new_v) + I915_WRITE_FW(l3_offset, new_v); + + l3_offset.reg += 4; + } + } +} + +#define CTX_CONTEXT_CONTROL_VAL 0x03 + +/* Switch ring mmio values (context). */ +static void switch_mmio(struct intel_vgpu *pre, + struct intel_vgpu *next, + int ring_id) +{ + struct drm_i915_private *dev_priv; + struct intel_vgpu_submission *s; + u32 *reg_state, ctx_ctrl; + u32 inhibit_mask = + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + struct engine_mmio *mmio; + u32 old_v, new_v; + + dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv; + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + switch_mocs(pre, next, ring_id); + + for (mmio = dev_priv->gvt->engine_mmio_list; + i915_mmio_reg_valid(mmio->reg); mmio++) { + if (mmio->ring_id != ring_id) + continue; + // save + if (pre) { + vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg); + if (mmio->mask) + vgpu_vreg_t(pre, mmio->reg) &= + ~(mmio->mask << 16); + old_v = vgpu_vreg_t(pre, mmio->reg); + } else + old_v = mmio->value = I915_READ_FW(mmio->reg); + + // restore + if (next) { + s = &next->submission; + reg_state = + s->shadow_ctx->engine[ring_id].lrc_reg_state; + ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL]; + /* + * if it is an inhibit context, load in_context mmio + * into HW by mmio write. If it is not, skip this mmio + * write. 
+ */ + if (mmio->in_context && + (ctx_ctrl & inhibit_mask) != inhibit_mask) + continue; + + if (mmio->mask) + new_v = vgpu_vreg_t(next, mmio->reg) | + (mmio->mask << 16); + else + new_v = vgpu_vreg_t(next, mmio->reg); + } else { + if (mmio->in_context) + continue; + if (mmio->mask) + new_v = mmio->value | (mmio->mask << 16); + else + new_v = mmio->value; + } + + I915_WRITE_FW(mmio->reg, new_v); + + trace_render_mmio(pre ? pre->id : 0, + next ? next->id : 0, + "switch", + i915_mmio_reg_offset(mmio->reg), + old_v, new_v); + } + + if (next) + handle_tlb_pending_event(next, ring_id); +} + +/** + * intel_gvt_switch_mmio - switch mmio context of specific engine + * @pre: the last vGPU that owned the engine + * @next: the vGPU to switch to + * @ring_id: specify the engine + * + * If pre is null, it indicates that the host owns the engine. If next is + * null, it indicates that we are switching to a host workload. + */ +void intel_gvt_switch_mmio(struct intel_vgpu *pre, + struct intel_vgpu *next, int ring_id) +{ + struct drm_i915_private *dev_priv; + + if (WARN_ON(!pre && !next)) + return; + + gvt_dbg_render("switch ring %d from %s to %s\n", ring_id, + pre ? "vGPU" : "host", next ? "vGPU" : "HOST"); + + dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv; + + /* + * We are using the raw mmio access wrapper to improve the + * performance of batch mmio read/write, so we need to + * handle forcewake manually. + */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + switch_mmio(pre, next, ring_id); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +/** + * intel_gvt_init_engine_mmio_context - Initialize the engine mmio list + * @gvt: GVT device + * + */ +void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) +{ + if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) + gvt->engine_mmio_list = gen9_engine_mmio_list; + else + gvt->engine_mmio_list = gen8_engine_mmio_list; +} diff --git a/drivers/gpu/drm/i915/gvt/render.h b/drivers/gpu/drm/i915/gvt/mmio_context.h index 91db1d39d28f..ca2c6a745673 100644 --- a/drivers/gpu/drm/i915/gvt/render.h +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h @@ -36,8 +36,17 @@ #ifndef __GVT_RENDER_H__ #define __GVT_RENDER_H__ +struct engine_mmio { + int ring_id; + i915_reg_t reg; + u32 mask; + bool in_context; + u32 value; +}; + void intel_gvt_switch_mmio(struct intel_vgpu *pre, struct intel_vgpu *next, int ring_id); +void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt); #endif diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index f0e5487e6688..81aff4eacbfe 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -154,51 +154,53 @@ static inline unsigned long intel_gvt_hypervisor_virt_to_mfn(void *p) } /** - * intel_gvt_hypervisor_set_wp_page - set a guest page to write-protected + * intel_gvt_hypervisor_enable_page_track - set a guest page to write-protected * @vgpu: a vGPU - * @p: intel_vgpu_guest_page + * @t: page track data structure * * Returns: * Zero on success, negative error code if failed.
*/ -static inline int intel_gvt_hypervisor_set_wp_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *p) +static inline int intel_gvt_hypervisor_enable_page_track( + struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t) { int ret; - if (p->writeprotection) + if (t->tracked) return 0; - ret = intel_gvt_host.mpt->set_wp_page(vgpu->handle, p->gfn); + ret = intel_gvt_host.mpt->set_wp_page(vgpu->handle, t->gfn); if (ret) return ret; - p->writeprotection = true; - atomic_inc(&vgpu->gtt.n_write_protected_guest_page); + t->tracked = true; + atomic_inc(&vgpu->gtt.n_tracked_guest_page); return 0; } /** - * intel_gvt_hypervisor_unset_wp_page - remove the write-protection of a + * intel_gvt_hypervisor_disable_page_track - remove the write-protection of a * guest page * @vgpu: a vGPU - * @p: intel_vgpu_guest_page + * @t: page track data structure * * Returns: * Zero on success, negative error code if failed. */ -static inline int intel_gvt_hypervisor_unset_wp_page(struct intel_vgpu *vgpu, - struct intel_vgpu_guest_page *p) +static inline int intel_gvt_hypervisor_disable_page_track( + struct intel_vgpu *vgpu, + struct intel_vgpu_page_track *t) { int ret; - if (!p->writeprotection) + if (!t->tracked) return 0; - ret = intel_gvt_host.mpt->unset_wp_page(vgpu->handle, p->gfn); + ret = intel_gvt_host.mpt->unset_wp_page(vgpu->handle, t->gfn); if (ret) return ret; - p->writeprotection = false; - atomic_dec(&vgpu->gtt.n_write_protected_guest_page); + t->tracked = false; + atomic_dec(&vgpu->gtt.n_tracked_guest_page); return 0; } @@ -292,4 +294,66 @@ static inline int intel_gvt_hypervisor_set_trap_area( return intel_gvt_host.mpt->set_trap_area(vgpu->handle, start, end, map); } +/** + * intel_gvt_hypervisor_set_opregion - Set opregion for guest + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_set_opregion(struct intel_vgpu *vgpu) +{ + if (!intel_gvt_host.mpt->set_opregion) + return 0; + + return intel_gvt_host.mpt->set_opregion(vgpu); +} + +/** + * intel_gvt_hypervisor_get_vfio_device - increase vfio device ref count + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_get_vfio_device(struct intel_vgpu *vgpu) +{ + if (!intel_gvt_host.mpt->get_vfio_device) + return 0; + + return intel_gvt_host.mpt->get_vfio_device(vgpu); +} + +/** + * intel_gvt_hypervisor_put_vfio_device - decrease vfio device ref count + * @vgpu: a vGPU + * + */ +static inline void intel_gvt_hypervisor_put_vfio_device(struct intel_vgpu *vgpu) +{ + if (!intel_gvt_host.mpt->put_vfio_device) + return; + + intel_gvt_host.mpt->put_vfio_device(vgpu); +} + +/** + * intel_gvt_hypervisor_is_valid_gfn - check if a gfn is valid and visible + * @vgpu: a vGPU + * @gfn: guest PFN + * + * Returns: + * true if the gfn is valid, false if not.
+ */ +static inline bool intel_gvt_hypervisor_is_valid_gfn( + struct intel_vgpu *vgpu, unsigned long gfn) +{ + if (!intel_gvt_host.mpt->is_valid_gfn) + return true; + + return intel_gvt_host.mpt->is_valid_gfn(vgpu->handle, gfn); +} + #endif /* _GVT_MPT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 311799136d7f..fa75a2eead90 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -25,36 +25,237 @@ #include "i915_drv.h" #include "gvt.h" -static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) +/* + * Note: Only for GVT-g virtual VBT generation; other code must + * not do this. + */ +#define _INTEL_BIOS_PRIVATE +#include "intel_vbt_defs.h" + +#define OPREGION_SIGNATURE "IntelGraphicsMem" +#define MBOX_VBT (1<<3) + +/* device handle */ +#define DEVICE_TYPE_CRT 0x01 +#define DEVICE_TYPE_EFP1 0x04 +#define DEVICE_TYPE_EFP2 0x40 +#define DEVICE_TYPE_EFP3 0x20 +#define DEVICE_TYPE_EFP4 0x10 + +#define DEV_SIZE 38 + +struct opregion_header { + u8 signature[16]; + u32 size; + u32 opregion_ver; + u8 bios_ver[32]; + u8 vbios_ver[16]; + u8 driver_ver[16]; + u32 mboxes; + u32 driver_model; + u32 pcon; + u8 dver[32]; + u8 rsvd[124]; +} __packed; + +struct bdb_data_header { + u8 id; + u16 size; /* data size */ +} __packed; + +struct efp_child_device_config { + u16 handle; + u16 device_type; + u16 device_class; + u8 i2c_speed; + u8 dp_onboard_redriver; /* 158 */ + u8 dp_ondock_redriver; /* 158 */ + u8 hdmi_level_shifter_value:4; /* 169 */ + u8 hdmi_max_data_rate:4; /* 204 */ + u16 dtd_buf_ptr; /* 161 */ + u8 edidless_efp:1; /* 161 */ + u8 compression_enable:1; /* 198 */ + u8 compression_method:1; /* 198 */ + u8 ganged_edp:1; /* 202 */ + u8 skip0:4; + u8 compression_structure_index:4; /* 198 */ + u8 skip1:4; + u8 slave_port; /* 202 */ + u8 skip2; + u8 dvo_port; + u8 i2c_pin; /* for add-in card */ + u8 slave_addr; /* for add-in card */ + u8 ddc_pin; + u16 edid_ptr; + u8 dvo_config; + u8 efp_docked_port:1; /* 158 */ + u8 lane_reversal:1; /* 184 */ + u8 onboard_lspcon:1; /* 192 */ + u8 iboost_enable:1; /* 196 */ + u8 hpd_invert:1; /* BXT 196 */ + u8 slip3:3; + u8 hdmi_compat:1; + u8 dp_compat:1; + u8 tmds_compat:1; + u8 skip4:5; + u8 aux_channel; + u8 dongle_detect; + u8 pipe_cap:2; + u8 sdvo_stall:1; /* 158 */ + u8 hpd_status:2; + u8 integrated_encoder:1; + u8 skip5:2; + u8 dvo_wiring; + u8 mipi_bridge_type; /* 171 */ + u16 device_class_ext; + u8 dvo_function; + u8 dp_usb_type_c:1; /* 195 */ + u8 skip6:7; + u8 dp_usb_type_c_2x_gpio_index; /* 195 */ + u16 dp_usb_type_c_2x_gpio_pin; /* 195 */ + u8 iboost_dp:4; /* 196 */ + u8 iboost_hdmi:4; /* 196 */ +} __packed; + +struct vbt { + /* header->bdb_offset point to bdb_header offset */ + struct vbt_header header; + struct bdb_header bdb_header; + + struct bdb_data_header general_features_header; + struct bdb_general_features general_features; + + struct bdb_data_header general_definitions_header; + struct bdb_general_definitions general_definitions; + + struct efp_child_device_config child0; + struct efp_child_device_config child1; + struct efp_child_device_config child2; + struct efp_child_device_config child3; + + struct bdb_data_header driver_features_header; + struct bdb_driver_features driver_features; +}; + +static void virt_vbt_generation(struct vbt *v) { - u8 *buf; - int i; + int num_child; + + memset(v, 0, sizeof(struct vbt)); + + v->header.signature[0] = '$'; + v->header.signature[1] = 'V'; + v->header.signature[2] = 'B'; + v->header.signature[3] = 'T';
+ + /* there are features that depend on the version! */ + v->header.version = 155; + v->header.header_size = sizeof(v->header); + v->header.vbt_size = sizeof(struct vbt) - sizeof(v->header); + v->header.bdb_offset = offsetof(struct vbt, bdb_header); + + strcpy(&v->bdb_header.signature[0], "BIOS_DATA_BLOCK"); + v->bdb_header.version = 186; /* child_dev_size = 38 */ + v->bdb_header.header_size = sizeof(v->bdb_header); + + v->bdb_header.bdb_size = sizeof(struct vbt) - sizeof(struct vbt_header) + - sizeof(struct bdb_header); + + /* general features */ + v->general_features_header.id = BDB_GENERAL_FEATURES; + v->general_features_header.size = sizeof(struct bdb_general_features); + v->general_features.int_crt_support = 0; + v->general_features.int_tv_support = 0; + + /* child device */ + num_child = 4; /* each port has one child */ + v->general_definitions_header.id = BDB_GENERAL_DEFINITIONS; + /* size will include child devices */ + v->general_definitions_header.size = + sizeof(struct bdb_general_definitions) + num_child * DEV_SIZE; + v->general_definitions.child_dev_size = DEV_SIZE; + + /* portA */ + v->child0.handle = DEVICE_TYPE_EFP1; + v->child0.device_type = DEVICE_TYPE_DP; + v->child0.dvo_port = DVO_PORT_DPA; + v->child0.aux_channel = DP_AUX_A; + v->child0.dp_compat = true; + v->child0.integrated_encoder = true; + + /* portB */ + v->child1.handle = DEVICE_TYPE_EFP2; + v->child1.device_type = DEVICE_TYPE_DP; + v->child1.dvo_port = DVO_PORT_DPB; + v->child1.aux_channel = DP_AUX_B; + v->child1.dp_compat = true; + v->child1.integrated_encoder = true; + + /* portC */ + v->child2.handle = DEVICE_TYPE_EFP3; + v->child2.device_type = DEVICE_TYPE_DP; + v->child2.dvo_port = DVO_PORT_DPC; + v->child2.aux_channel = DP_AUX_C; + v->child2.dp_compat = true; + v->child2.integrated_encoder = true; + + /* portD */ + v->child3.handle = DEVICE_TYPE_EFP4; + v->child3.device_type = DEVICE_TYPE_DP; + v->child3.dvo_port = DVO_PORT_DPD; + v->child3.aux_channel = DP_AUX_D; + v->child3.dp_compat = true; + v->child3.integrated_encoder = true; + + /* driver features */ + v->driver_features_header.id = BDB_DRIVER_FEATURES; + v->driver_features_header.size = sizeof(struct bdb_driver_features); + v->driver_features.lvds_config = BDB_DRIVER_FEATURE_NO_LVDS; +} - if (WARN((vgpu_opregion(vgpu)->va), - "vgpu%d: opregion has been initialized already.\n", - vgpu->id)) - return -EINVAL; +/** + * intel_vgpu_init_opregion - initialize the stuff used to emulate opregion + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed.
+ */ +int intel_vgpu_init_opregion(struct intel_vgpu *vgpu) +{ + u8 *buf; + struct opregion_header *header; + struct vbt v; + const char opregion_signature[16] = OPREGION_SIGNATURE; + gvt_dbg_core("init vgpu%d opregion\n", vgpu->id); vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(INTEL_GVT_OPREGION_SIZE)); - - if (!vgpu_opregion(vgpu)->va) + if (!vgpu_opregion(vgpu)->va) { + gvt_err("fail to get memory for vgpu virt opregion\n"); return -ENOMEM; + } - memcpy(vgpu_opregion(vgpu)->va, vgpu->gvt->opregion.opregion_va, - INTEL_GVT_OPREGION_SIZE); - - for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) - vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; + /* emulated opregion with VBT mailbox only */ + buf = (u8 *)vgpu_opregion(vgpu)->va; + header = (struct opregion_header *)buf; + memcpy(header->signature, opregion_signature, + sizeof(opregion_signature)); + header->size = 0x8; + header->opregion_ver = 0x02000000; + header->mboxes = MBOX_VBT; /* for unknown reason, the value in LID field is incorrect * which block the windows guest, so workaround it by force * setting it to "OPEN" */ - buf = (u8 *)vgpu_opregion(vgpu)->va; buf[INTEL_GVT_OPREGION_CLID] = 0x3; + /* emulated vbt from virt vbt generation */ + virt_vbt_generation(&v); + memcpy(buf + INTEL_GVT_OPREGION_VBT_OFFSET, &v, sizeof(struct vbt)); + return 0; } @@ -79,93 +280,80 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) return ret; } } - return 0; -} -/** - * intel_vgpu_clean_opregion - clean the stuff used to emulate opregion - * @vgpu: a vGPU - * - */ -void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu) -{ - gvt_dbg_core("vgpu%d: clean vgpu opregion\n", vgpu->id); - - if (!vgpu_opregion(vgpu)->va) - return; - - if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) { - map_vgpu_opregion(vgpu, false); - free_pages((unsigned long)vgpu_opregion(vgpu)->va, - get_order(INTEL_GVT_OPREGION_SIZE)); + vgpu_opregion(vgpu)->mapped = map; - vgpu_opregion(vgpu)->va = NULL; - } + return 0; } /** - * intel_vgpu_init_opregion - initialize the stuff used to emulate opregion + * intel_vgpu_opregion_base_write_handler - Opregion base register write handler + * * @vgpu: a vGPU + * @gpa: guest physical address of opregion * * Returns: * Zero on success, negative error code if failed. */ -int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa) +int intel_vgpu_opregion_base_write_handler(struct intel_vgpu *vgpu, u32 gpa) { - int ret; - gvt_dbg_core("vgpu%d: init vgpu opregion\n", vgpu->id); + int i, ret = 0; - if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) { - gvt_dbg_core("emulate opregion from kernel\n"); + gvt_dbg_core("emulate opregion from kernel\n"); - ret = init_vgpu_opregion(vgpu, gpa); - if (ret) - return ret; + switch (intel_gvt_host.hypervisor_type) { + case INTEL_GVT_HYPERVISOR_KVM: + for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) + vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; + break; + case INTEL_GVT_HYPERVISOR_XEN: + /* + * Windows guests on XenGT will write this register twice: xen + * hvmloader and the Windows graphics driver.
+ */ + if (vgpu_opregion(vgpu)->mapped) + map_vgpu_opregion(vgpu, false); + + for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) + vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; ret = map_vgpu_opregion(vgpu, true); - if (ret) - return ret; + break; + default: + ret = -EINVAL; + gvt_vgpu_err("not supported hypervisor\n"); } - return 0; -} - -/** - * intel_gvt_clean_opregion - clean host opergion related stuffs - * @gvt: a GVT device - * - */ -void intel_gvt_clean_opregion(struct intel_gvt *gvt) -{ - memunmap(gvt->opregion.opregion_va); - gvt->opregion.opregion_va = NULL; + return ret; } /** - * intel_gvt_init_opregion - initialize host opergion related stuffs - * @gvt: a GVT device + * intel_vgpu_clean_opregion - clean the stuff used to emulate opregion + * @vgpu: a vGPU * - * Returns: - * Zero on success, negative error code if failed. */ -int intel_gvt_init_opregion(struct intel_gvt *gvt) +void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu) { - gvt_dbg_core("init host opregion\n"); + gvt_dbg_core("vgpu%d: clean vgpu opregion\n", vgpu->id); - pci_read_config_dword(gvt->dev_priv->drm.pdev, INTEL_GVT_PCI_OPREGION, - &gvt->opregion.opregion_pa); + if (!vgpu_opregion(vgpu)->va) + return; - gvt->opregion.opregion_va = memremap(gvt->opregion.opregion_pa, - INTEL_GVT_OPREGION_SIZE, MEMREMAP_WB); - if (!gvt->opregion.opregion_va) { - gvt_err("fail to map host opregion\n"); - return -EFAULT; + if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) { + if (vgpu_opregion(vgpu)->mapped) + map_vgpu_opregion(vgpu, false); + } else if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_KVM) { + /* Guest opregion is released by VFIO */ } - return 0; + free_pages((unsigned long)vgpu_opregion(vgpu)->va, + get_order(INTEL_GVT_OPREGION_SIZE)); + + vgpu_opregion(vgpu)->va = NULL; + } + #define GVT_OPREGION_FUNC(scic) \ ({ \ u32 __ret; \ @@ -281,11 +469,45 @@ static bool querying_capabilities(u32 scic) */ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci) { - u32 *scic, *parm; + u32 scic, parm; u32 func, subfunc; + u64 scic_pa = 0, parm_pa = 0; + int ret; + + switch (intel_gvt_host.hypervisor_type) { + case INTEL_GVT_HYPERVISOR_XEN: + scic = *((u32 *)vgpu_opregion(vgpu)->va + + INTEL_GVT_OPREGION_SCIC); + parm = *((u32 *)vgpu_opregion(vgpu)->va + + INTEL_GVT_OPREGION_PARM); + break; + case INTEL_GVT_HYPERVISOR_KVM: + scic_pa = (vgpu_opregion(vgpu)->gfn[0] << PAGE_SHIFT) + + INTEL_GVT_OPREGION_SCIC; + parm_pa = (vgpu_opregion(vgpu)->gfn[0] << PAGE_SHIFT) + + INTEL_GVT_OPREGION_PARM; + + ret = intel_gvt_hypervisor_read_gpa(vgpu, scic_pa, + &scic, sizeof(scic)); + if (ret) { + gvt_vgpu_err("guest opregion read error %d, gpa 0x%llx, len %lu\n", + ret, scic_pa, sizeof(scic)); + return ret; + } - scic = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_SCIC; - parm = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_PARM; + ret = intel_gvt_hypervisor_read_gpa(vgpu, parm_pa, + &parm, sizeof(parm)); + if (ret) { + gvt_vgpu_err("guest opregion read error %d, gpa 0x%llx, len %lu\n", + ret, parm_pa, sizeof(parm)); + return ret; + } + + break; + default: + gvt_vgpu_err("not supported hypervisor\n"); + return -EINVAL; + } if (!(swsci & SWSCI_SCI_SELECT)) { gvt_vgpu_err("requesting SMI service\n"); @@ -298,9 +520,9 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci) return 0; } - func = GVT_OPREGION_FUNC(*scic); - subfunc = GVT_OPREGION_SUBFUNC(*scic); - if (!querying_capabilities(*scic)) { + func = GVT_OPREGION_FUNC(scic); + subfunc = GVT_OPREGION_SUBFUNC(scic);
+ if (!querying_capabilities(scic)) { gvt_vgpu_err("requesting runtime service: func \"%s\"," " subfunc \"%s\"\n", opregion_func_name(func), @@ -309,11 +531,43 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci) * emulate exit status of function call, '0' means * "failure, generic, unsupported or unknown cause" */ - *scic &= ~OPREGION_SCIC_EXIT_MASK; - return 0; + scic &= ~OPREGION_SCIC_EXIT_MASK; + goto out; + } + + scic = 0; + parm = 0; + +out: + switch (intel_gvt_host.hypervisor_type) { + case INTEL_GVT_HYPERVISOR_XEN: + *((u32 *)vgpu_opregion(vgpu)->va + + INTEL_GVT_OPREGION_SCIC) = scic; + *((u32 *)vgpu_opregion(vgpu)->va + + INTEL_GVT_OPREGION_PARM) = parm; + break; + case INTEL_GVT_HYPERVISOR_KVM: + ret = intel_gvt_hypervisor_write_gpa(vgpu, scic_pa, + &scic, sizeof(scic)); + if (ret) { + gvt_vgpu_err("guest opregion write error %d, gpa 0x%llx, len %lu\n", + ret, scic_pa, sizeof(scic)); + return ret; + } + + ret = intel_gvt_hypervisor_write_gpa(vgpu, parm_pa, + &parm, sizeof(parm)); + if (ret) { + gvt_vgpu_err("guest opregion write error %d, gpa 0x%llx, len %lu\n", + ret, parm_pa, sizeof(parm)); + return ret; + } + + break; + default: + gvt_vgpu_err("not supported hypervisor\n"); + return -EINVAL; } - *scic = 0; - *parm = 0; return 0; } diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h index 7d01c77a0f7a..d4f7ce6dc1d7 100644 --- a/drivers/gpu/drm/i915/gvt/reg.h +++ b/drivers/gpu/drm/i915/gvt/reg.h @@ -51,6 +51,9 @@ #define INTEL_GVT_OPREGION_PAGES 2 #define INTEL_GVT_OPREGION_SIZE (INTEL_GVT_OPREGION_PAGES * PAGE_SIZE) +#define INTEL_GVT_OPREGION_VBT_OFFSET 0x400 +#define INTEL_GVT_OPREGION_VBT_SIZE \ + (INTEL_GVT_OPREGION_SIZE - INTEL_GVT_OPREGION_VBT_OFFSET) #define VGT_SPRSTRIDE(pipe) _PIPE(pipe, _SPRA_STRIDE, _PLANE_STRIDE_2_B) @@ -71,6 +74,7 @@ #define RB_HEAD_OFF_MASK ((1U << 21) - (1U << 2)) #define RB_TAIL_OFF_MASK ((1U << 21) - (1U << 3)) #define RB_TAIL_SIZE_MASK ((1U << 21) - (1U << 12)) -#define _RING_CTL_BUF_SIZE(ctl) (((ctl) & RB_TAIL_SIZE_MASK) + GTT_PAGE_SIZE) +#define _RING_CTL_BUF_SIZE(ctl) (((ctl) & RB_TAIL_SIZE_MASK) + \ + I915_GTT_PAGE_SIZE) #endif diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c deleted file mode 100644 index 2ea542257f03..000000000000 --- a/drivers/gpu/drm/i915/gvt/render.c +++ /dev/null @@ -1,405 +0,0 @@ -/* - * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Eddie Dong <eddie.dong@intel.com> - * Kevin Tian <kevin.tian@intel.com> - * - * Contributors: - * Zhi Wang <zhi.a.wang@intel.com> - * Changbin Du <changbin.du@intel.com> - * Zhenyu Wang <zhenyuw@linux.intel.com> - * Tina Zhang <tina.zhang@intel.com> - * Bing Niu <bing.niu@intel.com> - * - */ - -#include "i915_drv.h" -#include "gvt.h" -#include "trace.h" - -struct render_mmio { - int ring_id; - i915_reg_t reg; - u32 mask; - bool in_context; - u32 value; -}; - -static struct render_mmio gen8_render_mmio_list[] __cacheline_aligned = { - {RCS, _MMIO(0x229c), 0xffff, false}, - {RCS, _MMIO(0x2248), 0x0, false}, - {RCS, _MMIO(0x2098), 0x0, false}, - {RCS, _MMIO(0x20c0), 0xffff, true}, - {RCS, _MMIO(0x24d0), 0, false}, - {RCS, _MMIO(0x24d4), 0, false}, - {RCS, _MMIO(0x24d8), 0, false}, - {RCS, _MMIO(0x24dc), 0, false}, - {RCS, _MMIO(0x24e0), 0, false}, - {RCS, _MMIO(0x24e4), 0, false}, - {RCS, _MMIO(0x24e8), 0, false}, - {RCS, _MMIO(0x24ec), 0, false}, - {RCS, _MMIO(0x24f0), 0, false}, - {RCS, _MMIO(0x24f4), 0, false}, - {RCS, _MMIO(0x24f8), 0, false}, - {RCS, _MMIO(0x24fc), 0, false}, - {RCS, _MMIO(0x7004), 0xffff, true}, - {RCS, _MMIO(0x7008), 0xffff, true}, - {RCS, _MMIO(0x7000), 0xffff, true}, - {RCS, _MMIO(0x7010), 0xffff, true}, - {RCS, _MMIO(0x7300), 0xffff, true}, - {RCS, _MMIO(0x83a4), 0xffff, true}, - - {BCS, _MMIO(0x2229c), 0xffff, false}, - {BCS, _MMIO(0x2209c), 0xffff, false}, - {BCS, _MMIO(0x220c0), 0xffff, false}, - {BCS, _MMIO(0x22098), 0x0, false}, - {BCS, _MMIO(0x22028), 0x0, false}, -}; - -static struct render_mmio gen9_render_mmio_list[] __cacheline_aligned = { - {RCS, _MMIO(0x229c), 0xffff, false}, - {RCS, _MMIO(0x2248), 0x0, false}, - {RCS, _MMIO(0x2098), 0x0, false}, - {RCS, _MMIO(0x20c0), 0xffff, true}, - {RCS, _MMIO(0x24d0), 0, false}, - {RCS, _MMIO(0x24d4), 0, false}, - {RCS, _MMIO(0x24d8), 0, false}, - {RCS, _MMIO(0x24dc), 0, false}, - {RCS, _MMIO(0x24e0), 0, false}, - {RCS, _MMIO(0x24e4), 0, false}, - {RCS, _MMIO(0x24e8), 0, false}, - {RCS, _MMIO(0x24ec), 0, false}, - {RCS, _MMIO(0x24f0), 0, false}, - {RCS, _MMIO(0x24f4), 0, false}, - {RCS, _MMIO(0x24f8), 0, false}, - {RCS, _MMIO(0x24fc), 0, false}, - {RCS, _MMIO(0x7004), 0xffff, true}, - {RCS, _MMIO(0x7008), 0xffff, true}, - {RCS, _MMIO(0x7000), 0xffff, true}, - {RCS, _MMIO(0x7010), 0xffff, true}, - {RCS, _MMIO(0x7300), 0xffff, true}, - {RCS, _MMIO(0x83a4), 0xffff, true}, - - {RCS, _MMIO(0x40e0), 0, false}, - {RCS, _MMIO(0x40e4), 0, false}, - {RCS, _MMIO(0x2580), 0xffff, true}, - {RCS, _MMIO(0x7014), 0xffff, true}, - {RCS, _MMIO(0x20ec), 0xffff, false}, - {RCS, _MMIO(0xb118), 0, false}, - {RCS, _MMIO(0xe100), 0xffff, true}, - {RCS, _MMIO(0xe180), 0xffff, true}, - {RCS, _MMIO(0xe184), 0xffff, true}, - {RCS, _MMIO(0xe188), 0xffff, true}, - {RCS, _MMIO(0xe194), 0xffff, true}, - {RCS, _MMIO(0x4de0), 0, false}, - {RCS, _MMIO(0x4de4), 0, false}, - {RCS, _MMIO(0x4de8), 0, false}, - {RCS, _MMIO(0x4dec), 0, false}, - {RCS, _MMIO(0x4df0), 0, false}, - {RCS, _MMIO(0x4df4), 0, false}, - - {BCS, _MMIO(0x2229c), 0xffff, false}, - {BCS, _MMIO(0x2209c), 0xffff, false}, - {BCS, _MMIO(0x220c0), 0xffff, false}, - {BCS, _MMIO(0x22098), 0x0, false}, - {BCS, _MMIO(0x22028), 0x0, false}, - - {VCS2, _MMIO(0x1c028), 0xffff, false}, - - {VECS, 
_MMIO(0x1a028), 0xffff, false}, - - {RCS, _MMIO(0x7304), 0xffff, true}, - {RCS, _MMIO(0x2248), 0x0, false}, - {RCS, _MMIO(0x940c), 0x0, false}, - {RCS, _MMIO(0x4ab8), 0x0, false}, - - {RCS, _MMIO(0x4ab0), 0x0, false}, - {RCS, _MMIO(0x20d4), 0x0, false}, - - {RCS, _MMIO(0xb004), 0x0, false}, - {RCS, _MMIO(0x20a0), 0x0, false}, - {RCS, _MMIO(0x20e4), 0xffff, false}, -}; - -static u32 gen9_render_mocs[I915_NUM_ENGINES][64]; -static u32 gen9_render_mocs_L3[32]; - -static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - enum forcewake_domains fw; - i915_reg_t reg; - u32 regs[] = { - [RCS] = 0x4260, - [VCS] = 0x4264, - [VCS2] = 0x4268, - [BCS] = 0x426c, - [VECS] = 0x4270, - }; - - if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) - return; - - if (!test_and_clear_bit(ring_id, (void *)vgpu->tlb_handle_pending)) - return; - - reg = _MMIO(regs[ring_id]); - - /* WaForceWakeRenderDuringMmioTLBInvalidate:skl - * we need to put a forcewake when invalidating RCS TLB caches, - * otherwise device can go to RC6 state and interrupt invalidation - * process - */ - fw = intel_uncore_forcewake_for_reg(dev_priv, reg, - FW_REG_READ | FW_REG_WRITE); - if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) - fw |= FORCEWAKE_RENDER; - - intel_uncore_forcewake_get(dev_priv, fw); - - I915_WRITE_FW(reg, 0x1); - - if (wait_for_atomic((I915_READ_FW(reg) == 0), 50)) - gvt_vgpu_err("timeout in invalidate ring (%d) tlb\n", ring_id); - else - vgpu_vreg(vgpu, regs[ring_id]) = 0; - - intel_uncore_forcewake_put(dev_priv, fw); - - gvt_dbg_core("invalidate TLB for ring %d\n", ring_id); -} - -static void load_mocs(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - i915_reg_t offset, l3_offset; - u32 regs[] = { - [RCS] = 0xc800, - [VCS] = 0xc900, - [VCS2] = 0xca00, - [BCS] = 0xcc00, - [VECS] = 0xcb00, - }; - int i; - - if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) - return; - - offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { - gen9_render_mocs[ring_id][i] = I915_READ_FW(offset); - I915_WRITE(offset, vgpu_vreg(vgpu, offset)); - offset.reg += 4; - } - - if (ring_id == RCS) { - l3_offset.reg = 0xb020; - for (i = 0; i < 32; i++) { - gen9_render_mocs_L3[i] = I915_READ_FW(l3_offset); - I915_WRITE_FW(l3_offset, vgpu_vreg(vgpu, l3_offset)); - l3_offset.reg += 4; - } - } -} - -static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - i915_reg_t offset, l3_offset; - u32 regs[] = { - [RCS] = 0xc800, - [VCS] = 0xc900, - [VCS2] = 0xca00, - [BCS] = 0xcc00, - [VECS] = 0xcb00, - }; - int i; - - if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) - return; - - offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { - vgpu_vreg(vgpu, offset) = I915_READ_FW(offset); - I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]); - offset.reg += 4; - } - - if (ring_id == RCS) { - l3_offset.reg = 0xb020; - for (i = 0; i < 32; i++) { - vgpu_vreg(vgpu, l3_offset) = I915_READ_FW(l3_offset); - I915_WRITE_FW(l3_offset, gen9_render_mocs_L3[i]); - l3_offset.reg += 4; - } - } -} - -#define CTX_CONTEXT_CONTROL_VAL 0x03 - -/* Switch ring mmio values (context) from host to a vgpu. 
*/ -static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct render_mmio *mmio; - u32 v; - int i, array_size; - u32 *reg_state = vgpu->shadow_ctx->engine[ring_id].lrc_reg_state; - u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL]; - u32 inhibit_mask = - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - i915_reg_t last_reg = _MMIO(0); - - if (IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv)) { - mmio = gen9_render_mmio_list; - array_size = ARRAY_SIZE(gen9_render_mmio_list); - load_mocs(vgpu, ring_id); - } else { - mmio = gen8_render_mmio_list; - array_size = ARRAY_SIZE(gen8_render_mmio_list); - } - - for (i = 0; i < array_size; i++, mmio++) { - if (mmio->ring_id != ring_id) - continue; - - mmio->value = I915_READ_FW(mmio->reg); - - /* - * if it is an inhibit context, load in_context mmio - * into HW by mmio write. If it is not, skip this mmio - * write. - */ - if (mmio->in_context && - ((ctx_ctrl & inhibit_mask) != inhibit_mask) && - i915.enable_execlists) - continue; - - if (mmio->mask) - v = vgpu_vreg(vgpu, mmio->reg) | (mmio->mask << 16); - else - v = vgpu_vreg(vgpu, mmio->reg); - - I915_WRITE_FW(mmio->reg, v); - last_reg = mmio->reg; - - trace_render_mmio(vgpu->id, "load", - i915_mmio_reg_offset(mmio->reg), - mmio->value, v); - } - - /* Make sure the swiched MMIOs has taken effect. */ - if (likely(INTEL_GVT_MMIO_OFFSET(last_reg))) - I915_READ_FW(last_reg); - - handle_tlb_pending_event(vgpu, ring_id); -} - -/* Switch ring mmio values (context) from vgpu to host. */ -static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct render_mmio *mmio; - i915_reg_t last_reg = _MMIO(0); - u32 v; - int i, array_size; - - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - mmio = gen9_render_mmio_list; - array_size = ARRAY_SIZE(gen9_render_mmio_list); - restore_mocs(vgpu, ring_id); - } else { - mmio = gen8_render_mmio_list; - array_size = ARRAY_SIZE(gen8_render_mmio_list); - } - - for (i = 0; i < array_size; i++, mmio++) { - if (mmio->ring_id != ring_id) - continue; - - vgpu_vreg(vgpu, mmio->reg) = I915_READ_FW(mmio->reg); - - if (mmio->mask) { - vgpu_vreg(vgpu, mmio->reg) &= ~(mmio->mask << 16); - v = mmio->value | (mmio->mask << 16); - } else - v = mmio->value; - - if (mmio->in_context) - continue; - - I915_WRITE_FW(mmio->reg, v); - last_reg = mmio->reg; - - trace_render_mmio(vgpu->id, "restore", - i915_mmio_reg_offset(mmio->reg), - mmio->value, v); - } - - /* Make sure the swiched MMIOs has taken effect. */ - if (likely(INTEL_GVT_MMIO_OFFSET(last_reg))) - I915_READ_FW(last_reg); -} - -/** - * intel_gvt_switch_render_mmio - switch mmio context of specific engine - * @pre: the last vGPU that own the engine - * @next: the vGPU to switch to - * @ring_id: specify the engine - * - * If pre is null indicates that host own the engine. If next is null - * indicates that we are switching to host workload. - */ -void intel_gvt_switch_mmio(struct intel_vgpu *pre, - struct intel_vgpu *next, int ring_id) -{ - struct drm_i915_private *dev_priv; - - if (WARN_ON(!pre && !next)) - return; - - gvt_dbg_render("switch ring %d from %s to %s\n", ring_id, - pre ? "vGPU" : "host", next ? "vGPU" : "HOST"); - - dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv; - - /** - * We are using raw mmio access wrapper to improve the - * performace for batch mmio read/write, so we need - * handle forcewake mannually. 
- */ - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - - /** - * TODO: Optimize for vGPU to vGPU switch by merging - * switch_mmio_to_host() and switch_mmio_to_vgpu(). - */ - if (pre) - switch_mmio_to_host(pre, ring_id); - - if (next) - switch_mmio_to_vgpu(next, ring_id); - - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); -} diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 03532dfc0cd5..cc1ce361cd76 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -50,6 +50,7 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu) struct vgpu_sched_data { struct list_head lru_list; struct intel_vgpu *vgpu; + bool active; ktime_t sched_in_time; ktime_t sched_out_time; @@ -308,8 +309,15 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu) { + struct intel_gvt *gvt = vgpu->gvt; + struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; + kfree(vgpu->sched_data); vgpu->sched_data = NULL; + + /* this vgpu id has been removed */ + if (idr_is_empty(&gvt->vgpu_idr)) + hrtimer_cancel(&sched_data->timer); } static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) @@ -325,6 +333,7 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) if (!hrtimer_active(&sched_data->timer)) hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(), sched_data->period), HRTIMER_MODE_ABS); + vgpu_data->active = true; } static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu) @@ -332,6 +341,7 @@ static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu) struct vgpu_sched_data *vgpu_data = vgpu->sched_data; list_del_init(&vgpu_data->lru_list); + vgpu_data->active = false; } static struct intel_gvt_sched_policy_ops tbs_schedule_ops = { @@ -367,9 +377,17 @@ void intel_vgpu_clean_sched_policy(struct intel_vgpu *vgpu) void intel_vgpu_start_schedule(struct intel_vgpu *vgpu) { - gvt_dbg_core("vgpu%d: start schedule\n", vgpu->id); + struct vgpu_sched_data *vgpu_data = vgpu->sched_data; - vgpu->gvt->scheduler.sched_ops->start_schedule(vgpu); + if (!vgpu_data->active) { + gvt_dbg_core("vgpu%d: start schedule\n", vgpu->id); + vgpu->gvt->scheduler.sched_ops->start_schedule(vgpu); + } +} + +void intel_gvt_kick_schedule(struct intel_gvt *gvt) +{ + intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED); } void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) @@ -377,6 +395,10 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) struct intel_gvt_workload_scheduler *scheduler = &vgpu->gvt->scheduler; int ring_id; + struct vgpu_sched_data *vgpu_data = vgpu->sched_data; + + if (!vgpu_data->active) + return; gvt_dbg_core("vgpu%d: stop schedule\n", vgpu->id); diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.h b/drivers/gpu/drm/i915/gvt/sched_policy.h index ba00a5f7455f..7b59e3e88b8b 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.h +++ b/drivers/gpu/drm/i915/gvt/sched_policy.h @@ -57,4 +57,6 @@ void intel_vgpu_start_schedule(struct intel_vgpu *vgpu); void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu); +void intel_gvt_kick_schedule(struct intel_gvt *gvt); + #endif diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 391800d2067b..b55b3580ca1d 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -57,7 +57,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) struct intel_vgpu *vgpu = workload->vgpu; struct 
intel_gvt *gvt = vgpu->gvt; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; + struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = shadow_ctx->engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; @@ -81,16 +81,16 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) while (i < context_page_num) { context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, (u32)((workload->ctx_desc.lrca + i) << - GTT_PAGE_SHIFT)); + I915_GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) { gvt_vgpu_err("Invalid guest context descriptor\n"); - return -EINVAL; + return -EFAULT; } - page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + i); + page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); dst = kmap(page); intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, - GTT_PAGE_SIZE); + I915_GTT_PAGE_SIZE); kunmap(page); i++; } @@ -120,7 +120,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) sizeof(*shadow_ring_context), (void *)shadow_ring_context + sizeof(*shadow_ring_context), - GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); + I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); kunmap(page); return 0; @@ -131,6 +131,20 @@ static inline bool is_gvt_request(struct drm_i915_gem_request *req) return i915_gem_context_force_single_submission(req->ctx); } +static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + u32 ring_base = dev_priv->engine[ring_id]->mmio_base; + i915_reg_t reg; + + reg = RING_INSTDONE(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); + reg = RING_ACTHD(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); + reg = RING_ACTHD_UDW(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); +} + static int shadow_context_status_change(struct notifier_block *nb, unsigned long action, void *data) { @@ -140,9 +154,10 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; enum intel_engine_id ring_id = req->engine->id; struct intel_vgpu_workload *workload; + unsigned long flags; if (!is_gvt_request(req)) { - spin_lock_bh(&scheduler->mmio_context_lock); + spin_lock_irqsave(&scheduler->mmio_context_lock, flags); if (action == INTEL_CONTEXT_SCHEDULE_IN && scheduler->engine_owner[ring_id]) { /* Switch ring from vGPU to host. */ @@ -150,7 +165,7 @@ static int shadow_context_status_change(struct notifier_block *nb, NULL, ring_id); scheduler->engine_owner[ring_id] = NULL; } - spin_unlock_bh(&scheduler->mmio_context_lock); + spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); return NOTIFY_OK; } @@ -161,7 +176,7 @@ static int shadow_context_status_change(struct notifier_block *nb, switch (action) { case INTEL_CONTEXT_SCHEDULE_IN: - spin_lock_bh(&scheduler->mmio_context_lock); + spin_lock_irqsave(&scheduler->mmio_context_lock, flags); if (workload->vgpu != scheduler->engine_owner[ring_id]) { /* Switch ring from host to vGPU or vGPU to vGPU. 
*/ intel_gvt_switch_mmio(scheduler->engine_owner[ring_id], @@ -170,12 +185,16 @@ static int shadow_context_status_change(struct notifier_block *nb, } else gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n", ring_id, workload->vgpu->id); - spin_unlock_bh(&scheduler->mmio_context_lock); + spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); atomic_set(&workload->shadow_ctx_active, 1); break; case INTEL_CONTEXT_SCHEDULE_OUT: + save_ring_hw_state(workload->vgpu, ring_id); atomic_set(&workload->shadow_ctx_active, 0); break; + case INTEL_CONTEXT_SCHEDULE_PREEMPTED: + save_ring_hw_state(workload->vgpu, ring_id); + break; default: WARN_ON(1); return NOTIFY_OK; @@ -201,6 +220,43 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx, ce->lrc_desc = desc; } +static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + void *shadow_ring_buffer_va; + u32 *cs; + + /* allocate shadow ring buffer */ + cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); + if (IS_ERR(cs)) { + gvt_vgpu_err("fail to alloc size =%ld shadow ring buffer\n", + workload->rb_len); + return PTR_ERR(cs); + } + + shadow_ring_buffer_va = workload->shadow_ring_buffer_va; + + /* get shadow ring buffer va */ + workload->shadow_ring_buffer_va = cs; + + memcpy(cs, shadow_ring_buffer_va, + workload->rb_len); + + cs += workload->rb_len / sizeof(u32); + intel_ring_advance(workload->req, cs); + + return 0; +} + +static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) +{ + if (!wa_ctx->indirect_ctx.obj) + return; + + i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj); + i915_gem_object_put(wa_ctx->indirect_ctx.obj); +} + /** * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and * shadow it as well, include ringbuffer,wa_ctx and ctx. @@ -211,11 +267,13 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx, */ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) { - int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; - struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; - struct drm_i915_gem_request *rq; struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + int ring_id = workload->ring_id; + struct intel_engine_cs *engine = dev_priv->engine[ring_id]; + struct intel_ring *ring; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -227,50 +285,279 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - if (!test_and_set_bit(ring_id, vgpu->shadow_ctx_desc_updated)) + if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated)) shadow_context_descriptor_update(shadow_ctx, dev_priv->engine[ring_id]); + ret = intel_gvt_scan_and_shadow_ringbuffer(workload); + if (ret) + goto err_scan; + + if ((workload->ring_id == RCS) && + (workload->wa_ctx.indirect_ctx.size != 0)) { + ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); + if (ret) + goto err_scan; + } + + /* pin shadow context by gvt even the shadow context will be pinned + * when i915 alloc request. 
That is because gvt will update the guest + * context from the shadow context when the workload is completed, and at + * that moment, i915 may have already unpinned the shadow context, making + * the shadow_ctx pages invalid. So gvt needs to pin it itself. After + * updating the guest context, gvt can unpin the shadow_ctx safely. + */ + ring = engine->context_pin(engine, shadow_ctx); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); + gvt_vgpu_err("fail to pin shadow context\n"); + goto err_shadow; + } + + ret = populate_shadow_context(workload); + if (ret) + goto err_unpin; + workload->shadowed = true; + return 0; + +err_unpin: + engine->context_unpin(engine, shadow_ctx); +err_shadow: + release_shadow_wa_ctx(&workload->wa_ctx); +err_scan: + return ret; +} + +static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) +{ + int ring_id = workload->ring_id; + struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[ring_id]; + struct drm_i915_gem_request *rq; + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + int ret; + rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { gvt_vgpu_err("fail to allocate gem request\n"); ret = PTR_ERR(rq); - goto out; + goto err_unpin; } gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq); workload->req = i915_gem_request_get(rq); - - ret = intel_gvt_scan_and_shadow_ringbuffer(workload); + ret = copy_workload_to_ring_buffer(workload); if (ret) - goto out; + goto err_unpin; + return 0; - if ((workload->ring_id == RCS) && - (workload->wa_ctx.indirect_ctx.size != 0)) { - ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); +err_unpin: + engine->context_unpin(engine, shadow_ctx); + release_shadow_wa_ctx(&workload->wa_ctx); + return ret; +} + +static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload); + +static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) +{ + struct intel_gvt *gvt = workload->vgpu->gvt; + const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd; + struct intel_vgpu_shadow_bb *bb; + int ret; + + list_for_each_entry(bb, &workload->shadow_bb, list) { + bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0); + if (IS_ERR(bb->vma)) { + ret = PTR_ERR(bb->vma); + goto err; + } + + /* relocate shadow batch buffer */ + bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); + if (gmadr_bytes == 8) + bb->bb_start_cmd_va[2] = 0; + + /* No one is going to touch shadow bb from now on.
*/ + if (bb->clflush & CLFLUSH_AFTER) { + drm_clflush_virt_range(bb->va, bb->obj->base.size); + bb->clflush &= ~CLFLUSH_AFTER; + } + + ret = i915_gem_object_set_to_gtt_domain(bb->obj, false); if (ret) - goto out; + goto err; + + i915_gem_obj_finish_shmem_access(bb->obj); + bb->accessing = false; + + i915_vma_move_to_active(bb->vma, workload->req, 0); } + return 0; +err: + release_shadow_batch_buffer(workload); + return ret; +} - ret = populate_shadow_context(workload); - if (ret) - goto out; +static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) +{ + struct intel_vgpu_workload *workload = container_of(wa_ctx, + struct intel_vgpu_workload, + wa_ctx); + int ring_id = workload->ring_id; + struct intel_vgpu_submission *s = &workload->vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + struct drm_i915_gem_object *ctx_obj = + shadow_ctx->engine[ring_id].state->obj; + struct execlist_ring_context *shadow_ring_context; + struct page *page; - workload->shadowed = true; + page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); + shadow_ring_context = kmap_atomic(page); -out: + shadow_ring_context->bb_per_ctx_ptr.val = + (shadow_ring_context->bb_per_ctx_ptr.val & + (~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma; + shadow_ring_context->rcs_indirect_ctx.val = + (shadow_ring_context->rcs_indirect_ctx.val & + (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma; + + kunmap_atomic(shadow_ring_context); + return 0; +} + +static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) +{ + struct i915_vma *vma; + unsigned char *per_ctx_va = + (unsigned char *)wa_ctx->indirect_ctx.shadow_va + + wa_ctx->indirect_ctx.size; + + if (wa_ctx->indirect_ctx.size == 0) + return 0; + + vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL, + 0, CACHELINE_BYTES, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + /* FIXME: we are not tracking our pinned VMA leaving it + * up to the core to fix up the stray pin_count upon + * free. 
+ */ + + wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma); + + wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1); + memset(per_ctx_va, 0, CACHELINE_BYTES); + + update_wa_ctx_2_shadow_ctx(wa_ctx); + return 0; +} + +static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_vgpu_shadow_bb *bb, *pos; + + if (list_empty(&workload->shadow_bb)) + return; + + bb = list_first_entry(&workload->shadow_bb, + struct intel_vgpu_shadow_bb, list); + + mutex_lock(&dev_priv->drm.struct_mutex); + + list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { + if (bb->obj) { + if (bb->accessing) + i915_gem_obj_finish_shmem_access(bb->obj); + + if (bb->va && !IS_ERR(bb->va)) + i915_gem_object_unpin_map(bb->obj); + + if (bb->vma && !IS_ERR(bb->vma)) { + i915_vma_unpin(bb->vma); + i915_vma_close(bb->vma); + } + __i915_gem_object_release_unless_active(bb->obj); + } + list_del(&bb->list); + kfree(bb); + } + + mutex_unlock(&dev_priv->drm.struct_mutex); +} + +static int prepare_workload(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + int ret = 0; + + ret = intel_vgpu_pin_mm(workload->shadow_mm); + if (ret) { + gvt_vgpu_err("fail to vgpu pin mm\n"); + return ret; + } + + ret = intel_vgpu_sync_oos_pages(workload->vgpu); + if (ret) { + gvt_vgpu_err("fail to vgpu sync oos pages\n"); + goto err_unpin_mm; + } + + ret = intel_vgpu_flush_post_shadow(workload->vgpu); + if (ret) { + gvt_vgpu_err("fail to flush post shadow\n"); + goto err_unpin_mm; + } + + ret = intel_gvt_generate_request(workload); + if (ret) { + gvt_vgpu_err("fail to generate request\n"); + goto err_unpin_mm; + } + + ret = prepare_shadow_batch_buffer(workload); + if (ret) { + gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); + goto err_unpin_mm; + } + + ret = prepare_shadow_wa_ctx(&workload->wa_ctx); + if (ret) { + gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n"); + goto err_shadow_batch; + } + + if (workload->prepare) { + ret = workload->prepare(workload); + if (ret) + goto err_shadow_wa_ctx; + } + + return 0; +err_shadow_wa_ctx: + release_shadow_wa_ctx(&workload->wa_ctx); +err_shadow_batch: + release_shadow_batch_buffer(workload); +err_unpin_mm: + intel_vgpu_unpin_mm(workload->shadow_mm); return ret; } static int dispatch_workload(struct intel_vgpu_workload *workload) { + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; - struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct intel_vgpu *vgpu = workload->vgpu; - struct intel_ring *ring; int ret = 0; gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", @@ -282,23 +569,9 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) if (ret) goto out; - if (workload->prepare) { - ret = workload->prepare(workload); - if (ret) - goto out; - } - - /* pin shadow context by gvt even the shadow context will be pinned - * when i915 alloc request. That is because gvt will update the guest - * context from shadow context when workload is completed, and at that - * moment, i915 may already unpined the shadow context to make the - * shadow_ctx pages invalid. 
So gvt need to pin itself. After update
- the guest context, gvt can unpin the shadow_ctx safely.
- */
- ring = engine->context_pin(engine, shadow_ctx);
- if (IS_ERR(ring)) {
- ret = PTR_ERR(ring);
- gvt_vgpu_err("fail to pin shadow context\n");
+ ret = prepare_workload(workload);
+ if (ret) {
+ engine->context_unpin(engine, shadow_ctx);
 goto out;
 }
@@ -367,7 +640,7 @@ static struct intel_vgpu_workload *pick_next_workload(
 gvt_dbg_sched("ring id %d pick new workload %p\n", ring_id, workload);
- atomic_inc(&workload->vgpu->running_workload_num);
+ atomic_inc(&workload->vgpu->submission.running_workload_num);
 out:
 mutex_unlock(&gvt->lock);
 return workload;
@@ -377,8 +650,9 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 {
 struct intel_vgpu *vgpu = workload->vgpu;
 struct intel_gvt *gvt = vgpu->gvt;
+ struct intel_vgpu_submission *s = &vgpu->submission;
+ struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 int ring_id = workload->ring_id;
- struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 struct drm_i915_gem_object *ctx_obj = shadow_ctx->engine[ring_id].state->obj;
 struct execlist_ring_context *shadow_ring_context;
@@ -402,16 +676,16 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 while (i < context_page_num) {
 context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
 (u32)((workload->ctx_desc.lrca + i) <<
- GTT_PAGE_SHIFT));
+ I915_GTT_PAGE_SHIFT));
 if (context_gpa == INTEL_GVT_INVALID_ADDR) {
 gvt_vgpu_err("invalid guest context descriptor\n");
 return;
 }
- page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + i);
+ page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
 src = kmap(page);
 intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
- GTT_PAGE_SIZE);
+ I915_GTT_PAGE_SIZE);
 kunmap(page);
 i++;
 }
@@ -436,23 +710,41 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 sizeof(*shadow_ring_context),
 (void *)shadow_ring_context + sizeof(*shadow_ring_context),
- GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
+ I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
 kunmap(page);
 }
+static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
+{
+ struct intel_vgpu_submission *s = &vgpu->submission;
+ struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+ struct intel_engine_cs *engine;
+ struct intel_vgpu_workload *pos, *n;
+ unsigned int tmp;
+
+ /* free the unsubmitted workloads in the queues. */
+ for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+ list_for_each_entry_safe(pos, n,
+ &s->workload_q_head[engine->id], list) {
+ list_del_init(&pos->list);
+ intel_vgpu_destroy_workload(pos);
+ }
+ clear_bit(engine->id, s->shadow_ctx_desc_updated);
+ }
+}
+
 static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 {
 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
- struct intel_vgpu_workload *workload;
- struct intel_vgpu *vgpu;
+ struct intel_vgpu_workload *workload =
+ scheduler->current_workload[ring_id];
+ struct intel_vgpu *vgpu = workload->vgpu;
+ struct intel_vgpu_submission *s = &vgpu->submission;
 int event;
 mutex_lock(&gvt->lock);
- workload = scheduler->current_workload[ring_id];
- vgpu = workload->vgpu;
-
 /* For the workload w/ request, needs to wait for the context
 * switch to make sure request is completed.
 * For the workload w/o request, directly complete the workload.
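A note on the queue teardown above: clean_workloads() walks each per-engine queue with list_for_each_entry_safe(), the iterator variant that caches the next node so the current entry can be unlinked and freed mid-walk. Below is a minimal userspace sketch of that pattern; the list helpers are simplified stand-ins for <linux/list.h>, not the kernel's implementation.

	#include <stddef.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct list_head {
		struct list_head *prev, *next;
	};

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	static void list_add_tail(struct list_head *new, struct list_head *head)
	{
		new->prev = head->prev;
		new->next = head;
		head->prev->next = new;
		head->prev = new;
	}

	static void list_del(struct list_head *entry)
	{
		entry->prev->next = entry->next;
		entry->next->prev = entry->prev;
	}

	struct workload {
		struct list_head list;
		int id;
	};

	int main(void)
	{
		struct list_head q = { &q, &q };	/* empty circular list */
		struct list_head *it, *tmp;
		struct workload *pos;
		int i;

		for (i = 0; i < 3; i++) {
			pos = malloc(sizeof(*pos));
			if (!pos)
				return 1;
			pos->id = i;
			list_add_tail(&pos->list, &q);
		}

		/* the "safe" walk: tmp caches the successor before pos is freed */
		for (it = q.next, tmp = it->next; it != &q; it = tmp, tmp = it->next) {
			pos = container_of(it, struct workload, list);
			printf("destroying workload %d\n", pos->id);
			list_del(&pos->list);
			free(pos);
		}
		return 0;
	}

The plain list_for_each_entry() would dereference freed memory in this loop, which is exactly why the _safe variant is used when destroying queued workloads.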
@@ -489,7 +781,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 }
 mutex_lock(&dev_priv->drm.struct_mutex);
 /* unpin shadow ctx as the shadow_ctx update is done */
- engine->context_unpin(engine, workload->vgpu->shadow_ctx);
+ engine->context_unpin(engine, s->shadow_ctx);
 mutex_unlock(&dev_priv->drm.struct_mutex);
 }
@@ -499,9 +791,32 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 scheduler->current_workload[ring_id] = NULL;
 list_del_init(&workload->list);
+
+ if (!workload->status) {
+ release_shadow_batch_buffer(workload);
+ release_shadow_wa_ctx(&workload->wa_ctx);
+ }
+
+ if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
+ /* If workload->status is not successful, the HW GPU has hung
+ * or something is wrong with i915/GVT, and GVT won't inject a
+ * context switch interrupt to the guest. To the guest this
+ * error is therefore a vGPU hang, so we should emulate one.
+ * If there are pending workloads already submitted by the
+ * guest, we should clean them up like the HW GPU does.
+ *
+ * If we are in the middle of an engine reset, the pending
+ * workloads won't be submitted to the HW GPU and will be
+ * cleaned up later during the reset, so doing the workload
+ * cleanup here doesn't have any impact.
+ */
+ clean_workloads(vgpu, ENGINE_MASK(ring_id));
+ }
+
 workload->complete(workload);
- atomic_dec(&vgpu->running_workload_num);
+ atomic_dec(&s->running_workload_num);
 wake_up(&scheduler->workload_complete_wq);
 if (gvt->scheduler.need_reschedule)
@@ -584,20 +899,23 @@ complete:
 FORCEWAKE_ALL);
 intel_runtime_pm_put(gvt->dev_priv);
+ if (ret && (vgpu_is_vm_unhealthy(ret)))
+ enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
 }
 return 0;
 }
 void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu)
 {
+ struct intel_vgpu_submission *s = &vgpu->submission;
 struct intel_gvt *gvt = vgpu->gvt;
 struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
- if (atomic_read(&vgpu->running_workload_num)) {
+ if (atomic_read(&s->running_workload_num)) {
 gvt_dbg_sched("wait vgpu idle\n");
 wait_event(scheduler->workload_complete_wq,
- !atomic_read(&vgpu->running_workload_num));
+ !atomic_read(&s->running_workload_num));
 }
 }
@@ -662,23 +980,370 @@ err:
 return ret;
 }
-void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu)
+/**
+ * intel_vgpu_clean_submission - free submission-related resources for vGPU
+ * @vgpu: a vGPU
+ *
+ * This function is called when a vGPU is being destroyed.
+ *
+ */
+void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
 {
- i915_gem_context_put(vgpu->shadow_ctx);
+ struct intel_vgpu_submission *s = &vgpu->submission;
+
+ intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
+ i915_gem_context_put(s->shadow_ctx);
+ kmem_cache_destroy(s->workloads);
 }
-int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu)
+
+/**
+ * intel_vgpu_reset_submission - reset submission-related resources for vGPU
+ * @vgpu: a vGPU
+ * @engine_mask: engines expected to be reset
+ *
+ * This function is called when a vGPU is being reset.
+ *
+ */
+void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
+ unsigned long engine_mask)
 {
- atomic_set(&vgpu->running_workload_num, 0);
+ struct intel_vgpu_submission *s = &vgpu->submission;
+
+ if (!s->active)
+ return;
+
+ clean_workloads(vgpu, engine_mask);
+ s->ops->reset(vgpu, engine_mask);
+}
- vgpu->shadow_ctx = i915_gem_context_create_gvt(
+/**
+ * intel_vgpu_setup_submission - setup submission-related resources for vGPU
+ * @vgpu: a vGPU
+ *
+ * This function is called when a vGPU is being created.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
+{
+ struct intel_vgpu_submission *s = &vgpu->submission;
+ enum intel_engine_id i;
+ struct intel_engine_cs *engine;
+ int ret;
+
+ s->shadow_ctx = i915_gem_context_create_gvt(
 &vgpu->gvt->dev_priv->drm);
- if (IS_ERR(vgpu->shadow_ctx))
- return PTR_ERR(vgpu->shadow_ctx);
+ if (IS_ERR(s->shadow_ctx))
+ return PTR_ERR(s->shadow_ctx);
+
+ if (HAS_LOGICAL_RING_PREEMPTION(vgpu->gvt->dev_priv))
+ s->shadow_ctx->priority = INT_MAX;
+
+ bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
+
+ s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
+ sizeof(struct intel_vgpu_workload), 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+
+ if (!s->workloads) {
+ ret = -ENOMEM;
+ goto out_shadow_ctx;
+ }
+
+ for_each_engine(engine, vgpu->gvt->dev_priv, i)
+ INIT_LIST_HEAD(&s->workload_q_head[i]);
+
+ atomic_set(&s->running_workload_num, 0);
+ bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
+
+ return 0;
+
+out_shadow_ctx:
+ i915_gem_context_put(s->shadow_ctx);
+ return ret;
+}
+
+/**
+ * intel_vgpu_select_submission_ops - select virtual submission interface
+ * @vgpu: a vGPU
+ * @engine_mask: engines to configure with the selected interface
+ * @interface: expected vGPU virtual submission interface
+ *
+ * This function is called when the guest configures its submission interface.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu,
+ unsigned long engine_mask,
+ unsigned int interface)
+{
+ struct intel_vgpu_submission *s = &vgpu->submission;
+ const struct intel_vgpu_submission_ops *ops[] = {
+ [INTEL_VGPU_EXECLIST_SUBMISSION] =
+ &intel_vgpu_execlist_submission_ops,
+ };
+ int ret;
+
+ if (WARN_ON(interface >= ARRAY_SIZE(ops)))
+ return -EINVAL;
+
+ if (WARN_ON(interface == 0 && engine_mask != ALL_ENGINES))
+ return -EINVAL;
+
+ if (s->active)
+ s->ops->clean(vgpu, engine_mask);
+
+ if (interface == 0) {
+ s->ops = NULL;
+ s->virtual_submission_interface = 0;
+ s->active = false;
+ gvt_dbg_core("vgpu%d: remove submission ops\n", vgpu->id);
+ return 0;
+ }
+
+ ret = ops[interface]->init(vgpu, engine_mask);
+ if (ret)
+ return ret;
+
+ s->ops = ops[interface];
+ s->virtual_submission_interface = interface;
+ s->active = true;
+
+ gvt_dbg_core("vgpu%d: activate ops [ %s ]\n",
+ vgpu->id, s->ops->name);
- vgpu->shadow_ctx->engine[RCS].initialised = true;
+ return 0;
+}
- bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES);
+/**
+ * intel_vgpu_destroy_workload - destroy a vGPU workload
+ * @workload: the workload to destroy
+ *
+ * This function is called when destroying a vGPU workload.
+ *
+ */
+void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
+{
+ struct intel_vgpu_submission *s = &workload->vgpu->submission;
+
+ if (workload->shadow_mm)
+ intel_gvt_mm_unreference(workload->shadow_mm);
+ kmem_cache_free(s->workloads, workload);
+}
+
+static struct intel_vgpu_workload *
+alloc_workload(struct intel_vgpu *vgpu)
+{
+ struct intel_vgpu_submission *s = &vgpu->submission;
+ struct intel_vgpu_workload *workload;
+
+ workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL);
+ if (!workload)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&workload->list);
+ INIT_LIST_HEAD(&workload->shadow_bb);
+
+ init_waitqueue_head(&workload->shadow_ctx_status_wq);
+ atomic_set(&workload->shadow_ctx_active, 0);
+
+ workload->status = -EINPROGRESS;
+ workload->shadowed = false;
+ workload->vgpu = vgpu;
+
+ return workload;
+}
+
+#define RING_CTX_OFF(x) \
+ offsetof(struct execlist_ring_context, x)
+
+static void read_guest_pdps(struct intel_vgpu *vgpu,
+ u64 ring_context_gpa, u32 pdp[8])
+{
+ u64 gpa;
+ int i;
+
+ gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val);
+
+ for (i = 0; i < 8; i++)
+ intel_gvt_hypervisor_read_gpa(vgpu,
+ gpa + i * 8, &pdp[7 - i], 4);
+}
+
+static int prepare_mm(struct intel_vgpu_workload *workload)
+{
+ struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
+ struct intel_vgpu_mm *mm;
+ struct intel_vgpu *vgpu = workload->vgpu;
+ int page_table_level;
+ u32 pdp[8];
+
+ if (desc->addressing_mode == 1) { /* legacy 32-bit */
+ page_table_level = 3;
+ } else if (desc->addressing_mode == 3) { /* legacy 64-bit */
+ page_table_level = 4;
+ } else {
+ gvt_vgpu_err("Advanced Context mode (SVM) is not supported!\n");
+ return -EINVAL;
+ }
+
+ read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);
+
+ mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
+ if (mm) {
+ intel_gvt_mm_reference(mm);
+ } else {
+ mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
+ pdp, page_table_level, 0);
+ if (IS_ERR(mm)) {
+ gvt_vgpu_err("fail to create mm object.\n");
+ return PTR_ERR(mm);
+ }
+ }
+ workload->shadow_mm = mm;
 return 0;
 }
+
+#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
+ ((a)->lrca == (b)->lrca))
+
+#define get_last_workload(q) \
+ (list_empty(q) ? NULL : container_of(q->prev, \
+ struct intel_vgpu_workload, list))
+/**
+ * intel_vgpu_create_workload - create a vGPU workload
+ * @vgpu: a vGPU
+ * @ring_id: ring id
+ * @desc: a guest context descriptor
+ *
+ * This function is called when creating a vGPU workload.
+ *
+ * Returns:
+ * struct intel_vgpu_workload * on success, negative error code in
+ * pointer if failed.
+ * + */ +struct intel_vgpu_workload * +intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, + struct execlist_ctx_descriptor_format *desc) +{ + struct intel_vgpu_submission *s = &vgpu->submission; + struct list_head *q = workload_q_head(vgpu, ring_id); + struct intel_vgpu_workload *last_workload = get_last_workload(q); + struct intel_vgpu_workload *workload = NULL; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + u64 ring_context_gpa; + u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx; + int ret; + + ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, + (u32)((desc->lrca + 1) << I915_GTT_PAGE_SHIFT)); + if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca); + return ERR_PTR(-EINVAL); + } + + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(ring_header.val), &head, 4); + + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(ring_tail.val), &tail, 4); + + head &= RB_HEAD_OFF_MASK; + tail &= RB_TAIL_OFF_MASK; + + if (last_workload && same_context(&last_workload->ctx_desc, desc)) { + gvt_dbg_el("ring id %d cur workload == last\n", ring_id); + gvt_dbg_el("ctx head %x real head %lx\n", head, + last_workload->rb_tail); + /* + * cannot use guest context head pointer here, + * as it might not be updated at this time + */ + head = last_workload->rb_tail; + } + + gvt_dbg_el("ring id %d begin a new workload\n", ring_id); + + /* record some ring buffer register values for scan and shadow */ + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(rb_start.val), &start, 4); + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(rb_ctrl.val), &ctl, 4); + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); + + workload = alloc_workload(vgpu); + if (IS_ERR(workload)) + return workload; + + workload->ring_id = ring_id; + workload->ctx_desc = *desc; + workload->ring_context_gpa = ring_context_gpa; + workload->rb_head = head; + workload->rb_tail = tail; + workload->rb_start = start; + workload->rb_ctl = ctl; + + if (ring_id == RCS) { + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4); + intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + + RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4); + + workload->wa_ctx.indirect_ctx.guest_gma = + indirect_ctx & INDIRECT_CTX_ADDR_MASK; + workload->wa_ctx.indirect_ctx.size = + (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * + CACHELINE_BYTES; + workload->wa_ctx.per_ctx.guest_gma = + per_ctx & PER_CTX_ADDR_MASK; + workload->wa_ctx.per_ctx.valid = per_ctx & 1; + } + + gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n", + workload, ring_id, head, tail, start, ctl); + + ret = prepare_mm(workload); + if (ret) { + kmem_cache_free(s->workloads, workload); + return ERR_PTR(ret); + } + + /* Only scan and shadow the first workload in the queue + * as there is only one pre-allocated buf-obj for shadow. 
+ */
+ if (list_empty(workload_q_head(vgpu, ring_id))) {
+ intel_runtime_pm_get(dev_priv);
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ ret = intel_gvt_scan_and_shadow_workload(workload);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+ intel_runtime_pm_put(dev_priv);
+ }
+
+ if (ret && (vgpu_is_vm_unhealthy(ret))) {
+ enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
+ intel_vgpu_destroy_workload(workload);
+ return ERR_PTR(ret);
+ }
+
+ return workload;
+}
+
+/**
+ * intel_vgpu_queue_workload - Queue a vGPU workload
+ * @workload: the workload to queue
+ */
+void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload)
+{
+ list_add_tail(&workload->list,
+ workload_q_head(workload->vgpu, workload->ring_id));
+ intel_gvt_kick_schedule(workload->vgpu->gvt);
+ wake_up(&workload->vgpu->gvt->scheduler.waitq[workload->ring_id]);
+}
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index 93a49eb0209e..ff175a98b19e 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -112,24 +112,20 @@ struct intel_vgpu_workload {
 struct intel_shadow_wa_ctx wa_ctx;
 };
-/* Intel shadow batch buffer is a i915 gem object */
-struct intel_shadow_bb_entry {
+struct intel_vgpu_shadow_bb {
 struct list_head list;
 struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
 void *va;
- unsigned long len;
 u32 *bb_start_cmd_va;
+ unsigned int clflush;
+ bool accessing;
 };
 #define workload_q_head(vgpu, ring_id) \
- (&(vgpu->workload_q_head[ring_id]))
+ (&(vgpu->submission.workload_q_head[ring_id]))
-#define queue_workload(workload) do { \
- list_add_tail(&workload->list, \
- workload_q_head(workload->vgpu, workload->ring_id)); \
- wake_up(&workload->vgpu->gvt-> \
- scheduler.waitq[workload->ring_id]); \
-} while (0)
+void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload);
 int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt);
@@ -137,8 +133,24 @@ void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt);
 void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu);
-int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu);
+int intel_vgpu_setup_submission(struct intel_vgpu *vgpu);
-void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu);
+void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
+ unsigned long engine_mask);
+
+void intel_vgpu_clean_submission(struct intel_vgpu *vgpu);
+
+int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu,
+ unsigned long engine_mask,
+ unsigned int interface);
+
+extern const struct intel_vgpu_submission_ops
+intel_vgpu_execlist_submission_ops;
+
+struct intel_vgpu_workload *
+intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
+ struct execlist_ctx_descriptor_format *desc);
+
+void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload);
 #endif
diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
index 8c150381d9a4..736bd2bc5127 100644
--- a/drivers/gpu/drm/i915/gvt/trace.h
+++ b/drivers/gpu/drm/i915/gvt/trace.h
@@ -330,13 +330,14 @@ TRACE_EVENT(inject_msi,
 );
 TRACE_EVENT(render_mmio,
- TP_PROTO(int id, char *action, unsigned int reg,
+ TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
 unsigned int old_val, unsigned int new_val),
- TP_ARGS(id, action, reg, new_val, old_val),
+ TP_ARGS(old_id, new_id, action, reg, old_val, new_val),
 TP_STRUCT__entry(
- __field(int, id)
+ __field(int, old_id)
+ __field(int, new_id)
 __array(char, buf, GVT_TEMP_STR_LEN)
 __field(unsigned int, reg)
 __field(unsigned int, old_val)
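One detail worth calling out in the render_mmio change: the old event declared TP_PROTO(..., old_val, new_val) but passed TP_ARGS(..., new_val, old_val), so the recorded old/new register values were silently transposed; the new ordering fixes that while also recording the old and new vGPU ids across the context switch. A tiny illustrative C program (hypothetical names, not the real tracepoint machinery) showing the positional hazard:

	#include <stdio.h>

	/* stands in for the probe generated from TP_PROTO(..., old_val, new_val) */
	static void probe_render_mmio(unsigned int reg, unsigned int old_val,
				      unsigned int new_val)
	{
		printf("reg %x, old %08x new %08x\n", reg, old_val, new_val);
	}

	int main(void)
	{
		unsigned int reg = 0x2244, old_val = 0x1, new_val = 0x2;

		/* the old TP_ARGS ordering: values land in the wrong slots */
		probe_render_mmio(reg, new_val, old_val);
		/* the fixed ordering matches the prototype */
		probe_render_mmio(reg, old_val, new_val);
		return 0;
	}

Because TP_ARGS is matched to TP_PROTO purely by position, the compiler cannot catch such a swap; only inspecting the emitted trace reveals it.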
@@ -344,15 +345,17 @@ TRACE_EVENT(render_mmio, ), TP_fast_assign( - __entry->id = id; + __entry->old_id = old_id; + __entry->new_id = new_id; snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", action); __entry->reg = reg; __entry->old_val = old_val; __entry->new_val = new_val; ), - TP_printk("VM%u %s reg %x, old %08x new %08x\n", - __entry->id, __entry->buf, __entry->reg, + TP_printk("VM%u -> VM%u %s reg %x, old %08x new %08x\n", + __entry->old_id, __entry->new_id, + __entry->buf, __entry->reg, __entry->old_val, __entry->new_val) ); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 02c61a1ad56a..b87b19d8443c 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -38,22 +38,25 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) { /* setup the ballooning information */ - vgpu_vreg64(vgpu, vgtif_reg(magic)) = VGT_MAGIC; - vgpu_vreg(vgpu, vgtif_reg(version_major)) = 1; - vgpu_vreg(vgpu, vgtif_reg(version_minor)) = 0; - vgpu_vreg(vgpu, vgtif_reg(display_ready)) = 0; - vgpu_vreg(vgpu, vgtif_reg(vgt_id)) = vgpu->id; - vgpu_vreg(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT; - vgpu_vreg(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) = + vgpu_vreg64_t(vgpu, vgtif_reg(magic)) = VGT_MAGIC; + vgpu_vreg_t(vgpu, vgtif_reg(version_major)) = 1; + vgpu_vreg_t(vgpu, vgtif_reg(version_minor)) = 0; + vgpu_vreg_t(vgpu, vgtif_reg(display_ready)) = 0; + vgpu_vreg_t(vgpu, vgtif_reg(vgt_id)) = vgpu->id; + + vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT; + vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION; + + vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) = vgpu_aperture_gmadr_base(vgpu); - vgpu_vreg(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) = + vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) = vgpu_aperture_sz(vgpu); - vgpu_vreg(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.base)) = + vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.base)) = vgpu_hidden_gmadr_base(vgpu); - vgpu_vreg(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.size)) = + vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.size)) = vgpu_hidden_sz(vgpu); - vgpu_vreg(vgpu, vgtif_reg(avail_rs.fence_num)) = vgpu_fence_sz(vgpu); + vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.fence_num)) = vgpu_fence_sz(vgpu); gvt_dbg_core("Populate PVINFO PAGE for vGPU %d\n", vgpu->id); gvt_dbg_core("aperture base [GMADR] 0x%llx size 0x%llx\n", @@ -226,13 +229,14 @@ void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) vgpu->active = false; - if (atomic_read(&vgpu->running_workload_num)) { + if (atomic_read(&vgpu->submission.running_workload_num)) { mutex_unlock(&gvt->lock); intel_gvt_wait_vgpu_idle(vgpu); mutex_lock(&gvt->lock); } intel_vgpu_stop_schedule(vgpu); + intel_vgpu_dmabuf_cleanup(vgpu); mutex_unlock(&gvt->lock); } @@ -252,16 +256,19 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) WARN(vgpu->active, "vGPU is still active!\n"); + intel_gvt_debugfs_remove_vgpu(vgpu); idr_remove(&gvt->vgpu_idr, vgpu->id); + if (idr_is_empty(&gvt->vgpu_idr)) + intel_gvt_clean_irq(gvt); intel_vgpu_clean_sched_policy(vgpu); - intel_vgpu_clean_gvt_context(vgpu); - intel_vgpu_clean_execlist(vgpu); + intel_vgpu_clean_submission(vgpu); intel_vgpu_clean_display(vgpu); intel_vgpu_clean_opregion(vgpu); intel_vgpu_clean_gtt(vgpu); intel_gvt_hypervisor_detach_vgpu(vgpu); intel_vgpu_free_resource(vgpu); intel_vgpu_clean_mmio(vgpu); + intel_vgpu_dmabuf_cleanup(vgpu); vfree(vgpu); intel_gvt_update_vgpu_types(gvt); @@ -293,7 +300,7 @@ struct intel_vgpu 
*intel_gvt_create_idle_vgpu(struct intel_gvt *gvt) vgpu->gvt = gvt; for (i = 0; i < I915_NUM_ENGINES; i++) - INIT_LIST_HEAD(&vgpu->workload_q_head[i]); + INIT_LIST_HEAD(&vgpu->submission.workload_q_head[i]); ret = intel_vgpu_init_sched_policy(vgpu); if (ret) @@ -346,8 +353,8 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, vgpu->handle = param->handle; vgpu->gvt = gvt; vgpu->sched_ctl.weight = param->weight; - bitmap_zero(vgpu->tlb_handle_pending, I915_NUM_ENGINES); - + INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head); + idr_init(&vgpu->object_idr); intel_vgpu_init_cfg_space(vgpu, param->primary); ret = intel_vgpu_init_mmio(vgpu); @@ -368,32 +375,42 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_detach_hypervisor_vgpu; - ret = intel_vgpu_init_display(vgpu, param->resolution); + ret = intel_vgpu_init_opregion(vgpu); if (ret) goto out_clean_gtt; - ret = intel_vgpu_init_execlist(vgpu); + ret = intel_vgpu_init_display(vgpu, param->resolution); if (ret) - goto out_clean_display; + goto out_clean_opregion; - ret = intel_vgpu_init_gvt_context(vgpu); + ret = intel_vgpu_setup_submission(vgpu); if (ret) - goto out_clean_execlist; + goto out_clean_display; ret = intel_vgpu_init_sched_policy(vgpu); if (ret) - goto out_clean_shadow_ctx; + goto out_clean_submission; + + ret = intel_gvt_debugfs_add_vgpu(vgpu); + if (ret) + goto out_clean_sched_policy; + + ret = intel_gvt_hypervisor_set_opregion(vgpu); + if (ret) + goto out_clean_sched_policy; mutex_unlock(&gvt->lock); return vgpu; -out_clean_shadow_ctx: - intel_vgpu_clean_gvt_context(vgpu); -out_clean_execlist: - intel_vgpu_clean_execlist(vgpu); +out_clean_sched_policy: + intel_vgpu_clean_sched_policy(vgpu); +out_clean_submission: + intel_vgpu_clean_submission(vgpu); out_clean_display: intel_vgpu_clean_display(vgpu); +out_clean_opregion: + intel_vgpu_clean_opregion(vgpu); out_clean_gtt: intel_vgpu_clean_gtt(vgpu); out_detach_hypervisor_vgpu: @@ -500,11 +517,10 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, mutex_lock(&gvt->lock); } - intel_vgpu_reset_execlist(vgpu, resetting_eng); - + intel_vgpu_reset_submission(vgpu, resetting_eng); /* full GPU reset or device model level reset */ if (engine_mask == ALL_ENGINES || dmlr) { - + intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0); /*fence will not be reset during virtual reset */ if (dmlr) { intel_vgpu_reset_gtt(vgpu); diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 8ba932b22f7c..95478db9998b 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -26,6 +26,7 @@ */ #include "i915_drv.h" +#include "intel_ringbuffer.h" /** * DOC: batch buffer command parser @@ -798,22 +799,15 @@ struct cmd_node { */ static inline u32 cmd_header_key(u32 x) { - u32 shift; - switch (x >> INSTR_CLIENT_SHIFT) { default: case INSTR_MI_CLIENT: - shift = STD_MI_OPCODE_SHIFT; - break; + return x >> STD_MI_OPCODE_SHIFT; case INSTR_RC_CLIENT: - shift = STD_3D_OPCODE_SHIFT; - break; + return x >> STD_3D_OPCODE_SHIFT; case INSTR_BC_CLIENT: - shift = STD_2D_OPCODE_SHIFT; - break; + return x >> STD_2D_OPCODE_SHIFT; } - - return x >> shift; } static int init_hash_table(struct intel_engine_cs *engine, @@ -947,7 +941,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) return; } - engine->needs_cmd_parser = true; + engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER; } /** @@ -959,7 +953,7 @@ void intel_engine_init_cmd_parser(struct 
intel_engine_cs *engine) */ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { - if (!engine->needs_cmd_parser) + if (!intel_engine_needs_cmd_parser(engine)) return; fini_hash_table(engine); @@ -1038,7 +1032,7 @@ find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) const struct drm_i915_reg_table *table = engine->reg_tables; int count = engine->reg_table_count; - do { + for (; count > 0; ++table, --count) { if (!table->master || is_master) { const struct drm_i915_reg_descriptor *reg; @@ -1046,7 +1040,7 @@ find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) if (reg != NULL) return reg; } - } while (table++, --count); + } return NULL; } @@ -1218,6 +1212,12 @@ static bool check_cmd(const struct intel_engine_cs *engine, continue; } + if (desc->bits[i].offset >= length) { + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X, too short to check bitmask (%s)\n", + *cmd, engine->name); + return false; + } + dword = cmd[desc->bits[i].offset] & desc->bits[i].mask; @@ -1357,7 +1357,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) /* If the command parser is not enabled, report 0 - unsupported */ for_each_engine(engine, dev_priv, id) { - if (engine->needs_cmd_parser) { + if (intel_engine_needs_cmd_parser(engine)) { active = true; break; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e4d4b6b41e26..e968aeae1d84 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -30,46 +30,28 @@ #include <linux/sort.h> #include <linux/sched/mm.h> #include "intel_drv.h" +#include "intel_guc_submission.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) { return to_i915(node->minor->dev); } -static __always_inline void seq_print_param(struct seq_file *m, - const char *name, - const char *type, - const void *x) -{ - if (!__builtin_strcmp(type, "bool")) - seq_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); - else if (!__builtin_strcmp(type, "int")) - seq_printf(m, "i915.%s=%d\n", name, *(const int *)x); - else if (!__builtin_strcmp(type, "unsigned int")) - seq_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); - else if (!__builtin_strcmp(type, "char *")) - seq_printf(m, "i915.%s=%s\n", name, *(const char **)x); - else - BUILD_BUG(); -} - static int i915_capabilities(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_device_info *info = INTEL_INFO(dev_priv); + struct drm_printer p = drm_seq_file_printer(m); seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); seq_printf(m, "platform: %s\n", intel_platform_name(info->platform)); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); -#define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x)) - DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); -#undef PRINT_FLAG + intel_device_info_dump_flags(info, &p); + intel_device_info_dump_runtime(info, &p); kernel_param_lock(THIS_MODULE); -#define PRINT_PARAM(T, x) seq_print_param(m, #x, #T, &i915.x); - I915_PARAMS_FOR_EACH(PRINT_PARAM); -#undef PRINT_PARAM + i915_params_dump(&i915_modparams, &p); kernel_param_unlock(THIS_MODULE); return 0; @@ -82,7 +64,7 @@ static char get_active_flag(struct drm_i915_gem_object *obj) static char get_pin_flag(struct drm_i915_gem_object *obj) { - return obj->pin_display ? 'p' : ' '; + return obj->pin_global ? 
'p' : ' '; } static char get_tiling_flag(struct drm_i915_gem_object *obj) @@ -97,7 +79,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj) static char get_global_flag(struct drm_i915_gem_object *obj) { - return !list_empty(&obj->userfault_link) ? 'g' : ' '; + return obj->userfault_count ? 'g' : ' '; } static char get_pin_mapped_flag(struct drm_i915_gem_object *obj) @@ -110,14 +92,44 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj) u64 size = 0; struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (i915_vma_is_ggtt(vma) && drm_mm_node_allocated(&vma->node)) + for_each_ggtt_vma(vma, obj) { + if (drm_mm_node_allocated(&vma->node)) size += vma->node.size; } return size; } +static const char * +stringify_page_sizes(unsigned int page_sizes, char *buf, size_t len) +{ + size_t x = 0; + + switch (page_sizes) { + case 0: + return ""; + case I915_GTT_PAGE_SIZE_4K: + return "4K"; + case I915_GTT_PAGE_SIZE_64K: + return "64K"; + case I915_GTT_PAGE_SIZE_2M: + return "2M"; + default: + if (!buf) + return "M"; + + if (page_sizes & I915_GTT_PAGE_SIZE_2M) + x += snprintf(buf + x, len - x, "2M, "); + if (page_sizes & I915_GTT_PAGE_SIZE_64K) + x += snprintf(buf + x, len - x, "64K, "); + if (page_sizes & I915_GTT_PAGE_SIZE_4K) + x += snprintf(buf + x, len - x, "4K, "); + buf[x-2] = '\0'; + + return buf; + } +} + static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { @@ -149,15 +161,16 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); - if (obj->pin_display) - seq_printf(m, " (display)"); + if (obj->pin_global) + seq_printf(m, " (global)"); list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; - seq_printf(m, " (%sgtt offset: %08llx, size: %08llx", + seq_printf(m, " (%sgtt offset: %08llx, size: %08llx, pages: %s", i915_vma_is_ggtt(vma) ? 
"g" : "pp", - vma->node.start, vma->node.size); + vma->node.start, vma->node.size, + stringify_page_sizes(vma->page_sizes.gtt, NULL, 0)); if (i915_vma_is_ggtt(vma)) { switch (vma->ggtt_view.type) { case I915_GGTT_VIEW_NORMAL: @@ -239,7 +252,9 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) goto out; total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { + + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { if (count == total) break; @@ -251,7 +266,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) total_gtt_size += i915_gem_obj_total_ggtt_size(obj); } - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { if (count == total) break; @@ -261,6 +276,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) objects[count++] = obj; total_obj_size += obj->base.size; } + spin_unlock(&dev_priv->mm.obj_lock); sort(objects, count, sizeof(*objects), obj_rank_by_stolen, NULL); @@ -402,10 +418,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; struct i915_ggtt *ggtt = &dev_priv->ggtt; - u32 count, mapped_count, purgeable_count, dpy_count; - u64 size, mapped_size, purgeable_size, dpy_size; + u32 count, mapped_count, purgeable_count, dpy_count, huge_count; + u64 size, mapped_size, purgeable_size, dpy_size, huge_size; struct drm_i915_gem_object *obj; + unsigned int page_sizes = 0; struct drm_file *file; + char buf[80]; int ret; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -419,7 +437,10 @@ static int i915_gem_object_info(struct seq_file *m, void *data) size = count = 0; mapped_size = mapped_count = 0; purgeable_size = purgeable_count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { + huge_size = huge_count = 0; + + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { size += obj->base.size; ++count; @@ -432,15 +453,21 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mapped_count++; mapped_size += obj->base.size; } + + if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) { + huge_count++; + huge_size += obj->base.size; + page_sizes |= obj->mm.page_sizes.sg; + } } seq_printf(m, "%u unbound objects, %llu bytes\n", count, size); size = count = dpy_size = dpy_count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { size += obj->base.size; ++count; - if (obj->pin_display) { + if (obj->pin_global) { dpy_size += obj->base.size; ++dpy_count; } @@ -454,18 +481,33 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mapped_count++; mapped_size += obj->base.size; } + + if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) { + huge_count++; + huge_size += obj->base.size; + page_sizes |= obj->mm.page_sizes.sg; + } } + spin_unlock(&dev_priv->mm.obj_lock); + seq_printf(m, "%u bound objects, %llu bytes\n", count, size); seq_printf(m, "%u purgeable objects, %llu bytes\n", purgeable_count, purgeable_size); seq_printf(m, "%u mapped objects, %llu bytes\n", mapped_count, mapped_size); - seq_printf(m, "%u display objects (pinned), %llu bytes\n", + seq_printf(m, "%u huge-paged objects (%s) %llu bytes\n", + huge_count, + stringify_page_sizes(page_sizes, buf, sizeof(buf)), + 
huge_size); + seq_printf(m, "%u display objects (globally pinned), %llu bytes\n", dpy_count, dpy_size); - seq_printf(m, "%llu [%llu] gtt total\n", - ggtt->base.total, ggtt->mappable_end); + seq_printf(m, "%llu [%pa] gtt total\n", + ggtt->base.total, &ggtt->mappable_end); + seq_printf(m, "Supported page sizes: %s\n", + stringify_page_sizes(INTEL_INFO(dev_priv)->page_sizes, + buf, sizeof(buf))); seq_putc(m, '\n'); print_batch_pool_stats(m, dev_priv); @@ -514,32 +556,46 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data) struct drm_info_node *node = m->private; struct drm_i915_private *dev_priv = node_to_i915(node); struct drm_device *dev = &dev_priv->drm; - bool show_pin_display_only = !!node->info_ent->data; + struct drm_i915_gem_object **objects; struct drm_i915_gem_object *obj; u64 total_obj_size, total_gtt_size; + unsigned long nobject, n; int count, ret; + nobject = READ_ONCE(dev_priv->mm.object_count); + objects = kvmalloc_array(nobject, sizeof(*objects), GFP_KERNEL); + if (!objects) + return -ENOMEM; + ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) return ret; - total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { - if (show_pin_display_only && !obj->pin_display) - continue; + count = 0; + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { + objects[count++] = obj; + if (count == nobject) + break; + } + spin_unlock(&dev_priv->mm.obj_lock); + + total_obj_size = total_gtt_size = 0; + for (n = 0; n < count; n++) { + obj = objects[n]; seq_puts(m, " "); describe_obj(m, obj); seq_putc(m, '\n'); total_obj_size += obj->base.size; total_gtt_size += i915_gem_obj_total_ggtt_size(obj); - count++; } mutex_unlock(&dev->struct_mutex); seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n", count, total_obj_size, total_gtt_size); + kvfree(objects); return 0; } @@ -589,86 +645,6 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data) return 0; } -static void print_request(struct seq_file *m, - struct drm_i915_gem_request *rq, - const char *prefix) -{ - seq_printf(m, "%s%x [%x:%x] prio=%d @ %dms: %s\n", prefix, - rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno, - rq->priotree.priority, - jiffies_to_msecs(jiffies - rq->emitted_jiffies), - rq->timeline->common->name); -} - -static int i915_gem_request_info(struct seq_file *m, void *data) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct drm_i915_gem_request *req; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret, any; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - any = 0; - for_each_engine(engine, dev_priv, id) { - int count; - - count = 0; - list_for_each_entry(req, &engine->timeline->requests, link) - count++; - if (count == 0) - continue; - - seq_printf(m, "%s requests: %d\n", engine->name, count); - list_for_each_entry(req, &engine->timeline->requests, link) - print_request(m, req, " "); - - any++; - } - mutex_unlock(&dev->struct_mutex); - - if (any == 0) - seq_puts(m, "No requests\n"); - - return 0; -} - -static void i915_ring_seqno_info(struct seq_file *m, - struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *rb; - - seq_printf(m, "Current sequence (%s): %x\n", - engine->name, intel_engine_get_seqno(engine)); - - spin_lock_irq(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct 
intel_wait *w = rb_entry(rb, typeof(*w), node); - - seq_printf(m, "Waiting (%s): %s [%d] on %x\n", - engine->name, w->tsk->comm, w->tsk->pid, w->seqno); - } - spin_unlock_irq(&b->rb_lock); -} - -static int i915_gem_seqno_info(struct seq_file *m, void *data) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) - i915_ring_seqno_info(m, engine); - - return 0; -} - - static int i915_interrupt_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -869,13 +845,12 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "Graphics Interrupt mask: %08x\n", I915_READ(GTIMR)); } - for_each_engine(engine, dev_priv, id) { - if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(dev_priv) >= 6) { + for_each_engine(engine, dev_priv, id) { seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", engine->name, I915_READ_IMR(engine)); } - i915_ring_seqno_info(m, engine); } intel_runtime_pm_put(dev_priv); @@ -1026,6 +1001,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_rps *rps = &dev_priv->gt_pm.rps; int ret = 0; intel_runtime_pm_get(dev_priv); @@ -1041,9 +1017,19 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Current P-state: %d\n", (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - u32 freq_sts; + u32 rpmodectl, freq_sts; + + mutex_lock(&dev_priv->pcu_lock); + + rpmodectl = I915_READ(GEN6_RP_CONTROL); + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); - mutex_lock(&dev_priv->rps.hw_lock); freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); @@ -1052,21 +1038,21 @@ static int i915_frequency_info(struct seq_file *m, void *unused) intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff)); seq_printf(m, "current GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, "max GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "min GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + intel_gpu_freq(dev_priv, rps->min_freq)); seq_printf(m, "idle GPU freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); - mutex_unlock(&dev_priv->rps.hw_lock); + intel_gpu_freq(dev_priv, rps->efficient_freq)); + mutex_unlock(&dev_priv->pcu_lock); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; @@ -1113,13 +1099,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK; rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK; rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK; - if 
(INTEL_GEN(dev_priv) >= 9) - cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; - else - cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; - cagf = intel_gpu_freq(dev_priv, cagf); + cagf = intel_gpu_freq(dev_priv, + intel_get_cagf(dev_priv, rpstat)); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -1136,10 +1117,17 @@ static int i915_frequency_info(struct seq_file *m, void *unused) pm_iir = I915_READ(GEN8_GT_IIR(2)); pm_mask = I915_READ(GEN6_PMINTRMSK); } + seq_printf(m, "Video Turbo Mode: %s\n", + yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); + seq_printf(m, "HW control enabled: %s\n", + yesno(rpmodectl & GEN6_RP_ENABLE)); + seq_printf(m, "SW control enabled: %s\n", + yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == + GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n", pm_ier, pm_imr, pm_isr, pm_iir, pm_mask); seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n", - dev_priv->rps.pm_intrmsk_mbz); + rps->pm_intrmsk_mbz); seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); seq_printf(m, "Render p-state ratio: %d\n", (gt_perf_status & (INTEL_GEN(dev_priv) >= 9 ? 0x1ff00 : 0xff00)) >> 8); @@ -1159,8 +1147,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup)); seq_printf(m, "RP PREV UP: %d (%dus)\n", rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup)); - seq_printf(m, "Up threshold: %d%%\n", - dev_priv->rps.up_threshold); + seq_printf(m, "Up threshold: %d%%\n", rps->up_threshold); seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n", rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei)); @@ -1168,8 +1155,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown)); seq_printf(m, "RP PREV DOWN: %d (%dus)\n", rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown)); - seq_printf(m, "Down threshold: %d%%\n", - dev_priv->rps.down_threshold); + seq_printf(m, "Down threshold: %d%%\n", rps->down_threshold); max_freq = (IS_GEN9_LP(dev_priv) ? 
rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; @@ -1191,22 +1177,22 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "Current freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, "Actual freq: %d MHz\n", cagf); seq_printf(m, "Idle freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq)); seq_printf(m, "Min freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq)); + intel_gpu_freq(dev_priv, rps->min_freq)); seq_printf(m, "Boost freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); + intel_gpu_freq(dev_priv, rps->boost_freq)); seq_printf(m, "Max freq: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, "efficient (RPe) frequency: %d MHz\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq)); + intel_gpu_freq(dev_priv, rps->efficient_freq)); } else { seq_puts(m, "no P-state info available\n"); } @@ -1267,7 +1253,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) if (waitqueue_active(&dev_priv->gpu_error.reset_queue)) seq_puts(m, "struct_mutex blocked for reset\n"); - if (!i915.enable_hangcheck) { + if (!i915_modparams.enable_hangcheck) { seq_puts(m, "Hangcheck disabled\n"); return 0; } @@ -1422,6 +1408,9 @@ static int i915_forcewake_domains(struct seq_file *m, void *data) struct intel_uncore_forcewake_domain *fw_domain; unsigned int tmp; + seq_printf(m, "user.bypass_count = %u\n", + i915->uncore.user_forcewake.count); + for_each_fw_domain(fw_domain, i915, tmp) seq_printf(m, "%s.wake_count = %u\n", intel_uncore_forcewake_domain_to_str(fw_domain->id), @@ -1444,21 +1433,11 @@ static void print_rc6_res(struct seq_file *m, static int vlv_drpc_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - u32 rpmodectl1, rcctl1, pw_status; + u32 rcctl1, pw_status; pw_status = I915_READ(VLV_GTLC_PW_STATUS); - rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "Turbo enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "RC6 Enabled: %s\n", yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))); @@ -1476,7 +1455,7 @@ static int vlv_drpc_info(struct seq_file *m) static int gen6_drpc_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0; + u32 gt_core_status, rcctl1, rc6vids = 0; u32 gen9_powergate_enable = 0, gen9_powergate_status = 0; unsigned forcewake_count; int count = 0; @@ -1495,24 +1474,16 @@ static int gen6_drpc_info(struct seq_file *m) gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS); trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true); - rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); if (INTEL_GEN(dev_priv) >= 9) { gen9_powergate_enable = 
I915_READ(GEN9_PG_ENABLE); gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); } - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); - seq_printf(m, "Video Turbo Mode: %s\n", - yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO)); - seq_printf(m, "HW control enabled: %s\n", - yesno(rpmodectl1 & GEN6_RP_ENABLE)); - seq_printf(m, "SW control enabled: %s\n", - yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) == - GEN6_RP_MEDIA_SW_MODE)); seq_printf(m, "RC1e Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); seq_printf(m, "RC6 Enabled: %s\n", @@ -1611,20 +1582,23 @@ static int i915_frontbuffer_tracking(struct seq_file *m, void *unused) static int i915_fbc_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_fbc *fbc = &dev_priv->fbc; - if (!HAS_FBC(dev_priv)) { - seq_puts(m, "FBC unsupported on this chipset\n"); - return 0; - } + if (!HAS_FBC(dev_priv)) + return -ENODEV; intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->fbc.lock); + mutex_lock(&fbc->lock); if (intel_fbc_is_active(dev_priv)) seq_puts(m, "FBC enabled\n"); else - seq_printf(m, "FBC disabled: %s\n", - dev_priv->fbc.no_fbc_reason); + seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason); + + if (fbc->work.scheduled) + seq_printf(m, "FBC worker scheduled on vblank %u, now %llu\n", + fbc->work.scheduled_vblank, + drm_crtc_vblank_count(&fbc->crtc->base)); if (intel_fbc_is_active(dev_priv)) { u32 mask; @@ -1644,7 +1618,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused) seq_printf(m, "Compressing: %s\n", yesno(mask)); } - mutex_unlock(&dev_priv->fbc.lock); + mutex_unlock(&fbc->lock); intel_runtime_pm_put(dev_priv); return 0; @@ -1691,15 +1665,13 @@ static int i915_ips_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - if (!HAS_IPS(dev_priv)) { - seq_puts(m, "not supported\n"); - return 0; - } + if (!HAS_IPS(dev_priv)) + return -ENODEV; intel_runtime_pm_get(dev_priv); seq_printf(m, "Enabled by kernel parameter: %s\n", - yesno(i915.enable_ips)); + yesno(i915_modparams.enable_ips)); if (INTEL_GEN(dev_priv) >= 8) { seq_puts(m, "Currently: unknown\n"); @@ -1775,30 +1747,27 @@ static int i915_emon_status(struct seq_file *m, void *unused) static int i915_ring_freq_table(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_rps *rps = &dev_priv->gt_pm.rps; int ret = 0; int gpu_freq, ia_freq; unsigned int max_gpu_freq, min_gpu_freq; - if (!HAS_LLC(dev_priv)) { - seq_puts(m, "unsupported on this chipset\n"); - return 0; - } + if (!HAS_LLC(dev_priv)) + return -ENODEV; intel_runtime_pm_get(dev_priv); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) goto out; if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = - dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; - max_gpu_freq = - dev_priv->rps.max_freq_softlimit / GEN9_FREQ_SCALER; + min_gpu_freq = rps->min_freq_softlimit / GEN9_FREQ_SCALER; + max_gpu_freq = rps->max_freq_softlimit / GEN9_FREQ_SCALER; } else { - min_gpu_freq = dev_priv->rps.min_freq_softlimit; - max_gpu_freq = dev_priv->rps.max_freq_softlimit; + min_gpu_freq = rps->min_freq_softlimit; + max_gpu_freq = rps->max_freq_softlimit; } 
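On the frequency bookkeeping in i915_ring_freq_table() above: gen9 parts keep their RPS limits in hardware ratio units of roughly 16.667 MHz, so the soft limits are divided by GEN9_FREQ_SCALER before the table is walked in 50 MHz steps. A self-contained sketch of that conversion, using invented limit values rather than anything read from real registers, and assuming the driver's GEN9_FREQ_SCALER of 3:

	#include <stdio.h>

	#define GT_FREQUENCY_MULTIPLIER 50
	#define GEN9_FREQ_SCALER 3

	/* gen9 ratio units are ~16.667 MHz: MHz = units * 50 / 3, rounded */
	static unsigned int gen9_units_to_mhz(unsigned int units)
	{
		return (units * GT_FREQUENCY_MULTIPLIER + GEN9_FREQ_SCALER / 2) /
			GEN9_FREQ_SCALER;
	}

	int main(void)
	{
		unsigned int min_soft = 33, max_soft = 66;	/* made-up limits */
		unsigned int gpu_freq;

		/* divide by the scaler so each loop step is one 50 MHz bin */
		for (gpu_freq = min_soft / GEN9_FREQ_SCALER;
		     gpu_freq <= max_soft / GEN9_FREQ_SCALER; gpu_freq++)
			printf("GPU freq: %u MHz\n",
			       gen9_units_to_mhz(gpu_freq * GEN9_FREQ_SCALER));
		return 0;
	}

This prints 550 MHz through 1100 MHz in 50 MHz increments, mirroring the step size the debugfs table reports for each GPU/ring frequency pair.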
seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); @@ -1817,7 +1786,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) ((ia_freq >> 8) & 0xff) * 100); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); out: intel_runtime_pm_put(dev_priv); @@ -1947,7 +1916,6 @@ static int i915_context_status(struct seq_file *m, void *unused) struct intel_context *ce = &ctx->engine[engine->id]; seq_printf(m, "%s: ", engine->name); - seq_putc(m, ce->initialised ? 'I' : 'i'); if (ce->state) describe_obj(m, ce->state->obj); if (ce->ring) @@ -1963,75 +1931,6 @@ static int i915_context_status(struct seq_file *m, void *unused) return 0; } -static void i915_dump_lrc_obj(struct seq_file *m, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct i915_vma *vma = ctx->engine[engine->id].state; - struct page *page; - int j; - - seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id); - - if (!vma) { - seq_puts(m, "\tFake context\n"); - return; - } - - if (vma->flags & I915_VMA_GLOBAL_BIND) - seq_printf(m, "\tBound in GGTT at 0x%08x\n", - i915_ggtt_offset(vma)); - - if (i915_gem_object_pin_pages(vma->obj)) { - seq_puts(m, "\tFailed to get pages for context object\n\n"); - return; - } - - page = i915_gem_object_get_page(vma->obj, LRC_STATE_PN); - if (page) { - u32 *reg_state = kmap_atomic(page); - - for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) { - seq_printf(m, - "\t[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n", - j * 4, - reg_state[j], reg_state[j + 1], - reg_state[j + 2], reg_state[j + 3]); - } - kunmap_atomic(reg_state); - } - - i915_gem_object_unpin_pages(vma->obj); - seq_putc(m, '\n'); -} - -static int i915_dump_lrc(struct seq_file *m, void *unused) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - enum intel_engine_id id; - int ret; - - if (!i915.enable_execlists) { - seq_printf(m, "Logical Ring Contexts are disabled\n"); - return 0; - } - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - list_for_each_entry(ctx, &dev_priv->contexts.list, link) - for_each_engine(engine, dev_priv, id) - i915_dump_lrc_obj(m, ctx, engine); - - mutex_unlock(&dev->struct_mutex); - - return 0; -} - static const char *swizzle_string(unsigned swizzle) { switch (swizzle) { @@ -2251,25 +2150,26 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct drm_file *file; - seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled); + seq_printf(m, "RPS enabled? %d\n", rps->enabled); seq_printf(m, "GPU busy? %s [%d requests]\n", yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); seq_printf(m, "Boosts outstanding? 
%d\n", - atomic_read(&dev_priv->rps.num_waiters)); + atomic_read(&rps->num_waiters)); seq_printf(m, "Frequency requested %d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + intel_gpu_freq(dev_priv, rps->cur_freq)); seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit), - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit), - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + intel_gpu_freq(dev_priv, rps->min_freq), + intel_gpu_freq(dev_priv, rps->min_freq_softlimit), + intel_gpu_freq(dev_priv, rps->max_freq_softlimit), + intel_gpu_freq(dev_priv, rps->max_freq)); seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); + intel_gpu_freq(dev_priv, rps->idle_freq), + intel_gpu_freq(dev_priv, rps->efficient_freq), + intel_gpu_freq(dev_priv, rps->boost_freq)); mutex_lock(&dev->filelist_mutex); list_for_each_entry_reverse(file, &dev->filelist, lhead) { @@ -2281,15 +2181,15 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "%s [%d]: %d boosts\n", task ? task->comm : "<unknown>", task ? task->pid : -1, - atomic_read(&file_priv->rps.boosts)); + atomic_read(&file_priv->rps_client.boosts)); rcu_read_unlock(); } seq_printf(m, "Kernel (anonymous) boosts: %d\n", - atomic_read(&dev_priv->rps.boosts)); + atomic_read(&rps->boosts)); mutex_unlock(&dev->filelist_mutex); if (INTEL_GEN(dev_priv) >= 6 && - dev_priv->rps.enabled && + rps->enabled && dev_priv->gt.active_requests) { u32 rpup, rpupei; u32 rpdown, rpdownei; @@ -2302,13 +2202,13 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", - rps_power_to_str(dev_priv->rps.power)); + rps_power_to_str(rps->power)); seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", rpup && rpupei ? 100 * rpup / rpupei : 0, - dev_priv->rps.up_threshold); + rps->up_threshold); seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", rpdown && rpdownei ? 
100 * rpdown / rpdownei : 0, - dev_priv->rps.down_threshold); + rps->down_threshold); } else { seq_puts(m, "\nRPS Autotuning inactive\n"); } @@ -2331,27 +2231,13 @@ static int i915_llc(struct seq_file *m, void *data) static int i915_huc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + struct drm_printer p; - if (!HAS_HUC_UCODE(dev_priv)) - return 0; + if (!HAS_HUC(dev_priv)) + return -ENODEV; - seq_puts(m, "HuC firmware status:\n"); - seq_printf(m, "\tpath: %s\n", huc_fw->path); - seq_printf(m, "\tfetch: %s\n", - intel_uc_fw_status_repr(huc_fw->fetch_status)); - seq_printf(m, "\tload: %s\n", - intel_uc_fw_status_repr(huc_fw->load_status)); - seq_printf(m, "\tversion wanted: %d.%d\n", - huc_fw->major_ver_wanted, huc_fw->minor_ver_wanted); - seq_printf(m, "\tversion found: %d.%d\n", - huc_fw->major_ver_found, huc_fw->minor_ver_found); - seq_printf(m, "\theader: offset is %d; size = %d\n", - huc_fw->header_offset, huc_fw->header_size); - seq_printf(m, "\tuCode: offset is %d; size = %d\n", - huc_fw->ucode_offset, huc_fw->ucode_size); - seq_printf(m, "\tRSA: offset is %d; size = %d\n", - huc_fw->rsa_offset, huc_fw->rsa_size); + p = drm_seq_file_printer(m); + intel_uc_fw_dump(&dev_priv->huc.fw, &p); intel_runtime_pm_get(dev_priv); seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); @@ -2363,29 +2249,14 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) static int i915_guc_load_status_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct drm_printer p; u32 tmp, i; - if (!HAS_GUC_UCODE(dev_priv)) - return 0; + if (!HAS_GUC(dev_priv)) + return -ENODEV; - seq_printf(m, "GuC firmware status:\n"); - seq_printf(m, "\tpath: %s\n", - guc_fw->path); - seq_printf(m, "\tfetch: %s\n", - intel_uc_fw_status_repr(guc_fw->fetch_status)); - seq_printf(m, "\tload: %s\n", - intel_uc_fw_status_repr(guc_fw->load_status)); - seq_printf(m, "\tversion wanted: %d.%d\n", - guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted); - seq_printf(m, "\tversion found: %d.%d\n", - guc_fw->major_ver_found, guc_fw->minor_ver_found); - seq_printf(m, "\theader: offset is %d; size = %d\n", - guc_fw->header_offset, guc_fw->header_size); - seq_printf(m, "\tuCode: offset is %d; size = %d\n", - guc_fw->ucode_offset, guc_fw->ucode_size); - seq_printf(m, "\tRSA: offset is %d; size = %d\n", - guc_fw->rsa_offset, guc_fw->rsa_size); + p = drm_seq_file_printer(m); + intel_uc_fw_dump(&dev_priv->guc.fw, &p); intel_runtime_pm_get(dev_priv); @@ -2435,7 +2306,7 @@ static void i915_guc_log_info(struct seq_file *m, static void i915_guc_client_info(struct seq_file *m, struct drm_i915_private *dev_priv, - struct i915_guc_client *client) + struct intel_guc_client *client) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -2443,12 +2314,8 @@ static void i915_guc_client_info(struct seq_file *m, seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n", client->priority, client->stage_id, client->proc_desc_offset); - seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n", - client->doorbell_id, client->doorbell_offset, client->doorbell_cookie); - seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n", - client->wq_size, client->wq_offset, client->wq_tail); - - seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space); + seq_printf(m, "\tDoorbell id %d, offset: 0x%lx\n", + 
client->doorbell_id, client->doorbell_offset); for_each_engine(engine, dev_priv, id) { u64 submissions = client->submissions[id]; @@ -2459,29 +2326,16 @@ static void i915_guc_client_info(struct seq_file *m, seq_printf(m, "\tTotal: %llu\n", tot); } -static bool check_guc_submission(struct seq_file *m) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - const struct intel_guc *guc = &dev_priv->guc; - - if (!guc->execbuf_client) { - seq_printf(m, "GuC submission %s\n", - HAS_GUC_SCHED(dev_priv) ? - "disabled" : - "not supported"); - return false; - } - - return true; -} - static int i915_guc_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_guc *guc = &dev_priv->guc; - if (!check_guc_submission(m)) - return 0; + if (!USES_GUC_SUBMISSION(dev_priv)) + return -ENODEV; + + GEM_BUG_ON(!guc->execbuf_client); + GEM_BUG_ON(!guc->preempt_client); seq_printf(m, "Doorbell map:\n"); seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); @@ -2489,6 +2343,8 @@ static int i915_guc_info(struct seq_file *m, void *data) seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client); i915_guc_client_info(m, dev_priv, guc->execbuf_client); + seq_printf(m, "\nGuC preempt client @ %p:\n", guc->preempt_client); + i915_guc_client_info(m, dev_priv, guc->preempt_client); i915_guc_log_info(m, dev_priv); @@ -2502,12 +2358,12 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_guc *guc = &dev_priv->guc; struct guc_stage_desc *desc = guc->stage_desc_pool_vaddr; - struct i915_guc_client *client = guc->execbuf_client; + struct intel_guc_client *client = guc->execbuf_client; unsigned int tmp; int index; - if (!check_guc_submission(m)) - return 0; + if (!USES_GUC_SUBMISSION(dev_priv)) + return -ENODEV; for (index = 0; index < GUC_MAX_STAGE_DESCRIPTORS; index++, desc++) { struct intel_engine_cs *engine; @@ -2560,6 +2416,9 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) u32 *log; int i = 0; + if (!HAS_GUC(dev_priv)) + return -ENODEV; + if (dump_load_err) obj = dev_priv->guc.load_err_log; else if (dev_priv->guc.log.vma) @@ -2591,10 +2450,13 @@ static int i915_guc_log_control_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; + if (!HAS_GUC(dev_priv)) + return -ENODEV; + if (!dev_priv->guc.log.vma) return -EINVAL; - *val = i915.guc_log_level; + *val = i915_modparams.guc_log_level; return 0; } @@ -2604,6 +2466,9 @@ static int i915_guc_log_control_set(void *data, u64 val) struct drm_i915_private *dev_priv = data; int ret; + if (!HAS_GUC(dev_priv)) + return -ENODEV; + if (!dev_priv->guc.log.vma) return -EINVAL; @@ -2654,10 +2519,8 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) enum pipe pipe; bool enabled = false; - if (!HAS_PSR(dev_priv)) { - seq_puts(m, "PSR not supported\n"); - return 0; - } + if (!HAS_PSR(dev_priv)) + return -ENODEV; intel_runtime_pm_get(dev_priv); @@ -2739,39 +2602,76 @@ static int i915_sink_crc(struct seq_file *m, void *data) struct intel_connector *connector; struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp = NULL; + struct drm_modeset_acquire_ctx ctx; int ret; u8 crc[6]; - drm_modeset_lock_all(dev); + drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); + drm_connector_list_iter_begin(dev, &conn_iter); + for_each_intel_connector_iter(connector, &conn_iter) { struct drm_crtc *crtc; + struct drm_connector_state *state; + struct 
intel_crtc_state *crtc_state; - if (!connector->base.state->best_encoder) + if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) continue; - crtc = connector->base.state->crtc; - if (!crtc->state->active) +retry: + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); + if (ret) + goto err; + + state = connector->base.state; + if (!state->best_encoder) continue; - if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) + crtc = state->crtc; + ret = drm_modeset_lock(&crtc->mutex, &ctx); + if (ret) + goto err; + + crtc_state = to_intel_crtc_state(crtc->state); + if (!crtc_state->base.active) continue; - intel_dp = enc_to_intel_dp(connector->base.state->best_encoder); + /* + * We need to wait for all crtc updates to complete, to make + * sure any pending modesets and plane updates are completed. + */ + if (crtc_state->base.commit) { + ret = wait_for_completion_interruptible(&crtc_state->base.commit->hw_done); + + if (ret) + goto err; + } - ret = intel_dp_sink_crc(intel_dp, crc); + intel_dp = enc_to_intel_dp(state->best_encoder); + + ret = intel_dp_sink_crc(intel_dp, crtc_state, crc); if (ret) - goto out; + goto err; seq_printf(m, "%02x%02x%02x%02x%02x%02x\n", crc[0], crc[1], crc[2], crc[3], crc[4], crc[5]); goto out; + +err: + if (ret == -EDEADLK) { + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry; + } + goto out; } ret = -ENODEV; out: drm_connector_list_iter_end(&conn_iter); - drm_modeset_unlock_all(dev); + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + return ret; } @@ -2859,10 +2759,8 @@ static int i915_dmc_info(struct seq_file *m, void *unused) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_csr *csr; - if (!HAS_CSR(dev_priv)) { - seq_puts(m, "not supported\n"); - return 0; - } + if (!HAS_CSR(dev_priv)) + return -ENODEV; csr = &dev_priv->csr; @@ -3054,7 +2952,7 @@ static void intel_connector_info(struct seq_file *m, break; case DRM_MODE_CONNECTOR_HDMIA: if (intel_encoder->type == INTEL_OUTPUT_HDMI || - intel_encoder->type == INTEL_OUTPUT_UNKNOWN) + intel_encoder->type == INTEL_OUTPUT_DDI) intel_hdmi_info(m, intel_connector); break; default: @@ -3239,9 +3137,9 @@ static int i915_display_info(struct seq_file *m, void *unused) static int i915_engine_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct i915_gpu_error *error = &dev_priv->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; + struct drm_printer p; intel_runtime_pm_get(dev_priv); @@ -3249,211 +3147,25 @@ static int i915_engine_info(struct seq_file *m, void *unused) yesno(dev_priv->gt.awake)); seq_printf(m, "Global active requests: %d\n", dev_priv->gt.active_requests); + seq_printf(m, "CS timestamp frequency: %u kHz\n", + dev_priv->info.cs_timestamp_frequency_khz); - for_each_engine(engine, dev_priv, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct drm_i915_gem_request *rq; - struct rb_node *rb; - u64 addr; - - seq_printf(m, "%s\n", engine->name); - seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", - intel_engine_get_seqno(engine), - intel_engine_last_submit(engine), - engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); - seq_printf(m, "\tReset count: %d\n", - i915_reset_engine_count(error, engine)); - - rcu_read_lock(); - - seq_printf(m, "\tRequests:\n"); - - rq = list_first_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); 
- if (&rq->link != &engine->timeline->requests) - print_request(m, rq, "\t\tfirst "); - - rq = list_last_entry(&engine->timeline->requests, - struct drm_i915_gem_request, link); - if (&rq->link != &engine->timeline->requests) - print_request(m, rq, "\t\tlast "); - - rq = i915_gem_find_active_request(engine); - if (rq) { - print_request(m, rq, "\t\tactive "); - seq_printf(m, - "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", - rq->head, rq->postfix, rq->tail, - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, - rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); - } - - seq_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", - I915_READ(RING_START(engine->mmio_base)), - rq ? i915_ggtt_offset(rq->ring->vma) : 0); - seq_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", - I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, - rq ? rq->ring->head : 0); - seq_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", - I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, - rq ? rq->ring->tail : 0); - seq_printf(m, "\tRING_CTL: 0x%08x [%s]\n", - I915_READ(RING_CTL(engine->mmio_base)), - I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : ""); - - rcu_read_unlock(); - - addr = intel_engine_get_active_head(engine); - seq_printf(m, "\tACTHD: 0x%08x_%08x\n", - upper_32_bits(addr), lower_32_bits(addr)); - addr = intel_engine_get_last_batch_head(engine); - seq_printf(m, "\tBBADDR: 0x%08x_%08x\n", - upper_32_bits(addr), lower_32_bits(addr)); - - if (i915.enable_execlists) { - u32 ptr, read, write; - unsigned int idx; - - seq_printf(m, "\tExeclist status: 0x%08x %08x\n", - I915_READ(RING_EXECLIST_STATUS_LO(engine)), - I915_READ(RING_EXECLIST_STATUS_HI(engine))); - - ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); - read = GEN8_CSB_READ_PTR(ptr); - write = GEN8_CSB_WRITE_PTR(ptr); - seq_printf(m, "\tExeclist CSB read %d, write %d, interrupt posted? 
%s\n", - read, write, - yesno(test_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted))); - if (read >= GEN8_CSB_ENTRIES) - read = 0; - if (write >= GEN8_CSB_ENTRIES) - write = 0; - if (read > write) - write += GEN8_CSB_ENTRIES; - while (read < write) { - idx = ++read % GEN8_CSB_ENTRIES; - seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n", - idx, - I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), - I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx))); - } - - rcu_read_lock(); - for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) { - unsigned int count; - - rq = port_unpack(&engine->execlist_port[idx], - &count); - if (rq) { - seq_printf(m, "\t\tELSP[%d] count=%d, ", - idx, count); - print_request(m, rq, "rq: "); - } else { - seq_printf(m, "\t\tELSP[%d] idle\n", - idx); - } - } - rcu_read_unlock(); - - spin_lock_irq(&engine->timeline->lock); - for (rb = engine->execlist_first; rb; rb = rb_next(rb)){ - struct i915_priolist *p = - rb_entry(rb, typeof(*p), node); - - list_for_each_entry(rq, &p->requests, - priotree.link) - print_request(m, rq, "\t\tQ "); - } - spin_unlock_irq(&engine->timeline->lock); - } else if (INTEL_GEN(dev_priv) > 6) { - seq_printf(m, "\tPP_DIR_BASE: 0x%08x\n", - I915_READ(RING_PP_DIR_BASE(engine))); - seq_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", - I915_READ(RING_PP_DIR_BASE_READ(engine))); - seq_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", - I915_READ(RING_PP_DIR_DCLV(engine))); - } - - spin_lock_irq(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - seq_printf(m, "\t%s [%d] waiting for %x\n", - w->tsk->comm, w->tsk->pid, w->seqno); - } - spin_unlock_irq(&b->rb_lock); - - seq_puts(m, "\n"); - } + p = drm_seq_file_printer(m); + for_each_engine(engine, dev_priv, id) + intel_engine_dump(engine, &p, "%s\n", engine->name); intel_runtime_pm_put(dev_priv); return 0; } -static int i915_semaphore_status(struct seq_file *m, void *unused) +static int i915_shrinker_info(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct intel_engine_cs *engine; - int num_rings = INTEL_INFO(dev_priv)->num_rings; - enum intel_engine_id id; - int j, ret; - - if (!i915.semaphores) { - seq_puts(m, "Semaphores are disabled\n"); - return 0; - } - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - intel_runtime_pm_get(dev_priv); - - if (IS_BROADWELL(dev_priv)) { - struct page *page; - uint64_t *seqno; - - page = i915_gem_object_get_page(dev_priv->semaphore->obj, 0); - - seqno = (uint64_t *)kmap_atomic(page); - for_each_engine(engine, dev_priv, id) { - uint64_t offset; - - seq_printf(m, "%s\n", engine->name); - - seq_puts(m, " Last signal:"); - for (j = 0; j < num_rings; j++) { - offset = id * I915_NUM_ENGINES + j; - seq_printf(m, "0x%08llx (0x%02llx) ", - seqno[offset], offset * 8); - } - seq_putc(m, '\n'); - - seq_puts(m, " Last wait: "); - for (j = 0; j < num_rings; j++) { - offset = id + (j * I915_NUM_ENGINES); - seq_printf(m, "0x%08llx (0x%02llx) ", - seqno[offset], offset * 8); - } - seq_putc(m, '\n'); + struct drm_i915_private *i915 = node_to_i915(m->private); - } - kunmap_atomic(seqno); - } else { - seq_puts(m, " Last signal:"); - for_each_engine(engine, dev_priv, id) - for (j = 0; j < num_rings; j++) - seq_printf(m, "0x%08x\n", - I915_READ(engine->semaphore.mbox.signal[j])); - seq_putc(m, '\n'); - } + seq_printf(m, "seeks = %d\n", i915->mm.shrinker.seeks); + seq_printf(m, "batch = 
%lu\n", i915->mm.shrinker.batch); - intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); return 0; } @@ -3523,6 +3235,57 @@ static int i915_wa_registers(struct seq_file *m, void *unused) return 0; } +static int i915_ipc_status_show(struct seq_file *m, void *data) +{ + struct drm_i915_private *dev_priv = m->private; + + seq_printf(m, "Isochronous Priority Control: %s\n", + yesno(dev_priv->ipc_enabled)); + return 0; +} + +static int i915_ipc_status_open(struct inode *inode, struct file *file) +{ + struct drm_i915_private *dev_priv = inode->i_private; + + if (!HAS_IPC(dev_priv)) + return -ENODEV; + + return single_open(file, i915_ipc_status_show, dev_priv); +} + +static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + struct seq_file *m = file->private_data; + struct drm_i915_private *dev_priv = m->private; + int ret; + bool enable; + + ret = kstrtobool_from_user(ubuf, len, &enable); + if (ret < 0) + return ret; + + intel_runtime_pm_get(dev_priv); + if (!dev_priv->ipc_enabled && enable) + DRM_INFO("Enabling IPC: WM will be proper only after next commit\n"); + dev_priv->wm.distrust_bios_wm = true; + dev_priv->ipc_enabled = enable; + intel_enable_ipc(dev_priv); + intel_runtime_pm_put(dev_priv); + + return len; +} + +static const struct file_operations i915_ipc_status_fops = { + .owner = THIS_MODULE, + .open = i915_ipc_status_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = i915_ipc_status_write +}; + static int i915_ddb_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -3533,7 +3296,7 @@ static int i915_ddb_info(struct seq_file *m, void *unused) int plane; if (INTEL_GEN(dev_priv) < 9) - return 0; + return -ENODEV; drm_modeset_lock_all(dev); @@ -3680,7 +3443,7 @@ static int i915_dp_mst_info(struct seq_file *m, void *unused) continue; seq_printf(m, "MST Source Port %c\n", - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); drm_dp_mst_dump_topology(m, &intel_dig_port->dp.mst_mgr); } drm_connector_list_iter_end(&conn_iter); @@ -4203,8 +3966,7 @@ fault_irq_set(struct drm_i915_private *i915, mutex_unlock(&i915->drm.struct_mutex); /* Flush idle worker to disarm irq */ - while (flush_delayed_work(&i915->gt.idle_work)) - ; + drain_delayed_work(&i915->gt.idle_work); return 0; @@ -4259,18 +4021,20 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, i915_ring_test_irq_get, i915_ring_test_irq_set, "0x%08llx\n"); -#define DROP_UNBOUND 0x1 -#define DROP_BOUND 0x2 -#define DROP_RETIRE 0x4 -#define DROP_ACTIVE 0x8 -#define DROP_FREED 0x10 -#define DROP_SHRINK_ALL 0x20 +#define DROP_UNBOUND BIT(0) +#define DROP_BOUND BIT(1) +#define DROP_RETIRE BIT(2) +#define DROP_ACTIVE BIT(3) +#define DROP_FREED BIT(4) +#define DROP_SHRINK_ALL BIT(5) +#define DROP_IDLE BIT(6) #define DROP_ALL (DROP_UNBOUND | \ DROP_BOUND | \ DROP_RETIRE | \ DROP_ACTIVE | \ DROP_FREED | \ - DROP_SHRINK_ALL) + DROP_SHRINK_ALL |\ + DROP_IDLE) static int i915_drop_caches_get(void *data, u64 *val) { @@ -4286,7 +4050,8 @@ i915_drop_caches_set(void *data, u64 val) struct drm_device *dev = &dev_priv->drm; int ret = 0; - DRM_DEBUG("Dropping caches: 0x%08llx\n", val); + DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", + val, val & DROP_ALL); /* No need to check and wait for gpu resets, only libdrm auto-restarts * on ioctls on -EAGAIN. 
*/ @@ -4317,6 +4082,9 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_shrink_all(dev_priv); fs_reclaim_release(GFP_KERNEL); + if (val & DROP_IDLE) + drain_delayed_work(&dev_priv->gt.idle_work); + if (val & DROP_FREED) { synchronize_rcu(); i915_gem_drain_freed_objects(dev_priv); @@ -4337,7 +4105,7 @@ i915_max_freq_get(void *data, u64 *val) if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit); + *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit); return 0; } @@ -4345,6 +4113,7 @@ static int i915_max_freq_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 hw_max, hw_min; int ret; @@ -4353,7 +4122,7 @@ i915_max_freq_set(void *data, u64 val) DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) return ret; @@ -4362,20 +4131,20 @@ i915_max_freq_set(void *data, u64 val) */ val = intel_freq_opcode(dev_priv, val); - hw_max = dev_priv->rps.max_freq; - hw_min = dev_priv->rps.min_freq; + hw_max = rps->max_freq; + hw_min = rps->min_freq; - if (val < hw_min || val > hw_max || val < dev_priv->rps.min_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); return -EINVAL; } - dev_priv->rps.max_freq_softlimit = val; + rps->max_freq_softlimit = val; if (intel_set_rps(dev_priv, val)) DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return 0; } @@ -4392,7 +4161,7 @@ i915_min_freq_get(void *data, u64 *val) if (INTEL_GEN(dev_priv) < 6) return -ENODEV; - *val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit); + *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit); return 0; } @@ -4400,6 +4169,7 @@ static int i915_min_freq_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 hw_max, hw_min; int ret; @@ -4408,7 +4178,7 @@ i915_min_freq_set(void *data, u64 val) DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + ret = mutex_lock_interruptible(&dev_priv->pcu_lock); if (ret) return ret; @@ -4417,21 +4187,21 @@ i915_min_freq_set(void *data, u64 val) */ val = intel_freq_opcode(dev_priv, val); - hw_max = dev_priv->rps.max_freq; - hw_min = dev_priv->rps.min_freq; + hw_max = rps->max_freq; + hw_min = rps->min_freq; if (val < hw_min || - val > hw_max || val > dev_priv->rps.max_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + val > hw_max || val > rps->max_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); return -EINVAL; } - dev_priv->rps.min_freq_softlimit = val; + rps->min_freq_softlimit = val; if (intel_set_rps(dev_priv, val)) DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return 0; } @@ -4520,6 +4290,61 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, } } +static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, + struct sseu_dev_info *sseu) +{ + const struct intel_device_info *info = INTEL_INFO(dev_priv); + int s_max = 6, ss_max = 4; + int s, ss; + u32 s_reg[s_max], eu_reg[2 * s_max], eu_mask[2]; + + for (s = 0; s < s_max; s++) 
{ + /* + * FIXME: Valid SS Mask respects the spec and reads + * only valid bits for those registers, excluding reserved bits, + * although this seems wrong because it would leave many + * subslices without an ACK. + */ + s_reg[s] = I915_READ(GEN10_SLICE_PGCTL_ACK(s)) & + GEN10_PGCTL_VALID_SS_MASK(s); + eu_reg[2 * s] = I915_READ(GEN10_SS01_EU_PGCTL_ACK(s)); + eu_reg[2 * s + 1] = I915_READ(GEN10_SS23_EU_PGCTL_ACK(s)); + } + + eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK | + GEN9_PGCTL_SSA_EU19_ACK | + GEN9_PGCTL_SSA_EU210_ACK | + GEN9_PGCTL_SSA_EU311_ACK; + eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK | + GEN9_PGCTL_SSB_EU19_ACK | + GEN9_PGCTL_SSB_EU210_ACK | + GEN9_PGCTL_SSB_EU311_ACK; + + for (s = 0; s < s_max; s++) { + if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0) + /* skip disabled slice */ + continue; + + sseu->slice_mask |= BIT(s); + sseu->subslice_mask = info->sseu.subslice_mask; + + for (ss = 0; ss < ss_max; ss++) { + unsigned int eu_cnt; + + if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) + /* skip disabled subslice */ + continue; + + eu_cnt = 2 * hweight32(eu_reg[2 * s + ss / 2] & + eu_mask[ss % 2]); + sseu->eu_total += eu_cnt; + sseu->eu_per_subslice = max_t(unsigned int, + sseu->eu_per_subslice, + eu_cnt); + } + } +} + static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { @@ -4555,7 +4380,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; @@ -4661,8 +4486,10 @@ static int i915_sseu_status(struct seq_file *m, void *unused) cherryview_sseu_device_status(dev_priv, &sseu); } else if (IS_BROADWELL(dev_priv)) { broadwell_sseu_device_status(dev_priv, &sseu); - } else if (INTEL_GEN(dev_priv) >= 9) { + } else if (IS_GEN9(dev_priv)) { gen9_sseu_device_status(dev_priv, &sseu); + } else if (INTEL_GEN(dev_priv) >= 10) { + gen10_sseu_device_status(dev_priv, &sseu); } intel_runtime_pm_put(dev_priv); @@ -4674,26 +4501,26 @@ static int i915_sseu_status(struct seq_file *m, void *unused) static int i915_forcewake_open(struct inode *inode, struct file *file) { - struct drm_i915_private *dev_priv = inode->i_private; + struct drm_i915_private *i915 = inode->i_private; - if (INTEL_GEN(dev_priv) < 6) + if (INTEL_GEN(i915) < 6) return 0; - intel_runtime_pm_get(dev_priv); - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + intel_runtime_pm_get(i915); + intel_uncore_forcewake_user_get(i915); return 0; } static int i915_forcewake_release(struct inode *inode, struct file *file) { - struct drm_i915_private *dev_priv = inode->i_private; + struct drm_i915_private *i915 = inode->i_private; - if (INTEL_GEN(dev_priv) < 6) + if (INTEL_GEN(i915) < 6) return 0; - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_runtime_pm_put(dev_priv); + intel_uncore_forcewake_user_put(i915); + intel_runtime_pm_put(i915); return 0; } @@ -4783,10 +4610,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, {"i915_gem_gtt", i915_gem_gtt_info, 0}, - {"i915_gem_pin_display", i915_gem_gtt_info, 0, (void *)1}, {"i915_gem_stolen", i915_gem_stolen_list_info }, - {"i915_gem_request", i915_gem_request_info, 0}, - {"i915_gem_seqno", i915_gem_seqno_info, 0}, {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, {"i915_gem_interrupt", i915_interrupt_info, 0}, {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0}, @@
-4810,7 +4634,6 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_vbt", i915_vbt, 0}, {"i915_gem_framebuffer", i915_gem_framebuffer_info, 0}, {"i915_context_status", i915_context_status, 0}, - {"i915_dump_lrc", i915_dump_lrc, 0}, {"i915_forcewake_domains", i915_forcewake_domains, 0}, {"i915_swizzle_info", i915_swizzle_info, 0}, {"i915_ppgtt_info", i915_ppgtt_info, 0}, @@ -4823,7 +4646,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_dmc_info", i915_dmc_info, 0}, {"i915_display_info", i915_display_info, 0}, {"i915_engine_info", i915_engine_info, 0}, - {"i915_semaphore_status", i915_semaphore_status, 0}, + {"i915_shrinker_info", i915_shrinker_info, 0}, {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, {"i915_dp_mst_info", i915_dp_mst_info, 0}, {"i915_wa_registers", i915_wa_registers, 0}, @@ -4859,7 +4682,8 @@ static const struct i915_debugfs_files { {"i915_dp_test_type", &i915_displayport_test_type_fops}, {"i915_dp_test_active", &i915_displayport_test_active_fops}, {"i915_guc_log_control", &i915_guc_log_control_fops}, - {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops} + {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops}, + {"i915_ipc_status", &i915_ipc_status_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9f45cfeae775..2f5209de0391 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -48,6 +48,7 @@ #include "i915_drv.h" #include "i915_trace.h" +#include "i915_pmu.h" #include "i915_vgpu.h" #include "intel_drv.h" #include "intel_uc.h" @@ -58,12 +59,12 @@ static unsigned int i915_load_fail_count; bool __i915_inject_load_failure(const char *func, int line) { - if (i915_load_fail_count >= i915.inject_load_failure) + if (i915_load_fail_count >= i915_modparams.inject_load_failure) return false; - if (++i915_load_fail_count == i915.inject_load_failure) { + if (++i915_load_fail_count == i915_modparams.inject_load_failure) { DRM_INFO("Injecting failure at checkpoint %u [%s:%d]\n", - i915.inject_load_failure, func, line); + i915_modparams.inject_load_failure, func, line); return true; } @@ -106,8 +107,8 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, static bool i915_error_injected(struct drm_i915_private *dev_priv) { - return i915.inject_load_failure && - i915_load_fail_count == i915.inject_load_failure; + return i915_modparams.inject_load_failure && + i915_load_fail_count == i915_modparams.inject_load_failure; } #define i915_load_error(dev_priv, fmt, ...) 
\ @@ -239,7 +240,8 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) dev_priv->pch_type = PCH_KBP; DRM_DEBUG_KMS("Found Kaby Lake PCH (KBP)\n"); WARN_ON(!IS_SKYLAKE(dev_priv) && - !IS_KABYLAKE(dev_priv)); + !IS_KABYLAKE(dev_priv) && + !IS_COFFEELAKE(dev_priv)); } else if (id == INTEL_PCH_CNP_DEVICE_ID_TYPE) { dev_priv->pch_type = PCH_CNP; DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n"); @@ -320,7 +322,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = USES_PPGTT(dev_priv); break; case I915_PARAM_HAS_SEMAPHORES: - value = i915.semaphores; + value = HAS_LEGACY_SEMAPHORES(dev_priv); break; case I915_PARAM_HAS_SECURE_BATCHES: value = capable(CAP_SYS_ADMIN); @@ -339,7 +341,8 @@ static int i915_getparam(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_HAS_GPU_RESET: - value = i915.enable_hangcheck && intel_has_gpu_reset(dev_priv); + value = i915_modparams.enable_hangcheck && + intel_has_gpu_reset(dev_priv); if (value && intel_has_reset_engine(dev_priv)) value = 2; break; @@ -365,9 +368,15 @@ static int i915_getparam(struct drm_device *dev, void *data, value = i915_gem_mmap_gtt_version(); break; case I915_PARAM_HAS_SCHEDULER: - value = dev_priv->engine[RCS] && - dev_priv->engine[RCS]->schedule; + value = 0; + if (dev_priv->engine[RCS] && dev_priv->engine[RCS]->schedule) { + value |= I915_SCHEDULER_CAP_ENABLED; + value |= I915_SCHEDULER_CAP_PRIORITY; + if (HAS_LOGICAL_RING_PREEMPTION(dev_priv)) + value |= I915_SCHEDULER_CAP_PREEMPTION; + } break; + case I915_PARAM_MMAP_VERSION: /* Remember to bump this if the version changes! */ case I915_PARAM_HAS_GEM: @@ -396,6 +405,9 @@ static int i915_getparam(struct drm_device *dev, void *data, */ value = 1; break; + case I915_PARAM_HAS_CONTEXT_ISOLATION: + value = intel_engines_has_context_isolation(dev_priv); + break; case I915_PARAM_SLICE_MASK: value = INTEL_INFO(dev_priv)->sseu.slice_mask; if (!value) @@ -406,6 +418,9 @@ static int i915_getparam(struct drm_device *dev, void *data, if (!value) return -ENODEV; break; + case I915_PARAM_CS_TIMESTAMP_FREQUENCY: + value = 1000 * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; @@ -602,11 +617,14 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); intel_uc_fini_hw(dev_priv); + intel_uc_fini(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_contexts_fini(dev_priv); - i915_gem_cleanup_userptr(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); + intel_uc_fini_wq(dev_priv); + i915_gem_cleanup_userptr(dev_priv); + i915_gem_drain_freed_objects(dev_priv); WARN_ON(!list_empty(&dev_priv->contexts.list)); @@ -665,7 +683,7 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_uc; - intel_modeset_gem_init(dev); + intel_setup_overlay(dev_priv); if (INTEL_INFO(dev_priv)->num_pipes == 0) return 0; @@ -677,8 +695,6 @@ static int i915_load_modeset_init(struct drm_device *dev) /* Only enable hotplug handling once the fbdev is fully set up. 
*/ intel_hpd_init(dev_priv); - drm_kms_helper_poll_init(dev); - return 0; cleanup_gem: @@ -712,7 +728,7 @@ static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv) if (!ap) return -ENOMEM; - ap->ranges[0].base = ggtt->mappable_base; + ap->ranges[0].base = ggtt->gmadr.start; ap->ranges[0].size = ggtt->mappable_end; primary = @@ -826,6 +842,11 @@ static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) * We don't keep the workarounds for pre-production hardware, so we expect our * driver to fail on these machines in one way or another. A little warning on * dmesg may help both the user and the bug triagers. + * + * Our policy for removing pre-production workarounds is to keep the + * current gen workarounds as a guide to the bring-up of the next gen + * (workarounds have a habit of persisting!). Anything older than that + * should be removed along with the complications they introduce. */ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) { @@ -868,6 +889,10 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, memcpy(device_info, match_info, sizeof(*device_info)); device_info->device_id = dev_priv->drm.pdev->device; + BUILD_BUG_ON(INTEL_MAX_PLATFORMS > + sizeof(device_info->platform_mask) * BITS_PER_BYTE); + device_info->platform_mask = BIT(device_info->platform); + BUG_ON(device_info->gen > sizeof(device_info->gen_mask) * BITS_PER_BYTE); device_info->gen_mask = BIT(device_info->gen - 1); @@ -876,7 +901,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->backlight_lock); spin_lock_init(&dev_priv->uncore.lock); - spin_lock_init(&dev_priv->mm.object_stat_lock); mutex_init(&dev_priv->sb_lock); mutex_init(&dev_priv->modeset_restore_lock); mutex_init(&dev_priv->av_mutex); @@ -907,12 +931,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_display_crc_init(dev_priv); - intel_device_info_dump(dev_priv); - intel_detect_preproduction_hw(dev_priv); - i915_perf_init(dev_priv); - return 0; err_irq: @@ -929,7 +949,6 @@ err_engines: */ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) { - i915_perf_fini(dev_priv); i915_gem_load_cleanup(dev_priv); intel_irq_fini(dev_priv); i915_workqueues_cleanup(dev_priv); @@ -1001,6 +1020,8 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) intel_uncore_init(dev_priv); + intel_uc_init_mmio(dev_priv); + ret = intel_engines_init_mmio(dev_priv); if (ret) goto err_uncore; @@ -1030,22 +1051,16 @@ static void i915_driver_cleanup_mmio(struct drm_i915_private *dev_priv) static void intel_sanitize_options(struct drm_i915_private *dev_priv) { - i915.enable_execlists = - intel_sanitize_enable_execlists(dev_priv, - i915.enable_execlists); - /* * i915.enable_ppgtt is read-only, so do an early pass to validate the * user's requested state against the hardware/driver capabilities. We * do this now so that we can print out any log messages once rather * than every time we check intel_enable_ppgtt(). */ - i915.enable_ppgtt = - intel_sanitize_enable_ppgtt(dev_priv, i915.enable_ppgtt); - DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); - - i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores); - DRM_DEBUG_DRIVER("use GPU semaphores? 
%s\n", yesno(i915.semaphores)); + i915_modparams.enable_ppgtt = + intel_sanitize_enable_ppgtt(dev_priv, + i915_modparams.enable_ppgtt); + DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915_modparams.enable_ppgtt); intel_uc_sanitize_options(dev_priv); @@ -1067,10 +1082,12 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) if (i915_inject_load_failure()) return -ENODEV; - intel_device_info_runtime_init(dev_priv); + intel_device_info_runtime_init(mkwrite_device_info(dev_priv)); intel_sanitize_options(dev_priv); + i915_perf_init(dev_priv); + ret = i915_ggtt_probe_hw(dev_priv); if (ret) return ret; @@ -1176,6 +1193,8 @@ static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; + i915_perf_fini(dev_priv); + if (pdev->msi_enabled) pci_disable_msi(pdev); @@ -1194,7 +1213,8 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; - i915_gem_shrinker_init(dev_priv); + i915_gem_shrinker_register(dev_priv); + i915_pmu_register(dev_priv); /* * Notify a valid surface after modesetting, @@ -1233,6 +1253,13 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) * cannot run before the connectors are registered. */ intel_fbdev_initial_config_async(dev); + + /* + * We need to coordinate the hotplugs with the asynchronous fbdev + * configuration, for which we use the fbdev->async_cookie. + */ + if (INTEL_INFO(dev_priv)->num_pipes) + drm_kms_helper_poll_init(dev); } /** @@ -1244,17 +1271,40 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_fbdev_unregister(dev_priv); intel_audio_deinit(dev_priv); + /* + * After flushing the fbdev (incl. a late async config which will + * have delayed queuing of a hotplug event), then flush the hotplug + * events. + */ + drm_kms_helper_poll_fini(&dev_priv->drm); + intel_gpu_ips_teardown(); acpi_video_unregister(); intel_opregion_unregister(dev_priv); i915_perf_unregister(dev_priv); + i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); i915_guc_log_unregister(dev_priv); drm_dev_unregister(&dev_priv->drm); - i915_gem_shrinker_cleanup(dev_priv); + i915_gem_shrinker_unregister(dev_priv); +} + +static void i915_welcome_messages(struct drm_i915_private *dev_priv) +{ + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer("i915 device info:"); + + intel_device_info_dump(&dev_priv->info, &p); + intel_device_info_dump_runtime(&dev_priv->info, &p); + } + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) + DRM_INFO("DRM_I915_DEBUG enabled\n"); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + DRM_INFO("DRM_I915_DEBUG_GEM enabled\n"); } /** @@ -1276,7 +1326,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) int ret; /* Enable nuclear pageflip on ILK+ */ - if (!i915.nuclear_pageflip && match_info->gen < 5) + if (!i915_modparams.nuclear_pageflip && match_info->gen < 5) driver.driver_features &= ~DRIVER_ATOMIC; ret = -ENOMEM; @@ -1304,7 +1354,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) * becaue the HDA driver may require us to enable the audio power * domain during system suspend. 
*/ - pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME; + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP); ret = i915_driver_init_early(dev_priv, ent); if (ret < 0) @@ -1340,15 +1390,12 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) intel_runtime_pm_enable(dev_priv); - dev_priv->ipc_enabled = false; - - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - DRM_INFO("DRM_I915_DEBUG enabled\n"); - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - DRM_INFO("DRM_I915_DEBUG_GEM enabled\n"); + intel_init_ipc(dev_priv); intel_runtime_pm_put(dev_priv); + i915_welcome_messages(dev_priv); + return 0; out_cleanup_hw: @@ -1386,19 +1433,7 @@ void i915_driver_unload(struct drm_device *dev) intel_modeset_cleanup(dev); - /* - * free the memory space allocated for the child device - * config parsed from VBT - */ - if (dev_priv->vbt.child_dev && dev_priv->vbt.child_dev_num) { - kfree(dev_priv->vbt.child_dev); - dev_priv->vbt.child_dev = NULL; - dev_priv->vbt.child_dev_num = 0; - } - kfree(dev_priv->vbt.sdvo_lvds_vbt_mode); - dev_priv->vbt.sdvo_lvds_vbt_mode = NULL; - kfree(dev_priv->vbt.lfp_lvds_vbt_mode); - dev_priv->vbt.lfp_lvds_vbt_mode = NULL; + intel_bios_cleanup(dev_priv); vga_switcheroo_unregister_client(pdev); vga_client_register(pdev, NULL, NULL, NULL); @@ -1571,7 +1606,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) intel_display_set_init_power(dev_priv, false); - fw_csr = !IS_GEN9_LP(dev_priv) && + fw_csr = !IS_GEN9_LP(dev_priv) && !hibernation && suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; /* * In case of firmware assisted context save/restore don't manually @@ -1661,8 +1696,6 @@ static int i915_drm_resume(struct drm_device *dev) intel_csr_ucode_resume(dev_priv); - i915_gem_resume(dev_priv); - i915_restore_state(dev_priv); intel_pps_unlock_regs_wa(dev_priv); intel_opregion_setup(dev_priv); @@ -1683,16 +1716,10 @@ static int i915_drm_resume(struct drm_device *dev) drm_mode_config_reset(dev); - mutex_lock(&dev->struct_mutex); - if (i915_gem_init_hw(dev_priv)) { - DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); - i915_gem_set_wedged(dev_priv); - } - mutex_unlock(&dev->struct_mutex); - - intel_guc_resume(dev_priv); + i915_gem_resume(dev_priv); intel_modeset_init_hw(dev); + intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display.hpd_irq_setup) @@ -1723,8 +1750,6 @@ static int i915_drm_resume(struct drm_device *dev) intel_opregion_notify_adapter(dev_priv, PCI_D0); - intel_autoenable_gt_powersave(dev_priv); - enable_rpm_wakeref_asserts(dev_priv); return 0; @@ -1805,6 +1830,8 @@ static int i915_drm_resume_early(struct drm_device *dev) if (IS_GEN9_LP(dev_priv) || !(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload)) intel_power_domains_init_hw(dev_priv, true); + else + intel_display_set_init_power(dev_priv, true); i915_gem_sanitize(dev_priv); @@ -1852,7 +1879,9 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) { struct i915_gpu_error *error = &i915->gpu_error; int ret; + int i; + might_sleep(); lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); @@ -1870,22 +1899,30 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) disable_irq(i915->drm.irq); ret = i915_gem_reset_prepare(i915); if (ret) { - DRM_ERROR("GPU recovery failed\n"); + dev_err(i915->drm.dev, "GPU recovery failed\n"); intel_gpu_reset(i915, ALL_ENGINES); - goto error; + goto taint; } - ret = intel_gpu_reset(i915, ALL_ENGINES); - if (ret) 
{ - if (ret != -ENODEV) - DRM_ERROR("Failed to reset chip: %i\n", ret); + if (!intel_has_gpu_reset(i915)) { + if (i915_modparams.reset) + dev_err(i915->drm.dev, "GPU reset not supported\n"); else DRM_DEBUG_DRIVER("GPU reset disabled\n"); goto error; } - i915_gem_reset(i915); - intel_overlay_reset(i915); + for (i = 0; i < 3; i++) { + ret = intel_gpu_reset(i915, ALL_ENGINES); + if (ret == 0) + break; + + msleep(100); + } + if (ret) { + dev_err(i915->drm.dev, "Failed to reset chip\n"); + goto taint; + } /* Ok, now get things going again... */ @@ -1899,6 +1936,9 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) goto error; } + i915_gem_reset(i915); + intel_overlay_reset(i915); + /* * Next we need to restore the context, but we don't use those * yet either... @@ -1924,12 +1964,32 @@ wakeup: wake_up_bit(&error->flags, I915_RESET_HANDOFF); return; +taint: + /* + * History tells us that if we cannot reset the GPU now, we + * never will. This then impacts everything that is run + * subsequently. On failing the reset, we mark the driver + * as wedged, preventing further execution on the GPU. + * We also want to go one step further and add a taint to the + * kernel so that any subsequent faults can be traced back to + * this failure. This is important for CI, where if the + * GPU/driver fails we would like to reboot and restart testing + * rather than continue on into oblivion. For everyone else, + * the system should still plod along, but they have been warned! + */ + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); error: i915_gem_set_wedged(i915); i915_gem_retire_requests(i915); goto finish; } +static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, + struct intel_engine_cs *engine) +{ + return intel_gpu_reset(dev_priv, intel_engine_flag(engine)); +} + /** * i915_reset_engine - reset GPU engine to recover from a hang * @engine: engine to reset @@ -1951,23 +2011,27 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); + active_request = i915_gem_reset_prepare_engine(engine); + if (IS_ERR_OR_NULL(active_request)) { + /* Either the previous reset failed, or we pardon the reset. */ + ret = PTR_ERR(active_request); + goto out; + } + if (!(flags & I915_RESET_QUIET)) { dev_notice(engine->i915->drm.dev, "Resetting %s after gpu hang\n", engine->name); } error->reset_engine_count[engine->id]++; - active_request = i915_gem_reset_prepare_engine(engine); - if (IS_ERR(active_request)) { - DRM_DEBUG_DRIVER("Previous reset failed, promote to full reset\n"); - ret = PTR_ERR(active_request); - goto out; - } - - ret = intel_gpu_reset(engine->i915, intel_engine_flag(engine)); + if (!engine->i915->guc.execbuf_client) + ret = intel_gt_reset_engine(engine->i915, engine); + else + ret = intel_guc_reset_engine(&engine->i915->guc, engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ - DRM_DEBUG_DRIVER("Failed to reset %s, ret=%d\n", + DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n", + engine->i915->guc.execbuf_client ? 
"GuC " : "", engine->name, ret); goto out; } @@ -2061,11 +2125,14 @@ static int i915_pm_resume(struct device *kdev) /* freeze: before creating the hibernation_image */ static int i915_pm_freeze(struct device *kdev) { + struct drm_device *dev = &kdev_to_i915(kdev)->drm; int ret; - ret = i915_pm_suspend(kdev); - if (ret) - return ret; + if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { + ret = i915_drm_suspend(dev); + if (ret) + return ret; + } ret = i915_gem_freeze(kdev_to_i915(kdev)); if (ret) @@ -2076,11 +2143,14 @@ static int i915_pm_freeze(struct device *kdev) static int i915_pm_freeze_late(struct device *kdev) { + struct drm_device *dev = &kdev_to_i915(kdev)->drm; int ret; - ret = i915_pm_suspend_late(kdev); - if (ret) - return ret; + if (dev->switch_power_state != DRM_SWITCH_POWER_OFF) { + ret = i915_drm_suspend_late(dev, true); + if (ret) + return ret; + } ret = i915_gem_freeze_late(kdev_to_i915(kdev)); if (ret) @@ -2476,7 +2546,7 @@ static int intel_runtime_suspend(struct device *kdev) struct drm_i915_private *dev_priv = to_i915(dev); int ret; - if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6()))) + if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && HAS_RC6(dev_priv)))) return -ENODEV; if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv))) @@ -2496,6 +2566,8 @@ static int intel_runtime_suspend(struct device *kdev) intel_runtime_pm_disable_interrupts(dev_priv); + intel_uncore_suspend(dev_priv); + ret = 0; if (IS_GEN9_LP(dev_priv)) { bxt_display_core_uninit(dev_priv); @@ -2508,6 +2580,8 @@ static int intel_runtime_suspend(struct device *kdev) if (ret) { DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret); + intel_uncore_runtime_resume(dev_priv); + intel_runtime_pm_enable_interrupts(dev_priv); enable_rpm_wakeref_asserts(dev_priv); @@ -2515,15 +2589,13 @@ static int intel_runtime_suspend(struct device *kdev) return ret; } - intel_uncore_suspend(dev_priv); - enable_rpm_wakeref_asserts(dev_priv); - WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); + WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv)) DRM_ERROR("Unclaimed access detected prior to suspending\n"); - dev_priv->pm.suspended = true; + dev_priv->runtime_pm.suspended = true; /* * FIXME: We really should find a document that references the arguments @@ -2569,11 +2641,11 @@ static int intel_runtime_resume(struct device *kdev) DRM_DEBUG_KMS("Resuming device\n"); - WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); + WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count)); disable_rpm_wakeref_asserts(dev_priv); intel_opregion_notify_adapter(dev_priv, PCI_D0); - dev_priv->pm.suspended = false; + dev_priv->runtime_pm.suspended = false; if (intel_uncore_unclaimed_mmio(dev_priv)) DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n"); @@ -2591,6 +2663,8 @@ static int intel_runtime_resume(struct device *kdev) ret = vlv_resume_prepare(dev_priv, true); } + intel_uncore_runtime_resume(dev_priv); + /* * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). 
@@ -2608,6 +2682,8 @@ static int intel_runtime_resume(struct device *kdev) if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) intel_hpd_init(dev_priv); + intel_enable_ipc(dev_priv); + enable_rpm_wakeref_asserts(dev_priv); if (ret) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 18d9da53282b..d307429a5ae0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -40,6 +40,7 @@ #include <linux/hash.h> #include <linux/intel-iommu.h> #include <linux/kref.h> +#include <linux/perf_event.h> #include <linux/pm_qos.h> #include <linux/reservation.h> #include <linux/shmem_fs.h> @@ -55,19 +56,21 @@ #include "i915_reg.h" #include "i915_utils.h" -#include "intel_uncore.h" #include "intel_bios.h" +#include "intel_device_info.h" +#include "intel_display.h" #include "intel_dpll_mgr.h" -#include "intel_uc.h" #include "intel_lrc.h" +#include "intel_opregion.h" #include "intel_ringbuffer.h" +#include "intel_uncore.h" +#include "intel_uc.h" #include "i915_gem.h" #include "i915_gem_context.h" #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_render_state.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" @@ -80,8 +83,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170818" -#define DRIVER_TIMESTAMP 1503088845 +#define DRIVER_DATE "20171222" +#define DRIVER_TIMESTAMP 1513971710 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -93,7 +96,7 @@ #define I915_STATE_WARN(condition, format...) ({ \ int __ret_warn_on = !!(condition); \ if (unlikely(__ret_warn_on)) \ - if (!WARN(i915.verbose_state_checks, format)) \ + if (!WARN(i915_modparams.verbose_state_checks, format)) \ DRM_ERROR(format); \ unlikely(__ret_warn_on); \ }) @@ -126,7 +129,7 @@ static inline uint_fixed_16_16_t u32_to_fixed16(uint32_t val) { uint_fixed_16_16_t fp; - WARN_ON(val >> 16); + WARN_ON(val > U16_MAX); fp.val = val << 16; return fp; @@ -163,8 +166,8 @@ static inline uint_fixed_16_16_t max_fixed16(uint_fixed_16_16_t max1, static inline uint_fixed_16_16_t clamp_u64_to_fixed16(uint64_t val) { uint_fixed_16_16_t fp; - WARN_ON(val >> 32); - fp.val = clamp_t(uint32_t, val, 0, ~0); + WARN_ON(val > U32_MAX); + fp.val = (uint32_t) val; return fp; } @@ -181,8 +184,8 @@ static inline uint32_t mul_round_up_u32_fixed16(uint32_t val, intermediate_val = (uint64_t) val * mul.val; intermediate_val = DIV_ROUND_UP_ULL(intermediate_val, 1 << 16); - WARN_ON(intermediate_val >> 32); - return clamp_t(uint32_t, intermediate_val, 0, ~0); + WARN_ON(intermediate_val > U32_MAX); + return (uint32_t) intermediate_val; } static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val, @@ -211,8 +214,8 @@ static inline uint32_t div_round_up_u32_fixed16(uint32_t val, interm_val = (uint64_t)val << 16; interm_val = DIV_ROUND_UP_ULL(interm_val, d.val); - WARN_ON(interm_val >> 32); - return clamp_t(uint32_t, interm_val, 0, ~0); + WARN_ON(interm_val > U32_MAX); + return (uint32_t) interm_val; } static inline uint_fixed_16_16_t mul_u32_fixed16(uint32_t val, @@ -243,173 +246,6 @@ static inline uint_fixed_16_16_t add_fixed16_u32(uint_fixed_16_16_t add1, return clamp_u64_to_fixed16(interm_sum); } -static inline const char *yesno(bool v) -{ - return v ? "yes" : "no"; -} - -static inline const char *onoff(bool v) -{ - return v ? 
"on" : "off"; -} - -static inline const char *enableddisabled(bool v) -{ - return v ? "enabled" : "disabled"; -} - -enum pipe { - INVALID_PIPE = -1, - PIPE_A = 0, - PIPE_B, - PIPE_C, - _PIPE_EDP, - I915_MAX_PIPES = _PIPE_EDP -}; -#define pipe_name(p) ((p) + 'A') - -enum transcoder { - TRANSCODER_A = 0, - TRANSCODER_B, - TRANSCODER_C, - TRANSCODER_EDP, - TRANSCODER_DSI_A, - TRANSCODER_DSI_C, - I915_MAX_TRANSCODERS -}; - -static inline const char *transcoder_name(enum transcoder transcoder) -{ - switch (transcoder) { - case TRANSCODER_A: - return "A"; - case TRANSCODER_B: - return "B"; - case TRANSCODER_C: - return "C"; - case TRANSCODER_EDP: - return "EDP"; - case TRANSCODER_DSI_A: - return "DSI A"; - case TRANSCODER_DSI_C: - return "DSI C"; - default: - return "<invalid>"; - } -} - -static inline bool transcoder_is_dsi(enum transcoder transcoder) -{ - return transcoder == TRANSCODER_DSI_A || transcoder == TRANSCODER_DSI_C; -} - -/* - * Global legacy plane identifier. Valid only for primary/sprite - * planes on pre-g4x, and only for primary planes on g4x+. - */ -enum plane { - PLANE_A, - PLANE_B, - PLANE_C, -}; -#define plane_name(p) ((p) + 'A') - -#define sprite_name(p, s) ((p) * INTEL_INFO(dev_priv)->num_sprites[(p)] + (s) + 'A') - -/* - * Per-pipe plane identifier. - * I915_MAX_PLANES in the enum below is the maximum (across all platforms) - * number of planes per CRTC. Not all platforms really have this many planes, - * which means some arrays of size I915_MAX_PLANES may have unused entries - * between the topmost sprite plane and the cursor plane. - * - * This is expected to be passed to various register macros - * (eg. PLANE_CTL(), PS_PLANE_SEL(), etc.) so adjust with care. - */ -enum plane_id { - PLANE_PRIMARY, - PLANE_SPRITE0, - PLANE_SPRITE1, - PLANE_SPRITE2, - PLANE_CURSOR, - I915_MAX_PLANES, -}; - -#define for_each_plane_id_on_crtc(__crtc, __p) \ - for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ - for_each_if ((__crtc)->plane_ids_mask & BIT(__p)) - -enum port { - PORT_NONE = -1, - PORT_A = 0, - PORT_B, - PORT_C, - PORT_D, - PORT_E, - I915_MAX_PORTS -}; -#define port_name(p) ((p) + 'A') - -#define I915_NUM_PHYS_VLV 2 - -enum dpio_channel { - DPIO_CH0, - DPIO_CH1 -}; - -enum dpio_phy { - DPIO_PHY0, - DPIO_PHY1, - DPIO_PHY2, -}; - -enum intel_display_power_domain { - POWER_DOMAIN_PIPE_A, - POWER_DOMAIN_PIPE_B, - POWER_DOMAIN_PIPE_C, - POWER_DOMAIN_PIPE_A_PANEL_FITTER, - POWER_DOMAIN_PIPE_B_PANEL_FITTER, - POWER_DOMAIN_PIPE_C_PANEL_FITTER, - POWER_DOMAIN_TRANSCODER_A, - POWER_DOMAIN_TRANSCODER_B, - POWER_DOMAIN_TRANSCODER_C, - POWER_DOMAIN_TRANSCODER_EDP, - POWER_DOMAIN_TRANSCODER_DSI_A, - POWER_DOMAIN_TRANSCODER_DSI_C, - POWER_DOMAIN_PORT_DDI_A_LANES, - POWER_DOMAIN_PORT_DDI_B_LANES, - POWER_DOMAIN_PORT_DDI_C_LANES, - POWER_DOMAIN_PORT_DDI_D_LANES, - POWER_DOMAIN_PORT_DDI_E_LANES, - POWER_DOMAIN_PORT_DDI_A_IO, - POWER_DOMAIN_PORT_DDI_B_IO, - POWER_DOMAIN_PORT_DDI_C_IO, - POWER_DOMAIN_PORT_DDI_D_IO, - POWER_DOMAIN_PORT_DDI_E_IO, - POWER_DOMAIN_PORT_DSI, - POWER_DOMAIN_PORT_CRT, - POWER_DOMAIN_PORT_OTHER, - POWER_DOMAIN_VGA, - POWER_DOMAIN_AUDIO, - POWER_DOMAIN_PLLS, - POWER_DOMAIN_AUX_A, - POWER_DOMAIN_AUX_B, - POWER_DOMAIN_AUX_C, - POWER_DOMAIN_AUX_D, - POWER_DOMAIN_GMBUS, - POWER_DOMAIN_MODESET, - POWER_DOMAIN_INIT, - - POWER_DOMAIN_NUM, -}; - -#define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A) -#define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \ - ((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER) -#define POWER_DOMAIN_TRANSCODER(tran) \ - ((tran) == TRANSCODER_EDP 
? POWER_DOMAIN_TRANSCODER_EDP : \ - (tran) + POWER_DOMAIN_TRANSCODER_A) - enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ @@ -471,104 +307,6 @@ struct i915_hotplug { I915_GEM_DOMAIN_INSTRUCTION | \ I915_GEM_DOMAIN_VERTEX) -#define for_each_pipe(__dev_priv, __p) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) -#define for_each_pipe_masked(__dev_priv, __p, __mask) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ - for_each_if ((__mask) & (1 << (__p))) -#define for_each_universal_plane(__dev_priv, __pipe, __p) \ - for ((__p) = 0; \ - (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \ - (__p)++) -#define for_each_sprite(__dev_priv, __p, __s) \ - for ((__s) = 0; \ - (__s) < INTEL_INFO(__dev_priv)->num_sprites[(__p)]; \ - (__s)++) - -#define for_each_port_masked(__port, __ports_mask) \ - for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++) \ - for_each_if ((__ports_mask) & (1 << (__port))) - -#define for_each_crtc(dev, crtc) \ - list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head) - -#define for_each_intel_plane(dev, intel_plane) \ - list_for_each_entry(intel_plane, \ - &(dev)->mode_config.plane_list, \ - base.head) - -#define for_each_intel_plane_mask(dev, intel_plane, plane_mask) \ - list_for_each_entry(intel_plane, \ - &(dev)->mode_config.plane_list, \ - base.head) \ - for_each_if ((plane_mask) & \ - (1 << drm_plane_index(&intel_plane->base))) - -#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \ - list_for_each_entry(intel_plane, \ - &(dev)->mode_config.plane_list, \ - base.head) \ - for_each_if ((intel_plane)->pipe == (intel_crtc)->pipe) - -#define for_each_intel_crtc(dev, intel_crtc) \ - list_for_each_entry(intel_crtc, \ - &(dev)->mode_config.crtc_list, \ - base.head) - -#define for_each_intel_crtc_mask(dev, intel_crtc, crtc_mask) \ - list_for_each_entry(intel_crtc, \ - &(dev)->mode_config.crtc_list, \ - base.head) \ - for_each_if ((crtc_mask) & (1 << drm_crtc_index(&intel_crtc->base))) - -#define for_each_intel_encoder(dev, intel_encoder) \ - list_for_each_entry(intel_encoder, \ - &(dev)->mode_config.encoder_list, \ - base.head) - -#define for_each_intel_connector_iter(intel_connector, iter) \ - while ((intel_connector = to_intel_connector(drm_connector_list_iter_next(iter)))) - -#define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \ - list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \ - for_each_if ((intel_encoder)->base.crtc == (__crtc)) - -#define for_each_connector_on_encoder(dev, __encoder, intel_connector) \ - list_for_each_entry((intel_connector), &(dev)->mode_config.connector_list, base.head) \ - for_each_if ((intel_connector)->base.encoder == (__encoder)) - -#define for_each_power_domain(domain, mask) \ - for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ - for_each_if (BIT_ULL(domain) & (mask)) - -#define for_each_power_well(__dev_priv, __power_well) \ - for ((__power_well) = (__dev_priv)->power_domains.power_wells; \ - (__power_well) - (__dev_priv)->power_domains.power_wells < \ - (__dev_priv)->power_domains.power_well_count; \ - (__power_well)++) - -#define for_each_power_well_rev(__dev_priv, __power_well) \ - for ((__power_well) = (__dev_priv)->power_domains.power_wells + \ - (__dev_priv)->power_domains.power_well_count - 1; \ - (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \ - (__power_well)--) - -#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \ - 
for_each_power_well(__dev_priv, __power_well) \ - for_each_if ((__power_well)->domains & (__domain_mask)) - -#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \ - for_each_power_well_rev(__dev_priv, __power_well) \ - for_each_if ((__power_well)->domains & (__domain_mask)) - -#define for_each_intel_plane_in_state(__state, plane, plane_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->base.dev->mode_config.num_total_plane && \ - ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ - (plane_state) = to_intel_plane_state((__state)->base.planes[__i].state), 1); \ - (__i)++) \ - for_each_if (plane_state) - struct drm_i915_private; struct i915_mm_struct; struct i915_mmu_object; @@ -591,7 +329,7 @@ struct drm_i915_file_private { struct intel_rps_client { atomic_t boosts; - } rps; + } rps_client; unsigned int bsd_engine; @@ -605,20 +343,6 @@ struct drm_i915_file_private { atomic_t context_bans; }; -/* Used by dp and fdi links */ -struct intel_link_m_n { - uint32_t tu; - uint32_t gmch_m; - uint32_t gmch_n; - uint32_t link_m; - uint32_t link_n; -}; - -void intel_link_compute_m_n(int bpp, int nlanes, - int pixel_clock, int link_clock, - struct intel_link_m_n *m_n, - bool reduce_m_n); - /* Interface history: * * 1.1: Original. @@ -633,27 +357,6 @@ void intel_link_compute_m_n(int bpp, int nlanes, #define DRIVER_MINOR 6 #define DRIVER_PATCHLEVEL 0 -struct opregion_header; -struct opregion_acpi; -struct opregion_swsci; -struct opregion_asle; - -struct intel_opregion { - struct opregion_header *header; - struct opregion_acpi *acpi; - struct opregion_swsci *swsci; - u32 swsci_gbda_sub_functions; - u32 swsci_sbcb_sub_functions; - struct opregion_asle *asle; - void *rvda; - void *vbt_firmware; - const void *vbt; - u32 vbt_size; - u32 *lid_state; - struct work_struct asle_work; -}; -#define OPREGION_SIZE (8*1024) - struct intel_overlay; struct intel_overlay_error_state; @@ -681,7 +384,8 @@ struct drm_i915_display_funcs { struct intel_cdclk_state *cdclk_state); void (*set_cdclk)(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state); - int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); + int (*get_fifo_size)(struct drm_i915_private *dev_priv, + enum i9xx_plane_id i9xx_plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, struct intel_crtc *intel_crtc, @@ -707,12 +411,13 @@ struct drm_i915_display_funcs { struct drm_atomic_state *old_state); void (*crtc_disable)(struct intel_crtc_state *old_crtc_state, struct drm_atomic_state *old_state); - void (*update_crtcs)(struct drm_atomic_state *state, - unsigned int *crtc_vblank_mask); - void (*audio_codec_enable)(struct drm_connector *connector, - struct intel_encoder *encoder, - const struct drm_display_mode *adjusted_mode); - void (*audio_codec_disable)(struct intel_encoder *encoder); + void (*update_crtcs)(struct drm_atomic_state *state); + void (*audio_codec_enable)(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); + void (*audio_codec_disable)(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state); void (*fdi_link_train)(struct intel_crtc *crtc, const struct intel_crtc_state *crtc_state); void (*init_clock_gating)(struct drm_i915_private *dev_priv); @@ -744,125 +449,6 @@ struct intel_csr { uint32_t allowed_dc_mask; }; -#define DEV_INFO_FOR_EACH_FLAG(func) \ - 
func(is_mobile); \ - func(is_lp); \ - func(is_alpha_support); \ - /* Keep has_* in alphabetical order */ \ - func(has_64bit_reloc); \ - func(has_aliasing_ppgtt); \ - func(has_csr); \ - func(has_ddi); \ - func(has_dp_mst); \ - func(has_reset_engine); \ - func(has_fbc); \ - func(has_fpga_dbg); \ - func(has_full_ppgtt); \ - func(has_full_48bit_ppgtt); \ - func(has_gmbus_irq); \ - func(has_gmch_display); \ - func(has_guc); \ - func(has_guc_ct); \ - func(has_hotplug); \ - func(has_l3_dpf); \ - func(has_llc); \ - func(has_logical_ring_contexts); \ - func(has_overlay); \ - func(has_pipe_cxsr); \ - func(has_pooled_eu); \ - func(has_psr); \ - func(has_rc6); \ - func(has_rc6p); \ - func(has_resource_streamer); \ - func(has_runtime_pm); \ - func(has_snoop); \ - func(unfenced_needs_alignment); \ - func(cursor_needs_physical); \ - func(hws_needs_physical); \ - func(overlay_needs_physical); \ - func(supports_tv); - -struct sseu_dev_info { - u8 slice_mask; - u8 subslice_mask; - u8 eu_total; - u8 eu_per_subslice; - u8 min_eu_in_pool; - /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ - u8 subslice_7eu[3]; - u8 has_slice_pg:1; - u8 has_subslice_pg:1; - u8 has_eu_pg:1; -}; - -static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) -{ - return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); -} - -/* Keep in gen based order, and chronological order within a gen */ -enum intel_platform { - INTEL_PLATFORM_UNINITIALIZED = 0, - INTEL_I830, - INTEL_I845G, - INTEL_I85X, - INTEL_I865G, - INTEL_I915G, - INTEL_I915GM, - INTEL_I945G, - INTEL_I945GM, - INTEL_G33, - INTEL_PINEVIEW, - INTEL_I965G, - INTEL_I965GM, - INTEL_G45, - INTEL_GM45, - INTEL_IRONLAKE, - INTEL_SANDYBRIDGE, - INTEL_IVYBRIDGE, - INTEL_VALLEYVIEW, - INTEL_HASWELL, - INTEL_BROADWELL, - INTEL_CHERRYVIEW, - INTEL_SKYLAKE, - INTEL_BROXTON, - INTEL_KABYLAKE, - INTEL_GEMINILAKE, - INTEL_COFFEELAKE, - INTEL_CANNONLAKE, - INTEL_MAX_PLATFORMS -}; - -struct intel_device_info { - u32 display_mmio_offset; - u16 device_id; - u8 num_pipes; - u8 num_sprites[I915_MAX_PIPES]; - u8 num_scalers[I915_MAX_PIPES]; - u8 gen; - u16 gen_mask; - enum intel_platform platform; - u8 ring_mask; /* Rings supported by the HW */ - u8 num_rings; -#define DEFINE_FLAG(name) u8 name:1 - DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); -#undef DEFINE_FLAG - u16 ddb_size; /* in blocks */ - /* Register offsets for the various display pipes and transcoders */ - int pipe_offsets[I915_MAX_TRANSCODERS]; - int trans_offsets[I915_MAX_TRANSCODERS]; - int palette_offsets[I915_MAX_PIPES]; - int cursor_offsets[I915_MAX_PIPES]; - - /* Slice/subslice/EU info */ - struct sseu_dev_info sseu; - - struct color_luts { - u16 degamma_lut_size; - u16 gamma_lut_size; - } color; -}; - struct intel_display_error_state; struct i915_gpu_state { @@ -884,6 +470,12 @@ struct i915_gpu_state { struct intel_device_info device_info; struct i915_params params; + struct i915_error_uc { + struct intel_uc_fw guc_fw; + struct intel_uc_fw huc_fw; + struct drm_i915_error_object *guc_log; + } uc; + /* Generic register state */ u32 eir; u32 pgtbl_er; @@ -906,12 +498,11 @@ struct i915_gpu_state { u64 fence[I915_MAX_NUM_FENCES]; struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; - struct drm_i915_error_object *semaphore; - struct drm_i915_error_object *guc_log; struct drm_i915_error_engine { int engine_id; /* Software tracked state */ + bool idle; bool waiting; int num_waiters; unsigned long hangcheck_timestamp; @@ -956,6 +547,7 @@ struct i915_gpu_state { 
pid_t pid; u32 handle; u32 hw_id; + int priority; int ban_score; int active; int guilty; @@ -973,16 +565,19 @@ long user_bo_count; struct drm_i915_error_object *wa_ctx; + struct drm_i915_error_object *default_state; struct drm_i915_error_request { long jiffies; pid_t pid; u32 context; + int priority; int ban_score; u32 seqno; u32 head; u32 tail; - } *requests, execlist[2]; + } *requests, execlist[EXECLIST_MAX_PORTS]; + unsigned int num_ports; struct drm_i915_error_waiter { char comm[TASK_COMM_LEN]; @@ -1077,6 +672,16 @@ struct intel_fbc { int src_w; int src_h; bool visible; + /* + * Display surface base address adjustment for + * pageflips. Note that on gen4+ this only adjusts up + * to a tile, offsets within a tile are handled in + * the hw itself (with the TILEOFF register). + */ + int adjusted_x; + int adjusted_y; + + int y; } plane; struct { @@ -1097,7 +702,7 @@ struct intel_fbc { struct { enum pipe pipe; - enum plane plane; + enum i9xx_plane_id i9xx_plane; unsigned int fence_y_offset; } crtc; @@ -1107,6 +712,7 @@ struct intel_fbc { } fb; int cfb_size; + unsigned int gen9_wa_cfb_stride; } params; struct intel_fbc_work { @@ -1159,6 +765,14 @@ struct i915_psr { bool y_cord_support; bool colorimetry_support; bool alpm; + + void (*enable_source)(struct intel_dp *, + const struct intel_crtc_state *); + void (*disable_source)(struct intel_dp *, + const struct intel_crtc_state *); + void (*enable_sink)(struct intel_dp *); + void (*activate)(struct intel_dp *); + void (*setup_vsc)(struct intel_dp *, const struct intel_crtc_state *); }; enum intel_pch { @@ -1277,7 +891,7 @@ struct intel_rps_ei { u32 media_c0; }; -struct intel_gen6_power_mgmt { +struct intel_rps { /* * work, interrupts_enabled and pm_iir are protected by * dev_priv->irq_lock @@ -1318,20 +932,25 @@ enum { LOW_POWER, BETWEEN, HIGH_POWER } power; bool enabled; - struct delayed_work autoenable_work; atomic_t num_waiters; atomic_t boosts; /* manual wa residency calculations */ struct intel_rps_ei ei; +}; - /* - * Protects RPS/RC6 register access and PCU communication. - * Must be taken after struct_mutex if nested. Note that - * this lock may be held for long periods of time when - * talking to hw - so only take it when talking to hw! - */ - struct mutex hw_lock; +struct intel_rc6 { + bool enabled; +}; + +struct intel_llc_pstate { + bool enabled; +}; + +struct intel_gen6_power_mgmt { + struct intel_rps rps; + struct intel_rc6 rc6; + struct intel_llc_pstate llc_pstate; }; /* defined intel_pm.c */ @@ -1444,6 +1063,9 @@ struct i915_gem_mm { * always the inner lock when overlapping with struct_mutex. */ struct mutex stolen_lock; + /* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */ + spinlock_t obj_lock; + /** List of all objects in gtt_space. 
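The new obj_lock above changes the locking rule for the shrinker-visible object lists: moving an object between bound_list and unbound_list (and touching obj->mm.link) now requires the spinlock, not struct_mutex alone. A minimal sketch of that rule, using simplified stand-in types rather than the driver's real declarations:

#include <linux/list.h>
#include <linux/spinlock.h>

/* Stand-in for the relevant parts of i915->mm; illustration only. */
struct gem_mm_sketch {
	spinlock_t obj_lock;
	struct list_head bound_list;
	struct list_head unbound_list;
};

/* Any list movement of an object must now be bracketed by obj_lock. */
static void sketch_move_to_unbound(struct gem_mm_sketch *mm,
				   struct list_head *obj_link)
{
	spin_lock(&mm->obj_lock);
	list_move_tail(obj_link, &mm->unbound_list);
	spin_unlock(&mm->obj_lock);
}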
Used to restore gtt * mappings on resume */ struct list_head bound_list; @@ -1464,9 +1086,17 @@ */ struct llist_head free_list; struct work_struct free_work; + spinlock_t free_lock; + + /** + * Small stash of WC pages + */ + struct pagevec wc_stash; - /** Usable portion of the GTT for GEM */ - dma_addr_t stolen_base; /* limited to low memory (32-bit) */ + /** + * tmpfs instance used for shmem backed objects + */ + struct vfsmount *gemfs; /** PPGTT used for aliasing the PPGTT with the GTT */ struct i915_hw_ppgtt *aliasing_ppgtt; @@ -1511,6 +1141,8 @@ struct drm_i915_error_state_buf { loff_t pos; }; +#define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */ + #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ #define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */ @@ -1629,6 +1261,8 @@ enum modeset_restore { #define DDC_PIN_D 0x06 struct ddi_vbt_port_info { + int max_tmds_clock; + /* * This is an index in the HDMI/DVI DDI buffer translation table. * The special value HDMI_LEVEL_SHIFT_UNKNOWN means the VBT didn't @@ -1709,16 +1343,19 @@ struct intel_vbt_data { u16 panel_id; struct mipi_config *config; struct mipi_pps_data *pps; + u16 bl_ports; + u16 cabc_ports; u8 seq_version; u32 size; u8 *data; const u8 *sequence[MIPI_SEQ_MAX]; + u8 *deassert_seq; /* Used by fixup_mipi_sequences() */ } dsi; int crt_ddc_pin; int child_dev_num; - union child_device_config *child_dev; + struct child_device_config *child_dev; struct ddi_vbt_port_info ddi_port_info[I915_MAX_PORTS]; struct sdvo_device_mapping sdvo_mappings[2]; @@ -1812,6 +1449,20 @@ struct skl_wm_level { uint8_t plane_res_l; }; +/* Stores plane specific WM parameters */ +struct skl_wm_params { + bool x_tiled, y_tiled; + bool rc_surface; + uint32_t width; + uint8_t cpp; + uint32_t plane_pixel_rate; + uint32_t y_min_scanlines; + uint32_t plane_bytes_per_line; + uint_fixed_16_16_t plane_blocks_per_line; + uint_fixed_16_16_t y_tile_minimum; + uint32_t linetime_us; +}; + /* * This struct helps tracking the state needed for runtime PM, which puts the * device in PCI D3 state. Notice that when this happens, nothing on the @@ -1890,13 +1541,7 @@ struct i915_wa_reg { u32 mask; }; -/* - * RING_MAX_NONPRIV_SLOTS is per-engine but at this point we are only - * allowing it for RCS as we don't foresee any requirement of having - * a whitelist for other engines. When it is really required for - * other engines then the limit need to be increased. - */ -#define I915_MAX_WA_REGS (16 + RING_MAX_NONPRIV_SLOTS) +#define I915_MAX_WA_REGS 16 struct i915_workarounds { struct i915_wa_reg reg[I915_MAX_WA_REGS]; @@ -2149,6 +1794,7 @@ struct i915_oa_ops { struct intel_cdclk_state { unsigned int cdclk, vco, ref; + u8 voltage_level; }; struct drm_i915_private { @@ -2163,6 +1809,30 @@ struct drm_i915_private { const struct intel_device_info info; + /** + * Data Stolen Memory - aka "i915 stolen memory" gives us the start and + * end of stolen which we can optionally use to create GEM objects + * backed by stolen memory. Note that stolen_usable_size tells us + * exactly how much of this we are actually allowed to use, given that + * some portion of it is in fact reserved for use by hardware functions. + */ + struct resource dsm; + /** + * Reserved portion of Data Stolen Memory + */ + struct resource dsm_reserved; + + /* + * Stolen memory is segmented in hardware with different portions + * off-limits to certain functions. + * + * The drm_mm is initialised to the total accessible range, as found + * from the PCI config. 
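The dsm/dsm_reserved pair above replaces the old raw stolen_base pointer with proper struct resource bookkeeping. Roughly, the usable size is what remains of the stolen range once the hardware-reserved carve-out is subtracted; a hedged sketch of that arithmetic (the driver's exact computation additionally trims ranges such as the first page on Broadwell+):

#include <linux/ioport.h>

/* Assumed relationship between the two resources above; not the
 * driver's exact computation.
 */
static resource_size_t sketch_stolen_usable_size(const struct resource *dsm,
						 const struct resource *dsm_reserved)
{
	return resource_size(dsm) - resource_size(dsm_reserved);
}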
On Broadwell+, this is further restricted to + * avoid the first page! The upper end of stolen memory is reserved for + * hardware functions and similarly removed from the accessible range. + */ + resource_size_t stolen_usable_size; /* Total size minus reserved ranges */ + void __iomem *regs; struct intel_uncore uncore; @@ -2197,9 +1867,13 @@ struct drm_i915_private { wait_queue_head_t gmbus_wait_queue; struct pci_dev *bridge_dev; - struct i915_gem_context *kernel_context; struct intel_engine_cs *engine[I915_NUM_ENGINES]; - struct i915_vma *semaphore; + /* Context used internally to idle the GPU and setup initial state */ + struct i915_gem_context *kernel_context; + /* Context only to be used for injecting preemption commands */ + struct i915_gem_context *preempt_context; + struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] + [MAX_ENGINE_INSTANCE + 1]; struct drm_dma_handle *status_page_dmah; struct resource mch_res; @@ -2257,6 +1931,7 @@ struct drm_i915_private { unsigned int max_dotclk_freq; unsigned int rawclk_freq; unsigned int hpll_freq; + unsigned int fdi_pll_freq; unsigned int czclk_freq; struct { @@ -2286,6 +1961,9 @@ struct drm_i915_private { */ struct workqueue_struct *wq; + /* ordered wq for modesets */ + struct workqueue_struct *modeset_wq; + /* Display functions */ struct drm_i915_display_funcs display; @@ -2307,6 +1985,8 @@ struct drm_i915_private { DECLARE_HASHTABLE(mm_structs, 7); struct mutex mm_lock; + struct intel_ppat ppat; + /* Kernel Modesetting */ struct intel_crtc *plane_to_crtc_mapping[I915_MAX_PIPES]; @@ -2329,7 +2009,10 @@ struct drm_i915_private { struct mutex dpll_lock; unsigned int active_crtcs; - unsigned int min_pixclk[I915_MAX_PIPES]; + /* minimum acceptable cdclk for each pipe */ + int min_cdclk[I915_MAX_PIPES]; + /* minimum acceptable voltage level for each pipe */ + u8 min_voltage_level[I915_MAX_PIPES]; int dpio_phy_iosf_port[I915_NUM_PHYS_VLV]; @@ -2351,8 +2034,16 @@ struct drm_i915_private { /* Cannot be determined by PCIID. You must always read a register. */ u32 edram_cap; - /* gen6+ rps state */ - struct intel_gen6_power_mgmt rps; + /* + * Protects RPS/RC6 register access and PCU communication. + * Must be taken after struct_mutex if nested. Note that + * this lock may be held for long periods of time when + * talking to hw - so only take it when talking to hw! + */ + struct mutex pcu_lock; + + /* gen6+ GT PM state */ + struct intel_gen6_power_mgmt gt_pm; /* ilk-only ips/rps state. Everything in here is protected by the global * mchdev_lock in intel_pm.c */ @@ -2463,7 +2154,7 @@ struct drm_i915_private { bool distrust_bios_wm; } wm; - struct i915_runtime_pm pm; + struct i915_runtime_pm runtime_pm; struct { bool initialized; @@ -2513,7 +2204,6 @@ struct drm_i915_private { bool periodic; int period_exponent; - int timestamp_frequency; struct i915_oa_config test_config; @@ -2658,6 +2348,8 @@ struct drm_i915_private { int irq; } lpe_audio; + struct i915_pmu pmu; + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. @@ -2786,8 +2478,8 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) #define for_each_sgt_dma(__dmap, __iter, __sgt) \ for ((__iter) = __sgt_iter((__sgt)->sgl, true); \ ((__dmap) = (__iter).dma + (__iter).curr); \ - (((__iter).curr += PAGE_SIZE) < (__iter).max) || \ - ((__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0)) + (((__iter).curr += PAGE_SIZE) >= (__iter).max) ? 
\ + (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0) /** * for_each_sgt_page - iterate over the pages of the given sg_table @@ -2799,8 +2491,38 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) for ((__iter) = __sgt_iter((__sgt)->sgl, false); \ ((__pp) = (__iter).pfn == 0 ? NULL : \ pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \ - (((__iter).curr += PAGE_SIZE) < (__iter).max) || \ - ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0)) + (((__iter).curr += PAGE_SIZE) >= (__iter).max) ? \ + (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0) + +static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg) +{ + unsigned int page_sizes; + + page_sizes = 0; + while (sg) { + GEM_BUG_ON(sg->offset); + GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE)); + page_sizes |= sg->length; + sg = __sg_next(sg); + } + + return page_sizes; +} + +static inline unsigned int i915_sg_segment_size(void) +{ + unsigned int size = swiotlb_max_segment(); + + if (size == 0) + return SCATTERLIST_MAX_SEGMENT; + + size = rounddown(size, PAGE_SIZE); + /* swiotlb_max_segment() can return 1 byte when it means one page. */ + if (size < PAGE_SIZE) + size = PAGE_SIZE; + + return size; +} static inline const struct intel_device_info * intel_info(const struct drm_i915_private *dev_priv) @@ -2817,23 +2539,21 @@ intel_info(const struct drm_i915_private *dev_priv) #define INTEL_REVID(dev_priv) ((dev_priv)->drm.pdev->revision) #define GEN_FOREVER (0) + +#define INTEL_GEN_MASK(s, e) ( \ + BUILD_BUG_ON_ZERO(!__builtin_constant_p(s)) + \ + BUILD_BUG_ON_ZERO(!__builtin_constant_p(e)) + \ + GENMASK((e) != GEN_FOREVER ? (e) - 1 : BITS_PER_LONG - 1, \ + (s) != GEN_FOREVER ? (s) - 1 : 0) \ +) + /* * Returns true if Gen is in inclusive range [Start, End]. * * Use GEN_FOREVER for unbound start and or end. */ -#define IS_GEN(dev_priv, s, e) ({ \ - unsigned int __s = (s), __e = (e); \ - BUILD_BUG_ON(!__builtin_constant_p(s)); \ - BUILD_BUG_ON(!__builtin_constant_p(e)); \ - if ((__s) != GEN_FOREVER) \ - __s = (s) - 1; \ - if ((__e) == GEN_FOREVER) \ - __e = BITS_PER_LONG - 1; \ - else \ - __e = (e) - 1; \ - !!((dev_priv)->info.gen_mask & GENMASK((__e), (__s))); \ -}) +#define IS_GEN(dev_priv, s, e) \ (!!((dev_priv)->info.gen_mask & INTEL_GEN_MASK((s), (e)))) /* * Return true if revision is in range [since,until] inclusive. 
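The rewritten IS_GEN() above folds the old statement expression into a constant INTEL_GEN_MASK(): gen_mask stores BIT(gen - 1), so a range test becomes a single AND against GENMASK(e - 1, s - 1). A self-contained illustration of the same bit arithmetic, re-implemented in plain C (assuming a 64-bit unsigned long; these are not the kernel macros themselves):

#include <stdio.h>

#define SKETCH_GENMASK(h, l) ((~0UL << (l)) & (~0UL >> (63 - (h))))

int main(void)
{
	unsigned long gen_mask = 1UL << (9 - 1);	/* a gen9 device */

	/* IS_GEN(dev_priv, 8, 10) becomes gen_mask & GENMASK(10 - 1, 8 - 1) */
	printf("gen9 in [8,10]: %d\n", !!(gen_mask & SKETCH_GENMASK(9, 7)));
	printf("gen9 in [4,6]:  %d\n", !!(gen_mask & SKETCH_GENMASK(5, 3)));
	return 0;
}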
@@ -2843,38 +2563,39 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_REVID(p, since, until) \ (INTEL_REVID(p) >= (since) && INTEL_REVID(p) <= (until)) -#define IS_I830(dev_priv) ((dev_priv)->info.platform == INTEL_I830) -#define IS_I845G(dev_priv) ((dev_priv)->info.platform == INTEL_I845G) -#define IS_I85X(dev_priv) ((dev_priv)->info.platform == INTEL_I85X) -#define IS_I865G(dev_priv) ((dev_priv)->info.platform == INTEL_I865G) -#define IS_I915G(dev_priv) ((dev_priv)->info.platform == INTEL_I915G) -#define IS_I915GM(dev_priv) ((dev_priv)->info.platform == INTEL_I915GM) -#define IS_I945G(dev_priv) ((dev_priv)->info.platform == INTEL_I945G) -#define IS_I945GM(dev_priv) ((dev_priv)->info.platform == INTEL_I945GM) -#define IS_I965G(dev_priv) ((dev_priv)->info.platform == INTEL_I965G) -#define IS_I965GM(dev_priv) ((dev_priv)->info.platform == INTEL_I965GM) -#define IS_G45(dev_priv) ((dev_priv)->info.platform == INTEL_G45) -#define IS_GM45(dev_priv) ((dev_priv)->info.platform == INTEL_GM45) +#define IS_PLATFORM(dev_priv, p) ((dev_priv)->info.platform_mask & BIT(p)) + +#define IS_I830(dev_priv) IS_PLATFORM(dev_priv, INTEL_I830) +#define IS_I845G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I845G) +#define IS_I85X(dev_priv) IS_PLATFORM(dev_priv, INTEL_I85X) +#define IS_I865G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I865G) +#define IS_I915G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I915G) +#define IS_I915GM(dev_priv) IS_PLATFORM(dev_priv, INTEL_I915GM) +#define IS_I945G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I945G) +#define IS_I945GM(dev_priv) IS_PLATFORM(dev_priv, INTEL_I945GM) +#define IS_I965G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I965G) +#define IS_I965GM(dev_priv) IS_PLATFORM(dev_priv, INTEL_I965GM) +#define IS_G45(dev_priv) IS_PLATFORM(dev_priv, INTEL_G45) +#define IS_GM45(dev_priv) IS_PLATFORM(dev_priv, INTEL_GM45) #define IS_G4X(dev_priv) (IS_G45(dev_priv) || IS_GM45(dev_priv)) #define IS_PINEVIEW_G(dev_priv) (INTEL_DEVID(dev_priv) == 0xa001) #define IS_PINEVIEW_M(dev_priv) (INTEL_DEVID(dev_priv) == 0xa011) -#define IS_PINEVIEW(dev_priv) ((dev_priv)->info.platform == INTEL_PINEVIEW) -#define IS_G33(dev_priv) ((dev_priv)->info.platform == INTEL_G33) +#define IS_PINEVIEW(dev_priv) IS_PLATFORM(dev_priv, INTEL_PINEVIEW) +#define IS_G33(dev_priv) IS_PLATFORM(dev_priv, INTEL_G33) #define IS_IRONLAKE_M(dev_priv) (INTEL_DEVID(dev_priv) == 0x0046) -#define IS_IVYBRIDGE(dev_priv) ((dev_priv)->info.platform == INTEL_IVYBRIDGE) -#define IS_IVB_GT1(dev_priv) (INTEL_DEVID(dev_priv) == 0x0156 || \ - INTEL_DEVID(dev_priv) == 0x0152 || \ - INTEL_DEVID(dev_priv) == 0x015a) -#define IS_VALLEYVIEW(dev_priv) ((dev_priv)->info.platform == INTEL_VALLEYVIEW) -#define IS_CHERRYVIEW(dev_priv) ((dev_priv)->info.platform == INTEL_CHERRYVIEW) -#define IS_HASWELL(dev_priv) ((dev_priv)->info.platform == INTEL_HASWELL) -#define IS_BROADWELL(dev_priv) ((dev_priv)->info.platform == INTEL_BROADWELL) -#define IS_SKYLAKE(dev_priv) ((dev_priv)->info.platform == INTEL_SKYLAKE) -#define IS_BROXTON(dev_priv) ((dev_priv)->info.platform == INTEL_BROXTON) -#define IS_KABYLAKE(dev_priv) ((dev_priv)->info.platform == INTEL_KABYLAKE) -#define IS_GEMINILAKE(dev_priv) ((dev_priv)->info.platform == INTEL_GEMINILAKE) -#define IS_COFFEELAKE(dev_priv) ((dev_priv)->info.platform == INTEL_COFFEELAKE) -#define IS_CANNONLAKE(dev_priv) ((dev_priv)->info.platform == INTEL_CANNONLAKE) +#define IS_IVYBRIDGE(dev_priv) IS_PLATFORM(dev_priv, INTEL_IVYBRIDGE) +#define IS_IVB_GT1(dev_priv) (IS_IVYBRIDGE(dev_priv) && \ + (dev_priv)->info.gt == 1) 
+#define IS_VALLEYVIEW(dev_priv) IS_PLATFORM(dev_priv, INTEL_VALLEYVIEW) +#define IS_CHERRYVIEW(dev_priv) IS_PLATFORM(dev_priv, INTEL_CHERRYVIEW) +#define IS_HASWELL(dev_priv) IS_PLATFORM(dev_priv, INTEL_HASWELL) +#define IS_BROADWELL(dev_priv) IS_PLATFORM(dev_priv, INTEL_BROADWELL) +#define IS_SKYLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_SKYLAKE) +#define IS_BROXTON(dev_priv) IS_PLATFORM(dev_priv, INTEL_BROXTON) +#define IS_KABYLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_KABYLAKE) +#define IS_GEMINILAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_GEMINILAKE) +#define IS_COFFEELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_COFFEELAKE) +#define IS_CANNONLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_CANNONLAKE) #define IS_MOBILE(dev_priv) ((dev_priv)->info.is_mobile) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) @@ -2886,11 +2607,11 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_BDW_ULX(dev_priv) (IS_BROADWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xf) == 0xe) #define IS_BDW_GT3(dev_priv) (IS_BROADWELL(dev_priv) && \ - (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0020) + (dev_priv)->info.gt == 3) #define IS_HSW_ULT(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0A00) #define IS_HSW_GT3(dev_priv) (IS_HASWELL(dev_priv) && \ - (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0020) + (dev_priv)->info.gt == 3) /* ULX machines are also considered ULT. */ #define IS_HSW_ULX(dev_priv) (INTEL_DEVID(dev_priv) == 0x0A0E || \ INTEL_DEVID(dev_priv) == 0x0A1E) @@ -2911,17 +2632,21 @@ intel_info(const struct drm_i915_private *dev_priv) INTEL_DEVID(dev_priv) == 0x5915 || \ INTEL_DEVID(dev_priv) == 0x591E) #define IS_SKL_GT2(dev_priv) (IS_SKYLAKE(dev_priv) && \ - (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0010) + (dev_priv)->info.gt == 2) #define IS_SKL_GT3(dev_priv) (IS_SKYLAKE(dev_priv) && \ - (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0020) + (dev_priv)->info.gt == 3) #define IS_SKL_GT4(dev_priv) (IS_SKYLAKE(dev_priv) && \ - (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0030) + (dev_priv)->info.gt == 4) #define IS_KBL_GT2(dev_priv) (IS_KABYLAKE(dev_priv) && \ - (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0010) + (dev_priv)->info.gt == 2) #define IS_KBL_GT3(dev_priv) (IS_KABYLAKE(dev_priv) && \ - (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0020) + (dev_priv)->info.gt == 3) #define IS_CFL_ULT(dev_priv) (IS_COFFEELAKE(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0x00F0) == 0x00A0) +#define IS_CFL_GT2(dev_priv) (IS_COFFEELAKE(dev_priv) && \ + (dev_priv)->info.gt == 2) +#define IS_CFL_GT3(dev_priv) (IS_COFFEELAKE(dev_priv) && \ + (dev_priv)->info.gt == 3) #define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support) @@ -2962,6 +2687,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define CNL_REVID_A0 0x0 #define CNL_REVID_B0 0x1 +#define CNL_REVID_C0 0x2 #define IS_CNL_REVID(p, since, until) \ (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) @@ -3002,6 +2728,8 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS) #define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS) +#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN7(dev_priv) + #define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc) #define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop) #define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED)) @@ -3012,9 +2740,18 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ ((dev_priv)->info.has_logical_ring_contexts) -#define USES_PPGTT(dev_priv) 
(i915.enable_ppgtt) -#define USES_FULL_PPGTT(dev_priv) (i915.enable_ppgtt >= 2) -#define USES_FULL_48BIT_PPGTT(dev_priv) (i915.enable_ppgtt == 3) +#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \ + ((dev_priv)->info.has_logical_ring_preemption) + +#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv) + +#define USES_PPGTT(dev_priv) (i915_modparams.enable_ppgtt) +#define USES_FULL_PPGTT(dev_priv) (i915_modparams.enable_ppgtt >= 2) +#define USES_FULL_48BIT_PPGTT(dev_priv) (i915_modparams.enable_ppgtt == 3) +#define HAS_PAGE_SIZES(dev_priv, sizes) ({ \ + GEM_BUG_ON((sizes) == 0); \ + ((sizes) & ~(dev_priv)->info.page_sizes) == 0; \ +}) #define HAS_OVERLAY(dev_priv) ((dev_priv)->info.has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \ @@ -3032,9 +2769,12 @@ intel_info(const struct drm_i915_private *dev_priv) * even when in MSI mode. This results in spurious interrupt warnings if the * legacy irq no. is shared with another device. The kernel then disables that * interrupt source and so prevents the other device from working properly. + * + * Since we don't enable MSI anymore on gen4, we can always use GMBUS/AUX + * interrupts. */ -#define HAS_AUX_IRQ(dev_priv) ((dev_priv)->info.gen >= 5) -#define HAS_GMBUS_IRQ(dev_priv) ((dev_priv)->info.has_gmbus_irq) +#define HAS_AUX_IRQ(dev_priv) true +#define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4) /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte * rows, which changed the alignment requirements and fence programming. @@ -3046,7 +2786,6 @@ intel_info(const struct drm_i915_private *dev_priv) #define I915_HAS_HOTPLUG(dev_priv) ((dev_priv)->info.has_hotplug) #define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) -#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr) #define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc) #define HAS_CUR_FBC(dev_priv) (!HAS_GMCH_DISPLAY(dev_priv) && INTEL_INFO(dev_priv)->gen >= 7) @@ -3057,14 +2796,18 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_DDI(dev_priv) ((dev_priv)->info.has_ddi) #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg) #define HAS_PSR(dev_priv) ((dev_priv)->info.has_psr) + #define HAS_RC6(dev_priv) ((dev_priv)->info.has_rc6) #define HAS_RC6p(dev_priv) ((dev_priv)->info.has_rc6p) +#define HAS_RC6pp(dev_priv) (false) /* HW was never validated */ #define HAS_CSR(dev_priv) ((dev_priv)->info.has_csr) #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) ((dev_priv)->info.has_64bit_reloc) +#define HAS_IPC(dev_priv) ((dev_priv)->info.has_ipc) + /* * For now, anything with a GuC requires uCode loading, and then supports * command submission once loaded. 
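The HAS_PAGE_SIZES() check added above reduces to a mask-containment test: a request is valid only if no bit falls outside the device's supported page-size mask. A standalone example of the same check with made-up masks:

#include <stdio.h>

int main(void)
{
	const unsigned int sz_4k = 1u << 12, sz_64k = 1u << 16, sz_2m = 1u << 21;
	const unsigned int supported = sz_4k | sz_64k | sz_2m;

	/* ((sizes) & ~supported) == 0 iff every requested size is supported */
	printf("64K|2M supported: %d\n", ((sz_64k | sz_2m) & ~supported) == 0);
	printf("1G supported:     %d\n", ((1u << 30) & ~supported) == 0);
	return 0;
}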
But these are logically independent @@ -3074,8 +2817,16 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_GUC_CT(dev_priv) ((dev_priv)->info.has_guc_ct) #define HAS_GUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) #define HAS_GUC_SCHED(dev_priv) (HAS_GUC(dev_priv)) + +/* For now, anything with a GuC also has a HuC */ +#define HAS_HUC(dev_priv) (HAS_GUC(dev_priv)) #define HAS_HUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) +/* Having a GuC is not the same as using a GuC */ +#define USES_GUC(dev_priv) intel_uc_is_using_guc() +#define USES_GUC_SUBMISSION(dev_priv) intel_uc_is_using_guc_submission() +#define USES_HUC(dev_priv) intel_uc_is_using_huc() + #define HAS_RESOURCE_STREAMER(dev_priv) ((dev_priv)->info.has_resource_streamer) #define HAS_POOLED_EU(dev_priv) ((dev_priv)->info.has_pooled_eu) @@ -3152,8 +2903,6 @@ intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv) int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, int enable_ppgtt); -bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value); - /* i915_drv.c */ void __printf(3, 4) __i915_printk(struct drm_i915_private *dev_priv, const char *level, @@ -3182,7 +2931,9 @@ extern int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags); extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv); -extern int intel_guc_reset(struct drm_i915_private *dev_priv); +extern int intel_reset_guc(struct drm_i915_private *dev_priv); +extern int intel_guc_reset_engine(struct intel_guc *guc, + struct intel_engine_cs *engine); extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine); extern void intel_hangcheck_init(struct drm_i915_private *dev_priv); extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv); @@ -3210,7 +2961,7 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) { unsigned long delay; - if (unlikely(!i915.enable_hangcheck)) + if (unlikely(!i915_modparams.enable_hangcheck)) return; /* Don't continually defer the hangcheck so that it is always run at @@ -3243,6 +2994,8 @@ static inline bool intel_vgpu_active(struct drm_i915_private *dev_priv) return dev_priv->vgpu.active; } +u32 i915_pipestat_enable_mask(struct drm_i915_private *dev_priv, + enum pipe pipe); void i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, u32 status_mask); @@ -3424,7 +3177,8 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, unsigned long n); void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages); + struct sg_table *pages, + unsigned int sg_page_sizes); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline int __must_check @@ -3438,10 +3192,16 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) return __i915_gem_object_get_pages(obj); } +static inline bool +i915_gem_object_has_pages(struct drm_i915_gem_object *obj) +{ + return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages)); +} + static inline void __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - GEM_BUG_ON(!obj->mm.pages); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); atomic_inc(&obj->mm.pages_pin_count); } @@ -3455,8 +3215,8 @@ i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj) static inline void __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) { + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - GEM_BUG_ON(!obj->mm.pages); atomic_dec(&obj->mm.pages_pin_count); } @@ -3646,8 +3406,9 @@ 
i915_vm_to_ppgtt(struct i915_address_space *vm) } /* i915_gem_fence_reg.c */ -int __must_check i915_vma_get_fence(struct i915_vma *vma); -int __must_check i915_vma_put_fence(struct i915_vma *vma); +struct drm_i915_fence_reg * +i915_reserve_fence(struct drm_i915_private *dev_priv); +void i915_unreserve_fence(struct drm_i915_fence_reg *fence); void i915_gem_revoke_fences(struct drm_i915_private *dev_priv); void i915_gem_restore_fences(struct drm_i915_private *dev_priv); @@ -3709,6 +3470,8 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm); +void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv); + /* belongs in i915_gem_gtt.h */ static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) { @@ -3730,12 +3493,13 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv); void i915_gem_cleanup_stolen(struct drm_device *dev); struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, u32 size); +i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + resource_size_t size); struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, - u32 stolen_offset, - u32 gtt_offset, - u32 size); + resource_size_t stolen_offset, + resource_size_t gtt_offset, + resource_size_t size); /* i915_gem_internal.c */ struct drm_i915_gem_object * @@ -3743,7 +3507,7 @@ i915_gem_object_create_internal(struct drm_i915_private *dev_priv, phys_addr_t size); /* i915_gem_shrinker.c */ -unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv, +unsigned long i915_gem_shrink(struct drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned flags); @@ -3752,9 +3516,9 @@ unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv, #define I915_SHRINK_BOUND 0x4 #define I915_SHRINK_ACTIVE 0x8 #define I915_SHRINK_VMAPS 0x10 -unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv); -void i915_gem_shrinker_init(struct drm_i915_private *dev_priv); -void i915_gem_shrinker_cleanup(struct drm_i915_private *dev_priv); +unsigned long i915_gem_shrink_all(struct drm_i915_private *i915); +void i915_gem_shrinker_register(struct drm_i915_private *i915); +void i915_gem_shrinker_unregister(struct drm_i915_private *i915); /* i915_gem_tiling.c */ @@ -3894,6 +3658,7 @@ extern void intel_i2c_reset(struct drm_i915_private *dev_priv); /* intel_bios.c */ void intel_bios_init(struct drm_i915_private *dev_priv); +void intel_bios_cleanup(struct drm_i915_private *dev_priv); bool intel_bios_is_valid_vbt(const void *buf, size_t size); bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv); bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin); @@ -3906,41 +3671,6 @@ bool intel_bios_is_port_hpd_inverted(struct drm_i915_private *dev_priv, bool intel_bios_is_lspcon_present(struct drm_i915_private *dev_priv, enum port port); - -/* intel_opregion.c */ -#ifdef CONFIG_ACPI -extern int intel_opregion_setup(struct drm_i915_private *dev_priv); -extern void intel_opregion_register(struct drm_i915_private *dev_priv); -extern void intel_opregion_unregister(struct drm_i915_private *dev_priv); -extern void intel_opregion_asle_intr(struct drm_i915_private *dev_priv); -extern int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, - bool enable); -extern int 
intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, - pci_power_t state); -extern int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv); -#else -static inline int intel_opregion_setup(struct drm_i915_private *dev) { return 0; } -static inline void intel_opregion_register(struct drm_i915_private *dev_priv) { } -static inline void intel_opregion_unregister(struct drm_i915_private *dev_priv) { } -static inline void intel_opregion_asle_intr(struct drm_i915_private *dev_priv) -{ -} -static inline int -intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, bool enable) -{ - return 0; -} -static inline int -intel_opregion_notify_adapter(struct drm_i915_private *dev, pci_power_t state) -{ - return 0; -} -static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev) -{ - return -ENODEV; -} -#endif - /* intel_acpi.c */ #ifdef CONFIG_ACPI extern void intel_register_dsm_handler(void); @@ -3957,14 +3687,9 @@ mkwrite_device_info(struct drm_i915_private *dev_priv) return (struct intel_device_info *)&dev_priv->info; } -const char *intel_platform_name(enum intel_platform platform); -void intel_device_info_runtime_init(struct drm_i915_private *dev_priv); -void intel_device_info_dump(struct drm_i915_private *dev_priv); - /* modesetting */ extern void intel_modeset_init_hw(struct drm_device *dev); extern int intel_modeset_init(struct drm_device *dev); -extern void intel_modeset_gem_init(struct drm_device *dev); extern void intel_modeset_cleanup(struct drm_device *dev); extern int intel_connector_register(struct drm_connector *); extern void intel_connector_unregister(struct drm_connector *); @@ -3994,7 +3719,11 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, struct intel_display_error_state *error); int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val); -int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val); +int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox, + u32 val, int timeout_us); +#define sandybridge_pcode_write(dev_priv, mbox, val) \ + sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500) + int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms); @@ -4031,8 +3760,7 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy); bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy); -uint8_t bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder, - uint8_t lane_count); +uint8_t bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count); void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, uint8_t lane_lat_optim_mask); uint8_t bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder); @@ -4041,24 +3769,39 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, u32 deemph_reg_value, u32 margin_reg_value, bool uniq_trans_scale); void chv_data_lane_soft_reset(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, bool reset); -void chv_phy_pre_pll_enable(struct intel_encoder *encoder); -void chv_phy_pre_encoder_enable(struct intel_encoder *encoder); +void chv_phy_pre_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); void chv_phy_release_cl2_override(struct intel_encoder *encoder); -void 
chv_phy_post_pll_disable(struct intel_encoder *encoder); +void chv_phy_post_pll_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state); void vlv_set_phy_signal_level(struct intel_encoder *encoder, u32 demph_reg_value, u32 preemph_reg_value, u32 uniqtranscale_reg_value, u32 tx3_demph); -void vlv_phy_pre_pll_enable(struct intel_encoder *encoder); -void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder); -void vlv_phy_reset_lanes(struct intel_encoder *encoder); +void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void vlv_phy_reset_lanes(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state); int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); -u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, +u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, const i915_reg_t reg); +u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1); + +static inline u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, + const i915_reg_t reg) +{ + return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000); +} + #define I915_READ8(reg) dev_priv->uncore.funcs.mmio_readb(dev_priv, (reg), true) #define I915_WRITE8(reg, val) dev_priv->uncore.funcs.mmio_writeb(dev_priv, (reg), (val), true) @@ -4333,11 +4076,12 @@ int remap_io_mapping(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, struct io_mapping *iomap); -static inline bool -intel_engine_can_store_dword(struct intel_engine_cs *engine) +static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) { - return __intel_engine_can_store_dword(INTEL_GEN(engine->i915), - engine->class); + if (INTEL_GEN(i915) >= 10) + return CNL_HWS_CSB_WRITE_INDEX; + else + return I915_HWS_CSB_WRITE_INDEX; } #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index dc1faa49687d..dd89abd2263d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -35,6 +35,7 @@ #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" +#include "i915_gemfs.h" #include <linux/dma-fence-array.h> #include <linux/kthread.h> #include <linux/reservation.h> @@ -55,7 +56,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) return true; - return obj->pin_display; + return obj->pin_global; /* currently in use by HW, keep flushed */ } static int @@ -161,8 +162,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, return 0; } -static struct sg_table * -i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) { struct address_space *mapping = obj->base.filp->f_mapping; drm_dma_handle_t *phys; @@ -170,19 +170,20 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) struct scatterlist *sg; char *vaddr; int i; + int err; if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Always aligning to the object size, allows a single allocation * to handle all possible callers, and given typical object sizes, * the alignment of the buddy allocation will naturally match. 
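The phys backing change above now rounds the allocation up to a power of two so that a single drm_pci_alloc() call can serve any caller, with size and alignment naturally matching. A worked illustration of the rounding, using a plain C stand-in for the kernel's roundup_pow_of_two():

#include <stdio.h>

static unsigned long sketch_roundup_pow_of_two(unsigned long n)
{
	unsigned long v = 1;

	while (v < n)
		v <<= 1;
	return v;
}

int main(void)
{
	/* a 12KiB object gets a 16KiB allocation with 16KiB alignment */
	printf("%lu\n", sketch_roundup_pow_of_two(12UL << 10));
	return 0;
}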
*/ phys = drm_pci_alloc(obj->base.dev, - obj->base.size, + roundup_pow_of_two(obj->base.size), roundup_pow_of_two(obj->base.size)); if (!phys) - return ERR_PTR(-ENOMEM); + return -ENOMEM; vaddr = phys->vaddr; for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { @@ -191,7 +192,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) { - st = ERR_CAST(page); + err = PTR_ERR(page); goto err_phys; } @@ -208,13 +209,13 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) { - st = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto err_phys; } if (sg_alloc_table(st, 1, GFP_KERNEL)) { kfree(st); - st = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto err_phys; } @@ -226,11 +227,15 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; obj->phys_handle = phys; - return st; + + __i915_gem_object_set_pages(obj, st, sg->length); + + return 0; err_phys: drm_pci_free(obj->base.dev, phys); - return st; + + return err; } static void __start_cpu_write(struct drm_i915_gem_object *obj) @@ -325,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) * must wait for all rendering to complete to the object (as unbinding * must anyway), and retire the requests. */ - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - NULL); + ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) return ret; - i915_gem_retire_requests(to_i915(obj->base.dev)); - while ((vma = list_first_entry_or_null(&obj->vma_list, struct i915_vma, obj_link))) { @@ -353,7 +351,7 @@ static long i915_gem_object_wait_fence(struct dma_fence *fence, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { struct drm_i915_gem_request *rq; @@ -386,11 +384,11 @@ i915_gem_object_wait_fence(struct dma_fence *fence, * forcing the clocks too high for the whole system, we only allow * each client to waitboost once in a busy period. 
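The rps_client rename above carries the waitboost throttle the comment describes: only a client's first wait in a busy period should trigger a boost. One possible shape of that gating, with a simplified stand-in for intel_rps_client (the driver's real accounting in gen6_rps_boost() differs in detail):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct rps_client_sketch {
	atomic_int boosts;
};

/* First waiter sees a zero count and boosts; later waiters skip it. */
static bool sketch_may_boost(struct rps_client_sketch *client)
{
	return atomic_fetch_add(&client->boosts, 1) == 0;
}

int main(void)
{
	struct rps_client_sketch client = { .boosts = 0 };

	printf("first wait boosts:  %d\n", sketch_may_boost(&client));
	printf("second wait boosts: %d\n", sketch_may_boost(&client));
	return 0;
}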
*/ - if (rps) { + if (rps_client) { if (INTEL_GEN(rq->i915) >= 6) - gen6_rps_boost(rq, rps); + gen6_rps_boost(rq, rps_client); else - rps = NULL; + rps_client = NULL; } timeout = i915_wait_request(rq, flags, timeout); @@ -406,7 +404,7 @@ static long i915_gem_object_wait_reservation(struct reservation_object *resv, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; @@ -425,7 +423,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (i = 0; i < count; i++) { timeout = i915_gem_object_wait_fence(shared[i], flags, timeout, - rps); + rps_client); if (timeout < 0) break; @@ -442,7 +440,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, } if (excl && timeout >= 0) { - timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + timeout = i915_gem_object_wait_fence(excl, flags, timeout, + rps_client); prune_fences = timeout >= 0; } @@ -468,7 +467,7 @@ static void __fence_set_priority(struct dma_fence *fence, int prio) struct drm_i915_gem_request *rq; struct intel_engine_cs *engine; - if (!dma_fence_is_i915(fence)) + if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) return; rq = to_request(fence); @@ -532,13 +531,13 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, * @obj: i915 gem object * @flags: how to wait (under a lock, for all rendering or just for writes etc) * @timeout: how long to wait - * @rps: client (user process) to charge for any waitboosting + * @rps_client: client (user process) to charge for any waitboosting */ int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { might_sleep(); #if IS_ENABLED(CONFIG_LOCKDEP) @@ -550,7 +549,7 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout, - rps); + rps_client); return timeout < 0 ? timeout : 0; } @@ -558,7 +557,7 @@ static struct intel_rps_client *to_rps_client(struct drm_file *file) { struct drm_i915_file_private *fpriv = file->driver_priv; - return &fpriv->rps; + return &fpriv->rps_client; } static int @@ -667,17 +666,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) obj->frontbuffer_ggtt_origin : ORIGIN_CPU); } -static void -flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - - if (!(obj->base.write_domain & flush_domains)) - return; - - /* No actual flushing is required for the GTT write domain. Writes - * to it "immediately" go to main memory as far as we know, so there's - * no chipset flush. It also doesn't land in render cache. + /* + * No actual flushing is required for the GTT write domain for reads + * from the GTT domain. Writes to it "immediately" go to main memory + * as far as we know, so there's no chipset flush. It also doesn't + * land in the GPU render cache. * * However, we do have to enforce the order so that all writes through * the GTT land before any writes to the device, such as updates to @@ -688,22 +683,43 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) * timing. 
This issue has only been observed when switching quickly * between GTT writes and CPU reads from inside the kernel on recent hw, * and it appears to only affect discrete GTT blocks (i.e. on LLC - * system agents we cannot reproduce this behaviour). + * system agents we cannot reproduce this behaviour, until Cannonlake + * that was!). */ + wmb(); + intel_runtime_pm_get(dev_priv); + spin_lock_irq(&dev_priv->uncore.lock); + + POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); + + spin_unlock_irq(&dev_priv->uncore.lock); + intel_runtime_pm_put(dev_priv); +} + +static void +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_vma *vma; + + if (!(obj->base.write_domain & flush_domains)) + return; + switch (obj->base.write_domain) { case I915_GEM_DOMAIN_GTT: - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { - intel_runtime_pm_get(dev_priv); - spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); - spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv); - } + i915_gem_flush_ggtt_writes(dev_priv); intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + + for_each_ggtt_vma(vma, obj) { + if (vma->iomap) + continue; + + i915_vma_unset_ggtt_write(vma); + } break; case I915_GEM_DOMAIN_CPU: @@ -1013,17 +1029,20 @@ gtt_user_read(struct io_mapping *mapping, loff_t base, int offset, char __user *user_data, int length) { - void *vaddr; + void __iomem *vaddr; unsigned long unwritten; /* We can use the cpu mem copy function because this is X86. */ - vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base); - unwritten = __copy_to_user_inatomic(user_data, vaddr + offset, length); + vaddr = io_mapping_map_atomic_wc(mapping, base); + unwritten = __copy_to_user_inatomic(user_data, + (void __force *)vaddr + offset, + length); io_mapping_unmap_atomic(vaddr); if (unwritten) { - vaddr = (void __force *) - io_mapping_map_wc(mapping, base, PAGE_SIZE); - unwritten = copy_to_user(user_data, vaddr + offset, length); + vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); + unwritten = copy_to_user(user_data, + (void __force *)vaddr + offset, + length); io_mapping_unmap(vaddr); } return unwritten; @@ -1047,7 +1066,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONFAULT | + PIN_NONBLOCK); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; @@ -1095,7 +1116,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, page_base += offset & PAGE_MASK; } - if (gtt_user_read(&ggtt->mappable, page_base, page_offset, + if (gtt_user_read(&ggtt->iomap, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; break; @@ -1189,18 +1210,18 @@ ggtt_write(struct io_mapping *mapping, loff_t base, int offset, char __user *user_data, int length) { - void *vaddr; + void __iomem *vaddr; unsigned long unwritten; /* We can use the cpu mem copy function because this is X86. 
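The new i915_gem_flush_ggtt_writes() above pairs a write barrier with a dummy uncached register read so that write-combined GGTT writes are pushed out before anything else touches the device. The core ordering pattern as a hedged kernel-style sketch, assuming an already-mapped MMIO register and omitting the uncore and runtime-PM plumbing:

#include <linux/io.h>

static void sketch_flush_posted_writes(void __iomem *any_mmio_reg)
{
	wmb();				/* order prior WC writes... */
	(void)readl(any_mmio_reg);	/* ...then force them out with a read */
}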
*/ - vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base); - unwritten = __copy_from_user_inatomic_nocache(vaddr + offset, + vaddr = io_mapping_map_atomic_wc(mapping, base); + unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset, user_data, length); io_mapping_unmap_atomic(vaddr); if (unwritten) { - vaddr = (void __force *) - io_mapping_map_wc(mapping, base, PAGE_SIZE); - unwritten = copy_from_user(vaddr + offset, user_data, length); + vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE); + unwritten = copy_from_user((void __force *)vaddr + offset, + user_data, length); io_mapping_unmap(vaddr); } @@ -1229,9 +1250,27 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, if (ret) return ret; - intel_runtime_pm_get(i915); + if (i915_gem_object_has_struct_page(obj)) { + /* + * Avoid waking the device up if we can fallback, as + * waking/resuming is very slow (worst-case 10-100 ms + * depending on PCI sleeps and our own resume time). + * This easily dwarfs any performance advantage from + * using the cache bypass of indirect GGTT access. + */ + if (!intel_runtime_pm_get_if_in_use(i915)) { + ret = -EFAULT; + goto out_unlock; + } + } else { + /* No backing pages, no fallback, we must force GGTT access */ + intel_runtime_pm_get(i915); + } + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONFAULT | + PIN_NONBLOCK); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; @@ -1244,7 +1283,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) { ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) - goto out_unlock; + goto out_rpm; GEM_BUG_ON(!node.allocated); } @@ -1285,7 +1324,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, * If the object is non-shmem backed, we retry again with the * path that handles page fault. */ - if (ggtt_write(&ggtt->mappable, page_base, page_offset, + if (ggtt_write(&ggtt->iomap, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; break; @@ -1307,8 +1346,9 @@ out_unpin: } else { i915_vma_unpin(vma); } -out_unlock: +out_rpm: intel_runtime_pm_put(i915); +out_unlock: mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -1524,10 +1564,9 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) struct list_head *list; struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + for_each_ggtt_vma(vma, obj) { if (i915_vma_is_active(vma)) continue; @@ -1538,8 +1577,10 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) } i915 = to_i915(obj->base.dev); + spin_lock(&i915->mm.obj_lock); list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; - list_move_tail(&obj->global_link, list); + list_move_tail(&obj->mm.link, list); + spin_unlock(&i915->mm.obj_lock); } /** @@ -1585,7 +1626,19 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out; - /* Flush and acquire obj->pages so that we are coherent through + /* + * Proxy objects do not control access to the backing storage, ergo + * they cannot be used as a means to manipulate the cache domain + * tracking for that backing storage. The proxy object is always + * considered to be outside of any cache domain. 
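The pwrite path above now distinguishes "take a wakeref only if the device is already awake" from an unconditional wakeup, so a sleeping device is never resumed merely for the GGTT cache-bypass optimisation. A sketch of the same pattern using the generic runtime-PM helper rather than the driver's intel_runtime_pm_get_if_in_use():

#include <linux/pm_runtime.h>

static int sketch_try_fast_path(struct device *dev)
{
	/* do not wake the device just to use the fast path */
	if (pm_runtime_get_if_in_use(dev) <= 0)
		return -EAGAIN;	/* caller falls back to the CPU path */

	/* ... fast GGTT/MMIO work while the wakeref is held ... */

	pm_runtime_put(dev);
	return 0;
}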
+ */ + if (i915_gem_object_is_proxy(obj)) { + err = -ENXIO; + goto out; + } + + /* + * Flush and acquire obj->pages so that we are coherent through * direct access in memory with previous cached writes through * shmemfs and that our cache domain tracking remains valid. * For example, if the obj->filp was moved to swap without us @@ -1641,6 +1694,11 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT; + /* + * Proxy objects are barred from CPU access, so there is no + * need to ban sw_finish as it is a nop. + */ + /* Pinned buffers may be scanout, so flush the cache */ i915_gem_object_flush_if_display(obj); i915_gem_object_put(obj); @@ -1691,7 +1749,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, */ if (!obj->base.filp) { i915_gem_object_put(obj); - return -EINVAL; + return -ENXIO; } addr = vm_mmap(obj->base.filp, 0, args->size, @@ -1902,22 +1960,29 @@ int i915_gem_fault(struct vm_fault *vmf) if (ret) goto err_unpin; - ret = i915_vma_get_fence(vma); + ret = i915_vma_pin_fence(vma); if (ret) goto err_unpin; - /* Mark as being mmapped into userspace for later revocation */ - assert_rpm_wakelock_held(dev_priv); - if (list_empty(&obj->userfault_link)) - list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); - /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), - (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, + (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->mappable); + &ggtt->iomap); + if (ret) + goto err_fence; + + /* Mark as being mmapped into userspace for later revocation */ + assert_rpm_wakelock_held(dev_priv); + if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) + list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); + GEM_BUG_ON(!obj->userfault_count); + + i915_vma_set_ggtt_write(vma); +err_fence: + i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); err_unlock: @@ -1969,6 +2034,21 @@ err: return ret; } +static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + + GEM_BUG_ON(!obj->userfault_count); + + obj->userfault_count = 0; + list_del(&obj->userfault_link); + drm_vma_node_unmap(&obj->base.vma_node, + obj->base.dev->anon_inode->i_mapping); + + for_each_ggtt_vma(vma, obj) + i915_vma_unset_userfault(vma); +} + /** * i915_gem_release_mmap - remove physical page mappings * @obj: obj in question @@ -1999,12 +2079,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) lockdep_assert_held(&i915->drm.struct_mutex); intel_runtime_pm_get(i915); - if (list_empty(&obj->userfault_link)) + if (!obj->userfault_count) goto out; - list_del_init(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); + __i915_gem_object_release_mmap(obj); /* Ensure that the CPU's PTE are revoked and there are not outstanding * memory transactions from userspace before we return. The TLB @@ -2032,11 +2110,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) */ list_for_each_entry_safe(obj, on, - &dev_priv->mm.userfault_list, userfault_link) { - list_del_init(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); - } + &dev_priv->mm.userfault_list, userfault_link) + __i915_gem_object_release_mmap(obj); /* The fence will be lost when the device powers down. If any were * in use by hardware (i.e. 
they are pinned), we should not be powering @@ -2059,7 +2134,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) if (!reg->vma) continue; - GEM_BUG_ON(!list_empty(®->vma->obj->userfault_link)); + GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); reg->dirty = true; } } @@ -2164,7 +2239,7 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) struct address_space *mapping; lockdep_assert_held(&obj->mm.lock); - GEM_BUG_ON(obj->mm.pages); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); switch (obj->mm.madv) { case I915_MADV_DONTNEED: @@ -2223,13 +2298,14 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, enum i915_mm_subclass subclass) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *pages; if (i915_gem_object_has_pinned_pages(obj)) return; GEM_BUG_ON(obj->bind_count); - if (!READ_ONCE(obj->mm.pages)) + if (!i915_gem_object_has_pages(obj)) return; /* May be called by shrinker from within get_pages() (on another bo) */ @@ -2243,6 +2319,10 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, pages = fetch_and_zero(&obj->mm.pages); GEM_BUG_ON(!pages); + spin_lock(&i915->mm.obj_lock); + list_del(&obj->mm.link); + spin_unlock(&i915->mm.obj_lock); + if (obj->mm.mapping) { void *ptr; @@ -2260,6 +2340,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, if (!IS_ERR(pages)) obj->ops->put_pages(obj, pages); + obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + unlock: mutex_unlock(&obj->mm.lock); } @@ -2290,8 +2372,7 @@ static bool i915_sg_trim(struct sg_table *orig_st) return true; } -static struct sg_table * -i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); const unsigned long page_count = obj->base.size / PAGE_SIZE; @@ -2302,7 +2383,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) struct sgt_iter sgt_iter; struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ - unsigned int max_segment; + unsigned int max_segment = i915_sg_segment_size(); + unsigned int sg_page_sizes; gfp_t noreclaim; int ret; @@ -2313,18 +2395,14 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); - max_segment = swiotlb_max_segment(); - if (!max_segment) - max_segment = rounddown(UINT_MAX, PAGE_SIZE); - st = kmalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) - return ERR_PTR(-ENOMEM); + return -ENOMEM; rebuild_st: if (sg_alloc_table(st, page_count, GFP_KERNEL)) { kfree(st); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } /* Get the list of pages out of our struct file. They'll be pinned @@ -2338,6 +2416,7 @@ rebuild_st: sg = st->sgl; st->nents = 0; + sg_page_sizes = 0; for (i = 0; i < page_count; i++) { const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, @@ -2390,8 +2469,10 @@ rebuild_st: if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { - if (i) + if (i) { + sg_page_sizes |= sg->length; sg = sg_next(sg); + } st->nents++; sg_set_page(sg, page, PAGE_SIZE, 0); } else { @@ -2402,8 +2483,10 @@ rebuild_st: /* Check that the i965g/gm workaround works. 
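The sg_page_sizes bookkeeping added to the shmem path above simply ORs together the length of every completed sg chunk, so the resulting mask records which chunk sizes occur in the object. Standalone illustration with fabricated lengths:

#include <stdio.h>

int main(void)
{
	const unsigned int lengths[] = { 2u << 20, 64u << 10, 4u << 10, 64u << 10 };
	unsigned int sg_page_sizes = 0;
	unsigned int i;

	for (i = 0; i < sizeof(lengths) / sizeof(lengths[0]); i++)
		sg_page_sizes |= lengths[i];

	/* 2M | 64K | 4K chunks were seen */
	printf("page size mask: %#x\n", sg_page_sizes);
	return 0;
}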
*/ WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); } - if (sg) /* loop terminated early; short sg table */ + if (sg) { /* loop terminated early; short sg table */ + sg_page_sizes |= sg->length; sg_mark_end(sg); + } /* Trim unused sg entries to avoid wasting memory. */ i915_sg_trim(st); @@ -2432,7 +2515,9 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); - return st; + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; err_sg: sg_mark_end(sg); @@ -2453,12 +2538,17 @@ err_pages: if (ret == -ENOSPC) ret = -ENOMEM; - return ERR_PTR(ret); + return ret; } void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) + struct sg_table *pages, + unsigned int sg_page_sizes) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned long supported = INTEL_INFO(i915)->page_sizes; + int i; + lockdep_assert_held(&obj->mm.lock); obj->mm.get_page.sg_pos = pages->sgl; @@ -2467,30 +2557,48 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.pages = pages; if (i915_gem_object_is_tiled(obj) && - to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) { + i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { GEM_BUG_ON(obj->mm.quirked); __i915_gem_object_pin_pages(obj); obj->mm.quirked = true; } + + GEM_BUG_ON(!sg_page_sizes); + obj->mm.page_sizes.phys = sg_page_sizes; + + /* + * Calculate the supported page-sizes which fit into the given + * sg_page_sizes. This will give us the page-sizes which we may be able + * to use opportunistically when later inserting into the GTT. For + * example if phys=2G, then in theory we should be able to use 1G, 2M, + * 64K or 4K pages, although in practice this will depend on a number of + * other factors. 
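(Editorial aside, not part of the patch: the bit-twiddling in the loop that follows is easy to sanity-check in isolation. A standalone sketch under assumed values; supported, sg_mask() and the SZ_* constants are illustrative, not the driver's:

    #include <assert.h>
    #include <stdint.h>

    #define SZ_4K  (1u << 12)
    #define SZ_64K (1u << 16)
    #define SZ_2M  (1u << 21)

    /* Page sizes the (hypothetical) platform supports. */
    static const uint32_t supported = SZ_4K | SZ_64K | SZ_2M;

    /* A page size is usable iff the phys mask has a bit at or above
     * it, i.e. some physical chunk is at least that big. */
    static uint32_t sg_mask(uint32_t phys)
    {
            uint32_t sg = 0;
            unsigned int i;

            for (i = 0; i < 32; i++) {
                    if (!(supported & (1u << i)))
                            continue;
                    if (phys & (~0u << i))
                            sg |= 1u << i;
            }
            return sg;
    }

    int main(void)
    {
            /* A 2M chunk plus a 4K tail: 4K, 64K and 2M all usable. */
            assert(sg_mask(SZ_2M | SZ_4K) == (SZ_4K | SZ_64K | SZ_2M));
            /* Nothing but 4K chunks: only 4K entries can be used. */
            assert(sg_mask(SZ_4K) == SZ_4K);
            return 0;
    }
)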
+ */ + obj->mm.page_sizes.sg = 0; + for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + if (obj->mm.page_sizes.phys & ~0u << i) + obj->mm.page_sizes.sg |= BIT(i); + } + GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); + + spin_lock(&i915->mm.obj_lock); + list_add(&obj->mm.link, &i915->mm.unbound_list); + spin_unlock(&i915->mm.obj_lock); } static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { - struct sg_table *pages; - - GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); + int err; if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { DRM_DEBUG("Attempting to obtain a purgeable object\n"); return -EFAULT; } - pages = obj->ops->get_pages(obj); - if (unlikely(IS_ERR(pages))) - return PTR_ERR(pages); + err = obj->ops->get_pages(obj); + GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); - __i915_gem_object_set_pages(obj, pages); - return 0; + return err; } /* Ensure that the associated pages are gathered from the backing storage @@ -2508,7 +2616,9 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) if (err) return err; - if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { + if (unlikely(!i915_gem_object_has_pages(obj))) { + GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); + err = ____i915_gem_object_get_pages(obj); if (err) goto unlock; @@ -2581,7 +2691,8 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, void *ptr; int ret; - GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); + if (unlikely(!i915_gem_object_has_struct_page(obj))) + return ERR_PTR(-ENXIO); ret = mutex_lock_interruptible(&obj->mm.lock); if (ret) @@ -2591,7 +2702,9 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, type &= ~I915_MAP_OVERRIDE; if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { - if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { + if (unlikely(!i915_gem_object_has_pages(obj))) { + GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); + ret = ____i915_gem_object_get_pages(obj); if (ret) goto err_unlock; @@ -2601,7 +2714,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, atomic_inc(&obj->mm.pages_pin_count); pinned = false; } - GEM_BUG_ON(!obj->mm.pages); + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); ptr = page_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { @@ -2656,7 +2769,7 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, * allows it to avoid the cost of retrieving a page (either swapin * or clearing-before-use) before it is overwritten. */ - if (READ_ONCE(obj->mm.pages)) + if (i915_gem_object_has_pages(obj)) return -ENODEV; if (obj->mm.madv != I915_MADV_WILLNEED) @@ -2800,7 +2913,17 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request = NULL; - /* Prevent the signaler thread from updating the request + /* + * During the reset sequence, we must prevent the engine from + * entering RC6. As the context state is undefined until we restart + * the engine, if it does enter RC6 during the reset, the state + * written to the powercontext is undefined and so we may lose + * GPU state upon resume, i.e. fail to restart after a reset. + */ + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); + + /* + * Prevent the signaler thread from updating the request * state (by calling dma_fence_signal) as we are processing * the reset. 
The write from the GPU of the seqno is * asynchronous and the signaler thread may see a different @@ -2811,16 +2934,27 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) */ kthread_park(engine->breadcrumbs.signaler); - /* Prevent request submission to the hardware until we have + /* + * Prevent request submission to the hardware until we have * completed the reset in i915_gem_reset_finish(). If a request * is completed by one engine, it may then queue a request - * to a second via its engine->irq_tasklet *just* as we are + * to a second via its execlists->tasklet *just* as we are * calling engine->init_hw() and also writing the ELSP. - * Turning off the engine->irq_tasklet until the reset is over + * Turning off the execlists->tasklet until the reset is over * prevents the race. */ - tasklet_kill(&engine->irq_tasklet); - tasklet_disable(&engine->irq_tasklet); + tasklet_kill(&engine->execlists.tasklet); + tasklet_disable(&engine->execlists.tasklet); + + /* + * We're using a worker to queue preemption requests from the tasklet in + * GuC submission mode. + * Even though the tasklet was disabled, we may still have a worker queued. + * Let's make sure that all workers scheduled before disabling the + * tasklet are completed before continuing with the reset. + */ + if (engine->i915->guc.preempt_wq) + flush_workqueue(engine->i915->guc.preempt_wq); if (engine->irq_seqno_barrier) engine->irq_seqno_barrier(engine); @@ -2955,7 +3089,12 @@ i915_gem_reset_request(struct intel_engine_cs *engine, void i915_gem_reset_engine(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - engine->irq_posted = 0; + /* + * Make sure this write is visible before we re-enable the interrupt + * handlers on another CPU, as tasklet_enable() resolves to just + * a compiler barrier which is insufficient for our purpose here. + */ + smp_store_mb(engine->irq_posted, 0); if (request) request = i915_gem_reset_request(engine, request); @@ -2985,6 +3124,25 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) ctx = fetch_and_zero(&engine->last_retired_context); if (ctx) engine->context_unpin(engine, ctx); + + /* + * Ostensibly, we always want a context loaded for powersaving, + * so if the engine is idle after the reset, send a request + * to load our scratch kernel_context. + * + * More mysteriously, if we leave the engine idle after a reset, + * the next userspace batch may hang, with what appears to be + * an incoherent read by the CS (presumably stale TLB). An + * empty request appears sufficient to paper over the glitch. 
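(Editorial note: the fallback below is best-effort by design. The request is allocated from the kernel context, and if i915_gem_request_alloc() fails the engine is simply left idle; the error is not propagated, since the reset itself has already succeeded.)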
+ */ + if (list_empty(&engine->timeline->requests)) { + struct drm_i915_gem_request *rq; + + rq = i915_gem_request_alloc(engine, + dev_priv->kernel_context); + if (!IS_ERR(rq)) + __i915_add_request(rq, false); + } } i915_gem_restore_fences(dev_priv); @@ -2999,8 +3157,10 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) { - tasklet_enable(&engine->irq_tasklet); + tasklet_enable(&engine->execlists.tasklet); kthread_unpark(engine->breadcrumbs.signaler); + + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); } void i915_gem_reset_finish(struct drm_i915_private *dev_priv) @@ -3018,9 +3178,15 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) static void nop_submit_request(struct drm_i915_gem_request *request) { + dma_fence_set_error(&request->fence, -EIO); + + i915_gem_request_submit(request); +} + +static void nop_complete_submit_request(struct drm_i915_gem_request *request) +{ unsigned long flags; - GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); dma_fence_set_error(&request->fence, -EIO); spin_lock_irqsave(&request->engine->timeline->lock, flags); @@ -3029,81 +3195,59 @@ static void nop_submit_request(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&request->engine->timeline->lock, flags); } -static void engine_set_wedged(struct intel_engine_cs *engine) +void i915_gem_set_wedged(struct drm_i915_private *i915) { - struct drm_i915_gem_request *request; - unsigned long flags; + struct intel_engine_cs *engine; + enum intel_engine_id id; - /* We need to be sure that no thread is running the old callback as - * we install the nop handler (otherwise we would submit a request - * to hardware that will never complete). In order to prevent this - * race, we wait until the machine is idle before making the swap - * (using stop_machine()). + /* + * First, stop submission to hw, but do not yet complete requests by + * rolling the global seqno forward (since this would complete requests + * for which we haven't set the fence error to EIO yet). */ - engine->submit_request = nop_submit_request; - - /* Mark all executing requests as skipped */ - spin_lock_irqsave(&engine->timeline->lock, flags); - list_for_each_entry(request, &engine->timeline->requests, link) - if (!i915_gem_request_completed(request)) - dma_fence_set_error(&request->fence, -EIO); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + for_each_engine(engine, i915, id) + engine->submit_request = nop_submit_request; /* - * Clear the execlists queue up before freeing the requests, as those - * are the ones that keep the context and ringbuffer backing objects - * pinned in place. + * Make sure no one is running the old callback before we proceed with + * cancelling requests and resetting the completion tracking. Otherwise + * we might submit a request to the hardware which never completes. 
*/ + synchronize_rcu(); - if (i915.enable_execlists) { - struct execlist_port *port = engine->execlist_port; - unsigned long flags; - unsigned int n; - - spin_lock_irqsave(&engine->timeline->lock, flags); - - for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) - i915_gem_request_put(port_request(&port[n])); - memset(engine->execlist_port, 0, sizeof(engine->execlist_port)); - engine->execlist_queue = RB_ROOT; - engine->execlist_first = NULL; - - spin_unlock_irqrestore(&engine->timeline->lock, flags); + for_each_engine(engine, i915, id) { + /* Mark all executing requests as skipped */ + engine->cancel_requests(engine); - /* The port is checked prior to scheduling a tasklet, but - * just in case we have suspended the tasklet to do the - * wedging make sure that when it wakes, it decides there - * is no work to do by clearing the irq_posted bit. + /* + * Only once we've force-cancelled all in-flight requests can we + * start to complete all requests. */ - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + engine->submit_request = nop_complete_submit_request; } - /* Mark all pending requests as complete so that any concurrent - * (lockless) lookup doesn't try and wait upon the request as we - * reset it. + /* + * Make sure no request can slip through without getting completed by + * either this call here to intel_engine_init_global_seqno, or the one + * in nop_complete_submit_request. */ - intel_engine_init_global_seqno(engine, - intel_engine_last_submit(engine)); -} + synchronize_rcu(); -static int __i915_gem_set_wedged_BKL(void *data) -{ - struct drm_i915_private *i915 = data; - struct intel_engine_cs *engine; - enum intel_engine_id id; + for_each_engine(engine, i915, id) { + unsigned long flags; - for_each_engine(engine, i915, id) - engine_set_wedged(engine); + /* Mark all pending requests as complete so that any concurrent + * (lockless) lookup doesn't try and wait upon the request as we + * reset it. + */ + spin_lock_irqsave(&engine->timeline->lock, flags); + intel_engine_init_global_seqno(engine, + intel_engine_last_submit(engine)); + spin_unlock_irqrestore(&engine->timeline->lock, flags); + } set_bit(I915_WEDGED, &i915->gpu_error.flags); wake_up_all(&i915->gpu_error.reset_queue); - - return 0; -} - -void i915_gem_set_wedged(struct drm_i915_private *dev_priv) -{ - stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); } bool i915_gem_unset_wedged(struct drm_i915_private *i915) @@ -3179,16 +3323,22 @@ i915_gem_retire_work_handler(struct work_struct *work) mutex_unlock(&dev->struct_mutex); } - /* Keep the retire handler running until we are finally idle. + /* + * Keep the retire handler running until we are finally idle. * We do not need to do this test under locking as in the worst-case * we queue the retire worker once too often. 
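(Editorial aside, not part of the patch: the reworked idle handler below replaces a single wait_for() with a bounded poll that bails out as soon as new requests arrive. The shape of that loop, as a standalone userspace sketch; poll_until_idle() and both callbacks are hypothetical names:

    #include <stdbool.h>
    #include <stdint.h>
    #include <time.h>
    #include <unistd.h>

    static int64_t now_ms(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return ts.tv_sec * INT64_C(1000) + ts.tv_nsec / 1000000;
    }

    /* Poll @idle for up to @timeout_ms, aborting early when @busy
     * reports new work (cf. new_requests_since_last_retire()). */
    static bool poll_until_idle(bool (*idle)(void), bool (*busy)(void),
                                int timeout_ms)
    {
            const int64_t end = now_ms() + timeout_ms;

            do {
                    if (busy())
                            return false;
                    if (idle())
                            return true;
                    usleep(200); /* cf. usleep_range(100, 500) */
            } while (now_ms() < end);

            return false; /* timed out; the caller decides what next */
    }
)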
*/ - if (READ_ONCE(dev_priv->gt.awake)) { - i915_queue_hangcheck(dev_priv); + if (READ_ONCE(dev_priv->gt.awake)) queue_delayed_work(dev_priv->wq, &dev_priv->gt.retire_work, round_jiffies_up_relative(HZ)); - } +} + +static inline bool +new_requests_since_last_retire(const struct drm_i915_private *i915) +{ + return (READ_ONCE(i915->gt.active_requests) || + work_pending(&i915->gt.idle_work.work)); } static void @@ -3196,8 +3346,8 @@ i915_gem_idle_work_handler(struct work_struct *work) { struct drm_i915_private *dev_priv = container_of(work, typeof(*dev_priv), gt.idle_work.work); - struct drm_device *dev = &dev_priv->drm; bool rearm_hangcheck; + ktime_t end; if (!READ_ONCE(dev_priv->gt.awake)) return; @@ -3206,14 +3356,21 @@ i915_gem_idle_work_handler(struct work_struct *work) * Wait for last execlists context complete, but bail out in case a * new request is submitted. */ - wait_for(intel_engines_are_idle(dev_priv), 10); - if (READ_ONCE(dev_priv->gt.active_requests)) - return; + end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT); + do { + if (new_requests_since_last_retire(dev_priv)) + return; + + if (intel_engines_are_idle(dev_priv)) + break; + + usleep_range(100, 500); + } while (ktime_before(ktime_get(), end)); rearm_hangcheck = cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); - if (!mutex_trylock(&dev->struct_mutex)) { + if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { /* Currently busy, come back later */ mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, @@ -3225,17 +3382,26 @@ i915_gem_idle_work_handler(struct work_struct *work) * New request retired after this work handler started, extend active * period until next instance of the work. */ - if (work_pending(work)) + if (new_requests_since_last_retire(dev_priv)) goto out_unlock; - if (dev_priv->gt.active_requests) - goto out_unlock; + /* + * Be paranoid and flush a concurrent interrupt to make sure + * we don't reactivate any irq tasklets after parking. + * + * FIXME: Note that even though we have waited for execlists to be idle, + * there may still be an in-flight interrupt even though the CSB + * is now empty. synchronize_irq() makes sure that a residual interrupt + * is completed before we continue, but it doesn't prevent the HW from + * raising a spurious interrupt later. To complete the shield we should + * coordinate disabling the CS irq with flushing the interrupts. 
+ */ + synchronize_irq(dev_priv->drm.irq); - if (wait_for(intel_engines_are_idle(dev_priv), 10)) - DRM_ERROR("Timeout waiting for engines to idle\n"); + intel_engines_park(dev_priv); + i915_gem_timelines_park(dev_priv); - intel_engines_mark_idle(dev_priv); - i915_gem_timelines_mark_idle(dev_priv); + i915_pmu_gt_parked(dev_priv); GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; @@ -3243,9 +3409,12 @@ i915_gem_idle_work_handler(struct work_struct *work) if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); + + intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ); + intel_runtime_pm_put(dev_priv); out_unlock: - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev_priv->drm.struct_mutex); out_rearm: if (rearm_hangcheck) { @@ -3267,11 +3436,11 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) struct i915_gem_context *ctx = lut->ctx; struct i915_vma *vma; + GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); if (ctx->file_priv != fpriv) continue; vma = radix_tree_delete(&ctx->handles_vma, lut->handle); - GEM_BUG_ON(vma->obj != obj); /* We allow the process to have multiple handles to the same @@ -3385,24 +3554,23 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) return 0; } -static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms) -{ - return wait_for(intel_engine_is_idle(engine), timeout_ms); -} - static int wait_for_engines(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - if (GEM_WARN_ON(wait_for_engine(engine, 50))) { - i915_gem_set_wedged(i915); - return -EIO; + if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { + dev_err(i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer(__func__); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + intel_engine_dump(engine, &p, + "%s", engine->name); } - GEM_BUG_ON(intel_engine_get_seqno(engine) != - intel_engine_last_submit(engine)); + i915_gem_set_wedged(i915); + return -EIO; } return 0; @@ -3426,9 +3594,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (ret) return ret; } - i915_gem_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); ret = wait_for_engines(i915); } else { @@ -3452,7 +3618,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) { - if (!READ_ONCE(obj->pin_display)) + if (!READ_ONCE(obj->pin_global)) return; mutex_lock(&obj->base.dev->struct_mutex); @@ -3627,7 +3793,8 @@ restart: return -EBUSY; } - if (i915_gem_valid_gtt_space(vma, cache_level)) + if (!i915_vma_is_closed(vma) && + i915_gem_valid_gtt_space(vma, cache_level)) continue; ret = i915_vma_unbind(vma); @@ -3680,7 +3847,7 @@ restart: * dropped the fence as all snoopable access is * supposed to be linear. */ - list_for_each_entry(vma, &obj->vma_list, obj_link) { + for_each_ggtt_vma(vma, obj) { ret = i915_vma_put_fence(vma); if (ret) return ret; @@ -3782,6 +3949,15 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT; + /* + * The caching mode of proxy object is handled by its generator, and + * not allowed to be changed by userspace. 
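(Editorial note: this matches the treatment earlier in the patch: set-domain and mmap also reject proxy objects with -ENXIO, while sw_finish stays permitted since, as noted there, it is effectively a no-op for such objects.)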
+ */ + if (i915_gem_object_is_proxy(obj)) { + ret = -ENXIO; + goto out; + } + if (obj->cache_level == level) goto out; @@ -3819,10 +3995,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); - /* Mark the pin_display early so that we account for the + /* Mark the global pin early so that we account for the * display coherency whilst setting up the cache domains. */ - obj->pin_display++; + obj->pin_global++; /* The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is @@ -3838,7 +4014,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, I915_CACHE_WT : I915_CACHE_NONE); if (ret) { vma = ERR_PTR(ret); - goto err_unpin_display; + goto err_unpin_global; } /* As the user may map the buffer once pinned in the display plane @@ -3869,7 +4045,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); } if (IS_ERR(vma)) - goto err_unpin_display; + goto err_unpin_global; vma->display_alignment = max_t(u64, vma->display_alignment, alignment); @@ -3884,8 +4060,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, return vma; -err_unpin_display: - obj->pin_display--; +err_unpin_global: + obj->pin_global--; return vma; } @@ -3894,10 +4070,10 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) { lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (WARN_ON(vma->obj->pin_display == 0)) + if (WARN_ON(vma->obj->pin_global == 0)) return; - if (--vma->obj->pin_display == 0) + if (--vma->obj->pin_global == 0) vma->display_alignment = I915_GTT_MIN_ALIGNMENT; /* Bump the LRU to try and avoid premature eviction whilst flipping */ @@ -4016,42 +4192,47 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); + if (!view && flags & PIN_MAPPABLE) { + /* If the required space is larger than the available + * aperture, we will not be able to find a slot for the + * object and unbinding the object now will be in + * vain. Worse, doing so may cause us to ping-pong + * the object in and out of the Global GTT and + * waste a lot of cycles under the mutex. + */ + if (obj->base.size > dev_priv->ggtt.mappable_end) + return ERR_PTR(-E2BIG); + + /* If NONBLOCK is set the caller is optimistically + * trying to cache the full object within the mappable + * aperture, and *must* have a fallback in place for + * situations where we cannot bind the object. We + * can be a little more lax here and use the fallback + * more often to avoid costly migrations of ourselves + * and other objects within the aperture. + * + * Half-the-aperture is used as a simple heuristic. + * More interesting would be to do a search for a free + * block prior to making the commitment to unbind. + * That caters for the self-harm case, and with a + * little more heuristics (e.g. NOFAULT, NOEVICT) + * we could try to minimise harm to others. 
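(Editorial aside, not part of the patch: the two early exits above reduce to a small pure predicate. A minimal sketch, eliding the !view test and fence sizing; mappable_pin_precheck() and the flag values are illustrative:

    #include <errno.h>
    #include <stdint.h>

    #define PIN_MAPPABLE (1u << 0)
    #define PIN_NONBLOCK (1u << 1)

    /* Decide up front whether a mappable GGTT pin can possibly work. */
    static int mappable_pin_precheck(uint64_t size, uint64_t mappable_end,
                                     unsigned int flags)
    {
            if (!(flags & PIN_MAPPABLE))
                    return 0;
            if (size > mappable_end)
                    return -E2BIG;  /* can never fit; do not thrash */
            if ((flags & PIN_NONBLOCK) && size > mappable_end / 2)
                    return -ENOSPC; /* half-aperture: take the fallback */
            return 0;
    }
)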
+ */ + if (flags & PIN_NONBLOCK && + obj->base.size > dev_priv->ggtt.mappable_end / 2) + return ERR_PTR(-ENOSPC); + } + vma = i915_vma_instance(obj, vm, view); if (unlikely(IS_ERR(vma))) return vma; if (i915_vma_misplaced(vma, size, alignment, flags)) { - if (flags & PIN_NONBLOCK && - (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) - return ERR_PTR(-ENOSPC); + if (flags & PIN_NONBLOCK) { + if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) + return ERR_PTR(-ENOSPC); - if (flags & PIN_MAPPABLE) { - /* If the required space is larger than the available - * aperture, we will not able to find a slot for the - * object and unbinding the object now will be in - * vain. Worse, doing so may cause us to ping-pong - * the object in and out of the Global GTT and - * waste a lot of cycles under the mutex. - */ - if (vma->fence_size > dev_priv->ggtt.mappable_end) - return ERR_PTR(-E2BIG); - - /* If NONBLOCK is set the caller is optimistically - * trying to cache the full object within the mappable - * aperture, and *must* have a fallback in place for - * situations where we cannot bind the object. We - * can be a little more lax here and use the fallback - * more often to avoid costly migrations of ourselves - * and other objects within the aperture. - * - * Half-the-aperture is used as a simple heuristic. - * More interesting would to do search for a free - * block prior to making the commitment to unbind. - * That caters for the self-harm case, and with a - * little more heuristics (e.g. NOFAULT, NOEVICT) - * we could try to minimise harm to others. - */ - if (flags & PIN_NONBLOCK && + if (flags & PIN_MAPPABLE && vma->fence_size > dev_priv->ggtt.mappable_end / 2) return ERR_PTR(-ENOSPC); } @@ -4232,7 +4413,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, if (err) goto out; - if (obj->mm.pages && + if (i915_gem_object_has_pages(obj) && i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (obj->mm.madv == I915_MADV_WILLNEED) { @@ -4251,7 +4432,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, obj->mm.madv = args->madv; /* if the object is no longer attached, discard its backing storage */ - if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages) + if (obj->mm.madv == I915_MADV_DONTNEED && + !i915_gem_object_has_pages(obj)) i915_gem_object_truncate(obj); args->retained = obj->mm.madv != __I915_MADV_PURGED; @@ -4277,8 +4459,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { mutex_init(&obj->mm.lock); - INIT_LIST_HEAD(&obj->global_link); - INIT_LIST_HEAD(&obj->userfault_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4308,6 +4488,30 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = { .pwrite = i915_gem_object_pwrite_gtt, }; +static int i915_gem_object_create_shmem(struct drm_device *dev, + struct drm_gem_object *obj, + size_t size) +{ + struct drm_i915_private *i915 = to_i915(dev); + unsigned long flags = VM_NORESERVE; + struct file *filp; + + drm_gem_private_object_init(dev, obj, size); + + if (i915->mm.gemfs) + filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, + flags); + else + filp = shmem_file_setup("i915", size, flags); + + if (IS_ERR(filp)) + return PTR_ERR(filp); + + obj->filp = filp; + + return 0; +} + struct drm_i915_gem_object * i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) { @@ -4332,7 +4536,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) if (obj == NULL) return 
ERR_PTR(-ENOMEM); - ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size); + ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); if (ret) goto fail; @@ -4409,13 +4613,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, { struct drm_i915_gem_object *obj, *on; - mutex_lock(&i915->drm.struct_mutex); intel_runtime_pm_get(i915); - llist_for_each_entry(obj, freed, freed) { + llist_for_each_entry_safe(obj, on, freed, freed) { struct i915_vma *vma, *vn; trace_i915_gem_object_destroy(obj); + mutex_lock(&i915->drm.struct_mutex); + GEM_BUG_ON(i915_gem_object_is_active(obj)); list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) { @@ -4426,16 +4631,24 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, GEM_BUG_ON(!list_empty(&obj->vma_list)); GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); - list_del(&obj->global_link); - } - intel_runtime_pm_put(i915); - mutex_unlock(&i915->drm.struct_mutex); + /* This serializes freeing with the shrinker. Since the free + * is delayed, first by RCU then by the workqueue, we want the + * shrinker to be able to free pages of unreferenced objects, + * or else we may oom whilst there are plenty of deferred + * freed objects. + */ + if (i915_gem_object_has_pages(obj)) { + spin_lock(&i915->mm.obj_lock); + list_del_init(&obj->mm.link); + spin_unlock(&i915->mm.obj_lock); + } - cond_resched(); + mutex_unlock(&i915->drm.struct_mutex); - llist_for_each_entry_safe(obj, on, freed, freed) { GEM_BUG_ON(obj->bind_count); + GEM_BUG_ON(obj->userfault_count); GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); + GEM_BUG_ON(!list_empty(&obj->lut_list)); if (obj->ops->release) obj->ops->release(obj); @@ -4443,7 +4656,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) atomic_set(&obj->mm.pages_pin_count, 0); __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - GEM_BUG_ON(obj->mm.pages); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); if (obj->base.import_attach) drm_prime_gem_destroy(&obj->base, NULL); @@ -4454,16 +4667,29 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, kfree(obj->bit_17); i915_gem_object_free(obj); + + if (on) + cond_resched(); } + intel_runtime_pm_put(i915); } static void i915_gem_flush_free_objects(struct drm_i915_private *i915) { struct llist_node *freed; - freed = llist_del_all(&i915->mm.free_list); - if (unlikely(freed)) + /* Free the oldest, most stale object to keep the free_list short */ + freed = NULL; + if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ + /* Only one consumer of llist_del_first() allowed */ + spin_lock(&i915->mm.free_lock); + freed = llist_del_first(&i915->mm.free_list); + spin_unlock(&i915->mm.free_lock); + } + if (unlikely(freed)) { + freed->next = NULL; __i915_gem_free_objects(i915, freed); + } } static void __i915_gem_free_work(struct work_struct *work) @@ -4480,11 +4706,17 @@ static void __i915_gem_free_work(struct work_struct *work) * unbound now. 
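(Editorial aside, not part of the patch: the mm.free_list feeding this worker is a lock-free llist — producers push nodes with a cmpxchg loop, and the consumer detaches the whole chain in a single exchange. The same structure in standalone C11; free_list_add()/free_list_del_all() are illustrative names for llist_add()/llist_del_all():

    #include <stdatomic.h>
    #include <stddef.h>

    struct node {
            struct node *next;
    };

    static _Atomic(struct node *) free_list;

    /* Producer: lock-free push, safe to call from any context. */
    static void free_list_add(struct node *n)
    {
            n->next = atomic_load(&free_list);
            while (!atomic_compare_exchange_weak(&free_list, &n->next, n))
                    ; /* on failure n->next holds the new head; retry */
    }

    /* Consumer: detach the entire chain at once, then walk it. */
    static struct node *free_list_del_all(void)
    {
            return atomic_exchange(&free_list, NULL);
    }
)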
*/ + spin_lock(&i915->mm.free_lock); while ((freed = llist_del_all(&i915->mm.free_list))) { + spin_unlock(&i915->mm.free_lock); + __i915_gem_free_objects(i915, freed); if (need_resched()) - break; + return; + + spin_lock(&i915->mm.free_lock); } + spin_unlock(&i915->mm.free_lock); } static void __i915_gem_free_object_rcu(struct rcu_head *head) @@ -4531,18 +4763,26 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) i915_gem_object_put(obj); } -static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) +static void assert_kernel_context_is_current(struct drm_i915_private *i915) { + struct i915_gem_context *kernel_context = i915->kernel_context; struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, dev_priv, id) - GEM_BUG_ON(engine->last_retired_context && - !i915_gem_context_is_kernel(engine->last_retired_context)); + for_each_engine(engine, i915, id) { + GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request)); + GEM_BUG_ON(engine->last_retired_context != kernel_context); + } } void i915_gem_sanitize(struct drm_i915_private *i915) { + if (i915_terminally_wedged(&i915->gpu_error)) { + mutex_lock(&i915->drm.struct_mutex); + i915_gem_unset_wedged(i915); + mutex_unlock(&i915->drm.struct_mutex); + } + /* * If we inherit context state from the BIOS or earlier occupants * of the GPU, the GPU may be in an inconsistent state when we @@ -4575,17 +4815,19 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - ret = i915_gem_switch_to_kernel_context(dev_priv); - if (ret) - goto err_unlock; + if (!i915_terminally_wedged(&dev_priv->gpu_error)) { + ret = i915_gem_switch_to_kernel_context(dev_priv); + if (ret) + goto err_unlock; - ret = i915_gem_wait_for_idle(dev_priv, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); - if (ret) - goto err_unlock; + ret = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (ret && ret != -EIO) + goto err_unlock; - assert_kernel_context_is_current(dev_priv); + assert_kernel_context_is_current(dev_priv); + } i915_gem_contexts_lost(dev_priv); mutex_unlock(&dev->struct_mutex); @@ -4597,14 +4839,14 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) /* As the idle_work is rearming if it detects a race, play safe and * repeat the flush until it is definitely idle. */ - while (flush_delayed_work(&dev_priv->gt.idle_work)) - ; + drain_delayed_work(&dev_priv->gt.idle_work); /* Assert that we sucessfully flushed all the work and * reset the GPU back to its idle, low power state. */ WARN_ON(dev_priv->gt.awake); - WARN_ON(!intel_engines_are_idle(dev_priv)); + if (WARN_ON(!intel_engines_are_idle(dev_priv))) + i915_gem_set_wedged(dev_priv); /* no hope, discard everything */ /* * Neither the BIOS, ourselves or any other kernel @@ -4626,31 +4868,53 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machine in an unusable condition. 
*/ i915_gem_sanitize(dev_priv); - goto out_rpm_put; + + intel_runtime_pm_put(dev_priv); + return 0; err_unlock: mutex_unlock(&dev->struct_mutex); -out_rpm_put: intel_runtime_pm_put(dev_priv); return ret; } -void i915_gem_resume(struct drm_i915_private *dev_priv) +void i915_gem_resume(struct drm_i915_private *i915) { - struct drm_device *dev = &dev_priv->drm; + WARN_ON(i915->gt.awake); - WARN_ON(dev_priv->gt.awake); + mutex_lock(&i915->drm.struct_mutex); + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); - mutex_lock(&dev->struct_mutex); - i915_gem_restore_gtt_mappings(dev_priv); + i915_gem_restore_gtt_mappings(i915); + i915_gem_restore_fences(i915); - /* As we didn't flush the kernel context before suspend, we cannot + /* + * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ - dev_priv->gt.resume(dev_priv); + i915->gt.resume(i915); - mutex_unlock(&dev->struct_mutex); + if (i915_gem_init_hw(i915)) + goto err_wedged; + + intel_guc_resume(i915); + + /* Always reload a context for powersaving. */ + if (i915_gem_switch_to_kernel_context(i915)) + goto err_wedged; + +out_unlock: + intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); + mutex_unlock(&i915->drm.struct_mutex); + return; + +err_wedged: + if (!i915_terminally_wedged(&i915->gpu_error)) { + DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); + i915_gem_set_wedged(i915); + } + goto out_unlock; } void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) @@ -4756,6 +5020,10 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) init_unused_rings(dev_priv); BUG_ON(!dev_priv->kernel_context); + if (i915_terminally_wedged(&dev_priv->gpu_error)) { + ret = -EIO; + goto out; + } ret = i915_ppgtt_init_hw(dev_priv); if (ret) { @@ -4763,97 +5031,276 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) goto out; } - /* Need to do basic initialisation of all rings first: */ - ret = __i915_gem_restart_engines(dev_priv); - if (ret) - goto out; - - intel_mocs_init_l3cc_table(dev_priv); - /* We can't enable contexts until all firmware is loaded */ ret = intel_uc_init_hw(dev_priv); if (ret) goto out; + intel_mocs_init_l3cc_table(dev_priv); + + /* Only when the HW is re-initialised, can we replay the requests */ + ret = __i915_gem_restart_engines(dev_priv); out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; } -bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) +static int __intel_engines_record_defaults(struct drm_i915_private *i915) { - if (INTEL_INFO(dev_priv)->gen < 6) - return false; + struct i915_gem_context *ctx; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; - /* TODO: make semaphores and Execlists play nicely together */ - if (i915.enable_execlists) - return false; + /* + * As we reset the gpu during very early sanitisation, the current + * register state on the GPU should reflect its defaults values. + * We load a context onto the hw (with restore-inhibit), then switch + * over to a second context to save that default register state. We + * can then prime every new context with that state so they all start + * from the same default HW values. 
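(Editorial note: in outline, the function below (1) plays one request from a throwaway kernel context on every engine so that init_context runs against a restore-inhibited image, (2) switches back to the kernel context and waits for idle so that the default register state gets saved into that image, and (3) unbinds each context vma and keeps a reference to its backing object as engine->default_state, from which all future contexts are primed.)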
+ */ - if (value >= 0) - return value; + ctx = i915_gem_context_create_kernel(i915, 0); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); - /* Enable semaphores on SNB when IO remapping is off */ - if (IS_GEN6(dev_priv) && intel_vtd_active()) - return false; + for_each_engine(engine, i915, id) { + struct drm_i915_gem_request *rq; - return true; + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ctx; + } + + err = 0; + if (engine->init_context) + err = engine->init_context(rq); + + __i915_add_request(rq, true); + if (err) + goto err_active; + } + + err = i915_gem_switch_to_kernel_context(i915); + if (err) + goto err_active; + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) + goto err_active; + + assert_kernel_context_is_current(i915); + + for_each_engine(engine, i915, id) { + struct i915_vma *state; + + state = ctx->engine[id].state; + if (!state) + continue; + + /* + * As we will hold a reference to the logical state, it will + * not be torn down with the context, and importantly the + * object will hold onto its vma (making it possible for a + * stray GTT write to corrupt our defaults). Unmap the vma + * from the GTT to prevent such accidents and reclaim the + * space. + */ + err = i915_vma_unbind(state); + if (err) + goto err_active; + + err = i915_gem_object_set_to_cpu_domain(state->obj, false); + if (err) + goto err_active; + + engine->default_state = i915_gem_object_get(state->obj); + } + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { + unsigned int found = intel_engines_has_context_isolation(i915); + + /* + * Make sure that classes with multiple engine instances all + * share the same basic configuration. + */ + for_each_engine(engine, i915, id) { + unsigned int bit = BIT(engine->uabi_class); + unsigned int expected = engine->default_state ? bit : 0; + + if ((found & bit) != expected) { + DRM_ERROR("mismatching default context state for class %d on engine %s\n", + engine->uabi_class, engine->name); + } + } + } + +out_ctx: + i915_gem_context_set_closed(ctx); + i915_gem_context_put(ctx); + return err; + +err_active: + /* + * If we have to abandon now, we expect the engines to be idle + * and ready to be torn down. First try to flush any remaining + * request, ensure we are pointing at the kernel context and + * then remove it. + */ + if (WARN_ON(i915_gem_switch_to_kernel_context(i915))) + goto out_ctx; + + if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED))) + goto out_ctx; + + i915_gem_contexts_lost(i915); + goto out_ctx; } int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; - mutex_lock(&dev_priv->drm.struct_mutex); + /* + * We need to fall back to 4K pages since gvt gtt handling doesn't + * support huge page entries - we will need to check whether the + * hypervisor mm can support huge guest pages, or just do the + * emulation in gvt. 
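(Editorial note: with the device-info mask clamped to I915_GTT_PAGE_SIZE_4K here, the page_sizes.sg computation added earlier in this patch can never report anything beyond 4K under a vGPU, so huge GTT entries are simply never attempted.)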
+ */ + if (intel_vgpu_active(dev_priv)) + mkwrite_device_info(dev_priv)->page_sizes = + I915_GTT_PAGE_SIZE_4K; dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); - if (!i915.enable_execlists) { - dev_priv->gt.resume = intel_legacy_submission_resume; - dev_priv->gt.cleanup_engine = intel_engine_cleanup; - } else { + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { dev_priv->gt.resume = intel_lr_context_resume; dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; + } else { + dev_priv->gt.resume = intel_legacy_submission_resume; + dev_priv->gt.cleanup_engine = intel_engine_cleanup; } + ret = i915_gem_init_userptr(dev_priv); + if (ret) + return ret; + + ret = intel_uc_init_wq(dev_priv); + if (ret) + return ret; + /* This is just a security blanket to placate dragons. * On some systems, we very sporadically observe that the first TLBs * used by the CS may be stale, despite us poking the TLB reset. If * we hold the forcewake during initialisation these problems * just magically go away. */ + mutex_lock(&dev_priv->drm.struct_mutex); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - ret = i915_gem_init_userptr(dev_priv); - if (ret) - goto out_unlock; - ret = i915_gem_init_ggtt(dev_priv); - if (ret) - goto out_unlock; + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_unlock; + } ret = i915_gem_contexts_init(dev_priv); - if (ret) - goto out_unlock; + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_ggtt; + } ret = intel_engines_init(dev_priv); + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_context; + } + + intel_init_gt_powersave(dev_priv); + + ret = intel_uc_init(dev_priv); if (ret) - goto out_unlock; + goto err_pm; ret = i915_gem_init_hw(dev_priv); + if (ret) + goto err_uc_init; + + /* + * Despite its name intel_init_clock_gating applies both display + * clock gating workarounds; GT mmio workarounds and the occasional + * GT power context workaround. Worse, sometimes it includes a context + * register workaround which we need to apply before we record the + * default HW state for all contexts. + * + * FIXME: break up the workarounds and apply them at the right time! + */ + intel_init_clock_gating(dev_priv); + + ret = __intel_engines_record_defaults(dev_priv); + if (ret) + goto err_init_hw; + + if (i915_inject_load_failure()) { + ret = -ENODEV; + goto err_init_hw; + } + + if (i915_inject_load_failure()) { + ret = -EIO; + goto err_init_hw; + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&dev_priv->drm.struct_mutex); + + return 0; + + /* + * Unwinding is complicated by the fact that we want to handle -EIO + * to mean disable GPU submission but keep KMS alive. We want to mark + * the HW as irreversibly wedged, but keep enough state around that the + * driver doesn't explode during runtime. + */ +err_init_hw: + i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); + i915_gem_contexts_lost(dev_priv); + intel_uc_fini_hw(dev_priv); +err_uc_init: + intel_uc_fini(dev_priv); +err_pm: + if (ret != -EIO) { + intel_cleanup_gt_powersave(dev_priv); + i915_gem_cleanup_engines(dev_priv); + } +err_context: + if (ret != -EIO) + i915_gem_contexts_fini(dev_priv); +err_ggtt: +err_unlock: + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&dev_priv->drm.struct_mutex); + + intel_uc_fini_wq(dev_priv); + + if (ret != -EIO) + i915_gem_cleanup_userptr(dev_priv); + if (ret == -EIO) { - /* Allow engine initialisation to fail by marking the GPU as + /* + * Allow engine initialisation to fail by marking the GPU as * wedged. 
But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. */ - DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); - i915_gem_set_wedged(dev_priv); + if (!i915_terminally_wedged(&dev_priv->gpu_error)) { + DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); + i915_gem_set_wedged(dev_priv); + } ret = 0; } -out_unlock: - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); - + i915_gem_drain_freed_objects(dev_priv); return ret; } @@ -4904,6 +5351,22 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv) i915_gem_detect_bit_6_swizzle(dev_priv); } +static void i915_gem_init__mm(struct drm_i915_private *i915) +{ + spin_lock_init(&i915->mm.object_stat_lock); + spin_lock_init(&i915->mm.obj_lock); + spin_lock_init(&i915->mm.free_lock); + + init_llist_head(&i915->mm.free_list); + + INIT_LIST_HEAD(&i915->mm.unbound_list); + INIT_LIST_HEAD(&i915->mm.bound_list); + INIT_LIST_HEAD(&i915->mm.fence_list); + INIT_LIST_HEAD(&i915->mm.userfault_list); + + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); +} + int i915_gem_load_init(struct drm_i915_private *dev_priv) { @@ -4945,12 +5408,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (err) goto err_priorities; - INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); - init_llist_head(&dev_priv->mm.free_list); - INIT_LIST_HEAD(&dev_priv->mm.unbound_list); - INIT_LIST_HEAD(&dev_priv->mm.bound_list); - INIT_LIST_HEAD(&dev_priv->mm.fence_list); - INIT_LIST_HEAD(&dev_priv->mm.userfault_list); + i915_gem_init__mm(dev_priv); + INIT_DELAYED_WORK(&dev_priv->gt.retire_work, i915_gem_retire_work_handler); INIT_DELAYED_WORK(&dev_priv->gt.idle_work, @@ -4962,6 +5421,10 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) spin_lock_init(&dev_priv->fb_tracking.lock); + err = i915_gemfs_init(dev_priv); + if (err) + DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); + return 0; err_priorities: @@ -5000,6 +5463,8 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ rcu_barrier(); + + i915_gemfs_fini(dev_priv); } int i915_gem_freeze(struct drm_i915_private *dev_priv) @@ -5038,12 +5503,12 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND); i915_gem_drain_freed_objects(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); + spin_lock(&dev_priv->mm.obj_lock); for (p = phases; *p; p++) { - list_for_each_entry(obj, *p, global_link) + list_for_each_entry(obj, *p, mm.link) __start_cpu_write(obj); } - mutex_unlock(&dev_priv->drm.struct_mutex); + spin_unlock(&dev_priv->mm.obj_lock); return 0; } @@ -5362,7 +5827,17 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) goto err_unlock; } - pages = obj->mm.pages; + pages = fetch_and_zero(&obj->mm.pages); + if (pages) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + + __i915_gem_object_reset_page_iter(obj); + + spin_lock(&i915->mm.obj_lock); + list_del(&obj->mm.link); + spin_unlock(&i915->mm.obj_lock); + } + obj->ops = &i915_gem_phys_ops; err = ____i915_gem_object_get_pages(obj); @@ -5389,6 +5864,7 @@ err_unlock: #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" #include "selftests/huge_gem_object.c" +#include "selftests/huge_pages.c" #include "selftests/i915_gem_object.c" #include "selftests/i915_gem_coherency.c" #endif diff --git 
a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index ee54597465b6..e920dab7f1b8 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -28,7 +28,11 @@ #include <linux/bug.h> #ifdef CONFIG_DRM_I915_DEBUG_GEM -#define GEM_BUG_ON(expr) BUG_ON(expr) +#define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ + printk(KERN_ERR "GEM_BUG_ON(%s)\n", __stringify(condition)); \ + BUG(); \ + } \ + } while(0) #define GEM_WARN_ON(expr) WARN_ON(expr) #define GEM_DEBUG_DECL(var) var @@ -44,6 +48,12 @@ #define GEM_DEBUG_BUG_ON(expr) #endif +#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM) +#define GEM_TRACE(...) trace_printk(__VA_ARGS__) +#else +#define GEM_TRACE(...) do { } while (0) +#endif + #define I915_NUM_ENGINES 5 #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index 8a04d33055be..b9b53ac14176 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -70,6 +70,7 @@ static const struct dma_fence_ops i915_clflush_ops = { static void __i915_do_clflush(struct drm_i915_gem_object *obj) { + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); drm_clflush_sg(obj->mm.pages); intel_fb_obj_flush(obj, ORIGIN_CPU); } @@ -166,7 +167,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, i915_sw_fence_await_reservation(&clflush->wait, obj->resv, NULL, true, I915_FENCE_TIMEOUT, - GFP_KERNEL); + I915_FENCE_GFP); reservation_object_lock(obj->resv, NULL); reservation_object_add_excl_fence(obj->resv, &clflush->dma); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8afd2ce59b8d..0c963fcf31ff 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -107,14 +107,9 @@ static void lut_close(struct i915_gem_context *ctx) rcu_read_lock(); radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { struct i915_vma *vma = rcu_dereference_raw(*slot); - struct drm_i915_gem_object *obj = vma->obj; radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); - - if (!i915_vma_is_ggtt(vma)) - i915_vma_close(vma); - - __i915_gem_object_release_unless_active(obj); + __i915_gem_object_release_unless_active(vma->obj); } rcu_read_unlock(); } @@ -200,6 +195,11 @@ static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); + /* + * The LUT uses the VMA as a backpointer to unref the object, + * so we need to clear the LUT before we close all the VMA (inside + * the ppgtt). + */ lut_close(ctx); if (ctx->ppgtt) i915_ppgtt_close(&ctx->ppgtt->base); @@ -316,7 +316,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, * present or not in use we still need a small bias as ring wraparound * at offset 0 sometimes hangs. No idea why. 
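(Editorial aside on the GEM_BUG_ON() rework in the i915_gem.h hunk above, not part of the patch: printing the stringified expression before BUG() means the failing assertion is named even when ftrace_dump_on_oops is the only post-mortem channel. The same shape in userspace; MY_BUG_ON is a hypothetical stand-in:

    #include <stdio.h>
    #include <stdlib.h>

    /* Name the failing expression, then die (cf. the new GEM_BUG_ON). */
    #define MY_BUG_ON(expr) do {                                    \
                    if (expr) {                                     \
                            fprintf(stderr, "BUG_ON(%s)\n", #expr); \
                            abort();                                \
                    }                                               \
            } while (0)

    int main(void)
    {
            int refcount = 1;

            MY_BUG_ON(refcount <= 0); /* passes silently */
            return 0;
    }
)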
*/ - if (HAS_GUC(dev_priv) && i915.enable_guc_loading) + if (USES_GUC(dev_priv)) ctx->ggtt_offset_bias = GUC_WOPCM_TOP; else ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE; @@ -409,7 +409,7 @@ i915_gem_context_create_gvt(struct drm_device *dev) i915_gem_context_set_closed(ctx); /* not user accessible */ i915_gem_context_clear_bannable(ctx); i915_gem_context_set_force_single_submission(ctx); - if (!i915.enable_guc_submission) + if (!USES_GUC_SUBMISSION(to_i915(dev))) ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); @@ -418,53 +418,84 @@ out: return ctx; } +struct i915_gem_context * +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) +{ + struct i915_gem_context *ctx; + + ctx = i915_gem_create_context(i915, NULL); + if (IS_ERR(ctx)) + return ctx; + + i915_gem_context_clear_bannable(ctx); + ctx->priority = prio; + ctx->ring_size = PAGE_SIZE; + + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + return ctx; +} + +static void +destroy_kernel_context(struct i915_gem_context **ctxp) +{ + struct i915_gem_context *ctx; + + /* Keep the context ref so that we can free it immediately ourselves */ + ctx = i915_gem_context_get(fetch_and_zero(ctxp)); + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + context_close(ctx); + i915_gem_context_free(ctx); +} + int i915_gem_contexts_init(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; + int err; - /* Init should only be called once per module load. Eventually the - * restriction on the context_disabled check can be loosened. */ - if (WARN_ON(dev_priv->kernel_context)) - return 0; + GEM_BUG_ON(dev_priv->kernel_context); INIT_LIST_HEAD(&dev_priv->contexts.list); INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker); init_llist_head(&dev_priv->contexts.free_list); - if (intel_vgpu_active(dev_priv) && - HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { - if (!i915.enable_execlists) { - DRM_INFO("Only EXECLIST mode is supported in vgpu.\n"); - return -EINVAL; - } - } - /* Using the simple ida interface, the max is limited by sizeof(int) */ BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->contexts.hw_ida); - ctx = i915_gem_create_context(dev_priv, NULL); + /* lowest priority; idle task */ + ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { - DRM_ERROR("Failed to create default global context (error %ld)\n", - PTR_ERR(ctx)); - return PTR_ERR(ctx); + DRM_ERROR("Failed to create default global context\n"); + err = PTR_ERR(ctx); + goto err; } - - /* For easy recognisablity, we want the kernel context to be 0 and then + /* + * For easy recognisability, we want the kernel context to be 0 and then * all user contexts will have non-zero hw_id. */ GEM_BUG_ON(ctx->hw_id); - - i915_gem_context_clear_bannable(ctx); - ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ dev_priv->kernel_context = ctx; - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + /* highest priority; preempting task */ + ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); + if (IS_ERR(ctx)) { + DRM_ERROR("Failed to create default preempt context\n"); + err = PTR_ERR(ctx); + goto err_kernel_context; + } + dev_priv->preempt_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", dev_priv->engine[RCS]->context_size ? 
"logical" : "fake"); return 0; + +err_kernel_context: + destroy_kernel_context(&dev_priv->kernel_context); +err: + return err; } void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) @@ -476,6 +507,7 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { engine->legacy_active_context = NULL; + engine->legacy_active_ppgtt = NULL; if (!engine->last_retired_context) continue; @@ -483,41 +515,14 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) engine->context_unpin(engine, engine->last_retired_context); engine->last_retired_context = NULL; } - - /* Force the GPU state to be restored on enabling */ - if (!i915.enable_execlists) { - struct i915_gem_context *ctx; - - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - if (!i915_gem_context_is_default(ctx)) - continue; - - for_each_engine(engine, dev_priv, id) - ctx->engine[engine->id].initialised = false; - - ctx->remap_slice = ALL_L3_SLICES(dev_priv); - } - - for_each_engine(engine, dev_priv, id) { - struct intel_context *kce = - &dev_priv->kernel_context->engine[engine->id]; - - kce->initialised = true; - } - } } void i915_gem_contexts_fini(struct drm_i915_private *i915) { - struct i915_gem_context *ctx; - lockdep_assert_held(&i915->drm.struct_mutex); - /* Keep the context so that we can free it immediately ourselves */ - ctx = i915_gem_context_get(fetch_and_zero(&i915->kernel_context)); - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - context_close(ctx); - i915_gem_context_free(ctx); + destroy_kernel_context(&i915->preempt_context); + destroy_kernel_context(&i915->kernel_context); /* Must free all deferred contexts (via flush_workqueue) first */ ida_destroy(&i915->contexts.hw_ida); @@ -562,310 +567,7 @@ void i915_gem_context_close(struct drm_file *file) idr_destroy(&file_priv->context_idr); } -static inline int -mi_set_context(struct drm_i915_gem_request *req, u32 flags) -{ - struct drm_i915_private *dev_priv = req->i915; - struct intel_engine_cs *engine = req->engine; - enum intel_engine_id id; - const int num_rings = - /* Use an extended w/a on gen7 if signalling from other rings */ - (i915.semaphores && INTEL_GEN(dev_priv) == 7) ? - INTEL_INFO(dev_priv)->num_rings - 1 : - 0; - int len; - u32 *cs; - - flags |= MI_MM_SPACE_GTT; - if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) - /* These flags are for resource streamer on HSW+ */ - flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; - else - flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; - - len = 4; - if (INTEL_GEN(dev_priv) >= 7) - len += 2 + (num_rings ? 
4*num_rings + 6 : 0); - - cs = intel_ring_begin(req, len); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ - if (INTEL_GEN(dev_priv) >= 7) { - *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - if (num_rings) { - struct intel_engine_cs *signaller; - - *cs++ = MI_LOAD_REGISTER_IMM(num_rings); - for_each_engine(signaller, dev_priv, id) { - if (signaller == engine) - continue; - - *cs++ = i915_mmio_reg_offset( - RING_PSMI_CTL(signaller->mmio_base)); - *cs++ = _MASKED_BIT_ENABLE( - GEN6_PSMI_SLEEP_MSG_DISABLE); - } - } - } - - *cs++ = MI_NOOP; - *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags; - /* - * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP - * WaMiSetContext_Hang:snb,ivb,vlv - */ - *cs++ = MI_NOOP; - - if (INTEL_GEN(dev_priv) >= 7) { - if (num_rings) { - struct intel_engine_cs *signaller; - i915_reg_t last_reg = {}; /* keep gcc quiet */ - - *cs++ = MI_LOAD_REGISTER_IMM(num_rings); - for_each_engine(signaller, dev_priv, id) { - if (signaller == engine) - continue; - - last_reg = RING_PSMI_CTL(signaller->mmio_base); - *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = _MASKED_BIT_DISABLE( - GEN6_PSMI_SLEEP_MSG_DISABLE); - } - - /* Insert a delay before the next switch! */ - *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = i915_ggtt_offset(engine->scratch); - *cs++ = MI_NOOP; - } - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - } - - intel_ring_advance(req, cs); - - return 0; -} - -static int remap_l3(struct drm_i915_gem_request *req, int slice) -{ - u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice]; - int i; - - if (!remap_info) - return 0; - - cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* - * Note: We do not worry about the concurrent register cacheline hang - * here because no other code should access these registers other than - * at initialization time. 
- */ - *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); - for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { - *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); - *cs++ = remap_info[i]; - } - *cs++ = MI_NOOP; - intel_ring_advance(req, cs); - - return 0; -} - -static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *engine, - struct i915_gem_context *to) -{ - if (to->remap_slice) - return false; - - if (!to->engine[RCS].initialised) - return false; - - if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) - return false; - - return to == engine->legacy_active_context; -} - -static bool -needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine) -{ - struct i915_gem_context *from = engine->legacy_active_context; - - if (!ppgtt) - return false; - - /* Always load the ppgtt on first use */ - if (!from) - return true; - - /* Same context without new entries, skip */ - if ((!from->ppgtt || from->ppgtt == ppgtt) && - !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) - return false; - - if (engine->id != RCS) - return true; - - if (INTEL_GEN(engine->i915) < 8) - return true; - - return false; -} - -static bool -needs_pd_load_post(struct i915_hw_ppgtt *ppgtt, - struct i915_gem_context *to, - u32 hw_flags) -{ - if (!ppgtt) - return false; - - if (!IS_GEN8(to->i915)) - return false; - - if (hw_flags & MI_RESTORE_INHIBIT) - return true; - - return false; -} - -static int do_rcs_switch(struct drm_i915_gem_request *req) -{ - struct i915_gem_context *to = req->ctx; - struct intel_engine_cs *engine = req->engine; - struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; - struct i915_gem_context *from = engine->legacy_active_context; - u32 hw_flags; - int ret, i; - - GEM_BUG_ON(engine->id != RCS); - - if (skip_rcs_switch(ppgtt, engine, to)) - return 0; - - if (needs_pd_load_pre(ppgtt, engine)) { - /* Older GENs and non render rings still want the load first, - * "PP_DCLV followed by PP_DIR_BASE register through Load - * Register Immediate commands in Ring Buffer before submitting - * a context."*/ - trace_switch_mm(engine, to); - ret = ppgtt->switch_mm(ppgtt, req); - if (ret) - return ret; - } - - if (!to->engine[RCS].initialised || i915_gem_context_is_default(to)) - /* NB: If we inhibit the restore, the context is not allowed to - * die because future work may end up depending on valid address - * space. This means we must enforce that a page table load - * occur when this occurs. */ - hw_flags = MI_RESTORE_INHIBIT; - else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings) - hw_flags = MI_FORCE_RESTORE; - else - hw_flags = 0; - - if (to != from || (hw_flags & MI_FORCE_RESTORE)) { - ret = mi_set_context(req, hw_flags); - if (ret) - return ret; - - engine->legacy_active_context = to; - } - - /* GEN8 does *not* require an explicit reload if the PDPs have been - * setup, and we do not wish to move them. - */ - if (needs_pd_load_post(ppgtt, to, hw_flags)) { - trace_switch_mm(engine, to); - ret = ppgtt->switch_mm(ppgtt, req); - /* The hardware context switch is emitted, but we haven't - * actually changed the state - so it's probably safe to bail - * here. Still, let the user know something dangerous has - * happened. 
- */ - if (ret) - return ret; - } - - if (ppgtt) - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); - - for (i = 0; i < MAX_L3_SLICES; i++) { - if (!(to->remap_slice & (1<<i))) - continue; - - ret = remap_l3(req, i); - if (ret) - return ret; - - to->remap_slice &= ~(1<<i); - } - - if (!to->engine[RCS].initialised) { - if (engine->init_context) { - ret = engine->init_context(req); - if (ret) - return ret; - } - to->engine[RCS].initialised = true; - } - - return 0; -} - -/** - * i915_switch_context() - perform a GPU context switch. - * @req: request for which we'll execute the context switch - * - * The context life cycle is simple. The context refcount is incremented and - * decremented by 1 and create and destroy. If the context is in use by the GPU, - * it will have a refcount > 1. This allows us to destroy the context abstract - * object while letting the normal object tracking destroy the backing BO. - * - * This function should not be used in execlists mode. Instead the context is - * switched by writing to the ELSP and requests keep a reference to their - * context. - */ -int i915_switch_context(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - - lockdep_assert_held(&req->i915->drm.struct_mutex); - if (i915.enable_execlists) - return 0; - - if (!req->ctx->engine[engine->id].state) { - struct i915_gem_context *to = req->ctx; - struct i915_hw_ppgtt *ppgtt = - to->ppgtt ?: req->i915->mm.aliasing_ppgtt; - - if (needs_pd_load_pre(ppgtt, engine)) { - int ret; - - trace_switch_mm(engine, to); - ret = ppgtt->switch_mm(ppgtt, req); - if (ret) - return ret; - - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); - } - - engine->legacy_active_context = to; - return 0; - } - - return do_rcs_switch(req); -} - -static bool engine_has_kernel_context(struct intel_engine_cs *engine) +static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) { struct i915_gem_timeline *timeline; @@ -881,8 +583,7 @@ static bool engine_has_kernel_context(struct intel_engine_cs *engine) return false; } - return (!engine->last_retired_context || - i915_gem_context_is_kernel(engine->last_retired_context)); + return intel_engine_has_kernel_context(engine); } int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) @@ -897,9 +598,8 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *req; - int ret; - if (engine_has_kernel_context(engine)) + if (engine_has_idle_kernel_context(engine)) continue; req = i915_gem_request_alloc(engine, dev_priv->kernel_context); @@ -917,13 +617,17 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) if (prev) i915_sw_fence_await_sw_fence_gfp(&req->submit, &prev->submit, - GFP_KERNEL); + I915_FENCE_GFP); } - ret = i915_switch_context(req); - i915_add_request(req); - if (ret) - return ret; + /* + * Force a flush after the switch to ensure that all rendering + * and operations prior to switching to the kernel context hits + * memory. This should be guaranteed by the previous request, + * but an extra layer of paranoia before we declare the system + * idle (on suspend etc) is advisable! 
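A couple of hunks below, i915_gem_context_getparam_ioctl()/setparam_ioctl() gain I915_CONTEXT_PARAM_PRIORITY. A minimal userspace sketch of driving it (assumes libdrm's xf86drm.h and the i915_drm.h UAPI; fd is an open i915 DRM fd, error handling elided):

#include <stdint.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Raise (or lower) a context's scheduling priority; raising above the
 * default requires CAP_SYS_NICE, mirroring the checks in the hunk below. */
static int set_ctx_priority(int fd, uint32_t ctx_id, int64_t prio)
{
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		/* .size must stay 0: the kernel rejects non-zero sizes */
		.param = I915_CONTEXT_PARAM_PRIORITY,
		.value = (uint64_t)prio,
	};

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
}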
+ */ + __i915_add_request(req, true); } return 0; @@ -1038,6 +742,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_BANNABLE: args->value = i915_gem_context_is_bannable(ctx); break; + case I915_CONTEXT_PARAM_PRIORITY: + args->value = ctx->priority; + break; default: ret = -EINVAL; break; @@ -1093,6 +800,26 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, else i915_gem_context_clear_bannable(ctx); break; + + case I915_CONTEXT_PARAM_PRIORITY: + { + s64 priority = args->value; + + if (args->size) + ret = -EINVAL; + else if (!to_i915(dev)->engine[RCS]->schedule) + ret = -ENODEV; + else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + ret = -EINVAL; + else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + ret = -EPERM; + else + ctx->priority = priority; + } + break; + default: ret = -EINVAL; break; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 44688e22a5c2..4bfb72f8e1cb 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -157,7 +157,6 @@ struct i915_gem_context { u32 *lrc_reg_state; u64 lrc_desc; int pin_count; - bool initialised; } engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ @@ -292,6 +291,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +struct i915_gem_context * +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); + static inline struct i915_gem_context * i915_gem_context_get(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 6176e589cf09..864439a214c8 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -256,11 +256,21 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, return drm_gem_dmabuf_export(dev, &exp_info); } -static struct sg_table * -i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { - return dma_buf_map_attachment(obj->base.import_attach, - DMA_BIDIRECTIONAL); + struct sg_table *pages; + unsigned int sg_page_sizes; + + pages = dma_buf_map_attachment(obj->base.import_attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + sg_page_sizes = i915_sg_page_sizes(pages->sgl); + + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + + return 0; } static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index e161d383b526..60ca4f05ae94 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -33,6 +33,10 @@ #include "intel_drv.h" #include "i915_trace.h" +I915_SELFTEST_DECLARE(static struct igt_evict_ctl { + bool fail_if_busy:1; +} igt_evict_ctl;) + static bool ggtt_is_idle(struct drm_i915_private *i915) { struct intel_engine_cs *engine; @@ -42,7 +46,7 @@ static bool ggtt_is_idle(struct drm_i915_private *i915) return false; for_each_engine(engine, i915, id) { - if (engine->last_retired_context != i915->kernel_context) + if (!intel_engine_has_kernel_context(engine)) return false; } @@ -69,6 +73,7 @@ static int ggtt_flush(struct 
drm_i915_private *i915) if (err) return err; + GEM_BUG_ON(!ggtt_is_idle(i915)); return 0; } @@ -81,7 +86,7 @@ mark_free(struct drm_mm_scan *scan, if (i915_vma_is_pinned(vma)) return false; - if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link)) + if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) return false; list_add(&vma->evict_link, unwind); @@ -205,10 +210,14 @@ search_again: * the kernel's there is no more we can evict. */ if (!ggtt_is_idle(dev_priv)) { + if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) + return -EBUSY; + ret = ggtt_flush(dev_priv); if (ret) return ret; + cond_resched(); goto search_again; } @@ -330,6 +339,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } + if (flags & PIN_NONFAULT && i915_vma_has_userfault(vma)) { + ret = -ENOSPC; + break; + } + /* Overlap of objects in the same batch? */ if (i915_vma_is_pinned(vma)) { ret = -ENOSPC; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4ac454ae54d7..3ab1ace2a6bd 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -58,6 +58,7 @@ enum { #define __EXEC_HAS_RELOC BIT(31) #define __EXEC_VALIDATED BIT(30) +#define __EXEC_INTERNAL_FLAGS (~0u << 30) #define UPDATE PIN_OFFSET_FIXED #define BATCH_OFFSET_BIAS (256*1024) @@ -268,6 +269,11 @@ static inline u64 gen8_noncanonical_addr(u64 address) return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); } +static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) +{ + return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; +} + static int eb_create(struct i915_execbuffer *eb) { if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { @@ -365,12 +371,12 @@ eb_pin_vma(struct i915_execbuffer *eb, return false; if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { - if (unlikely(i915_vma_get_fence(vma))) { + if (unlikely(i915_vma_pin_fence(vma))) { i915_vma_unpin(vma); return false; } - if (i915_vma_pin_fence(vma)) + if (vma->fence) exec_flags |= __EXEC_OBJECT_HAS_FENCE; } @@ -383,7 +389,7 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN)); if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) - i915_vma_unpin_fence(vma); + __i915_vma_unpin_fence(vma); __i915_vma_unpin(vma); } @@ -499,6 +505,8 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma) list_add_tail(&vma->exec_link, &eb->unbound); if (drm_mm_node_allocated(&vma->node)) err = i915_vma_unbind(vma); + if (unlikely(err)) + vma->exec_flags = NULL; } return err; } @@ -561,13 +569,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, } if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { - err = i915_vma_get_fence(vma); + err = i915_vma_pin_fence(vma); if (unlikely(err)) { i915_vma_unpin(vma); return err; } - if (i915_vma_pin_fence(vma)) + if (vma->fence) exec_flags |= __EXEC_OBJECT_HAS_FENCE; } @@ -678,7 +686,7 @@ static int eb_select_context(struct i915_execbuffer *eb) static int eb_lookup_vmas(struct i915_execbuffer *eb) { struct radix_tree_root *handles_vma = &eb->ctx->handles_vma; - struct drm_i915_gem_object *uninitialized_var(obj); + struct drm_i915_gem_object *obj; unsigned int i; int err; @@ -724,19 +732,17 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) goto err_obj; } + /* transfer ref to ctx */ vma->open_count++; list_add(&lut->obj_link, &obj->lut_list); list_add(&lut->ctx_link, &eb->ctx->handles_list); lut->ctx = eb->ctx; lut->handle 
= handle; - /* transfer ref to ctx */ - obj = NULL; - add_vma: err = eb_add_vma(eb, i, vma); if (unlikely(err)) - goto err_obj; + goto err_vma; GEM_BUG_ON(vma != eb->vma[i]); GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); @@ -765,8 +771,7 @@ add_vma: return eb_reserve(eb); err_obj: - if (obj) - i915_gem_object_put(obj); + i915_gem_object_put(obj); err_vma: eb->vma[i] = NULL; return err; @@ -975,7 +980,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, return ERR_PTR(err); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONBLOCK | + PIN_NONFAULT); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); err = drm_mm_insert_node_in_range @@ -1007,7 +1014,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, offset += page << PAGE_SHIFT; } - vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->mappable, + vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, offset); cache->page = page; cache->vaddr = (unsigned long)vaddr; @@ -1106,14 +1113,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_request; - err = eb->engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - goto err_request; - - err = i915_switch_context(rq); - if (err) - goto err_request; - err = eb->engine->emit_bb_start(rq, batch->node.start, PAGE_SIZE, cache->gen > 5 ? 0 : I915_DISPATCH_SECURE); @@ -1163,6 +1162,13 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (unlikely(!cache->rq)) { int err; + /* If we need to copy for the cmdparser, we will stall anyway */ + if (eb_use_cmdparser(eb)) + return ERR_PTR(-EWOULDBLOCK); + + if (!intel_engine_can_store_dword(eb->engine)) + return ERR_PTR(-ENODEV); + err = __reloc_gpu_alloc(eb, vma, len); if (unlikely(err)) return ERR_PTR(err); @@ -1187,9 +1193,7 @@ relocate_entry(struct i915_vma *vma, if (!eb->reloc_cache.vaddr && (DBG_FORCE_RELOC == FORCE_GPU_RELOC || - !reservation_object_test_signaled_rcu(vma->resv, true)) && - __intel_engine_can_store_dword(eb->reloc_cache.gen, - eb->engine->class)) { + !reservation_object_test_signaled_rcu(vma->resv, true))) { const unsigned int gen = eb->reloc_cache.gen; unsigned int len; u32 *batch; @@ -1581,7 +1585,7 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb) const unsigned int count = eb->buffer_count; unsigned int i; - if (unlikely(i915.prefault_disable)) + if (unlikely(i915_modparams.prefault_disable)) return 0; for (i = 0; i < count; i++) { @@ -1808,8 +1812,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) /* Unconditionally flush any chipset caches (for streaming writes). */ i915_gem_chipset_flush(eb->i915); - /* Unconditionally invalidate GPU caches and TLBs. 
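reloc_gpu() above now refuses the GPU path before building a relocation batch; the two new early-outs condense to the following sketch (illustrative only, using just identifiers from this hunk):

/* Sketch of the new reloc_gpu() preconditions. */
static int reloc_gpu_viable(const struct i915_execbuffer *eb)
{
	/* The cmdparser copies the batch anyway, so a stall is already
	 * unavoidable: prefer CPU relocations. */
	if (eb_use_cmdparser(eb))
		return -EWOULDBLOCK;

	/* No usable MI_STORE_DWORD on this engine: GPU relocations are
	 * impossible. */
	if (!intel_engine_can_store_dword(eb->engine))
		return -ENODEV;

	return 0;
}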
*/ - return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE); + return 0; } static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) @@ -1955,10 +1958,6 @@ static int eb_submit(struct i915_execbuffer *eb) if (err) return err; - err = i915_switch_context(eb->request); - if (err) - return err; - if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { err = i915_reset_gen7_sol_offsets(eb->request); if (err) @@ -2064,23 +2063,27 @@ static struct drm_syncobj ** get_fence_array(struct drm_i915_gem_execbuffer2 *args, struct drm_file *file) { - const unsigned int nfences = args->num_cliprects; + const unsigned long nfences = args->num_cliprects; struct drm_i915_gem_exec_fence __user *user; struct drm_syncobj **fences; - unsigned int n; + unsigned long n; int err; if (!(args->flags & I915_EXEC_FENCE_ARRAY)) return NULL; - if (nfences > SIZE_MAX / sizeof(*fences)) + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (nfences > min_t(unsigned long, + ULONG_MAX / sizeof(*user), + SIZE_MAX / sizeof(*fences))) return ERR_PTR(-EINVAL); user = u64_to_user_ptr(args->cliprects_ptr); - if (!access_ok(VERIFY_READ, user, nfences * 2 * sizeof(u32))) + if (!access_ok(VERIFY_READ, user, nfences * sizeof(*user))) return ERR_PTR(-EFAULT); - fences = kvmalloc_array(args->num_cliprects, sizeof(*fences), + fences = kvmalloc_array(nfences, sizeof(*fences), __GFP_NOWARN | GFP_KERNEL); if (!fences) return ERR_PTR(-ENOMEM); @@ -2094,6 +2097,11 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, goto err; } + if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + syncobj = drm_syncobj_find(file, fence.handle); if (!syncobj) { DRM_DEBUG("Invalid syncobj handle provided\n"); @@ -2101,6 +2109,9 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, goto err; } + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); } @@ -2182,6 +2193,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, int out_fence_fd = -1; int err; + BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); @@ -2295,7 +2307,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } - if (eb.engine->needs_cmd_parser && eb.batch_len) { + if (eb_use_cmdparser(&eb)) { struct i915_vma *vma; vma = eb_parse(&eb, drm_is_current_master(file)); @@ -2400,7 +2412,7 @@ err_request: if (out_fence) { if (err == 0) { fd_install(out_fence_fd, out_fence->file); - args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ + args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */ args->rsvd2 |= (u64)out_fence_fd << 32; out_fence_fd = -1; } else { @@ -2428,6 +2440,26 @@ err_in_fence: return err; } +static size_t eb_element_size(void) +{ + return (sizeof(struct drm_i915_gem_exec_object2) + + sizeof(struct i915_vma *) + + sizeof(unsigned int)); +} + +static bool check_buffer_count(size_t count) +{ + const size_t sz = eb_element_size(); + + /* + * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup + * array size (see eb_create()). Otherwise, we can accept an array as + * large as can be addressed (though use large arrays at your peril)! + */ + + return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1); +} + /* * Legacy execbuffer just creates an exec2 list from the original exec object * list array and passes it to the real function. 
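*/

Both counts validated in this hunk (nfences above and buffer_count in check_buffer_count()) follow the same guard pattern for user-controlled multiplications. A standalone restatement in plain C (illustrative only; the element sizes are parameters here, not the real struct sizes):

#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * A user-supplied count is only safe in `count * size` if it is bounded
 * by MAX / size for every element size involved: the user-copy element
 * (access_ok() length) and the kernel array element (kvmalloc_array()).
 */
static bool count_is_safe(unsigned long count,
			  size_t user_elem, size_t kernel_elem)
{
	if (count > ULONG_MAX / user_elem)
		return false;	/* user buffer length would wrap */
	if (count > SIZE_MAX / kernel_elem)
		return false;	/* allocation size would wrap */
	return true;
}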
@@ -2436,18 +2468,16 @@ int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file) { - const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) + - sizeof(struct i915_vma *) + - sizeof(unsigned int)); struct drm_i915_gem_execbuffer *args = data; struct drm_i915_gem_execbuffer2 exec2; struct drm_i915_gem_exec_object *exec_list = NULL; struct drm_i915_gem_exec_object2 *exec2_list = NULL; + const size_t count = args->buffer_count; unsigned int i; int err; - if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) { - DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); + if (!check_buffer_count(count)) { + DRM_DEBUG("execbuf2 with %zd buffers\n", count); return -EINVAL; } @@ -2466,9 +2496,9 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, return -EINVAL; /* Copy in the exec list from userland */ - exec_list = kvmalloc_array(args->buffer_count, sizeof(*exec_list), + exec_list = kvmalloc_array(count, sizeof(*exec_list), __GFP_NOWARN | GFP_KERNEL); - exec2_list = kvmalloc_array(args->buffer_count + 1, sz, + exec2_list = kvmalloc_array(count + 1, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec_list == NULL || exec2_list == NULL) { DRM_DEBUG("Failed to allocate exec list for %d buffers\n", @@ -2479,7 +2509,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, } err = copy_from_user(exec_list, u64_to_user_ptr(args->buffers_ptr), - sizeof(*exec_list) * args->buffer_count); + sizeof(*exec_list) * count); if (err) { DRM_DEBUG("copy %d exec entries failed %d\n", args->buffer_count, err); @@ -2529,16 +2559,14 @@ int i915_gem_execbuffer2(struct drm_device *dev, void *data, struct drm_file *file) { - const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) + - sizeof(struct i915_vma *) + - sizeof(unsigned int)); struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list; struct drm_syncobj **fences = NULL; + const size_t count = args->buffer_count; int err; - if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) { - DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); + if (!check_buffer_count(count)) { + DRM_DEBUG("execbuf2 with %zd buffers\n", count); return -EINVAL; } @@ -2546,17 +2574,17 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; /* Allocate an extra slot for use by the command parser */ - exec2_list = kvmalloc_array(args->buffer_count + 1, sz, + exec2_list = kvmalloc_array(count + 1, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec2_list == NULL) { - DRM_DEBUG("Failed to allocate exec list for %d buffers\n", - args->buffer_count); + DRM_DEBUG("Failed to allocate exec list for %zd buffers\n", + count); return -ENOMEM; } if (copy_from_user(exec2_list, u64_to_user_ptr(args->buffers_ptr), - sizeof(*exec2_list) * args->buffer_count)) { - DRM_DEBUG("copy %d exec entries failed\n", args->buffer_count); + sizeof(*exec2_list) * count)) { + DRM_DEBUG("copy %zd exec entries failed\n", count); kvfree(exec2_list); return -EFAULT; } diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 5fe2cd8c8f28..012250f25255 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -240,7 +240,8 @@ static int fence_update(struct drm_i915_fence_reg *fence, /* Ensure that all userspace CPU access is completed before * stealing the fence. 
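The next hunks rename i915_vma_get_fence() to i915_vma_pin_fence() and make the pin explicit. The expected calling pattern, sketched from the kernel-doc below (illustrative only; assumes the caller holds struct_mutex and a runtime-pm wakeref, as the comments require):

/* Sketch: pin a fence around GTT access, then release the pin. */
static int access_through_fence(struct i915_vma *vma)
{
	int err;

	err = i915_vma_pin_fence(vma);	/* may steal + program a register */
	if (err)
		return err;

	/* ... detiled GTT access via vma->fence ... */

	i915_vma_unpin_fence(vma);	/* drop pin_count; LRU slot kept */
	return 0;
}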
*/ - i915_gem_release_mmap(fence->vma->obj); + GEM_BUG_ON(fence->vma->fence != fence); + i915_vma_revoke_mmap(fence->vma); fence->vma->fence = NULL; fence->vma = NULL; @@ -280,8 +281,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, * * 0 on success, negative error code on failure. */ -int -i915_vma_put_fence(struct i915_vma *vma) +int i915_vma_put_fence(struct i915_vma *vma) { struct drm_i915_fence_reg *fence = vma->fence; @@ -299,6 +299,8 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) struct drm_i915_fence_reg *fence; list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { + GEM_BUG_ON(fence->vma && fence->vma->fence != fence); + if (fence->pin_count) continue; @@ -313,7 +315,7 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) } /** - * i915_vma_get_fence - set up fencing for a vma + * i915_vma_pin_fence - set up fencing for a vma * @vma: vma to map through a fence reg * * When mapping objects through the GTT, userspace wants to be able to write @@ -331,10 +333,11 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) * 0 on success, negative error code on failure. */ int -i915_vma_get_fence(struct i915_vma *vma) +i915_vma_pin_fence(struct i915_vma *vma) { struct drm_i915_fence_reg *fence; struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; + int err; /* Note that we revoke fences on runtime suspend. Therefore the user * must keep the device awake whilst using the fence. @@ -344,6 +347,8 @@ i915_vma_get_fence(struct i915_vma *vma) /* Just update our place in the LRU if our fence is getting reused. */ if (vma->fence) { fence = vma->fence; + GEM_BUG_ON(fence->vma != vma); + fence->pin_count++; if (!fence->dirty) { list_move_tail(&fence->link, &fence->i915->mm.fence_list); @@ -353,10 +358,76 @@ i915_vma_get_fence(struct i915_vma *vma) fence = fence_find(vma->vm->i915); if (IS_ERR(fence)) return PTR_ERR(fence); + + GEM_BUG_ON(fence->pin_count); + fence->pin_count++; } else return 0; - return fence_update(fence, set); + err = fence_update(fence, set); + if (err) + goto out_unpin; + + GEM_BUG_ON(fence->vma != set); + GEM_BUG_ON(vma->fence != (set ? fence : NULL)); + + if (set) + return 0; + +out_unpin: + fence->pin_count--; + return err; +} + +/** + * i915_reserve_fence - Reserve a fence for vGPU + * @dev_priv: i915 device private + * + * This function walks the fence regs looking for a free one and removes + * it from the fence_list. It is used to reserve a fence for vGPU to use. + */ +struct drm_i915_fence_reg * +i915_reserve_fence(struct drm_i915_private *dev_priv) +{ + struct drm_i915_fence_reg *fence; + int count; + int ret; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + /* Keep at least one fence available for the display engine. */ + count = 0; + list_for_each_entry(fence, &dev_priv->mm.fence_list, link) + count += !fence->pin_count; + if (count <= 1) + return ERR_PTR(-ENOSPC); + + fence = fence_find(dev_priv); + if (IS_ERR(fence)) + return fence; + + if (fence->vma) { + /* Force-remove fence from VMA */ + ret = fence_update(fence, NULL); + if (ret) + return ERR_PTR(ret); + } + + list_del(&fence->link); + return fence; +} + +/** + * i915_unreserve_fence - Reclaim a reserved fence + * @fence: the fence reg + * + * This function adds a reserved fence register from vGPU back to the fence_list.
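A usage sketch for this reserve/unreserve pair (hypothetical caller, not in the tree; mirrors how a vGPU backend would grab a block of fences under struct_mutex):

/* Hypothetical: reserve n fences for a vGPU, unwinding on failure. */
static int vgpu_grab_fences(struct drm_i915_private *dev_priv,
			    struct drm_i915_fence_reg **regs, int n)
{
	int i;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	for (i = 0; i < n; i++) {
		regs[i] = i915_reserve_fence(dev_priv);
		if (IS_ERR(regs[i])) {
			int err = PTR_ERR(regs[i]);

			while (i--)
				i915_unreserve_fence(regs[i]);
			return err;
		}
	}

	return 0;
}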
+ */ +void i915_unreserve_fence(struct drm_i915_fence_reg *fence) +{ + lockdep_assert_held(&fence->i915->drm.struct_mutex); + + list_add(&fence->link, &fence->i915->mm.fence_list); } /** @@ -378,8 +449,10 @@ void i915_gem_revoke_fences(struct drm_i915_private *dev_priv) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; + GEM_BUG_ON(fence->vma && fence->vma->fence != fence); + if (fence->vma) - i915_gem_release_mmap(fence->vma->obj); + i915_vma_revoke_mmap(fence->vma); } } @@ -399,13 +472,15 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv) struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; struct i915_vma *vma = reg->vma; + GEM_BUG_ON(vma && vma->fence != reg); + /* * Commit delayed tiling changes if we have an object still * attached to the fence, otherwise just clear the fence. */ if (vma && !i915_gem_object_is_tiled(vma->obj)) { GEM_BUG_ON(!reg->dirty); - GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); + GEM_BUG_ON(i915_vma_has_userfault(vma)); list_move(®->link, &dev_priv->mm.fence_list); vma->fence = NULL; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e2410eb5d96e..7e403eaa9e0f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -135,11 +135,12 @@ static inline void i915_ggtt_invalidate(struct drm_i915_private *i915) int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, int enable_ppgtt) { - bool has_aliasing_ppgtt; bool has_full_ppgtt; bool has_full_48bit_ppgtt; - has_aliasing_ppgtt = dev_priv->info.has_aliasing_ppgtt; + if (!dev_priv->info.has_aliasing_ppgtt) + return 0; + has_full_ppgtt = dev_priv->info.has_full_ppgtt; has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt; @@ -149,9 +150,6 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv); } - if (!has_aliasing_ppgtt) - return 0; - /* * We don't allow disabling PPGTT for gen9+ as it's a requirement for * execlists, the sole mechanism available to submit work. @@ -180,7 +178,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, return 0; } - if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists) { + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { if (has_full_48bit_ppgtt) return 3; @@ -188,7 +186,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, return 2; } - return has_aliasing_ppgtt ? 
1 : 0; + return 1; } static int ppgtt_bind_vma(struct i915_vma *vma, @@ -205,8 +203,6 @@ static int ppgtt_bind_vma(struct i915_vma *vma, return ret; } - vma->pages = vma->obj->mm.pages; - /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) @@ -222,6 +218,30 @@ static void ppgtt_unbind_vma(struct i915_vma *vma) vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } +static int ppgtt_set_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->pages); + + vma->pages = vma->obj->mm.pages; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + +static void clear_pages(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + + if (vma->pages != vma->obj->mm.pages) { + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + + memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); +} + static gen8_pte_t gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level) { @@ -230,13 +250,13 @@ static gen8_pte_t gen8_pte_encode(dma_addr_t addr, switch (level) { case I915_CACHE_NONE: - pte |= PPAT_UNCACHED_INDEX; + pte |= PPAT_UNCACHED; break; case I915_CACHE_WT: - pte |= PPAT_DISPLAY_ELLC_INDEX; + pte |= PPAT_DISPLAY_ELLC; break; default: - pte |= PPAT_CACHED_INDEX; + pte |= PPAT_CACHED; break; } @@ -249,9 +269,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; pde |= addr; if (level != I915_CACHE_NONE) - pde |= PPAT_CACHED_PDE_INDEX; + pde |= PPAT_CACHED_PDE; else - pde |= PPAT_UNCACHED_INDEX; + pde |= PPAT_UNCACHED; return pde; } @@ -356,39 +376,112 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr, static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) { - struct page *page; + struct pagevec *pvec = &vm->free_pages; + struct pagevec stash; if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) i915_gem_shrink_all(vm->i915); - if (vm->free_pages.nr) - return vm->free_pages.pages[--vm->free_pages.nr]; + if (likely(pvec->nr)) + return pvec->pages[--pvec->nr]; + + if (!vm->pt_kmap_wc) + return alloc_page(gfp); + + /* A placeholder for a specific mutex to guard the WC stash */ + lockdep_assert_held(&vm->i915->drm.struct_mutex); + + /* Look in our global stash of WC pages... */ + pvec = &vm->i915->mm.wc_stash; + if (likely(pvec->nr)) + return pvec->pages[--pvec->nr]; + + /* + * Otherwise batch allocate pages to amortize the cost of set_pages_wc. + * + * We have to be careful as page allocation may trigger the shrinker + * (via direct reclaim) which will fill up the WC stash underneath us. + * So we add our WB pages into a temporary pvec on the stack and merge + * them into the WC stash after all the allocations are complete. + */ + pagevec_init(&stash); + do { + struct page *page; + + page = alloc_page(gfp); + if (unlikely(!page)) + break; - page = alloc_page(gfp); - if (!page) - return NULL; + stash.pages[stash.nr++] = page; + } while (stash.nr < pagevec_space(pvec)); - if (vm->pt_kmap_wc) - set_pages_array_wc(&page, 1); + if (stash.nr) { + int nr = min_t(int, stash.nr, pagevec_space(pvec)); + struct page **pages = stash.pages + stash.nr - nr; + + if (nr && !set_pages_array_wc(pages, nr)) { + memcpy(pvec->pages + pvec->nr, + pages, sizeof(pages[0]) * nr); + pvec->nr += nr; + stash.nr -= nr; + } - return page; + pagevec_release(&stash); + } + + return likely(pvec->nr) ?
pvec->pages[--pvec->nr] : NULL; } -static void vm_free_pages_release(struct i915_address_space *vm) +static void vm_free_pages_release(struct i915_address_space *vm, + bool immediate) { - GEM_BUG_ON(!pagevec_count(&vm->free_pages)); + struct pagevec *pvec = &vm->free_pages; + + GEM_BUG_ON(!pagevec_count(pvec)); + + if (vm->pt_kmap_wc) { + struct pagevec *stash = &vm->i915->mm.wc_stash; - if (vm->pt_kmap_wc) - set_pages_array_wb(vm->free_pages.pages, - pagevec_count(&vm->free_pages)); + /* When we use WC, first fill up the global stash and then + * only if full immediately free the overflow. + */ - __pagevec_release(&vm->free_pages); + lockdep_assert_held(&vm->i915->drm.struct_mutex); + if (pagevec_space(stash)) { + do { + stash->pages[stash->nr++] = + pvec->pages[--pvec->nr]; + if (!pvec->nr) + return; + } while (pagevec_space(stash)); + + /* As we have made some room in the VM's free_pages, + * we can wait for it to fill again. Unless we are + * inside i915_address_space_fini() and must + * immediately release the pages! + */ + if (!immediate) + return; + } + + set_pages_array_wb(pvec->pages, pvec->nr); + } + + __pagevec_release(pvec); } static void vm_free_page(struct i915_address_space *vm, struct page *page) { + /* + * On !llc, we need to change the pages back to WB. We only do so + * in bulk, so we rarely need to change the page attributes here, + * but doing so requires a stop_machine() from deep inside arch/x86/mm. + * To make detection of the possible sleep more likely, use an + * unconditional might_sleep() for everybody. + */ + might_sleep(); if (!pagevec_add(&vm->free_pages, page)) - vm_free_pages_release(vm); + vm_free_pages_release(vm, false); } static int __setup_page_dma(struct i915_address_space *vm, @@ -434,10 +527,8 @@ static void fill_page_dma(struct i915_address_space *vm, const u64 val) { u64 * const vaddr = kmap_atomic(p->page); - int i; - for (i = 0; i < 512; i++) - vaddr[i] = val; + memset64(vaddr, val, PAGE_SIZE / sizeof(val)); kunmap_atomic(vaddr); } @@ -452,12 +543,73 @@ static void fill_page_dma_32(struct i915_address_space *vm, static int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) { - return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO); + struct page *page = NULL; + dma_addr_t addr; + int order; + + /* + * In order to utilize 64K pages for an object with a size < 2M, we will + * need to support a 64K scratch page, given that every 16th entry for a + * page-table operating in 64K mode must point to a properly aligned 64K + * region, including any PTEs which happen to point to scratch. + * + * This is only relevant for the 48b PPGTT where we support + * huge-gtt-pages, see also i915_vma_insert(). 
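Worked numbers behind the comment above, for orientation: a GEN8 page table holds 512 PTEs covering 2M, and in 64K (IPS) mode the walker consumes only every 16th entry, i.e. 512 / 16 = 32 slots of 64K each. Any such slot that falls back to scratch therefore needs a 64K-aligned 64K scratch allocation rather than the usual 4K scratch page, which is exactly what the allocation and IS_ALIGNED() fallback below implement.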
+ * + * TODO: we should really consider write-protecting the scratch-page and + * sharing between ppgtt + */ + if (i915_vm_is_48bit(vm) && + HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { + order = get_order(I915_GTT_PAGE_SIZE_64K); + page = alloc_pages(gfp | __GFP_ZERO | __GFP_NOWARN, order); + if (page) { + addr = dma_map_page(vm->dma, page, 0, + I915_GTT_PAGE_SIZE_64K, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, addr))) { + __free_pages(page, order); + page = NULL; + } + + if (!IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) { + dma_unmap_page(vm->dma, addr, + I915_GTT_PAGE_SIZE_64K, + PCI_DMA_BIDIRECTIONAL); + __free_pages(page, order); + page = NULL; + } + } + } + + if (!page) { + order = 0; + page = alloc_page(gfp | __GFP_ZERO); + if (unlikely(!page)) + return -ENOMEM; + + addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, addr))) { + __free_page(page); + return -ENOMEM; + } + } + + vm->scratch_page.page = page; + vm->scratch_page.daddr = addr; + vm->scratch_page.order = order; + + return 0; } static void cleanup_scratch_page(struct i915_address_space *vm) { - cleanup_page_dma(vm, &vm->scratch_page); + struct i915_page_dma *p = &vm->scratch_page; + + dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT, + PCI_DMA_BIDIRECTIONAL); + __free_pages(p->page, p->order); } static struct i915_page_table *alloc_pt(struct i915_address_space *vm) @@ -832,10 +984,14 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, } } -struct sgt_dma { +static inline struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; -}; +} sgt_dma(struct i915_vma *vma) { + struct scatterlist *sg = vma->pages->sgl; + dma_addr_t addr = sg_dma_address(sg); + return (struct sgt_dma) { sg, addr, addr + sg->length }; +} struct gen8_insert_pte { u16 pml4e; @@ -916,15 +1072,110 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = { - .sg = vma->pages->sgl, - .dma = sg_dma_address(iter.sg), - .max = iter.dma + iter.sg->length, - }; + struct sgt_dma iter = sgt_dma(vma); struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, cache_level); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; +} + +static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, + struct i915_page_directory_pointer **pdps, + struct sgt_dma *iter, + enum i915_cache_level cache_level) +{ + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + u64 start = vma->node.start; + dma_addr_t rem = iter->sg->length; + + do { + struct gen8_insert_pte idx = gen8_insert_pte(start); + struct i915_page_directory_pointer *pdp = pdps[idx.pml4e]; + struct i915_page_directory *pd = pdp->page_directory[idx.pdpe]; + unsigned int page_size; + bool maybe_64K = false; + gen8_pte_t encode = pte_encode; + gen8_pte_t *vaddr; + u16 index, max; + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && + rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) { + index = idx.pde; + max = I915_PDES; + page_size = I915_GTT_PAGE_SIZE_2M; + + encode |= GEN8_PDE_PS_2M; + + vaddr = kmap_atomic_px(pd); + } else { + struct i915_page_table *pt = pd->page_table[idx.pde]; + + index = idx.pte; + max = GEN8_PTES; + page_size = I915_GTT_PAGE_SIZE; + + if (!index && + vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + 
(IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT)) + maybe_64K = true; + + vaddr = kmap_atomic_px(pt); + } + + do { + GEM_BUG_ON(iter->sg->length < page_size); + vaddr[index++] = encode | iter->dma; + + start += page_size; + iter->dma += page_size; + rem -= page_size; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) + break; + + rem = iter->sg->length; + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + rem; + + if (maybe_64K && index < max && + !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && + (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || + rem >= (max - index) << PAGE_SHIFT))) + maybe_64K = false; + + if (unlikely(!IS_ALIGNED(iter->dma, page_size))) + break; + } + } while (rem >= page_size && index < max); + + kunmap_atomic(vaddr); + + /* + * Is it safe to mark the 2M block as 64K? -- Either we have + * filled whole page-table with 64K entries, or filled part of + * it and have reached the end of the sg table and we have + * enough padding. + */ + if (maybe_64K && + (index == max || + (i915_vm_has_scratch_64K(vma->vm) && + !iter->sg && IS_ALIGNED(vma->node.start + + vma->node.size, + I915_GTT_PAGE_SIZE_2M)))) { + vaddr = kmap_atomic_px(pd); + vaddr[idx.pde] |= GEN8_PDE_IPS_64K; + kunmap_atomic(vaddr); + page_size = I915_GTT_PAGE_SIZE_64K; + } + + vma->page_sizes.gtt |= page_size; + } while (iter->sg); } static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, @@ -933,17 +1184,20 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sgt_dma iter = { - .sg = vma->pages->sgl, - .dma = sg_dma_address(iter.sg), - .max = iter.dma + iter.sg->length, - }; + struct sgt_dma iter = sgt_dma(vma); struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; - struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); - while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, - &idx, cache_level)) - GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); + if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level); + } else { + struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); + + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], + &iter, &idx, cache_level)) + GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + } } static void gen8_free_page_tables(struct i915_address_space *vm, @@ -1102,19 +1356,25 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, unsigned int pde; gen8_for_each_pde(pt, pd, start, length, pde) { + int count = gen8_pte_count(start, length); + if (pt == vm->scratch_pt) { + pd->used_pdes++; + pt = alloc_pt(vm); - if (IS_ERR(pt)) + if (IS_ERR(pt)) { + pd->used_pdes--; goto unwind; + } - gen8_initialize_pt(vm, pt); + if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) + gen8_initialize_pt(vm, pt); gen8_ppgtt_set_pde(vm, pd, pt, pde); - pd->used_pdes++; GEM_BUG_ON(pd->used_pdes > I915_PDES); } - pt->used_ptes += gen8_pte_count(start, length); + pt->used_ptes += count; } return 0; @@ -1134,13 +1394,16 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { if (pd == vm->scratch_pd) { + pdp->used_pdpes++; + pd = alloc_pd(vm); - if (IS_ERR(pd)) + if (IS_ERR(pd)) { + pdp->used_pdpes--; goto unwind; + } gen8_initialize_pd(vm, pd); gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); - pdp->used_pdpes++; 
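Both allocation hunks here (gen8_ppgtt_alloc_pd() above and gen8_ppgtt_alloc_pdp() being modified at this point) switch to the same ordering; the pattern in isolation (hypothetical helper, not in the tree):

/* Account first, allocate second, unwind the accounting on failure. */
static int reserve_then_alloc_pt(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 struct i915_page_table **slot)
{
	pd->used_pdes++;		/* make the counter cover the new entry */

	*slot = alloc_pt(vm);
	if (IS_ERR(*slot)) {
		pd->used_pdes--;	/* roll back before unwinding */
		return PTR_ERR(*slot);
	}

	return 0;
}

This keeps the used_pdes/used_pdpes counters consistent for the shared unwind path when a later allocation in the same range fails.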
GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm)); mark_tlbs_dirty(i915_vm_to_ppgtt(vm)); @@ -1337,18 +1600,18 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1ULL << 48 : 1ULL << 32; - ret = gen8_init_scratch(&ppgtt->base); - if (ret) { - ppgtt->base.total = 0; - return ret; - } - /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. */ if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) ppgtt->base.pt_kmap_wc = true; + ret = gen8_init_scratch(&ppgtt->base); + if (ret) { + ppgtt->base.total = 0; + return ret; + } + if (use_4lvl(vm)) { ret = setup_px(&ppgtt->base, &ppgtt->pml4); if (ret) @@ -1385,6 +1648,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->debug_dump = gen8_dump_ppgtt; return 0; @@ -1632,13 +1897,10 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, unsigned act_pt = first_entry / GEN6_PTES; unsigned act_pte = first_entry % GEN6_PTES; const u32 pte_encode = vm->pte_encode(0, cache_level, flags); - struct sgt_dma iter; + struct sgt_dma iter = sgt_dma(vma); gen6_pte_t *vaddr; vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); - iter.sg = vma->pages->sgl; - iter.dma = sg_dma_address(iter.sg); - iter.max = iter.dma + iter.sg->length; do { vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); @@ -1659,6 +1921,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } } while (1); kunmap_atomic(vaddr); + + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; } static int gen6_alloc_va_range(struct i915_address_space *vm, @@ -1827,6 +2091,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->base.cleanup = gen6_ppgtt_cleanup; ppgtt->debug_dump = gen6_dump_ppgtt; @@ -1866,13 +2132,13 @@ static void i915_address_space_init(struct i915_address_space *vm, INIT_LIST_HEAD(&vm->unbound_list); list_add_tail(&vm->global_link, &dev_priv->vm_list); - pagevec_init(&vm->free_pages, false); + pagevec_init(&vm->free_pages); } static void i915_address_space_fini(struct i915_address_space *vm) { if (pagevec_count(&vm->free_pages)) - vm_free_pages_release(vm); + vm_free_pages_release(vm, true); i915_gem_timeline_fini(&vm->timeline); drm_mm_takedown(&vm->mm); @@ -1885,15 +2151,32 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. 
*/ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_GEN9_BC(dev_priv)) + else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); + + /* + * To support 64K PTEs we need to first enable the use of the + * Intermediate-Page-Size(IPS) bit of the PDE field via some magical + * mmio, otherwise the page-walker will simply ignore the IPS bit. This + * shouldn't be needed after GEN10. + * + * 64K pages were first introduced from BDW+, although technically they + * only *work* from gen9+. For pre-BDW we instead have the option for + * 32K pages, but we don't currently have any support for it in our + * driver. + */ + if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) && + INTEL_GEN(dev_priv) <= 10) + I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA, + I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) | + GAMW_ECO_ENABLE_64K_IPS_FIELD); } int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) @@ -1903,7 +2186,7 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) /* In the case of execlists, PPGTT is enabled by the context descriptor * and the PDPs are contained within the context itself. We don't * need to do anything here. */ - if (i915.enable_execlists) + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) return 0; if (!USES_PPGTT(dev_priv)) @@ -1997,35 +2280,73 @@ static bool needs_idle_maps(struct drm_i915_private *dev_priv) return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active(); } -void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) +static void gen6_check_and_clear_faults(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - - if (INTEL_INFO(dev_priv)->gen < 6) - return; + u32 fault; for_each_engine(engine, dev_priv, id) { - u32 fault_reg; - fault_reg = I915_READ(RING_FAULT_REG(engine)); - if (fault_reg & RING_FAULT_VALID) { + fault = I915_READ(RING_FAULT_REG(engine)); + if (fault & RING_FAULT_VALID) { DRM_DEBUG_DRIVER("Unexpected fault\n" "\tAddr: 0x%08lx\n" "\tAddress space: %s\n" "\tSource ID: %d\n" "\tType: %d\n", - fault_reg & PAGE_MASK, - fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", - RING_FAULT_SRCID(fault_reg), - RING_FAULT_FAULT_TYPE(fault_reg)); + fault & PAGE_MASK, + fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); I915_WRITE(RING_FAULT_REG(engine), - fault_reg & ~RING_FAULT_VALID); + fault & ~RING_FAULT_VALID); } } - /* Engine specific init may not have been done till this point. 
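gen8_check_and_clear_faults() below reassembles the faulting virtual address from the two TLB-data registers; the bit arithmetic in isolation (plain C, directly mirroring the expression in that hunk; FAULT_VA_HIGH_BITS is the DATA1 mask):

#include <stdint.h>

/* fault_addr = high VA bits (DATA1) placed at bit 44, OR'd with the
 * 4K-page-aligned low bits (DATA0) shifted up by the page shift. */
static uint64_t gen8_fault_va(uint32_t fault_data0, uint32_t fault_data1,
			      uint32_t va_high_mask)
{
	return ((uint64_t)(fault_data1 & va_high_mask) << 44) |
	       ((uint64_t)fault_data0 << 12);
}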
*/ - if (dev_priv->engine[RCS]) - POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); + POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); +} + +static void gen8_check_and_clear_faults(struct drm_i915_private *dev_priv) +{ + u32 fault = I915_READ(GEN8_RING_FAULT_REG); + + if (fault & RING_FAULT_VALID) { + u32 fault_data0, fault_data1; + u64 fault_addr; + + fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0); + fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1); + fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | + ((u64)fault_data0 << 12); + + DRM_DEBUG_DRIVER("Unexpected fault\n" + "\tAddr: 0x%08x_%08x\n" + "\tAddress space: %s\n" + "\tEngine ID: %d\n" + "\tSource ID: %d\n" + "\tType: %d\n", + upper_32_bits(fault_addr), + lower_32_bits(fault_addr), + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", + GEN8_RING_FAULT_ENGINE_ID(fault), + RING_FAULT_SRCID(fault), + RING_FAULT_FAULT_TYPE(fault)); + I915_WRITE(GEN8_RING_FAULT_REG, + fault & ~RING_FAULT_VALID); + } + + POSTING_READ(GEN8_RING_FAULT_REG); +} + +void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) +{ + /* From GEN8 onwards we only have one 'All Engine Fault Register' */ + if (INTEL_GEN(dev_priv) >= 8) + gen8_check_and_clear_faults(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_check_and_clear_faults(dev_priv); + else + return; } void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) @@ -2338,12 +2659,6 @@ static int ggtt_bind_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags; - if (unlikely(!vma->pages)) { - int ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - } - /* Currently applicable only to VLV */ pte_flags = 0; if (obj->gt_ro) @@ -2353,6 +2668,8 @@ static int ggtt_bind_vma(struct i915_vma *vma, vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); intel_runtime_pm_put(i915); + vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; + /* * Without aliasing PPGTT there's no difference between * GLOBAL/LOCAL_BIND, it's all the same ptes. 
Hence unconditionally @@ -2380,12 +2697,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int ret; - if (unlikely(!vma->pages)) { - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; - } - /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) @@ -2400,7 +2711,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, vma->node.start, vma->size); if (ret) - goto err_pages; + return ret; } appgtt->base.insert_entries(&appgtt->base, vma, cache_level, @@ -2414,17 +2725,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, } return 0; - -err_pages: - if (!(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND))) { - if (vma->pages != vma->obj->mm.pages) { - GEM_BUG_ON(!vma->pages); - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - } - return ret; } static void aliasing_gtt_unbind_vma(struct i915_vma *vma) @@ -2462,6 +2762,21 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); } +static int ggtt_set_pages(struct i915_vma *vma) +{ + int ret; + + GEM_BUG_ON(vma->pages); + + ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + + vma->page_sizes = vma->obj->mm.page_sizes; + + return 0; +} + static void i915_gtt_color_adjust(const struct drm_mm_node *node, unsigned long color, u64 *start, @@ -2598,6 +2913,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_vma *vma, *vn; + struct pagevec *pvec; ggtt->base.closed = true; @@ -2621,10 +2937,17 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) } ggtt->base.cleanup(&ggtt->base); + + pvec = &dev_priv->mm.wc_stash; + if (pvec->nr) { + set_pages_array_wb(pvec->pages, pvec->nr); + __pagevec_release(pvec); + } + mutex_unlock(&dev_priv->drm.struct_mutex); arch_phys_wc_del(ggtt->mtrr); - io_mapping_fini(&ggtt->mappable); + io_mapping_fini(&ggtt->iomap); } static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -2661,50 +2984,6 @@ static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) return 0; } -static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) -{ - snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; - snb_gmch_ctl &= SNB_GMCH_GMS_MASK; - return (size_t)snb_gmch_ctl << 25; /* 32 MB units */ -} - -static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) -{ - bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; - bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; - return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */ -} - -static size_t chv_get_stolen_size(u16 gmch_ctrl) -{ - gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; - gmch_ctrl &= SNB_GMCH_GMS_MASK; - - /* - * 0x0 to 0x10: 32MB increments starting at 0MB - * 0x11 to 0x16: 4MB increments starting at 8MB - * 0x17 to 0x1d: 4MB increments start at 36MB - */ - if (gmch_ctrl < 0x11) - return (size_t)gmch_ctrl << 25; - else if (gmch_ctrl < 0x17) - return (size_t)(gmch_ctrl - 0x11 + 2) << 22; - else - return (size_t)(gmch_ctrl - 0x17 + 9) << 22; -} - -static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) -{ - gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; - gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; - - if (gen9_gmch_ctl < 0xf0) - return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */ - else - /* 4MB increments starting at 0xf0 for 4MB */ - return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22; -} - static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) { struct drm_i915_private *dev_priv = ggtt->base.i915; @@ -2716,13 +2995,13 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) phys_addr = 
pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; /* - * On BXT writes larger than 64 bit to the GTT pagetable range will be - * dropped. For WC mappings in general we have 64 byte burst writes - * when the WC buffer is flushed, so we can't use it, but have to + * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range + * will be dropped. For WC mappings in general we have 64 byte burst + * writes when the WC buffer is flushed, so we can't use it, but have to * resort to an uncached mapping. The WC issue is easily caught by the * readback check when writing GTT PTE entries. */ - if (IS_GEN9_LP(dev_priv)) + if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10) ggtt->gsm = ioremap_nocache(phys_addr, size); else ggtt->gsm = ioremap_wc(phys_addr, size); @@ -2742,41 +3021,203 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return 0; } -static void cnl_setup_private_ppat(struct drm_i915_private *dev_priv) +static struct intel_ppat_entry * +__alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value) { - /* XXX: spec is unclear if this is still needed for CNL+ */ - if (!USES_PPGTT(dev_priv)) { - I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_UC); - return; + struct intel_ppat_entry *entry = &ppat->entries[index]; + + GEM_BUG_ON(index >= ppat->max_entries); + GEM_BUG_ON(test_bit(index, ppat->used)); + + entry->ppat = ppat; + entry->value = value; + kref_init(&entry->ref); + set_bit(index, ppat->used); + set_bit(index, ppat->dirty); + + return entry; +} + +static void __free_ppat_entry(struct intel_ppat_entry *entry) +{ + struct intel_ppat *ppat = entry->ppat; + unsigned int index = entry - ppat->entries; + + GEM_BUG_ON(index >= ppat->max_entries); + GEM_BUG_ON(!test_bit(index, ppat->used)); + + entry->value = ppat->clear_value; + clear_bit(index, ppat->used); + set_bit(index, ppat->dirty); +} + +/** + * intel_ppat_get - get a usable PPAT entry + * @i915: i915 device instance + * @value: the PPAT value required by the caller + * + * The function searches for an existing PPAT entry that matches the + * required value. If perfectly matched, the existing PPAT entry is reused. + * If only partially matched, it checks whether any PPAT index is still + * free. If so, it allocates a new PPAT index for the required value and + * updates the HW. If not, the partially matched entry is used.
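A sketch of the intended get/put pairing (built only from the helpers in these hunks; the caller and the PTE encoding step are hypothetical):

/* Hypothetical user of the dynamic PPAT manager. */
static int use_custom_ppat(struct drm_i915_private *i915, u8 value)
{
	const struct intel_ppat_entry *entry;

	entry = intel_ppat_get(i915, value);
	if (IS_ERR(entry))
		return PTR_ERR(entry);

	/* ... encode (entry - i915->ppat.entries) as pat_sel in the PTEs ... */

	intel_ppat_put(entry);
	return 0;
}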
+ */ +const struct intel_ppat_entry * +intel_ppat_get(struct drm_i915_private *i915, u8 value) +{ + struct intel_ppat *ppat = &i915->ppat; + struct intel_ppat_entry *entry = NULL; + unsigned int scanned, best_score; + int i; + + GEM_BUG_ON(!ppat->max_entries); + + scanned = best_score = 0; + for_each_set_bit(i, ppat->used, ppat->max_entries) { + unsigned int score; + + score = ppat->match(ppat->entries[i].value, value); + if (score > best_score) { + entry = &ppat->entries[i]; + if (score == INTEL_PPAT_PERFECT_MATCH) { + kref_get(&entry->ref); + return entry; + } + best_score = score; + } + scanned++; } - I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_WB | GEN8_PPAT_LLC); - I915_WRITE(GEN10_PAT_INDEX(1), GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); - I915_WRITE(GEN10_PAT_INDEX(2), GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); - I915_WRITE(GEN10_PAT_INDEX(3), GEN8_PPAT_UC); - I915_WRITE(GEN10_PAT_INDEX(4), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); - I915_WRITE(GEN10_PAT_INDEX(5), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); - I915_WRITE(GEN10_PAT_INDEX(6), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); - I915_WRITE(GEN10_PAT_INDEX(7), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); + if (scanned == ppat->max_entries) { + if (!entry) + return ERR_PTR(-ENOSPC); + + kref_get(&entry->ref); + return entry; + } + + i = find_first_zero_bit(ppat->used, ppat->max_entries); + entry = __alloc_ppat_entry(ppat, i, value); + ppat->update_hw(i915); + return entry; +} + +static void release_ppat(struct kref *kref) +{ + struct intel_ppat_entry *entry = + container_of(kref, struct intel_ppat_entry, ref); + struct drm_i915_private *i915 = entry->ppat->i915; + + __free_ppat_entry(entry); + entry->ppat->update_hw(i915); +} + +/** + * intel_ppat_put - put back the PPAT entry obtained from intel_ppat_get() + * @entry: an intel PPAT entry + * + * Put back the PPAT entry obtained from intel_ppat_get(). If the PPAT index of + * the entry is dynamically allocated, its reference count will be decreased. + * Once the reference count drops to zero, the PPAT index becomes free again. + */ +void intel_ppat_put(const struct intel_ppat_entry *entry) +{ + struct intel_ppat *ppat = entry->ppat; + unsigned int index = entry - ppat->entries; + + GEM_BUG_ON(!ppat->max_entries); + + kref_put(&ppat->entries[index].ref, release_ppat); +} + +static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv) +{ + struct intel_ppat *ppat = &dev_priv->ppat; + int i; + + for_each_set_bit(i, ppat->dirty, ppat->max_entries) { + I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value); + clear_bit(i, ppat->dirty); + } +} + +static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv) +{ + struct intel_ppat *ppat = &dev_priv->ppat; + u64 pat = 0; + int i; + + for (i = 0; i < ppat->max_entries; i++) + pat |= GEN8_PPAT(i, ppat->entries[i].value); + + bitmap_clear(ppat->dirty, 0, ppat->max_entries); + + I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); + I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); +} + +static unsigned int bdw_private_pat_match(u8 src, u8 dst) +{ + unsigned int score = 0; + enum { + AGE_MATCH = BIT(0), + TC_MATCH = BIT(1), + CA_MATCH = BIT(2), + }; + + /* Cache attribute has to be matched.
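A worked score for the matcher being defined here: with src = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3) and dst = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0), the cache attribute and target cache agree while the age differs, so the function returns CA_MATCH | TC_MATCH, a usable partial match. Only when all three fields agree does it return INTEL_PPAT_PERFECT_MATCH, which intel_ppat_get() above treats as "reuse this entry and stop scanning".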
*/ + if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst)) + return 0; + + score |= CA_MATCH; + + if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst)) + score |= TC_MATCH; + + if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst)) + score |= AGE_MATCH; + + if (score == (AGE_MATCH | TC_MATCH | CA_MATCH)) + return INTEL_PPAT_PERFECT_MATCH; + + return score; +} + +static unsigned int chv_private_pat_match(u8 src, u8 dst) +{ + return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ? + INTEL_PPAT_PERFECT_MATCH : 0; +} + +static void cnl_setup_private_ppat(struct intel_ppat *ppat) +{ + ppat->max_entries = 8; + ppat->update_hw = cnl_private_pat_update_hw; + ppat->match = bdw_private_pat_match; + ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3); + + __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); + __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); + __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); + __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); + __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); + __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); + __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); + __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); } /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability * bits. When using advanced contexts each context stores its own PAT, but * writing this data shouldn't be harmful even in those cases. */ -static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) +static void bdw_setup_private_ppat(struct intel_ppat *ppat) { - u64 pat; + ppat->max_entries = 8; + ppat->update_hw = bdw_private_pat_update_hw; + ppat->match = bdw_private_pat_match; + ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3); - pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ - GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ - GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ - GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ - GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | - GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | - GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | - GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); - - if (!USES_PPGTT(dev_priv)) + if (!USES_PPGTT(ppat->i915)) { /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, * so RTL will always use the value corresponding to * pat_sel = 000". @@ -2790,17 +3231,26 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) * So we can still hold onto all our assumptions wrt cpu * clflushing on LLC machines. */ - pat = GEN8_PPAT(0, GEN8_PPAT_UC); + __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC); + return; + } - /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b - * write would work. */ - I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); - I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); + __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); /* for normal objects, no eLLC */ + __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); /* for something pointing to ptes? 
*/ + __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */ + __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); /* Uncached objects, mostly for scanout */ + __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); + __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); + __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); + __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); } -static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) +static void chv_setup_private_ppat(struct intel_ppat *ppat) { - u64 pat; + ppat->max_entries = 8; + ppat->update_hw = bdw_private_pat_update_hw; + ppat->match = chv_private_pat_match; + ppat->clear_value = CHV_PPAT_SNOOP; /* * Map WB on BDW to snooped on CHV. @@ -2820,17 +3270,15 @@ static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) * Which means we must set the snoop bit in PAT entry 0 * in order to keep the global status page working. */ - pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | - GEN8_PPAT(1, 0) | - GEN8_PPAT(2, 0) | - GEN8_PPAT(3, 0) | - GEN8_PPAT(4, CHV_PPAT_SNOOP) | - GEN8_PPAT(5, CHV_PPAT_SNOOP) | - GEN8_PPAT(6, CHV_PPAT_SNOOP) | - GEN8_PPAT(7, CHV_PPAT_SNOOP); - I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); - I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); + __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP); + __alloc_ppat_entry(ppat, 1, 0); + __alloc_ppat_entry(ppat, 2, 0); + __alloc_ppat_entry(ppat, 3, 0); + __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP); + __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP); + __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP); + __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP); } static void gen6_gmch_remove(struct i915_address_space *vm) @@ -2841,6 +3289,31 @@ static void gen6_gmch_remove(struct i915_address_space *vm) cleanup_scratch_page(vm); } +static void setup_private_pat(struct drm_i915_private *dev_priv) +{ + struct intel_ppat *ppat = &dev_priv->ppat; + int i; + + ppat->i915 = dev_priv; + + if (INTEL_GEN(dev_priv) >= 10) + cnl_setup_private_ppat(ppat); + else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) + chv_setup_private_ppat(ppat); + else + bdw_setup_private_ppat(ppat); + + GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES); + + for_each_clear_bit(i, ppat->used, ppat->max_entries) { + ppat->entries[i].value = ppat->clear_value; + ppat->entries[i].ppat = ppat; + set_bit(i, ppat->dirty); + } + + ppat->update_hw(dev_priv); +} + static int gen8_gmch_probe(struct i915_ggtt *ggtt) { struct drm_i915_private *dev_priv = ggtt->base.i915; @@ -2850,8 +3323,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) int err; /* TODO: We're not aware of mappable constraints on gen8 yet */ - ggtt->mappable_base = pci_resource_start(pdev, 2); - ggtt->mappable_end = pci_resource_len(pdev, 2); + ggtt->gmadr = + (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2), + pci_resource_len(pdev, 2)); + ggtt->mappable_end = resource_size(&ggtt->gmadr); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); if (!err) @@ -2862,28 +3337,19 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); if (INTEL_GEN(dev_priv) >= 9) { - ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); size = gen8_get_total_gtt_size(snb_gmch_ctl); } else if (IS_CHERRYVIEW(dev_priv)) { - ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); size = chv_get_total_gtt_size(snb_gmch_ctl); } else { - ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); size = 
gen8_get_total_gtt_size(snb_gmch_ctl); } ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; - - if (INTEL_GEN(dev_priv) >= 10) - cnl_setup_private_ppat(dev_priv); - else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) - chv_setup_private_ppat(dev_priv); - else - bdw_setup_private_ppat(dev_priv); - ggtt->base.cleanup = gen6_gmch_remove; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.insert_page = gen8_ggtt_insert_page; ggtt->base.clear_range = nop_clear_range; if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) @@ -2901,6 +3367,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->invalidate = gen6_ggtt_invalidate; + setup_private_pat(dev_priv); + return ggtt_probe_common(ggtt, size); } @@ -2912,14 +3380,16 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) u16 snb_gmch_ctl; int err; - ggtt->mappable_base = pci_resource_start(pdev, 2); - ggtt->mappable_end = pci_resource_len(pdev, 2); + ggtt->gmadr = + (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2), + pci_resource_len(pdev, 2)); + ggtt->mappable_end = resource_size(&ggtt->gmadr); /* 64/512MB is the current min/max we actually know of, but this is just * a coarse sanity check. */ if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { - DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); + DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end); return -ENXIO; } @@ -2930,8 +3400,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); - size = gen6_get_total_gtt_size(snb_gmch_ctl); ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; @@ -2940,6 +3408,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.insert_entries = gen6_ggtt_insert_entries; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = gen6_gmch_remove; ggtt->invalidate = gen6_ggtt_invalidate; @@ -2966,6 +3436,7 @@ static void i915_gmch_remove(struct i915_address_space *vm) static int i915_gmch_probe(struct i915_ggtt *ggtt) { struct drm_i915_private *dev_priv = ggtt->base.i915; + phys_addr_t gmadr_base; int ret; ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); @@ -2975,16 +3446,21 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) } intel_gtt_get(&ggtt->base.total, - &ggtt->stolen_size, - &ggtt->mappable_base, + &gmadr_base, &ggtt->mappable_end); + ggtt->gmadr = + (struct resource) DEFINE_RES_MEM(gmadr_base, + ggtt->mappable_end); + ggtt->do_idle_maps = needs_idle_maps(dev_priv); ggtt->base.insert_page = i915_ggtt_insert_page; ggtt->base.insert_entries = i915_ggtt_insert_entries; ggtt->base.clear_range = i915_ggtt_clear_range; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = i915_gmch_remove; ggtt->invalidate = gmch_ggtt_invalidate; @@ -3021,9 +3497,9 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) * currently don't have any bits spare to pass in this upper * restriction! 
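	 * (GUC_GGTT_TOP is 0xFEE00000 at the time of writing, so the
	 * clamp below cuts roughly the top 18MiB off a full 4GiB GGTT.)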
*/ - if (HAS_GUC(dev_priv) && i915.enable_guc_loading) { + if (USES_GUC(dev_priv)) { ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP); - ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); + ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total); } if ((ggtt->base.total - 1) >> 32) { @@ -3031,21 +3507,21 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) " of address space! Found %lldM!\n", ggtt->base.total >> 20); ggtt->base.total = 1ULL << 32; - ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); + ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total); } if (ggtt->mappable_end > ggtt->base.total) { DRM_ERROR("mappable aperture extends past end of GGTT," - " aperture=%llx, total=%llx\n", - ggtt->mappable_end, ggtt->base.total); + " aperture=%pa, total=%llx\n", + &ggtt->mappable_end, ggtt->base.total); ggtt->mappable_end = ggtt->base.total; } /* GMADR is the PCI mmio aperture into the global GTT. */ - DRM_INFO("Memory usable by graphics device = %lluM\n", - ggtt->base.total >> 20); - DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); - DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20); + DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->base.total >> 20); + DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20); + DRM_DEBUG_DRIVER("DSM size = %lluM\n", + (u64)resource_size(&intel_graphics_stolen_res) >> 20); if (intel_vtd_active()) DRM_INFO("VT-d active for gfx access\n"); @@ -3074,14 +3550,14 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); - if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, - dev_priv->ggtt.mappable_base, + if (!io_mapping_init_wc(&dev_priv->ggtt.iomap, + dev_priv->ggtt.gmadr.start, dev_priv->ggtt.mappable_end)) { ret = -EIO; goto out_gtt_cleanup; } - ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); + ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end); /* * Initialise stolen early so that we may reserve preallocated @@ -3111,6 +3587,8 @@ void i915_ggtt_enable_guc(struct drm_i915_private *i915) GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate); i915->ggtt.invalidate = guc_ggtt_invalidate; + + i915_ggtt_invalidate(i915); } void i915_ggtt_disable_guc(struct drm_i915_private *i915) @@ -3119,6 +3597,8 @@ void i915_ggtt_disable_guc(struct drm_i915_private *i915) GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate); i915->ggtt.invalidate = gen6_ggtt_invalidate; + + i915_ggtt_invalidate(i915); } void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) @@ -3134,15 +3614,11 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ /* clflush objects bound into the GGTT and rebind them. 
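	 * (The PTEs cannot be assumed to survive suspend, so every VMA
	 * still bound must be unbound and re-inserted here; the clflush
	 * guards against stale CPU cachelines on non-coherent platforms.)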
*/ - list_for_each_entry_safe(obj, on, - &dev_priv->mm.bound_list, global_link) { + list_for_each_entry_safe(obj, on, &dev_priv->mm.bound_list, mm.link) { bool ggtt_bound = false; struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->vm != &ggtt->base) - continue; - + for_each_ggtt_vma(vma, obj) { if (!i915_vma_unbind(vma)) continue; @@ -3158,13 +3634,10 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) ggtt->base.closed = false; if (INTEL_GEN(dev_priv) >= 8) { - if (INTEL_GEN(dev_priv) >= 10) - cnl_setup_private_ppat(dev_priv); - else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) - chv_setup_private_ppat(dev_priv); - else - bdw_setup_private_ppat(dev_priv); + struct intel_ppat *ppat = &dev_priv->ppat; + bitmap_set(ppat->dirty, 0, ppat->max_entries); + dev_priv->ppat.update_hw(dev_priv); return; } @@ -3259,9 +3732,6 @@ intel_rotate_pages(struct intel_rotation_info *rot_info, rot_info->plane[i].stride, st, sg); } - DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", - obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); - kvfree(page_addr_list); return st; @@ -3271,8 +3741,8 @@ err_sg_alloc: err_st_alloc: kvfree(page_addr_list); - DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", - obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); + DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index b4e3aa7c0ce1..a42890d9af38 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -42,7 +42,13 @@ #include "i915_gem_request.h" #include "i915_selftest.h" -#define I915_GTT_PAGE_SIZE 4096UL +#define I915_GTT_PAGE_SIZE_4K BIT(12) +#define I915_GTT_PAGE_SIZE_64K BIT(16) +#define I915_GTT_PAGE_SIZE_2M BIT(21) + +#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K +#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M + #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE #define I915_FENCE_REG_NONE -1 @@ -126,13 +132,13 @@ typedef u64 gen8_ppgtt_pml4e_t; * tables */ #define GEN8_PDPE_MASK 0x1ff -#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) -#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ -#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ -#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */ +#define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD) +#define PPAT_CACHED_PDE 0 /* WB LLC */ +#define PPAT_CACHED _PAGE_PAT /* WB LLCeLLC */ +#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT eLLC */ #define CHV_PPAT_SNOOP (1<<6) -#define GEN8_PPAT_AGE(x) (x<<4) +#define GEN8_PPAT_AGE(x) ((x)<<4) #define GEN8_PPAT_LLCeLLC (3<<2) #define GEN8_PPAT_LLCELLC (2<<2) #define GEN8_PPAT_LLC (1<<2) @@ -143,6 +149,14 @@ typedef u64 gen8_ppgtt_pml4e_t; #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) #define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) +#define GEN8_PPAT_GET_CA(x) ((x) & 3) +#define GEN8_PPAT_GET_TC(x) ((x) & (3 << 2)) +#define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4)) +#define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6)) + +#define GEN8_PDE_IPS_64K BIT(11) +#define GEN8_PDE_PS_2M BIT(7) + struct sg_table; struct intel_rotation_info { @@ -202,6 +216,7 @@ struct i915_vma; struct i915_page_dma { struct page *page; + int order; union { dma_addr_t daddr; @@ -324,6 +339,8 @@ struct 
i915_address_space { int (*bind_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); + int (*set_pages)(struct i915_vma *vma); + void (*clear_pages)(struct i915_vma *vma); I915_SELFTEST_DECLARE(struct fault_attr fault_attr); }; @@ -336,6 +353,12 @@ i915_vm_is_48bit(const struct i915_address_space *vm) return (vm->total - 1) >> 32; } +static inline bool +i915_vm_has_scratch_64K(struct i915_address_space *vm) +{ + return vm->scratch_page.order == get_order(I915_GTT_PAGE_SIZE_64K); +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a @@ -345,23 +368,10 @@ i915_vm_is_48bit(const struct i915_address_space *vm) */ struct i915_ggtt { struct i915_address_space base; - struct io_mapping mappable; /* Mapping to our CPU mappable region */ - - phys_addr_t mappable_base; /* PA of our GMADR */ - u64 mappable_end; /* End offset that we can CPU map */ - /* Stolen memory is segmented in hardware with different portions - * offlimits to certain functions. - * - * The drm_mm is initialised to the total accessible range, as found - * from the PCI config. On Broadwell+, this is further restricted to - * avoid the first page! The upper end of stolen memory is reserved for - * hardware functions and similarly removed from the accessible range. - */ - u32 stolen_size; /* Total size of stolen memory */ - u32 stolen_usable_size; /* Total size minus reserved ranges */ - u32 stolen_reserved_base; - u32 stolen_reserved_size; + struct io_mapping iomap; /* Mapping to our CPU mappable region */ + struct resource gmadr; /* GMADR resource */ + resource_size_t mappable_end; /* End offset that we can CPU map */ /** "Graphics Stolen Memory" holds the global PTEs */ void __iomem *gsm; @@ -536,6 +546,37 @@ i915_vm_to_ggtt(struct i915_address_space *vm) return container_of(vm, struct i915_ggtt, base); } +#define INTEL_MAX_PPAT_ENTRIES 8 +#define INTEL_PPAT_PERFECT_MATCH (~0U) + +struct intel_ppat; + +struct intel_ppat_entry { + struct intel_ppat *ppat; + struct kref ref; + u8 value; +}; + +struct intel_ppat { + struct intel_ppat_entry entries[INTEL_MAX_PPAT_ENTRIES]; + DECLARE_BITMAP(used, INTEL_MAX_PPAT_ENTRIES); + DECLARE_BITMAP(dirty, INTEL_MAX_PPAT_ENTRIES); + unsigned int max_entries; + u8 clear_value; + /* + * Return a score to show how two PPAT values match, + * a INTEL_PPAT_PERFECT_MATCH indicates a perfect match + */ + unsigned int (*match)(u8 src, u8 dst); + void (*update_hw)(struct drm_i915_private *i915); + + struct drm_i915_private *i915; +}; + +const struct intel_ppat_entry * +intel_ppat_get(struct drm_i915_private *i915, u8 value); +void intel_ppat_put(const struct intel_ppat_entry *entry); + int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915); void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index c1f64ddaf8aa..a1d6956734f7 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -27,6 +27,7 @@ #include "i915_drv.h" #define QUIET (__GFP_NORETRY | __GFP_NOWARN) +#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) /* convert swiotlb segment size into sensible units (pages)! 
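 * (assuming the usual IO_TLB_SEGSIZE of 128 slabs and IO_TLB_SHIFT of
 * 11, this is 128 << 11 >> 12 = 64 pages per segment with 4KiB pages)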
*/ #define IO_TLB_SEGPAGES (IO_TLB_SEGSIZE << IO_TLB_SHIFT >> PAGE_SHIFT) @@ -44,12 +45,12 @@ static void internal_free_pages(struct sg_table *st) kfree(st); } -static struct sg_table * -i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *st; struct scatterlist *sg; + unsigned int sg_page_sizes; unsigned int npages; int max_order; gfp_t gfp; @@ -78,23 +79,25 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) create_st: st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) - return ERR_PTR(-ENOMEM); + return -ENOMEM; npages = obj->base.size / PAGE_SIZE; if (sg_alloc_table(st, npages, GFP_KERNEL)) { kfree(st); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } sg = st->sgl; st->nents = 0; + sg_page_sizes = 0; do { int order = min(fls(npages) - 1, max_order); struct page *page; do { - page = alloc_pages(gfp | (order ? QUIET : 0), order); + page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), + order); if (page) break; if (!order--) @@ -105,6 +108,7 @@ create_st: } while (1); sg_set_page(sg, page, PAGE_SIZE << order, 0); + sg_page_sizes |= PAGE_SIZE << order; st->nents++; npages -= 1 << order; @@ -132,13 +136,17 @@ create_st: * object are only valid whilst active and pinned. */ obj->mm.madv = I915_MADV_DONTNEED; - return st; + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; err: sg_set_page(sg, NULL, 0, 0); sg_mark_end(sg); internal_free_pages(st); - return ERR_PTR(-ENOMEM); + + return -ENOMEM; } static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index c30d8f808185..05e89e1c0a08 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -53,8 +53,9 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +#define I915_GEM_OBJECT_IS_PROXY BIT(2) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set @@ -69,7 +70,7 @@ struct drm_i915_gem_object_ops { * being released or under memory pressure (where we attempt to * reap pages for the shrinker). */ - struct sg_table *(*get_pages)(struct drm_i915_gem_object *); + int (*get_pages)(struct drm_i915_gem_object *); void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); int (*pwrite)(struct drm_i915_gem_object *, @@ -114,7 +115,6 @@ struct drm_i915_gem_object { /** Stolen memory for this object, instead of being backed by shmem. */ struct drm_mm_node *stolen; - struct list_head global_link; union { struct rcu_head rcu; struct llist_node freed; @@ -123,6 +123,7 @@ struct drm_i915_gem_object { /** * Whether the object is currently in the GGTT mmap. 
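	 * (userfault_count, added below, appears to count how many GGTT
	 * mappings of the object currently have faulted-in pages, i.e.
	 * whether a revoke would have any work to do; this reading is an
	 * inference from the surrounding code, not from the patch text.)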
*/ + unsigned int userfault_count; struct list_head userfault_link; struct list_head batch_pool_link; @@ -160,7 +161,8 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ unsigned int bind_count; unsigned int active_count; - unsigned int pin_display; + /** Count of how many global VMA are currently pinned for use by HW */ + unsigned int pin_global; struct { struct mutex lock; /* protects the pages and their use */ @@ -169,6 +171,35 @@ struct drm_i915_gem_object { struct sg_table *pages; void *mapping; + /* TODO: whack some of this into the error state */ + struct i915_page_sizes { + /** + * The sg mask of the pages sg_table. i.e the mask of + * of the lengths for each sg entry. + */ + unsigned int phys; + + /** + * The gtt page sizes we are allowed to use given the + * sg mask and the supported page sizes. This will + * express the smallest unit we can use for the whole + * object, as well as the larger sizes we may be able + * to use opportunistically. + */ + unsigned int sg; + + /** + * The actual gtt page size usage. Since we can have + * multiple vma associated with this object we need to + * prevent any trampling of state, hence a copy of this + * struct also lives in each vma, therefore the gtt + * value here should only be read/write through the vma. + */ + unsigned int gtt; + } page_sizes; + + I915_SELFTEST_DECLARE(unsigned int page_mask); + struct i915_gem_object_page_iter { struct scatterlist *sg_pos; unsigned int sg_idx; /* in pages, but 32bit eek! */ @@ -178,6 +209,12 @@ struct drm_i915_gem_object { } get_page; /** + * Element within i915->mm.unbound_list or i915->mm.bound_list, + * locked by i915->mm.obj_lock. + */ + struct list_head link; + + /** * Advice: are the backing pages purgeable? */ unsigned int madv:2; @@ -224,6 +261,8 @@ struct drm_i915_gem_object { } userptr; unsigned long scratch; + + void *gvt_info; }; /** for phys allocated objects */ @@ -326,6 +365,12 @@ i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) } static inline bool +i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY; +} + +static inline bool i915_gem_object_is_active(const struct drm_i915_gem_object *obj) { return obj->active_count; diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 4dd4c2159a92..f7fc0df251ac 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -26,10 +26,12 @@ */ #include "i915_drv.h" +#include "i915_gem_render_state.h" #include "intel_renderstate.h" struct intel_render_state { const struct intel_renderstate_rodata *rodata; + struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 batch_offset; u32 batch_size; @@ -40,6 +42,9 @@ struct intel_render_state { static const struct intel_renderstate_rodata * render_state_get_rodata(const struct intel_engine_cs *engine) { + if (engine->id != RCS) + return NULL; + switch (INTEL_GEN(engine->i915)) { case 6: return &gen6_null_state; @@ -74,17 +79,16 @@ static int render_state_setup(struct intel_render_state *so, struct drm_i915_private *i915) { const struct intel_renderstate_rodata *rodata = so->rodata; - struct drm_i915_gem_object *obj = so->vma->obj; unsigned int i = 0, reloc_index = 0; unsigned int needs_clflush; u32 *d; int ret; - ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); + ret = i915_gem_obj_prepare_shmem_write(so->obj, &needs_clflush); if (ret) return ret; - d = 
kmap_atomic(i915_gem_object_get_dirty_page(obj, 0)); + d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0)); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -112,7 +116,7 @@ static int render_state_setup(struct intel_render_state *so, goto err; } - so->batch_offset = so->vma->node.start; + so->batch_offset = i915_ggtt_offset(so->vma); so->batch_size = rodata->batch_items * sizeof(u32); while (i % CACHELINE_DWORDS) @@ -160,9 +164,9 @@ static int render_state_setup(struct intel_render_state *so, drm_clflush_virt_range(d, i * sizeof(u32)); kunmap_atomic(d); - ret = i915_gem_object_set_to_gtt_domain(obj, false); + ret = i915_gem_object_set_to_gtt_domain(so->obj, false); out: - i915_gem_obj_finish_shmem_access(obj); + i915_gem_obj_finish_shmem_access(so->obj); return ret; err: @@ -173,112 +177,57 @@ err: #undef OUT_BATCH -int i915_gem_render_state_init(struct intel_engine_cs *engine) +int i915_gem_render_state_emit(struct drm_i915_gem_request *rq) { - struct intel_render_state *so; - const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - int ret; + struct intel_engine_cs *engine = rq->engine; + struct intel_render_state so = {}; /* keep the compiler happy */ + int err; - if (engine->id != RCS) + so.rodata = render_state_get_rodata(engine); + if (!so.rodata) return 0; - rodata = render_state_get_rodata(engine); - if (!rodata) - return 0; - - if (rodata->batch_items * 4 > PAGE_SIZE) + if (so.rodata->batch_items * 4 > PAGE_SIZE) return -EINVAL; - so = kmalloc(sizeof(*so), GFP_KERNEL); - if (!so) - return -ENOMEM; - - obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(obj)) { - ret = PTR_ERR(obj); - goto err_free; - } + so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(so.obj)) + return PTR_ERR(so.obj); - so->vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); - if (IS_ERR(so->vma)) { - ret = PTR_ERR(so->vma); + so.vma = i915_vma_instance(so.obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(so.vma)) { + err = PTR_ERR(so.vma); goto err_obj; } - so->rodata = rodata; - engine->render_state = so; - return 0; - -err_obj: - i915_gem_object_put(obj); -err_free: - kfree(so); - return ret; -} - -int i915_gem_render_state_emit(struct drm_i915_gem_request *req) -{ - struct intel_render_state *so; - int ret; - - lockdep_assert_held(&req->i915->drm.struct_mutex); - - so = req->engine->render_state; - if (!so) - return 0; - - /* Recreate the page after shrinking */ - if (!so->vma->obj->mm.pages) - so->batch_offset = -1; - - ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (ret) - return ret; + err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err_vma; - if (so->vma->node.start != so->batch_offset) { - ret = render_state_setup(so, req->i915); - if (ret) - goto err_unpin; - } - - ret = req->engine->emit_flush(req, EMIT_INVALIDATE); - if (ret) + err = render_state_setup(&so, rq->i915); + if (err) goto err_unpin; - ret = req->engine->emit_bb_start(req, - so->batch_offset, so->batch_size, - I915_DISPATCH_SECURE); - if (ret) + err = engine->emit_bb_start(rq, + so.batch_offset, so.batch_size, + I915_DISPATCH_SECURE); + if (err) goto err_unpin; - if (so->aux_size > 8) { - ret = req->engine->emit_bb_start(req, - so->aux_offset, so->aux_size, - I915_DISPATCH_SECURE); - if (ret) + if (so.aux_size > 8) { + err = engine->emit_bb_start(rq, + so.aux_offset, so.aux_size, + I915_DISPATCH_SECURE); + if (err) goto err_unpin; } - i915_vma_move_to_active(so->vma, req, 0); + 
i915_vma_move_to_active(so.vma, rq, 0); err_unpin: - i915_vma_unpin(so->vma); - return ret; -} - -void i915_gem_render_state_fini(struct intel_engine_cs *engine) -{ - struct intel_render_state *so; - struct drm_i915_gem_object *obj; - - so = fetch_and_zero(&engine->render_state); - if (!so) - return; - - obj = so->vma->obj; - - i915_vma_close(so->vma); - __i915_gem_object_release_unless_active(obj); - - kfree(so); + i915_vma_unpin(so.vma); +err_vma: + i915_vma_close(so.vma); +err_obj: + __i915_gem_object_release_unless_active(so.obj); + return err; } diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 87481845799d..86369520482e 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -26,8 +26,6 @@ struct drm_i915_gem_request; -int i915_gem_render_state_init(struct intel_engine_cs *engine); -int i915_gem_render_state_emit(struct drm_i915_gem_request *req); -void i915_gem_render_state_fini(struct intel_engine_cs *engine); +int i915_gem_render_state_emit(struct drm_i915_gem_request *rq); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 813a3b546d6e..a3e93d46316a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -186,7 +186,7 @@ i915_priotree_init(struct i915_priotree *pt) INIT_LIST_HEAD(&pt->signalers_list); INIT_LIST_HEAD(&pt->waiters_list); INIT_LIST_HEAD(&pt->link); - pt->priority = INT_MIN; + pt->priority = I915_PRIORITY_INVALID; } static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) @@ -252,12 +252,31 @@ static void mark_busy(struct drm_i915_private *i915) GEM_BUG_ON(!i915->gt.active_requests); intel_runtime_pm_get_noresume(i915); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. 
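+	 *
+	 * (The matching intel_display_power_put() is expected to happen
+	 * once the GT parks again; only the unpark half of the pairing is
+	 * visible in this hunk.)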
+ */ + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + i915->gt.awake = true; intel_enable_gt_powersave(i915); i915_update_gfx_val(i915); if (INTEL_GEN(i915) >= 6) gen6_rps_busy(i915); + i915_pmu_gt_unparked(i915); + + intel_engines_unpark(i915); + + i915_queue_hangcheck(i915); queue_delayed_work(i915->wq, &i915->gt.retire_work, @@ -416,7 +435,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) spin_lock_irq(&request->lock); if (request->waitboost) - atomic_dec(&request->i915->rps.num_waiters); + atomic_dec(&request->i915->gt_pm.rps.num_waiters); dma_fence_signal_locked(&request->fence); spin_unlock_irq(&request->lock); @@ -457,11 +476,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->timeline->lock); - trace_i915_gem_request_execute(request); - /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); + GEM_BUG_ON(request->global_seqno); seqno = timeline_get_seqno(timeline); GEM_BUG_ON(!seqno); @@ -481,6 +499,8 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); + trace_i915_gem_request_execute(request); + wake_up_all(&request->execute); } @@ -508,6 +528,7 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) /* Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. */ + GEM_BUG_ON(!request->global_seqno); GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); engine->timeline->seqno--; @@ -556,7 +577,16 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: trace_i915_gem_request_submit(request); + /* + * We need to serialize use of the submit_request() callback with its + * hotplugging performed during an emergency i915_gem_set_wedged(). + * We use the RCU mechanism to mark the critical section in order to + * force i915_gem_set_wedged() to wait until the submit_request() is + * completed before proceeding. + */ + rcu_read_lock(); request->engine->submit_request(request); + rcu_read_unlock(); break; case FENCE_FREE: @@ -587,6 +617,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, lockdep_assert_held(&dev_priv->drm.struct_mutex); + /* + * Preempt contexts are reserved for exclusive use to inject a + * preemption context switch. They are never to be used for any trivial + * request! + */ + GEM_BUG_ON(ctx == dev_priv->preempt_context); + /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. */ @@ -606,6 +643,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) goto err_unpin; + ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST); + if (ret) + goto err_unreserve; + /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); @@ -640,10 +681,21 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * * Do not use kmem_cache_zalloc() here! 
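	 * (Every field of the request is initialised explicitly below;
	 * zeroing it first would waste cycles on a hot path and could
	 * hide a missed initialisation.)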
*/ - req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); - if (!req) { - ret = -ENOMEM; - goto err_unreserve; + req = kmem_cache_alloc(dev_priv->requests, + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (unlikely(!req)) { + /* Ratelimit ourselves to prevent oom from malicious clients */ + ret = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_LOCKED | + I915_WAIT_INTERRUPTIBLE); + if (ret) + goto err_unreserve; + + req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto err_unreserve; + } } req->timeline = i915_gem_context_lookup_timeline(ctx, engine); @@ -685,22 +737,30 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz); - ret = engine->request_alloc(req); - if (ret) - goto err_ctx; - - /* Record the position of the start of the request so that + /* + * Record the position of the start of the request so that * should we detect the updated seqno part-way through the * GPU processing the request, we never over-estimate the * position of the head. */ req->head = req->ring->emit; + /* Unconditionally invalidate GPU caches and TLBs. */ + ret = engine->emit_flush(req, EMIT_INVALIDATE); + if (ret) + goto err_unwind; + + ret = engine->request_alloc(req); + if (ret) + goto err_unwind; + /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); return req; -err_ctx: +err_unwind: + req->ring->emit = req->head; + /* Make sure we didn't add ourselves to external state before freeing */ GEM_BUG_ON(!list_empty(&req->active_list)); GEM_BUG_ON(!list_empty(&req->priotree.signalers_list)); @@ -737,7 +797,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, if (to->engine == from->engine) { ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, - GFP_KERNEL); + I915_FENCE_GFP); return ret < 0 ? ret : 0; } @@ -765,7 +825,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, await_dma_fence: ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, - GFP_KERNEL); + I915_FENCE_GFP); return ret < 0 ? ret : 0; } @@ -816,7 +876,7 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, else ret = i915_sw_fence_await_dma_fence(&req->submit, fence, I915_FENCE_TIMEOUT, - GFP_KERNEL); + I915_FENCE_GFP); if (ret < 0) return ret; @@ -1021,12 +1081,28 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) return this_cpu != cpu; } -bool __i915_spin_request(const struct drm_i915_gem_request *req, - u32 seqno, int state, unsigned long timeout_us) +static bool __i915_spin_request(const struct drm_i915_gem_request *req, + u32 seqno, int state, unsigned long timeout_us) { struct intel_engine_cs *engine = req->engine; unsigned int irq, cpu; + GEM_BUG_ON(!seqno); + + /* + * Only wait for the request if we know it is likely to complete. + * + * We don't track the timestamps around requests, nor the average + * request length, so we do not have a good indicator that this + * request will complete within the timeout. What we do know is the + * order in which requests are executed by the engine and so we can + * tell if the request has started. If the request hasn't started yet, + * it is a fair assumption that it will not complete within our + * relatively short timeout. + */ + if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1)) + return false; + /* When waiting for high frequency requests, e.g. 
during synchronous * rendering split between the CPU and GPU, the finite amount of time * required to set up the irq and wait upon it limits the response @@ -1040,12 +1116,8 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, irq = atomic_read(&engine->irq_count); timeout_us += local_clock_us(&cpu); do { - if (seqno != i915_gem_request_global_seqno(req)) - break; - - if (i915_seqno_passed(intel_engine_get_seqno(req->engine), - seqno)) - return true; + if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno)) + return seqno == i915_gem_request_global_seqno(req); /* Seqno are meant to be ordered *before* the interrupt. If * we see an interrupt without a corresponding seqno advance, @@ -1156,7 +1228,7 @@ restart: GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); /* Optimistic short spin before touching IRQs */ - if (i915_spin_request(req, state, 5)) + if (__i915_spin_request(req, wait.seqno, state, 5)) goto complete; set_current_state(state); @@ -1213,7 +1285,7 @@ wakeup: continue; /* Only spin if we know the GPU is processing this request */ - if (i915_spin_request(req, state, 2)) + if (__i915_spin_request(req, wait.seqno, state, 2)) break; if (!intel_wait_check_request(&wait, req)) { diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 49a4c8994ff0..0d6d39f19506 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -30,6 +30,8 @@ #include "i915_gem.h" #include "i915_sw_fence.h" +#include <uapi/drm/i915_drm.h> + struct drm_file; struct drm_i915_gem_object; struct drm_i915_gem_request; @@ -69,9 +71,14 @@ struct i915_priotree { struct list_head waiters_list; /* those after us, they depend upon us */ struct list_head link; int priority; -#define I915_PRIORITY_MAX 1024 -#define I915_PRIORITY_NORMAL 0 -#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) +}; + +enum { + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + + I915_PRIORITY_INVALID = INT_MIN }; struct i915_gem_capture_list { @@ -197,6 +204,8 @@ struct drm_i915_gem_request { struct list_head client_link; }; +#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) + extern const struct dma_fence_ops i915_fence_ops; static inline bool dma_fence_is_i915(const struct dma_fence *fence) @@ -313,26 +322,6 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) } static inline bool -__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno) -{ - GEM_BUG_ON(!seqno); - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - seqno - 1); -} - -static inline bool -i915_gem_request_started(const struct drm_i915_gem_request *req) -{ - u32 seqno; - - seqno = i915_gem_request_global_seqno(req); - if (!seqno) - return false; - - return __i915_gem_request_started(req, seqno); -} - -static inline bool __i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno) { GEM_BUG_ON(!seqno); @@ -352,21 +341,6 @@ i915_gem_request_completed(const struct drm_i915_gem_request *req) return __i915_gem_request_completed(req, seqno); } -bool __i915_spin_request(const struct drm_i915_gem_request *request, - u32 seqno, int state, unsigned long timeout_us); -static inline bool i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us) -{ - u32 seqno; - - seqno = i915_gem_request_global_seqno(request); - if (!seqno) - return 0; - - return 
(__i915_gem_request_started(request, seqno) && - __i915_spin_request(request, seqno, state, timeout_us)); -} - /* We treat requests as fences. This is not be to confused with our * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. * We use the fences to synchronize access from the CPU with activity on the diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 74002b2d1b6f..0e158f9287c4 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -35,9 +35,9 @@ #include "i915_drv.h" #include "i915_trace.h" -static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock) +static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock) { - switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) { + switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) { case MUTEX_TRYLOCK_RECURSIVE: *unlock = false; return true; @@ -47,7 +47,7 @@ static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock) preempt_disable(); do { cpu_relax(); - if (mutex_trylock(&dev_priv->drm.struct_mutex)) { + if (mutex_trylock(&i915->drm.struct_mutex)) { *unlock = true; break; } @@ -63,31 +63,12 @@ static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock) BUG(); } -static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock) +static void shrinker_unlock(struct drm_i915_private *i915, bool unlock) { if (!unlock) return; - mutex_unlock(&dev_priv->drm.struct_mutex); -} - -static bool any_vma_pinned(struct drm_i915_gem_object *obj) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &obj->vma_list, obj_link) { - /* Only GGTT vma may be permanently pinned, and are always - * at the start of the list. We can stop hunting as soon - * as we see a ppGTT vma. - */ - if (!i915_vma_is_ggtt(vma)) - break; - - if (i915_vma_is_pinned(vma)) - return true; - } - - return false; + mutex_unlock(&i915->drm.struct_mutex); } static bool swap_available(void) @@ -97,9 +78,6 @@ static bool swap_available(void) static bool can_release_pages(struct drm_i915_gem_object *obj) { - if (!obj->mm.pages) - return false; - /* Consider only shrinkable ojects. */ if (!i915_gem_object_is_shrinkable(obj)) return false; @@ -115,7 +93,13 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count) return false; - if (any_vma_pinned(obj)) + /* If any vma are "permanently" pinned, it will prevent us from + * reclaiming the obj->mm.pages. We only allow scanout objects to claim + * a permanent pin, along with a few others like the context objects. + * To simplify the scan, and to avoid walking the list of vma under the + * object, we just check the count of its permanently pinned. 
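+	 * (That count is the obj->pin_global test just below.)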
+ */ + if (READ_ONCE(obj->pin_global)) return false; /* We can only return physical pages to the system if we can either @@ -129,12 +113,12 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) { if (i915_gem_object_unbind(obj) == 0) __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); - return !READ_ONCE(obj->mm.pages); + return !i915_gem_object_has_pages(obj); } /** * i915_gem_shrink - Shrink buffer object caches - * @dev_priv: i915 device + * @i915: i915 device * @target: amount of memory to make available, in pages * @nr_scanned: optional output for number of pages scanned (incremental) * @flags: control flags for selecting cache types @@ -158,7 +142,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) * The number of pages of backing storage actually released. */ unsigned long -i915_gem_shrink(struct drm_i915_private *dev_priv, +i915_gem_shrink(struct drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned flags) @@ -167,19 +151,31 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, struct list_head *list; unsigned int bit; } phases[] = { - { &dev_priv->mm.unbound_list, I915_SHRINK_UNBOUND }, - { &dev_priv->mm.bound_list, I915_SHRINK_BOUND }, + { &i915->mm.unbound_list, I915_SHRINK_UNBOUND }, + { &i915->mm.bound_list, I915_SHRINK_BOUND }, { NULL, 0 }, }, *phase; unsigned long count = 0; unsigned long scanned = 0; bool unlock; - if (!shrinker_lock(dev_priv, &unlock)) + if (!shrinker_lock(i915, &unlock)) return 0; - trace_i915_gem_shrink(dev_priv, target, flags); - i915_gem_retire_requests(dev_priv); + /* + * When shrinking the active list, also consider active contexts. + * Active contexts are pinned until they are retired, and so can + * not be simply unbound to retire and unpin their pages. To shrink + * the contexts, we must wait until the gpu is idle. + * + * We don't care about errors here; if we cannot wait upon the GPU, + * we will free as much as we can and hope to get a second chance. + */ + if (flags & I915_SHRINK_ACTIVE) + i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + + trace_i915_gem_shrink(i915, target, flags); + i915_gem_retire_requests(i915); /* * Unbinding of objects will require HW access; Let us not wake the @@ -187,7 +183,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, * we will force the wake during oom-notifier. */ if ((flags & I915_SHRINK_BOUND) && - !intel_runtime_pm_get_if_in_use(dev_priv)) + !intel_runtime_pm_get_if_in_use(i915)) flags &= ~I915_SHRINK_BOUND; /* @@ -217,15 +213,20 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, continue; INIT_LIST_HEAD(&still_in_list); + + /* + * We serialize our access to unreferenced objects through + * the use of the struct_mutex. While the objects are not + * yet freed (due to RCU then a workqueue) we still want + * to be able to shrink their pages, so they remain on + * the unbound/bound list until actually freed. 
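+		 *
+		 * (Note the lock is dropped again around unsafe_drop_pages()
+		 * below, since actually releasing the pages may need to
+		 * sleep.)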
+ */ + spin_lock(&i915->mm.obj_lock); while (count < target && (obj = list_first_entry_or_null(phase->list, typeof(*obj), - global_link))) { - list_move_tail(&obj->global_link, &still_in_list); - if (!obj->mm.pages) { - list_del_init(&obj->global_link); - continue; - } + mm.link))) { + list_move_tail(&obj->mm.link, &still_in_list); if (flags & I915_SHRINK_PURGEABLE && obj->mm.madv != I915_MADV_DONTNEED) @@ -243,28 +244,32 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!can_release_pages(obj)) continue; + spin_unlock(&i915->mm.obj_lock); + if (unsafe_drop_pages(obj)) { /* May arrive from get_pages on another bo */ mutex_lock_nested(&obj->mm.lock, I915_MM_SHRINKER); - if (!obj->mm.pages) { + if (!i915_gem_object_has_pages(obj)) { __i915_gem_object_invalidate(obj); - list_del_init(&obj->global_link); count += obj->base.size >> PAGE_SHIFT; } mutex_unlock(&obj->mm.lock); - scanned += obj->base.size >> PAGE_SHIFT; } + scanned += obj->base.size >> PAGE_SHIFT; + + spin_lock(&i915->mm.obj_lock); } list_splice_tail(&still_in_list, phase->list); + spin_unlock(&i915->mm.obj_lock); } if (flags & I915_SHRINK_BOUND) - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(i915); - i915_gem_retire_requests(dev_priv); + i915_gem_retire_requests(i915); - shrinker_unlock(dev_priv, unlock); + shrinker_unlock(i915, unlock); if (nr_scanned) *nr_scanned += scanned; @@ -273,7 +278,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, /** * i915_gem_shrink_all - Shrink buffer object caches completely - * @dev_priv: i915 device + * @i915: i915 device * * This is a simple wraper around i915_gem_shrink() to aggressively shrink all * caches completely. It also first waits for and retires all outstanding @@ -285,16 +290,16 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, * Returns: * The number of pages of backing storage actually released. 
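 *
 * A minimal sketch of a call site (hypothetical, for illustration
 * only):
 *
 *	freed = i915_gem_shrink_all(i915);
 *	DRM_DEBUG_DRIVER("reclaimed %lu pages\n", freed);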
*/ -unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) +unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) { unsigned long freed; - intel_runtime_pm_get(dev_priv); - freed = i915_gem_shrink(dev_priv, -1UL, NULL, + intel_runtime_pm_get(i915); + freed = i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(i915); return freed; } @@ -302,28 +307,39 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) static unsigned long i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { - struct drm_i915_private *dev_priv = + struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); struct drm_i915_gem_object *obj; - unsigned long count; - bool unlock; - - if (!shrinker_lock(dev_priv, &unlock)) - return 0; - - i915_gem_retire_requests(dev_priv); + unsigned long num_objects = 0; + unsigned long count = 0; - count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) - if (can_release_pages(obj)) + spin_lock(&i915->mm.obj_lock); + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) + if (can_release_pages(obj)) { count += obj->base.size >> PAGE_SHIFT; + num_objects++; + } - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { - if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) + if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) { count += obj->base.size >> PAGE_SHIFT; - } + num_objects++; + } + spin_unlock(&i915->mm.obj_lock); + + /* Update our preferred vmscan batch size for the next pass. + * Our rough guess for an effective batch size is roughly 2 + * available GEM objects worth of pages. That is we don't want + * the shrinker to fire, until it is worth the cost of freeing an + * entire GEM object. 
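+	 *
+	 * For example (illustrative numbers only): 256 shrinkable
+	 * objects totalling 1GiB give avg = 2 * 262144 / 256 = 2048
+	 * pages, which the running shrinker.batch value below then
+	 * decays towards.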
+ */ + if (num_objects) { + unsigned long avg = 2 * count / num_objects; - shrinker_unlock(dev_priv, unlock); + i915->mm.shrinker.batch = + max((i915->mm.shrinker.batch + avg) >> 1, + 128ul /* default SHRINK_BATCH */); + } return count; } @@ -331,53 +347,53 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) static unsigned long i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { - struct drm_i915_private *dev_priv = + struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); unsigned long freed; bool unlock; sc->nr_scanned = 0; - if (!shrinker_lock(dev_priv, &unlock)) + if (!shrinker_lock(i915, &unlock)) return SHRINK_STOP; - freed = i915_gem_shrink(dev_priv, + freed = i915_gem_shrink(i915, sc->nr_to_scan, &sc->nr_scanned, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE); - if (freed < sc->nr_to_scan) - freed += i915_gem_shrink(dev_priv, + if (sc->nr_scanned < sc->nr_to_scan) + freed += i915_gem_shrink(i915, sc->nr_to_scan - sc->nr_scanned, &sc->nr_scanned, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - if (freed < sc->nr_to_scan && current_is_kswapd()) { - intel_runtime_pm_get(dev_priv); - freed += i915_gem_shrink(dev_priv, + if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { + intel_runtime_pm_get(i915); + freed += i915_gem_shrink(i915, sc->nr_to_scan - sc->nr_scanned, &sc->nr_scanned, I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(i915); } - shrinker_unlock(dev_priv, unlock); + shrinker_unlock(i915, unlock); return sc->nr_scanned ? freed : SHRINK_STOP; } static bool -shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock, +shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock, int timeout_ms) { unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); do { - if (i915_gem_wait_for_idle(dev_priv, 0) == 0 && - shrinker_lock(dev_priv, unlock)) + if (i915_gem_wait_for_idle(i915, 0) == 0 && + shrinker_lock(i915, unlock)) break; schedule_timeout_killable(1); @@ -396,42 +412,32 @@ shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock, static int i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) { - struct drm_i915_private *dev_priv = + struct drm_i915_private *i915 = container_of(nb, struct drm_i915_private, mm.oom_notifier); struct drm_i915_gem_object *obj; unsigned long unevictable, bound, unbound, freed_pages; - bool unlock; - - if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) - return NOTIFY_DONE; - freed_pages = i915_gem_shrink_all(dev_priv); + freed_pages = i915_gem_shrink_all(i915); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not * being pointed to by hardware. 
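	 * (The tallies below split what remains into bound/unbound pages
	 * that could in principle still be released, versus unevictable
	 * pages that cannot.)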
*/ unbound = bound = unevictable = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { - if (!obj->mm.pages) - continue; - + spin_lock(&i915->mm.obj_lock); + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) { if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else unbound += obj->base.size >> PAGE_SHIFT; } - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { - if (!obj->mm.pages) - continue; - + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else bound += obj->base.size >> PAGE_SHIFT; } - - shrinker_unlock(dev_priv, unlock); + spin_unlock(&i915->mm.obj_lock); if (freed_pages || unbound || bound) pr_info("Purging GPU memory, %lu pages freed, " @@ -449,73 +455,74 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) static int i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) { - struct drm_i915_private *dev_priv = + struct drm_i915_private *i915 = container_of(nb, struct drm_i915_private, mm.vmap_notifier); struct i915_vma *vma, *next; unsigned long freed_pages = 0; bool unlock; int ret; - if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) + if (!shrinker_lock_uninterruptible(i915, &unlock, 5000)) return NOTIFY_DONE; /* Force everything onto the inactive lists */ - ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); + ret = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); if (ret) goto out; - intel_runtime_pm_get(dev_priv); - freed_pages += i915_gem_shrink(dev_priv, -1UL, NULL, + intel_runtime_pm_get(i915); + freed_pages += i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE | I915_SHRINK_VMAPS); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(i915); /* We also want to clear any cached iomaps as they wrap vmap */ list_for_each_entry_safe(vma, next, - &dev_priv->ggtt.base.inactive_list, vm_link) { + &i915->ggtt.base.inactive_list, vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; if (vma->iomap && i915_vma_unbind(vma) == 0) freed_pages += count; } out: - shrinker_unlock(dev_priv, unlock); + shrinker_unlock(i915, unlock); *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; } /** - * i915_gem_shrinker_init - Initialize i915 shrinker - * @dev_priv: i915 device + * i915_gem_shrinker_register - Register the i915 shrinker + * @i915: i915 device * * This function registers and sets up the i915 shrinker and OOM handler. 
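 * It also seeds mm.shrinker.batch with an initial batch of 4096 pages;
 * i915_gem_shrinker_count() then adapts that value to roughly two
 * objects' worth of pages per pass.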
*/ -void i915_gem_shrinker_init(struct drm_i915_private *dev_priv) +void i915_gem_shrinker_register(struct drm_i915_private *i915) { - dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan; - dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count; - dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS; - WARN_ON(register_shrinker(&dev_priv->mm.shrinker)); + i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan; + i915->mm.shrinker.count_objects = i915_gem_shrinker_count; + i915->mm.shrinker.seeks = DEFAULT_SEEKS; + i915->mm.shrinker.batch = 4096; + WARN_ON(register_shrinker(&i915->mm.shrinker)); - dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; - WARN_ON(register_oom_notifier(&dev_priv->mm.oom_notifier)); + i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; + WARN_ON(register_oom_notifier(&i915->mm.oom_notifier)); - dev_priv->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap; - WARN_ON(register_vmap_purge_notifier(&dev_priv->mm.vmap_notifier)); + i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap; + WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier)); } /** - * i915_gem_shrinker_cleanup - Clean up i915 shrinker - * @dev_priv: i915 device + * i915_gem_shrinker_unregister - Unregisters the i915 shrinker + * @i915: i915 device * * This function unregisters the i915 shrinker and OOM handler. */ -void i915_gem_shrinker_cleanup(struct drm_i915_private *dev_priv) +void i915_gem_shrinker_unregister(struct drm_i915_private *i915) { - WARN_ON(unregister_vmap_purge_notifier(&dev_priv->mm.vmap_notifier)); - WARN_ON(unregister_oom_notifier(&dev_priv->mm.oom_notifier)); - unregister_shrinker(&dev_priv->mm.shrinker); + WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier)); + WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier)); + unregister_shrinker(&i915->mm.shrinker); } diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 507c9f0d8df1..d3f222fa6356 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -30,9 +30,6 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -#define KB(x) ((x) * 1024) -#define MB(x) (KB(x) * 1024) - /* * The BIOS typically reserves some of the system's memory for the exclusive * use of the integrated graphics. This memory is no longer available for @@ -79,129 +76,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->mm.stolen_lock); } -static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) +static int i915_adjust_stolen(struct drm_i915_private *dev_priv, + struct resource *dsm) { - struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; struct resource *r; - dma_addr_t base; - - /* Almost universally we can find the Graphics Base of Stolen Memory - * at register BSM (0x5c) in the igfx configuration space. On a few - * (desktop) machines this is also mirrored in the bridge device at - * different locations, or in the MCHBAR. - * - * On 865 we just check the TOUD register. - * - * On 830/845/85x the stolen memory base isn't available in any - * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size. 
- * - */ - base = 0; - if (INTEL_GEN(dev_priv) >= 3) { - u32 bsm; - - pci_read_config_dword(pdev, INTEL_BSM, &bsm); - - base = bsm & INTEL_BSM_MASK; - } else if (IS_I865G(dev_priv)) { - u32 tseg_size = 0; - u16 toud = 0; - u8 tmp; - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I845_ESMRAMC, &tmp); - - if (tmp & TSEG_ENABLE) { - switch (tmp & I845_TSEG_SIZE_MASK) { - case I845_TSEG_SIZE_512K: - tseg_size = KB(512); - break; - case I845_TSEG_SIZE_1M: - tseg_size = MB(1); - break; - } - } - - pci_bus_read_config_word(pdev->bus, PCI_DEVFN(0, 0), - I865_TOUD, &toud); - - base = (toud << 16) + tseg_size; - } else if (IS_I85X(dev_priv)) { - u32 tseg_size = 0; - u32 tom; - u8 tmp; - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I85X_ESMRAMC, &tmp); - - if (tmp & TSEG_ENABLE) - tseg_size = MB(1); - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 1), - I85X_DRB3, &tmp); - tom = tmp * MB(32); - - base = tom - tseg_size - ggtt->stolen_size; - } else if (IS_I845G(dev_priv)) { - u32 tseg_size = 0; - u32 tom; - u8 tmp; - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I845_ESMRAMC, &tmp); - - if (tmp & TSEG_ENABLE) { - switch (tmp & I845_TSEG_SIZE_MASK) { - case I845_TSEG_SIZE_512K: - tseg_size = KB(512); - break; - case I845_TSEG_SIZE_1M: - tseg_size = MB(1); - break; - } - } - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I830_DRB3, &tmp); - tom = tmp * MB(32); - base = tom - tseg_size - ggtt->stolen_size; - } else if (IS_I830(dev_priv)) { - u32 tseg_size = 0; - u32 tom; - u8 tmp; + if (dsm->start == 0 || dsm->end <= dsm->start) + return -EINVAL; - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I830_ESMRAMC, &tmp); - - if (tmp & TSEG_ENABLE) { - if (tmp & I830_TSEG_SIZE_1M) - tseg_size = MB(1); - else - tseg_size = KB(512); - } - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I830_DRB3, &tmp); - tom = tmp * MB(32); - - base = tom - tseg_size - ggtt->stolen_size; - } - - if (base == 0 || add_overflows(base, ggtt->stolen_size)) - return 0; + /* + * TODO: We have yet to encounter the case where the GTT wasn't at the + * end of stolen. With that assumption we could simplify this.
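/*
 * Editorial sketch, not part of the patch: modelling the stolen range as a
 * struct resource means the generic <linux/ioport.h> helpers do the interval
 * arithmetic. Illustrative values only; a hypothetical 64M stolen block with
 * an 8M reserved chunk at its top:
 */
#include <linux/ioport.h>
#include <linux/sizes.h>

static void stolen_resource_sketch(void)
{
	struct resource dsm = DEFINE_RES_MEM(SZ_2G, SZ_64M);
	struct resource rsvd = DEFINE_RES_MEM(SZ_2G + SZ_64M - SZ_8M, SZ_8M);

	WARN_ON(resource_size(&dsm) != SZ_64M);	  /* size is end - start + 1 */
	WARN_ON(!resource_contains(&dsm, &rsvd)); /* rsvd lies fully inside dsm */
}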
+ */ - /* make sure we don't clobber the GTT if it's within stolen memory */ + /* Make sure we don't clobber the GTT if it's within stolen memory */ if (INTEL_GEN(dev_priv) <= 4 && !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { - struct { - dma_addr_t start, end; - } stolen[2] = { - { .start = base, .end = base + ggtt->stolen_size, }, - { .start = base, .end = base + ggtt->stolen_size, }, - }; - u64 ggtt_start, ggtt_end; + struct resource stolen[2] = {*dsm, *dsm}; + struct resource ggtt_res; + resource_size_t ggtt_start; ggtt_start = I915_READ(PGTBL_CTL); if (IS_GEN4(dev_priv)) @@ -209,70 +103,64 @@ static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28; else ggtt_start &= PGTBL_ADDRESS_LO_MASK; - ggtt_end = ggtt_start + ggtt_total_entries(ggtt) * 4; - - if (ggtt_start >= stolen[0].start && ggtt_start < stolen[0].end) - stolen[0].end = ggtt_start; - if (ggtt_end > stolen[1].start && ggtt_end <= stolen[1].end) - stolen[1].start = ggtt_end; - - /* pick the larger of the two chunks */ - if (stolen[0].end - stolen[0].start > - stolen[1].end - stolen[1].start) { - base = stolen[0].start; - ggtt->stolen_size = stolen[0].end - stolen[0].start; - } else { - base = stolen[1].start; - ggtt->stolen_size = stolen[1].end - stolen[1].start; - } + + ggtt_res = + (struct resource) DEFINE_RES_MEM(ggtt_start, + ggtt_total_entries(ggtt) * 4); + + if (ggtt_res.start >= stolen[0].start && ggtt_res.start < stolen[0].end) + stolen[0].end = ggtt_res.start; + if (ggtt_res.end > stolen[1].start && ggtt_res.end <= stolen[1].end) + stolen[1].start = ggtt_res.end; + + /* Pick the larger of the two chunks */ + if (resource_size(&stolen[0]) > resource_size(&stolen[1])) + *dsm = stolen[0]; + else + *dsm = stolen[1]; if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { - dma_addr_t end = base + ggtt->stolen_size - 1; - - DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n", - (unsigned long long)ggtt_start, - (unsigned long long)ggtt_end - 1); - DRM_DEBUG_KMS("Stolen memory adjusted to %pad - %pad\n", - &base, &end); + DRM_DEBUG_KMS("GTT within stolen memory at %pR\n", &ggtt_res); + DRM_DEBUG_KMS("Stolen memory adjusted to %pR\n", dsm); } } - - /* Verify that nothing else uses this physical address. Stolen + /* + * Verify that nothing else uses this physical address. Stolen * memory should be reserved by the BIOS and hidden from the * kernel. So if the region is already marked as busy, something * is seriously wrong. */ - r = devm_request_mem_region(dev_priv->drm.dev, base, ggtt->stolen_size, + r = devm_request_mem_region(dev_priv->drm.dev, dsm->start, + resource_size(dsm), "Graphics Stolen Memory"); if (r == NULL) { /* * One more attempt but this time requesting region from - * base + 1, as we have seen that this resolves the region + * start + 1, as we have seen that this resolves the region * conflict with the PCI Bus. * This is a BIOS w/a: Some BIOS wrap stolen in the root * PCI bus, but have an off-by-one error. Hence retry the * reservation starting from 1 instead of 0. * There's also BIOS with off-by-one on the other end. */ - r = devm_request_mem_region(dev_priv->drm.dev, base + 1, - ggtt->stolen_size - 2, + r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1, + resource_size(dsm) - 2, "Graphics Stolen Memory"); /* * GEN3 firmware likes to smash pci bridges into the stolen * range. Apparently this works. 
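/*
 * Editorial worked example for the chunk picking above, with illustrative
 * numbers: a 64M stolen block at 0x80000000 and a 2M GTT sitting at its very
 * top. Chunk 0 is trimmed to the ~62M below the GTT while chunk 1 collapses
 * to effectively nothing, so chunk 0 becomes the adjusted dsm:
 */
	struct resource dsm = DEFINE_RES_MEM(0x80000000, SZ_64M);
	struct resource ggtt_res =
		DEFINE_RES_MEM(0x80000000 + SZ_64M - SZ_2M, SZ_2M);
	struct resource stolen[2] = { dsm, dsm };

	stolen[0].end = ggtt_res.start;	/* [0x80000000, 0x83e00000] */
	stolen[1].start = ggtt_res.end;	/* effectively empty: a single byte */
	/* resource_size(&stolen[0]) > resource_size(&stolen[1]), dsm = stolen[0] */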
*/ if (r == NULL && !IS_GEN3(dev_priv)) { - dma_addr_t end = base + ggtt->stolen_size; + DRM_ERROR("conflict detected with stolen region: %pR\n", + dsm); - DRM_ERROR("conflict detected with stolen region: [%pad - %pad]\n", - &base, &end); - base = 0; + return -EBUSY; } } - return base; + return 0; } void i915_gem_cleanup_stolen(struct drm_device *dev) @@ -286,13 +174,24 @@ void i915_gem_cleanup_stolen(struct drm_device *dev) } static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ? CTG_STOLEN_RESERVED : ELK_STOLEN_RESERVED); - dma_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + resource_size_t stolen_top = dev_priv->dsm.end + 1; + + if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) { + *base = 0; + *size = 0; + return; + } + + /* + * Whether ILK really reuses the ELK register for this is unclear. + * Let's see if we catch anyone with this supposedly enabled on ILK. + */ + WARN(IS_GEN5(dev_priv), "ILK stolen reserved found? 0x%08x\n", reg_val); *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; @@ -309,10 +208,16 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { + *base = 0; + *size = 0; + return; + } + *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; switch (reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK) { @@ -335,10 +240,16 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { + *base = 0; + *size = 0; + return; + } + *base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK; switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { @@ -355,10 +266,16 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { + *base = 0; + *size = 0; + return; + } + *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { @@ -381,13 +298,18 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); - dma_addr_t stolen_top; + resource_size_t stolen_top; - stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { + *base = 0; + *size = 0; + return; + } + + stolen_top = dev_priv->dsm.end + 1; *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; @@ -403,10 +325,9 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { - struct i915_ggtt *ggtt = 
&dev_priv->ggtt; - dma_addr_t reserved_base, stolen_top; - u32 reserved_total, reserved_size; - u32 stolen_usable_start; + resource_size_t reserved_base, stolen_top; + resource_size_t reserved_total, reserved_size; + resource_size_t stolen_usable_start; mutex_init(&dev_priv->mm.stolen_lock); @@ -420,14 +341,18 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) return 0; } - if (ggtt->stolen_size == 0) + if (resource_size(&intel_graphics_stolen_res) == 0) return 0; - dev_priv->mm.stolen_base = i915_stolen_to_dma(dev_priv); - if (dev_priv->mm.stolen_base == 0) + dev_priv->dsm = intel_graphics_stolen_res; + + if (i915_adjust_stolen(dev_priv, &dev_priv->dsm)) return 0; - stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + GEM_BUG_ON(dev_priv->dsm.start == 0); + GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); + + stolen_top = dev_priv->dsm.end + 1; reserved_base = 0; reserved_size = 0; @@ -436,14 +361,12 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) case 3: break; case 4: - if (IS_G4X(dev_priv)) - g4x_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - break; + if (!IS_G4X(dev_priv)) + break; + /* fall through */ case 5: - /* Assume the gen6 maximum for the older platforms. */ - reserved_size = 1024 * 1024; - reserved_base = stolen_top - reserved_size; + g4x_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); break; case 6: gen6_get_stolen_reserved(dev_priv, @@ -470,50 +393,47 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) reserved_base = stolen_top; } - if (reserved_base < dev_priv->mm.stolen_base || - reserved_base + reserved_size > stolen_top) { - dma_addr_t reserved_top = reserved_base + reserved_size; - DRM_DEBUG_KMS("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n", - &reserved_base, &reserved_top, - &dev_priv->mm.stolen_base, &stolen_top); + dev_priv->dsm_reserved = + (struct resource) DEFINE_RES_MEM(reserved_base, reserved_size); + + if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) { + DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n", + &dev_priv->dsm_reserved, &dev_priv->dsm); return 0; } - ggtt->stolen_reserved_base = reserved_base; - ggtt->stolen_reserved_size = reserved_size; - /* It is possible for the reserved area to end before the end of stolen * memory, so just consider the start. */ reserved_total = stolen_top - reserved_base; - DRM_DEBUG_KMS("Memory reserved for graphics device: %uK, usable: %uK\n", - ggtt->stolen_size >> 10, - (ggtt->stolen_size - reserved_total) >> 10); + DRM_DEBUG_KMS("Memory reserved for graphics device: %lluK, usable: %lluK\n", + (u64)resource_size(&dev_priv->dsm) >> 10, + ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); stolen_usable_start = 0; /* WaSkipStolenMemoryFirstPage:bdw+ */ if (INTEL_GEN(dev_priv) >= 8) stolen_usable_start = 4096; - ggtt->stolen_usable_size = - ggtt->stolen_size - reserved_total - stolen_usable_start; + dev_priv->stolen_usable_size = + resource_size(&dev_priv->dsm) - reserved_total - stolen_usable_start; /* Basic memrange allocator for stolen space. 
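/*
 * Editorial sketch: drm_mm here only hands out offsets inside the stolen
 * block, it never maps memory. A hypothetical allocate/release cycle against
 * the allocator initialised below, guarded by the same stolen_lock the rest
 * of this file uses:
 */
	struct drm_mm_node node = {};
	int err;

	mutex_lock(&dev_priv->mm.stolen_lock);
	err = drm_mm_insert_node(&dev_priv->mm.stolen, &node, SZ_1M);
	mutex_unlock(&dev_priv->mm.stolen_lock);
	if (!err) {
		/* node.start is an offset into stolen, not a CPU address */
		mutex_lock(&dev_priv->mm.stolen_lock);
		drm_mm_remove_node(&node);
		mutex_unlock(&dev_priv->mm.stolen_lock);
	}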
*/ drm_mm_init(&dev_priv->mm.stolen, stolen_usable_start, - ggtt->stolen_usable_size); + dev_priv->stolen_usable_size); return 0; } static struct sg_table * i915_pages_create_for_stolen(struct drm_device *dev, - u32 offset, u32 size) + resource_size_t offset, resource_size_t size) { struct drm_i915_private *dev_priv = to_i915(dev); struct sg_table *st; struct scatterlist *sg; - GEM_BUG_ON(range_overflows(offset, size, dev_priv->ggtt.stolen_size)); + GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm))); /* We hide that we have no struct page backing our stolen object * by wrapping the contiguous physical allocation with a fake @@ -533,18 +453,24 @@ i915_pages_create_for_stolen(struct drm_device *dev, sg->offset = 0; sg->length = size; - sg_dma_address(sg) = (dma_addr_t)dev_priv->mm.stolen_base + offset; + sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset; sg_dma_len(sg) = size; return st; } -static struct sg_table * -i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) { - return i915_pages_create_for_stolen(obj->base.dev, - obj->stolen->start, - obj->stolen->size); + struct sg_table *pages = + i915_pages_create_for_stolen(obj->base.dev, + obj->stolen->start, + obj->stolen->size); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + __i915_gem_object_set_pages(obj, pages, obj->stolen->size); + + return 0; } static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, @@ -605,7 +531,8 @@ cleanup: } struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, u32 size) +i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + resource_size_t size) { struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; @@ -638,9 +565,9 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, u32 size) struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, - u32 stolen_offset, - u32 gtt_offset, - u32 size) + resource_size_t stolen_offset, + resource_size_t gtt_offset, + resource_size_t size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_i915_gem_object *obj; @@ -653,8 +580,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv lockdep_assert_held(&dev_priv->drm.struct_mutex); - DRM_DEBUG_KMS("creating preallocated stolen object: stolen_offset=%x, gtt_offset=%x, size=%x\n", - stolen_offset, gtt_offset, size); + DRM_DEBUG_KMS("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", + &stolen_offset, >t_offset, &size); /* KISS and expect everything to be page-aligned */ if (WARN_ON(size == 0) || @@ -718,8 +645,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); + + spin_lock(&dev_priv->mm.obj_lock); + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); obj->bind_count++; + spin_unlock(&dev_priv->mm.obj_lock); return obj; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index fb5231f98c0d..d9dc9df523b5 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -205,10 +205,7 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, if (tiling_mode == I915_TILING_NONE) 
return 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - + for_each_ggtt_vma(vma, obj) { if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; @@ -269,7 +266,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * due to the change in swizzling. */ mutex_lock(&obj->mm.lock); - if (obj->mm.pages && + if (i915_gem_object_has_pages(obj) && obj->mm.madv == I915_MADV_WILLNEED && i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (tiling == I915_TILING_NONE) { @@ -285,10 +282,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, } mutex_unlock(&obj->mm.lock); - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - + for_each_ggtt_vma(vma, obj) { vma->fence_size = i915_gem_fence_size(i915, vma->size, tiling, stride); vma->fence_alignment = @@ -345,6 +339,15 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT; + /* + * The tiling mode of a proxy object is handled by its generator, and + * is not allowed to be changed by userspace. + */ + if (i915_gem_object_is_proxy(obj)) { + err = -ENXIO; + goto err; + } + if (!i915_tiling_ok(obj, args->tiling_mode, args->stride)) { err = -EINVAL; goto err; } diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c index c597ce277a04..e9fd87604067 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c @@ -33,11 +33,8 @@ static void __intel_timeline_init(struct intel_timeline *tl, { tl->fence_context = context; tl->common = parent; -#ifdef CONFIG_DEBUG_SPINLOCK - __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); -#else spin_lock_init(&tl->lock); -#endif + lockdep_set_class_and_name(&tl->lock, lockclass, lockname); init_request_active(&tl->last_request, NULL); INIT_LIST_HEAD(&tl->requests); i915_syncmap_init(&tl->sync); @@ -107,8 +104,8 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915) } /** - * i915_gem_timelines_mark_idle -- called when the driver idles - * @i915 - the drm_i915_private device + * i915_gem_timelines_park - called when the driver idles + * @i915: the drm_i915_private device * * When the driver is completely idle, we know that all of our sync points * have been signaled and our tracking is then entirely redundant. Any request @@ -116,7 +113,7 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915) * the fence is signaled and therefore we will not even look them up in the * sync point map.
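/*
 * Editorial note: in the full source (the body is not shown in this hunk)
 * parking boils down to dropping each timeline's syncmap wholesale, e.g.
 *
 *	i915_syncmap_free(&tl->sync);
 *
 * rather than aging entries out one by one; the map is rebuilt lazily by
 * whichever request runs next.
 */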
*/ -void i915_gem_timelines_mark_idle(struct drm_i915_private *i915) +void i915_gem_timelines_park(struct drm_i915_private *i915) { struct i915_gem_timeline *timeline; int i; diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index bfb5eb94c64d..b5a22400a01f 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -93,7 +93,7 @@ int i915_gem_timeline_init(struct drm_i915_private *i915, struct i915_gem_timeline *tl, const char *name); int i915_gem_timeline_init__global(struct drm_i915_private *i915); -void i915_gem_timelines_mark_idle(struct drm_i915_private *i915); +void i915_gem_timelines_park(struct drm_i915_private *i915); void i915_gem_timeline_fini(struct i915_gem_timeline *tl); static inline int __intel_timeline_sync_set(struct intel_timeline *tl, diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 709efe2357ea..382a77a1097e 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -82,11 +82,11 @@ static void cancel_userptr(struct work_struct *work) /* We are inside a kthread context and can't be interrupted */ if (i915_gem_object_unbind(obj) == 0) __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - WARN_ONCE(obj->mm.pages, - "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n", + WARN_ONCE(i915_gem_object_has_pages(obj), + "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n", obj->bind_count, atomic_read(&obj->mm.pages_pin_count), - obj->pin_display); + obj->pin_global); mutex_unlock(&obj->base.dev->struct_mutex); @@ -164,7 +164,6 @@ static struct i915_mmu_notifier * i915_mmu_notifier_create(struct mm_struct *mm) { struct i915_mmu_notifier *mn; - int ret; mn = kmalloc(sizeof(*mn), GFP_KERNEL); if (mn == NULL) @@ -173,20 +172,14 @@ i915_mmu_notifier_create(struct mm_struct *mm) spin_lock_init(&mn->lock); mn->mn.ops = &i915_gem_userptr_notifier; mn->objects = RB_ROOT_CACHED; - mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0); + mn->wq = alloc_workqueue("i915-userptr-release", + WQ_UNBOUND | WQ_MEM_RECLAIM, + 0); if (mn->wq == NULL) { kfree(mn); return ERR_PTR(-ENOMEM); } - /* Protected by mmap_sem (write-lock) */ - ret = __mmu_notifier_register(&mn->mn, mm); - if (ret) { - destroy_workqueue(mn->wq); - kfree(mn); - return ERR_PTR(ret); - } - return mn; } @@ -210,23 +203,42 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) static struct i915_mmu_notifier * i915_mmu_notifier_find(struct i915_mm_struct *mm) { - struct i915_mmu_notifier *mn = mm->mn; + struct i915_mmu_notifier *mn; + int err = 0; mn = mm->mn; if (mn) return mn; + mn = i915_mmu_notifier_create(mm->mm); + if (IS_ERR(mn)) + err = PTR_ERR(mn); + down_write(&mm->mm->mmap_sem); mutex_lock(&mm->i915->mm_lock); - if ((mn = mm->mn) == NULL) { - mn = i915_mmu_notifier_create(mm->mm); - if (!IS_ERR(mn)) - mm->mn = mn; + if (mm->mn == NULL && !err) { + /* Protected by mmap_sem (write-lock) */ + err = __mmu_notifier_register(&mn->mn, mm->mm); + if (!err) { + /* Protected by mm_lock */ + mm->mn = fetch_and_zero(&mn); + } + } else if (mm->mn) { + /* + * Someone else raced and successfully installed the mmu + * notifier, we can cancel our own errors. + */ + err = 0; } mutex_unlock(&mm->i915->mm_lock); up_write(&mm->mm->mmap_sem); - return mn; + if (mn && !IS_ERR(mn)) { + destroy_workqueue(mn->wq); + kfree(mn); + } + + return err ? 
ERR_PTR(err) : mm->mn; } static int @@ -399,64 +411,47 @@ struct get_pages_work { struct task_struct *task; }; -#if IS_ENABLED(CONFIG_SWIOTLB) -#define swiotlb_active() swiotlb_nr_tbl() -#else -#define swiotlb_active() 0 -#endif - -static int -st_set_pages(struct sg_table **st, struct page **pvec, int num_pages) -{ - struct scatterlist *sg; - int ret, n; - - *st = kmalloc(sizeof(**st), GFP_KERNEL); - if (*st == NULL) - return -ENOMEM; - - if (swiotlb_active()) { - ret = sg_alloc_table(*st, num_pages, GFP_KERNEL); - if (ret) - goto err; - - for_each_sg((*st)->sgl, sg, num_pages, n) - sg_set_page(sg, pvec[n], PAGE_SIZE, 0); - } else { - ret = sg_alloc_table_from_pages(*st, pvec, num_pages, - 0, num_pages << PAGE_SHIFT, - GFP_KERNEL); - if (ret) - goto err; - } - - return 0; - -err: - kfree(*st); - *st = NULL; - return ret; -} - static struct sg_table * -__i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj, - struct page **pvec, int num_pages) +__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, + struct page **pvec, int num_pages) { - struct sg_table *pages; + unsigned int max_segment = i915_sg_segment_size(); + struct sg_table *st; + unsigned int sg_page_sizes; int ret; - ret = st_set_pages(&pages, pvec, num_pages); - if (ret) + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + +alloc_table: + ret = __sg_alloc_table_from_pages(st, pvec, num_pages, + 0, num_pages << PAGE_SHIFT, + max_segment, + GFP_KERNEL); + if (ret) { + kfree(st); return ERR_PTR(ret); + } - ret = i915_gem_gtt_prepare_pages(obj, pages); + ret = i915_gem_gtt_prepare_pages(obj, st); if (ret) { - sg_free_table(pages); - kfree(pages); + sg_free_table(st); + + if (max_segment > PAGE_SIZE) { + max_segment = PAGE_SIZE; + goto alloc_table; + } + + kfree(st); return ERR_PTR(ret); } - return pages; + sg_page_sizes = i915_sg_page_sizes(st->sgl); + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return st; } static int @@ -540,9 +535,9 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) struct sg_table *pages = ERR_PTR(ret); if (pinned == npages) { - pages = __i915_gem_userptr_set_pages(obj, pvec, npages); + pages = __i915_gem_userptr_alloc_pages(obj, pvec, + npages); if (!IS_ERR(pages)) { - __i915_gem_object_set_pages(obj, pages); pinned = 0; pages = NULL; } @@ -554,7 +549,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) } mutex_unlock(&obj->mm.lock); - release_pages(pvec, pinned, 0); + release_pages(pvec, pinned); kvfree(pvec); i915_gem_object_put(obj); @@ -603,8 +598,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) return ERR_PTR(-EAGAIN); } -static struct sg_table * -i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) +static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const int num_pages = obj->base.size >> PAGE_SHIFT; struct mm_struct *mm = obj->userptr.mm->mm; @@ -633,9 +627,9 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) if (obj->userptr.work) { /* active flag should still be held for the pending work */ if (IS_ERR(obj->userptr.work)) - return ERR_CAST(obj->userptr.work); + return PTR_ERR(obj->userptr.work); else - return ERR_PTR(-EAGAIN); + return -EAGAIN; } pvec = NULL; @@ -661,17 +655,17 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) pages = __i915_gem_userptr_get_pages_schedule(obj); active = pages == ERR_PTR(-EAGAIN); } else { - pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages); + pages = __i915_gem_userptr_alloc_pages(obj, pvec, 
num_pages); active = !IS_ERR(pages); } if (active) __i915_gem_userptr_set_active(obj, true); if (IS_ERR(pages)) - release_pages(pvec, pinned, 0); + release_pages(pvec, pinned); kvfree(pvec); - return pages; + return PTR_ERR_OR_ZERO(pages); } static void @@ -834,7 +828,9 @@ int i915_gem_init_userptr(struct drm_i915_private *dev_priv) hash_init(dev_priv->mm_structs); dev_priv->mm.userptr_wq = - alloc_workqueue("i915-userptr-acquire", WQ_HIGHPRI, 0); + alloc_workqueue("i915-userptr-acquire", + WQ_HIGHPRI | WQ_UNBOUND, + 0); if (!dev_priv->mm.userptr_wq) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c new file mode 100644 index 000000000000..888b7d3f04c3 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gemfs.c @@ -0,0 +1,75 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/pagemap.h> + +#include "i915_drv.h" +#include "i915_gemfs.h" + +int i915_gemfs_init(struct drm_i915_private *i915) +{ + struct file_system_type *type; + struct vfsmount *gemfs; + + type = get_fs_type("tmpfs"); + if (!type) + return -ENODEV; + + gemfs = kern_mount(type); + if (IS_ERR(gemfs)) + return PTR_ERR(gemfs); + + /* + * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most + * likely 2M. Note that within_size may overallocate huge-pages, if say + * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under + * memory pressure shmem should split any huge-pages which can be + * shrunk. + */ + + if (has_transparent_hugepage()) { + struct super_block *sb = gemfs->mnt_sb; + /* FIXME: Disabled until we get W/A for read BW issue. 
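/*
 * Editorial note: tmpfs understands huge=never, huge=always,
 * huge=within_size and huge=advise. The within_size behaviour described in
 * the comment above is the eventual goal here, so once the bandwidth
 * workaround lands the remount below would presumably switch to:
 *
 *	char options[] = "huge=within_size";
 */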
*/ + char options[] = "huge=never"; + int flags = 0; + int err; + + err = sb->s_op->remount_fs(sb, &flags, options); + if (err) { + kern_unmount(gemfs); + return err; + } + } + + i915->mm.gemfs = gemfs; + + return 0; +} + +void i915_gemfs_fini(struct drm_i915_private *i915) +{ + kern_unmount(i915->mm.gemfs); +} diff --git a/drivers/gpu/drm/i915/i915_gemfs.h b/drivers/gpu/drm/i915/i915_gemfs.h new file mode 100644 index 000000000000..cca8bdc5b93e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gemfs.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_GEMFS_H__ +#define __I915_GEMFS_H__ + +struct drm_i915_private; + +int i915_gemfs_init(struct drm_i915_private *i915); + +void i915_gemfs_fini(struct drm_i915_private *i915); + +#endif diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 0c779671fe2d..944059322daa 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -30,6 +30,8 @@ #include <generated/utsrelease.h> #include <linux/stop_machine.h> #include <linux/zlib.h> +#include <drm/drm_print.h> + #include "i915_drv.h" static const char *engine_str(int engine) @@ -175,6 +177,21 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e, #define err_printf(e, ...) 
i915_error_printf(e, __VA_ARGS__) #define err_puts(e, s) i915_error_puts(e, s) +static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf) +{ + i915_error_vprintf(p->arg, vaf->fmt, *vaf->va); +} + +static inline struct drm_printer +i915_error_printer(struct drm_i915_error_state_buf *e) +{ + struct drm_printer p = { + .printfn = __i915_printfn_error, + .arg = e, + }; + return p; +} + #ifdef CONFIG_DRM_I915_COMPRESS_ERROR struct compress { @@ -377,9 +394,9 @@ static void error_print_request(struct drm_i915_error_state_buf *m, if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, + erq->context, erq->seqno, erq->priority, jiffies_to_msecs(jiffies - erq->jiffies), erq->head, erq->tail); } @@ -388,15 +405,18 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct drm_i915_error_context *ctx) { - err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, - ctx->ban_score, ctx->guilty, ctx->active); + ctx->priority, ctx->ban_score, ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, const struct drm_i915_error_engine *ee) { + int n; + err_printf(m, "%s command stream:\n", engine_str(ee->engine_id)); + err_printf(m, " IDLE?: %s\n", yesno(ee->idle)); err_printf(m, " START: 0x%08x\n", ee->start); err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head); err_printf(m, " TAIL: 0x%08x [0x%08x, 0x%08x]\n", @@ -465,8 +485,11 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, jiffies_to_msecs(jiffies - ee->hangcheck_timestamp)); err_printf(m, " engine reset count: %u\n", ee->reset_count); - error_print_request(m, " ELSP[0]: ", &ee->execlist[0]); - error_print_request(m, " ELSP[1]: ", &ee->execlist[1]); + for (n = 0; n < ee->num_ports; n++) { + err_printf(m, " ELSP[%d]:", n); + error_print_request(m, " ", &ee->execlist[n]); + } + error_print_context(m, " Active context: ", &ee->context); } @@ -542,34 +565,17 @@ static void print_error_obj(struct drm_i915_error_state_buf *m, static void err_print_capabilities(struct drm_i915_error_state_buf *m, const struct intel_device_info *info) { -#define PRINT_FLAG(x) err_printf(m, #x ": %s\n", yesno(info->x)) - DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); -#undef PRINT_FLAG -} + struct drm_printer p = i915_error_printer(m); -static __always_inline void err_print_param(struct drm_i915_error_state_buf *m, - const char *name, - const char *type, - const void *x) -{ - if (!__builtin_strcmp(type, "bool")) - err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); - else if (!__builtin_strcmp(type, "int")) - err_printf(m, "i915.%s=%d\n", name, *(const int *)x); - else if (!__builtin_strcmp(type, "unsigned int")) - err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); - else if (!__builtin_strcmp(type, "char *")) - err_printf(m, "i915.%s=%s\n", name, *(const char **)x); - else - BUILD_BUG(); + intel_device_info_dump_flags(info, &p); } static void err_print_params(struct drm_i915_error_state_buf *m, - const struct i915_params *p) + const struct i915_params *params) { -#define PRINT(T, x) err_print_param(m, #x, #T, 
&p->x); - I915_PARAMS_FOR_EACH(PRINT); -#undef PRINT + struct drm_printer p = i915_error_printer(m); + + i915_params_dump(params, &p); } static void err_print_pciid(struct drm_i915_error_state_buf *m, @@ -584,6 +590,21 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m, pdev->subsystem_device); } +static void err_print_uc(struct drm_i915_error_state_buf *m, + const struct i915_error_uc *error_uc) +{ + struct drm_printer p = i915_error_printer(m); + const struct i915_gpu_state *error = + container_of(error_uc, typeof(*error), uc); + + if (!error->device_info.has_guc) + return; + + intel_uc_fw_dump(&error_uc->guc_fw, &p); + intel_uc_fw_dump(&error_uc->huc_fw, &p); + print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log); +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, const struct i915_gpu_state *error) { @@ -754,11 +775,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, dev_priv->engine[i], "WA batchbuffer", ee->wa_batchbuffer); - } - print_error_obj(m, NULL, "Semaphores", error->semaphore); - - print_error_obj(m, NULL, "GuC log buffer", error->guc_log); + print_error_obj(m, dev_priv->engine[i], + "NULL context", ee->default_state); + } if (error->overlay) intel_overlay_print_error_state(m, error->overlay); @@ -768,6 +788,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_print_capabilities(m, &error->device_info); err_print_params(m, &error->params); + err_print_uc(m, &error->uc); if (m->bytes == 0 && m->err) return m->err; @@ -826,6 +847,22 @@ static __always_inline void free_param(const char *type, void *x) kfree(*(void **)x); } +static void cleanup_params(struct i915_gpu_state *error) +{ +#define FREE(T, x, ...) free_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(FREE); +#undef FREE +} + +static void cleanup_uc_state(struct i915_gpu_state *error) +{ + struct i915_error_uc *error_uc = &error->uc; + + kfree(error_uc->guc_fw.path); + kfree(error_uc->huc_fw.path); + i915_error_object_free(error_uc->guc_log); +} + void __i915_gpu_state_free(struct kref *error_ref) { struct i915_gpu_state *error = @@ -851,9 +888,6 @@ void __i915_gpu_state_free(struct kref *error_ref) kfree(ee->waiters); } - i915_error_object_free(error->semaphore); - i915_error_object_free(error->guc_log); - for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) kfree(error->active_bo[i]); kfree(error->pinned_bo); @@ -861,9 +895,8 @@ void __i915_gpu_state_free(struct kref *error_ref) kfree(error->overlay); kfree(error->display); -#define FREE(T, x) free_param(#T, &error->params.x); - I915_PARAMS_FOR_EACH(FREE); -#undef FREE + cleanup_params(error); + cleanup_uc_state(error); kfree(error); } @@ -907,7 +940,7 @@ i915_error_object_create(struct drm_i915_private *i915, ggtt->base.insert_page(&ggtt->base, dma, slot, I915_CACHE_NONE, 0); - s = io_mapping_map_atomic_wc(&ggtt->mappable, slot); + s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); ret = compress_page(&compress, (void __force *)s, dst); io_mapping_unmap_atomic(s); @@ -1066,34 +1099,6 @@ gen8_engine_sync_index(struct intel_engine_cs *engine, return idx; } -static void gen8_record_semaphore_state(struct i915_gpu_state *error, - struct intel_engine_cs *engine, - struct drm_i915_error_engine *ee) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_engine_cs *to; - enum intel_engine_id id; - - if (!error->semaphore) - return; - - for_each_engine(to, dev_priv, id) { - int idx; - u16 signal_offset; - u32 *tmp; - - if (engine == to) - continue; - - 
signal_offset = - (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4; - tmp = error->semaphore->pages[0]; - idx = gen8_engine_sync_index(engine, to); - - ee->semaphore_mboxes[idx] = tmp[signal_offset]; - } -} - static void gen6_record_semaphore_state(struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1167,11 +1172,12 @@ static void error_record_engine_registers(struct i915_gpu_state *error, if (INTEL_GEN(dev_priv) >= 6) { ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base)); - ee->fault_reg = I915_READ(RING_FAULT_REG(engine)); - if (INTEL_GEN(dev_priv) >= 8) - gen8_record_semaphore_state(error, engine, ee); - else + if (INTEL_GEN(dev_priv) >= 8) { + ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG); + } else { gen6_record_semaphore_state(engine, ee); + ee->fault_reg = I915_READ(RING_FAULT_REG(engine)); + } } if (INTEL_GEN(dev_priv) >= 4) { @@ -1234,6 +1240,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error, ee->hws = I915_READ(mmio); } + ee->idle = intel_engine_is_idle(engine); ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; ee->hangcheck_action = engine->hangcheck.action; ee->hangcheck_stalled = engine->hangcheck.stalled; @@ -1266,6 +1273,7 @@ static void record_request(struct drm_i915_gem_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; + erq->priority = request->priotree.priority; erq->ban_score = atomic_read(&request->ctx->ban_score); erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; @@ -1327,17 +1335,19 @@ static void engine_record_requests(struct intel_engine_cs *engine, static void error_record_engine_execlists(struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { - const struct execlist_port *port = engine->execlist_port; + const struct intel_engine_execlists * const execlists = &engine->execlists; unsigned int n; - for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { - struct drm_i915_gem_request *rq = port_request(&port[n]); + for (n = 0; n < execlists_num_ports(execlists); n++) { + struct drm_i915_gem_request *rq = port_request(&execlists->port[n]); if (!rq) break; record_request(rq, &ee->execlist[n]); } + + ee->num_ports = n; } static void record_context(struct drm_i915_error_context *e, @@ -1357,6 +1367,7 @@ static void record_context(struct drm_i915_error_context *e, e->handle = ctx->user_handle; e->hw_id = ctx->hw_id; + e->priority = ctx->priority; e->ban_score = atomic_read(&ctx->ban_score); e->guilty = atomic_read(&ctx->guilty_count); e->active = atomic_read(&ctx->active_count); @@ -1391,15 +1402,30 @@ static void request_record_user_bo(struct drm_i915_gem_request *request, ee->user_bo_count = count; } +static struct drm_i915_error_object * +capture_object(struct drm_i915_private *dev_priv, + struct drm_i915_gem_object *obj) +{ + if (obj && i915_gem_object_has_pages(obj)) { + struct i915_vma fake = { + .node = { .start = U64_MAX, .size = obj->base.size }, + .size = obj->base.size, + .pages = obj->mm.pages, + .obj = obj, + }; + + return i915_error_object_create(dev_priv, &fake); + } else { + return NULL; + } +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &dev_priv->ggtt; int i; - error->semaphore = - i915_error_object_create(dev_priv, dev_priv->semaphore); - for (i = 0; i < I915_NUM_ENGINES; i++) { struct intel_engine_cs *engine = dev_priv->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; @@ -1465,6 +1491,9 @@ static void 
i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->wa_ctx = i915_error_object_create(dev_priv, engine->wa_ctx.vma); + + ee->default_state = + capture_object(dev_priv, engine->default_state); } } @@ -1550,15 +1579,25 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, error->pinned_bo = bo; } -static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, - struct i915_gpu_state *error) +static void capture_uc_state(struct i915_gpu_state *error) { - /* Capturing log buf contents won't be useful if logging was disabled */ - if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0)) + struct drm_i915_private *i915 = error->i915; + struct i915_error_uc *error_uc = &error->uc; + + /* Capturing uC state won't be useful if there is no GuC */ + if (!error->device_info.has_guc) return; - error->guc_log = i915_error_object_create(dev_priv, - dev_priv->guc.log.vma); + error_uc->guc_fw = i915->guc.fw; + error_uc->huc_fw = i915->huc.fw; + + /* Non-default firmware paths will be specified by the modparam. + * As modparams are generally accessible from userspace, make + * explicit copies of the firmware paths. + */ + error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC); + error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC); + error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma); } /* Capture all registers which don't fit into another category. */ @@ -1665,8 +1704,8 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { error->awake = dev_priv->gt.awake; - error->wakelock = atomic_read(&dev_priv->pm.wakeref_count); - error->suspended = dev_priv->pm.suspended; + error->wakelock = atomic_read(&dev_priv->runtime_pm.wakeref_count); + error->suspended = dev_priv->runtime_pm.suspended; error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU @@ -1686,6 +1725,14 @@ static __always_inline void dup_param(const char *type, void *x) *(void **)x = kstrdup(*(void **)x, GFP_ATOMIC); } +static void capture_params(struct i915_gpu_state *error) +{ + error->params = i915_modparams; +#define DUP(T, x, ...) 
dup_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(DUP); +#undef DUP +} + static int capture(void *data) { struct i915_gpu_state *error = data; @@ -1696,10 +1743,8 @@ static int capture(void *data) ktime_to_timeval(ktime_sub(ktime_get(), error->i915->gt.last_init_time)); - error->params = i915; -#define DUP(T, x) dup_param(#T, &error->params.x); - I915_PARAMS_FOR_EACH(DUP); -#undef DUP + capture_params(error); + capture_uc_state(error); i915_capture_gen_state(error->i915, error); i915_capture_reg_state(error->i915, error); @@ -1707,7 +1752,6 @@ static int capture(void *data) i915_gem_record_rings(error->i915, error); i915_capture_active_buffers(error->i915, error); i915_capture_pinned_buffers(error->i915, error); - i915_gem_capture_guc_log_buffer(error->i915, error); error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); @@ -1751,7 +1795,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, struct i915_gpu_state *error; unsigned long flags; - if (!i915.error_capture) + if (!i915_modparams.error_capture) return; if (READ_ONCE(dev_priv->gpu_error.first_error)) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b63893eeca73..3517c6548e2c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -126,7 +126,7 @@ static const u32 hpd_bxt[HPD_NUM_PINS] = { POSTING_READ(GEN8_##type##_IIR(which)); \ } while (0) -#define GEN5_IRQ_RESET(type) do { \ +#define GEN3_IRQ_RESET(type) do { \ I915_WRITE(type##IMR, 0xffffffff); \ POSTING_READ(type##IMR); \ I915_WRITE(type##IER, 0); \ @@ -136,10 +136,20 @@ static const u32 hpd_bxt[HPD_NUM_PINS] = { POSTING_READ(type##IIR); \ } while (0) +#define GEN2_IRQ_RESET(type) do { \ + I915_WRITE16(type##IMR, 0xffff); \ + POSTING_READ16(type##IMR); \ + I915_WRITE16(type##IER, 0); \ + I915_WRITE16(type##IIR, 0xffff); \ + POSTING_READ16(type##IIR); \ + I915_WRITE16(type##IIR, 0xffff); \ + POSTING_READ16(type##IIR); \ +} while (0) + /* * We should clear IMR at preinstall/uninstall, and just check at postinstall. 
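/*
 * Editorial note: IIR is written and flushed twice in the RESET macros
 * above because the IIR registers are double buffered; an edge latched
 * while a bit was still set would otherwise reassert right after a single
 * clear. Expanded for an empty prefix, GEN2_IRQ_RESET() is roughly:
 *
 *	I915_WRITE16(IMR, 0xffff);	mask every source
 *	POSTING_READ16(IMR);
 *	I915_WRITE16(IER, 0);		disable delivery
 *	I915_WRITE16(IIR, 0xffff);	clear the live bits...
 *	POSTING_READ16(IIR);
 *	I915_WRITE16(IIR, 0xffff);	...and any queued second edge
 *	POSTING_READ16(IIR);
 */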
*/ -static void gen5_assert_iir_is_zero(struct drm_i915_private *dev_priv, +static void gen3_assert_iir_is_zero(struct drm_i915_private *dev_priv, i915_reg_t reg) { u32 val = I915_READ(reg); @@ -155,20 +165,43 @@ static void gen5_assert_iir_is_zero(struct drm_i915_private *dev_priv, POSTING_READ(reg); } +static void gen2_assert_iir_is_zero(struct drm_i915_private *dev_priv, + i915_reg_t reg) +{ + u16 val = I915_READ16(reg); + + if (val == 0) + return; + + WARN(1, "Interrupt register 0x%x is not zero: 0x%08x\n", + i915_mmio_reg_offset(reg), val); + I915_WRITE16(reg, 0xffff); + POSTING_READ16(reg); + I915_WRITE16(reg, 0xffff); + POSTING_READ16(reg); +} + #define GEN8_IRQ_INIT_NDX(type, which, imr_val, ier_val) do { \ - gen5_assert_iir_is_zero(dev_priv, GEN8_##type##_IIR(which)); \ + gen3_assert_iir_is_zero(dev_priv, GEN8_##type##_IIR(which)); \ I915_WRITE(GEN8_##type##_IER(which), (ier_val)); \ I915_WRITE(GEN8_##type##_IMR(which), (imr_val)); \ POSTING_READ(GEN8_##type##_IMR(which)); \ } while (0) -#define GEN5_IRQ_INIT(type, imr_val, ier_val) do { \ - gen5_assert_iir_is_zero(dev_priv, type##IIR); \ +#define GEN3_IRQ_INIT(type, imr_val, ier_val) do { \ + gen3_assert_iir_is_zero(dev_priv, type##IIR); \ I915_WRITE(type##IER, (ier_val)); \ I915_WRITE(type##IMR, (imr_val)); \ POSTING_READ(type##IMR); \ } while (0) +#define GEN2_IRQ_INIT(type, imr_val, ier_val) do { \ + gen2_assert_iir_is_zero(dev_priv, type##IIR); \ + I915_WRITE16(type##IER, (ier_val)); \ + I915_WRITE16(type##IMR, (imr_val)); \ + POSTING_READ16(type##IMR); \ +} while (0) + static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir); static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir); @@ -336,7 +369,7 @@ void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask) __gen6_mask_pm_irq(dev_priv, mask); } -void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) +static void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) { i915_reg_t reg = gen6_pm_iir(dev_priv); @@ -347,7 +380,7 @@ void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) POSTING_READ(reg); } -void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) +static void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) { lockdep_assert_held(&dev_priv->irq_lock); @@ -357,7 +390,7 @@ void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) /* unmask_pm_irq provides an implicit barrier (POSTING_READ) */ } -void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_mask) +static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_mask) { lockdep_assert_held(&dev_priv->irq_lock); @@ -371,19 +404,21 @@ void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) { spin_lock_irq(&dev_priv->irq_lock); gen6_reset_pm_iir(dev_priv, dev_priv->pm_rps_events); - dev_priv->rps.pm_iir = 0; + dev_priv->gt_pm.rps.pm_iir = 0; spin_unlock_irq(&dev_priv->irq_lock); } void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) { - if (READ_ONCE(dev_priv->rps.interrupts_enabled)) + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + if (READ_ONCE(rps->interrupts_enabled)) return; spin_lock_irq(&dev_priv->irq_lock); - WARN_ON_ONCE(dev_priv->rps.pm_iir); + WARN_ON_ONCE(rps->pm_iir); WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); - dev_priv->rps.interrupts_enabled = true; + rps->interrupts_enabled = true; gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); 
spin_unlock_irq(&dev_priv->irq_lock); @@ -391,11 +426,13 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) { - if (!READ_ONCE(dev_priv->rps.interrupts_enabled)) + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + if (!READ_ONCE(rps->interrupts_enabled)) return; spin_lock_irq(&dev_priv->irq_lock); - dev_priv->rps.interrupts_enabled = false; + rps->interrupts_enabled = false; I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); @@ -405,11 +442,11 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) synchronize_irq(dev_priv->drm.irq); /* Now that we will not be generating any more work, flush any - * outsanding tasks. As we are called on the RPS idle path, + * outstanding tasks. As we are called on the RPS idle path, * we will reset the GPU to minimum frequencies, so the current * state of the worker can be discarded. */ - cancel_work_sync(&dev_priv->rps.work); + cancel_work_sync(&rps->work); gen6_reset_rps_interrupts(dev_priv); } @@ -534,62 +571,16 @@ void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, POSTING_READ(SDEIMR); } -static void -__i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, - u32 enable_mask, u32 status_mask) -{ - i915_reg_t reg = PIPESTAT(pipe); - u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK; - - lockdep_assert_held(&dev_priv->irq_lock); - WARN_ON(!intel_irqs_enabled(dev_priv)); - - if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK || - status_mask & ~PIPESTAT_INT_STATUS_MASK, - "pipe %c: enable_mask=0x%x, status_mask=0x%x\n", - pipe_name(pipe), enable_mask, status_mask)) - return; - - if ((pipestat & enable_mask) == enable_mask) - return; - - dev_priv->pipestat_irq_mask[pipe] |= status_mask; - - /* Enable the interrupt, clear any pending status */ - pipestat |= enable_mask | status_mask; - I915_WRITE(reg, pipestat); - POSTING_READ(reg); -} - -static void -__i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, - u32 enable_mask, u32 status_mask) +u32 i915_pipestat_enable_mask(struct drm_i915_private *dev_priv, + enum pipe pipe) { - i915_reg_t reg = PIPESTAT(pipe); - u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK; + u32 status_mask = dev_priv->pipestat_irq_mask[pipe]; + u32 enable_mask = status_mask << 16; lockdep_assert_held(&dev_priv->irq_lock); - WARN_ON(!intel_irqs_enabled(dev_priv)); - - if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK || - status_mask & ~PIPESTAT_INT_STATUS_MASK, - "pipe %c: enable_mask=0x%x, status_mask=0x%x\n", - pipe_name(pipe), enable_mask, status_mask)) - return; - - if ((pipestat & enable_mask) == 0) - return; - - dev_priv->pipestat_irq_mask[pipe] &= ~status_mask; - pipestat &= ~enable_mask; - I915_WRITE(reg, pipestat); - POSTING_READ(reg); -} - -static u32 vlv_get_pipestat_enable_mask(struct drm_device *dev, u32 status_mask) -{ - u32 enable_mask = status_mask << 16; + if (INTEL_GEN(dev_priv) < 5) + goto out; /* * On pipe A we don't support the PSR interrupt yet, @@ -612,35 +603,59 @@ static u32 vlv_get_pipestat_enable_mask(struct drm_device *dev, u32 status_mask) if (status_mask & SPRITE1_FLIP_DONE_INT_STATUS_VLV) enable_mask |= SPRITE1_FLIP_DONE_INT_EN_VLV; +out: + WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK || + status_mask & ~PIPESTAT_INT_STATUS_MASK, + "pipe %c: enable_mask=0x%x, status_mask=0x%x\n", + pipe_name(pipe), enable_mask, status_mask); + return enable_mask; } -void -i915_enable_pipestat(struct drm_i915_private 
*dev_priv, enum pipe pipe, - u32 status_mask) +void i915_enable_pipestat(struct drm_i915_private *dev_priv, + enum pipe pipe, u32 status_mask) { + i915_reg_t reg = PIPESTAT(pipe); u32 enable_mask; - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - enable_mask = vlv_get_pipestat_enable_mask(&dev_priv->drm, - status_mask); - else - enable_mask = status_mask << 16; - __i915_enable_pipestat(dev_priv, pipe, enable_mask, status_mask); + WARN_ONCE(status_mask & ~PIPESTAT_INT_STATUS_MASK, + "pipe %c: status_mask=0x%x\n", + pipe_name(pipe), status_mask); + + lockdep_assert_held(&dev_priv->irq_lock); + WARN_ON(!intel_irqs_enabled(dev_priv)); + + if ((dev_priv->pipestat_irq_mask[pipe] & status_mask) == status_mask) + return; + + dev_priv->pipestat_irq_mask[pipe] |= status_mask; + enable_mask = i915_pipestat_enable_mask(dev_priv, pipe); + + I915_WRITE(reg, enable_mask | status_mask); + POSTING_READ(reg); } -void -i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, - u32 status_mask) +void i915_disable_pipestat(struct drm_i915_private *dev_priv, + enum pipe pipe, u32 status_mask) { + i915_reg_t reg = PIPESTAT(pipe); u32 enable_mask; - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - enable_mask = vlv_get_pipestat_enable_mask(&dev_priv->drm, - status_mask); - else - enable_mask = status_mask << 16; - __i915_disable_pipestat(dev_priv, pipe, enable_mask, status_mask); + WARN_ONCE(status_mask & ~PIPESTAT_INT_STATUS_MASK, + "pipe %c: status_mask=0x%x\n", + pipe_name(pipe), status_mask); + + lockdep_assert_held(&dev_priv->irq_lock); + WARN_ON(!intel_irqs_enabled(dev_priv)); + + if ((dev_priv->pipestat_irq_mask[pipe] & status_mask) == 0) + return; + + dev_priv->pipestat_irq_mask[pipe] &= ~status_mask; + enable_mask = i915_pipestat_enable_mask(dev_priv, pipe); + + I915_WRITE(reg, enable_mask | status_mask); + POSTING_READ(reg); } /** @@ -772,6 +787,57 @@ static u32 g4x_get_vblank_counter(struct drm_device *dev, unsigned int pipe) return I915_READ(PIPE_FRMCOUNT_G4X(pipe)); } +/* + * On certain encoders on certain platforms, the pipe + * scanline register will not work to get the scanline, + * since the timings are driven from the PORT, or there are + * issues with scanline register updates. + * This function will use Framestamp and current + * timestamp registers to calculate the scanline. + */ +static u32 __intel_get_crtc_scanline_from_timestamp(struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct drm_vblank_crtc *vblank = + &crtc->base.dev->vblank[drm_crtc_index(&crtc->base)]; + const struct drm_display_mode *mode = &vblank->hwmode; + u32 vblank_start = mode->crtc_vblank_start; + u32 vtotal = mode->crtc_vtotal; + u32 htotal = mode->crtc_htotal; + u32 clock = mode->crtc_clock; + u32 scanline, scan_prev_time, scan_curr_time, scan_post_time; + + /* + * To avoid the race condition where we might cross into the + * next vblank just between the PIPE_FRMTMSTMP and TIMESTAMP_CTR + * reads, we make sure we read PIPE_FRMTMSTMP and TIMESTAMP_CTR + * during the same frame. + */ + do { + /* + * This field provides read back of the display + * pipe frame time stamp. The time stamp value + * is sampled at every start of vertical blank. + */ + scan_prev_time = I915_READ_FW(PIPE_FRMTMSTMP(crtc->pipe)); + + /* + * The TIMESTAMP_CTR register has the current + * time stamp value.
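/*
 * Editorial worked example for the conversion after this loop, assuming
 * the timestamp counters tick in microseconds (which the 1000 * htotal
 * divisor implies). With crtc_clock = 148500 (kHz) and htotal = 2200, a
 * delta of 10000 ticks between the two frame stamps gives:
 *
 *	scanline = 10000 * 148500 / (1000 * 2200);	= 675 lines
 *
 * which is then clamped to vtotal - 1 and rebased onto vblank_start
 * modulo vtotal.
 */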
+ */ + scan_curr_time = I915_READ_FW(IVB_TIMESTAMP_CTR); + + scan_post_time = I915_READ_FW(PIPE_FRMTMSTMP(crtc->pipe)); + } while (scan_post_time != scan_prev_time); + + scanline = div_u64(mul_u32_u32(scan_curr_time - scan_prev_time, + clock), 1000 * htotal); + scanline = min(scanline, vtotal - 1); + scanline = (scanline + vblank_start) % vtotal; + + return scanline; +} + /* I915_READ_FW, only for fast reads of display block, no need for forcewake etc. */ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) { @@ -788,6 +854,9 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) vblank = &crtc->base.dev->vblank[drm_crtc_index(&crtc->base)]; mode = &vblank->hwmode; + if (mode->private_flags & I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP) + return __intel_get_crtc_scanline_from_timestamp(crtc); + vtotal = mode->crtc_vtotal; if (mode->flags & DRM_MODE_FLAG_INTERLACE) vtotal /= 2; @@ -999,12 +1068,17 @@ static void notify_ring(struct intel_engine_cs *engine) struct drm_i915_gem_request *rq = NULL; struct intel_wait *wait; + if (!engine->breadcrumbs.irq_armed) + return; + atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); spin_lock(&engine->breadcrumbs.irq_lock); wait = engine->breadcrumbs.irq_wait; if (wait) { + bool wakeup = engine->irq_seqno_barrier; + /* We use a callback from the dma-fence to submit * requests after waiting on our own requests. To * ensure minimum delay in queuing the next request to @@ -1017,14 +1091,21 @@ static void notify_ring(struct intel_engine_cs *engine) * and many waiters. */ if (i915_seqno_passed(intel_engine_get_seqno(engine), - wait->seqno) && - !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &wait->request->fence.flags)) - rq = i915_gem_request_get(wait->request); + wait->seqno)) { + struct drm_i915_gem_request *waiter = wait->request; + + wakeup = true; + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &waiter->fence.flags) && + intel_wait_check_request(wait, waiter)) + rq = i915_gem_request_get(waiter); + } - wake_up_process(wait->tsk); + if (wakeup) + wake_up_process(wait->tsk); } else { - __intel_engine_disarm_breadcrumbs(engine); + if (engine->breadcrumbs.irq_armed) + __intel_engine_disarm_breadcrumbs(engine); } spin_unlock(&engine->breadcrumbs.irq_lock); @@ -1046,12 +1127,13 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv, void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) { - memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei)); + memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei)); } static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) { - const struct intel_rps_ei *prev = &dev_priv->rps.ei; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + const struct intel_rps_ei *prev = &rps->ei; struct intel_rps_ei now; u32 events = 0; @@ -1078,28 +1160,29 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) c0 = max(render, media); c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */ - if (c0 > time * dev_priv->rps.up_threshold) + if (c0 > time * rps->up_threshold) events = GEN6_PM_RP_UP_THRESHOLD; - else if (c0 < time * dev_priv->rps.down_threshold) + else if (c0 < time * rps->down_threshold) events = GEN6_PM_RP_DOWN_THRESHOLD; } - dev_priv->rps.ei = now; + rps->ei = now; return events; } static void gen6_pm_rps_work(struct work_struct *work) { struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, rps.work); + container_of(work, struct drm_i915_private, gt_pm.rps.work); + struct intel_rps *rps = 
&dev_priv->gt_pm.rps; bool client_boost = false; int new_delay, adj, min, max; u32 pm_iir = 0; spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->rps.interrupts_enabled) { - pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir); - client_boost = atomic_read(&dev_priv->rps.num_waiters); + if (rps->interrupts_enabled) { + pm_iir = fetch_and_zero(&rps->pm_iir); + client_boost = atomic_read(&rps->num_waiters); } spin_unlock_irq(&dev_priv->irq_lock); @@ -1108,18 +1191,18 @@ static void gen6_pm_rps_work(struct work_struct *work) if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) goto out; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); - adj = dev_priv->rps.last_adj; - new_delay = dev_priv->rps.cur_freq; - min = dev_priv->rps.min_freq_softlimit; - max = dev_priv->rps.max_freq_softlimit; + adj = rps->last_adj; + new_delay = rps->cur_freq; + min = rps->min_freq_softlimit; + max = rps->max_freq_softlimit; if (client_boost) - max = dev_priv->rps.max_freq; - if (client_boost && new_delay < dev_priv->rps.boost_freq) { - new_delay = dev_priv->rps.boost_freq; + max = rps->max_freq; + if (client_boost && new_delay < rps->boost_freq) { + new_delay = rps->boost_freq; adj = 0; } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { if (adj > 0) @@ -1127,15 +1210,15 @@ static void gen6_pm_rps_work(struct work_struct *work) else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1; - if (new_delay >= dev_priv->rps.max_freq_softlimit) + if (new_delay >= rps->max_freq_softlimit) adj = 0; } else if (client_boost) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { - if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) - new_delay = dev_priv->rps.efficient_freq; - else if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit) - new_delay = dev_priv->rps.min_freq_softlimit; + if (rps->cur_freq > rps->efficient_freq) + new_delay = rps->efficient_freq; + else if (rps->cur_freq > rps->min_freq_softlimit) + new_delay = rps->min_freq_softlimit; adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { if (adj < 0) @@ -1143,13 +1226,13 @@ static void gen6_pm_rps_work(struct work_struct *work) else /* CHV needs even encode values */ adj = IS_CHERRYVIEW(dev_priv) ? 
-2 : -1; - if (new_delay <= dev_priv->rps.min_freq_softlimit) + if (new_delay <= rps->min_freq_softlimit) adj = 0; } else { /* unknown event */ adj = 0; } - dev_priv->rps.last_adj = adj; + rps->last_adj = adj; /* sysfs frequency interfaces may have snuck in while servicing the * interrupt @@ -1159,15 +1242,15 @@ static void gen6_pm_rps_work(struct work_struct *work) if (intel_set_rps(dev_priv, new_delay)) { DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); - dev_priv->rps.last_adj = 0; + rps->last_adj = 0; } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); out: /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->rps.interrupts_enabled) + if (rps->interrupts_enabled) gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); } @@ -1305,10 +1388,11 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, static void gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) { + struct intel_engine_execlists * const execlists = &engine->execlists; bool tasklet = false; if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - if (port_count(&engine->execlist_port[0])) { + if (READ_ONCE(engine->execlists.active)) { __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); tasklet = true; } @@ -1316,11 +1400,11 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { notify_ring(engine); - tasklet |= i915.enable_guc_submission; + tasklet |= USES_GUC_SUBMISSION(engine->i915); } if (tasklet) - tasklet_hi_schedule(&engine->irq_tasklet); + tasklet_hi_schedule(&execlists->tasklet); } static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv, @@ -1573,11 +1657,11 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, * bonkers. So let's just wait for the next vblank and read * out the buggy result. * - * On CHV sometimes the second CRC is bonkers as well, so + * On GEN8+ sometimes the second CRC is bonkers as well, so * don't trust that one either. */ if (pipe_crc->skipped == 0 || - (IS_CHERRYVIEW(dev_priv) && pipe_crc->skipped == 1)) { + (INTEL_GEN(dev_priv) >= 8 && pipe_crc->skipped == 1)) { pipe_crc->skipped++; spin_unlock(&pipe_crc->lock); return; @@ -1649,12 +1733,14 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, * the work queue. 
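The up/down branches of gen6_pm_rps_work() above implement a small multiplicative-increase controller: a repeated threshold event doubles the previous step, anything else restarts the step at one unit, and CHV uses two units because it needs even encode values. The decision in isolation (plain C sketch of the logic shown above):

static int next_rps_adj(int last_adj, bool up, bool is_chv)
{
	int unit = is_chv ? 2 : 1;	/* CHV needs even encode values */

	if (up)
		return last_adj > 0 ? last_adj * 2 : unit;

	/* the caller still clamps new_delay to [min, max] and zeroes
	 * the step once a soft limit is reached */
	return last_adj < 0 ? last_adj * 2 : -unit;
}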
*/ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + if (pm_iir & dev_priv->pm_rps_events) { spin_lock(&dev_priv->irq_lock); gen6_mask_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); - if (dev_priv->rps.interrupts_enabled) { - dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events; - schedule_work(&dev_priv->rps.work); + if (rps->interrupts_enabled) { + rps->pm_iir |= pm_iir & dev_priv->pm_rps_events; + schedule_work(&rps->work); } spin_unlock(&dev_priv->irq_lock); } @@ -1706,8 +1792,21 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) } } -static void valleyview_pipestat_irq_ack(struct drm_i915_private *dev_priv, - u32 iir, u32 pipe_stats[I915_MAX_PIPES]) +static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv) +{ + enum pipe pipe; + + for_each_pipe(dev_priv, pipe) { + I915_WRITE(PIPESTAT(pipe), + PIPESTAT_INT_STATUS_MASK | + PIPE_FIFO_UNDERRUN_STATUS); + + dev_priv->pipestat_irq_mask[pipe] = 0; + } +} + +static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv, + u32 iir, u32 pipe_stats[I915_MAX_PIPES]) { int pipe; @@ -1720,7 +1819,7 @@ static void valleyview_pipestat_irq_ack(struct drm_i915_private *dev_priv, for_each_pipe(dev_priv, pipe) { i915_reg_t reg; - u32 mask, iir_bit = 0; + u32 status_mask, enable_mask, iir_bit = 0; /* * PIPESTAT bits get signalled even when the interrupt is @@ -1731,7 +1830,7 @@ static void valleyview_pipestat_irq_ack(struct drm_i915_private *dev_priv, */ /* fifo underruns are filtered in the underrun handler. */ - mask = PIPE_FIFO_UNDERRUN_STATUS; + status_mask = PIPE_FIFO_UNDERRUN_STATUS; switch (pipe) { case PIPE_A: @@ -1745,25 +1844,92 @@ static void valleyview_pipestat_irq_ack(struct drm_i915_private *dev_priv, break; } if (iir & iir_bit) - mask |= dev_priv->pipestat_irq_mask[pipe]; + status_mask |= dev_priv->pipestat_irq_mask[pipe]; - if (!mask) + if (!status_mask) continue; reg = PIPESTAT(pipe); - mask |= PIPESTAT_INT_ENABLE_MASK; - pipe_stats[pipe] = I915_READ(reg) & mask; + pipe_stats[pipe] = I915_READ(reg) & status_mask; + enable_mask = i915_pipestat_enable_mask(dev_priv, pipe); /* * Clear the PIPE*STAT regs before the IIR */ - if (pipe_stats[pipe] & (PIPE_FIFO_UNDERRUN_STATUS | - PIPESTAT_INT_STATUS_MASK)) - I915_WRITE(reg, pipe_stats[pipe]); + if (pipe_stats[pipe]) + I915_WRITE(reg, enable_mask | pipe_stats[pipe]); } spin_unlock(&dev_priv->irq_lock); } +static void i8xx_pipestat_irq_handler(struct drm_i915_private *dev_priv, + u16 iir, u32 pipe_stats[I915_MAX_PIPES]) +{ + enum pipe pipe; + + for_each_pipe(dev_priv, pipe) { + if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS) + drm_handle_vblank(&dev_priv->drm, pipe); + + if (pipe_stats[pipe] & PIPE_CRC_DONE_INTERRUPT_STATUS) + i9xx_pipe_crc_irq_handler(dev_priv, pipe); + + if (pipe_stats[pipe] & PIPE_FIFO_UNDERRUN_STATUS) + intel_cpu_fifo_underrun_irq_handler(dev_priv, pipe); + } +} + +static void i915_pipestat_irq_handler(struct drm_i915_private *dev_priv, + u32 iir, u32 pipe_stats[I915_MAX_PIPES]) +{ + bool blc_event = false; + enum pipe pipe; + + for_each_pipe(dev_priv, pipe) { + if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS) + drm_handle_vblank(&dev_priv->drm, pipe); + + if (pipe_stats[pipe] & PIPE_LEGACY_BLC_EVENT_STATUS) + blc_event = true; + + if (pipe_stats[pipe] & PIPE_CRC_DONE_INTERRUPT_STATUS) + i9xx_pipe_crc_irq_handler(dev_priv, pipe); + + if (pipe_stats[pipe] & PIPE_FIFO_UNDERRUN_STATUS) +
intel_cpu_fifo_underrun_irq_handler(dev_priv, pipe); + } + + if (blc_event || (iir & I915_ASLE_INTERRUPT)) + intel_opregion_asle_intr(dev_priv); +} + +static void i965_pipestat_irq_handler(struct drm_i915_private *dev_priv, + u32 iir, u32 pipe_stats[I915_MAX_PIPES]) +{ + bool blc_event = false; + enum pipe pipe; + + for_each_pipe(dev_priv, pipe) { + if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS) + drm_handle_vblank(&dev_priv->drm, pipe); + + if (pipe_stats[pipe] & PIPE_LEGACY_BLC_EVENT_STATUS) + blc_event = true; + + if (pipe_stats[pipe] & PIPE_CRC_DONE_INTERRUPT_STATUS) + i9xx_pipe_crc_irq_handler(dev_priv, pipe); + + if (pipe_stats[pipe] & PIPE_FIFO_UNDERRUN_STATUS) + intel_cpu_fifo_underrun_irq_handler(dev_priv, pipe); + } + + if (blc_event || (iir & I915_ASLE_INTERRUPT)) + intel_opregion_asle_intr(dev_priv); + + if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS) + gmbus_irq_handler(dev_priv); +} + static void valleyview_pipestat_irq_handler(struct drm_i915_private *dev_priv, u32 pipe_stats[I915_MAX_PIPES]) { @@ -1879,7 +2045,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) /* Call regardless, as some status bits might not be * signalled in iir */ - valleyview_pipestat_irq_ack(dev_priv, iir, pipe_stats); + i9xx_pipestat_irq_ack(dev_priv, iir, pipe_stats); if (iir & (I915_LPE_PIPE_A_INTERRUPT | I915_LPE_PIPE_B_INTERRUPT)) @@ -1963,7 +2129,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) /* Call regardless, as some status bits might not be * signalled in iir */ - valleyview_pipestat_irq_ack(dev_priv, iir, pipe_stats); + i9xx_pipestat_irq_ack(dev_priv, iir, pipe_stats); if (iir & (I915_LPE_PIPE_A_INTERRUPT | I915_LPE_PIPE_B_INTERRUPT | @@ -2100,18 +2266,14 @@ static void ivb_err_int_handler(struct drm_i915_private *dev_priv) static void cpt_serr_int_handler(struct drm_i915_private *dev_priv) { u32 serr_int = I915_READ(SERR_INT); + enum pipe pipe; if (serr_int & SERR_INT_POISON) DRM_ERROR("PCH poison interrupt\n"); - if (serr_int & SERR_INT_TRANS_A_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_A); - - if (serr_int & SERR_INT_TRANS_B_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_B); - - if (serr_int & SERR_INT_TRANS_C_FIFO_UNDERRUN) - intel_pch_fifo_underrun_irq_handler(dev_priv, PIPE_C); + for_each_pipe(dev_priv, pipe) + if (serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pipe)) + intel_pch_fifo_underrun_irq_handler(dev_priv, pipe); I915_WRITE(SERR_INT, serr_int); } @@ -2860,7 +3022,7 @@ static void ibx_irq_reset(struct drm_i915_private *dev_priv) if (HAS_PCH_NOP(dev_priv)) return; - GEN5_IRQ_RESET(SDE); + GEN3_IRQ_RESET(SDE); if (HAS_PCH_CPT(dev_priv) || HAS_PCH_LPT(dev_priv)) I915_WRITE(SERR_INT, 0xffffffff); @@ -2888,15 +3050,13 @@ static void ibx_irq_pre_postinstall(struct drm_device *dev) static void gen5_gt_irq_reset(struct drm_i915_private *dev_priv) { - GEN5_IRQ_RESET(GT); + GEN3_IRQ_RESET(GT); if (INTEL_GEN(dev_priv) >= 6) - GEN5_IRQ_RESET(GEN6_PM); + GEN3_IRQ_RESET(GEN6_PM); } static void vlv_display_irq_reset(struct drm_i915_private *dev_priv) { - enum pipe pipe; - if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(DPINVGTT, DPINVGTT_STATUS_MASK_CHV); else @@ -2905,15 +3065,10 @@ static void vlv_display_irq_reset(struct drm_i915_private *dev_priv) i915_hotplug_interrupt_update_locked(dev_priv, 0xffffffff, 0); I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); - for_each_pipe(dev_priv, pipe) { - I915_WRITE(PIPESTAT(pipe), - PIPE_FIFO_UNDERRUN_STATUS | - PIPESTAT_INT_STATUS_MASK); - 
dev_priv->pipestat_irq_mask[pipe] = 0; - } + i9xx_pipestat_irq_reset(dev_priv); - GEN5_IRQ_RESET(VLV_); - dev_priv->irq_mask = ~0; + GEN3_IRQ_RESET(VLV_); + dev_priv->irq_mask = ~0u; } static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) @@ -2922,8 +3077,7 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) u32 enable_mask; enum pipe pipe; - pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV | - PIPE_CRC_DONE_INTERRUPT_STATUS; + pipestat_mask = PIPE_CRC_DONE_INTERRUPT_STATUS; i915_enable_pipestat(dev_priv, PIPE_A, PIPE_GMBUS_INTERRUPT_STATUS); for_each_pipe(dev_priv, pipe) @@ -2939,11 +3093,11 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT | I915_LPE_PIPE_C_INTERRUPT; - WARN_ON(dev_priv->irq_mask != ~0); + WARN_ON(dev_priv->irq_mask != ~0u); dev_priv->irq_mask = ~enable_mask; - GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask); + GEN3_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask); } /* drm_dma.h hooks @@ -2952,9 +3106,10 @@ static void ironlake_irq_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - I915_WRITE(HWSTAM, 0xffffffff); + if (IS_GEN5(dev_priv)) + I915_WRITE(HWSTAM, 0xffffffff); - GEN5_IRQ_RESET(DE); + GEN3_IRQ_RESET(DE); if (IS_GEN7(dev_priv)) I915_WRITE(GEN7_ERR_INT, 0xffffffff); @@ -2963,7 +3118,7 @@ static void ironlake_irq_reset(struct drm_device *dev) ibx_irq_reset(dev_priv); } -static void valleyview_irq_preinstall(struct drm_device *dev) +static void valleyview_irq_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -3001,9 +3156,9 @@ static void gen8_irq_reset(struct drm_device *dev) POWER_DOMAIN_PIPE(pipe))) GEN8_IRQ_RESET_NDX(DE_PIPE, pipe); - GEN5_IRQ_RESET(GEN8_DE_PORT_); - GEN5_IRQ_RESET(GEN8_DE_MISC_); - GEN5_IRQ_RESET(GEN8_PCU_); + GEN3_IRQ_RESET(GEN8_DE_PORT_); + GEN3_IRQ_RESET(GEN8_DE_MISC_); + GEN3_IRQ_RESET(GEN8_PCU_); if (HAS_PCH_SPLIT(dev_priv)) ibx_irq_reset(dev_priv); @@ -3016,10 +3171,17 @@ void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, enum pipe pipe; spin_lock_irq(&dev_priv->irq_lock); + + if (!intel_irqs_enabled(dev_priv)) { + spin_unlock_irq(&dev_priv->irq_lock); + return; + } + for_each_pipe_masked(dev_priv, pipe, pipe_mask) GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe], ~dev_priv->de_irq_mask[pipe] | extra_ier); + spin_unlock_irq(&dev_priv->irq_lock); } @@ -3029,15 +3191,22 @@ void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, enum pipe pipe; spin_lock_irq(&dev_priv->irq_lock); + + if (!intel_irqs_enabled(dev_priv)) { + spin_unlock_irq(&dev_priv->irq_lock); + return; + } + for_each_pipe_masked(dev_priv, pipe, pipe_mask) GEN8_IRQ_RESET_NDX(DE_PIPE, pipe); + spin_unlock_irq(&dev_priv->irq_lock); /* make sure we're done processing display irqs */ synchronize_irq(dev_priv->drm.irq); } -static void cherryview_irq_preinstall(struct drm_device *dev) +static void cherryview_irq_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -3046,7 +3215,7 @@ static void cherryview_irq_preinstall(struct drm_device *dev) gen8_gt_irq_reset(dev_priv); - GEN5_IRQ_RESET(GEN8_PCU_); + GEN3_IRQ_RESET(GEN8_PCU_); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display_irqs_enabled) @@ -3111,7 +3280,15 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) { - u32 hotplug; + u32 val, hotplug; + + /* Display 
WA #1179 WaHardHangonHotPlug: cnp */ + if (HAS_PCH_CNP(dev_priv)) { + val = I915_READ(SOUTH_CHICKEN1); + val &= ~CHASSIS_CLK_REQ_DURATION_MASK; + val |= CHASSIS_CLK_REQ_DURATION(0xf); + I915_WRITE(SOUTH_CHICKEN1, val); + } /* Enable digital hotplug on the PCH */ hotplug = I915_READ(PCH_PORT_HOTPLUG); @@ -3238,10 +3415,12 @@ static void ibx_irq_postinstall(struct drm_device *dev) if (HAS_PCH_IBX(dev_priv)) mask = SDE_GMBUS | SDE_AUX_MASK | SDE_POISON; - else + else if (HAS_PCH_CPT(dev_priv) || HAS_PCH_LPT(dev_priv)) mask = SDE_GMBUS_CPT | SDE_AUX_MASK_CPT; + else + mask = SDE_GMBUS_CPT; - gen5_assert_iir_is_zero(dev_priv, SDEIIR); + gen3_assert_iir_is_zero(dev_priv, SDEIIR); I915_WRITE(SDEIMR, ~mask); if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || @@ -3272,7 +3451,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT; } - GEN5_IRQ_INIT(GT, dev_priv->gt_irq_mask, gt_irqs); + GEN3_IRQ_INIT(GT, dev_priv->gt_irq_mask, gt_irqs); if (INTEL_GEN(dev_priv) >= 6) { /* @@ -3285,7 +3464,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) } dev_priv->pm_imr = 0xffffffff; - GEN5_IRQ_INIT(GEN6_PM, dev_priv->pm_imr, pm_irqs); + GEN3_IRQ_INIT(GEN6_PM, dev_priv->pm_imr, pm_irqs); } } @@ -3296,18 +3475,14 @@ static int ironlake_irq_postinstall(struct drm_device *dev) if (INTEL_GEN(dev_priv) >= 7) { display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE_IVB | - DE_PCH_EVENT_IVB | DE_PLANEC_FLIP_DONE_IVB | - DE_PLANEB_FLIP_DONE_IVB | - DE_PLANEA_FLIP_DONE_IVB | DE_AUX_CHANNEL_A_IVB); + DE_PCH_EVENT_IVB | DE_AUX_CHANNEL_A_IVB); extra_mask = (DE_PIPEC_VBLANK_IVB | DE_PIPEB_VBLANK_IVB | DE_PIPEA_VBLANK_IVB | DE_ERR_INT_IVB | DE_DP_A_HOTPLUG_IVB); } else { display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT | - DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE | - DE_AUX_CHANNEL_A | - DE_PIPEB_CRC_DONE | DE_PIPEA_CRC_DONE | - DE_POISON); + DE_AUX_CHANNEL_A | DE_PIPEB_CRC_DONE | + DE_PIPEA_CRC_DONE | DE_POISON); extra_mask = (DE_PIPEA_VBLANK | DE_PIPEB_VBLANK | DE_PCU_EVENT | DE_PIPEB_FIFO_UNDERRUN | DE_PIPEA_FIFO_UNDERRUN | DE_DP_A_HOTPLUG); @@ -3315,11 +3490,9 @@ static int ironlake_irq_postinstall(struct drm_device *dev) dev_priv->irq_mask = ~display_mask; - I915_WRITE(HWSTAM, 0xeffe); - ibx_irq_pre_postinstall(dev); - GEN5_IRQ_INIT(DE, dev_priv->irq_mask, display_mask | extra_mask); + GEN3_IRQ_INIT(DE, dev_priv->irq_mask, display_mask | extra_mask); gen5_gt_irq_postinstall(dev); @@ -3429,15 +3602,13 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) enum pipe pipe; if (INTEL_GEN(dev_priv) >= 9) { - de_pipe_masked |= GEN9_PIPE_PLANE1_FLIP_DONE | - GEN9_DE_PIPE_IRQ_FAULT_ERRORS; + de_pipe_masked |= GEN9_DE_PIPE_IRQ_FAULT_ERRORS; de_port_masked |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C | GEN9_AUX_CHANNEL_D; if (IS_GEN9_LP(dev_priv)) de_port_masked |= BXT_DE_PORT_GMBUS; } else { - de_pipe_masked |= GEN8_PIPE_PRIMARY_FLIP_DONE | - GEN8_DE_PIPE_IRQ_FAULT_ERRORS; + de_pipe_masked |= GEN8_DE_PIPE_IRQ_FAULT_ERRORS; } de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK | @@ -3449,19 +3620,18 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (IS_BROADWELL(dev_priv)) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; - dev_priv->de_irq_mask[PIPE_A] = ~de_pipe_masked; - dev_priv->de_irq_mask[PIPE_B] = ~de_pipe_masked; - dev_priv->de_irq_mask[PIPE_C] = ~de_pipe_masked; + for_each_pipe(dev_priv, pipe) { + dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; - for_each_pipe(dev_priv, pipe) if 
(intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) GEN8_IRQ_INIT_NDX(DE_PIPE, pipe, dev_priv->de_irq_mask[pipe], de_pipe_enables); + } - GEN5_IRQ_INIT(GEN8_DE_PORT_, ~de_port_masked, de_port_enables); - GEN5_IRQ_INIT(GEN8_DE_MISC_, ~de_misc_masked, de_misc_masked); + GEN3_IRQ_INIT(GEN8_DE_PORT_, ~de_port_masked, de_port_enables); + GEN3_IRQ_INIT(GEN8_DE_MISC_, ~de_misc_masked, de_misc_masked); if (IS_GEN9_LP(dev_priv)) bxt_hpd_detection_setup(dev_priv); @@ -3505,98 +3675,36 @@ static int cherryview_irq_postinstall(struct drm_device *dev) return 0; } -static void gen8_irq_uninstall(struct drm_device *dev) +static void i8xx_irq_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - if (!dev_priv) - return; + i9xx_pipestat_irq_reset(dev_priv); - gen8_irq_reset(dev); -} - -static void valleyview_irq_uninstall(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - if (!dev_priv) - return; - - I915_WRITE(VLV_MASTER_IER, 0); - POSTING_READ(VLV_MASTER_IER); - - gen5_gt_irq_reset(dev_priv); - - I915_WRITE(HWSTAM, 0xffffffff); - - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display_irqs_enabled) - vlv_display_irq_reset(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); -} - -static void cherryview_irq_uninstall(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - if (!dev_priv) - return; - - I915_WRITE(GEN8_MASTER_IRQ, 0); - POSTING_READ(GEN8_MASTER_IRQ); - - gen8_gt_irq_reset(dev_priv); - - GEN5_IRQ_RESET(GEN8_PCU_); - - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display_irqs_enabled) - vlv_display_irq_reset(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); -} - -static void ironlake_irq_uninstall(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - if (!dev_priv) - return; - - ironlake_irq_reset(dev); -} - -static void i8xx_irq_preinstall(struct drm_device * dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int pipe; + I915_WRITE16(HWSTAM, 0xffff); - for_each_pipe(dev_priv, pipe) - I915_WRITE(PIPESTAT(pipe), 0); - I915_WRITE16(IMR, 0xffff); - I915_WRITE16(IER, 0x0); - POSTING_READ16(IER); + GEN2_IRQ_RESET(); } static int i8xx_irq_postinstall(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + u16 enable_mask; - I915_WRITE16(EMR, - ~(I915_ERROR_PAGE_TABLE | I915_ERROR_MEMORY_REFRESH)); + I915_WRITE16(EMR, ~(I915_ERROR_PAGE_TABLE | + I915_ERROR_MEMORY_REFRESH)); /* Unmask the interrupts that we always want on. */ dev_priv->irq_mask = ~(I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | - I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT | - I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT); - I915_WRITE16(IMR, dev_priv->irq_mask); + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT); + + enable_mask = + I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_USER_INTERRUPT; - I915_WRITE16(IER, - I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | - I915_USER_INTERRUPT); - POSTING_READ16(IER); + GEN2_IRQ_INIT(, dev_priv->irq_mask, enable_mask); /* Interrupt setup is already guaranteed to be single-threaded, this is * just to make the assert_spin_locked check happy. */ @@ -3608,17 +3716,11 @@ static int i8xx_irq_postinstall(struct drm_device *dev) return 0; } -/* - * Returns true when a page flip has completed. 
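The GEN2_IRQ_RESET()/GEN2_IRQ_INIT() pair used in i8xx_irq_reset() and i8xx_irq_postinstall() above replaces the open-coded 16-bit register sequence being deleted here. Roughly what the macros are assumed to expand to (a sketch, not the literal definitions; imr_val stands for the IMR image being programmed):

/* reset: mask everything, stop generation, then ack stale events
 * twice (IIR is double buffered and can hold a second event) */
I915_WRITE16(IMR, 0xffff);
POSTING_READ16(IMR);
I915_WRITE16(IER, 0);
I915_WRITE16(IIR, 0xffff);
POSTING_READ16(IIR);
I915_WRITE16(IIR, 0xffff);
POSTING_READ16(IIR);

/* init: check that reset left IIR clear, then program IER before
 * unmasking IMR, with a posting read to flush the writes */
WARN_ON(I915_READ16(IIR));
I915_WRITE16(IER, enable_mask);
I915_WRITE16(IMR, imr_val);
POSTING_READ16(IMR);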
- */ static irqreturn_t i8xx_irq_handler(int irq, void *arg) { struct drm_device *dev = arg; struct drm_i915_private *dev_priv = to_i915(dev); - u16 iir, new_iir; - u32 pipe_stats[2]; - int pipe; - irqreturn_t ret; + irqreturn_t ret = IRQ_NONE; if (!intel_irqs_enabled(dev_priv)) return IRQ_NONE; @@ -3626,96 +3728,50 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) /* IRQs are synced during runtime_suspend, we don't require a wakeref */ disable_rpm_wakeref_asserts(dev_priv); - ret = IRQ_NONE; - iir = I915_READ16(IIR); - if (iir == 0) - goto out; + do { + u32 pipe_stats[I915_MAX_PIPES] = {}; + u16 iir; - while (iir) { - /* Can't rely on pipestat interrupt bit in iir as it might - * have been cleared after the pipestat interrupt was received. - * It doesn't set the bit in iir again, but it still produces - * interrupts (for non-MSI). - */ - spin_lock(&dev_priv->irq_lock); - if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - DRM_DEBUG("Command parser error, iir 0x%08x\n", iir); + iir = I915_READ16(IIR); + if (iir == 0) + break; - for_each_pipe(dev_priv, pipe) { - i915_reg_t reg = PIPESTAT(pipe); - pipe_stats[pipe] = I915_READ(reg); + ret = IRQ_HANDLED; - /* - * Clear the PIPE*STAT regs before the IIR - */ - if (pipe_stats[pipe] & 0x8000ffff) - I915_WRITE(reg, pipe_stats[pipe]); - } - spin_unlock(&dev_priv->irq_lock); + /* Call regardless, as some status bits might not be + * signalled in iir */ + i9xx_pipestat_irq_ack(dev_priv, iir, pipe_stats); I915_WRITE16(IIR, iir); - new_iir = I915_READ16(IIR); /* Flush posted writes */ if (iir & I915_USER_INTERRUPT) notify_ring(dev_priv->engine[RCS]); - for_each_pipe(dev_priv, pipe) { - int plane = pipe; - if (HAS_FBC(dev_priv)) - plane = !plane; - - if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS) - drm_handle_vblank(&dev_priv->drm, pipe); - - if (pipe_stats[pipe] & PIPE_CRC_DONE_INTERRUPT_STATUS) - i9xx_pipe_crc_irq_handler(dev_priv, pipe); - - if (pipe_stats[pipe] & PIPE_FIFO_UNDERRUN_STATUS) - intel_cpu_fifo_underrun_irq_handler(dev_priv, - pipe); - } + if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) + DRM_DEBUG("Command parser error, iir 0x%08x\n", iir); - iir = new_iir; - } - ret = IRQ_HANDLED; + i8xx_pipestat_irq_handler(dev_priv, iir, pipe_stats); + } while (0); -out: enable_rpm_wakeref_asserts(dev_priv); return ret; } -static void i8xx_irq_uninstall(struct drm_device * dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int pipe; - - for_each_pipe(dev_priv, pipe) { - /* Clear enable bits; then clear status bits */ - I915_WRITE(PIPESTAT(pipe), 0); - I915_WRITE(PIPESTAT(pipe), I915_READ(PIPESTAT(pipe))); - } - I915_WRITE16(IMR, 0xffff); - I915_WRITE16(IER, 0x0); - I915_WRITE16(IIR, I915_READ16(IIR)); -} - -static void i915_irq_preinstall(struct drm_device * dev) +static void i915_irq_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int pipe; if (I915_HAS_HOTPLUG(dev_priv)) { i915_hotplug_interrupt_update(dev_priv, 0xffffffff, 0); I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); } - I915_WRITE16(HWSTAM, 0xeffe); - for_each_pipe(dev_priv, pipe) - I915_WRITE(PIPESTAT(pipe), 0); - I915_WRITE(IMR, 0xffffffff); - I915_WRITE(IER, 0x0); - POSTING_READ(IER); + i9xx_pipestat_irq_reset(dev_priv); + + I915_WRITE(HWSTAM, 0xffffffff); + + GEN3_IRQ_RESET(); } static int i915_irq_postinstall(struct drm_device *dev) @@ -3723,15 +3779,14 @@ static int i915_irq_postinstall(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); u32 enable_mask; - I915_WRITE(EMR, 
~(I915_ERROR_PAGE_TABLE | I915_ERROR_MEMORY_REFRESH)); + I915_WRITE(EMR, ~(I915_ERROR_PAGE_TABLE | + I915_ERROR_MEMORY_REFRESH)); /* Unmask the interrupts that we always want on. */ dev_priv->irq_mask = ~(I915_ASLE_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | - I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT | - I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT); + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT); enable_mask = I915_ASLE_INTERRUPT | @@ -3740,20 +3795,13 @@ static int i915_irq_postinstall(struct drm_device *dev) I915_USER_INTERRUPT; if (I915_HAS_HOTPLUG(dev_priv)) { - i915_hotplug_interrupt_update(dev_priv, 0xffffffff, 0); - POSTING_READ(PORT_HOTPLUG_EN); - /* Enable in IER... */ enable_mask |= I915_DISPLAY_PORT_INTERRUPT; /* and unmask in IMR */ dev_priv->irq_mask &= ~I915_DISPLAY_PORT_INTERRUPT; } - I915_WRITE(IMR, dev_priv->irq_mask); - I915_WRITE(IER, enable_mask); - POSTING_READ(IER); - - i915_enable_asle_pipestat(dev_priv); + GEN3_IRQ_INIT(, dev_priv->irq_mask, enable_mask); /* Interrupt setup is already guaranteed to be single-threaded, this is * just to make the assert_spin_locked check happy. */ @@ -3762,6 +3810,8 @@ static int i915_irq_postinstall(struct drm_device *dev) i915_enable_pipestat(dev_priv, PIPE_B, PIPE_CRC_DONE_INTERRUPT_STATUS); spin_unlock_irq(&dev_priv->irq_lock); + i915_enable_asle_pipestat(dev_priv); + return 0; } @@ -3769,8 +3819,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) { struct drm_device *dev = arg; struct drm_i915_private *dev_priv = to_i915(dev); - u32 iir, new_iir, pipe_stats[I915_MAX_PIPES]; - int pipe, ret = IRQ_NONE; + irqreturn_t ret = IRQ_NONE; if (!intel_irqs_enabled(dev_priv)) return IRQ_NONE; @@ -3778,131 +3827,56 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) /* IRQs are synced during runtime_suspend, we don't require a wakeref */ disable_rpm_wakeref_asserts(dev_priv); - iir = I915_READ(IIR); do { - bool irq_received = (iir) != 0; - bool blc_event = false; - - /* Can't rely on pipestat interrupt bit in iir as it might - * have been cleared after the pipestat interrupt was received. - * It doesn't set the bit in iir again, but it still produces - * interrupts (for non-MSI). - */ - spin_lock(&dev_priv->irq_lock); - if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - DRM_DEBUG("Command parser error, iir 0x%08x\n", iir); - - for_each_pipe(dev_priv, pipe) { - i915_reg_t reg = PIPESTAT(pipe); - pipe_stats[pipe] = I915_READ(reg); - - /* Clear the PIPE*STAT regs before the IIR */ - if (pipe_stats[pipe] & 0x8000ffff) { - I915_WRITE(reg, pipe_stats[pipe]); - irq_received = true; - } - } - spin_unlock(&dev_priv->irq_lock); + u32 pipe_stats[I915_MAX_PIPES] = {}; + u32 hotplug_status = 0; + u32 iir; - if (!irq_received) + iir = I915_READ(IIR); + if (iir == 0) break; - /* Consume port. 
Then clear IIR or we'll miss events */ + ret = IRQ_HANDLED; + if (I915_HAS_HOTPLUG(dev_priv) && - iir & I915_DISPLAY_PORT_INTERRUPT) { - u32 hotplug_status = i9xx_hpd_irq_ack(dev_priv); - if (hotplug_status) - i9xx_hpd_irq_handler(dev_priv, hotplug_status); - } + iir & I915_DISPLAY_PORT_INTERRUPT) + hotplug_status = i9xx_hpd_irq_ack(dev_priv); + + /* Call regardless, as some status bits might not be + * signalled in iir */ + i9xx_pipestat_irq_ack(dev_priv, iir, pipe_stats); I915_WRITE(IIR, iir); - new_iir = I915_READ(IIR); /* Flush posted writes */ if (iir & I915_USER_INTERRUPT) notify_ring(dev_priv->engine[RCS]); - for_each_pipe(dev_priv, pipe) { - int plane = pipe; - if (HAS_FBC(dev_priv)) - plane = !plane; - - if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS) - drm_handle_vblank(&dev_priv->drm, pipe); - - if (pipe_stats[pipe] & PIPE_LEGACY_BLC_EVENT_STATUS) - blc_event = true; - - if (pipe_stats[pipe] & PIPE_CRC_DONE_INTERRUPT_STATUS) - i9xx_pipe_crc_irq_handler(dev_priv, pipe); - - if (pipe_stats[pipe] & PIPE_FIFO_UNDERRUN_STATUS) - intel_cpu_fifo_underrun_irq_handler(dev_priv, - pipe); - } + if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) + DRM_DEBUG("Command parser error, iir 0x%08x\n", iir); - if (blc_event || (iir & I915_ASLE_INTERRUPT)) - intel_opregion_asle_intr(dev_priv); + if (hotplug_status) + i9xx_hpd_irq_handler(dev_priv, hotplug_status); - /* With MSI, interrupts are only generated when iir - * transitions from zero to nonzero. If another bit got - * set while we were handling the existing iir bits, then - * we would never get another interrupt. - * - * This is fine on non-MSI as well, as if we hit this path - * we avoid exiting the interrupt handler only to generate - * another one. - * - * Note that for MSI this could cause a stray interrupt report - * if an interrupt landed in the time between writing IIR and - * the posting read. This should be rare enough to never - * trigger the 99% of 100,000 interrupts test for disabling - * stray interrupts. 
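The rewritten handlers (the do { ... } while (0) bodies above and below) settle on a strict latch/ack/handle order, which both preserves the MSI edge behaviour described in this comment and lets all handling work on a stable snapshot. Schematically (a sketch; read_reg(), write_reg() and handle_events() are hypothetical stand-ins):

irqreturn_t ret = IRQ_NONE;

do {
	u32 iir = read_reg(IIR);	/* latch the current events */
	if (iir == 0)
		break;

	ret = IRQ_HANDLED;
	write_reg(IIR, iir);	/* ack first: IIR drops back to zero, so a
				 * new event can raise a fresh MSI edge
				 * while the snapshot is being serviced */
	handle_events(iir);	/* then act only on the latched copy */
} while (0);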
- */ - ret = IRQ_HANDLED; - iir = new_iir; - } while (iir); + i915_pipestat_irq_handler(dev_priv, iir, pipe_stats); + } while (0); enable_rpm_wakeref_asserts(dev_priv); return ret; } -static void i915_irq_uninstall(struct drm_device * dev) +static void i965_irq_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int pipe; - - if (I915_HAS_HOTPLUG(dev_priv)) { - i915_hotplug_interrupt_update(dev_priv, 0xffffffff, 0); - I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); - } - - I915_WRITE16(HWSTAM, 0xffff); - for_each_pipe(dev_priv, pipe) { - /* Clear enable bits; then clear status bits */ - I915_WRITE(PIPESTAT(pipe), 0); - I915_WRITE(PIPESTAT(pipe), I915_READ(PIPESTAT(pipe))); - } - I915_WRITE(IMR, 0xffffffff); - I915_WRITE(IER, 0x0); - - I915_WRITE(IIR, I915_READ(IIR)); -} - -static void i965_irq_preinstall(struct drm_device * dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int pipe; i915_hotplug_interrupt_update(dev_priv, 0xffffffff, 0); I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); - I915_WRITE(HWSTAM, 0xeffe); - for_each_pipe(dev_priv, pipe) - I915_WRITE(PIPESTAT(pipe), 0); - I915_WRITE(IMR, 0xffffffff); - I915_WRITE(IER, 0x0); - POSTING_READ(IER); + i9xx_pipestat_irq_reset(dev_priv); + + I915_WRITE(HWSTAM, 0xffffffff); + + GEN3_IRQ_RESET(); } static int i965_irq_postinstall(struct drm_device *dev) @@ -3911,31 +3885,6 @@ static int i965_irq_postinstall(struct drm_device *dev) u32 enable_mask; u32 error_mask; - /* Unmask the interrupts that we always want on. */ - dev_priv->irq_mask = ~(I915_ASLE_INTERRUPT | - I915_DISPLAY_PORT_INTERRUPT | - I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | - I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT | - I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT | - I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); - - enable_mask = ~dev_priv->irq_mask; - enable_mask &= ~(I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT | - I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT); - enable_mask |= I915_USER_INTERRUPT; - - if (IS_G4X(dev_priv)) - enable_mask |= I915_BSD_USER_INTERRUPT; - - /* Interrupt setup is already guaranteed to be single-threaded, this is - * just to make the assert_spin_locked check happy. */ - spin_lock_irq(&dev_priv->irq_lock); - i915_enable_pipestat(dev_priv, PIPE_A, PIPE_GMBUS_INTERRUPT_STATUS); - i915_enable_pipestat(dev_priv, PIPE_A, PIPE_CRC_DONE_INTERRUPT_STATUS); - i915_enable_pipestat(dev_priv, PIPE_B, PIPE_CRC_DONE_INTERRUPT_STATUS); - spin_unlock_irq(&dev_priv->irq_lock); - /* * Enable some error detection, note the instruction error mask * bit is reserved, so we leave it masked. @@ -3951,12 +3900,34 @@ static int i965_irq_postinstall(struct drm_device *dev) } I915_WRITE(EMR, error_mask); - I915_WRITE(IMR, dev_priv->irq_mask); - I915_WRITE(IER, enable_mask); - POSTING_READ(IER); + /* Unmask the interrupts that we always want on. 
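+ * (Roughly: an event is first visible in ISR; if its IMR bit is + * unmasked it is latched into IIR; and only if its IER bit is set does + * the latched bit assert the CPU interrupt. irq_mask below is the + * driver's cached IMR image, hence the ~ of the wanted set.)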
*/ + dev_priv->irq_mask = + ~(I915_ASLE_INTERRUPT | + I915_DISPLAY_PORT_INTERRUPT | + I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); - i915_hotplug_interrupt_update(dev_priv, 0xffffffff, 0); - POSTING_READ(PORT_HOTPLUG_EN); + enable_mask = + I915_ASLE_INTERRUPT | + I915_DISPLAY_PORT_INTERRUPT | + I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT | + I915_USER_INTERRUPT; + + if (IS_G4X(dev_priv)) + enable_mask |= I915_BSD_USER_INTERRUPT; + + GEN3_IRQ_INIT(, dev_priv->irq_mask, enable_mask); + + /* Interrupt setup is already guaranteed to be single-threaded, this is + * just to make the assert_spin_locked check happy. */ + spin_lock_irq(&dev_priv->irq_lock); + i915_enable_pipestat(dev_priv, PIPE_A, PIPE_GMBUS_INTERRUPT_STATUS); + i915_enable_pipestat(dev_priv, PIPE_A, PIPE_CRC_DONE_INTERRUPT_STATUS); + i915_enable_pipestat(dev_priv, PIPE_B, PIPE_CRC_DONE_INTERRUPT_STATUS); + spin_unlock_irq(&dev_priv->irq_lock); i915_enable_asle_pipestat(dev_priv); @@ -3992,9 +3963,7 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) { struct drm_device *dev = arg; struct drm_i915_private *dev_priv = to_i915(dev); - u32 iir, new_iir; - u32 pipe_stats[I915_MAX_PIPES]; - int ret = IRQ_NONE, pipe; + irqreturn_t ret = IRQ_NONE; if (!intel_irqs_enabled(dev_priv)) return IRQ_NONE; @@ -4002,121 +3971,46 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) /* IRQs are synced during runtime_suspend, we don't require a wakeref */ disable_rpm_wakeref_asserts(dev_priv); - iir = I915_READ(IIR); - - for (;;) { - bool irq_received = (iir) != 0; - bool blc_event = false; - - /* Can't rely on pipestat interrupt bit in iir as it might - * have been cleared after the pipestat interrupt was received. - * It doesn't set the bit in iir again, but it still produces - * interrupts (for non-MSI). - */ - spin_lock(&dev_priv->irq_lock); - if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - DRM_DEBUG("Command parser error, iir 0x%08x\n", iir); - - for_each_pipe(dev_priv, pipe) { - i915_reg_t reg = PIPESTAT(pipe); - pipe_stats[pipe] = I915_READ(reg); - - /* - * Clear the PIPE*STAT regs before the IIR - */ - if (pipe_stats[pipe] & 0x8000ffff) { - I915_WRITE(reg, pipe_stats[pipe]); - irq_received = true; - } - } - spin_unlock(&dev_priv->irq_lock); + do { + u32 pipe_stats[I915_MAX_PIPES] = {}; + u32 hotplug_status = 0; + u32 iir; - if (!irq_received) + iir = I915_READ(IIR); + if (iir == 0) break; ret = IRQ_HANDLED; - /* Consume port. 
Then clear IIR or we'll miss events */ - if (iir & I915_DISPLAY_PORT_INTERRUPT) { - u32 hotplug_status = i9xx_hpd_irq_ack(dev_priv); - if (hotplug_status) - i9xx_hpd_irq_handler(dev_priv, hotplug_status); - } + if (iir & I915_DISPLAY_PORT_INTERRUPT) + hotplug_status = i9xx_hpd_irq_ack(dev_priv); + + /* Call regardless, as some status bits might not be + * signalled in iir */ + i9xx_pipestat_irq_ack(dev_priv, iir, pipe_stats); I915_WRITE(IIR, iir); - new_iir = I915_READ(IIR); /* Flush posted writes */ if (iir & I915_USER_INTERRUPT) notify_ring(dev_priv->engine[RCS]); + if (iir & I915_BSD_USER_INTERRUPT) notify_ring(dev_priv->engine[VCS]); - for_each_pipe(dev_priv, pipe) { - if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS) - drm_handle_vblank(&dev_priv->drm, pipe); - - if (pipe_stats[pipe] & PIPE_LEGACY_BLC_EVENT_STATUS) - blc_event = true; - - if (pipe_stats[pipe] & PIPE_CRC_DONE_INTERRUPT_STATUS) - i9xx_pipe_crc_irq_handler(dev_priv, pipe); - - if (pipe_stats[pipe] & PIPE_FIFO_UNDERRUN_STATUS) - intel_cpu_fifo_underrun_irq_handler(dev_priv, pipe); - } - - if (blc_event || (iir & I915_ASLE_INTERRUPT)) - intel_opregion_asle_intr(dev_priv); + if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) + DRM_DEBUG("Command parser error, iir 0x%08x\n", iir); - if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS) - gmbus_irq_handler(dev_priv); + if (hotplug_status) + i9xx_hpd_irq_handler(dev_priv, hotplug_status); - /* With MSI, interrupts are only generated when iir - * transitions from zero to nonzero. If another bit got - * set while we were handling the existing iir bits, then - * we would never get another interrupt. - * - * This is fine on non-MSI as well, as if we hit this path - * we avoid exiting the interrupt handler only to generate - * another one. - * - * Note that for MSI this could cause a stray interrupt report - * if an interrupt landed in the time between writing IIR and - * the posting read. This should be rare enough to never - * trigger the 99% of 100,000 interrupts test for disabling - * stray interrupts. 
- */ - iir = new_iir; - } + i965_pipestat_irq_handler(dev_priv, iir, pipe_stats); + } while (0); enable_rpm_wakeref_asserts(dev_priv); return ret; } -static void i965_irq_uninstall(struct drm_device * dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int pipe; - - if (!dev_priv) - return; - - i915_hotplug_interrupt_update(dev_priv, 0xffffffff, 0); - I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); - - I915_WRITE(HWSTAM, 0xffffffff); - for_each_pipe(dev_priv, pipe) - I915_WRITE(PIPESTAT(pipe), 0); - I915_WRITE(IMR, 0xffffffff); - I915_WRITE(IER, 0x0); - - for_each_pipe(dev_priv, pipe) - I915_WRITE(PIPESTAT(pipe), - I915_READ(PIPESTAT(pipe)) & 0x8000ffff); - I915_WRITE(IIR, I915_READ(IIR)); -} - /** * intel_irq_init - initializes irq support * @dev_priv: i915 device instance @@ -4127,11 +4021,12 @@ static void i965_irq_uninstall(struct drm_device * dev) void intel_irq_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; + struct intel_rps *rps = &dev_priv->gt_pm.rps; int i; intel_hpd_init_work(dev_priv); - INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); + INIT_WORK(&rps->work, gen6_pm_rps_work); INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); for (i = 0; i < MAX_L3_SLICES; ++i) @@ -4147,7 +4042,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) else dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; - dev_priv->rps.pm_intrmsk_mbz = 0; + rps->pm_intrmsk_mbz = 0; /* * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer @@ -4156,10 +4051,10 @@ void intel_irq_init(struct drm_i915_private *dev_priv) * TODO: verify if this can be reproduced on VLV,CHV. */ if (INTEL_GEN(dev_priv) <= 7) - dev_priv->rps.pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; + rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; if (INTEL_GEN(dev_priv) >= 8) - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; if (IS_GEN2(dev_priv)) { /* Gen2 doesn't have a hardware frame counter */ @@ -4197,17 +4092,17 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (IS_CHERRYVIEW(dev_priv)) { dev->driver->irq_handler = cherryview_irq_handler; - dev->driver->irq_preinstall = cherryview_irq_preinstall; + dev->driver->irq_preinstall = cherryview_irq_reset; dev->driver->irq_postinstall = cherryview_irq_postinstall; - dev->driver->irq_uninstall = cherryview_irq_uninstall; + dev->driver->irq_uninstall = cherryview_irq_reset; dev->driver->enable_vblank = i965_enable_vblank; dev->driver->disable_vblank = i965_disable_vblank; dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; } else if (IS_VALLEYVIEW(dev_priv)) { dev->driver->irq_handler = valleyview_irq_handler; - dev->driver->irq_preinstall = valleyview_irq_preinstall; + dev->driver->irq_preinstall = valleyview_irq_reset; dev->driver->irq_postinstall = valleyview_irq_postinstall; - dev->driver->irq_uninstall = valleyview_irq_uninstall; + dev->driver->irq_uninstall = valleyview_irq_reset; dev->driver->enable_vblank = i965_enable_vblank; dev->driver->disable_vblank = i965_disable_vblank; dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; @@ -4215,7 +4110,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev->driver->irq_handler = gen8_irq_handler; dev->driver->irq_preinstall = gen8_irq_reset; dev->driver->irq_postinstall = gen8_irq_postinstall; - dev->driver->irq_uninstall = gen8_irq_uninstall; + dev->driver->irq_uninstall = gen8_irq_reset; dev->driver->enable_vblank = gen8_enable_vblank; 
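/* The same *_irq_reset() now backs both .irq_preinstall and .irq_uninstall on every platform in this table: quiescing the hardware before request_irq() and after free_irq() is the same operation, which is what allowed the dedicated *_irq_uninstall() copies above to be deleted. */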
dev->driver->disable_vblank = gen8_disable_vblank; if (IS_GEN9_LP(dev_priv)) @@ -4229,29 +4124,29 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev->driver->irq_handler = ironlake_irq_handler; dev->driver->irq_preinstall = ironlake_irq_reset; dev->driver->irq_postinstall = ironlake_irq_postinstall; - dev->driver->irq_uninstall = ironlake_irq_uninstall; + dev->driver->irq_uninstall = ironlake_irq_reset; dev->driver->enable_vblank = ironlake_enable_vblank; dev->driver->disable_vblank = ironlake_disable_vblank; dev_priv->display.hpd_irq_setup = ilk_hpd_irq_setup; } else { if (IS_GEN2(dev_priv)) { - dev->driver->irq_preinstall = i8xx_irq_preinstall; + dev->driver->irq_preinstall = i8xx_irq_reset; dev->driver->irq_postinstall = i8xx_irq_postinstall; dev->driver->irq_handler = i8xx_irq_handler; - dev->driver->irq_uninstall = i8xx_irq_uninstall; + dev->driver->irq_uninstall = i8xx_irq_reset; dev->driver->enable_vblank = i8xx_enable_vblank; dev->driver->disable_vblank = i8xx_disable_vblank; } else if (IS_GEN3(dev_priv)) { - dev->driver->irq_preinstall = i915_irq_preinstall; + dev->driver->irq_preinstall = i915_irq_reset; dev->driver->irq_postinstall = i915_irq_postinstall; - dev->driver->irq_uninstall = i915_irq_uninstall; + dev->driver->irq_uninstall = i915_irq_reset; dev->driver->irq_handler = i915_irq_handler; dev->driver->enable_vblank = i8xx_enable_vblank; dev->driver->disable_vblank = i8xx_disable_vblank; } else { - dev->driver->irq_preinstall = i965_irq_preinstall; + dev->driver->irq_preinstall = i965_irq_reset; dev->driver->irq_postinstall = i965_irq_postinstall; - dev->driver->irq_uninstall = i965_irq_uninstall; + dev->driver->irq_uninstall = i965_irq_reset; dev->driver->irq_handler = i965_irq_handler; dev->driver->enable_vblank = i965_enable_vblank; dev->driver->disable_vblank = i965_disable_vblank; @@ -4293,7 +4188,7 @@ int intel_irq_install(struct drm_i915_private *dev_priv) * interrupts as enabled _before_ actually enabling them to avoid * special cases in our ordering checks. 
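* (Concretely: helpers such as i915_enable_pipestat() contain * WARN_ON(!intel_irqs_enabled(dev_priv)) and are called from the * postinstall hooks, so the flag has to lead the actual enable.)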
*/ - dev_priv->pm.irqs_enabled = true; + dev_priv->runtime_pm.irqs_enabled = true; return drm_irq_install(&dev_priv->drm, dev_priv->drm.pdev->irq); } @@ -4309,7 +4204,7 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) { drm_irq_uninstall(&dev_priv->drm); intel_hpd_cancel_work(dev_priv); - dev_priv->pm.irqs_enabled = false; + dev_priv->runtime_pm.irqs_enabled = false; } /** @@ -4322,7 +4217,7 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv) { dev_priv->drm.driver->irq_uninstall(&dev_priv->drm); - dev_priv->pm.irqs_enabled = false; + dev_priv->runtime_pm.irqs_enabled = false; synchronize_irq(dev_priv->drm.irq); } @@ -4335,7 +4230,7 @@ void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv) */ void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv) { - dev_priv->pm.irqs_enabled = true; + dev_priv->runtime_pm.irqs_enabled = true; dev_priv->drm.driver->irq_preinstall(&dev_priv->drm); dev_priv->drm.driver->irq_postinstall(&dev_priv->drm); } diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c index 49a079494b68..79f8ec756362 100644 --- a/drivers/gpu/drm/i915/i915_memcpy.c +++ b/drivers/gpu/drm/i915/i915_memcpy.c @@ -96,6 +96,11 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len) void i915_memcpy_init_early(struct drm_i915_private *dev_priv) { - if (static_cpu_has(X86_FEATURE_XMM4_1)) + /* + * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions + * emulation. So don't enable movntdqa in hypervisor guest. + */ + if (static_cpu_has(X86_FEATURE_XMM4_1) && + !boot_cpu_has(X86_FEATURE_HYPERVISOR)) static_branch_enable(&has_movntdqa); } diff --git a/drivers/gpu/drm/i915/i915_oa_bdw.c b/drivers/gpu/drm/i915/i915_oa_bdw.c index abdf4d0abcce..4abd2e8b5083 100644 --- a/drivers/gpu/drm/i915/i915_oa_bdw.c +++ b/drivers/gpu/drm/i915/i915_oa_bdw.c @@ -85,9 +85,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_bdw(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "d6de6f55-e526-4f79-a6a6-d7315c09044e", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_bxt.c b/drivers/gpu/drm/i915/i915_oa_bxt.c index b69b900de0fe..cb6f304ec16a 100644 --- a/drivers/gpu/drm/i915/i915_oa_bxt.c +++ b/drivers/gpu/drm/i915/i915_oa_bxt.c @@ -83,9 +83,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_bxt(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "5ee72f5c-092f-421e-8b70-225f7c3e9612", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt2.c b/drivers/gpu/drm/i915/i915_oa_cflgt2.c new file mode 100644 index 000000000000..8641ae30e343 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_cflgt2.c @@ -0,0 +1,109 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
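The strncpy() to strlcpy() conversions in the i915_oa_*.c hunks above are more than style. With UUID_STRING_LEN (36) as the bound and a 36-character source, strncpy() never writes a terminating NUL; the old code only worked because the destination struct was (presumably) zero-initialized. strlcpy() is sized by the destination and always terminates:

char uuid[UUID_STRING_LEN + 1];	/* 36 chars + NUL, as in the uuid field */

/* old: copies exactly 36 bytes, uuid[36] is never written */
strncpy(uuid, "d6de6f55-e526-4f79-a6a6-d7315c09044e", UUID_STRING_LEN);

/* new: always NUL-terminated, bound taken from the destination */
strlcpy(uuid, "d6de6f55-e526-4f79-a6a6-d7315c09044e", sizeof(uuid));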
+ * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "i915_oa_cflgt2.h" + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9840), 0x00000080 }, + { _MMIO(0x9888), 0x11810000 }, + { _MMIO(0x9888), 0x07810013 }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930040 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} + +void +i915_perf_load_test_config_cflgt2(struct drm_i915_private *dev_priv) +{ + strlcpy(dev_priv->perf.oa.test_config.uuid, + "74fb4902-d3d3-4237-9e90-cbdc68d0a446", + sizeof(dev_priv->perf.oa.test_config.uuid)); + dev_priv->perf.oa.test_config.id = 1; + + dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + + dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + + dev_priv->perf.oa.test_config.sysfs_metric.name = 
"74fb4902-d3d3-4237-9e90-cbdc68d0a446"; + dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + + dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; +} diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt2.h b/drivers/gpu/drm/i915/i915_oa_cflgt2.h new file mode 100644 index 000000000000..1f3268ef2ea2 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_cflgt2.h @@ -0,0 +1,34 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_CFLGT2_H__ +#define __I915_OA_CFLGT2_H__ + +extern void i915_perf_load_test_config_cflgt2(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt3.c b/drivers/gpu/drm/i915/i915_oa_cflgt3.c new file mode 100644 index 000000000000..792facdb6702 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_cflgt3.c @@ -0,0 +1,109 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "i915_oa_cflgt3.h" + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2744), 0x00800000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x00000000 }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x00000000 }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x00000000 }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x9840), 0x00000080 }, + { _MMIO(0x9888), 0x11810000 }, + { _MMIO(0x9888), 0x07810013 }, + { _MMIO(0x9888), 0x1f810000 }, + { _MMIO(0x9888), 0x1d810000 }, + { _MMIO(0x9888), 0x1b930040 }, + { _MMIO(0x9888), 0x07e54000 }, + { _MMIO(0x9888), 0x1f908000 }, + { _MMIO(0x9888), 0x11900000 }, + { _MMIO(0x9888), 0x37900000 }, + { _MMIO(0x9888), 0x53900000 }, + { _MMIO(0x9888), 0x45900000 }, + { _MMIO(0x9888), 0x33900000 }, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} + +void +i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv) +{ + strlcpy(dev_priv->perf.oa.test_config.uuid, + "577e8e2c-3fa0-4875-8743-3538d585e3b0", + sizeof(dev_priv->perf.oa.test_config.uuid)); + dev_priv->perf.oa.test_config.id = 1; + + dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + + dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + + dev_priv->perf.oa.test_config.sysfs_metric.name = "577e8e2c-3fa0-4875-8743-3538d585e3b0"; + dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + + dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; +} diff --git a/drivers/gpu/drm/i915/i915_oa_cflgt3.h b/drivers/gpu/drm/i915/i915_oa_cflgt3.h new file mode 100644 index 000000000000..c13b5aac01b9 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_cflgt3.h @@ -0,0 +1,34 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_CFLGT3_H__ +#define __I915_OA_CFLGT3_H__ + +extern void i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_chv.c b/drivers/gpu/drm/i915/i915_oa_chv.c index 322a3f94cd16..556febb2c3c8 100644 --- a/drivers/gpu/drm/i915/i915_oa_chv.c +++ b/drivers/gpu/drm/i915/i915_oa_chv.c @@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_chv(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "4a534b07-cba3-414d-8d60-874830e883aa", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_cnl.c b/drivers/gpu/drm/i915/i915_oa_cnl.c new file mode 100644 index 000000000000..ba9140c87cc0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_cnl.c @@ -0,0 +1,121 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "i915_oa_cnl.h" + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x0000ffff }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x0000ffff }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x0000ffff }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0xd04), 0x00000200 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x17060000 }, + { _MMIO(0x9840), 0x00000000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x13034000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x07060066 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x05060000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x0f080040 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x07091000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x0f041000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x1d004000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x35000000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x49000000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x3d000000 }, + { _MMIO(0x9884), 0x00000007 }, + { _MMIO(0x9888), 0x31000000 }, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} + +void +i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv) +{ + strlcpy(dev_priv->perf.oa.test_config.uuid, + "db41edd4-d8e7-4730-ad11-b9a2d6833503", + sizeof(dev_priv->perf.oa.test_config.uuid)); + dev_priv->perf.oa.test_config.id = 1; + + dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + + dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + + dev_priv->perf.oa.test_config.sysfs_metric.name = "db41edd4-d8e7-4730-ad11-b9a2d6833503"; + dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + + dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; +} diff --git a/drivers/gpu/drm/i915/i915_oa_cnl.h b/drivers/gpu/drm/i915/i915_oa_cnl.h new file mode 100644 index 000000000000..fb918b131105 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_cnl.h @@ -0,0 +1,34 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! 
+ * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_CNL_H__ +#define __I915_OA_CNL_H__ + +extern void i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_oa_glk.c b/drivers/gpu/drm/i915/i915_oa_glk.c index 4ee527e4c926..971db587957c 100644 --- a/drivers/gpu/drm/i915/i915_oa_glk.c +++ b/drivers/gpu/drm/i915/i915_oa_glk.c @@ -83,9 +83,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_glk(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "dd3fd789-e783-4204-8cd0-b671bbccb0cf", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.c b/drivers/gpu/drm/i915/i915_oa_hsw.c index 56b03773bb9d..434a9b96d7ab 100644 --- a/drivers/gpu/drm/i915/i915_oa_hsw.c +++ b/drivers/gpu/drm/i915/i915_oa_hsw.c @@ -113,9 +113,9 @@ show_render_basic_id(struct device *kdev, struct device_attribute *attr, char *b void i915_perf_load_test_config_hsw(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "403d8832-1a27-4aa6-a64e-f5389ce7b212", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_render_basic; diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt2.c b/drivers/gpu/drm/i915/i915_oa_kblgt2.c index b6e7cc774136..2fa98a40bbc8 100644 --- a/drivers/gpu/drm/i915/i915_oa_kblgt2.c +++ b/drivers/gpu/drm/i915/i915_oa_kblgt2.c @@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_kblgt2(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "baa3c7e4-52b6-4b85-801e-465a94b746dd", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_kblgt3.c b/drivers/gpu/drm/i915/i915_oa_kblgt3.c index 5576afdd9a7e..f3cb6679a1bc 100644 --- 
a/drivers/gpu/drm/i915/i915_oa_kblgt3.c +++ b/drivers/gpu/drm/i915/i915_oa_kblgt3.c @@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_kblgt3(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "f1792f32-6db2-4b50-b4b2-557128f1688d", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt2.c b/drivers/gpu/drm/i915/i915_oa_sklgt2.c index 890d55879946..bf8b8cd8a50d 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt2.c +++ b/drivers/gpu/drm/i915/i915_oa_sklgt2.c @@ -83,9 +83,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_sklgt2(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "1651949f-0ac0-4cb1-a06f-dafd74a407d1", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt3.c b/drivers/gpu/drm/i915/i915_oa_sklgt3.c index 85e51addf86a..ae534c7c8135 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt3.c +++ b/drivers/gpu/drm/i915/i915_oa_sklgt3.c @@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_sklgt3(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "2b985803-d3c9-4629-8a4f-634bfecba0e8", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_oa_sklgt4.c b/drivers/gpu/drm/i915/i915_oa_sklgt4.c index bce031ee4445..817fba2d82df 100644 --- a/drivers/gpu/drm/i915/i915_oa_sklgt4.c +++ b/drivers/gpu/drm/i915/i915_oa_sklgt4.c @@ -84,9 +84,9 @@ show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) void i915_perf_load_test_config_sklgt4(struct drm_i915_private *dev_priv) { - strncpy(dev_priv->perf.oa.test_config.uuid, + strlcpy(dev_priv->perf.oa.test_config.uuid, "882fa433-1f4a-4a67-a962-c741888fe5f5", - UUID_STRING_LEN); + sizeof(dev_priv->perf.oa.test_config.uuid)); dev_priv->perf.oa.test_config.id = 1; dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 8ab003dca113..b5f3eb4fa8a3 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -22,238 +22,186 @@ * IN THE SOFTWARE. 
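/*
 * The strncpy() -> strlcpy() conversions above are more than style: with a
 * 36-character UUID and a bound of UUID_STRING_LEN, strncpy() copies the
 * full 36 bytes and never writes a terminating NUL. A minimal sketch of
 * the difference, assuming a uuid[UUID_STRING_LEN + 1] destination as in
 * the perf test_config:
 */
#include <linux/string.h>

static char uuid[36 + 1];	/* UUID_STRING_LEN == 36 */

static void copy_uuid_unsafe(const char *src)
{
	/* Buggy: leaves uuid[] unterminated when strlen(src) >= 36. */
	strncpy(uuid, src, 36);
}

static void copy_uuid_safe(const char *src)
{
	/* Safe: truncates if needed and always NUL-terminates. */
	strlcpy(uuid, src, sizeof(uuid));
}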
*/ +#include <drm/drm_print.h> + #include "i915_params.h" #include "i915_drv.h" -struct i915_params i915 __read_mostly = { - .modeset = -1, - .panel_ignore_lid = 1, - .semaphores = -1, - .lvds_channel_mode = 0, - .panel_use_ssc = -1, - .vbt_sdvo_panel_type = -1, - .enable_rc6 = -1, - .enable_dc = -1, - .enable_fbc = -1, - .enable_execlists = -1, - .enable_hangcheck = true, - .enable_ppgtt = -1, - .enable_psr = -1, - .alpha_support = IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT), - .disable_power_well = -1, - .enable_ips = 1, - .fastboot = 0, - .prefault_disable = 0, - .load_detect_test = 0, - .force_reset_modeset_test = 0, - .reset = 2, - .error_capture = true, - .invert_brightness = 0, - .disable_display = 0, - .enable_cmd_parser = true, - .use_mmio_flip = 0, - .mmio_debug = 0, - .verbose_state_checks = 1, - .nuclear_pageflip = 0, - .edp_vswing = 0, - .enable_guc_loading = 0, - .enable_guc_submission = 0, - .guc_log_level = -1, - .guc_firmware_path = NULL, - .huc_firmware_path = NULL, - .enable_dp_mst = true, - .inject_load_failure = 0, - .enable_dpcd_backlight = false, - .enable_gvt = false, +#define i915_param_named(name, T, perm, desc) \ + module_param_named(name, i915_modparams.name, T, perm); \ + MODULE_PARM_DESC(name, desc) +#define i915_param_named_unsafe(name, T, perm, desc) \ + module_param_named_unsafe(name, i915_modparams.name, T, perm); \ + MODULE_PARM_DESC(name, desc) + +struct i915_params i915_modparams __read_mostly = { +#define MEMBER(T, member, value) .member = (value), + I915_PARAMS_FOR_EACH(MEMBER) +#undef MEMBER }; -module_param_named(modeset, i915.modeset, int, 0400); -MODULE_PARM_DESC(modeset, +i915_param_named(modeset, int, 0400, "Use kernel modesetting [KMS] (0=disable, " "1=on, -1=force vga console preference [default])"); -module_param_named_unsafe(panel_ignore_lid, i915.panel_ignore_lid, int, 0600); -MODULE_PARM_DESC(panel_ignore_lid, +i915_param_named_unsafe(panel_ignore_lid, int, 0600, "Override lid status (0=autodetect, 1=autodetect disabled [default], " "-1=force lid closed, -2=force lid open)"); -module_param_named_unsafe(semaphores, i915.semaphores, int, 0400); -MODULE_PARM_DESC(semaphores, - "Use semaphores for inter-ring sync " - "(default: -1 (use per-chip defaults))"); - -module_param_named_unsafe(enable_rc6, i915.enable_rc6, int, 0400); -MODULE_PARM_DESC(enable_rc6, - "Enable power-saving render C-state 6. " - "Different stages can be selected via bitmask values " - "(0 = disable; 1 = enable rc6; 2 = enable deep rc6; 4 = enable deepest rc6). " - "For example, 3 would enable rc6 and deep rc6, and 7 would enable everything. " - "default: -1 (use per-chip default)"); - -module_param_named_unsafe(enable_dc, i915.enable_dc, int, 0400); -MODULE_PARM_DESC(enable_dc, +i915_param_named_unsafe(enable_dc, int, 0400, "Enable power-saving display C-states. 
" "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6)"); -module_param_named_unsafe(enable_fbc, i915.enable_fbc, int, 0600); -MODULE_PARM_DESC(enable_fbc, +i915_param_named_unsafe(enable_fbc, int, 0600, "Enable frame buffer compression for power savings " "(default: -1 (use per-chip default))"); -module_param_named_unsafe(lvds_channel_mode, i915.lvds_channel_mode, int, 0400); -MODULE_PARM_DESC(lvds_channel_mode, +i915_param_named_unsafe(lvds_channel_mode, int, 0400, "Specify LVDS channel mode " "(0=probe BIOS [default], 1=single-channel, 2=dual-channel)"); -module_param_named_unsafe(lvds_use_ssc, i915.panel_use_ssc, int, 0600); -MODULE_PARM_DESC(lvds_use_ssc, +i915_param_named_unsafe(panel_use_ssc, int, 0600, "Use Spread Spectrum Clock with panels [LVDS/eDP] " "(default: auto from VBT)"); -module_param_named_unsafe(vbt_sdvo_panel_type, i915.vbt_sdvo_panel_type, int, 0400); -MODULE_PARM_DESC(vbt_sdvo_panel_type, +i915_param_named_unsafe(vbt_sdvo_panel_type, int, 0400, "Override/Ignore selection of SDVO panel mode in the VBT " "(-2=ignore, -1=auto [default], index in VBT BIOS table)"); -module_param_named_unsafe(reset, i915.reset, int, 0600); -MODULE_PARM_DESC(reset, "Attempt GPU resets (0=disabled, 1=full gpu reset, 2=engine reset [default])"); +i915_param_named_unsafe(reset, int, 0600, + "Attempt GPU resets (0=disabled, 1=full gpu reset, 2=engine reset [default])"); -module_param_named_unsafe(vbt_firmware, i915.vbt_firmware, charp, 0400); -MODULE_PARM_DESC(vbt_firmware, - "Load VBT from specified file under /lib/firmware"); +i915_param_named_unsafe(vbt_firmware, charp, 0400, + "Load VBT from specified file under /lib/firmware"); #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) -module_param_named(error_capture, i915.error_capture, bool, 0600); -MODULE_PARM_DESC(error_capture, +i915_param_named(error_capture, bool, 0600, "Record the GPU state following a hang. " "This information in /sys/class/drm/card<N>/error is vital for " "triaging and debugging hangs."); #endif -module_param_named_unsafe(enable_hangcheck, i915.enable_hangcheck, bool, 0644); -MODULE_PARM_DESC(enable_hangcheck, +i915_param_named_unsafe(enable_hangcheck, bool, 0644, "Periodically check GPU activity for detecting hangs. " "WARNING: Disabling this can cause system wide hangs. " "(default: true)"); -module_param_named_unsafe(enable_ppgtt, i915.enable_ppgtt, int, 0400); -MODULE_PARM_DESC(enable_ppgtt, +i915_param_named_unsafe(enable_ppgtt, int, 0400, "Override PPGTT usage. " "(-1=auto [default], 0=disabled, 1=aliasing, 2=full, 3=full with extended address space)"); -module_param_named_unsafe(enable_execlists, i915.enable_execlists, int, 0400); -MODULE_PARM_DESC(enable_execlists, - "Override execlists usage. " - "(-1=auto [default], 0=disabled, 1=enabled)"); - -module_param_named_unsafe(enable_psr, i915.enable_psr, int, 0600); -MODULE_PARM_DESC(enable_psr, "Enable PSR " - "(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) " - "Default: -1 (use per-chip default)"); +i915_param_named_unsafe(enable_psr, int, 0600, + "Enable PSR " + "(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) " + "Default: -1 (use per-chip default)"); -module_param_named_unsafe(alpha_support, i915.alpha_support, bool, 0400); -MODULE_PARM_DESC(alpha_support, +i915_param_named_unsafe(alpha_support, bool, 0400, "Enable alpha quality driver support for latest hardware. 
" "See also CONFIG_DRM_I915_ALPHA_SUPPORT."); -module_param_named_unsafe(disable_power_well, i915.disable_power_well, int, 0400); -MODULE_PARM_DESC(disable_power_well, +i915_param_named_unsafe(disable_power_well, int, 0400, "Disable display power wells when possible " "(-1=auto [default], 0=power wells always on, 1=power wells disabled when possible)"); -module_param_named_unsafe(enable_ips, i915.enable_ips, int, 0600); -MODULE_PARM_DESC(enable_ips, "Enable IPS (default: true)"); +i915_param_named_unsafe(enable_ips, int, 0600, "Enable IPS (default: true)"); -module_param_named(fastboot, i915.fastboot, bool, 0600); -MODULE_PARM_DESC(fastboot, +i915_param_named(fastboot, bool, 0600, "Try to skip unnecessary mode sets at boot time (default: false)"); -module_param_named_unsafe(prefault_disable, i915.prefault_disable, bool, 0600); -MODULE_PARM_DESC(prefault_disable, +i915_param_named_unsafe(prefault_disable, bool, 0600, "Disable page prefaulting for pread/pwrite/reloc (default:false). " "For developers only."); -module_param_named_unsafe(load_detect_test, i915.load_detect_test, bool, 0600); -MODULE_PARM_DESC(load_detect_test, +i915_param_named_unsafe(load_detect_test, bool, 0600, "Force-enable the VGA load detect code for testing (default:false). " "For developers only."); -module_param_named_unsafe(force_reset_modeset_test, i915.force_reset_modeset_test, bool, 0600); -MODULE_PARM_DESC(force_reset_modeset_test, +i915_param_named_unsafe(force_reset_modeset_test, bool, 0600, "Force a modeset during gpu reset for testing (default:false). " "For developers only."); -module_param_named_unsafe(invert_brightness, i915.invert_brightness, int, 0600); -MODULE_PARM_DESC(invert_brightness, +i915_param_named_unsafe(invert_brightness, int, 0600, "Invert backlight brightness " "(-1 force normal, 0 machine defaults, 1 force inversion), please " "report PCI device ID, subsystem vendor and subsystem device ID " "to dri-devel@lists.freedesktop.org, if your machine needs it. " "It will then be included in an upcoming module version."); -module_param_named(disable_display, i915.disable_display, bool, 0400); -MODULE_PARM_DESC(disable_display, "Disable display (default: false)"); - -module_param_named_unsafe(enable_cmd_parser, i915.enable_cmd_parser, bool, 0400); -MODULE_PARM_DESC(enable_cmd_parser, - "Enable command parsing (true=enabled [default], false=disabled)"); +i915_param_named(disable_display, bool, 0400, + "Disable display (default: false)"); -module_param_named_unsafe(use_mmio_flip, i915.use_mmio_flip, int, 0600); -MODULE_PARM_DESC(use_mmio_flip, - "use MMIO flips (-1=never, 0=driver discretion [default], 1=always)"); +i915_param_named_unsafe(enable_cmd_parser, bool, 0400, + "Enable command parsing (true=enabled [default], false=disabled)"); -module_param_named(mmio_debug, i915.mmio_debug, int, 0600); -MODULE_PARM_DESC(mmio_debug, +i915_param_named(mmio_debug, int, 0600, "Enable the MMIO debug code for the first N failures (default: off). " "This may negatively affect performance."); -module_param_named(verbose_state_checks, i915.verbose_state_checks, bool, 0600); -MODULE_PARM_DESC(verbose_state_checks, +i915_param_named(verbose_state_checks, bool, 0600, "Enable verbose logs (ie. 
WARN_ON()) in case of unexpected hw state conditions."); -module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0400); -MODULE_PARM_DESC(nuclear_pageflip, - "Force enable atomic functionality on platforms that don't have full support yet."); +i915_param_named_unsafe(nuclear_pageflip, bool, 0400, + "Force enable atomic functionality on platforms that don't have full support yet."); /* WA to get away with the default setting in VBT for early platforms. Will be removed */ -module_param_named_unsafe(edp_vswing, i915.edp_vswing, int, 0400); -MODULE_PARM_DESC(edp_vswing, - "Ignore/Override vswing pre-emph table selection from VBT " - "(0=use value from vbt [default], 1=low power swing(200mV)," - "2=default swing(400mV))"); - -module_param_named_unsafe(enable_guc_loading, i915.enable_guc_loading, int, 0400); -MODULE_PARM_DESC(enable_guc_loading, - "Enable GuC firmware loading " - "(-1=auto, 0=never [default], 1=if available, 2=required)"); - -module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, int, 0400); -MODULE_PARM_DESC(enable_guc_submission, - "Enable GuC submission " - "(-1=auto, 0=never [default], 1=if available, 2=required)"); - -module_param_named(guc_log_level, i915.guc_log_level, int, 0400); -MODULE_PARM_DESC(guc_log_level, +i915_param_named_unsafe(edp_vswing, int, 0400, + "Ignore/Override vswing pre-emph table selection from VBT " + "(0=use value from vbt [default], 1=low power swing(200mV)," + "2=default swing(400mV))"); + +i915_param_named_unsafe(enable_guc, int, 0400, + "Enable GuC load for GuC submission and/or HuC load. " + "Required functionality can be selected using bitmask values. " + "(-1=auto, 0=disable [default], 1=GuC submission, 2=HuC load)"); + +i915_param_named(guc_log_level, int, 0400, "GuC firmware logging level (-1:disabled (default), 0-3:enabled)"); -module_param_named_unsafe(guc_firmware_path, i915.guc_firmware_path, charp, 0400); -MODULE_PARM_DESC(guc_firmware_path, +i915_param_named_unsafe(guc_firmware_path, charp, 0400, "GuC firmware path to use instead of the default one"); -module_param_named_unsafe(huc_firmware_path, i915.huc_firmware_path, charp, 0400); -MODULE_PARM_DESC(huc_firmware_path, +i915_param_named_unsafe(huc_firmware_path, charp, 0400, "HuC firmware path to use instead of the default one"); -module_param_named_unsafe(enable_dp_mst, i915.enable_dp_mst, bool, 0600); -MODULE_PARM_DESC(enable_dp_mst, +i915_param_named_unsafe(enable_dp_mst, bool, 0600, "Enable multi-stream transport (MST) for new DisplayPort sinks.
(default: true)"); -module_param_named_unsafe(inject_load_failure, i915.inject_load_failure, uint, 0400); -MODULE_PARM_DESC(inject_load_failure, + +i915_param_named_unsafe(inject_load_failure, uint, 0400, "Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)"); -module_param_named(enable_dpcd_backlight, i915.enable_dpcd_backlight, bool, 0600); -MODULE_PARM_DESC(enable_dpcd_backlight, + +i915_param_named(enable_dpcd_backlight, bool, 0600, "Enable support for DPCD backlight control (default:false)"); -module_param_named(enable_gvt, i915.enable_gvt, bool, 0400); -MODULE_PARM_DESC(enable_gvt, +i915_param_named(enable_gvt, bool, 0400, "Enable support for Intel GVT-g graphics virtualization host support(default:false)"); + +static __always_inline void _print_param(struct drm_printer *p, + const char *name, + const char *type, + const void *x) +{ + if (!__builtin_strcmp(type, "bool")) + drm_printf(p, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + drm_printf(p, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + drm_printf(p, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + drm_printf(p, "i915.%s=%s\n", name, *(const char **)x); + else + BUILD_BUG(); +} + +/** + * i915_params_dump - dump i915 modparams + * @params: i915 modparams + * @p: the &drm_printer + * + * Pretty printer for i915 modparams. + */ +void i915_params_dump(const struct i915_params *params, struct drm_printer *p) +{ +#define PRINT(T, x, ...) _print_param(p, #x, #T, ¶ms->x); + I915_PARAMS_FOR_EACH(PRINT); +#undef PRINT +} diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index ac844709c97e..c96360398072 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -25,58 +25,61 @@ #ifndef _I915_PARAMS_H_ #define _I915_PARAMS_H_ +#include <linux/bitops.h> #include <linux/cache.h> /* for __read_mostly */ -#define I915_PARAMS_FOR_EACH(func) \ - func(char *, vbt_firmware); \ - func(int, modeset); \ - func(int, panel_ignore_lid); \ - func(int, semaphores); \ - func(int, lvds_channel_mode); \ - func(int, panel_use_ssc); \ - func(int, vbt_sdvo_panel_type); \ - func(int, enable_rc6); \ - func(int, enable_dc); \ - func(int, enable_fbc); \ - func(int, enable_ppgtt); \ - func(int, enable_execlists); \ - func(int, enable_psr); \ - func(int, disable_power_well); \ - func(int, enable_ips); \ - func(int, invert_brightness); \ - func(int, enable_guc_loading); \ - func(int, enable_guc_submission); \ - func(int, guc_log_level); \ - func(char *, guc_firmware_path); \ - func(char *, huc_firmware_path); \ - func(int, use_mmio_flip); \ - func(int, mmio_debug); \ - func(int, edp_vswing); \ - func(int, reset); \ - func(unsigned int, inject_load_failure); \ +struct drm_printer; + +#define ENABLE_GUC_SUBMISSION BIT(0) +#define ENABLE_GUC_LOAD_HUC BIT(1) + +#define I915_PARAMS_FOR_EACH(param) \ + param(char *, vbt_firmware, NULL) \ + param(int, modeset, -1) \ + param(int, panel_ignore_lid, 1) \ + param(int, lvds_channel_mode, 0) \ + param(int, panel_use_ssc, -1) \ + param(int, vbt_sdvo_panel_type, -1) \ + param(int, enable_dc, -1) \ + param(int, enable_fbc, -1) \ + param(int, enable_ppgtt, -1) \ + param(int, enable_psr, -1) \ + param(int, disable_power_well, -1) \ + param(int, enable_ips, 1) \ + param(int, invert_brightness, 0) \ + param(int, enable_guc, 0) \ + param(int, guc_log_level, -1) 
\ + param(char *, guc_firmware_path, NULL) \ + param(char *, huc_firmware_path, NULL) \ + param(int, mmio_debug, 0) \ + param(int, edp_vswing, 0) \ + param(int, reset, 2) \ + param(unsigned int, inject_load_failure, 0) \ /* leave bools at the end to not create holes */ \ - func(bool, alpha_support); \ - func(bool, enable_cmd_parser); \ - func(bool, enable_hangcheck); \ - func(bool, fastboot); \ - func(bool, prefault_disable); \ - func(bool, load_detect_test); \ - func(bool, force_reset_modeset_test); \ - func(bool, error_capture); \ - func(bool, disable_display); \ - func(bool, verbose_state_checks); \ - func(bool, nuclear_pageflip); \ - func(bool, enable_dp_mst); \ - func(bool, enable_dpcd_backlight); \ - func(bool, enable_gvt) + param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \ + param(bool, enable_cmd_parser, true) \ + param(bool, enable_hangcheck, true) \ + param(bool, fastboot, false) \ + param(bool, prefault_disable, false) \ + param(bool, load_detect_test, false) \ + param(bool, force_reset_modeset_test, false) \ + param(bool, error_capture, true) \ + param(bool, disable_display, false) \ + param(bool, verbose_state_checks, true) \ + param(bool, nuclear_pageflip, false) \ + param(bool, enable_dp_mst, true) \ + param(bool, enable_dpcd_backlight, false) \ + param(bool, enable_gvt, false) -#define MEMBER(T, member) T member +#define MEMBER(T, member, ...) T member; struct i915_params { I915_PARAMS_FOR_EACH(MEMBER); }; #undef MEMBER -extern struct i915_params i915 __read_mostly; +extern struct i915_params i915_modparams __read_mostly; + +void i915_params_dump(const struct i915_params *params, struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 09d97e0990b7..1c30c688f23a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -54,8 +54,14 @@ .color = { .degamma_lut_size = 512, .gamma_lut_size = 512 } #define CHV_COLORS \ .color = { .degamma_lut_size = 65, .gamma_lut_size = 257 } +#define GLK_COLORS \ + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } /* Keep in gen based order, and chronological order within a gen */ + +#define GEN_DEFAULT_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K + #define GEN2_FEATURES \ .gen = 2, .num_pipes = 1, \ .has_overlay = 1, .overlay_needs_physical = 1, \ @@ -63,7 +69,9 @@ .hws_needs_physical = 1, \ .unfenced_needs_alignment = 1, \ .ring_mask = RENDER_RING, \ + .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i830_info = { @@ -95,7 +103,9 @@ static const struct intel_device_info intel_i865g_info = { .gen = 3, .num_pipes = 2, \ .has_gmch_display = 1, \ .ring_mask = RENDER_RING, \ + .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i915g_info = { @@ -157,7 +167,9 @@ static const struct intel_device_info intel_pineview_info = { .has_hotplug = 1, \ .has_gmch_display = 1, \ .ring_mask = RENDER_RING, \ + .has_snoop = true, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_i965g_info = { @@ -165,6 +177,7 @@ static const struct intel_device_info intel_i965g_info = { .platform = INTEL_I965G, .has_overlay = 1, .hws_needs_physical = 1, + .has_snoop = false, }; static const struct intel_device_info intel_i965gm_info = { @@ -174,12 +187,12 @@ static const struct intel_device_info intel_i965gm_info = { 
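/*
 * The I915_PARAMS_FOR_EACH() table introduced above is an X-macro: one
 * param(type, name, default) list is expanded several times with different
 * definitions of param() -- once to declare the struct members, once to
 * build the default-initialized i915_modparams instance, and once for the
 * module_param bindings and the pretty-printer. A self-contained sketch of
 * the technique (names below are illustrative, not the driver's):
 */
#include <stdbool.h>
#include <stdio.h>

#define PARAMS_FOR_EACH(param) \
	param(int,  modeset,  -1) \
	param(bool, fastboot, false)

#define MEMBER(T, name, value) T name;		/* expansion 1: fields */
struct params { PARAMS_FOR_EACH(MEMBER) };
#undef MEMBER

#define INIT(T, name, value) .name = (value),	/* expansion 2: defaults */
static struct params params = { PARAMS_FOR_EACH(INIT) };
#undef INIT

#define PRINT(T, name, value) \
	printf("%s=%d\n", #name, (int)params.name);	/* expansion 3: dump */
int main(void)
{
	PARAMS_FOR_EACH(PRINT)
	return 0;
}
#undef PRINT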
.has_overlay = 1, .supports_tv = 1, .hws_needs_physical = 1, + .has_snoop = false, }; static const struct intel_device_info intel_g45_info = { GEN4_FEATURES, .platform = INTEL_G45, - .has_pipe_cxsr = 1, .ring_mask = RENDER_RING | BSD_RING, }; @@ -187,7 +200,6 @@ static const struct intel_device_info intel_gm45_info = { GEN4_FEATURES, .platform = INTEL_GM45, .is_mobile = 1, .has_fbc = 1, - .has_pipe_cxsr = 1, .supports_tv = 1, .ring_mask = RENDER_RING | BSD_RING, }; @@ -195,9 +207,12 @@ static const struct intel_device_info intel_gm45_info = { #define GEN5_FEATURES \ .gen = 5, .num_pipes = 2, \ .has_hotplug = 1, \ - .has_gmbus_irq = 1, \ .ring_mask = RENDER_RING | BSD_RING, \ + .has_snoop = true, \ + /* ilk does support rc6, but we do not implement [power] contexts */ \ + .has_rc6 = 0, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS static const struct intel_device_info intel_ironlake_d_info = { @@ -219,20 +234,39 @@ static const struct intel_device_info intel_ironlake_m_info = { .has_llc = 1, \ .has_rc6 = 1, \ .has_rc6p = 1, \ - .has_gmbus_irq = 1, \ .has_aliasing_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ CURSOR_OFFSETS -static const struct intel_device_info intel_sandybridge_d_info = { - GEN6_FEATURES, - .platform = INTEL_SANDYBRIDGE, +#define SNB_D_PLATFORM \ + GEN6_FEATURES, \ + .platform = INTEL_SANDYBRIDGE + +static const struct intel_device_info intel_sandybridge_d_gt1_info = { + SNB_D_PLATFORM, + .gt = 1, }; -static const struct intel_device_info intel_sandybridge_m_info = { - GEN6_FEATURES, - .platform = INTEL_SANDYBRIDGE, - .is_mobile = 1, +static const struct intel_device_info intel_sandybridge_d_gt2_info = { + SNB_D_PLATFORM, + .gt = 2, +}; + +#define SNB_M_PLATFORM \ + GEN6_FEATURES, \ + .platform = INTEL_SANDYBRIDGE, \ + .is_mobile = 1 + + +static const struct intel_device_info intel_sandybridge_m_gt1_info = { + SNB_M_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info intel_sandybridge_m_gt2_info = { + SNB_M_PLATFORM, + .gt = 2, }; #define GEN7_FEATURES \ @@ -243,28 +277,47 @@ static const struct intel_device_info intel_sandybridge_m_info = { .has_llc = 1, \ .has_rc6 = 1, \ .has_rc6p = 1, \ - .has_gmbus_irq = 1, \ .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ + GEN_DEFAULT_PAGE_SIZES, \ IVB_CURSOR_OFFSETS -static const struct intel_device_info intel_ivybridge_d_info = { - GEN7_FEATURES, - .platform = INTEL_IVYBRIDGE, - .has_l3_dpf = 1, +#define IVB_D_PLATFORM \ + GEN7_FEATURES, \ + .platform = INTEL_IVYBRIDGE, \ + .has_l3_dpf = 1 + +static const struct intel_device_info intel_ivybridge_d_gt1_info = { + IVB_D_PLATFORM, + .gt = 1, }; -static const struct intel_device_info intel_ivybridge_m_info = { - GEN7_FEATURES, - .platform = INTEL_IVYBRIDGE, - .is_mobile = 1, - .has_l3_dpf = 1, +static const struct intel_device_info intel_ivybridge_d_gt2_info = { + IVB_D_PLATFORM, + .gt = 2, +}; + +#define IVB_M_PLATFORM \ + GEN7_FEATURES, \ + .platform = INTEL_IVYBRIDGE, \ + .is_mobile = 1, \ + .has_l3_dpf = 1 + +static const struct intel_device_info intel_ivybridge_m_gt1_info = { + IVB_M_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info intel_ivybridge_m_gt2_info = { + IVB_M_PLATFORM, + .gt = 2, }; static const struct intel_device_info intel_ivybridge_q_info = { GEN7_FEATURES, .platform = INTEL_IVYBRIDGE, + .gt = 2, .num_pipes = 0, /* legal, last one wins */ .has_l3_dpf = 1, }; @@ -277,18 +330,19 @@ static const struct intel_device_info intel_valleyview_info = { .has_psr = 1, 
.has_runtime_pm = 1, .has_rc6 = 1, - .has_gmbus_irq = 1, .has_gmch_display = 1, .has_hotplug = 1, .has_aliasing_ppgtt = 1, .has_full_ppgtt = 1, + .has_snoop = true, .ring_mask = RENDER_RING | BSD_RING | BLT_RING, .display_mmio_offset = VLV_DISPLAY_BASE, + GEN_DEFAULT_PAGE_SIZES, GEN_DEFAULT_PIPEOFFSETS, CURSOR_OFFSETS }; -#define HSW_FEATURES \ +#define G75_FEATURES \ GEN7_FEATURES, \ .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ .has_ddi = 1, \ @@ -299,31 +353,62 @@ static const struct intel_device_info intel_valleyview_info = { .has_rc6p = 0 /* RC6p removed-by HSW */, \ .has_runtime_pm = 1 -static const struct intel_device_info intel_haswell_info = { - HSW_FEATURES, - .platform = INTEL_HASWELL, - .has_l3_dpf = 1, +#define HSW_PLATFORM \ + G75_FEATURES, \ + .platform = INTEL_HASWELL, \ + .has_l3_dpf = 1 + +static const struct intel_device_info intel_haswell_gt1_info = { + HSW_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info intel_haswell_gt2_info = { + HSW_PLATFORM, + .gt = 2, +}; + +static const struct intel_device_info intel_haswell_gt3_info = { + HSW_PLATFORM, + .gt = 3, }; -#define BDW_FEATURES \ - HSW_FEATURES, \ +#define GEN8_FEATURES \ + G75_FEATURES, \ BDW_COLORS, \ + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_2M, \ .has_logical_ring_contexts = 1, \ .has_full_48bit_ppgtt = 1, \ .has_64bit_reloc = 1, \ .has_reset_engine = 1 #define BDW_PLATFORM \ - BDW_FEATURES, \ + GEN8_FEATURES, \ .gen = 8, \ .platform = INTEL_BROADWELL -static const struct intel_device_info intel_broadwell_info = { +static const struct intel_device_info intel_broadwell_gt1_info = { BDW_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info intel_broadwell_gt2_info = { + BDW_PLATFORM, + .gt = 2, +}; + +static const struct intel_device_info intel_broadwell_rsvd_info = { + BDW_PLATFORM, + .gt = 3, + /* According to the device ID those devices are GT3, they were + * previously treated as not GT3, keep it like that. 
+ */ }; static const struct intel_device_info intel_broadwell_gt3_info = { BDW_PLATFORM, + .gt = 3, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; @@ -338,33 +423,61 @@ static const struct intel_device_info intel_cherryview_info = { .has_runtime_pm = 1, .has_resource_streamer = 1, .has_rc6 = 1, - .has_gmbus_irq = 1, .has_logical_ring_contexts = 1, .has_gmch_display = 1, .has_aliasing_ppgtt = 1, .has_full_ppgtt = 1, .has_reset_engine = 1, + .has_snoop = true, .display_mmio_offset = VLV_DISPLAY_BASE, + GEN_DEFAULT_PAGE_SIZES, GEN_CHV_PIPEOFFSETS, CURSOR_OFFSETS, CHV_COLORS, }; -#define SKL_PLATFORM \ - BDW_FEATURES, \ - .gen = 9, \ - .platform = INTEL_SKYLAKE, \ +#define GEN9_DEFAULT_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_64K | \ + I915_GTT_PAGE_SIZE_2M + +#define GEN9_FEATURES \ + GEN8_FEATURES, \ + GEN9_DEFAULT_PAGE_SIZES, \ + .has_logical_ring_preemption = 1, \ .has_csr = 1, \ .has_guc = 1, \ + .has_ipc = 1, \ .ddb_size = 896 -static const struct intel_device_info intel_skylake_info = { +#define SKL_PLATFORM \ + GEN9_FEATURES, \ + .gen = 9, \ + .platform = INTEL_SKYLAKE + +static const struct intel_device_info intel_skylake_gt1_info = { SKL_PLATFORM, + .gt = 1, }; -static const struct intel_device_info intel_skylake_gt3_info = { +static const struct intel_device_info intel_skylake_gt2_info = { SKL_PLATFORM, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, + .gt = 2, +}; + +#define SKL_GT3_PLUS_PLATFORM \ + SKL_PLATFORM, \ + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING + + +static const struct intel_device_info intel_skylake_gt3_info = { + SKL_GT3_PLUS_PLATFORM, + .gt = 3, +}; + +static const struct intel_device_info intel_skylake_gt4_info = { + SKL_GT3_PLUS_PLATFORM, + .gt = 4, }; #define GEN9_LP_FEATURES \ @@ -377,19 +490,23 @@ static const struct intel_device_info intel_skylake_gt3_info = { .has_ddi = 1, \ .has_fpga_dbg = 1, \ .has_fbc = 1, \ + .has_psr = 1, \ .has_runtime_pm = 1, \ .has_pooled_eu = 0, \ .has_csr = 1, \ .has_resource_streamer = 1, \ .has_rc6 = 1, \ .has_dp_mst = 1, \ - .has_gmbus_irq = 1, \ .has_logical_ring_contexts = 1, \ + .has_logical_ring_preemption = 1, \ .has_guc = 1, \ .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ .has_full_48bit_ppgtt = 1, \ .has_reset_engine = 1, \ + .has_snoop = true, \ + .has_ipc = 1, \ + GEN9_DEFAULT_PAGE_SIZES, \ GEN_DEFAULT_PIPEOFFSETS, \ IVB_CURSOR_OFFSETS, \ BDW_COLORS @@ -398,59 +515,68 @@ static const struct intel_device_info intel_broxton_info = { GEN9_LP_FEATURES, .platform = INTEL_BROXTON, .ddb_size = 512, - .has_reset_engine = false, }; static const struct intel_device_info intel_geminilake_info = { GEN9_LP_FEATURES, .platform = INTEL_GEMINILAKE, .ddb_size = 1024, - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } + GLK_COLORS, }; #define KBL_PLATFORM \ - BDW_FEATURES, \ + GEN9_FEATURES, \ .gen = 9, \ - .platform = INTEL_KABYLAKE, \ - .has_csr = 1, \ - .has_guc = 1, \ - .ddb_size = 896 + .platform = INTEL_KABYLAKE -static const struct intel_device_info intel_kabylake_info = { +static const struct intel_device_info intel_kabylake_gt1_info = { KBL_PLATFORM, + .gt = 1, +}; + +static const struct intel_device_info intel_kabylake_gt2_info = { + KBL_PLATFORM, + .gt = 2, }; static const struct intel_device_info intel_kabylake_gt3_info = { KBL_PLATFORM, + .gt = 3, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; #define CFL_PLATFORM \ - .is_alpha_support = 1, \ - BDW_FEATURES, \ + 
GEN9_FEATURES, \ .gen = 9, \ - .platform = INTEL_COFFEELAKE, \ - .has_csr = 1, \ - .has_guc = 1, \ - .ddb_size = 896 + .platform = INTEL_COFFEELAKE + +static const struct intel_device_info intel_coffeelake_gt1_info = { + CFL_PLATFORM, + .gt = 1, +}; -static const struct intel_device_info intel_coffeelake_info = { +static const struct intel_device_info intel_coffeelake_gt2_info = { CFL_PLATFORM, + .gt = 2, }; static const struct intel_device_info intel_coffeelake_gt3_info = { CFL_PLATFORM, + .gt = 3, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, }; -static const struct intel_device_info intel_cannonlake_info = { - BDW_FEATURES, +#define GEN10_FEATURES \ + GEN9_FEATURES, \ + .ddb_size = 1024, \ + GLK_COLORS + +static const struct intel_device_info intel_cannonlake_gt2_info = { + GEN10_FEATURES, .is_alpha_support = 1, .platform = INTEL_CANNONLAKE, .gen = 10, - .ddb_size = 1024, - .has_csr = 1, - .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } + .gt = 2, }; /* @@ -476,31 +602,42 @@ static const struct pci_device_id pciidlist[] = { INTEL_PINEVIEW_IDS(&intel_pineview_info), INTEL_IRONLAKE_D_IDS(&intel_ironlake_d_info), INTEL_IRONLAKE_M_IDS(&intel_ironlake_m_info), - INTEL_SNB_D_IDS(&intel_sandybridge_d_info), - INTEL_SNB_M_IDS(&intel_sandybridge_m_info), + INTEL_SNB_D_GT1_IDS(&intel_sandybridge_d_gt1_info), + INTEL_SNB_D_GT2_IDS(&intel_sandybridge_d_gt2_info), + INTEL_SNB_M_GT1_IDS(&intel_sandybridge_m_gt1_info), + INTEL_SNB_M_GT2_IDS(&intel_sandybridge_m_gt2_info), INTEL_IVB_Q_IDS(&intel_ivybridge_q_info), /* must be first IVB */ - INTEL_IVB_M_IDS(&intel_ivybridge_m_info), - INTEL_IVB_D_IDS(&intel_ivybridge_d_info), - INTEL_HSW_IDS(&intel_haswell_info), + INTEL_IVB_M_GT1_IDS(&intel_ivybridge_m_gt1_info), + INTEL_IVB_M_GT2_IDS(&intel_ivybridge_m_gt2_info), + INTEL_IVB_D_GT1_IDS(&intel_ivybridge_d_gt1_info), + INTEL_IVB_D_GT2_IDS(&intel_ivybridge_d_gt2_info), + INTEL_HSW_GT1_IDS(&intel_haswell_gt1_info), + INTEL_HSW_GT2_IDS(&intel_haswell_gt2_info), + INTEL_HSW_GT3_IDS(&intel_haswell_gt3_info), INTEL_VLV_IDS(&intel_valleyview_info), - INTEL_BDW_GT12_IDS(&intel_broadwell_info), + INTEL_BDW_GT1_IDS(&intel_broadwell_gt1_info), + INTEL_BDW_GT2_IDS(&intel_broadwell_gt2_info), INTEL_BDW_GT3_IDS(&intel_broadwell_gt3_info), - INTEL_BDW_RSVD_IDS(&intel_broadwell_info), + INTEL_BDW_RSVD_IDS(&intel_broadwell_rsvd_info), INTEL_CHV_IDS(&intel_cherryview_info), - INTEL_SKL_GT1_IDS(&intel_skylake_info), - INTEL_SKL_GT2_IDS(&intel_skylake_info), + INTEL_SKL_GT1_IDS(&intel_skylake_gt1_info), + INTEL_SKL_GT2_IDS(&intel_skylake_gt2_info), INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info), - INTEL_SKL_GT4_IDS(&intel_skylake_gt3_info), + INTEL_SKL_GT4_IDS(&intel_skylake_gt4_info), INTEL_BXT_IDS(&intel_broxton_info), INTEL_GLK_IDS(&intel_geminilake_info), - INTEL_KBL_GT1_IDS(&intel_kabylake_info), - INTEL_KBL_GT2_IDS(&intel_kabylake_info), + INTEL_KBL_GT1_IDS(&intel_kabylake_gt1_info), + INTEL_KBL_GT2_IDS(&intel_kabylake_gt2_info), INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info), INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info), - INTEL_CFL_S_IDS(&intel_coffeelake_info), - INTEL_CFL_H_IDS(&intel_coffeelake_info), - INTEL_CFL_U_IDS(&intel_coffeelake_gt3_info), - INTEL_CNL_IDS(&intel_cannonlake_info), + INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info), + INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info), + INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info), + INTEL_CFL_U_GT1_IDS(&intel_coffeelake_gt1_info), + INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info), + 
INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), + INTEL_CNL_U_GT2_IDS(&intel_cannonlake_gt2_info), + INTEL_CNL_Y_GT2_IDS(&intel_cannonlake_gt2_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); @@ -510,7 +647,7 @@ static void i915_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); i915_driver_unload(dev); - drm_dev_unref(dev); + drm_dev_put(dev); } static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -519,7 +656,7 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) (struct intel_device_info *) ent->driver_data; int err; - if (IS_ALPHA_SUPPORT(intel_info) && !i915.alpha_support) { + if (IS_ALPHA_SUPPORT(intel_info) && !i915_modparams.alpha_support) { DRM_INFO("The driver support for your hardware in this kernel version is alpha quality\n" "See CONFIG_DRM_I915_ALPHA_SUPPORT or i915.alpha_support module parameter\n" "to enable support in this kernel version, or check for kernel updates.\n"); @@ -577,10 +714,10 @@ static int __init i915_init(void) * vga_text_mode_force boot option. */ - if (i915.modeset == 0) + if (i915_modparams.modeset == 0) use_kms = false; - if (vgacon_text_force() && i915.modeset == -1) + if (vgacon_text_force() && i915_modparams.modeset == -1) use_kms = false; if (!use_kms) { diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 370b9d248fed..0be50e43507d 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -206,6 +206,9 @@ #include "i915_oa_kblgt2.h" #include "i915_oa_kblgt3.h" #include "i915_oa_glk.h" +#include "i915_oa_cflgt2.h" +#include "i915_oa_cflgt3.h" +#include "i915_oa_cnl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -241,7 +244,7 @@ * The two separate pointers let us decouple read()s from tail pointer aging. * * The tail pointers are checked and updated at a limited rate within a hrtimer - * callback (the same callback that is used for delivering POLLIN events) + * callback (the same callback that is used for delivering EPOLLIN events) * * Initially the tails are marked invalid with %INVALID_TAIL_PTR which * indicates that an updated tail pointer is needed. @@ -1213,9 +1216,9 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - if (i915.enable_execlists) + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; - else { + } else { struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct intel_ring *ring; int ret; @@ -1259,7 +1262,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - if (i915.enable_execlists) { + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; } else { struct intel_engine_cs *engine = dev_priv->engine[RCS]; @@ -1723,10 +1726,9 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr GFP_KERNEL); } - ret = i915_switch_context(req); i915_add_request(req); - return ret; + return 0; } /* @@ -1850,8 +1852,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, * be read back from automatically triggered reports, as part of the * RPT_ID field. 
*/ - if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || - IS_KABYLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) { + if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) { I915_WRITE(GEN8_OA_DEBUG, _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); @@ -1884,6 +1885,16 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) } +static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) +{ + /* Reset all contexts' slices/subslices configurations. */ + gen8_configure_all_contexts(dev_priv, NULL, false); + + /* Make sure we disable noa to save power. */ + I915_WRITE(RPM_CONFIG1, + I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE); +} + static void gen7_oa_enable(struct drm_i915_private *dev_priv) { /* @@ -2281,13 +2292,13 @@ static ssize_t i915_perf_read(struct file *file, mutex_unlock(&dev_priv->perf.lock); } - /* We allow the poll checking to sometimes report false positive POLLIN + /* We allow the poll checking to sometimes report false positive EPOLLIN * events where we might actually report EAGAIN on read() if there's * not really any data available. In this situation though we don't - * want to enter a busy loop between poll() reporting a POLLIN event + * want to enter a busy loop between poll() reporting a EPOLLIN event * and read() returning -EAGAIN. Clearing the oa.pollin state here * effectively ensures we back off until the next hrtimer callback - * before reporting another POLLIN event. + * before reporting another EPOLLIN event. */ if (ret >= 0 || ret == -EAGAIN) { /* Maybe make ->pollin per-stream state if we support multiple @@ -2331,12 +2342,12 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) * * Returns: any poll events that are ready without sleeping */ -static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv, +static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, struct i915_perf_stream *stream, struct file *file, poll_table *wait) { - unsigned int events = 0; + __poll_t events = 0; stream->ops->poll_wait(stream, file, wait); @@ -2347,7 +2358,7 @@ static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv, * samples to read. 
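/*
 * The comment above describes the poll back-off scheme: a periodic hrtimer
 * sets a "pollin" flag when the OA buffer has data, poll() reports EPOLLIN
 * from that flag, and read() clears it so userspace waits for the next
 * timer tick instead of spinning on -EAGAIN. A condensed sketch of the
 * timer side of that pattern (struct stream_state, data_available() and
 * POLL_PERIOD_NS are hypothetical stand-ins, not driver symbols):
 */
static enum hrtimer_restart poll_check_cb(struct hrtimer *t)
{
	struct stream_state *s = container_of(t, struct stream_state, timer);

	/* Publish readiness at a bounded rate; poll() only reads the flag. */
	if (data_available(s))
		s->pollin = true;

	hrtimer_forward_now(t, ns_to_ktime(POLL_PERIOD_NS));
	return HRTIMER_RESTART;
}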
*/ if (dev_priv->perf.oa.pollin) - events |= POLLIN; + events |= EPOLLIN; return events; } @@ -2365,11 +2376,11 @@ static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv, * * Returns: any poll events that are ready without sleeping */ -static unsigned int i915_perf_poll(struct file *file, poll_table *wait) +static __poll_t i915_perf_poll(struct file *file, poll_table *wait) { struct i915_perf_stream *stream = file->private_data; struct drm_i915_private *dev_priv = stream->dev_priv; - int ret; + __poll_t ret; mutex_lock(&dev_priv->perf.lock); ret = i915_perf_poll_locked(dev_priv, stream, file, wait); @@ -2679,8 +2690,8 @@ err: static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) { - return div_u64(1000000000ULL * (2ULL << exponent), - dev_priv->perf.oa.timestamp_frequency); + return div64_u64(1000000000ULL * (2ULL << exponent), + 1000ULL * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz); } /** @@ -2931,6 +2942,13 @@ void i915_perf_register(struct drm_i915_private *dev_priv) i915_perf_load_test_config_kblgt3(dev_priv); } else if (IS_GEMINILAKE(dev_priv)) { i915_perf_load_test_config_glk(dev_priv); + } else if (IS_COFFEELAKE(dev_priv)) { + if (IS_CFL_GT2(dev_priv)) + i915_perf_load_test_config_cflgt2(dev_priv); + if (IS_CFL_GT3(dev_priv)) + i915_perf_load_test_config_cflgt3(dev_priv); + } else if (IS_CANNONLAKE(dev_priv)) { + i915_perf_load_test_config_cnl(dev_priv); } if (dev_priv->perf.oa.test_config.id == 0) @@ -2988,7 +3006,7 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) int i; for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { - if (flex_eu_regs[i].reg == addr) + if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr) return true; } return false; @@ -2996,31 +3014,47 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr) { - return (addr >= OASTARTTRIG1.reg && addr <= OASTARTTRIG8.reg) || - (addr >= OAREPORTTRIG1.reg && addr <= OAREPORTTRIG8.reg) || - (addr >= OACEC0_0.reg && addr <= OACEC7_1.reg); + return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && + addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || + (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) && + addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) || + (addr >= i915_mmio_reg_offset(OACEC0_0) && + addr <= i915_mmio_reg_offset(OACEC7_1)); } static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { - return addr == HALF_SLICE_CHICKEN2.reg || - (addr >= MICRO_BP0_0.reg && addr <= NOA_WRITE.reg) || - (addr >= OA_PERFCNT1_LO.reg && addr <= OA_PERFCNT2_HI.reg) || - (addr >= OA_PERFMATRIX_LO.reg && addr <= OA_PERFMATRIX_HI.reg); + return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || + (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && + addr <= i915_mmio_reg_offset(NOA_WRITE)) || + (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) && + addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) || + (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) && + addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); } static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { return gen7_is_valid_mux_addr(dev_priv, addr) || - addr == WAIT_FOR_RC6_EXIT.reg || - (addr >= RPM_CONFIG0.reg && addr <= NOA_CONFIG(8).reg); + addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || + (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && + addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); +} + +static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 
addr) +{ + return gen8_is_valid_mux_addr(dev_priv, addr) || + (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && + addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); } static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) { return gen7_is_valid_mux_addr(dev_priv, addr) || (addr >= 0x25100 && addr <= 0x2FF90) || - addr == 0x9ec0; + (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && + addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || + addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); } static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) @@ -3035,14 +3069,14 @@ static uint32_t mask_reg_value(u32 reg, u32 val) * WaDisableSTUnitPowerOptimization workaround. Make sure the value * programmed by userspace doesn't change this. */ - if (HALF_SLICE_CHICKEN2.reg == reg) + if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg) val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); /* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function * indicated by its name and a bunch of selection fields used by OA * configs. */ - if (WAIT_FOR_RC6_EXIT.reg == reg) + if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); return val; @@ -3389,8 +3423,6 @@ static struct ctl_table dev_root[] = { */ void i915_perf_init(struct drm_i915_private *dev_priv) { - dev_priv->perf.oa.timestamp_frequency = 0; - if (IS_HASWELL(dev_priv)) { dev_priv->perf.oa.ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; @@ -3406,69 +3438,68 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.oa_hw_tail_read = gen7_oa_hw_tail_read; - dev_priv->perf.oa.timestamp_frequency = 12500000; - dev_priv->perf.oa.oa_formats = hsw_oa_formats; - } else if (i915.enable_execlists) { + } else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { /* Note that although we could theoretically also support the * legacy ringbuffer mode on BDW (and earlier iterations of * this driver, before upstreaming did this) it didn't seem * worth the complexity to maintain now that BDW+ enable * execlist mode by default.
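/*
 * On the timer-period math in this file's changes: oa_exponent_to_ns()
 * now converts a sampling exponent into nanoseconds as
 * ns = 10^9 * 2^(exponent + 1) / (1000 * cs_timestamp_frequency_khz),
 * i.e. the OA unit emits a report every 2^(exponent + 1) timestamp ticks.
 * A standalone check of that arithmetic (12 MHz is used here purely as a
 * representative example frequency):
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t oa_exponent_to_ns_sketch(uint64_t freq_khz,
					 unsigned int exponent)
{
	return 1000000000ULL * (2ULL << exponent) / (1000ULL * freq_khz);
}

int main(void)
{
	/* Fastest period: 2 ticks of a 12 MHz clock -> 166 ns. */
	printf("%llu ns\n",
	       (unsigned long long)oa_exponent_to_ns_sketch(12000, 0));
	/* Exponent 19: 2^20 ticks -> roughly 87 ms. */
	printf("%llu ns\n",
	       (unsigned long long)oa_exponent_to_ns_sketch(12000, 19));
	return 0;
}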
*/ - dev_priv->perf.oa.ops.is_valid_b_counter_reg = - gen7_is_valid_b_counter_addr; - dev_priv->perf.oa.ops.is_valid_mux_reg = - gen8_is_valid_mux_addr; - dev_priv->perf.oa.ops.is_valid_flex_reg = - gen8_is_valid_flex_addr; + dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer; - dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set; dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable; dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable; dev_priv->perf.oa.ops.read = gen8_oa_read; dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats; - - if (IS_GEN8(dev_priv)) { - dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; - dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; - - dev_priv->perf.oa.timestamp_frequency = 12500000; + if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv)) { + dev_priv->perf.oa.ops.is_valid_b_counter_reg = + gen7_is_valid_b_counter_addr; + dev_priv->perf.oa.ops.is_valid_mux_reg = + gen8_is_valid_mux_addr; + dev_priv->perf.oa.ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; - dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); if (IS_CHERRYVIEW(dev_priv)) { dev_priv->perf.oa.ops.is_valid_mux_reg = chv_is_valid_mux_addr; } - } else if (IS_GEN9(dev_priv)) { + + dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; + dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set; + + if (IS_GEN8(dev_priv)) { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; + + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); + } else { + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; + dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; + + dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); + } + } else if (IS_GEN10(dev_priv)) { + dev_priv->perf.oa.ops.is_valid_b_counter_reg = + gen7_is_valid_b_counter_addr; + dev_priv->perf.oa.ops.is_valid_mux_reg = + gen10_is_valid_mux_addr; + dev_priv->perf.oa.ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; + + dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set; + dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set; + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); - - switch (dev_priv->info.platform) { - case INTEL_BROXTON: - case INTEL_GEMINILAKE: - dev_priv->perf.oa.timestamp_frequency = 19200000; - break; - case INTEL_SKYLAKE: - case INTEL_KABYLAKE: - dev_priv->perf.oa.timestamp_frequency = 12000000; - break; - default: - /* Leave timestamp_frequency to 0 so we can - * detect unsupported platforms. 
- */ - break; - } } } - if (dev_priv->perf.oa.timestamp_frequency) { + if (dev_priv->perf.oa.ops.enable_metric_set) { hrtimer_init(&dev_priv->perf.oa.poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb; @@ -3478,8 +3509,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->perf.lock); spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); - oa_sample_rate_hard_limit = - dev_priv->perf.oa.timestamp_frequency / 2; + oa_sample_rate_hard_limit = 1000 * + (INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz / 2); dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); mutex_init(&dev_priv->perf.metrics_lock); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c new file mode 100644 index 000000000000..0e9b98c32b62 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -0,0 +1,920 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/perf_event.h> +#include <linux/pm_runtime.h> + +#include "i915_drv.h" +#include "i915_pmu.h" +#include "intel_ringbuffer.h" + +/* Frequency for the sampling timer for events which need it. 
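+ * + * For example, with FREQUENCY == 200 the nominal period is + * NSEC_PER_SEC / 200 == 5,000,000ns (5ms); the 10,000ns floor applied + * by max_t() only matters for frequencies above 100kHz.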
*/ +#define FREQUENCY 200 +#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY) + +#define ENGINE_SAMPLE_MASK \ + (BIT(I915_SAMPLE_BUSY) | \ + BIT(I915_SAMPLE_WAIT) | \ + BIT(I915_SAMPLE_SEMA)) + +#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS) + +static cpumask_t i915_pmu_cpumask; + +static u8 engine_config_sample(u64 config) +{ + return config & I915_PMU_SAMPLE_MASK; +} + +static u8 engine_event_sample(struct perf_event *event) +{ + return engine_config_sample(event->attr.config); +} + +static u8 engine_event_class(struct perf_event *event) +{ + return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff; +} + +static u8 engine_event_instance(struct perf_event *event) +{ + return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff; +} + +static bool is_engine_config(u64 config) +{ + return config < __I915_PMU_OTHER(0); +} + +static unsigned int config_enabled_bit(u64 config) +{ + if (is_engine_config(config)) + return engine_config_sample(config); + else + return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0)); +} + +static u64 config_enabled_mask(u64 config) +{ + return BIT_ULL(config_enabled_bit(config)); +} + +static bool is_engine_event(struct perf_event *event) +{ + return is_engine_config(event->attr.config); +} + +static unsigned int event_enabled_bit(struct perf_event *event) +{ + return config_enabled_bit(event->attr.config); +} + +static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) +{ + u64 enable; + + /* + * Only some counters need the sampling timer. + * + * We start with a bitmask of all currently enabled events. + */ + enable = i915->pmu.enable; + + /* + * Mask out all the ones which do not need the timer, or in + * other words keep all the ones that could need the timer. + */ + enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | + config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) | + ENGINE_SAMPLE_MASK; + + /* + * When the GPU is idle, per-engine counters do not need to be + * running, so clear those bits out. + */ + if (!gpu_active) + enable &= ~ENGINE_SAMPLE_MASK; + /* + * Also, when software busyness tracking is available we do not + * need the timer for the I915_SAMPLE_BUSY counter. + * + * Use RCS as proxy for all engines. + */ + else if (intel_engine_supports_stats(i915->engine[RCS])) + enable &= ~BIT(I915_SAMPLE_BUSY); + + /* + * If some bits remain, it means we need the sampling timer running. + */ + return enable; +} + +void i915_pmu_gt_parked(struct drm_i915_private *i915) +{ + if (!i915->pmu.base.event_init) + return; + + spin_lock_irq(&i915->pmu.lock); + /* + * Signal the sampling timer to stop if only engine events are enabled + * and the GPU went idle. + */ + i915->pmu.timer_enabled = pmu_needs_timer(i915, false); + spin_unlock_irq(&i915->pmu.lock); +} + +static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915) +{ + if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) { + i915->pmu.timer_enabled = true; + hrtimer_start_range_ns(&i915->pmu.timer, + ns_to_ktime(PERIOD), 0, + HRTIMER_MODE_REL_PINNED); + } +} + +void i915_pmu_gt_unparked(struct drm_i915_private *i915) +{ + if (!i915->pmu.base.event_init) + return; + + spin_lock_irq(&i915->pmu.lock); + /* + * Re-enable sampling timer when GPU goes active.
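+ * + * This is the unpark counterpart to i915_pmu_gt_parked() above: both + * re-evaluate pmu_needs_timer() under pmu.lock, with gpu_active set + * to true and false respectively.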
+ */ + __i915_pmu_maybe_start_timer(i915); + spin_unlock_irq(&i915->pmu.lock); +} + +static bool grab_forcewake(struct drm_i915_private *i915, bool fw) +{ + if (!fw) + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); + + return true; +} + +static void +update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val) +{ + sample->cur += mul_u32_u32(val, unit); +} + +static void engines_sample(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + bool fw = false; + + if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0) + return; + + if (!dev_priv->gt.awake) + return; + + if (!intel_runtime_pm_get_if_in_use(dev_priv)) + return; + + for_each_engine(engine, dev_priv, id) { + u32 current_seqno = intel_engine_get_seqno(engine); + u32 last_seqno = intel_engine_last_submit(engine); + u32 val; + + val = !i915_seqno_passed(current_seqno, last_seqno); + + update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY], + PERIOD, val); + + if (val && (engine->pmu.enable & + (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) { + fw = grab_forcewake(dev_priv, fw); + + val = I915_READ_FW(RING_CTL(engine->mmio_base)); + } else { + val = 0; + } + + update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT], + PERIOD, !!(val & RING_WAIT)); + + update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA], + PERIOD, !!(val & RING_WAIT_SEMAPHORE)); + } + + if (fw) + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + intel_runtime_pm_put(dev_priv); +} + +static void frequency_sample(struct drm_i915_private *dev_priv) +{ + if (dev_priv->pmu.enable & + config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { + u32 val; + + val = dev_priv->gt_pm.rps.cur_freq; + if (dev_priv->gt.awake && + intel_runtime_pm_get_if_in_use(dev_priv)) { + val = intel_get_cagf(dev_priv, + I915_READ_NOTRACE(GEN6_RPSTAT1)); + intel_runtime_pm_put(dev_priv); + } + + update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT], + 1, intel_gpu_freq(dev_priv, val)); + } + + if (dev_priv->pmu.enable & + config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { + update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1, + intel_gpu_freq(dev_priv, + dev_priv->gt_pm.rps.cur_freq)); + } +} + +static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) +{ + struct drm_i915_private *i915 = + container_of(hrtimer, struct drm_i915_private, pmu.timer); + + if (!READ_ONCE(i915->pmu.timer_enabled)) + return HRTIMER_NORESTART; + + engines_sample(i915); + frequency_sample(i915); + + hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD)); + return HRTIMER_RESTART; +} + +static u64 count_interrupts(struct drm_i915_private *i915) +{ + /* open-coded kstat_irqs() */ + struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq); + u64 sum = 0; + int cpu; + + if (!desc || !desc->kstat_irqs) + return 0; + + for_each_possible_cpu(cpu) + sum += *per_cpu_ptr(desc->kstat_irqs, cpu); + + return sum; +} + +static void engine_event_destroy(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(i915, + engine_event_class(event), + engine_event_instance(event)); + if (WARN_ON_ONCE(!engine)) + return; + + if (engine_event_sample(event) == I915_SAMPLE_BUSY && + intel_engine_supports_stats(engine)) + intel_disable_engine_stats(engine); +} + +static void i915_pmu_event_destroy(struct perf_event *event) +{ + WARN_ON(event->parent); + + if (is_engine_event(event)) + engine_event_destroy(event); +} + +static int 
+engine_event_status(struct intel_engine_cs *engine, + enum drm_i915_pmu_engine_sample sample) +{ + switch (sample) { + case I915_SAMPLE_BUSY: + case I915_SAMPLE_WAIT: + break; + case I915_SAMPLE_SEMA: + if (INTEL_GEN(engine->i915) < 6) + return -ENODEV; + break; + default: + return -ENOENT; + } + + return 0; +} + +static int engine_event_init(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + struct intel_engine_cs *engine; + u8 sample; + int ret; + + engine = intel_engine_lookup_user(i915, engine_event_class(event), + engine_event_instance(event)); + if (!engine) + return -ENODEV; + + sample = engine_event_sample(event); + ret = engine_event_status(engine, sample); + if (ret) + return ret; + + if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine)) + ret = intel_enable_engine_stats(engine); + + return ret; +} + +static int i915_pmu_event_init(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + int ret; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + if (event->cpu < 0) + return -EINVAL; + + /* only allow running on one cpu at a time */ + if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask)) + return -EINVAL; + + if (is_engine_event(event)) { + ret = engine_event_init(event); + } else { + ret = 0; + switch (event->attr.config) { + case I915_PMU_ACTUAL_FREQUENCY: + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + /* Requires a mutex for sampling! */ + ret = -ENODEV; + case I915_PMU_REQUESTED_FREQUENCY: + if (INTEL_GEN(i915) < 6) + ret = -ENODEV; + break; + case I915_PMU_INTERRUPTS: + break; + case I915_PMU_RC6_RESIDENCY: + if (!HAS_RC6(i915)) + ret = -ENODEV; + break; + default: + ret = -ENOENT; + break; + } + } + if (ret) + return ret; + + if (!event->parent) + event->destroy = i915_pmu_event_destroy; + + return 0; +} + +static u64 __get_rc6(struct drm_i915_private *i915) +{ + u64 val; + + val = intel_rc6_residency_ns(i915, + IS_VALLEYVIEW(i915) ? + VLV_GT_RENDER_RC6 : + GEN6_GT_GFX_RC6); + + if (HAS_RC6p(i915)) + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); + + if (HAS_RC6pp(i915)) + val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); + + return val; +} + +static u64 get_rc6(struct drm_i915_private *i915, bool locked) +{ +#if IS_ENABLED(CONFIG_PM) + unsigned long flags; + u64 val; + + if (intel_runtime_pm_get_if_in_use(i915)) { + val = __get_rc6(i915); + intel_runtime_pm_put(i915); + + /* + * If we are coming back from being runtime suspended we must + * be careful not to report a larger value than returned + * previously. + */ + + if (!locked) + spin_lock_irqsave(&i915->pmu.lock, flags); + + if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { + i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; + i915->pmu.sample[__I915_SAMPLE_RC6].cur = val; + } else { + val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; + } + + if (!locked) + spin_unlock_irqrestore(&i915->pmu.lock, flags); + } else { + struct pci_dev *pdev = i915->drm.pdev; + struct device *kdev = &pdev->dev; + unsigned long flags2; + + /* + * We are runtime suspended. + * + * Report the delta from when the device was suspended to now, + * on top of the last known real value, as the approximated RC6 + * counter value. 
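+ * + * Roughly, the estimate computed below is: + * + * val = (suspended_jiffies - suspended_jiffies_last) + * + (jiffies - accounting_timestamp) + * + * converted to nanoseconds and added on top of the last real RC6 + * sample taken before we suspended.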
+ */ + if (!locked) + spin_lock_irqsave(&i915->pmu.lock, flags); + + spin_lock_irqsave(&kdev->power.lock, flags2); + + if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) + i915->pmu.suspended_jiffies_last = + kdev->power.suspended_jiffies; + + val = kdev->power.suspended_jiffies - + i915->pmu.suspended_jiffies_last; + val += jiffies - kdev->power.accounting_timestamp; + + spin_unlock_irqrestore(&kdev->power.lock, flags2); + + val = jiffies_to_nsecs(val); + val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; + i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; + + if (!locked) + spin_unlock_irqrestore(&i915->pmu.lock, flags); + } + + return val; +#else + return __get_rc6(i915); +#endif +} + +static u64 __i915_pmu_event_read(struct perf_event *event, bool locked) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + u64 val = 0; + + if (is_engine_event(event)) { + u8 sample = engine_event_sample(event); + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(i915, + engine_event_class(event), + engine_event_instance(event)); + + if (WARN_ON_ONCE(!engine)) { + /* Do nothing */ + } else if (sample == I915_SAMPLE_BUSY && + intel_engine_supports_stats(engine)) { + val = ktime_to_ns(intel_engine_get_busy_time(engine)); + } else { + val = engine->pmu.sample[sample].cur; + } + } else { + switch (event->attr.config) { + case I915_PMU_ACTUAL_FREQUENCY: + val = + div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur, + FREQUENCY); + break; + case I915_PMU_REQUESTED_FREQUENCY: + val = + div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur, + FREQUENCY); + break; + case I915_PMU_INTERRUPTS: + val = count_interrupts(i915); + break; + case I915_PMU_RC6_RESIDENCY: + val = get_rc6(i915, locked); + break; + } + } + + return val; +} + +static void i915_pmu_event_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new; + +again: + prev = local64_read(&hwc->prev_count); + new = __i915_pmu_event_read(event, false); + + if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) + goto again; + + local64_add(new - prev, &event->count); +} + +static void i915_pmu_enable(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + unsigned int bit = event_enabled_bit(event); + unsigned long flags; + + spin_lock_irqsave(&i915->pmu.lock, flags); + + /* + * Update the bitmask of enabled events and increment + * the event reference counter. + */ + GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0); + i915->pmu.enable |= BIT_ULL(bit); + i915->pmu.enable_count[bit]++; + + /* + * Start the sampling timer if needed and not already enabled. + */ + __i915_pmu_maybe_start_timer(i915); + + /* + * For per-engine events the bitmask and reference counting + * is stored per engine. + */ + if (is_engine_event(event)) { + u8 sample = engine_event_sample(event); + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(i915, + engine_event_class(event), + engine_event_instance(event)); + GEM_BUG_ON(!engine); + engine->pmu.enable |= BIT(sample); + + GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0); + engine->pmu.enable_count[sample]++; + } + + /* + * Store the current counter value so we can report the correct delta + * for all listeners. Even when the event was already enabled and has + * an existing non-zero value. 
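+ * + * perf reports event->count as a running sum of (new - prev) deltas + * (see i915_pmu_event_read() above), so seeding prev_count here makes + * a new listener start counting from zero rather than from the raw + * accumulated value.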
+ */ + local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true)); + + spin_unlock_irqrestore(&i915->pmu.lock, flags); +} + +static void i915_pmu_disable(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + unsigned int bit = event_enabled_bit(event); + unsigned long flags; + + spin_lock_irqsave(&i915->pmu.lock, flags); + + if (is_engine_event(event)) { + u8 sample = engine_event_sample(event); + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(i915, + engine_event_class(event), + engine_event_instance(event)); + GEM_BUG_ON(!engine); + GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS); + GEM_BUG_ON(engine->pmu.enable_count[sample] == 0); + /* + * Decrement the reference count and clear the enabled + * bitmask when the last listener on an event goes away. + */ + if (--engine->pmu.enable_count[sample] == 0) + engine->pmu.enable &= ~BIT(sample); + } + + GEM_BUG_ON(bit >= I915_PMU_MASK_BITS); + GEM_BUG_ON(i915->pmu.enable_count[bit] == 0); + /* + * Decrement the reference count and clear the enabled + * bitmask when the last listener on an event goes away. + */ + if (--i915->pmu.enable_count[bit] == 0) { + i915->pmu.enable &= ~BIT_ULL(bit); + i915->pmu.timer_enabled &= pmu_needs_timer(i915, true); + } + + spin_unlock_irqrestore(&i915->pmu.lock, flags); +} + +static void i915_pmu_event_start(struct perf_event *event, int flags) +{ + i915_pmu_enable(event); + event->hw.state = 0; +} + +static void i915_pmu_event_stop(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_UPDATE) + i915_pmu_event_read(event); + i915_pmu_disable(event); + event->hw.state = PERF_HES_STOPPED; +} + +static int i915_pmu_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + i915_pmu_event_start(event, flags); + + return 0; +} + +static void i915_pmu_event_del(struct perf_event *event, int flags) +{ + i915_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int i915_pmu_event_event_idx(struct perf_event *event) +{ + return 0; +} + +struct i915_str_attribute { + struct device_attribute attr; + const char *str; +}; + +static ssize_t i915_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i915_str_attribute *eattr; + + eattr = container_of(attr, struct i915_str_attribute, attr); + return sprintf(buf, "%s\n", eattr->str); +} + +#define I915_PMU_FORMAT_ATTR(_name, _config) \ + (&((struct i915_str_attribute[]) { \ + { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \ + .str = _config, } \ + })[0].attr.attr) + +static struct attribute *i915_pmu_format_attrs[] = { + I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"), + NULL, +}; + +static const struct attribute_group i915_pmu_format_attr_group = { + .name = "format", + .attrs = i915_pmu_format_attrs, +}; + +struct i915_ext_attribute { + struct device_attribute attr; + unsigned long val; +}; + +static ssize_t i915_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i915_ext_attribute *eattr; + + eattr = container_of(attr, struct i915_ext_attribute, attr); + return sprintf(buf, "config=0x%lx\n", eattr->val); +} + +#define I915_EVENT_ATTR(_name, _config) \ + (&((struct i915_ext_attribute[]) { \ + { .attr = __ATTR(_name, 0444, i915_pmu_event_show, NULL), \ + .val = _config, } \ + })[0].attr.attr) + +#define I915_EVENT_STR(_name, _str) \ + (&((struct perf_pmu_events_attr[]) { \ + { .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ + .id = 0, \ + .event_str = _str, 
} \ + })[0].attr.attr) + +#define I915_EVENT(_name, _config, _unit) \ + I915_EVENT_ATTR(_name, _config), \ + I915_EVENT_STR(_name.unit, _unit) + +#define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \ + I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \ + I915_EVENT_STR(_name.unit, "ns") + +#define I915_ENGINE_EVENTS(_name, _class, _instance) \ + I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \ + I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \ + I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT) + +static struct attribute *i915_pmu_events_attrs[] = { + I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0), + I915_ENGINE_EVENTS(bcs, I915_ENGINE_CLASS_COPY, 0), + I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 0), + I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 1), + I915_ENGINE_EVENTS(vecs, I915_ENGINE_CLASS_VIDEO_ENHANCE, 0), + + I915_EVENT(actual-frequency, I915_PMU_ACTUAL_FREQUENCY, "MHz"), + I915_EVENT(requested-frequency, I915_PMU_REQUESTED_FREQUENCY, "MHz"), + + I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS), + + I915_EVENT(rc6-residency, I915_PMU_RC6_RESIDENCY, "ns"), + + NULL, +}; + +static const struct attribute_group i915_pmu_events_attr_group = { + .name = "events", + .attrs = i915_pmu_events_attrs, +}; + +static ssize_t +i915_pmu_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask); +} + +static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL); + +static struct attribute *i915_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group i915_pmu_cpumask_attr_group = { + .attrs = i915_cpumask_attrs, +}; + +static const struct attribute_group *i915_pmu_attr_groups[] = { + &i915_pmu_format_attr_group, + &i915_pmu_events_attr_group, + &i915_pmu_cpumask_attr_group, + NULL +}; + +static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + + GEM_BUG_ON(!pmu->base.event_init); + + /* Select the first online CPU as a designated reader. 
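+ * + * i915 is an uncore PMU, counting device-wide rather than per-task + * activity, so each event is exposed on only one CPU at a time and + * userspace is expected to read the "cpumask" attribute above and + * open its events on the CPU listed there. An illustrative sketch of + * such an open (raw syscall, as glibc provides no wrapper): + * + * attr.type = <contents of /sys/bus/event_source/devices/i915/type>; + * attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); + * fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); + * read(fd, &counter, sizeof(u64)); + * + * after which "counter" holds the render engine busy time in + * nanoseconds.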
*/ + if (!cpumask_weight(&i915_pmu_cpumask)) + cpumask_set_cpu(cpu, &i915_pmu_cpumask); + + return 0; +} + +static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + unsigned int target; + + GEM_BUG_ON(!pmu->base.event_init); + + if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) { + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); + /* Migrate events if there is a valid target */ + if (target < nr_cpu_ids) { + cpumask_set_cpu(target, &i915_pmu_cpumask); + perf_pmu_migrate_context(&pmu->base, cpu, target); + } + } + + return 0; +} + +static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; + +static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915) +{ + enum cpuhp_state slot; + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/x86/intel/i915:online", + i915_pmu_cpu_online, + i915_pmu_cpu_offline); + if (ret < 0) + return ret; + + slot = ret; + ret = cpuhp_state_add_instance(slot, &i915->pmu.node); + if (ret) { + cpuhp_remove_multi_state(slot); + return ret; + } + + cpuhp_slot = slot; + return 0; +} + +static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915) +{ + WARN_ON(cpuhp_slot == CPUHP_INVALID); + WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node)); + cpuhp_remove_multi_state(cpuhp_slot); +} + +void i915_pmu_register(struct drm_i915_private *i915) +{ + int ret; + + if (INTEL_GEN(i915) <= 2) { + DRM_INFO("PMU not supported for this GPU.\n"); + return; + } + + i915->pmu.base.attr_groups = i915_pmu_attr_groups; + i915->pmu.base.task_ctx_nr = perf_invalid_context; + i915->pmu.base.event_init = i915_pmu_event_init; + i915->pmu.base.add = i915_pmu_event_add; + i915->pmu.base.del = i915_pmu_event_del; + i915->pmu.base.start = i915_pmu_event_start; + i915->pmu.base.stop = i915_pmu_event_stop; + i915->pmu.base.read = i915_pmu_event_read; + i915->pmu.base.event_idx = i915_pmu_event_event_idx; + + spin_lock_init(&i915->pmu.lock); + hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + i915->pmu.timer.function = i915_sample; + + ret = perf_pmu_register(&i915->pmu.base, "i915", -1); + if (ret) + goto err; + + ret = i915_pmu_register_cpuhp_state(i915); + if (ret) + goto err_unreg; + + return; + +err_unreg: + perf_pmu_unregister(&i915->pmu.base); +err: + i915->pmu.base.event_init = NULL; + DRM_NOTE("Failed to register PMU!
(err=%d)\n", ret); +} + +void i915_pmu_unregister(struct drm_i915_private *i915) +{ + if (!i915->pmu.base.event_init) + return; + + WARN_ON(i915->pmu.enable); + + hrtimer_cancel(&i915->pmu.timer); + + i915_pmu_unregister_cpuhp_state(i915); + + perf_pmu_unregister(&i915->pmu.base); + i915->pmu.base.event_init = NULL; +} diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h new file mode 100644 index 000000000000..bb62df15afa4 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -0,0 +1,117 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ +#ifndef __I915_PMU_H__ +#define __I915_PMU_H__ + +enum { + __I915_SAMPLE_FREQ_ACT = 0, + __I915_SAMPLE_FREQ_REQ, + __I915_SAMPLE_RC6, + __I915_SAMPLE_RC6_ESTIMATED, + __I915_NUM_PMU_SAMPLERS +}; + +/** + * How many different events we track in the global PMU mask. + * + * It is also used to know the needed number of event reference counters. + */ +#define I915_PMU_MASK_BITS \ + ((1 << I915_PMU_SAMPLE_BITS) + \ + (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0))) + +struct i915_pmu_sample { + u64 cur; +}; + +struct i915_pmu { + /** + * @node: List node for CPU hotplug handling. + */ + struct hlist_node node; + /** + * @base: PMU base. + */ + struct pmu base; + /** + * @lock: Lock protecting enable mask and ref count handling. + */ + spinlock_t lock; + /** + * @timer: Timer for internal i915 PMU sampling. + */ + struct hrtimer timer; + /** + * @enable: Bitmask of all currently enabled events. + * + * Bits are derived from uAPI event numbers in a way that the low + * 16 bits correspond to engine event _sample_ _type_ (I915_SAMPLE_BUSY + * is bit 0), and higher bits correspond to other events (for instance + * I915_PMU_ACTUAL_FREQUENCY is bit 16 etc). + * + * In other words, low 16 bits are not per engine but per engine + * sampler type, while the upper bits are directly mapped to other + * event types. + */ + u64 enable; + /** + * @enable_count: Reference counts for the enabled events. + * + * Array indices are mapped in the same way as bits in the @enable field + * and they are used to control sampling on/off when multiple clients + * are using the PMU API. + */ + unsigned int enable_count[I915_PMU_MASK_BITS]; + /** + * @timer_enabled: Should the internal sampling timer be running. + */ + bool timer_enabled; + /** + * @sample: Current and previous (raw) counters for sampling events.
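+ * + * For the frequency samplers each timer tick adds the momentary + * frequency in MHz and the read side divides by FREQUENCY, so the + * delta observed over one second works out to the average frequency + * in MHz.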
+ * + * These counters are updated from the i915 PMU sampling timer. + * + * Only global counters are held here, while the per-engine ones are in + * struct intel_engine_cs. + */ + struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; + /** + * @suspended_jiffies_last: Cached suspend time from PM core. + */ + unsigned long suspended_jiffies_last; +}; + +#ifdef CONFIG_PERF_EVENTS +void i915_pmu_register(struct drm_i915_private *i915); +void i915_pmu_unregister(struct drm_i915_private *i915); +void i915_pmu_gt_parked(struct drm_i915_private *i915); +void i915_pmu_gt_unparked(struct drm_i915_private *i915); +#else +static inline void i915_pmu_register(struct drm_i915_private *i915) {} +static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} +static inline void i915_pmu_gt_parked(struct drm_i915_private *i915) {} +static inline void i915_pmu_gt_unparked(struct drm_i915_private *i915) {} +#endif + +#endif diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h index 0679a58cdbae..195203f298df 100644 --- a/drivers/gpu/drm/i915/i915_pvinfo.h +++ b/drivers/gpu/drm/i915/i915_pvinfo.h @@ -53,6 +53,7 @@ enum vgt_g2v_type { * VGT capabilities type */ #define VGT_CAPS_FULL_48BIT_PPGTT BIT(2) +#define VGT_CAPS_HWSP_EMULATION BIT(3) struct vgt_if { u64 magic; /* VGT_MAGIC */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c9bcc6c45012..33eb0c5b1d32 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -186,6 +186,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VIDEO_ENHANCEMENT_CLASS 2 #define COPY_ENGINE_CLASS 3 #define OTHER_CLASS 4 +#define MAX_ENGINE_CLASS 4 + +#define MAX_ENGINE_INSTANCE 1 /* PCI config space */ @@ -355,9 +358,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ECOCHK_PPGTT_WT_HSW (0x2<<3) #define ECOCHK_PPGTT_WB_HSW (0x3<<3) -#define GEN8_CONFIG0 _MMIO(0xD00) -#define GEN9_DEFAULT_FIXES (1 << 3 | 1 << 2 | 1 << 1) - #define GAC_ECO_BITS _MMIO(0x14090) #define ECOBITS_SNB_BIT (1<<13) #define ECOBITS_PPGTT_CACHE64B (3<<8) @@ -382,6 +382,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_STOLEN_RESERVED_2M (1 << 7) #define GEN8_STOLEN_RESERVED_4M (2 << 7) #define GEN8_STOLEN_RESERVED_8M (3 << 7) +#define GEN6_STOLEN_RESERVED_ENABLE (1 << 0) /* VGA stuff */ @@ -1109,16 +1110,50 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OA_PERFCNT1_HI _MMIO(0x91BC) #define OA_PERFCNT2_LO _MMIO(0x91C0) #define OA_PERFCNT2_HI _MMIO(0x91C4) +#define OA_PERFCNT3_LO _MMIO(0x91C8) +#define OA_PERFCNT3_HI _MMIO(0x91CC) +#define OA_PERFCNT4_LO _MMIO(0x91D8) +#define OA_PERFCNT4_HI _MMIO(0x91DC) #define OA_PERFMATRIX_LO _MMIO(0x91C8) #define OA_PERFMATRIX_HI _MMIO(0x91CC) /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0x0D00) -#define RPM_CONFIG1 _MMIO(0x0D04) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 0 +#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 1 +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1 +#define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT) -/* RPC unit config (Gen8+) */ -#define RPM_CONFIG _MMIO(0x0D08) +#define RPM_CONFIG1 _MMIO(0x0D04) +#define GEN10_GT_NOA_ENABLE (1 << 9) + +/* GPM unit config (Gen9+) */ +#define CTC_MODE _MMIO(0xA26C) +#define CTC_SOURCE_PARAMETER_MASK 1 
+#define CTC_SOURCE_CRYSTAL_CLOCK 0 +#define CTC_SOURCE_DIVIDE_LOGIC 1 +#define CTC_SHIFT_PARAMETER_SHIFT 1 +#define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT) + +/* RCP unit config (Gen8+) */ +#define RCP_CONFIG _MMIO(0x0D08) + +/* NOA (HSW) */ +#define HSW_MBVID2_NOA0 _MMIO(0x9E80) +#define HSW_MBVID2_NOA1 _MMIO(0x9E84) +#define HSW_MBVID2_NOA2 _MMIO(0x9E88) +#define HSW_MBVID2_NOA3 _MMIO(0x9E8C) +#define HSW_MBVID2_NOA4 _MMIO(0x9E90) +#define HSW_MBVID2_NOA5 _MMIO(0x9E94) +#define HSW_MBVID2_NOA6 _MMIO(0x9E98) +#define HSW_MBVID2_NOA7 _MMIO(0x9E9C) +#define HSW_MBVID2_NOA8 _MMIO(0x9EA0) +#define HSW_MBVID2_NOA9 _MMIO(0x9EA4) + +#define HSW_MBVID2_MISR0 _MMIO(0x9EC0) /* NOA (Gen8+) */ #define NOA_CONFIG(i) _MMIO(0x0D0C + (i) * 4) @@ -1992,7 +2027,7 @@ enum i915_power_well_id { #define _CNL_PORT_TX_DW5_LN0_AE 0x162454 #define _CNL_PORT_TX_DW5_LN0_B 0x162654 #define _CNL_PORT_TX_DW5_LN0_C 0x162C54 -#define _CNL_PORT_TX_DW5_LN0_D 0x162ED4 +#define _CNL_PORT_TX_DW5_LN0_D 0x162E54 #define _CNL_PORT_TX_DW5_LN0_F 0x162854 #define CNL_PORT_TX_DW5_GRP(port) _MMIO_PORT6(port, \ _CNL_PORT_TX_DW5_GRP_AE, \ @@ -2023,7 +2058,7 @@ enum i915_power_well_id { #define _CNL_PORT_TX_DW7_LN0_AE 0x16245C #define _CNL_PORT_TX_DW7_LN0_B 0x16265C #define _CNL_PORT_TX_DW7_LN0_C 0x162C5C -#define _CNL_PORT_TX_DW7_LN0_D 0x162EDC +#define _CNL_PORT_TX_DW7_LN0_D 0x162E5C #define _CNL_PORT_TX_DW7_LN0_F 0x16285C #define CNL_PORT_TX_DW7_GRP(port) _MMIO_PORT6(port, \ _CNL_PORT_TX_DW7_GRP_AE, \ @@ -2329,6 +2364,8 @@ enum i915_power_well_id { #define ARB_MODE_SWIZZLE_BDW (1<<1) #define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) #define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100*(engine)->hw_id) +#define GEN8_RING_FAULT_REG _MMIO(0x4094) +#define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) #define RING_FAULT_GTTSEL_MASK (1<<11) #define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) #define RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3) @@ -2336,7 +2373,7 @@ enum i915_power_well_id { #define DONE_REG _MMIO(0x40b0) #define GEN8_PRIVATE_PAT_LO _MMIO(0x40e0) #define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4) -#define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + index*4) +#define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index)*4) #define BSD_HWS_PGA_GEN7 _MMIO(0x04180) #define BLT_HWS_PGA_GEN7 _MMIO(0x04280) #define VEBOX_HWS_PGA_GEN7 _MMIO(0x04380) @@ -2371,8 +2408,12 @@ enum i915_power_well_id { #define GEN9_GAMT_ECO_REG_RW_IA _MMIO(0x4ab0) #define GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS (1<<18) +#define GEN8_GAMW_ECO_DEV_RW_IA _MMIO(0x4080) +#define GAMW_ECO_ENABLE_64K_IPS_FIELD 0xF + #define GAMT_CHKN_BIT_REG _MMIO(0x4ab8) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1<<28) +#define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1<<24) #if 0 #define PRB0_TAIL _MMIO(0x2030) @@ -2448,6 +2489,8 @@ enum i915_power_well_id { #define GEN8_FAULT_TLB_DATA0 _MMIO(0x4b10) #define GEN8_FAULT_TLB_DATA1 _MMIO(0x4b14) +#define FAULT_VA_HIGH_BITS (0xf << 0) +#define FAULT_GTT_SEL (1 << 4) #define FPGA_DBG _MMIO(0x42300) #define FPGA_DBG_RM_NOCLAIM (1<<31) @@ -2491,6 +2534,7 @@ enum i915_power_well_id { # define _3D_CHICKEN2_WM_READ_PIPELINED (1 << 14) #define _3D_CHICKEN3 _MMIO(0x2090) #define _3D_CHICKEN_SF_DISABLE_OBJEND_CULL (1 << 10) +#define _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE (1 << 5) #define _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL (1 << 5) #define _3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(x) ((x)<<1) /* gen8+ */ #define _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH (1 << 1) /* gen6 */ @@ -2728,6 +2772,11 @@ enum i915_power_well_id { #define 
GEN9_F2_SS_DIS_SHIFT 20 #define GEN9_F2_SS_DIS_MASK (0xf << GEN9_F2_SS_DIS_SHIFT) +#define GEN10_F2_S_ENA_SHIFT 22 +#define GEN10_F2_S_ENA_MASK (0x3f << GEN10_F2_S_ENA_SHIFT) +#define GEN10_F2_SS_DIS_SHIFT 18 +#define GEN10_F2_SS_DIS_MASK (0xf << GEN10_F2_SS_DIS_SHIFT) + #define GEN8_EU_DISABLE0 _MMIO(0x9134) #define GEN8_EU_DIS0_S0_MASK 0xffffff #define GEN8_EU_DIS0_S1_SHIFT 24 @@ -2743,6 +2792,9 @@ enum i915_power_well_id { #define GEN9_EU_DISABLE(slice) _MMIO(0x9134 + (slice)*0x4) +#define GEN10_EU_DISABLE3 _MMIO(0x9140) +#define GEN10_EU_DIS_SS_MASK 0xff + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) @@ -3228,6 +3280,7 @@ enum i915_power_well_id { # define AUDUNIT_CLOCK_GATE_DISABLE (1 << 26) /* 965 */ # define DPUNIT_A_CLOCK_GATE_DISABLE (1 << 25) /* 965 */ # define DPCUNIT_CLOCK_GATE_DISABLE (1 << 24) /* 965 */ +# define PNV_GMBUSUNIT_CLOCK_GATE_DISABLE (1 << 24) /* pnv */ # define TVRUNIT_CLOCK_GATE_DISABLE (1 << 23) /* 915-945 */ # define TVCUNIT_CLOCK_GATE_DISABLE (1 << 22) /* 915-945 */ # define TVFUNIT_CLOCK_GATE_DISABLE (1 << 21) /* 915-945 */ @@ -3382,6 +3435,7 @@ enum i915_power_well_id { #define ELK_STOLEN_RESERVED _MMIO(MCHBAR_MIRROR_BASE + 0x48) #define G4X_STOLEN_RESERVED_ADDR1_MASK (0xFFFF << 16) #define G4X_STOLEN_RESERVED_ADDR2_MASK (0xFFF << 4) +#define G4X_STOLEN_RESERVED_ENABLE (1 << 0) /* Memory controller frequency in MCHBAR for Haswell (possible SNB+) */ #define DCLK _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5e04) @@ -3803,9 +3857,33 @@ enum { * GEN9 clock gating regs */ #define GEN9_CLKGATE_DIS_0 _MMIO(0x46530) +#define DARBF_GATING_DIS (1 << 27) #define PWM2_GATING_DIS (1 << 14) #define PWM1_GATING_DIS (1 << 13) +#define GEN9_CLKGATE_DIS_4 _MMIO(0x4653C) +#define BXT_GMBUS_GATING_DIS (1 << 14) + +#define _CLKGATE_DIS_PSL_A 0x46520 +#define _CLKGATE_DIS_PSL_B 0x46524 +#define _CLKGATE_DIS_PSL_C 0x46528 +#define DPF_GATING_DIS (1 << 10) +#define DPF_RAM_GATING_DIS (1 << 9) +#define DPFR_GATING_DIS (1 << 8) + +#define CLKGATE_DIS_PSL(pipe) \ + _MMIO_PIPE(pipe, _CLKGATE_DIS_PSL_A, _CLKGATE_DIS_PSL_B) + +/* + * GEN10 clock gating regs + */ +#define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) +#define SARBUNIT_CLKGATE_DIS (1 << 5) +#define RCCUNIT_CLKGATE_DIS (1 << 7) + +#define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) +#define VFUNIT_CLKGATE_DIS (1 << 20) + /* * Display engine regs */ @@ -4036,7 +4114,7 @@ enum { #define EDP_PSR2_FRAME_BEFORE_SU_SHIFT 4 #define EDP_PSR2_FRAME_BEFORE_SU_MASK (0xf<<4) #define EDP_PSR2_IDLE_MASK 0xf -#define EDP_FRAMES_BEFORE_SU_ENTRY (1<<4) +#define EDP_PSR2_FRAME_BEFORE_SU(a) ((a)<<4) #define EDP_PSR2_STATUS_CTL _MMIO(0x6f940) #define EDP_PSR2_STATUS_STATE_MASK (0xf<<28) @@ -5210,7 +5288,7 @@ enum { #define DP_AUX_CH_CTL_TIME_OUT_400us (0 << 26) #define DP_AUX_CH_CTL_TIME_OUT_600us (1 << 26) #define DP_AUX_CH_CTL_TIME_OUT_800us (2 << 26) -#define DP_AUX_CH_CTL_TIME_OUT_1600us (3 << 26) +#define DP_AUX_CH_CTL_TIME_OUT_MAX (3 << 26) /* Varies per platform */ #define DP_AUX_CH_CTL_TIME_OUT_MASK (3 << 26) #define DP_AUX_CH_CTL_RECEIVE_ERROR (1 << 25) #define DP_AUX_CH_CTL_MESSAGE_SIZE_MASK (0x1f << 20) @@ -5652,8 +5730,7 @@ enum { #define CBR_PWM_CLOCK_MUX_SELECT (1<<30) #define CBR4_VLV _MMIO(VLV_DISPLAY_BASE + 0x70450) -#define CBR_DPLLBMD_PIPE_C (1<<29) -#define CBR_DPLLBMD_PIPE_B (1<<18) +#define CBR_DPLLBMD_PIPE(pipe) (1<<(7+(pipe)*11)) /* pipes B and C */ /* FIFO watermark sizes etc */ #define G4X_FIFO_LINE_SIZE 64 @@ -6232,7 +6309,7 @@ enum { #define 
_PLANE_CTL_2_A 0x70280 #define _PLANE_CTL_3_A 0x70380 #define PLANE_CTL_ENABLE (1 << 31) -#define PLANE_CTL_PIPE_GAMMA_ENABLE (1 << 30) +#define PLANE_CTL_PIPE_GAMMA_ENABLE (1 << 30) /* Pre-GLK */ #define PLANE_CTL_FORMAT_MASK (0xf << 24) #define PLANE_CTL_FORMAT_YUV422 ( 0 << 24) #define PLANE_CTL_FORMAT_NV12 ( 1 << 24) @@ -6242,7 +6319,7 @@ enum { #define PLANE_CTL_FORMAT_AYUV ( 8 << 24) #define PLANE_CTL_FORMAT_INDEXED ( 12 << 24) #define PLANE_CTL_FORMAT_RGB_565 ( 14 << 24) -#define PLANE_CTL_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_CTL_PIPE_CSC_ENABLE (1 << 23) /* Pre-GLK */ #define PLANE_CTL_KEY_ENABLE_MASK (0x3 << 21) #define PLANE_CTL_KEY_ENABLE_SOURCE ( 1 << 21) #define PLANE_CTL_KEY_ENABLE_DESTINATION ( 2 << 21) @@ -6255,13 +6332,14 @@ enum { #define PLANE_CTL_YUV422_VYUY ( 3 << 16) #define PLANE_CTL_DECOMPRESSION_ENABLE (1 << 15) #define PLANE_CTL_TRICKLE_FEED_DISABLE (1 << 14) -#define PLANE_CTL_PLANE_GAMMA_DISABLE (1 << 13) +#define PLANE_CTL_PLANE_GAMMA_DISABLE (1 << 13) /* Pre-GLK */ #define PLANE_CTL_TILED_MASK (0x7 << 10) #define PLANE_CTL_TILED_LINEAR ( 0 << 10) #define PLANE_CTL_TILED_X ( 1 << 10) #define PLANE_CTL_TILED_Y ( 4 << 10) #define PLANE_CTL_TILED_YF ( 5 << 10) -#define PLANE_CTL_ALPHA_MASK (0x3 << 4) +#define PLANE_CTL_FLIP_HORIZONTAL ( 1 << 8) +#define PLANE_CTL_ALPHA_MASK (0x3 << 4) /* Pre-GLK */ #define PLANE_CTL_ALPHA_DISABLE ( 0 << 4) #define PLANE_CTL_ALPHA_SW_PREMULTIPLY ( 2 << 4) #define PLANE_CTL_ALPHA_HW_PREMULTIPLY ( 3 << 4) @@ -6301,6 +6379,10 @@ enum { #define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) #define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) #define PLANE_COLOR_PLANE_GAMMA_DISABLE (1 << 13) +#define PLANE_COLOR_ALPHA_MASK (0x3 << 4) +#define PLANE_COLOR_ALPHA_DISABLE (0 << 4) +#define PLANE_COLOR_ALPHA_SW_PREMULTIPLY (2 << 4) +#define PLANE_COLOR_ALPHA_HW_PREMULTIPLY (3 << 4) #define _PLANE_BUF_CFG_1_A 0x7027c #define _PLANE_BUF_CFG_2_A 0x7037c #define _PLANE_NV12_BUF_CFG_1_A 0x70278 @@ -6902,7 +6984,7 @@ enum { # define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2) #define CHICKEN_PAR1_1 _MMIO(0x42080) -#define SKL_RC_HASH_OUTSIDE (1 << 15) +#define SKL_DE_COMPRESSED_HASH_MODE (1 << 15) #define DPA_MASK_VBLANK_SRD (1 << 15) #define FORCE_ARB_IDLE_PLANES (1 << 14) #define SKL_EDP_PSR_FIX_RDWRAP (1 << 3) @@ -6916,6 +6998,10 @@ enum { #define GLK_CL1_PWR_DOWN (1 << 11) #define GLK_CL0_PWR_DOWN (1 << 10) +#define CHICKEN_MISC_4 _MMIO(0x4208c) +#define FBC_STRIDE_OVERRIDE (1 << 13) +#define FBC_STRIDE_MASK 0x1FFF + #define _CHICKEN_PIPESL_1_A 0x420b0 #define _CHICKEN_PIPESL_1_B 0x420b4 #define HSW_FBCQ_DIS (1 << 22) @@ -6934,6 +7020,7 @@ enum { #define DISP_FBC_WM_DIS (1<<15) #define DISP_ARB_CTL2 _MMIO(0x45004) #define DISP_DATA_PARTITION_5_6 (1<<6) +#define DISP_IPC_ENABLE (1<<3) #define DBUF_CTL _MMIO(0x45008) #define DBUF_POWER_REQUEST (1<<31) #define DBUF_POWER_STATE (1<<30) @@ -6944,6 +7031,7 @@ enum { #define RESET_PCH_HANDSHAKE_ENABLE (1<<4) #define GEN8_CHICKEN_DCPR_1 _MMIO(0x46430) +#define SKL_SELECT_ALTERNATE_DC_EXIT (1<<30) #define MASK_WAKEMEM (1<<13) #define SKL_DFSM _MMIO(0x51000) @@ -6969,12 +7057,19 @@ enum { #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) #define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) #define GEN8_CS_CHICKEN1 _MMIO(0x2580) +#define GEN9_PREEMPT_3D_OBJECT_LEVEL (1<<0) +#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) +#define GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 0) +#define GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(0, 1) +#define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL 
GEN9_PREEMPT_GPGPU_LEVEL(1, 0) +#define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) /* GEN7 chicken */ #define GEN7_COMMON_SLICE_CHICKEN1 _MMIO(0x7010) # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1<<10) | (1<<26)) # define GEN9_RHWO_OPTIMIZATION_DISABLE (1<<14) #define COMMON_SLICE_CHICKEN2 _MMIO(0x7014) +# define GEN9_PBE_COMPRESSED_HASH_SELECTION (1<<13) # define GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE (1<<12) # define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8) # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1<<0) @@ -6986,6 +7081,8 @@ enum { #define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308) #define DISABLE_PIXEL_MASK_CAMMING (1<<14) +#define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) + #define GEN7_L3SQCREG1 _MMIO(0xB010) #define VLV_B0_WA_L3SQCREG1_VALUE 0x00D30000 @@ -7018,6 +7115,7 @@ enum { /* GEN8 chicken */ #define HDC_CHICKEN0 _MMIO(0x7300) +#define CNL_HDC_CHICKEN0 _MMIO(0xE5F0) #define HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE (1<<15) #define HDC_FENCE_DEST_SLM_DISABLE (1<<14) #define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11) @@ -7139,9 +7237,6 @@ enum { #define SERR_INT _MMIO(0xc4040) #define SERR_INT_POISON (1<<31) -#define SERR_INT_TRANS_C_FIFO_UNDERRUN (1<<6) -#define SERR_INT_TRANS_B_FIFO_UNDERRUN (1<<3) -#define SERR_INT_TRANS_A_FIFO_UNDERRUN (1<<0) #define SERR_INT_TRANS_FIFO_UNDERRUN(pipe) (1<<((pipe)*3)) /* digital port hotplug */ @@ -7454,6 +7549,8 @@ enum { #define FDI_PHASE_SYNC_OVR(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_OVR - ((pipe) * 2))) #define FDI_PHASE_SYNC_EN(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_EN - ((pipe) * 2))) #define FDI_BC_BIFURCATION_SELECT (1 << 12) +#define CHASSIS_CLK_REQ_DURATION_MASK (0xf << 8) +#define CHASSIS_CLK_REQ_DURATION(x) ((x) << 8) #define SPT_PWM_GRANULARITY (1<<0) #define SOUTH_CHICKEN2 _MMIO(0xc2004) #define FDI_MPHY_IOSFSB_RESET_STATUS (1<<13) @@ -7468,9 +7565,11 @@ enum { #define FDI_RX_CHICKEN(pipe) _MMIO_PIPE(pipe, _FDI_RXA_CHICKEN, _FDI_RXB_CHICKEN) #define SOUTH_DSPCLK_GATE_D _MMIO(0xc2020) +#define PCH_GMBUSUNIT_CLOCK_GATE_DISABLE (1<<31) #define PCH_DPLUNIT_CLOCK_GATE_DISABLE (1<<30) #define PCH_DPLSUNIT_CLOCK_GATE_DISABLE (1<<29) #define PCH_CPUNIT_CLOCK_GATE_DISABLE (1<<14) +#define CNP_PWM_CGE_GATING_DISABLE (1<<13) #define PCH_LP_PARTITION_LEVEL_DISABLE (1<<12) /* CPU: FDI_TX */ @@ -7730,8 +7829,9 @@ enum { #define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0x0D88) #define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0x0D84) #define FORCEWAKE_ACK_BLITTER_GEN9 _MMIO(0x130044) -#define FORCEWAKE_KERNEL 0x1 -#define FORCEWAKE_USER 0x2 +#define FORCEWAKE_KERNEL BIT(0) +#define FORCEWAKE_USER BIT(1) +#define FORCEWAKE_KERNEL_FALLBACK BIT(15) #define FORCEWAKE_MT_ACK _MMIO(0x130040) #define ECOBUS _MMIO(0xa180) #define FORCEWAKE_MT_ENABLE (1<<5) @@ -7861,6 +7961,7 @@ enum { #define GEN6_RC1_WAKE_RATE_LIMIT _MMIO(0xA098) #define GEN6_RC6_WAKE_RATE_LIMIT _MMIO(0xA09C) #define GEN6_RC6pp_WAKE_RATE_LIMIT _MMIO(0xA0A0) +#define GEN10_MEDIA_WAKE_RATE_LIMIT _MMIO(0xA0A0) #define GEN6_RC_EVALUATION_INTERVAL _MMIO(0xA0A8) #define GEN6_RC_IDLE_HYSTERSIS _MMIO(0xA0AC) #define GEN6_RC_SLEEP _MMIO(0xA0B0) @@ -7937,8 +8038,8 @@ enum { #define GEN7_PCODE_TIMEOUT 0x2 #define GEN7_PCODE_ILLEGAL_DATA 0x3 #define GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x10 -#define GEN6_PCODE_WRITE_RC6VIDS 0x4 -#define GEN6_PCODE_READ_RC6VIDS 0x5 +#define GEN6_PCODE_WRITE_RC6VIDS 0x4 +#define GEN6_PCODE_READ_RC6VIDS 0x5 #define GEN6_ENCODE_RC6_VID(mv) (((mv) - 245) / 5) #define GEN6_DECODE_RC6_VID(vids) (((vids) * 5) + 245) #define 
BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ 0x18 @@ -7957,7 +8058,9 @@ enum { #define GEN6_PCODE_WRITE_D_COMP 0x11 #define HSW_PCODE_DE_WRITE_FREQ_REQ 0x17 #define DISPLAY_IPS_CONTROL 0x19 -#define HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL 0x1A + /* See also IPS_CTL */ +#define IPS_PCODE_CONTROL (1 << 30) +#define HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL 0x1A #define GEN9_PCODE_SAGV_CONTROL 0x21 #define GEN9_SAGV_DISABLE 0x0 #define GEN9_SAGV_IS_DISABLED 0x1 @@ -7990,11 +8093,18 @@ enum { #define CHV_EU311_PG_ENABLE (1<<1) #define GEN9_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + (slice)*0x4) +#define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \ + ((slice) % 3) * 0x4) #define GEN9_PGCTL_SLICE_ACK (1 << 0) #define GEN9_PGCTL_SS_ACK(subslice) (1 << (2 + (subslice)*2)) +#define GEN10_PGCTL_VALID_SS_MASK(slice) ((slice) == 0 ? 0x7F : 0x1F) #define GEN9_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + (slice)*0x8) +#define GEN10_SS01_EU_PGCTL_ACK(slice) _MMIO(0x805c + ((slice) / 3) * 0x30 + \ + ((slice) % 3) * 0x8) #define GEN9_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + (slice)*0x8) +#define GEN10_SS23_EU_PGCTL_ACK(slice) _MMIO(0x8060 + ((slice) / 3) * 0x30 + \ + ((slice) % 3) * 0x8) #define GEN9_PGCTL_SSA_EU08_ACK (1 << 0) #define GEN9_PGCTL_SSA_EU19_ACK (1 << 2) #define GEN9_PGCTL_SSA_EU210_ACK (1 << 4) @@ -8045,10 +8155,13 @@ enum { #define FLOW_CONTROL_ENABLE (1<<15) #define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1<<8) #define STALL_DOP_GATING_DISABLE (1<<5) +#define THROTTLE_12_5 (7<<2) +#define DISABLE_EARLY_EOT (1<<1) #define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4) #define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4) #define DOP_CLOCK_GATING_DISABLE (1<<0) +#define PUSH_CONSTANT_DEREF_DISABLE (1<<8) #define HSW_ROW_CHICKEN3 _MMIO(0xe49c) #define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) @@ -8060,9 +8173,11 @@ enum { #define HSW_SAMPLE_C_PERFORMANCE (1<<9) #define GEN8_CENTROID_PIXEL_OPT_DIS (1<<8) #define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1<<5) +#define CNL_FAST_ANISO_L1_BANKING_FIX (1<<4) #define GEN8_SAMPLER_POWER_BYPASS_DIS (1<<1) #define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194) +#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR (1<<8) #define GEN9_ENABLE_YV12_BUGFIX (1<<4) #define GEN9_ENABLE_GPGPU_PREEMPTION (1<<2) @@ -8475,6 +8590,7 @@ enum skl_power_gate { #define BXT_CDCLK_CD2X_DIV_SEL_2 (2<<22) #define BXT_CDCLK_CD2X_DIV_SEL_4 (3<<22) #define BXT_CDCLK_CD2X_PIPE(pipe) ((pipe)<<20) +#define CDCLK_DIVMUX_CD_OVERRIDE (1<<19) #define BXT_CDCLK_CD2X_PIPE_NONE BXT_CDCLK_CD2X_PIPE(3) #define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1<<16) #define CDCLK_FREQ_DECIMAL_MASK (0x7ff) @@ -8575,7 +8691,7 @@ enum skl_power_gate { #define DPLL_CFGCR0_LINK_RATE_3240 (6 << 25) #define DPLL_CFGCR0_LINK_RATE_4050 (7 << 25) #define DPLL_CFGCR0_DCO_FRACTION_MASK (0x7fff << 10) -#define DPLL_CFGCR0_DCO_FRAC_SHIFT (10) +#define DPLL_CFGCR0_DCO_FRACTION_SHIFT (10) #define DPLL_CFGCR0_DCO_FRACTION(x) ((x) << 10) #define DPLL_CFGCR0_DCO_INTEGER_MASK (0x3ff) #define CNL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR0, _CNL_DPLL1_CFGCR0) @@ -8782,6 +8898,21 @@ enum skl_power_gate { #define MIPIO_TXESC_CLK_DIV2 _MMIO(0x160008) #define GLK_TX_ESC_CLK_DIV2_MASK 0x3FF +/* Gen4+ Timestamp and Pipe Frame time stamp registers */ +#define GEN4_TIMESTAMP _MMIO(0x2358) +#define ILK_TIMESTAMP_HI _MMIO(0x70070) +#define IVB_TIMESTAMP_CTR _MMIO(0x44070) + +#define GEN9_TIMESTAMP_OVERRIDE _MMIO(0x44074) +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT 0 +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK 0x3ff +#define 
GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT 12 +#define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf << 12) + +#define _PIPE_FRMTMSTMP_A 0x70048 +#define PIPE_FRMTMSTMP(pipe) \ + _MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A) + /* BXT MIPI clock controls */ #define BXT_MAX_VAR_OUTPUT_KHZ 39500 @@ -9363,4 +9494,8 @@ enum skl_power_gate { #define GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL 0x67F1427F /* " " */ #define GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT 0x5FF101FF /* " " */ +#define MMCD_MISC_CTRL _MMIO(0x4ddc) /* skl+ */ +#define MMCD_PCLA (1 << 31) +#define MMCD_HOTSPOT_EN (1 << 27) + #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index 78e1a1b168ff..9766e806dce6 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -99,6 +99,6 @@ __printf(2, 3) bool __igt_timeout(unsigned long timeout, const char *fmt, ...); #define igt_timeout(t, fmt, ...) \ - __igt_timeout((t), KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) + __igt_timeout((t), KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #endif /* !__I915_SELFTEST_H__ */ diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 5c86925a0294..8f3aa4dc0c98 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -108,8 +108,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_restore_fences(dev_priv); - if (IS_GEN4(dev_priv)) pci_write_config_word(pdev, GCDGMBUS, dev_priv->regfile.saveGCDGMBUS); diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index f29540f922af..3669f5eeb91e 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -9,6 +9,7 @@ #include <linux/slab.h> #include <linux/dma-fence.h> +#include <linux/irq_work.h> #include <linux/reservation.h> #include "i915_sw_fence.h" @@ -40,6 +41,11 @@ static inline void debug_fence_init(struct i915_sw_fence *fence) debug_object_init(fence, &i915_sw_fence_debug_descr); } +static inline void debug_fence_init_onstack(struct i915_sw_fence *fence) +{ + debug_object_init_on_stack(fence, &i915_sw_fence_debug_descr); +} + static inline void debug_fence_activate(struct i915_sw_fence *fence) { debug_object_activate(fence, &i915_sw_fence_debug_descr); @@ -78,6 +84,10 @@ static inline void debug_fence_init(struct i915_sw_fence *fence) { } +static inline void debug_fence_init_onstack(struct i915_sw_fence *fence) +{ +} + static inline void debug_fence_activate(struct i915_sw_fence *fence) { } @@ -293,6 +303,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, int pending; debug_fence_assert(fence); + might_sleep_if(gfpflags_allow_blocking(gfp)); if (i915_sw_fence_done(signaler)) return 0; @@ -356,34 +367,48 @@ struct i915_sw_dma_fence_cb { struct i915_sw_fence *fence; struct dma_fence *dma; struct timer_list timer; + struct irq_work work; + struct rcu_head rcu; }; -static void timer_i915_sw_fence_wake(unsigned long data) +static void timer_i915_sw_fence_wake(struct timer_list *t) { - struct i915_sw_dma_fence_cb *cb = (struct i915_sw_dma_fence_cb *)data; + struct i915_sw_dma_fence_cb *cb = from_timer(cb, t, timer); + struct i915_sw_fence *fence; + + fence = xchg(&cb->fence, NULL); + if (!fence) + return; pr_warn("asynchronous wait on fence %s:%s:%x timed out\n", cb->dma->ops->get_driver_name(cb->dma), cb->dma->ops->get_timeline_name(cb->dma), cb->dma->seqno); - dma_fence_put(cb->dma); 
- cb->dma = NULL; - i915_sw_fence_complete(cb->fence); - cb->timer.function = NULL; + i915_sw_fence_complete(fence); } static void dma_i915_sw_fence_wake(struct dma_fence *dma, struct dma_fence_cb *data) { struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base); + struct i915_sw_fence *fence; + + fence = xchg(&cb->fence, NULL); + if (fence) + i915_sw_fence_complete(fence); + + irq_work_queue(&cb->work); +} + +static void irq_i915_sw_fence_work(struct irq_work *wrk) +{ + struct i915_sw_dma_fence_cb *cb = container_of(wrk, typeof(*cb), work); del_timer_sync(&cb->timer); - if (cb->timer.function) - i915_sw_fence_complete(cb->fence); dma_fence_put(cb->dma); - kfree(cb); + kfree_rcu(cb, rcu); } int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, @@ -395,6 +420,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, int ret; debug_fence_assert(fence); + might_sleep_if(gfpflags_allow_blocking(gfp)); if (dma_fence_is_signaled(dma)) return 0; @@ -411,9 +437,8 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, i915_sw_fence_await(fence); cb->dma = NULL; - __setup_timer(&cb->timer, - timer_i915_sw_fence_wake, (unsigned long)cb, - TIMER_IRQSAFE); + timer_setup(&cb->timer, timer_i915_sw_fence_wake, TIMER_IRQSAFE); + init_irq_work(&cb->work, irq_i915_sw_fence_work); if (timeout) { cb->dma = dma_fence_get(dma); mod_timer(&cb->timer, round_jiffies_up(jiffies + timeout)); @@ -442,6 +467,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, int ret = 0, pending; debug_fence_assert(fence); + might_sleep_if(gfpflags_allow_blocking(gfp)); if (write) { struct dma_fence **shared; @@ -492,5 +518,6 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/lib_sw_fence.c" #include "selftests/i915_sw_fence.c" #endif diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d61c8727f756..b33d2158c234 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -42,14 +42,30 @@ static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) static u32 calc_residency(struct drm_i915_private *dev_priv, i915_reg_t reg) { - return DIV_ROUND_CLOSEST_ULL(intel_rc6_residency_us(dev_priv, reg), - 1000); + u64 res; + + intel_runtime_pm_get(dev_priv); + res = intel_rc6_residency_us(dev_priv, reg); + intel_runtime_pm_put(dev_priv); + + return DIV_ROUND_CLOSEST_ULL(res, 1000); } static ssize_t show_rc6_mask(struct device *kdev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%x\n", intel_enable_rc6()); + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + unsigned int mask; + + mask = 0; + if (HAS_RC6(dev_priv)) + mask |= BIT(0); + if (HAS_RC6p(dev_priv)) + mask |= BIT(1); + if (HAS_RC6pp(dev_priv)) + mask |= BIT(2); + + return snprintf(buf, PAGE_SIZE, "%x\n", mask); } static ssize_t @@ -246,22 +262,17 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 freq; freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); ret = intel_gpu_freq(dev_priv, (freq >> 8) & 0xff); } else { - u32 rpstat = I915_READ(GEN6_RPSTAT1); - if (INTEL_GEN(dev_priv) >= 9) - ret = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; - else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - ret = (rpstat & HSW_CAGF_MASK) >> 
HSW_CAGF_SHIFT; - else - ret = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; - ret = intel_gpu_freq(dev_priv, ret); + ret = intel_gpu_freq(dev_priv, + intel_get_cagf(dev_priv, + I915_READ(GEN6_RPSTAT1))); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); @@ -275,7 +286,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.cur_freq)); + dev_priv->gt_pm.rps.cur_freq)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -284,7 +295,7 @@ static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribu return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.boost_freq)); + dev_priv->gt_pm.rps.boost_freq)); } static ssize_t gt_boost_freq_mhz_store(struct device *kdev, @@ -292,6 +303,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -301,12 +313,12 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, /* Validate against (static) hardware limits */ val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || val > dev_priv->rps.max_freq) + if (val < rps->min_freq || val > rps->max_freq) return -EINVAL; - mutex_lock(&dev_priv->rps.hw_lock); - dev_priv->rps.boost_freq = val; - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); + rps->boost_freq = val; + mutex_unlock(&dev_priv->pcu_lock); return count; } @@ -318,7 +330,7 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.efficient_freq)); + dev_priv->gt_pm.rps.efficient_freq)); } static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -327,7 +339,7 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.max_freq_softlimit)); + dev_priv->gt_pm.rps.max_freq_softlimit)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -335,6 +347,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -344,34 +357,34 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || - val > dev_priv->rps.max_freq || - val < dev_priv->rps.min_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + if (val < rps->min_freq || + val > rps->max_freq || + val < rps->min_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); return -EINVAL; } - if (val > dev_priv->rps.rp0_freq) + if (val > rps->rp0_freq) DRM_DEBUG("User requested overclocking to %d\n", intel_gpu_freq(dev_priv, val)); - dev_priv->rps.max_freq_softlimit = val; + rps->max_freq_softlimit = val; - val = clamp_t(int, dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit); + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); /* We 
still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); @@ -384,7 +397,7 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, - dev_priv->rps.min_freq_softlimit)); + dev_priv->gt_pm.rps.min_freq_softlimit)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, @@ -392,6 +405,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; ssize_t ret; @@ -401,43 +415,43 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = intel_freq_opcode(dev_priv, val); - if (val < dev_priv->rps.min_freq || - val > dev_priv->rps.max_freq || - val > dev_priv->rps.max_freq_softlimit) { - mutex_unlock(&dev_priv->rps.hw_lock); + if (val < rps->min_freq || + val > rps->max_freq || + val > rps->max_freq_softlimit) { + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); return -EINVAL; } - dev_priv->rps.min_freq_softlimit = val; + rps->min_freq_softlimit = val; - val = clamp_t(int, dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit); + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); /* We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. 
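 *
 * Illustrative userspace usage (sysfs path assumed for card0):
 *
 *	echo 300 > /sys/class/drm/card0/gt_min_freq_mhz
 *
 * The written value is validated against the static min_freq/max_freq
 * bounds and the current max_freq_softlimit before being applied.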
*/ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv); return ret ?: count; } -static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); -static DEVICE_ATTR(gt_cur_freq_mhz, S_IRUGO, gt_cur_freq_mhz_show, NULL); -static DEVICE_ATTR(gt_boost_freq_mhz, S_IRUGO | S_IWUSR, gt_boost_freq_mhz_show, gt_boost_freq_mhz_store); -static DEVICE_ATTR(gt_max_freq_mhz, S_IRUGO | S_IWUSR, gt_max_freq_mhz_show, gt_max_freq_mhz_store); -static DEVICE_ATTR(gt_min_freq_mhz, S_IRUGO | S_IWUSR, gt_min_freq_mhz_show, gt_min_freq_mhz_store); +static DEVICE_ATTR_RO(gt_act_freq_mhz); +static DEVICE_ATTR_RO(gt_cur_freq_mhz); +static DEVICE_ATTR_RW(gt_boost_freq_mhz); +static DEVICE_ATTR_RW(gt_max_freq_mhz); +static DEVICE_ATTR_RW(gt_min_freq_mhz); -static DEVICE_ATTR(vlv_rpe_freq_mhz, S_IRUGO, vlv_rpe_freq_mhz_show, NULL); +static DEVICE_ATTR_RO(vlv_rpe_freq_mhz); static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf); static DEVICE_ATTR(gt_RP0_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); @@ -448,14 +462,15 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.rp0_freq); + val = intel_gpu_freq(dev_priv, rps->rp0_freq); else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq); + val = intel_gpu_freq(dev_priv, rps->rp1_freq); else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq); + val = intel_gpu_freq(dev_priv, rps->min_freq); else BUG(); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index ef72da74b87f..e1169c02eb2b 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -346,7 +346,7 @@ TRACE_EVENT(i915_gem_object_create, TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, size) + __field(u64, size) ), TP_fast_assign( @@ -354,7 +354,7 @@ TRACE_EVENT(i915_gem_object_create, __entry->size = obj->base.size; ), - TP_printk("obj=%p, size=%u", __entry->obj, __entry->size) + TP_printk("obj=%p, size=0x%llx", __entry->obj, __entry->size) ); TRACE_EVENT(i915_gem_shrink, @@ -385,7 +385,7 @@ TRACE_EVENT(i915_vma_bind, __field(struct drm_i915_gem_object *, obj) __field(struct i915_address_space *, vm) __field(u64, offset) - __field(u32, size) + __field(u64, size) __field(unsigned, flags) ), @@ -397,7 +397,7 @@ TRACE_EVENT(i915_vma_bind, __entry->flags = flags; ), - TP_printk("obj=%p, offset=%016llx size=%x%s vm=%p", + TP_printk("obj=%p, offset=0x%016llx size=0x%llx%s vm=%p", __entry->obj, __entry->offset, __entry->size, __entry->flags & PIN_MAPPABLE ? 
", mappable" : "", __entry->vm) @@ -411,7 +411,7 @@ TRACE_EVENT(i915_vma_unbind, __field(struct drm_i915_gem_object *, obj) __field(struct i915_address_space *, vm) __field(u64, offset) - __field(u32, size) + __field(u64, size) ), TP_fast_assign( @@ -421,18 +421,18 @@ TRACE_EVENT(i915_vma_unbind, __entry->size = vma->node.size; ), - TP_printk("obj=%p, offset=%016llx size=%x vm=%p", + TP_printk("obj=%p, offset=0x%016llx size=0x%llx vm=%p", __entry->obj, __entry->offset, __entry->size, __entry->vm) ); TRACE_EVENT(i915_gem_object_pwrite, - TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), + TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len), TP_ARGS(obj, offset, len), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, offset) - __field(u32, len) + __field(u64, offset) + __field(u64, len) ), TP_fast_assign( @@ -441,18 +441,18 @@ TRACE_EVENT(i915_gem_object_pwrite, __entry->len = len; ), - TP_printk("obj=%p, offset=%u, len=%u", + TP_printk("obj=%p, offset=0x%llx, len=0x%llx", __entry->obj, __entry->offset, __entry->len) ); TRACE_EVENT(i915_gem_object_pread, - TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), + TP_PROTO(struct drm_i915_gem_object *obj, u64 offset, u64 len), TP_ARGS(obj, offset, len), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, offset) - __field(u32, len) + __field(u64, offset) + __field(u64, len) ), TP_fast_assign( @@ -461,17 +461,17 @@ TRACE_EVENT(i915_gem_object_pread, __entry->len = len; ), - TP_printk("obj=%p, offset=%u, len=%u", + TP_printk("obj=%p, offset=0x%llx, len=0x%llx", __entry->obj, __entry->offset, __entry->len) ); TRACE_EVENT(i915_gem_object_fault, - TP_PROTO(struct drm_i915_gem_object *obj, u32 index, bool gtt, bool write), + TP_PROTO(struct drm_i915_gem_object *obj, u64 index, bool gtt, bool write), TP_ARGS(obj, index, gtt, write), TP_STRUCT__entry( __field(struct drm_i915_gem_object *, obj) - __field(u32, index) + __field(u64, index) __field(bool, gtt) __field(bool, write) ), @@ -483,7 +483,7 @@ TRACE_EVENT(i915_gem_object_fault, __entry->write = write; ), - TP_printk("obj=%p, %s index=%u %s", + TP_printk("obj=%p, %s index=%llu %s", __entry->obj, __entry->gtt ? "GTT" : "CPU", __entry->index, @@ -516,14 +516,14 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy, ); TRACE_EVENT(i915_gem_evict, - TP_PROTO(struct i915_address_space *vm, u32 size, u32 align, unsigned int flags), + TP_PROTO(struct i915_address_space *vm, u64 size, u64 align, unsigned int flags), TP_ARGS(vm, size, align, flags), TP_STRUCT__entry( __field(u32, dev) __field(struct i915_address_space *, vm) - __field(u32, size) - __field(u32, align) + __field(u64, size) + __field(u64, align) __field(unsigned int, flags) ), @@ -535,43 +535,11 @@ TRACE_EVENT(i915_gem_evict, __entry->flags = flags; ), - TP_printk("dev=%d, vm=%p, size=%d, align=%d %s", + TP_printk("dev=%d, vm=%p, size=0x%llx, align=0x%llx %s", __entry->dev, __entry->vm, __entry->size, __entry->align, __entry->flags & PIN_MAPPABLE ? 
", mappable" : "") ); -TRACE_EVENT(i915_gem_evict_everything, - TP_PROTO(struct drm_device *dev), - TP_ARGS(dev), - - TP_STRUCT__entry( - __field(u32, dev) - ), - - TP_fast_assign( - __entry->dev = dev->primary->index; - ), - - TP_printk("dev=%d", __entry->dev) -); - -TRACE_EVENT(i915_gem_evict_vm, - TP_PROTO(struct i915_address_space *vm), - TP_ARGS(vm), - - TP_STRUCT__entry( - __field(u32, dev) - __field(struct i915_address_space *, vm) - ), - - TP_fast_assign( - __entry->dev = vm->i915->drm.primary->index; - __entry->vm = vm; - ), - - TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) -); - TRACE_EVENT(i915_gem_evict_node, TP_PROTO(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags), TP_ARGS(vm, node, flags), @@ -594,12 +562,29 @@ TRACE_EVENT(i915_gem_evict_node, __entry->flags = flags; ), - TP_printk("dev=%d, vm=%p, start=%llx size=%llx, color=%lx, flags=%x", + TP_printk("dev=%d, vm=%p, start=0x%llx size=0x%llx, color=0x%lx, flags=%x", __entry->dev, __entry->vm, __entry->start, __entry->size, __entry->color, __entry->flags) ); +TRACE_EVENT(i915_gem_evict_vm, + TP_PROTO(struct i915_address_space *vm), + TP_ARGS(vm), + + TP_STRUCT__entry( + __field(u32, dev) + __field(struct i915_address_space *, vm) + ), + + TP_fast_assign( + __entry->dev = vm->i915->drm.primary->index; + __entry->vm = vm; + ), + + TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) +); + TRACE_EVENT(i915_gem_ring_sync_to, TP_PROTO(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from), @@ -631,6 +616,7 @@ TRACE_EVENT(i915_gem_request_queue, TP_STRUCT__entry( __field(u32, dev) + __field(u32, hw_id) __field(u32, ring) __field(u32, ctx) __field(u32, seqno) @@ -639,38 +625,16 @@ TRACE_EVENT(i915_gem_request_queue, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; __entry->flags = flags; ), - TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", - __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, - __entry->flags) -); - -TRACE_EVENT(i915_gem_ring_flush, - TP_PROTO(struct drm_i915_gem_request *req, u32 invalidate, u32 flush), - TP_ARGS(req, invalidate, flush), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u32, ring) - __field(u32, invalidate) - __field(u32, flush) - ), - - TP_fast_assign( - __entry->dev = req->i915->drm.primary->index; - __entry->ring = req->engine->id; - __entry->invalidate = invalidate; - __entry->flush = flush; - ), - - TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x", - __entry->dev, __entry->ring, - __entry->invalidate, __entry->flush) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->flags) ); DECLARE_EVENT_CLASS(i915_gem_request, @@ -679,23 +643,25 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_STRUCT__entry( __field(u32, dev) - __field(u32, ctx) + __field(u32, hw_id) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) __field(u32, global) ), TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; __entry->global = req->global_seqno; ), - TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u", - __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, - __entry->global) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, 
global=%u", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->global) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_add, @@ -721,15 +687,17 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, TP_STRUCT__entry( __field(u32, dev) + __field(u32, hw_id) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) __field(u32, global_seqno) - __field(u32, ctx) __field(u32, port) ), TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; @@ -737,10 +705,10 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, __entry->port = port; ), - TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", - __entry->dev, __entry->ring, __entry->ctx, - __entry->seqno, __entry->global_seqno, - __entry->port) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", + __entry->dev, __entry->hw_id, __entry->ring, + __entry->ctx, __entry->seqno, + __entry->global_seqno, __entry->port) ); DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, @@ -810,6 +778,7 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_STRUCT__entry( __field(u32, dev) + __field(u32, hw_id) __field(u32, ring) __field(u32, ctx) __field(u32, seqno) @@ -825,6 +794,7 @@ TRACE_EVENT(i915_gem_request_wait_begin, */ TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; @@ -832,10 +802,10 @@ TRACE_EVENT(i915_gem_request_wait_begin, __entry->flags = flags; ), - TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x", - __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, - __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), - __entry->flags) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->global, + !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, @@ -1032,5 +1002,5 @@ TRACE_EVENT(switch_mm, /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915 #include <trace/define_trace.h> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 12fc250b47b9..51dbfe5bb418 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -83,8 +83,11 @@ (typeof(ptr))(__v & -BIT(n)); \ }) -#define ptr_pack_bits(ptr, bits, n) \ - ((typeof(ptr))((unsigned long)(ptr) | (bits))) +#define ptr_pack_bits(ptr, bits, n) ({ \ + unsigned long __bits = (bits); \ + GEM_BUG_ON(__bits & -BIT(n)); \ + ((typeof(ptr))((unsigned long)(ptr) | __bits)); \ +}) #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) @@ -99,6 +102,11 @@ __T; \ }) +static inline u64 ptr_to_u64(const void *ptr) +{ + return (uintptr_t)ptr; +} + #define u64_to_ptr(T, x) ({ \ typecheck(u64, x); \ (T *)(uintptr_t)(x); \ @@ -119,4 +127,32 @@ static inline void __list_del_many(struct list_head *head, WRITE_ONCE(head->next, first); } +/* + * Wait until the work is finally complete, even if it tries to postpone + * by requeueing itself. Note, that if the worker never cancels itself, + * we will spin forever. 
+ */ +static inline void drain_delayed_work(struct delayed_work *dw) +{ + do { + while (flush_delayed_work(dw)) + ; + } while (delayed_work_pending(dw)); +} + +static inline const char *yesno(bool v) +{ + return v ? "yes" : "no"; +} + +static inline const char *onoff(bool v) +{ + return v ? "on" : "off"; +} + +static inline const char *enableddisabled(bool v) +{ + return v ? "enabled" : "disabled"; +} + #endif /* !__I915_UTILS_H */ diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h index b72bd2956b70..bb8338450dc1 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.h +++ b/drivers/gpu/drm/i915/i915_vgpu.h @@ -30,6 +30,12 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv); bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv); +static inline bool +intel_vgpu_has_hwsp_emulation(struct drm_i915_private *dev_priv) +{ + return dev_priv->vgpu.caps & VGT_CAPS_HWSP_EMULATION; +} + int intel_vgt_balloon(struct drm_i915_private *dev_priv); void intel_vgt_deballoon(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 02d1a5eacb00..e0e7c48f45dc 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -54,12 +54,21 @@ i915_vma_retire(struct i915_gem_active *active, if (--obj->active_count) return; + /* Prune the shared fence arrays iff completely idle (inc. external) */ + if (reservation_object_trylock(obj->resv)) { + if (reservation_object_test_signaled_rcu(obj->resv, true)) + reservation_object_add_excl_fence(obj->resv, NULL); + reservation_object_unlock(obj->resv); + } + /* Bump our place on the bound list to keep it roughly in LRU order * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) */ + spin_lock(&rq->i915->mm.obj_lock); if (obj->bind_count) - list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); + list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list); + spin_unlock(&rq->i915->mm.obj_lock); obj->mm.dirty = true; /* be paranoid */ @@ -133,6 +142,12 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); + /* + * We put the GGTT vma at the start of the vma-list, followed + * by the ppGTT vma. This allows us to break early when + * iterating over only the GGTT vma for an object, see + * for_each_ggtt_vma() + */ vma->flags |= I915_VMA_GGTT; list_add(&vma->obj_link, &obj->vma_list); } else { @@ -266,6 +281,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (bind_flags == 0) return 0; + GEM_BUG_ON(!vma->pages); + trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) @@ -278,30 +295,68 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) { void __iomem *ptr; + int err; /* Access through the GTT requires the device to be awake.
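 *
 * Caller-side sketch (illustrative; value and offset are placeholders):
 *
 *	ptr = i915_vma_pin_iomap(vma);
 *	if (IS_ERR(ptr))
 *		return PTR_ERR(ptr);
 *	writel(value, ptr + offset);
 *	i915_vma_unpin_iomap(vma);
 *
 * Note that with this patch the iomap pin also takes a fence, and the
 * unpin flushes any pending GGTT writes before dropping it.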
*/ assert_rpm_wakelock_held(vma->vm->i915); lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) - return IO_ERR_PTR(-ENODEV); + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { + err = -ENODEV; + goto err; + } GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); ptr = vma->iomap; if (ptr == NULL) { - ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, vma->node.start, vma->node.size); - if (ptr == NULL) - return IO_ERR_PTR(-ENOMEM); + if (ptr == NULL) { + err = -ENOMEM; + goto err; + } vma->iomap = ptr; } __i915_vma_pin(vma); + + err = i915_vma_pin_fence(vma); + if (err) + goto err_unpin; + + i915_vma_set_ggtt_write(vma); return ptr; + +err_unpin: + __i915_vma_unpin(vma); +err: + return IO_ERR_PTR(err); +} + +void i915_vma_flush_writes(struct i915_vma *vma) +{ + if (!i915_vma_has_ggtt_write(vma)) + return; + + i915_gem_flush_ggtt_writes(vma->vm->i915); + + i915_vma_unset_ggtt_write(vma); +} + +void i915_vma_unpin_iomap(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->obj->base.dev->struct_mutex); + + GEM_BUG_ON(vma->iomap == NULL); + + i915_vma_flush_writes(vma); + + i915_vma_unpin_fence(vma); + i915_vma_unpin(vma); } void i915_vma_unpin_and_release(struct i915_vma **p_vma) @@ -430,6 +485,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) u64 start, end; int ret; + GEM_BUG_ON(i915_vma_is_closed(vma)); GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); @@ -471,25 +527,64 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (ret) return ret; + GEM_BUG_ON(vma->pages); + + ret = vma->vm->set_pages(vma); + if (ret) + goto err_unpin; + if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; if (!IS_ALIGNED(offset, alignment) || range_overflows(offset, size, end)) { ret = -EINVAL; - goto err_unpin; + goto err_clear; } ret = i915_gem_gtt_reserve(vma->vm, &vma->node, size, offset, obj->cache_level, flags); if (ret) - goto err_unpin; + goto err_clear; } else { + /* + * We only support huge gtt pages through the 48b PPGTT; + * however, we also don't want to force any alignment for + * objects which need to be tightly packed into the low 32bits. + * + * Note that we assume that the GGTT is limited to 4GiB for the + * foreseeable future. See also i915_ggtt_offset(). + */ + if (upper_32_bits(end - 1) && + vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { + /* + * We can't mix 64K and 4K PTEs in the same page-table + * (2M block), and so to avoid the ugliness and + * complexity of coloring we opt for just aligning 64K + * objects to 2M. + */ + u64 page_alignment = + rounddown_pow_of_two(vma->page_sizes.sg | + I915_GTT_PAGE_SIZE_2M); + + /* + * Check we don't expand for the limited Global GTT + * (mappable aperture is even more precious!). This + * also checks that we exclude the aliasing-ppgtt.
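+ *
+ * Illustrative arithmetic for the alignment chosen above: an object
+ * backed purely by 64K pages has page_sizes.sg == SZ_64K, so
+ * rounddown_pow_of_two(SZ_64K | SZ_2M) == SZ_2M; both the alignment
+ * and, for 64K pages, the node size are then rounded up to 2M, which
+ * keeps 64K and 4K PTEs out of the same 2M page-table block.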
+ */ + GEM_BUG_ON(i915_vma_is_ggtt(vma)); + + alignment = max(alignment, page_alignment); + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) + size = round_up(size, I915_GTT_PAGE_SIZE_2M); + } + ret = i915_gem_gtt_insert(vma->vm, &vma->node, size, alignment, obj->cache_level, start, end, flags); if (ret) - goto err_unpin; + goto err_clear; GEM_BUG_ON(vma->node.start < start); GEM_BUG_ON(vma->node.start + vma->node.size > end); @@ -497,13 +592,19 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + + spin_lock(&dev_priv->mm.obj_lock); + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); obj->bind_count++; + spin_unlock(&dev_priv->mm.obj_lock); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); return 0; +err_clear: + vma->vm->clear_pages(vma); err_unpin: i915_gem_object_unpin_pages(obj); return ret; @@ -512,20 +613,24 @@ err_unpin: static void i915_vma_remove(struct i915_vma *vma) { + struct drm_i915_private *i915 = vma->vm->i915; struct drm_i915_gem_object *obj = vma->obj; GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + vma->vm->clear_pages(vma); + drm_mm_remove_node(&vma->node); list_move_tail(&vma->vm_link, &vma->vm->unbound_list); /* Since the unbound list is global, only move to that list if * no more VMAs exist. */ + spin_lock(&i915->mm.obj_lock); if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); + list_move_tail(&obj->mm.link, &i915->mm.unbound_list); + spin_unlock(&i915->mm.obj_lock); /* And finally now the object is completely decoupled from this vma, * we can drop its hold on the backing storage and allow it to be @@ -555,22 +660,25 @@ int __i915_vma_do_pin(struct i915_vma *vma, if (ret) goto err_unpin; } + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); ret = i915_vma_bind(vma, vma->obj->cache_level, flags); if (ret) goto err_remove; + GEM_BUG_ON((vma->flags & I915_VMA_BIND_MASK) == 0); + if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; err_remove: if ((bound & I915_VMA_BIND_MASK) == 0) { - GEM_BUG_ON(vma->pages); i915_vma_remove(vma); + GEM_BUG_ON(vma->pages); + GEM_BUG_ON(vma->flags & I915_VMA_BIND_MASK); } err_unpin: __i915_vma_unpin(vma); @@ -590,7 +698,9 @@ static void i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i])); GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); + list_del(&vma->obj_link); list_del(&vma->vm_link); + if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); @@ -602,7 +712,6 @@ void i915_vma_close(struct i915_vma *vma) GEM_BUG_ON(i915_vma_is_closed(vma)); vma->flags |= I915_VMA_CLOSED; - list_del(&vma->obj_link); rb_erase(&vma->obj_node, &vma->obj->vma_tree); if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) @@ -620,6 +729,30 @@ static void __i915_vma_iounmap(struct i915_vma *vma) vma->iomap = NULL; } +void i915_vma_revoke_mmap(struct i915_vma *vma) +{ + struct drm_vma_offset_node *node = &vma->obj->base.vma_node; + u64 vma_offset; + + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + if 
(!i915_vma_has_userfault(vma)) + return; + + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); + GEM_BUG_ON(!vma->obj->userfault_count); + + vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; + unmap_mapping_range(vma->vm->i915->drm.anon_inode->i_mapping, + drm_vma_node_offset_addr(node) + vma_offset, + vma->size, + 1); + + i915_vma_unset_userfault(vma); + if (!--vma->obj->userfault_count) + list_del(&vma->obj->userfault_link); +} + int i915_vma_unbind(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; @@ -631,6 +764,7 @@ int i915_vma_unbind(struct i915_vma *vma) /* First wait upon any activity as retiring the request may * have side-effects such as unpinning or even unbinding this vma. */ + might_sleep(); active = i915_vma_get_active(vma); if (active) { int idx; @@ -677,17 +811,28 @@ int i915_vma_unbind(struct i915_vma *vma) GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); if (i915_vma_is_map_and_fenceable(vma)) { + /* + * Check that we have flushed all writes through the GGTT + * before the unbind; otherwise, due to the non-strict nature of + * those indirect writes, they may end up referencing the GGTT PTE + * after the unbind. + */ + i915_vma_flush_writes(vma); + GEM_BUG_ON(i915_vma_has_ggtt_write(vma)); + /* release the fence reg _after_ flushing */ ret = i915_vma_put_fence(vma); if (ret) return ret; /* Force a pagefault for domain tracking on next user access */ - i915_gem_release_mmap(obj); + i915_vma_revoke_mmap(vma); __i915_vma_iounmap(vma); vma->flags &= ~I915_VMA_CAN_FENCE; } + GEM_BUG_ON(vma->fence); + GEM_BUG_ON(i915_vma_has_userfault(vma)); if (likely(!vma->vm->closed)) { trace_i915_vma_unbind(vma); @@ -695,13 +840,6 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - if (vma->pages != obj->mm.pages) { - GEM_BUG_ON(!vma->pages); - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - i915_vma_remove(vma); destroy: diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e811067c7724..fd5b84904f7c 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -55,6 +55,7 @@ struct i915_vma { void __iomem *iomap; u64 size; u64 display_alignment; + struct i915_page_sizes page_sizes; u32 fence_size; u32 fence_alignment; @@ -65,7 +66,7 @@ struct i915_vma { * that exist in the ctx->handle_vmas LUT for this vma. */ unsigned int open_count; - unsigned int flags; + unsigned long flags; /** * How many users have pinned this object in GTT space.
The following * users can each hold at most one reference: pwrite/pread, execbuffer @@ -87,6 +88,9 @@ struct i915_vma { #define I915_VMA_GGTT BIT(8) #define I915_VMA_CAN_FENCE BIT(9) #define I915_VMA_CLOSED BIT(10) +#define I915_VMA_USERFAULT_BIT 11 +#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) +#define I915_VMA_GGTT_WRITE BIT(12) unsigned int active; struct i915_gem_active last_read[I915_NUM_ENGINES]; @@ -135,6 +139,24 @@ static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) return vma->flags & I915_VMA_GGTT; } +static inline bool i915_vma_has_ggtt_write(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_GGTT_WRITE; +} + +static inline void i915_vma_set_ggtt_write(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + vma->flags |= I915_VMA_GGTT_WRITE; +} + +static inline void i915_vma_unset_ggtt_write(struct i915_vma *vma) +{ + vma->flags &= ~I915_VMA_GGTT_WRITE; +} + +void i915_vma_flush_writes(struct i915_vma *vma); + static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) { return vma->flags & I915_VMA_CAN_FENCE; @@ -145,6 +167,22 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma) return vma->flags & I915_VMA_CLOSED; } +static inline bool i915_vma_set_userfault(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); + return __test_and_set_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + +static inline void i915_vma_unset_userfault(struct i915_vma *vma) +{ + return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + +static inline bool i915_vma_has_userfault(const struct i915_vma *vma) +{ + return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags); +} + static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) { return vma->active; @@ -243,6 +281,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); +void i915_vma_revoke_mmap(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); @@ -321,12 +360,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); * Callers must hold the struct_mutex. This function is only valid to be * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). */ -static inline void i915_vma_unpin_iomap(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); - i915_vma_unpin(vma); -} +void i915_vma_unpin_iomap(struct i915_vma *vma); static inline struct page *i915_vma_first_page(struct i915_vma *vma) { @@ -349,15 +383,13 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma) * * True if the vma has a fence, false otherwise. 
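 *
 * A minimal call pattern under struct_mutex for the out-of-line API
 * declared below (illustrative sketch; with this patch
 * i915_vma_pin_fence() returns 0 on success or a negative error code):
 *
 *	err = i915_vma_pin_fence(vma);
 *	if (err)
 *		return err;
 *	... access the object through the fenced GGTT range ...
 *	i915_vma_unpin_fence(vma);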
*/ -static inline bool -i915_vma_pin_fence(struct i915_vma *vma) +int i915_vma_pin_fence(struct i915_vma *vma); +int __must_check i915_vma_put_fence(struct i915_vma *vma); + +static inline void __i915_vma_unpin_fence(struct i915_vma *vma) { - lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - if (vma->fence) { - vma->fence->pin_count++; - return true; - } else - return false; + GEM_BUG_ON(vma->fence->pin_count <= 0); + vma->fence->pin_count--; } /** @@ -372,11 +404,23 @@ static inline void i915_vma_unpin_fence(struct i915_vma *vma) { lockdep_assert_held(&vma->obj->base.dev->struct_mutex); - if (vma->fence) { - GEM_BUG_ON(vma->fence->pin_count <= 0); - vma->fence->pin_count--; - } + if (vma->fence) + __i915_vma_unpin_fence(vma); } -#endif +#define for_each_until(cond) if (cond) break; else + +/** + * for_each_ggtt_vma - Iterate over the GGTT VMA belonging to an object. + * @V: the #i915_vma iterator + * @OBJ: the #drm_i915_gem_object + * + * GGTT VMA are placed at the beginning of the object's vma_list, see + * vma_create(), so we can stop our walk as soon as we see a ppgtt VMA, + * or the list is empty ofc. + */ +#define for_each_ggtt_vma(V, OBJ) \ + list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \ + for_each_until(!i915_vma_is_ggtt(V)) +#endif diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index ee76fab7bb6f..8e6dc159f64d 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -107,7 +107,9 @@ intel_plane_destroy_state(struct drm_plane *plane, drm_atomic_helper_plane_destroy_state(plane, state); } -int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state, +int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *crtc_state, + const struct intel_plane_state *old_plane_state, struct intel_plane_state *intel_state) { struct drm_plane *plane = intel_state->base.plane; @@ -124,7 +126,7 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state, * anything driver-specific we need to test in that case, so * just return success. */ - if (!intel_state->base.crtc && !plane->state->crtc) + if (!intel_state->base.crtc && !old_plane_state->base.crtc) return 0; /* Clip all planes to CRTC size, or 0x0 if CRTC is disabled */ @@ -194,16 +196,21 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state, else crtc_state->active_planes &= ~BIT(intel_plane->id); - return intel_plane_atomic_calc_changes(&crtc_state->base, state); + return intel_plane_atomic_calc_changes(old_crtc_state, + &crtc_state->base, + old_plane_state, + state); } static int intel_plane_atomic_check(struct drm_plane *plane, - struct drm_plane_state *state) + struct drm_plane_state *new_plane_state) { - struct drm_crtc *crtc = state->crtc; - struct drm_crtc_state *drm_crtc_state; - - crtc = crtc ?
crtc : plane->state->crtc; + struct drm_atomic_state *state = new_plane_state->state; + const struct drm_plane_state *old_plane_state = + drm_atomic_get_old_plane_state(state, plane); + struct drm_crtc *crtc = new_plane_state->crtc ?: old_plane_state->crtc; + const struct drm_crtc_state *old_crtc_state; + struct drm_crtc_state *new_crtc_state; /* * Both crtc and plane->crtc could be NULL if we're updating a @@ -214,29 +221,33 @@ static int intel_plane_atomic_check(struct drm_plane *plane, if (!crtc) return 0; - drm_crtc_state = drm_atomic_get_existing_crtc_state(state->state, crtc); - if (WARN_ON(!drm_crtc_state)) - return -EINVAL; + old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); + new_crtc_state = drm_atomic_get_new_crtc_state(state, crtc); - return intel_plane_atomic_check_with_state(to_intel_crtc_state(drm_crtc_state), - to_intel_plane_state(state)); + return intel_plane_atomic_check_with_state(to_intel_crtc_state(old_crtc_state), + to_intel_crtc_state(new_crtc_state), + to_intel_plane_state(old_plane_state), + to_intel_plane_state(new_plane_state)); } static void intel_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { + struct intel_atomic_state *state = to_intel_atomic_state(old_state->state); struct intel_plane *intel_plane = to_intel_plane(plane); - struct intel_plane_state *intel_state = - to_intel_plane_state(plane->state); - struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc; + const struct intel_plane_state *new_plane_state = + intel_atomic_get_new_plane_state(state, intel_plane); + struct drm_crtc *crtc = new_plane_state->base.crtc ?: old_state->crtc; + + if (new_plane_state->base.visible) { + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, to_intel_crtc(crtc)); - if (intel_state->base.visible) { trace_intel_update_plane(plane, to_intel_crtc(crtc)); intel_plane->update_plane(intel_plane, - to_intel_crtc_state(crtc->state), - intel_state); + new_crtc_state, new_plane_state); } else { trace_intel_disable_plane(plane, to_intel_crtc(crtc)); diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 27743be5b768..4a01f62a392d 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -102,13 +102,13 @@ static const struct dp_aud_n_m dp_aud_n_m[] = { }; static const struct dp_aud_n_m * -audio_config_dp_get_n_m(struct intel_crtc *intel_crtc, int rate) +audio_config_dp_get_n_m(const struct intel_crtc_state *crtc_state, int rate) { int i; for (i = 0; i < ARRAY_SIZE(dp_aud_n_m); i++) { if (rate == dp_aud_n_m[i].sample_rate && - intel_crtc->config->port_clock == dp_aud_n_m[i].clock) + crtc_state->port_clock == dp_aud_n_m[i].clock) return &dp_aud_n_m[i]; } @@ -157,8 +157,10 @@ static const struct { }; /* get AUD_CONFIG_PIXEL_CLOCK_HDMI_* value for mode */ -static u32 audio_config_hdmi_pixel_clock(const struct drm_display_mode *adjusted_mode) +static u32 audio_config_hdmi_pixel_clock(const struct intel_crtc_state *crtc_state) { + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; int i; for (i = 0; i < ARRAY_SIZE(hdmi_audio_clock); i++) { @@ -179,9 +181,11 @@ static u32 audio_config_hdmi_pixel_clock(const struct drm_display_mode *adjusted return hdmi_audio_clock[i].config; } -static int audio_config_hdmi_get_n(const struct drm_display_mode *adjusted_mode, +static int audio_config_hdmi_get_n(const struct intel_crtc_state *crtc_state, int rate) { + const struct drm_display_mode *adjusted_mode = + 
&crtc_state->base.adjusted_mode; int i; for (i = 0; i < ARRAY_SIZE(hdmi_aud_ncts); i++) { @@ -220,7 +224,9 @@ static bool intel_eld_uptodate(struct drm_connector *connector, return true; } -static void g4x_audio_codec_disable(struct intel_encoder *encoder) +static void g4x_audio_codec_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); uint32_t eldv, tmp; @@ -239,11 +245,12 @@ static void g4x_audio_codec_disable(struct intel_encoder *encoder) I915_WRITE(G4X_AUD_CNTL_ST, tmp); } -static void g4x_audio_codec_enable(struct drm_connector *connector, - struct intel_encoder *encoder, - const struct drm_display_mode *adjusted_mode) +static void g4x_audio_codec_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(connector->dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct drm_connector *connector = conn_state->connector; uint8_t *eld = connector->eld; uint32_t eldv; uint32_t tmp; @@ -279,16 +286,20 @@ static void g4x_audio_codec_enable(struct drm_connector *connector, } static void -hsw_dp_audio_config_update(struct intel_crtc *intel_crtc, enum port port, - const struct drm_display_mode *adjusted_mode) +hsw_dp_audio_config_update(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - int rate = acomp ? acomp->aud_sample_rate[port] : 0; - const struct dp_aud_n_m *nm = audio_config_dp_get_n_m(intel_crtc, rate); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum port port = encoder->port; + enum pipe pipe = crtc->pipe; + const struct dp_aud_n_m *nm; + int rate; u32 tmp; + rate = acomp ? acomp->aud_sample_rate[port] : 0; + nm = audio_config_dp_get_n_m(crtc_state, rate); if (nm) DRM_DEBUG_KMS("using Maud %u, Naud %u\n", nm->m, nm->n); else @@ -323,23 +334,26 @@ hsw_dp_audio_config_update(struct intel_crtc *intel_crtc, enum port port, } static void -hsw_hdmi_audio_config_update(struct intel_crtc *intel_crtc, enum port port, - const struct drm_display_mode *adjusted_mode) +hsw_hdmi_audio_config_update(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - int rate = acomp ? acomp->aud_sample_rate[port] : 0; - enum pipe pipe = intel_crtc->pipe; - int n; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum port port = encoder->port; + enum pipe pipe = crtc->pipe; + int n, rate; u32 tmp; + rate = acomp ? 
acomp->aud_sample_rate[port] : 0; + tmp = I915_READ(HSW_AUD_CFG(pipe)); tmp &= ~AUD_CONFIG_N_VALUE_INDEX; tmp &= ~AUD_CONFIG_PIXEL_CLOCK_HDMI_MASK; tmp &= ~AUD_CONFIG_N_PROG_ENABLE; - tmp |= audio_config_hdmi_pixel_clock(adjusted_mode); + tmp |= audio_config_hdmi_pixel_clock(crtc_state); - n = audio_config_hdmi_get_n(adjusted_mode, rate); + n = audio_config_hdmi_get_n(crtc_state, rate); if (n != 0) { DRM_DEBUG_KMS("using N %d\n", n); @@ -363,20 +377,22 @@ hsw_hdmi_audio_config_update(struct intel_crtc *intel_crtc, enum port port, } static void -hsw_audio_config_update(struct intel_crtc *intel_crtc, enum port port, - const struct drm_display_mode *adjusted_mode) +hsw_audio_config_update(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - if (intel_crtc_has_dp_encoder(intel_crtc->config)) - hsw_dp_audio_config_update(intel_crtc, port, adjusted_mode); + if (intel_crtc_has_dp_encoder(crtc_state)) + hsw_dp_audio_config_update(encoder, crtc_state); else - hsw_hdmi_audio_config_update(intel_crtc, port, adjusted_mode); + hsw_hdmi_audio_config_update(encoder, crtc_state); } -static void hsw_audio_codec_disable(struct intel_encoder *encoder) +static void hsw_audio_codec_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + enum pipe pipe = crtc->pipe; uint32_t tmp; DRM_DEBUG_KMS("Disable audio codec on pipe %c\n", pipe_name(pipe)); @@ -389,7 +405,7 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder) tmp |= AUD_CONFIG_N_PROG_ENABLE; tmp &= ~AUD_CONFIG_UPPER_N_MASK; tmp &= ~AUD_CONFIG_LOWER_N_MASK; - if (intel_crtc_has_dp_encoder(intel_crtc->config)) + if (intel_crtc_has_dp_encoder(old_crtc_state)) tmp |= AUD_CONFIG_N_VALUE_INDEX; I915_WRITE(HSW_AUD_CFG(pipe), tmp); @@ -402,14 +418,14 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->av_mutex); } -static void hsw_audio_codec_enable(struct drm_connector *connector, - struct intel_encoder *intel_encoder, - const struct drm_display_mode *adjusted_mode) +static void hsw_audio_codec_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(intel_encoder->base.crtc); - enum pipe pipe = intel_crtc->pipe; - enum port port = intel_encoder->port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_connector *connector = conn_state->connector; + enum pipe pipe = crtc->pipe; const uint8_t *eld = connector->eld; uint32_t tmp; int len, i; @@ -448,17 +464,19 @@ static void hsw_audio_codec_enable(struct drm_connector *connector, I915_WRITE(HSW_AUD_PIN_ELD_CP_VLD, tmp); /* Enable timestamps */ - hsw_audio_config_update(intel_crtc, port, adjusted_mode); + hsw_audio_config_update(encoder, crtc_state); mutex_unlock(&dev_priv->av_mutex); } -static void ilk_audio_codec_disable(struct intel_encoder *intel_encoder) +static void ilk_audio_codec_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { - 
struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(intel_encoder->base.crtc); - enum pipe pipe = intel_crtc->pipe; - enum port port = intel_encoder->port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + enum pipe pipe = crtc->pipe; + enum port port = encoder->port; uint32_t tmp, eldv; i915_reg_t aud_config, aud_cntrl_st2; @@ -485,7 +503,7 @@ static void ilk_audio_codec_disable(struct intel_encoder *intel_encoder) tmp |= AUD_CONFIG_N_PROG_ENABLE; tmp &= ~AUD_CONFIG_UPPER_N_MASK; tmp &= ~AUD_CONFIG_LOWER_N_MASK; - if (intel_crtc_has_dp_encoder(intel_crtc->config)) + if (intel_crtc_has_dp_encoder(old_crtc_state)) tmp |= AUD_CONFIG_N_VALUE_INDEX; I915_WRITE(aud_config, tmp); @@ -497,14 +515,15 @@ static void ilk_audio_codec_disable(struct intel_encoder *intel_encoder) I915_WRITE(aud_cntrl_st2, tmp); } -static void ilk_audio_codec_enable(struct drm_connector *connector, - struct intel_encoder *intel_encoder, - const struct drm_display_mode *adjusted_mode) +static void ilk_audio_codec_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(connector->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(intel_encoder->base.crtc); - enum pipe pipe = intel_crtc->pipe; - enum port port = intel_encoder->port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_connector *connector = conn_state->connector; + enum pipe pipe = crtc->pipe; + enum port port = encoder->port; uint8_t *eld = connector->eld; uint32_t tmp, eldv; int len, i; @@ -568,36 +587,36 @@ static void ilk_audio_codec_enable(struct drm_connector *connector, tmp &= ~AUD_CONFIG_N_VALUE_INDEX; tmp &= ~AUD_CONFIG_N_PROG_ENABLE; tmp &= ~AUD_CONFIG_PIXEL_CLOCK_HDMI_MASK; - if (intel_crtc_has_dp_encoder(intel_crtc->config)) + if (intel_crtc_has_dp_encoder(crtc_state)) tmp |= AUD_CONFIG_N_VALUE_INDEX; else - tmp |= audio_config_hdmi_pixel_clock(adjusted_mode); + tmp |= audio_config_hdmi_pixel_clock(crtc_state); I915_WRITE(aud_config, tmp); } /** * intel_audio_codec_enable - Enable the audio codec for HD audio - * @intel_encoder: encoder on which to enable audio + * @encoder: encoder on which to enable audio * @crtc_state: pointer to the current crtc state. * @conn_state: pointer to the current connector state. * * The enable sequences may only be performed after enabling the transcoder and * port, and after completed link training. 
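 *
 * Schematically, the required ordering is (illustrative, not a literal
 * call chain):
 *
 *	enable transcoder -> enable port -> complete link training
 *		-> intel_audio_codec_enable()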
*/ -void intel_audio_codec_enable(struct intel_encoder *intel_encoder, +void intel_audio_codec_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; - struct drm_connector *connector; - struct drm_i915_private *dev_priv = to_i915(encoder->dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - enum port port = intel_encoder->port; - enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_connector *connector = conn_state->connector; + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + enum port port = encoder->port; + enum pipe pipe = crtc->pipe; - connector = conn_state->connector; - if (!connector || !connector->eld[0]) + if (!connector->eld[0]) return; DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", @@ -609,19 +628,20 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder, connector->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2; if (dev_priv->display.audio_codec_enable) - dev_priv->display.audio_codec_enable(connector, intel_encoder, - adjusted_mode); + dev_priv->display.audio_codec_enable(encoder, + crtc_state, + conn_state); mutex_lock(&dev_priv->av_mutex); - intel_encoder->audio_connector = connector; + encoder->audio_connector = connector; /* referred in audio callbacks */ - dev_priv->av_enc_map[pipe] = intel_encoder; + dev_priv->av_enc_map[pipe] = encoder; mutex_unlock(&dev_priv->av_mutex); if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify) { /* audio drivers expect pipe = -1 to indicate Non-MST cases */ - if (intel_encoder->type != INTEL_OUTPUT_DP_MST) + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) pipe = -1; acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr, (int) port, (int) pipe); @@ -629,36 +649,41 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder, intel_lpe_audio_notify(dev_priv, pipe, port, connector->eld, crtc_state->port_clock, - intel_encoder->type == INTEL_OUTPUT_DP); + intel_crtc_has_dp_encoder(crtc_state)); } /** * intel_audio_codec_disable - Disable the audio codec for HD audio - * @intel_encoder: encoder on which to disable audio + * @encoder: encoder on which to disable audio + * @old_crtc_state: pointer to the old crtc state. + * @old_conn_state: pointer to the old connector state. * * The disable sequences must be performed before disabling the transcoder or * port. 
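 *
 * i.e. the mirror image of the enable path (illustrative):
 *
 *	intel_audio_codec_disable() -> disable port -> disable transcoder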
*/ -void intel_audio_codec_disable(struct intel_encoder *intel_encoder) +void intel_audio_codec_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - struct drm_i915_private *dev_priv = to_i915(encoder->dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - enum port port = intel_encoder->port; - struct intel_crtc *crtc = to_intel_crtc(encoder->crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + enum port port = encoder->port; enum pipe pipe = crtc->pipe; if (dev_priv->display.audio_codec_disable) - dev_priv->display.audio_codec_disable(intel_encoder); + dev_priv->display.audio_codec_disable(encoder, + old_crtc_state, + old_conn_state); mutex_lock(&dev_priv->av_mutex); - intel_encoder->audio_connector = NULL; + encoder->audio_connector = NULL; dev_priv->av_enc_map[pipe] = NULL; mutex_unlock(&dev_priv->av_mutex); if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify) { /* audio drivers expect pipe = -1 to indicate Non-MST cases */ - if (intel_encoder->type != INTEL_OUTPUT_DP_MST) + if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST)) pipe = -1; acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr, (int) port, (int) pipe); @@ -754,11 +779,11 @@ static struct intel_encoder *get_saved_enc(struct drm_i915_private *dev_priv, { struct intel_encoder *encoder; - if (WARN_ON(pipe >= I915_MAX_PIPES)) - return NULL; - /* MST */ if (pipe >= 0) { + if (WARN_ON(pipe >= ARRAY_SIZE(dev_priv->av_enc_map))) + return NULL; + encoder = dev_priv->av_enc_map[pipe]; /* * when bootup, audio driver may not know it is @@ -793,10 +818,9 @@ static int i915_audio_component_sync_audio_rate(struct device *kdev, int port, int pipe, int rate) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); - struct intel_encoder *intel_encoder; - struct intel_crtc *crtc; - struct drm_display_mode *adjusted_mode; struct i915_audio_component *acomp = dev_priv->audio_component; + struct intel_encoder *encoder; + struct intel_crtc *crtc; int err = 0; if (!HAS_DDI(dev_priv)) @@ -806,23 +830,19 @@ static int i915_audio_component_sync_audio_rate(struct device *kdev, int port, mutex_lock(&dev_priv->av_mutex); /* 1. 
get the pipe */ - intel_encoder = get_saved_enc(dev_priv, port, pipe); - if (!intel_encoder || !intel_encoder->base.crtc) { + encoder = get_saved_enc(dev_priv, port, pipe); + if (!encoder || !encoder->base.crtc) { DRM_DEBUG_KMS("Not valid for port %c\n", port_name(port)); err = -ENODEV; goto unlock; } - /* pipe passed from the audio driver will be -1 for Non-MST case */ - crtc = to_intel_crtc(intel_encoder->base.crtc); - pipe = crtc->pipe; - - adjusted_mode = &crtc->config->base.adjusted_mode; + crtc = to_intel_crtc(encoder->base.crtc); /* port must be valid now, otherwise the pipe will be invalid */ acomp->aud_sample_rate[port] = rate; - hsw_audio_config_update(crtc, port, adjusted_mode); + hsw_audio_config_update(encoder, crtc->config); unlock: mutex_unlock(&dev_priv->av_mutex); diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 5d4cd3d00564..b49a2df44430 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -356,7 +356,7 @@ parse_sdvo_panel_data(struct drm_i915_private *dev_priv, struct drm_display_mode *panel_fixed_mode; int index; - index = i915.vbt_sdvo_panel_type; + index = i915_modparams.vbt_sdvo_panel_type; if (index == -2) { DRM_DEBUG_KMS("Ignore SDVO panel mode from BIOS VBT tables.\n"); return; @@ -431,70 +431,31 @@ parse_general_features(struct drm_i915_private *dev_priv, dev_priv->vbt.fdi_rx_polarity_inverted); } -static void -parse_general_definitions(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) -{ - const struct bdb_general_definitions *general; - - general = find_section(bdb, BDB_GENERAL_DEFINITIONS); - if (general) { - u16 block_size = get_blocksize(general); - if (block_size >= sizeof(*general)) { - int bus_pin = general->crt_ddc_gmbus_pin; - DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); - if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) - dev_priv->vbt.crt_ddc_pin = bus_pin; - } else { - DRM_DEBUG_KMS("BDB_GD too small (%d). Invalid.\n", - block_size); - } - } -} - -static const union child_device_config * -child_device_ptr(const struct bdb_general_definitions *p_defs, int i) +static const struct child_device_config * +child_device_ptr(const struct bdb_general_definitions *defs, int i) { - return (const void *) &p_defs->devices[i * p_defs->child_dev_size]; + return (const void *) &defs->devices[i * defs->child_dev_size]; } static void -parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, u8 bdb_version) { - struct sdvo_device_mapping *p_mapping; - const struct bdb_general_definitions *p_defs; - const struct old_child_dev_config *child; /* legacy */ - int i, child_device_num, count; - u16 block_size; - - p_defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); - if (!p_defs) { - DRM_DEBUG_KMS("No general definition block is found, unable to construct sdvo mapping.\n"); - return; - } + struct sdvo_device_mapping *mapping; + const struct child_device_config *child; + int i, count = 0; /* - * Only parse SDVO mappings when the general definitions block child - * device size matches that of the *legacy* child device config - * struct. Thus, SDVO mapping will be skipped for newer VBT. + * Only parse SDVO mappings on gens that could have SDVO. This isn't + * accurate and doesn't have to be, as long as it's not too strict. 
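+ *
+ * (Illustrative: IS_GEN(dev_priv, 3, 7) is an inclusive range test, so
+ * gen3 through gen7 devices are parsed while gen2 and gen8+ return
+ * early without walking the child device list.)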
*/ - if (p_defs->child_dev_size != sizeof(*child)) { - DRM_DEBUG_KMS("Unsupported child device size for SDVO mapping.\n"); + if (!IS_GEN(dev_priv, 3, 7)) { + DRM_DEBUG_KMS("Skipping SDVO device mapping\n"); return; } - /* get the block size of general definitions */ - block_size = get_blocksize(p_defs); - /* get the number of child device */ - child_device_num = (block_size - sizeof(*p_defs)) / - p_defs->child_dev_size; - count = 0; - for (i = 0; i < child_device_num; i++) { - child = &child_device_ptr(p_defs, i)->old; - if (!child->device_type) { - /* skip the device block if device type is invalid */ - continue; - } + + for (i = 0, count = 0; i < dev_priv->vbt.child_dev_num; i++) { + child = dev_priv->vbt.child_dev + i; + if (child->slave_addr != SLAVE_ADDR1 && child->slave_addr != SLAVE_ADDR2) { /* @@ -514,20 +475,20 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, child->slave_addr, (child->dvo_port == DEVICE_PORT_DVOB) ? "SDVOB" : "SDVOC"); - p_mapping = &dev_priv->vbt.sdvo_mappings[child->dvo_port - 1]; - if (!p_mapping->initialized) { - p_mapping->dvo_port = child->dvo_port; - p_mapping->slave_addr = child->slave_addr; - p_mapping->dvo_wiring = child->dvo_wiring; - p_mapping->ddc_pin = child->ddc_pin; - p_mapping->i2c_pin = child->i2c_pin; - p_mapping->initialized = 1; + mapping = &dev_priv->vbt.sdvo_mappings[child->dvo_port - 1]; + if (!mapping->initialized) { + mapping->dvo_port = child->dvo_port; + mapping->slave_addr = child->slave_addr; + mapping->dvo_wiring = child->dvo_wiring; + mapping->ddc_pin = child->ddc_pin; + mapping->i2c_pin = child->i2c_pin; + mapping->initialized = 1; DRM_DEBUG_KMS("SDVO device: dvo=%x, addr=%x, wiring=%d, ddc_pin=%d, i2c_pin=%d\n", - p_mapping->dvo_port, - p_mapping->slave_addr, - p_mapping->dvo_wiring, - p_mapping->ddc_pin, - p_mapping->i2c_pin); + mapping->dvo_port, + mapping->slave_addr, + mapping->dvo_wiring, + mapping->ddc_pin, + mapping->i2c_pin); } else { DRM_DEBUG_KMS("Maybe one SDVO port is shared by " "two SDVO device.\n"); @@ -545,7 +506,6 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, /* No SDVO device info is found */ DRM_DEBUG_KMS("No SDVO device info is found in VBT\n"); } - return; } static void @@ -577,7 +537,7 @@ parse_edp(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) { const struct bdb_edp *edp; const struct edp_power_seq *edp_pps; - const struct edp_link_params *edp_link_params; + const struct edp_fast_link_params *edp_link_params; int panel_type = dev_priv->vbt.panel_type; edp = find_section(bdb, BDB_EDP); @@ -601,7 +561,7 @@ parse_edp(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) /* Get the eDP sequencing and link info */ edp_pps = &edp->power_seqs[panel_type]; - edp_link_params = &edp->link_params[panel_type]; + edp_link_params = &edp->fast_link_params[panel_type]; dev_priv->vbt.edp.pps = *edp_pps; @@ -676,8 +636,9 @@ parse_edp(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) uint8_t vswing; /* Don't read from VBT if module parameter has valid value*/ - if (i915.edp_vswing) { - dev_priv->vbt.edp.low_vswing = i915.edp_vswing == 1; + if (i915_modparams.edp_vswing) { + dev_priv->vbt.edp.low_vswing = + i915_modparams.edp_vswing == 1; } else { vswing = (edp->edp_vswing_preemph >> (panel_type * 4)) & 0xF; dev_priv->vbt.edp.low_vswing = vswing == 0; @@ -730,6 +691,48 @@ parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) dev_priv->vbt.psr.tp2_tp3_wakeup_time = psr_table->tp2_tp3_wakeup_time; } +static void 
parse_dsi_backlight_ports(struct drm_i915_private *dev_priv, + u16 version, enum port port) +{ + if (!dev_priv->vbt.dsi.config->dual_link || version < 197) { + dev_priv->vbt.dsi.bl_ports = BIT(port); + if (dev_priv->vbt.dsi.config->cabc_supported) + dev_priv->vbt.dsi.cabc_ports = BIT(port); + + return; + } + + switch (dev_priv->vbt.dsi.config->dl_dcs_backlight_ports) { + case DL_DCS_PORT_A: + dev_priv->vbt.dsi.bl_ports = BIT(PORT_A); + break; + case DL_DCS_PORT_C: + dev_priv->vbt.dsi.bl_ports = BIT(PORT_C); + break; + default: + case DL_DCS_PORT_A_AND_C: + dev_priv->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(PORT_C); + break; + } + + if (!dev_priv->vbt.dsi.config->cabc_supported) + return; + + switch (dev_priv->vbt.dsi.config->dl_dcs_cabc_ports) { + case DL_DCS_PORT_A: + dev_priv->vbt.dsi.cabc_ports = BIT(PORT_A); + break; + case DL_DCS_PORT_C: + dev_priv->vbt.dsi.cabc_ports = BIT(PORT_C); + break; + default: + case DL_DCS_PORT_A_AND_C: + dev_priv->vbt.dsi.cabc_ports = + BIT(PORT_A) | BIT(PORT_C); + break; + } +} + static void parse_mipi_config(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) @@ -738,9 +741,10 @@ parse_mipi_config(struct drm_i915_private *dev_priv, const struct mipi_config *config; const struct mipi_pps_data *pps; int panel_type = dev_priv->vbt.panel_type; + enum port port; /* parse MIPI blocks only if LFP type is MIPI */ - if (!intel_bios_is_dsi_present(dev_priv, NULL)) + if (!intel_bios_is_dsi_present(dev_priv, &port)) return; /* Initialize this to undefined indicating no generic MIPI support */ @@ -781,15 +785,7 @@ parse_mipi_config(struct drm_i915_private *dev_priv, return; } - /* - * These fields are introduced from the VBT version 197 onwards, - * so making sure that these bits are set zero in the previous - * versions. - */ - if (dev_priv->vbt.dsi.config->dual_link && bdb->version < 197) { - dev_priv->vbt.dsi.config->dl_dcs_cabc_ports = 0; - dev_priv->vbt.dsi.config->dl_dcs_backlight_ports = 0; - } + parse_dsi_backlight_ports(dev_priv, bdb->version, port); /* We have mandatory mipi config blocks. Initialize as generic panel */ dev_priv->vbt.dsi.panel_id = MIPI_DSI_GENERIC_PANEL_ID; @@ -951,6 +947,86 @@ static int goto_next_sequence_v3(const u8 *data, int index, int total) return 0; } +/* + * Get len of pre-fixed deassert fragment from a v1 init OTP sequence, + * skip all delay + gpio operands and stop at the first DSI packet op. + */ +static int get_init_otp_deassert_fragment_len(struct drm_i915_private *dev_priv) +{ + const u8 *data = dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP]; + int index, len; + + if (WARN_ON(!data || dev_priv->vbt.dsi.seq_version != 1)) + return 0; + + /* index = 1 to skip sequence byte */ + for (index = 1; data[index] != MIPI_SEQ_ELEM_END; index += len) { + switch (data[index]) { + case MIPI_SEQ_ELEM_SEND_PKT: + return index == 1 ? 0 : index; + case MIPI_SEQ_ELEM_DELAY: + len = 5; /* 1 byte for operand + uint32 */ + break; + case MIPI_SEQ_ELEM_GPIO: + len = 3; /* 1 byte for op, 1 for gpio_nr, 1 for value */ + break; + default: + return 0; + } + } + + return 0; +} + +/* + * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence. + * The deassert must be done before calling intel_dsi_device_ready, so for + * these devices we split the init OTP sequence into a deassert sequence and + * the actual init OTP part. + */ +static void fixup_mipi_sequences(struct drm_i915_private *dev_priv) +{ + u8 *init_otp; + int len; + + /* Limit this to VLV for now. 
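The switch in parse_dsi_backlight_ports() reduces to a small selector-to-bitmask function where unknown values deliberately fall through to the dual-port default. A self-contained sketch of just that mapping (enum values and port numbering are illustrative only):

#include <stdio.h>

enum dl_dcs { DL_DCS_PORT_A, DL_DCS_PORT_C, DL_DCS_PORT_A_AND_C };
enum { PORT_A = 0, PORT_C = 2 };
#define BIT(n) (1u << (n))

/* Unknown selector values deliberately fall back to both ports. */
static unsigned int dl_dcs_to_port_mask(enum dl_dcs sel)
{
        switch (sel) {
        case DL_DCS_PORT_A:
                return BIT(PORT_A);
        case DL_DCS_PORT_C:
                return BIT(PORT_C);
        default:
        case DL_DCS_PORT_A_AND_C:
                return BIT(PORT_A) | BIT(PORT_C);
        }
}

int main(void)
{
        printf("backlight mask = %#x\n", dl_dcs_to_port_mask(DL_DCS_PORT_A_AND_C));
        printf("cabc mask      = %#x\n", dl_dcs_to_port_mask(DL_DCS_PORT_C));
        return 0;
}

Placing default: next to the DL_DCS_PORT_A_AND_C case, as the diff does, keeps out-of-range VBT data from silently disabling the backlight.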
*/ + if (!IS_VALLEYVIEW(dev_priv)) + return; + + /* Limit this to v1 vid-mode sequences */ + if (dev_priv->vbt.dsi.config->is_cmd_mode || + dev_priv->vbt.dsi.seq_version != 1) + return; + + /* Only do this if there are otp and assert seqs and no deassert seq */ + if (!dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] || + !dev_priv->vbt.dsi.sequence[MIPI_SEQ_ASSERT_RESET] || + dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET]) + return; + + /* The deassert-sequence ends at the first DSI packet */ + len = get_init_otp_deassert_fragment_len(dev_priv); + if (!len) + return; + + DRM_DEBUG_KMS("Using init OTP fragment to deassert reset\n"); + + /* Copy the fragment, update seq byte and terminate it */ + init_otp = (u8 *)dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP]; + dev_priv->vbt.dsi.deassert_seq = kmemdup(init_otp, len + 1, GFP_KERNEL); + if (!dev_priv->vbt.dsi.deassert_seq) + return; + dev_priv->vbt.dsi.deassert_seq[0] = MIPI_SEQ_DEASSERT_RESET; + dev_priv->vbt.dsi.deassert_seq[len] = MIPI_SEQ_ELEM_END; + /* Use the copy for deassert */ + dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET] = + dev_priv->vbt.dsi.deassert_seq; + /* Replace the last byte of the fragment with init OTP seq byte */ + init_otp[len - 1] = MIPI_SEQ_INIT_OTP; + /* And make MIPI_SEQ_INIT_OTP point to it */ + dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1; +} + static void parse_mipi_sequence(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) @@ -1020,6 +1096,8 @@ parse_mipi_sequence(struct drm_i915_private *dev_priv, dev_priv->vbt.dsi.size = seq_size; dev_priv->vbt.dsi.seq_version = sequence->version; + fixup_mipi_sequences(dev_priv); + DRM_DEBUG_DRIVER("MIPI related VBT parsing complete\n"); return; @@ -1110,10 +1188,32 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, } } +static const u8 cnp_ddc_pin_map[] = { + [0] = 0, /* N/A */ + [DDC_BUS_DDI_B] = GMBUS_PIN_1_BXT, + [DDC_BUS_DDI_C] = GMBUS_PIN_2_BXT, + [DDC_BUS_DDI_D] = GMBUS_PIN_4_CNP, /* sic */ + [DDC_BUS_DDI_F] = GMBUS_PIN_3_BXT, /* sic */ +}; + +static u8 map_ddc_pin(struct drm_i915_private *dev_priv, u8 vbt_pin) +{ + if (HAS_PCH_CNP(dev_priv)) { + if (vbt_pin < ARRAY_SIZE(cnp_ddc_pin_map)) { + return cnp_ddc_pin_map[vbt_pin]; + } else { + DRM_DEBUG_KMS("Ignoring alternate pin: VBT claims DDC pin %d, which is not valid for this platform\n", vbt_pin); + return 0; + } + } + + return vbt_pin; +} + static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, - const struct bdb_header *bdb) + u8 bdb_version) { - union child_device_config *it, *child = NULL; + struct child_device_config *it, *child = NULL; struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; uint8_t hdmi_level_shift; int i, j; @@ -1141,7 +1241,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (dvo_ports[port][j] == -1) break; - if (it->common.dvo_port == dvo_ports[port][j]) { + if (it->dvo_port == dvo_ports[port][j]) { if (child) { DRM_DEBUG_KMS("More than one child device for port %c in VBT, using the first.\n", port_name(port)); @@ -1154,14 +1254,21 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (!child) return; - aux_channel = child->common.aux_channel; - ddc_pin = child->common.ddc_pin; + aux_channel = child->aux_channel; + ddc_pin = child->ddc_pin; - is_dvi = child->common.device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING; - is_dp = child->common.device_type & DEVICE_TYPE_DISPLAYPORT_OUTPUT;
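The buffer surgery in fixup_mipi_sequences() is easier to follow in isolation: duplicate the leading fragment as a new deassert sequence, then patch the original in place so its tail becomes the shortened init OTP sequence. A runnable sketch with made-up sequence IDs (not the driver's constants):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

enum { SEQ_ELEM_END = 0, SEQ_DEASSERT = 1, SEQ_INIT_OTP = 2 };

/*
 * Duplicate the first 'len' bytes as a new deassert sequence, then
 * shorten the original so its tail becomes the init OTP sequence,
 * mirroring the kmemdup + in-place patching in fixup_mipi_sequences().
 */
static uint8_t *split_init_otp(uint8_t *init_otp, int len,
                               uint8_t **new_init_otp)
{
        uint8_t *deassert = malloc(len + 1);

        if (!deassert)
                return NULL;
        memcpy(deassert, init_otp, len + 1);
        deassert[0] = SEQ_DEASSERT;
        deassert[len] = SEQ_ELEM_END;

        /* The last fragment byte becomes the new init OTP seq byte. */
        init_otp[len - 1] = SEQ_INIT_OTP;
        *new_init_otp = init_otp + len - 1;
        return deassert;
}

int main(void)
{
        uint8_t seq[] = { SEQ_INIT_OTP, 0xaa, 0xbb, 0xcc, SEQ_ELEM_END };
        uint8_t *new_init_otp;
        uint8_t *deassert = split_init_otp(seq, 3, &new_init_otp);

        if (deassert)
                printf("deassert starts %u, init otp starts %u\n",
                       deassert[0], new_init_otp[0]);
        free(deassert);
        return 0;
}

Overwriting init_otp[len - 1] is safe precisely because that byte was already copied into the deassert duplicate.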
- is_crt = child->common.device_type & DEVICE_TYPE_ANALOG_OUTPUT; - is_hdmi = is_dvi && (child->common.device_type & DEVICE_TYPE_NOT_HDMI_OUTPUT) == 0; - is_edp = is_dp && (child->common.device_type & DEVICE_TYPE_INTERNAL_CONNECTOR); + is_dvi = child->device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING; + is_dp = child->device_type & DEVICE_TYPE_DISPLAYPORT_OUTPUT; + is_crt = child->device_type & DEVICE_TYPE_ANALOG_OUTPUT; + is_hdmi = is_dvi && (child->device_type & DEVICE_TYPE_NOT_HDMI_OUTPUT) == 0; + is_edp = is_dp && (child->device_type & DEVICE_TYPE_INTERNAL_CONNECTOR); + + if (port == PORT_A && is_dvi) { + DRM_DEBUG_KMS("VBT claims port A supports DVI%s, ignoring\n", + is_hdmi ? "/HDMI" : ""); + is_dvi = false; + is_hdmi = false; + } @@ -1195,16 +1302,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port)); if (is_dvi) { - info->alternate_ddc_pin = ddc_pin; - - /* - * All VBTs that we got so far for B Stepping has this - * information wrong for Port D. So, let's just ignore for now. - */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0) && - port == PORT_D) { - info->alternate_ddc_pin = 0; - } + info->alternate_ddc_pin = map_ddc_pin(dev_priv, ddc_pin); sanitize_ddc_pin(dev_priv, port); } @@ -1215,28 +1313,51 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, sanitize_aux_ch(dev_priv, port); } - if (bdb->version >= 158) { + if (bdb_version >= 158) { /* The VBT HDMI level shift values match the table we have. */ - hdmi_level_shift = child->raw[7] & 0xF; + hdmi_level_shift = child->hdmi_level_shifter_value; DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n", port_name(port), hdmi_level_shift); info->hdmi_level_shift = hdmi_level_shift; } + if (bdb_version >= 204) { + int max_tmds_clock; + + switch (child->hdmi_max_data_rate) { + default: + MISSING_CASE(child->hdmi_max_data_rate); + /* fall through */ + case HDMI_MAX_DATA_RATE_PLATFORM: + max_tmds_clock = 0; + break; + case HDMI_MAX_DATA_RATE_297: + max_tmds_clock = 297000; + break; + case HDMI_MAX_DATA_RATE_165: + max_tmds_clock = 165000; + break; + } + + if (max_tmds_clock) + DRM_DEBUG_KMS("VBT HDMI max TMDS clock for port %c: %d kHz\n", + port_name(port), max_tmds_clock); + info->max_tmds_clock = max_tmds_clock; + } + /* Parse the I_boost config for SKL and above */ - if (bdb->version >= 196 && child->common.iboost) { - info->dp_boost_level = translate_iboost(child->common.iboost_level & 0xF); + if (bdb_version >= 196 && child->iboost) { + info->dp_boost_level = translate_iboost(child->dp_iboost_level); DRM_DEBUG_KMS("VBT (e)DP boost level for port %c: %d\n", port_name(port), info->dp_boost_level); - info->hdmi_boost_level = translate_iboost(child->common.iboost_level >> 4); + info->hdmi_boost_level = translate_iboost(child->hdmi_iboost_level); DRM_DEBUG_KMS("VBT HDMI boost level for port %c: %d\n", port_name(port), info->hdmi_boost_level); } } -static void parse_ddi_ports(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version) { enum port port; @@ -1246,79 +1367,88 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv, if (!dev_priv->vbt.child_dev_num) return; - if (bdb->version < 155) + if (bdb_version < 155) return; for (port = PORT_A; port < I915_MAX_PORTS; port++) - parse_ddi_port(dev_priv, port, bdb); + parse_ddi_port(dev_priv, port, bdb_version); } static
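map_ddc_pin() above replaces the CNL-B0 special case with a bounds-checked lookup table, a pattern worth seeing on its own. A minimal sketch with placeholder table contents (the real VBT-to-GMBUS values live in the driver):

#include <stdio.h>
#include <stdint.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Hypothetical VBT-to-GMBUS pin table; index 0 means "no pin". */
static const uint8_t ddc_pin_map[] = { 0, 1, 2, 4, 3 };

static unsigned int map_pin(unsigned int vbt_pin)
{
        if (vbt_pin < ARRAY_SIZE(ddc_pin_map))
                return ddc_pin_map[vbt_pin];

        fprintf(stderr, "ignoring out-of-range VBT DDC pin %u\n", vbt_pin);
        return 0;       /* 0 == no alternate pin */
}

int main(void)
{
        printf("%u %u\n", map_pin(3), map_pin(200));
        return 0;
}

The bounds check matters because the index comes straight from untrusted VBT data; anything past the table is logged and treated as "no alternate pin".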
void -parse_device_mapping(struct drm_i915_private *dev_priv, - const struct bdb_header *bdb) +parse_general_definitions(struct drm_i915_private *dev_priv, + const struct bdb_header *bdb) { - const struct bdb_general_definitions *p_defs; - const union child_device_config *p_child; - union child_device_config *child_dev_ptr; + const struct bdb_general_definitions *defs; + const struct child_device_config *child; int i, child_device_num, count; u8 expected_size; u16 block_size; + int bus_pin; - p_defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); - if (!p_defs) { + defs = find_section(bdb, BDB_GENERAL_DEFINITIONS); + if (!defs) { DRM_DEBUG_KMS("No general definition block is found, no devices defined.\n"); return; } + + block_size = get_blocksize(defs); + if (block_size < sizeof(*defs)) { + DRM_DEBUG_KMS("General definitions block too small (%u)\n", + block_size); + return; + } + + bus_pin = defs->crt_ddc_gmbus_pin; + DRM_DEBUG_KMS("crt_ddc_bus_pin: %d\n", bus_pin); + if (intel_gmbus_is_valid_pin(dev_priv, bus_pin)) + dev_priv->vbt.crt_ddc_pin = bus_pin; + if (bdb->version < 106) { expected_size = 22; } else if (bdb->version < 111) { expected_size = 27; } else if (bdb->version < 195) { - BUILD_BUG_ON(sizeof(struct old_child_dev_config) != 33); - expected_size = sizeof(struct old_child_dev_config); + expected_size = LEGACY_CHILD_DEVICE_CONFIG_SIZE; } else if (bdb->version == 195) { expected_size = 37; - } else if (bdb->version <= 197) { + } else if (bdb->version <= 215) { expected_size = 38; + } else if (bdb->version <= 216) { + expected_size = 39; } else { - expected_size = 38; - BUILD_BUG_ON(sizeof(*p_child) < 38); + expected_size = sizeof(*child); + BUILD_BUG_ON(sizeof(*child) < 39); DRM_DEBUG_DRIVER("Expected child device config size for VBT version %u not known; assuming %u\n", bdb->version, expected_size); } /* Flag an error for unexpected size, but continue anyway. */ - if (p_defs->child_dev_size != expected_size) + if (defs->child_dev_size != expected_size) DRM_ERROR("Unexpected child device config size %u (expected %u for VBT version %u)\n", - p_defs->child_dev_size, expected_size, bdb->version); + defs->child_dev_size, expected_size, bdb->version); /* The legacy sized child device config is the minimum we need. 
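The version-banded expected_size ladder above is a pure function of the BDB version; extracting it makes the bands easy to test. A standalone sketch using the sizes from the diff (the final band stands in for sizeof(*child) in the driver):

#include <stdio.h>

/* Version bands copied from the diff; the last band is open-ended. */
static int expected_child_size(int bdb_version)
{
        if (bdb_version < 106)
                return 22;
        if (bdb_version < 111)
                return 27;
        if (bdb_version < 195)
                return 33;      /* legacy child device config */
        if (bdb_version == 195)
                return 37;
        if (bdb_version <= 215)
                return 38;
        if (bdb_version <= 216)
                return 39;
        return 39;      /* sizeof(*child) in the driver; at least 39 */
}

int main(void)
{
        for (int v = 100; v <= 220; v += 20)
                printf("BDB %d -> %d bytes\n", v, expected_child_size(v));
        return 0;
}

A mismatch between this expectation and defs->child_dev_size is logged but not fatal; only sizes below the legacy minimum abort the parse.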
*/ - if (p_defs->child_dev_size < sizeof(struct old_child_dev_config)) { + if (defs->child_dev_size < LEGACY_CHILD_DEVICE_CONFIG_SIZE) { DRM_DEBUG_KMS("Child device config size %u is too small.\n", - p_defs->child_dev_size); + defs->child_dev_size); return; } - /* get the block size of general definitions */ - block_size = get_blocksize(p_defs); /* get the number of child device */ - child_device_num = (block_size - sizeof(*p_defs)) / - p_defs->child_dev_size; + child_device_num = (block_size - sizeof(*defs)) / defs->child_dev_size; count = 0; /* get the number of child device that is present */ for (i = 0; i < child_device_num; i++) { - p_child = child_device_ptr(p_defs, i); - if (!p_child->common.device_type) { - /* skip the device block if device type is invalid */ + child = child_device_ptr(defs, i); + if (!child->device_type) continue; - } count++; } if (!count) { DRM_DEBUG_KMS("no child dev is parsed from VBT\n"); return; } - dev_priv->vbt.child_dev = kcalloc(count, sizeof(*p_child), GFP_KERNEL); + dev_priv->vbt.child_dev = kcalloc(count, sizeof(*child), GFP_KERNEL); if (!dev_priv->vbt.child_dev) { DRM_DEBUG_KMS("No memory space for child device\n"); return; @@ -1327,37 +1457,19 @@ parse_device_mapping(struct drm_i915_private *dev_priv, dev_priv->vbt.child_dev_num = count; count = 0; for (i = 0; i < child_device_num; i++) { - p_child = child_device_ptr(p_defs, i); - if (!p_child->common.device_type) { - /* skip the device block if device type is invalid */ + child = child_device_ptr(defs, i); + if (!child->device_type) continue; - } - - child_dev_ptr = dev_priv->vbt.child_dev + count; - count++; /* * Copy as much as we know (sizeof) and is available * (child_dev_size) of the child device. Accessing the data must * depend on VBT version. */ - memcpy(child_dev_ptr, p_child, - min_t(size_t, p_defs->child_dev_size, sizeof(*p_child))); - - /* - * copied full block, now init values when they are not - * available in current version - */ - if (bdb->version < 196) { - /* Set default values for bits added from v196 */ - child_dev_ptr->common.iboost = 0; - child_dev_ptr->common.hpd_invert = 0; - } - - if (bdb->version < 192) - child_dev_ptr->common.lspcon = 0; + memcpy(dev_priv->vbt.child_dev + count, child, + min_t(size_t, defs->child_dev_size, sizeof(*child))); + count++; } - return; } /* Common defaults which may be overridden by VBT. 
*/ @@ -1538,14 +1650,15 @@ void intel_bios_init(struct drm_i915_private *dev_priv) parse_lfp_panel_data(dev_priv, bdb); parse_lfp_backlight(dev_priv, bdb); parse_sdvo_panel_data(dev_priv, bdb); - parse_sdvo_device_mapping(dev_priv, bdb); - parse_device_mapping(dev_priv, bdb); parse_driver_features(dev_priv, bdb); parse_edp(dev_priv, bdb); parse_psr(dev_priv, bdb); parse_mipi_config(dev_priv, bdb); parse_mipi_sequence(dev_priv, bdb); - parse_ddi_ports(dev_priv, bdb); + + /* Further processing on pre-parsed data */ + parse_sdvo_device_mapping(dev_priv, bdb->version); + parse_ddi_ports(dev_priv, bdb->version); out: if (!vbt) { @@ -1558,6 +1671,29 @@ out: } /** + * intel_bios_cleanup - Free any resources allocated by intel_bios_init() + * @dev_priv: i915 device instance + */ +void intel_bios_cleanup(struct drm_i915_private *dev_priv) +{ + kfree(dev_priv->vbt.child_dev); + dev_priv->vbt.child_dev = NULL; + dev_priv->vbt.child_dev_num = 0; + kfree(dev_priv->vbt.sdvo_lvds_vbt_mode); + dev_priv->vbt.sdvo_lvds_vbt_mode = NULL; + kfree(dev_priv->vbt.lfp_lvds_vbt_mode); + dev_priv->vbt.lfp_lvds_vbt_mode = NULL; + kfree(dev_priv->vbt.dsi.data); + dev_priv->vbt.dsi.data = NULL; + kfree(dev_priv->vbt.dsi.pps); + dev_priv->vbt.dsi.pps = NULL; + kfree(dev_priv->vbt.dsi.config); + dev_priv->vbt.dsi.config = NULL; + kfree(dev_priv->vbt.dsi.deassert_seq); + dev_priv->vbt.dsi.deassert_seq = NULL; +} + +/** * intel_bios_is_tv_present - is integrated TV present in VBT * @dev_priv: i915 device instance * @@ -1566,7 +1702,7 @@ out: */ bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv) { - union child_device_config *p_child; + const struct child_device_config *child; int i; if (!dev_priv->vbt.int_tv_support) @@ -1576,11 +1712,11 @@ bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv) return true; for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - p_child = dev_priv->vbt.child_dev + i; + child = dev_priv->vbt.child_dev + i; /* * If the device type is not TV, continue. */ - switch (p_child->old.device_type) { + switch (child->device_type) { case DEVICE_TYPE_INT_TV: case DEVICE_TYPE_TV: case DEVICE_TYPE_TV_SVIDEO_COMPOSITE: @@ -1591,7 +1727,7 @@ bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv) /* Only when the addin_offset is non-zero, it is regarded * as present. */ - if (p_child->old.addin_offset) + if (child->addin_offset) return true; } @@ -1608,14 +1744,14 @@ bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv) */ bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin) { + const struct child_device_config *child; int i; if (!dev_priv->vbt.child_dev_num) return true; for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - union child_device_config *uchild = dev_priv->vbt.child_dev + i; - struct old_child_dev_config *child = &uchild->old; + child = dev_priv->vbt.child_dev + i; /* If the device type is not LFP, continue. 
* We have to check both the new identifiers as well as the @@ -1657,6 +1793,7 @@ bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin) */ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port) { + const struct child_device_config *child; static const struct { u16 dp, hdmi; } port_mapping[] = { @@ -1675,12 +1812,12 @@ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port por return false; for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - const union child_device_config *p_child = - &dev_priv->vbt.child_dev[i]; - if ((p_child->common.dvo_port == port_mapping[port].dp || - p_child->common.dvo_port == port_mapping[port].hdmi) && - (p_child->common.device_type & (DEVICE_TYPE_TMDS_DVI_SIGNALING | - DEVICE_TYPE_DISPLAYPORT_OUTPUT))) + child = dev_priv->vbt.child_dev + i; + + if ((child->dvo_port == port_mapping[port].dp || + child->dvo_port == port_mapping[port].hdmi) && + (child->device_type & (DEVICE_TYPE_TMDS_DVI_SIGNALING | + DEVICE_TYPE_DISPLAYPORT_OUTPUT))) return true; } @@ -1696,7 +1833,7 @@ bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port por */ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port) { - union child_device_config *p_child; + const struct child_device_config *child; static const short port_mapping[] = { [PORT_B] = DVO_PORT_DPB, [PORT_C] = DVO_PORT_DPC, @@ -1712,10 +1849,10 @@ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port) return false; for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - p_child = dev_priv->vbt.child_dev + i; + child = dev_priv->vbt.child_dev + i; - if (p_child->common.dvo_port == port_mapping[port] && - (p_child->common.device_type & DEVICE_TYPE_eDP_BITS) == + if (child->dvo_port == port_mapping[port] && + (child->device_type & DEVICE_TYPE_eDP_BITS) == (DEVICE_TYPE_eDP & DEVICE_TYPE_eDP_BITS)) return true; } @@ -1723,7 +1860,7 @@ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port) return false; } -static bool child_dev_is_dp_dual_mode(const union child_device_config *p_child, +static bool child_dev_is_dp_dual_mode(const struct child_device_config *child, enum port port) { static const struct { @@ -1742,16 +1879,16 @@ static bool child_dev_is_dp_dual_mode(const union child_device_config *p_child, if (port == PORT_A || port >= ARRAY_SIZE(port_mapping)) return false; - if ((p_child->common.device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) != + if ((child->device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) != (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS)) return false; - if (p_child->common.dvo_port == port_mapping[port].dp) + if (child->dvo_port == port_mapping[port].dp) return true; /* Only accept a HDMI dvo_port as DP++ if it has an AUX channel */ - if (p_child->common.dvo_port == port_mapping[port].hdmi && - p_child->common.aux_channel != 0) + if (child->dvo_port == port_mapping[port].hdmi && + child->aux_channel != 0) return true; return false; @@ -1760,13 +1897,13 @@ static bool child_dev_is_dp_dual_mode(const union child_device_config *p_child, bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port) { + const struct child_device_config *child; int i; for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - const union child_device_config *p_child = - &dev_priv->vbt.child_dev[i]; + child = dev_priv->vbt.child_dev + i; - if (child_dev_is_dp_dual_mode(p_child, port)) + if (child_dev_is_dp_dual_mode(child, port)) return true; } @@ 
-1783,17 +1920,17 @@ bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port) { - union child_device_config *p_child; + const struct child_device_config *child; u8 dvo_port; int i; for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - p_child = dev_priv->vbt.child_dev + i; + child = dev_priv->vbt.child_dev + i; - if (!(p_child->common.device_type & DEVICE_TYPE_MIPI_OUTPUT)) + if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT)) continue; - dvo_port = p_child->common.dvo_port; + dvo_port = child->dvo_port; switch (dvo_port) { case DVO_PORT_MIPIA: @@ -1823,16 +1960,19 @@ bool intel_bios_is_port_hpd_inverted(struct drm_i915_private *dev_priv, enum port port) { + const struct child_device_config *child; int i; if (WARN_ON_ONCE(!IS_GEN9_LP(dev_priv))) return false; for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - if (!dev_priv->vbt.child_dev[i].common.hpd_invert) + child = dev_priv->vbt.child_dev + i; + + if (!child->hpd_invert) continue; - switch (dev_priv->vbt.child_dev[i].common.dvo_port) { + switch (child->dvo_port) { case DVO_PORT_DPA: case DVO_PORT_HDMIA: if (port == PORT_A) @@ -1867,16 +2007,19 @@ bool intel_bios_is_lspcon_present(struct drm_i915_private *dev_priv, enum port port) { + const struct child_device_config *child; int i; if (!HAS_LSPCON(dev_priv)) return false; for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { - if (!dev_priv->vbt.child_dev[i].common.lspcon) + child = dev_priv->vbt.child_dev + i; + + if (!child->lspcon) continue; - switch (dev_priv->vbt.child_dev[i].common.dvo_port) { + switch (child->dvo_port) { case DVO_PORT_DPA: case DVO_PORT_HDMIA: if (port == PORT_A) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 4e00e5cb9fa1..f54ddda9fdad 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -27,6 +27,12 @@ #include "i915_drv.h" +#ifdef CONFIG_SMP +#define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_cpu) +#else +#define task_asleep(tsk) ((tsk)->state & TASK_NORMAL) +#endif + static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) { struct intel_wait *wait; @@ -36,8 +42,20 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) wait = b->irq_wait; if (wait) { + /* + * N.B. Since task_asleep() and ttwu are not atomic, the + * waiter may actually go to sleep after the check, causing + * us to suppress a valid wakeup. We prefer to reduce the + * number of false positive missed_breadcrumb() warnings + * at the expense of a few false negatives, as it is easy + * to trigger a false positive under heavy load. Enough + * signal should remain from genuine missed_breadcrumb() + * for us to detect in CI. + */ + bool was_asleep = task_asleep(wait->tsk); + result = ENGINE_WAKEUP_WAITER; - if (wake_up_process(wait->tsk)) + if (wake_up_process(wait->tsk) && was_asleep) result |= ENGINE_WAKEUP_ASLEEP; } @@ -64,19 +82,21 @@ static unsigned long wait_timeout(void) static noinline void missed_breadcrumb(struct intel_engine_cs *engine) { - DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted?
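The was_asleep change above is a "sample before, confirm after" pattern: the sleep state is read before issuing the wakeup, and ENGINE_WAKEUP_ASLEEP is reported only when both the sample and the wakeup agree. A toy single-threaded model of that logic (no real scheduler involved; names are stand-ins):

#include <stdbool.h>
#include <stdio.h>

enum { ENGINE_WAKEUP_WAITER = 1, ENGINE_WAKEUP_ASLEEP = 2 };

struct waiter { bool asleep; };

/* Stand-in for wake_up_process(): returns true if the task was woken. */
static bool wake(struct waiter *w)
{
        bool woken = w->asleep;

        w->asleep = false;
        return woken;
}

/*
 * Sample the sleep state before the wakeup and require both the sample
 * and the wakeup to agree. A task that falls asleep between the sample
 * and the wakeup is deliberately missed: the diff trades those false
 * negatives for fewer false-positive missed-breadcrumb warnings.
 */
static unsigned int wakeup(struct waiter *w)
{
        bool was_asleep = w->asleep;
        unsigned int result = ENGINE_WAKEUP_WAITER;

        if (wake(w) && was_asleep)
                result |= ENGINE_WAKEUP_ASLEEP;
        return result;
}

int main(void)
{
        struct waiter w = { .asleep = true };

        printf("result=%u\n", wakeup(&w));
        return 0;
}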
%s, current seqno=%x, last=%x\n", - engine->name, __builtin_return_address(0), - yesno(test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted)), - intel_engine_get_seqno(engine), - intel_engine_last_submit(engine)); + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer(__func__); + + intel_engine_dump(engine, &p, + "%s missed breadcrumb at %pS\n", + engine->name, __builtin_return_address(0)); + } set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } -static void intel_breadcrumbs_hangcheck(unsigned long data) +static void intel_breadcrumbs_hangcheck(struct timer_list *t) { - struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_engine_cs *engine = + from_timer(engine, t, breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; if (!b->irq_armed) @@ -102,15 +122,16 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) */ if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { missed_breadcrumb(engine); - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + mod_timer(&b->fake_irq, jiffies + 1); } else { mod_timer(&b->hangcheck, wait_timeout()); } } -static void intel_breadcrumbs_fake_irq(unsigned long data) +static void intel_breadcrumbs_fake_irq(struct timer_list *t) { - struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_engine_cs *engine = from_timer(engine, t, + breadcrumbs.fake_irq); struct intel_breadcrumbs *b = &engine->breadcrumbs; /* The timer persists in case we cannot enable interrupts, @@ -121,28 +142,25 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) */ spin_lock_irq(&b->irq_lock); - if (!__intel_breadcrumbs_wakeup(b)) + if (b->irq_armed && !__intel_breadcrumbs_wakeup(b)) __intel_engine_disarm_breadcrumbs(engine); spin_unlock_irq(&b->irq_lock); if (!b->irq_armed) return; mod_timer(&b->fake_irq, jiffies + 1); - - /* Ensure that even if the GPU hangs, we get woken up. - * - * However, note that if no one is waiting, we never notice - * a gpu hang. Eventually, we will have to wait for a resource - * held by the GPU and so trigger a hangcheck. In the most - * pathological case, this will be upon memory starvation! To - * prevent this, we also queue the hangcheck from the retire - * worker. - */ - i915_queue_hangcheck(engine->i915); } static void irq_enable(struct intel_engine_cs *engine) { + /* + * FIXME: Ideally we want this on the API boundary, but for the + * sake of testing with mock breadcrumbs (no HW so unable to + * enable irqs) we place it deep within the bowels, at the point + * of no return. + */ + GEM_BUG_ON(!intel_irqs_enabled(engine->i915)); + /* Enabling the IRQ may miss the generation of the interrupt, but * we still need to force the barrier before reading the seqno, * just in case. @@ -169,43 +187,76 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) lockdep_assert_held(&b->irq_lock); GEM_BUG_ON(b->irq_wait); + GEM_BUG_ON(!b->irq_armed); - if (b->irq_enabled) { + GEM_BUG_ON(!b->irq_enabled); + if (!--b->irq_enabled) irq_disable(engine); - b->irq_enabled = false; - } b->irq_armed = false; } +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + spin_lock_irq(&b->irq_lock); + if (!b->irq_enabled++) + irq_enable(engine); + GEM_BUG_ON(!b->irq_enabled); /* no overflow! 
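The timer conversion in this hunk swaps the old unsigned-long data cookie for from_timer(), which is container_of() in disguise: the callback receives the timer pointer and recovers the enclosing object. A compact userspace demonstration (GNU C typeof, as the kernel uses; struct names simplified):

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
#define from_timer(var, t, member) container_of(t, typeof(*var), member)

struct timer { void (*fn)(struct timer *); };
struct engine { int id; struct timer hangcheck; };

static void hangcheck_cb(struct timer *t)
{
        /* Recover the engine that embeds this timer. */
        struct engine *engine = from_timer(engine, t, hangcheck);

        printf("hangcheck for engine %d\n", engine->id);
}

int main(void)
{
        struct engine e = { .id = 0, .hangcheck = { .fn = hangcheck_cb } };

        e.hangcheck.fn(&e.hangcheck);   /* simulate timer expiry */
        return 0;
}

Because the callback derives its context from the timer's own address, the error-prone (unsigned long) cast of the old setup_timer() interface disappears entirely.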
*/ + spin_unlock_irq(&b->irq_lock); +} + +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + spin_lock_irq(&b->irq_lock); + GEM_BUG_ON(!b->irq_enabled); /* no underflow! */ + if (!--b->irq_enabled) + irq_disable(engine); + spin_unlock_irq(&b->irq_lock); +} + void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_wait *wait, *n, *first; + struct intel_wait *wait, *n; if (!b->irq_armed) - return; + goto wakeup_signaler; - /* We only disarm the irq when we are idle (all requests completed), + /* + * We only disarm the irq when we are idle (all requests completed), * so if the bottom-half remains asleep, it missed the request * completion. */ + if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) + missed_breadcrumb(engine); spin_lock_irq(&b->rb_lock); spin_lock(&b->irq_lock); - first = fetch_and_zero(&b->irq_wait); - __intel_engine_disarm_breadcrumbs(engine); + b->irq_wait = NULL; + if (b->irq_armed) + __intel_engine_disarm_breadcrumbs(engine); spin_unlock(&b->irq_lock); rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { RB_CLEAR_NODE(&wait->node); - if (wake_up_process(wait->tsk) && wait == first) - missed_breadcrumb(engine); + wake_up_process(wait->tsk); } b->waiters = RB_ROOT; spin_unlock_irq(&b->rb_lock); + + /* + * The signaling thread may be asleep holding a reference to a request, + * that had its signaling cancelled prior to being preempted. We need + * to kick the signaler, just in case, to release any such reference. + */ +wakeup_signaler: + wake_up_process(b->signaler); } static bool use_fake_irq(const struct intel_breadcrumbs *b) @@ -239,6 +290,7 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); struct drm_i915_private *i915 = engine->i915; + bool enabled; lockdep_assert_held(&b->irq_lock); if (b->irq_armed) @@ -250,7 +302,6 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) * the irq. */ b->irq_armed = true; - GEM_BUG_ON(b->irq_enabled); if (I915_SELFTEST_ONLY(b->mock)) { /* For our mock objects we want to avoid interaction @@ -271,14 +322,15 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) */ /* No interrupts? Kick the waiter every jiffie! 
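The pin/unpin helpers above turn b->irq_enabled from a boolean into a reference count, so only the 0->1 and 1->0 transitions touch the hardware. The pattern in isolation (the lock that protects the counter in the driver is elided here):

#include <assert.h>
#include <stdio.h>

static unsigned int irq_enabled;        /* guarded by a lock in the driver */

static void irq_enable(void)  { puts("irq on");  }
static void irq_disable(void) { puts("irq off"); }

/* Only the 0->1 and 1->0 transitions touch the hardware. */
static void pin_irq(void)
{
        if (!irq_enabled++)
                irq_enable();
        assert(irq_enabled);    /* no overflow */
}

static void unpin_irq(void)
{
        assert(irq_enabled);    /* no underflow */
        if (!--irq_enabled)
                irq_disable();
}

int main(void)
{
        pin_irq();
        pin_irq();      /* nested pin: no second hardware write */
        unpin_irq();
        unpin_irq();    /* last unpin disables */
        return 0;
}

The GEM_BUG_ON overflow/underflow checks in the diff play the role of the asserts here.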
*/ - if (intel_irqs_enabled(i915)) { - if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) - irq_enable(engine); - b->irq_enabled = true; + enabled = false; + if (!b->irq_enabled++ && + !test_bit(engine->id, &i915->gpu_error.test_irq_rings)) { + irq_enable(engine); + enabled = true; } enable_fake_irq(b); - return true; + return enabled; } static inline struct intel_wait *to_wait(struct rb_node *node) @@ -515,6 +567,7 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); rb_erase(&wait->node, &b->waiters); + RB_CLEAR_NODE(&wait->node); out: GEM_BUG_ON(b->irq_wait == wait); @@ -541,29 +594,16 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, spin_unlock_irq(&b->rb_lock); } -static bool signal_valid(const struct drm_i915_gem_request *request) -{ - return intel_wait_check_request(&request->signaling.wait, request); -} - static bool signal_complete(const struct drm_i915_gem_request *request) { if (!request) return false; - /* If another process served as the bottom-half it may have already - * signalled that this wait is already completed. - */ - if (intel_wait_complete(&request->signaling.wait)) - return signal_valid(request); - - /* Carefully check if the request is complete, giving time for the + /* + * Carefully check if the request is complete, giving time for the * seqno to be visible or if the GPU hung. */ - if (__i915_request_irq_complete(request)) - return true; - - return false; + return __i915_request_irq_complete(request); } static struct drm_i915_gem_request *to_signaler(struct rb_node *rb) @@ -606,9 +646,13 @@ static int intel_breadcrumbs_signaler(void *arg) request = i915_gem_request_get_rcu(request); rcu_read_unlock(); if (signal_complete(request)) { - local_bh_disable(); - dma_fence_signal(&request->fence); - local_bh_enable(); /* kick start the tasklets */ + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &request->fence.flags)) { + local_bh_disable(); + dma_fence_signal(&request->fence); + GEM_BUG_ON(!i915_gem_request_completed(request)); + local_bh_enable(); /* kick start the tasklets */ + } spin_lock_irq(&b->rb_lock); @@ -648,23 +692,15 @@ static int intel_breadcrumbs_signaler(void *arg) } if (unlikely(do_schedule)) { - DEFINE_WAIT(exec); - if (kthread_should_park()) kthread_parkme(); - if (kthread_should_stop()) { - GEM_BUG_ON(request); + if (unlikely(kthread_should_stop())) { + i915_gem_request_put(request); break; } - if (request) - add_wait_queue(&request->execute, &exec); - schedule(); - - if (request) - remove_wait_queue(&request->execute, &exec); } i915_gem_request_put(request); } while (1); @@ -787,12 +823,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) spin_lock_init(&b->rb_lock); spin_lock_init(&b->irq_lock); - setup_timer(&b->fake_irq, - intel_breadcrumbs_fake_irq, - (unsigned long)engine); - setup_timer(&b->hangcheck, - intel_breadcrumbs_hangcheck, - (unsigned long)engine); + timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0); + timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0); /* Spawn a thread to provide a common bottom-half for all signals. 
* As this is an asynchronous interface we cannot steal the current diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 1241e5891b29..1704c8897afd 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -417,36 +417,65 @@ static void hsw_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = 540000; } -static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, - int max_pixclk) +static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, int min_cdclk) { int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? 333333 : 320000; - int limit = IS_CHERRYVIEW(dev_priv) ? 95 : 90; /* * We seem to get an unstable or solid color picture at 200MHz. * Not sure what's wrong. For now use 200MHz only when all pipes * are off. */ - if (!IS_CHERRYVIEW(dev_priv) && - max_pixclk > freq_320*limit/100) + if (IS_VALLEYVIEW(dev_priv) && min_cdclk > freq_320) return 400000; - else if (max_pixclk > 266667*limit/100) + else if (min_cdclk > 266667) return freq_320; - else if (max_pixclk > 0) + else if (min_cdclk > 0) return 266667; else return 200000; } +static u8 vlv_calc_voltage_level(struct drm_i915_private *dev_priv, int cdclk) +{ + if (IS_VALLEYVIEW(dev_priv)) { + if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ + return 2; + else if (cdclk >= 266667) + return 1; + else + return 0; + } else { + /* + * Specs are full of misinformation, but testing on actual + * hardware has shown that we just need to write the desired + * CCK divider into the Punit register. + */ + return DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; + } +} + static void vlv_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { + u32 val; + cdclk_state->vco = vlv_get_hpll_vco(dev_priv); cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, cdclk_state->vco); + + mutex_lock(&dev_priv->pcu_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + mutex_unlock(&dev_priv->pcu_lock); + + if (IS_VALLEYVIEW(dev_priv)) + cdclk_state->voltage_level = (val & DSPFREQGUAR_MASK) >> + DSPFREQGUAR_SHIFT; + else + cdclk_state->voltage_level = (val & DSPFREQGUAR_MASK_CHV) >> + DSPFREQGUAR_SHIFT_CHV; } static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) @@ -489,7 +518,19 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state) { int cdclk = cdclk_state->cdclk; - u32 val, cmd; + u32 val, cmd = cdclk_state->voltage_level; + + switch (cdclk) { + case 400000: + case 333333: + case 320000: + case 266667: + case 200000: + break; + default: + MISSING_CASE(cdclk); + return; + } /* There are cases where we can end up here with power domains * off and a CDCLK frequency other than the minimum, like when @@ -499,14 +540,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, */ intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ - cmd = 2; - else if (cdclk == 266667) - cmd = 1; - else - cmd = 0; - - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); val &= ~DSPFREQGUAR_MASK; val |= (cmd << DSPFREQGUAR_SHIFT); @@ -516,7 +550,7 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); mutex_lock(&dev_priv->sb_lock); @@ -565,7 +599,7 @@ 
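On CHV, vlv_calc_voltage_level() does not pick from a fixed table; it computes the CCK divider to write into the Punit register. The arithmetic, checkable in isolation (the 800 MHz HPLL below is just an example value, frequencies in kHz as in the driver):

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

int main(void)
{
        int hpll_freq = 800000;         /* hypothetical HPLL, kHz */
        int cdclks[] = { 400000, 333333, 320000, 266667, 200000 };

        for (unsigned int i = 0; i < sizeof(cdclks) / sizeof(cdclks[0]); i++)
                printf("cdclk %d kHz -> divider %d\n", cdclks[i],
                       DIV_ROUND_CLOSEST(hpll_freq << 1, cdclks[i]) - 1);
        return 0;
}

For 400 MHz this yields DIV_ROUND_CLOSEST(1600000, 400000) - 1 == 3, which is the value that ends up in DSPFREQGUAR on CHV.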
static void chv_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state) { int cdclk = cdclk_state->cdclk; - u32 val, cmd; + u32 val, cmd = cdclk_state->voltage_level; switch (cdclk) { case 333333: @@ -586,14 +620,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, */ intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - /* - * Specs are full of misinformation, but testing on actual - * hardware has shown that we just need to write the desired - * CCK divider into the Punit register. - */ - cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); val &= ~DSPFREQGUAR_MASK_CHV; val |= (cmd << DSPFREQGUAR_SHIFT_CHV); @@ -603,7 +630,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -612,18 +639,33 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); } -static int bdw_calc_cdclk(int max_pixclk) +static int bdw_calc_cdclk(int min_cdclk) { - if (max_pixclk > 540000) + if (min_cdclk > 540000) return 675000; - else if (max_pixclk > 450000) + else if (min_cdclk > 450000) return 540000; - else if (max_pixclk > 337500) + else if (min_cdclk > 337500) return 450000; else return 337500; } +static u8 bdw_calc_voltage_level(int cdclk) +{ + switch (cdclk) { + default: + case 337500: + return 2; + case 450000: + return 0; + case 540000: + return 1; + case 675000: + return 3; + } +} + static void bdw_get_cdclk(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { @@ -642,13 +684,20 @@ static void bdw_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = 337500; else cdclk_state->cdclk = 675000; + + /* + * Can't read this out :( Let's assume it's + * at least what the CDCLK frequency requires. + */ + cdclk_state->voltage_level = + bdw_calc_voltage_level(cdclk_state->cdclk); } static void bdw_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state) { int cdclk = cdclk_state->cdclk; - uint32_t val, data; + uint32_t val; int ret; if (WARN((I915_READ(LCPLL_CTL) & @@ -659,10 +708,10 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, "trying to change cdclk frequency with cdclk not enabled\n")) return; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("failed to inform pcode about cdclk change\n"); return; @@ -672,33 +721,33 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, val |= LCPLL_CD_SOURCE_FCLK; I915_WRITE(LCPLL_CTL, val); + /* + * According to the spec, it should be enough to poll for this 1 us. + * However, extensive testing shows that this can take longer. 
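Splitting bdw_calc_voltage_level() out of the set_cdclk path, as this hunk does, leaves two small pure functions: a threshold ladder choosing the cdclk and a table mapping cdclk to a pcode voltage level. Both are reproduced below verbatim from the diff's values, wrapped in a standalone program for illustration:

#include <stdio.h>

/* Threshold ladder and voltage levels from the BDW hunks above (kHz). */
static int bdw_calc_cdclk(int min_cdclk)
{
        if (min_cdclk > 540000) return 675000;
        if (min_cdclk > 450000) return 540000;
        if (min_cdclk > 337500) return 450000;
        return 337500;
}

static int bdw_calc_voltage_level(int cdclk)
{
        switch (cdclk) {
        case 450000: return 0;
        case 540000: return 1;
        case 675000: return 3;
        default:     return 2;  /* 337500 */
        }
}

int main(void)
{
        int min_cdclk = 460000; /* example requirement, kHz */
        int cdclk = bdw_calc_cdclk(min_cdclk);

        printf("min %d -> cdclk %d, level %d\n",
               min_cdclk, cdclk, bdw_calc_voltage_level(cdclk));
        return 0;
}

Note the levels are not monotonic in cdclk on BDW (450 MHz maps to level 0, 337.5 MHz to level 2), which is exactly why the mapping lives in its own function rather than being derived arithmetically.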
+ */ if (wait_for_us(I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) + LCPLL_CD_SOURCE_FCLK_DONE, 100)) DRM_ERROR("Switching to FCLK failed\n"); val = I915_READ(LCPLL_CTL); val &= ~LCPLL_CLK_FREQ_MASK; switch (cdclk) { + default: + MISSING_CASE(cdclk); + /* fall through */ + case 337500: + val |= LCPLL_CLK_FREQ_337_5_BDW; + break; case 450000: val |= LCPLL_CLK_FREQ_450; - data = 0; break; case 540000: val |= LCPLL_CLK_FREQ_54O_BDW; - data = 1; - break; - case 337500: - val |= LCPLL_CLK_FREQ_337_5_BDW; - data = 2; break; case 675000: val |= LCPLL_CLK_FREQ_675_BDW; - data = 3; break; - default: - WARN(1, "invalid cdclk frequency\n"); - return; } I915_WRITE(LCPLL_CTL, val); @@ -711,42 +760,57 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); + sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + cdclk_state->voltage_level); + mutex_unlock(&dev_priv->pcu_lock); I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); intel_update_cdclk(dev_priv); - - WARN(cdclk != dev_priv->cdclk.hw.cdclk, - "cdclk requested %d kHz but got %d kHz\n", - cdclk, dev_priv->cdclk.hw.cdclk); } -static int skl_calc_cdclk(int max_pixclk, int vco) +static int skl_calc_cdclk(int min_cdclk, int vco) { if (vco == 8640000) { - if (max_pixclk > 540000) + if (min_cdclk > 540000) return 617143; - else if (max_pixclk > 432000) + else if (min_cdclk > 432000) return 540000; - else if (max_pixclk > 308571) + else if (min_cdclk > 308571) return 432000; else return 308571; } else { - if (max_pixclk > 540000) + if (min_cdclk > 540000) return 675000; - else if (max_pixclk > 450000) + else if (min_cdclk > 450000) return 540000; - else if (max_pixclk > 337500) + else if (min_cdclk > 337500) return 450000; else return 337500; } } +static u8 skl_calc_voltage_level(int cdclk) +{ + switch (cdclk) { + default: + case 308571: + case 337500: + return 0; + case 450000: + case 432000: + return 1; + case 540000: + return 2; + case 617143: + case 675000: + return 3; + } +} + static void skl_dpll0_update(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { @@ -797,7 +861,7 @@ static void skl_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = cdclk_state->ref; if (cdclk_state->vco == 0) - return; + goto out; cdctl = I915_READ(CDCLK_CTL); @@ -838,6 +902,14 @@ static void skl_get_cdclk(struct drm_i915_private *dev_priv, break; } } + + out: + /* + * Can't read this out :( Let's assume it's + * at least what the CDCLK frequency requires. 
+ */ + cdclk_state->voltage_level = + skl_calc_voltage_level(cdclk_state->cdclk); } /* convert from kHz to .1 fixpoint MHz with -1MHz offset */ @@ -859,16 +931,10 @@ static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) { - int min_cdclk = skl_calc_cdclk(0, vco); u32 val; WARN_ON(vco != 8100000 && vco != 8640000); - /* select the minimum CDCLK before enabling DPLL 0 */ - val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); - I915_WRITE(CDCLK_CTL, val); - POSTING_READ(CDCLK_CTL); - /* * We always enable DPLL0 with the lowest link rate possible, but still * taking into account the VCO required to operate the eDP panel at the @@ -922,44 +988,41 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; int vco = cdclk_state->vco; - u32 freq_select, pcu_ack; + u32 freq_select, cdclk_ctl; int ret; - WARN_ON((cdclk == 24000) != (vco == 0)); - - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); return; } - /* set CDCLK_CTL */ + /* Choose frequency for this cdclk */ switch (cdclk) { + default: + WARN_ON(cdclk != dev_priv->cdclk.hw.ref); + WARN_ON(vco != 0); + /* fall through */ + case 308571: + case 337500: + freq_select = CDCLK_FREQ_337_308; + break; case 450000: case 432000: freq_select = CDCLK_FREQ_450_432; - pcu_ack = 1; break; case 540000: freq_select = CDCLK_FREQ_540; - pcu_ack = 2; - break; - case 308571: - case 337500: - default: - freq_select = CDCLK_FREQ_337_308; - pcu_ack = 0; break; case 617143: case 675000: freq_select = CDCLK_FREQ_675_617; - pcu_ack = 3; break; } @@ -967,16 +1030,40 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, dev_priv->cdclk.hw.vco != vco) skl_dpll0_disable(dev_priv); + cdclk_ctl = I915_READ(CDCLK_CTL); + + if (dev_priv->cdclk.hw.vco != vco) { + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK); + cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + } + + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl |= CDCLK_DIVMUX_CD_OVERRIDE; + I915_WRITE(CDCLK_CTL, cdclk_ctl); + POSTING_READ(CDCLK_CTL); + if (dev_priv->cdclk.hw.vco != vco) skl_dpll0_enable(dev_priv, vco); - I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + + cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~CDCLK_DIVMUX_CD_OVERRIDE; + I915_WRITE(CDCLK_CTL, cdclk_ctl); POSTING_READ(CDCLK_CTL); /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, + cdclk_state->voltage_level); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); } @@ -994,6 +1081,8 @@ static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) goto sanitize; intel_update_cdclk(dev_priv); + 
intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); + /* Is PLL enabled and locked ? */ if (dev_priv->cdclk.hw.vco == 0 || dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) @@ -1054,6 +1143,7 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv) if (cdclk_state.vco == 0) cdclk_state.vco = 8100000; cdclk_state.cdclk = skl_calc_cdclk(0, cdclk_state.vco); + cdclk_state.voltage_level = skl_calc_voltage_level(cdclk_state.cdclk); skl_set_cdclk(dev_priv, &cdclk_state); } @@ -1071,40 +1161,40 @@ void skl_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = cdclk_state.ref; cdclk_state.vco = 0; + cdclk_state.voltage_level = skl_calc_voltage_level(cdclk_state.cdclk); skl_set_cdclk(dev_priv, &cdclk_state); } -static int bxt_calc_cdclk(int max_pixclk) +static int bxt_calc_cdclk(int min_cdclk) { - if (max_pixclk > 576000) + if (min_cdclk > 576000) return 624000; - else if (max_pixclk > 384000) + else if (min_cdclk > 384000) return 576000; - else if (max_pixclk > 288000) + else if (min_cdclk > 288000) return 384000; - else if (max_pixclk > 144000) + else if (min_cdclk > 144000) return 288000; else return 144000; } -static int glk_calc_cdclk(int max_pixclk) +static int glk_calc_cdclk(int min_cdclk) { - /* - * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk - * as a temporary workaround. Use a higher cdclk instead. (Note that - * intel_compute_max_dotclk() limits the max pixel clock to 99% of max - * cdclk.) - */ - if (max_pixclk > DIV_ROUND_UP(2 * 158400 * 99, 100)) + if (min_cdclk > 158400) return 316800; - else if (max_pixclk > DIV_ROUND_UP(2 * 79200 * 99, 100)) + else if (min_cdclk > 79200) return 158400; else return 79200; } +static u8 bxt_calc_voltage_level(int cdclk) +{ + return DIV_ROUND_UP(cdclk, 25000); +} + static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { int ratio; @@ -1115,6 +1205,7 @@ static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) switch (cdclk) { default: MISSING_CASE(cdclk); + /* fall through */ case 144000: case 288000: case 384000: @@ -1139,6 +1230,7 @@ static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) switch (cdclk) { default: MISSING_CASE(cdclk); + /* fall through */ case 79200: case 158400: case 316800: @@ -1179,7 +1271,7 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = cdclk_state->ref; if (cdclk_state->vco == 0) - return; + goto out; divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; @@ -1203,6 +1295,14 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, } cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); + + out: + /* + * Can't read this out :( Let's assume it's + * at least what the CDCLK frequency requires. 
+ */ + cdclk_state->voltage_level = + bxt_calc_voltage_level(cdclk_state->cdclk); } static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) @@ -1251,32 +1351,35 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, /* cdclk = vco / 2 / div{1,1.5,2,4} */ switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - case 8: - divider = BXT_CDCLK_CD2X_DIV_SEL_4; - break; - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; + default: + WARN_ON(cdclk != dev_priv->cdclk.hw.ref); + WARN_ON(vco != 0); + /* fall through */ + case 2: + divider = BXT_CDCLK_CD2X_DIV_SEL_1; break; case 3: WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; break; - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; + case 4: + divider = BXT_CDCLK_CD2X_DIV_SEL_2; break; - default: - WARN_ON(cdclk != dev_priv->cdclk.hw.ref); - WARN_ON(vco != 0); - - divider = BXT_CDCLK_CD2X_DIV_SEL_1; + case 8: + divider = BXT_CDCLK_CD2X_DIV_SEL_4; break; } - /* Inform power controller of upcoming frequency change */ - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000); - mutex_unlock(&dev_priv->rps.hw_lock); + /* + * Inform power controller of upcoming frequency change. BSpec + * requires us to wait up to 150usec, but that leads to timeouts; + * the 2ms used here is based on experiment. + */ + mutex_lock(&dev_priv->pcu_lock); + ret = sandybridge_pcode_write_timeout(dev_priv, + HSW_PCODE_DE_WRITE_FREQ_REQ, + 0x80000000, 2000); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", @@ -1305,10 +1408,17 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; I915_WRITE(CDCLK_CTL, val); - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - DIV_ROUND_UP(cdclk, 25000)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); + /* + * The timeout isn't specified, the 2ms used here is based on + * experiment. + * FIXME: Waiting for the request completion could be delayed until + * the next PCODE request based on BSpec. 
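The reordered switch in bxt_set_cdclk() recovers the CD2X divider field from the vco/cdclk ratio; since cdclk = vco / 2 / div, the rounded ratio encodes 2*div. The same selection, runnable in isolation (names shortened; ratio 3 is the 1.5 divider the diff warns is unsupported on GLK):

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

/* cdclk = vco / 2 / div{1,1.5,2,4}; the rounded ratio encodes 2*div. */
static const char *cd2x_div_sel(int vco, int cdclk)
{
        switch (DIV_ROUND_CLOSEST(vco, cdclk)) {
        case 2: return "DIV_SEL_1";
        case 3: return "DIV_SEL_1_5";   /* not valid on GLK */
        case 4: return "DIV_SEL_2";
        case 8: return "DIV_SEL_4";
        default: return "invalid";
        }
}

int main(void)
{
        /* e.g. a 1152000 kHz VCO: 576000 kHz -> ratio 2, 144000 -> 8 */
        printf("%s\n", cd2x_div_sel(1152000, 576000));
        printf("%s\n", cd2x_div_sel(1152000, 144000));
        return 0;
}

Putting default: first, with the WARN_ONs and a fall-through to the safest divider, is the same defensive shape the diff applies across the skl/bxt/cnl switches.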
+ */ + ret = sandybridge_pcode_write_timeout(dev_priv, + HSW_PCODE_DE_WRITE_FREQ_REQ, + cdclk_state->voltage_level, 2000); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", @@ -1324,6 +1434,7 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) u32 cdctl, expected; intel_update_cdclk(dev_priv); + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); if (dev_priv->cdclk.hw.vco == 0 || dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) @@ -1399,6 +1510,7 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = bxt_calc_cdclk(0); cdclk_state.vco = bxt_de_pll_vco(dev_priv, cdclk_state.cdclk); } + cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); bxt_set_cdclk(dev_priv, &cdclk_state); } @@ -1416,20 +1528,34 @@ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = cdclk_state.ref; cdclk_state.vco = 0; + cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); bxt_set_cdclk(dev_priv, &cdclk_state); } -static int cnl_calc_cdclk(int max_pixclk) +static int cnl_calc_cdclk(int min_cdclk) { - if (max_pixclk > 336000) + if (min_cdclk > 336000) return 528000; - else if (max_pixclk > 168000) + else if (min_cdclk > 168000) return 336000; else return 168000; } +static u8 cnl_calc_voltage_level(int cdclk) +{ + switch (cdclk) { + default: + case 168000: + return 0; + case 336000: + return 1; + case 528000: + return 2; + } +} + static void cnl_cdclk_pll_update(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state) { @@ -1463,7 +1589,7 @@ static void cnl_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = cdclk_state->ref; if (cdclk_state->vco == 0) - return; + goto out; divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; @@ -1480,6 +1606,14 @@ static void cnl_get_cdclk(struct drm_i915_private *dev_priv, } cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); + + out: + /* + * Can't read this out :( Let's assume it's + * at least what the CDCLK frequency requires. 
+ */ + cdclk_state->voltage_level = + cnl_calc_voltage_level(cdclk_state->cdclk); } static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) @@ -1520,15 +1654,15 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; int vco = cdclk_state->vco; - u32 val, divider, pcu_ack; + u32 val, divider; int ret; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1537,30 +1671,15 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, /* cdclk = vco / 2 / div{1,2} */ switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; - break; - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; default: WARN_ON(cdclk != dev_priv->cdclk.hw.ref); WARN_ON(vco != 0); - + /* fall through */ + case 2: divider = BXT_CDCLK_CD2X_DIV_SEL_1; break; - } - - switch (cdclk) { - case 528000: - pcu_ack = 2; - break; - case 336000: - pcu_ack = 1; - break; - case 168000: - default: - pcu_ack = 0; + case 4: + divider = BXT_CDCLK_CD2X_DIV_SEL_2; break; } @@ -1580,11 +1699,18 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, I915_WRITE(CDCLK_CTL, val); /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, + cdclk_state->voltage_level); + mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); + + /* + * Can't read out the voltage level :( + * Let's just assume everything is as expected. + */ + dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; } static int cnl_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) @@ -1597,6 +1723,7 @@ static int cnl_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) switch (cdclk) { default: MISSING_CASE(cdclk); + /* fall through */ case 168000: case 336000: ratio = dev_priv->cdclk.hw.ref == 19200 ? 35 : 28; @@ -1614,6 +1741,7 @@ static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) u32 cdctl, expected; intel_update_cdclk(dev_priv); + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); if (dev_priv->cdclk.hw.vco == 0 || dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) @@ -1673,6 +1801,7 @@ void cnl_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = cnl_calc_cdclk(0); cdclk_state.vco = cnl_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); + cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); cnl_set_cdclk(dev_priv, &cdclk_state); } @@ -1690,22 +1819,48 @@ void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) cdclk_state.cdclk = cdclk_state.ref; cdclk_state.vco = 0; + cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); cnl_set_cdclk(dev_priv, &cdclk_state); } /** - * intel_cdclk_state_compare - Determine if two CDCLK states differ + * intel_cdclk_needs_modeset - Determine if two CDCLK states require a modeset on all pipes * @a: first CDCLK state * @b: second CDCLK state * * Returns: - * True if the CDCLK states are identical, false if they differ. + * True if the CDCLK states require pipes to be off during reprogramming, false if not. 
*/ -bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, +bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b) { - return memcmp(a, b, sizeof(*a)) == 0; + return a->cdclk != b->cdclk || + a->vco != b->vco || + a->ref != b->ref; +} + +/** + * intel_cdclk_changed - Determine if two CDCLK states are different + * @a: first CDCLK state + * @b: second CDCLK state + * + * Returns: + * True if the CDCLK states don't match, false if they do. + */ +bool intel_cdclk_changed(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) +{ + return intel_cdclk_needs_modeset(a, b) || + a->voltage_level != b->voltage_level; +} + +void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state, + const char *context) +{ + DRM_DEBUG_DRIVER("%s %d kHz, VCO %d kHz, ref %d kHz, voltage level %d\n", + context, cdclk_state->cdclk, cdclk_state->vco, + cdclk_state->ref, cdclk_state->voltage_level); } /** @@ -1719,124 +1874,187 @@ bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, void intel_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state) { - if (intel_cdclk_state_compare(&dev_priv->cdclk.hw, cdclk_state)) + if (!intel_cdclk_changed(&dev_priv->cdclk.hw, cdclk_state)) return; if (WARN_ON_ONCE(!dev_priv->display.set_cdclk)) return; - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz, VCO %d kHz, ref %d kHz\n", - cdclk_state->cdclk, cdclk_state->vco, - cdclk_state->ref); + intel_dump_cdclk_state(cdclk_state, "Changing CDCLK to"); dev_priv->display.set_cdclk(dev_priv, cdclk_state); + + if (WARN(intel_cdclk_changed(&dev_priv->cdclk.hw, cdclk_state), + "cdclk state doesn't match!\n")) { + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "[hw state]"); + intel_dump_cdclk_state(cdclk_state, "[sw state]"); + } } -static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, - int pixel_rate) +static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv, + int pixel_rate) +{ + if (INTEL_GEN(dev_priv) >= 10) + return DIV_ROUND_UP(pixel_rate, 2); + else if (IS_GEMINILAKE(dev_priv)) + /* + * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk + * as a temporary workaround. Use a higher cdclk instead. (Note that + * intel_compute_max_dotclk() limits the max pixel clock to 99% of max + * cdclk.) + */ + return DIV_ROUND_UP(pixel_rate * 100, 2 * 99); + else if (IS_GEN9(dev_priv) || + IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + return pixel_rate; + else if (IS_CHERRYVIEW(dev_priv)) + return DIV_ROUND_UP(pixel_rate * 100, 95); + else + return DIV_ROUND_UP(pixel_rate * 100, 90); +} + +int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + int min_cdclk; + + if (!crtc_state->base.enable) + return 0; + + min_cdclk = intel_pixel_rate_to_cdclk(dev_priv, crtc_state->pixel_rate); /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); + if (IS_BROADWELL(dev_priv) && hsw_crtc_state_ips_capable(crtc_state)) + min_cdclk = DIV_ROUND_UP(min_cdclk * 100, 95); /* BSpec says "Do not use DisplayPort with CDCLK less than 432 MHz, * audio enabled, port width x4, and link rate HBR2 (5.4 GHz), or else * there may be audio corruption or screen corruption." 
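Splitting the old memcmp()-based intel_cdclk_state_compare() into two predicates separates "the pipes must be shut down" from "anything needs reprogramming at all": a change in voltage level alone is worth applying but does not force a modeset. A self-contained sketch of the two tests (the struct mirrors the fields used in the hunk; the values in main() are made up):

#include <stdbool.h>
#include <stdio.h>

struct cdclk_state {
	int cdclk, vco, ref;		/* kHz */
	unsigned char voltage_level;
};

/* True if going from a to b requires the pipes to be off. */
static bool cdclk_needs_modeset(const struct cdclk_state *a,
				const struct cdclk_state *b)
{
	return a->cdclk != b->cdclk || a->vco != b->vco || a->ref != b->ref;
}

/* True if anything differs, including the voltage level alone. */
static bool cdclk_changed(const struct cdclk_state *a,
			  const struct cdclk_state *b)
{
	return cdclk_needs_modeset(a, b) ||
	       a->voltage_level != b->voltage_level;
}

int main(void)
{
	struct cdclk_state a = { 336000, 1344000, 19200, 1 };
	struct cdclk_state b = a;

	b.voltage_level = 2;	/* bump only the voltage requirement */
	printf("needs modeset: %d, changed: %d\n",
	       cdclk_needs_modeset(&a, &b), cdclk_changed(&a, &b));
	return 0;
}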
This cdclk - * restriction for GLK is 316.8 MHz and since GLK can output two - * pixels per clock, the pixel rate becomes 2 * 316.8 MHz. + * restriction for GLK is 316.8 MHz. */ if (intel_crtc_has_dp_encoder(crtc_state) && crtc_state->has_audio && crtc_state->port_clock >= 540000 && crtc_state->lane_count == 4) { - if (IS_CANNONLAKE(dev_priv)) - pixel_rate = max(316800, pixel_rate); - else if (IS_GEMINILAKE(dev_priv)) - pixel_rate = max(2 * 316800, pixel_rate); - else - pixel_rate = max(432000, pixel_rate); + if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) { + /* Display WA #1145: glk,cnl */ + min_cdclk = max(316800, min_cdclk); + } else if (IS_GEN9(dev_priv) || IS_BROADWELL(dev_priv)) { + /* Display WA #1144: skl,bxt */ + min_cdclk = max(432000, min_cdclk); + } } /* According to BSpec, "The CD clock frequency must be at least twice * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default. - * The check for GLK has to be adjusted as the platform can output - * two pixels per clock. */ - if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) { - if (IS_GEMINILAKE(dev_priv)) - pixel_rate = max(2 * 2 * 96000, pixel_rate); - else - pixel_rate = max(2 * 96000, pixel_rate); + if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) + min_cdclk = max(2 * 96000, min_cdclk); + + /* + * On Valleyview some DSI panels lose (v|h)sync when the clock is lower + * than 320000KHz. + */ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) && + IS_VALLEYVIEW(dev_priv)) + min_cdclk = max(320000, min_cdclk); + + if (min_cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n", + min_cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; } - return pixel_rate; + return min_cdclk; } -/* compute the max rate for new configuration */ -static int intel_max_pixel_rate(struct drm_atomic_state *state) +static int intel_compute_min_cdclk(struct drm_atomic_state *state) { struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_i915_private *dev_priv = to_i915(state->dev); - struct drm_crtc *crtc; - struct drm_crtc_state *cstate; + struct intel_crtc *crtc; struct intel_crtc_state *crtc_state; - unsigned int max_pixel_rate = 0, i; + int min_cdclk, i; enum pipe pipe; - memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, - sizeof(intel_state->min_pixclk)); + memcpy(intel_state->min_cdclk, dev_priv->min_cdclk, + sizeof(intel_state->min_cdclk)); - for_each_new_crtc_in_state(state, crtc, cstate, i) { - int pixel_rate; + for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) { + min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); + if (min_cdclk < 0) + return min_cdclk; - crtc_state = to_intel_crtc_state(cstate); - if (!crtc_state->base.enable) { - intel_state->min_pixclk[i] = 0; - continue; - } + intel_state->min_cdclk[i] = min_cdclk; + } + + min_cdclk = 0; + for_each_pipe(dev_priv, pipe) + min_cdclk = max(intel_state->min_cdclk[pipe], min_cdclk); - pixel_rate = crtc_state->pixel_rate; + return min_cdclk; +} + +/* + * Note that this functions assumes that 0 is + * the lowest voltage value, and higher values + * correspond to increasingly higher voltages. + * + * Should that relationship no longer hold on + * future platforms this code will need to be + * adjusted. 
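intel_crtc_compute_min_cdclk() above starts from the pixel-rate-derived floor and then raises it for each documented constraint. A toy model of that clamp chain; the 316800 and 2 * 96000 bounds are the ones quoted in the hunk (Display WA #1145 and the Azalia BCLK rule), while the 1:1 pixel-rate mapping and the flags are simplifications:

#include <stdio.h>

static int max_int(int a, int b) { return a > b ? a : b; }

static int compute_min_cdclk(int pixel_rate, int has_audio, int dp_hbr2_x4)
{
	int min_cdclk = pixel_rate;	/* pretend a 1:1 platform */

	if (dp_hbr2_x4 && has_audio)	/* WA: DP audio at HBR2 x4 */
		min_cdclk = max_int(316800, min_cdclk);

	if (has_audio)			/* cdclk >= 2 * Azalia BCLK */
		min_cdclk = max_int(2 * 96000, min_cdclk);

	return min_cdclk;
}

int main(void)
{
	printf("%d kHz\n", compute_min_cdclk(148500, 1, 0)); /* 1080p + audio */
	return 0;
}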
+ */ +static u8 cnl_compute_min_voltage_level(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; + struct intel_crtc_state *crtc_state; + u8 min_voltage_level; + int i; + enum pipe pipe; - if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) - pixel_rate = - bdw_adjust_min_pipe_pixel_rate(crtc_state, - pixel_rate); + memcpy(state->min_voltage_level, dev_priv->min_voltage_level, + sizeof(state->min_voltage_level)); - intel_state->min_pixclk[i] = pixel_rate; + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + if (crtc_state->base.enable) + state->min_voltage_level[i] = + crtc_state->min_voltage_level; + else + state->min_voltage_level[i] = 0; } + min_voltage_level = 0; for_each_pipe(dev_priv, pipe) - max_pixel_rate = max(intel_state->min_pixclk[pipe], - max_pixel_rate); + min_voltage_level = max(state->min_voltage_level[pipe], + min_voltage_level); - return max_pixel_rate; + return min_voltage_level; } static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); - int max_pixclk = intel_max_pixel_rate(state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - int cdclk; + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + int min_cdclk, cdclk; - cdclk = vlv_calc_cdclk(dev_priv, max_pixclk); + min_cdclk = intel_compute_min_cdclk(state); + if (min_cdclk < 0) + return min_cdclk; - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - return -EINVAL; - } + cdclk = vlv_calc_cdclk(dev_priv, min_cdclk); intel_state->cdclk.logical.cdclk = cdclk; + intel_state->cdclk.logical.voltage_level = + vlv_calc_voltage_level(dev_priv, cdclk); if (!intel_state->active_crtcs) { cdclk = vlv_calc_cdclk(dev_priv, 0); intel_state->cdclk.actual.cdclk = cdclk; + intel_state->cdclk.actual.voltage_level = + vlv_calc_voltage_level(dev_priv, cdclk); } else { intel_state->cdclk.actual = intel_state->cdclk.logical; @@ -1847,29 +2065,29 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - int max_pixclk = intel_max_pixel_rate(state); - int cdclk; + int min_cdclk, cdclk; + + min_cdclk = intel_compute_min_cdclk(state); + if (min_cdclk < 0) + return min_cdclk; /* * FIXME should also account for plane ratio * once 64bpp pixel formats are supported. 
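Both intel_compute_min_cdclk() and cnl_compute_min_voltage_level() end with the same reduction: the device-wide requirement is the max() over every pipe's cached requirement, which (per the comment above) assumes higher numbers mean higher voltages. In miniature, with hypothetical per-pipe values:

#include <stdio.h>

#define NUM_PIPES 3

int main(void)
{
	unsigned char min_voltage_level[NUM_PIPES] = { 0, 2, 1 };
	unsigned char level = 0;
	int pipe;

	for (pipe = 0; pipe < NUM_PIPES; pipe++)
		if (min_voltage_level[pipe] > level)
			level = min_voltage_level[pipe];

	printf("device-wide minimum voltage level: %u\n", level);
	return 0;
}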
*/ - cdclk = bdw_calc_cdclk(max_pixclk); - - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - return -EINVAL; - } + cdclk = bdw_calc_cdclk(min_cdclk); intel_state->cdclk.logical.cdclk = cdclk; + intel_state->cdclk.logical.voltage_level = + bdw_calc_voltage_level(cdclk); if (!intel_state->active_crtcs) { cdclk = bdw_calc_cdclk(0); intel_state->cdclk.actual.cdclk = cdclk; + intel_state->cdclk.actual.voltage_level = + bdw_calc_voltage_level(cdclk); } else { intel_state->cdclk.actual = intel_state->cdclk.logical; @@ -1880,10 +2098,13 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) { - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_i915_private *dev_priv = to_i915(state->dev); - const int max_pixclk = intel_max_pixel_rate(state); - int cdclk, vco; + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + int min_cdclk, cdclk, vco; + + min_cdclk = intel_compute_min_cdclk(state); + if (min_cdclk < 0) + return min_cdclk; vco = intel_state->cdclk.logical.vco; if (!vco) @@ -1893,22 +2114,20 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) * FIXME should also account for plane ratio * once 64bpp pixel formats are supported. */ - cdclk = skl_calc_cdclk(max_pixclk, vco); - - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - return -EINVAL; - } + cdclk = skl_calc_cdclk(min_cdclk, vco); intel_state->cdclk.logical.vco = vco; intel_state->cdclk.logical.cdclk = cdclk; + intel_state->cdclk.logical.voltage_level = + skl_calc_voltage_level(cdclk); if (!intel_state->active_crtcs) { cdclk = skl_calc_cdclk(0, vco); intel_state->cdclk.actual.vco = vco; intel_state->cdclk.actual.cdclk = cdclk; + intel_state->cdclk.actual.voltage_level = + skl_calc_voltage_level(cdclk); } else { intel_state->cdclk.actual = intel_state->cdclk.logical; @@ -1920,27 +2139,25 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); - int max_pixclk = intel_max_pixel_rate(state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - int cdclk, vco; + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + int min_cdclk, cdclk, vco; + + min_cdclk = intel_compute_min_cdclk(state); + if (min_cdclk < 0) + return min_cdclk; if (IS_GEMINILAKE(dev_priv)) { - cdclk = glk_calc_cdclk(max_pixclk); + cdclk = glk_calc_cdclk(min_cdclk); vco = glk_de_pll_vco(dev_priv, cdclk); } else { - cdclk = bxt_calc_cdclk(max_pixclk); + cdclk = bxt_calc_cdclk(min_cdclk); vco = bxt_de_pll_vco(dev_priv, cdclk); } - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - return -EINVAL; - } - intel_state->cdclk.logical.vco = vco; intel_state->cdclk.logical.cdclk = cdclk; + intel_state->cdclk.logical.voltage_level = + bxt_calc_voltage_level(cdclk); if (!intel_state->active_crtcs) { if (IS_GEMINILAKE(dev_priv)) { @@ -1953,6 +2170,8 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) intel_state->cdclk.actual.vco = vco; intel_state->cdclk.actual.cdclk = cdclk; + intel_state->cdclk.actual.voltage_level = + bxt_calc_voltage_level(cdclk); } else { 
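Each *_modeset_calc_cdclk() above computes two states: "logical", sized for the current set of CRTCs, and "actual", which may drop to the platform minimum when nothing is active. The shape of that in a few lines, reusing the CNL frequency steps quoted earlier (the rest is a stand-in):

#include <stdio.h>

static int calc_cdclk(int min_cdclk)
{
	if (min_cdclk > 336000)
		return 528000;
	else if (min_cdclk > 168000)
		return 336000;
	else
		return 168000;
}

int main(void)
{
	int min_cdclk = 200000;
	int active_crtcs = 0;		/* pretend all pipes are off */
	int logical = calc_cdclk(min_cdclk);
	int actual = active_crtcs ? logical : calc_cdclk(0);

	printf("logical %d kHz, actual %d kHz\n", logical, actual);
	return 0;
}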
intel_state->cdclk.actual = intel_state->cdclk.logical; @@ -1964,22 +2183,21 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) static int cnl_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - int max_pixclk = intel_max_pixel_rate(state); - int cdclk, vco; + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + int min_cdclk, cdclk, vco; - cdclk = cnl_calc_cdclk(max_pixclk); - vco = cnl_cdclk_pll_vco(dev_priv, cdclk); + min_cdclk = intel_compute_min_cdclk(state); + if (min_cdclk < 0) + return min_cdclk; - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - return -EINVAL; - } + cdclk = cnl_calc_cdclk(min_cdclk); + vco = cnl_cdclk_pll_vco(dev_priv, cdclk); intel_state->cdclk.logical.vco = vco; intel_state->cdclk.logical.cdclk = cdclk; + intel_state->cdclk.logical.voltage_level = + max(cnl_calc_voltage_level(cdclk), + cnl_compute_min_voltage_level(intel_state)); if (!intel_state->active_crtcs) { cdclk = cnl_calc_cdclk(0); @@ -1987,6 +2205,8 @@ static int cnl_modeset_calc_cdclk(struct drm_atomic_state *state) intel_state->cdclk.actual.vco = vco; intel_state->cdclk.actual.cdclk = cdclk; + intel_state->cdclk.actual.voltage_level = + cnl_calc_voltage_level(cdclk); } else { intel_state->cdclk.actual = intel_state->cdclk.logical; @@ -1999,14 +2219,16 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) { int max_cdclk_freq = dev_priv->max_cdclk_freq; - if (IS_GEMINILAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10) + return 2 * max_cdclk_freq; + else if (IS_GEMINILAKE(dev_priv)) /* * FIXME: Limiting to 99% as a temporary workaround. See - * glk_calc_cdclk() for details. + * intel_min_cdclk() for details. 
*/ return 2 * max_cdclk_freq * 99 / 100; - else if (INTEL_INFO(dev_priv)->gen >= 9 || - IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + else if (IS_GEN9(dev_priv) || + IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) return max_cdclk_freq; else if (IS_CHERRYVIEW(dev_priv)) return max_cdclk_freq*95/100; @@ -2097,10 +2319,6 @@ void intel_update_cdclk(struct drm_i915_private *dev_priv) { dev_priv->display.get_cdclk(dev_priv, &dev_priv->cdclk.hw); - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", - dev_priv->cdclk.hw.cdclk, dev_priv->cdclk.hw.vco, - dev_priv->cdclk.hw.ref); - /* * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): * Programmng [sic] note: bit[9:2] should be programmed to the number diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index b8315bca852b..aa66e952a95d 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -370,7 +370,7 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) */ if (IS_HASWELL(dev_priv) && intel_crtc_state->ips_enabled && (intel_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT)) { - hsw_disable_ips(intel_crtc); + hsw_disable_ips(intel_crtc_state); reenable_ips = true; } @@ -380,7 +380,7 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) i9xx_load_luts(crtc_state); if (reenable_ips) - hsw_enable_ips(intel_crtc); + hsw_enable_ips(intel_crtc_state); } static void bdw_load_degamma_lut(struct drm_crtc_state *state) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 70e0ff41070c..9f31aea51dff 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -119,6 +119,8 @@ static unsigned int intel_crt_get_flags(struct intel_encoder *encoder) static void intel_crt_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + pipe_config->output_types |= BIT(INTEL_OUTPUT_ANALOG); + pipe_config->base.adjusted_mode.flags |= intel_crt_get_flags(encoder); pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; @@ -143,7 +145,7 @@ static void hsw_crt_get_config(struct intel_encoder *encoder, /* Note: The caller is required to filter out dpms modes not supported by the * platform. 
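intel_compute_max_dotclk() is the inverse of the min-cdclk rules: starting from the platform's maximum cdclk, it answers what pixel clock can still be accepted, doubling the budget on gen10+ (two pixels per clock) and applying the same guardband percentages in reverse. A hedged sketch with made-up platform flags:

#include <stdio.h>

static int compute_max_dotclk(int max_cdclk, int gen, int is_chv)
{
	if (gen >= 10)
		return 2 * max_cdclk;	/* two pixels per cdclk cycle */
	else if (is_chv)
		return max_cdclk * 95 / 100;
	else
		return max_cdclk * 90 / 100;
}

int main(void)
{
	printf("gen10: %d kHz\n", compute_max_dotclk(528000, 10, 0));
	printf("chv:   %d kHz\n", compute_max_dotclk(320000, 8, 1));
	return 0;
}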
*/ static void intel_crt_set_dpms(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state, + const struct intel_crtc_state *crtc_state, int mode) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -194,28 +196,39 @@ static void intel_crt_set_dpms(struct intel_encoder *encoder, } static void intel_disable_crt(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { intel_crt_set_dpms(encoder, old_crtc_state, DRM_MODE_DPMS_OFF); } static void pch_disable_crt(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { } static void pch_post_disable_crt(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { intel_disable_crt(encoder, old_crtc_state, old_conn_state); } +static void hsw_disable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + WARN_ON(!old_crtc_state->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); +} + static void hsw_post_disable_crt(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -225,13 +238,61 @@ static void hsw_post_disable_crt(struct intel_encoder *encoder, lpt_disable_iclkip(dev_priv); intel_ddi_fdi_post_disable(encoder, old_crtc_state, old_conn_state); + + WARN_ON(!old_crtc_state->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); +} + +static void hsw_pre_pll_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + WARN_ON(!crtc_state->has_pch_encoder); + + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); +} + +static void hsw_pre_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum pipe pipe = crtc->pipe; + + WARN_ON(!crtc_state->has_pch_encoder); + + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + + dev_priv->display.fdi_link_train(crtc, crtc_state); +} + +static void hsw_enable_crt(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum pipe pipe = crtc->pipe; + + WARN_ON(!crtc_state->has_pch_encoder); + + intel_crt_set_dpms(encoder, crtc_state, DRM_MODE_DPMS_ON); + + intel_wait_for_vblank(dev_priv, pipe); + intel_wait_for_vblank(dev_priv, pipe); + 
intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } static void intel_enable_crt(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - intel_crt_set_dpms(encoder, pipe_config, DRM_MODE_DPMS_ON); + intel_crt_set_dpms(encoder, crtc_state, DRM_MODE_DPMS_ON); } static enum drm_mode_status @@ -279,10 +340,25 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + return true; +} + +static bool pch_crt_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) +{ + pipe_config->has_pch_encoder = true; + + return true; +} + +static bool hsw_crt_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) +{ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (HAS_PCH_SPLIT(dev_priv)) - pipe_config->has_pch_encoder = true; + pipe_config->has_pch_encoder = true; /* LPT FDI RX only supports 8bpc. */ if (HAS_PCH_LPT(dev_priv)) { @@ -295,8 +371,7 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder, } /* FDI must always be 2.7 GHz */ - if (HAS_DDI(dev_priv)) - pipe_config->port_clock = 135000 * 2; + pipe_config->port_clock = 135000 * 2; return true; } @@ -712,7 +787,7 @@ intel_crt_detect(struct drm_connector *connector, * broken monitor (without edid) to work behind a broken kvm (that fails * to have the right resistors for HP detection) needs to fix this up. * For now just bail out. 
*/ - if (I915_HAS_HOTPLUG(dev_priv) && !i915.load_detect_test) { + if (I915_HAS_HOTPLUG(dev_priv) && !i915_modparams.load_detect_test) { status = connector_status_disconnected; goto out; } @@ -730,7 +805,7 @@ intel_crt_detect(struct drm_connector *connector, else if (INTEL_GEN(dev_priv) < 4) status = intel_crt_load_detect(crt, to_intel_crtc(connector->state->crtc)->pipe); - else if (i915.load_detect_test) + else if (i915_modparams.load_detect_test) status = connector_status_disconnected; else status = connector_status_unknown; @@ -890,26 +965,33 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.power_domain = POWER_DOMAIN_PORT_CRT; - crt->base.compute_config = intel_crt_compute_config; - if (HAS_PCH_SPLIT(dev_priv)) { - crt->base.disable = pch_disable_crt; - crt->base.post_disable = pch_post_disable_crt; - } else { - crt->base.disable = intel_disable_crt; - } - crt->base.enable = intel_enable_crt; if (I915_HAS_HOTPLUG(dev_priv) && !dmi_check_system(intel_spurious_crt_detect)) crt->base.hpd_pin = HPD_CRT; + if (HAS_DDI(dev_priv)) { crt->base.port = PORT_E; crt->base.get_config = hsw_crt_get_config; crt->base.get_hw_state = intel_ddi_get_hw_state; + crt->base.compute_config = hsw_crt_compute_config; + crt->base.pre_pll_enable = hsw_pre_pll_enable_crt; + crt->base.pre_enable = hsw_pre_enable_crt; + crt->base.enable = hsw_enable_crt; + crt->base.disable = hsw_disable_crt; crt->base.post_disable = hsw_post_disable_crt; } else { + if (HAS_PCH_SPLIT(dev_priv)) { + crt->base.compute_config = pch_crt_compute_config; + crt->base.disable = pch_disable_crt; + crt->base.post_disable = pch_post_disable_crt; + } else { + crt->base.compute_config = intel_crt_compute_config; + crt->base.disable = intel_disable_crt; + } crt->base.port = PORT_NONE; crt->base.get_config = intel_crt_get_config; crt->base.get_hw_state = intel_crt_get_hw_state; + crt->base.enable = intel_enable_crt; } intel_connector->get_hw_state = intel_connector_get_hw_state; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 92c1f8e166dc..7fe4aac0facc 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -37,25 +37,21 @@ #define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin" #define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) -#define I915_CSR_CNL "i915/cnl_dmc_ver1_04.bin" -#define CNL_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) +#define I915_CSR_CNL "i915/cnl_dmc_ver1_06.bin" +#define CNL_CSR_VERSION_REQUIRED CSR_VERSION(1, 6) -#define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" +#define I915_CSR_KBL "i915/kbl_dmc_ver1_04.bin" MODULE_FIRMWARE(I915_CSR_KBL); -#define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) +#define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) -#define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin" +#define I915_CSR_SKL "i915/skl_dmc_ver1_27.bin" MODULE_FIRMWARE(I915_CSR_SKL); -#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 26) +#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 27) #define I915_CSR_BXT "i915/bxt_dmc_ver1_07.bin" MODULE_FIRMWARE(I915_CSR_BXT); #define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) -#define FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" - - - #define CSR_MAX_FW_SIZE 0x2FFF #define CSR_DEFAULT_FW_OFFSET 0xFFFFFFFF @@ -202,6 +198,7 @@ intel_get_stepping_info(struct drm_i915_private *dev_priv) si = bxt_stepping_info; } else { size = 0; + si = NULL; } if (INTEL_REVID(dev_priv) < size) @@ -252,8 +249,14 @@ void intel_csr_load_program(struct drm_i915_private *dev_priv) } fw_size = dev_priv->csr.dmc_fw_size; + 
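The intel_crt_init() rework above stops branching on platform inside shared hooks and instead installs a per-platform set of function pointers once, at init time. That dispatch-table pattern in miniature (hook names and messages are stand-ins, not the driver's):

#include <stdio.h>

struct encoder_hooks {
	void (*enable)(void);
	void (*disable)(void);
};

static void hsw_enable(void)   { puts("hsw: train FDI, DPMS on, wait vblanks"); }
static void hsw_disable(void)  { puts("hsw: mask PCH underrun reporting"); }
static void gmch_enable(void)  { puts("gmch: plain DPMS on"); }
static void gmch_disable(void) { puts("gmch: plain DPMS off"); }

static struct encoder_hooks pick_hooks(int has_ddi)
{
	if (has_ddi)
		return (struct encoder_hooks){ hsw_enable, hsw_disable };
	return (struct encoder_hooks){ gmch_enable, gmch_disable };
}

int main(void)
{
	struct encoder_hooks hooks = pick_hooks(1);

	hooks.enable();
	hooks.disable();
	return 0;
}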
assert_rpm_wakelock_held(dev_priv); + + preempt_disable(); + for (i = 0; i < fw_size; i++) - I915_WRITE(CSR_PROGRAM(i), payload[i]); + I915_WRITE_FW(CSR_PROGRAM(i), payload[i]); + + preempt_enable(); for (i = 0; i < dev_priv->csr.mmio_count; i++) { I915_WRITE(dev_priv->csr.mmioaddr[i], @@ -285,7 +288,8 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, css_header = (struct intel_css_header *)fw->data; if (sizeof(struct intel_css_header) != (css_header->header_len * 4)) { - DRM_ERROR("Firmware has wrong CSS header length %u bytes\n", + DRM_ERROR("DMC firmware has wrong CSS header length " + "(%u bytes)\n", (css_header->header_len * 4)); return NULL; } @@ -309,7 +313,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, if (csr->version != required_version) { DRM_INFO("Refusing to load DMC firmware v%u.%u," - " please use v%u.%u [" FIRMWARE_URL "].\n", + " please use v%u.%u\n", CSR_VERSION_MAJOR(csr->version), CSR_VERSION_MINOR(csr->version), CSR_VERSION_MAJOR(required_version), @@ -324,7 +328,8 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, &fw->data[readcount]; if (sizeof(struct intel_package_header) != (package_header->header_len * 4)) { - DRM_ERROR("Firmware has wrong package header length %u bytes\n", + DRM_ERROR("DMC firmware has wrong package header length " + "(%u bytes)\n", (package_header->header_len * 4)); return NULL; } @@ -345,7 +350,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, dmc_offset = package_header->fw_info[i].offset; } if (dmc_offset == CSR_DEFAULT_FW_OFFSET) { - DRM_ERROR("Firmware not supported for %c stepping\n", + DRM_ERROR("DMC firmware not supported for %c stepping\n", si->stepping); return NULL; } @@ -354,7 +359,8 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, /* Extract dmc_header information. */ dmc_header = (struct intel_dmc_header *)&fw->data[readcount]; if (sizeof(struct intel_dmc_header) != (dmc_header->header_len)) { - DRM_ERROR("Firmware has wrong dmc header length %u bytes\n", + DRM_ERROR("DMC firmware has wrong dmc header length " + "(%u bytes)\n", (dmc_header->header_len)); return NULL; } @@ -362,7 +368,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, /* Cache the dmc header info. */ if (dmc_header->mmio_count > ARRAY_SIZE(csr->mmioaddr)) { - DRM_ERROR("Firmware has wrong mmio count %u\n", + DRM_ERROR("DMC firmware has wrong mmio count %u\n", dmc_header->mmio_count); return NULL; } @@ -370,7 +376,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, for (i = 0; i < dmc_header->mmio_count; i++) { if (dmc_header->mmioaddr[i] < CSR_MMIO_START_RANGE || dmc_header->mmioaddr[i] > CSR_MMIO_END_RANGE) { - DRM_ERROR(" Firmware has wrong mmio address 0x%x\n", + DRM_ERROR("DMC firmware has wrong mmio address 0x%x\n", dmc_header->mmioaddr[i]); return NULL; } @@ -381,7 +387,7 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, /* fw_size is in dwords, so multiplied by 4 to convert into bytes. */ nbytes = dmc_header->fw_size * 4; if (nbytes > CSR_MAX_FW_SIZE) { - DRM_ERROR("CSR firmware too big (%u) bytes\n", nbytes); + DRM_ERROR("DMC firmware too big (%u bytes)\n", nbytes); return NULL; } csr->dmc_fw_size = dmc_header->fw_size; @@ -419,9 +425,11 @@ static void csr_load_work_fn(struct work_struct *work) CSR_VERSION_MINOR(csr->version)); } else { dev_notice(dev_priv->drm.dev, - "Failed to load DMC firmware" - " [" FIRMWARE_URL "]," - " disabling runtime power management.\n"); + "Failed to load DMC firmware %s." 
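The intel_csr_load_program() change swaps per-register I915_WRITE() for raw I915_WRITE_FW() inside a preempt-off section, paying the expensive bookkeeping once around the whole payload loop instead of once per register. The same hoisting idea in a trivial stand-alone form (the "acquire" here is a fake stand-in for forcewake/wakelock handling):

#include <stdio.h>

static int acquire_count;

static void expensive_acquire(void) { acquire_count++; }
static void expensive_release(void) { }
static void raw_write(int reg, int val) { (void)reg; (void)val; }

static void load_payload(const int *payload, int n)
{
	int i;

	expensive_acquire();		/* once, not n times */
	for (i = 0; i < n; i++)
		raw_write(i, payload[i]);
	expensive_release();
}

int main(void)
{
	int payload[4] = { 1, 2, 3, 4 };

	load_payload(payload, 4);
	printf("acquires: %d\n", acquire_count);
	return 0;
}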
+ " Disabling runtime power management.\n", + csr->fw_path); + dev_notice(dev_priv->drm.dev, "DMC firmware homepage: %s", + INTEL_UC_FIRMWARE_URL); } release_firmware(fw); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 5e5fe03b638c..f51645a08dca 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -301,39 +301,38 @@ static const struct ddi_buf_trans skl_y_ddi_translations_hdmi[] = { }; struct bxt_ddi_buf_trans { - u32 margin; /* swing value */ - u32 scale; /* scale value */ - u32 enable; /* scale enable */ - u32 deemphasis; - bool default_index; /* true if the entry represents default value */ + u8 margin; /* swing value */ + u8 scale; /* scale value */ + u8 enable; /* scale enable */ + u8 deemphasis; }; static const struct bxt_ddi_buf_trans bxt_ddi_translations_dp[] = { /* Idx NT mV diff db */ - { 52, 0x9A, 0, 128, true }, /* 0: 400 0 */ - { 78, 0x9A, 0, 85, false }, /* 1: 400 3.5 */ - { 104, 0x9A, 0, 64, false }, /* 2: 400 6 */ - { 154, 0x9A, 0, 43, false }, /* 3: 400 9.5 */ - { 77, 0x9A, 0, 128, false }, /* 4: 600 0 */ - { 116, 0x9A, 0, 85, false }, /* 5: 600 3.5 */ - { 154, 0x9A, 0, 64, false }, /* 6: 600 6 */ - { 102, 0x9A, 0, 128, false }, /* 7: 800 0 */ - { 154, 0x9A, 0, 85, false }, /* 8: 800 3.5 */ - { 154, 0x9A, 1, 128, false }, /* 9: 1200 0 */ + { 52, 0x9A, 0, 128, }, /* 0: 400 0 */ + { 78, 0x9A, 0, 85, }, /* 1: 400 3.5 */ + { 104, 0x9A, 0, 64, }, /* 2: 400 6 */ + { 154, 0x9A, 0, 43, }, /* 3: 400 9.5 */ + { 77, 0x9A, 0, 128, }, /* 4: 600 0 */ + { 116, 0x9A, 0, 85, }, /* 5: 600 3.5 */ + { 154, 0x9A, 0, 64, }, /* 6: 600 6 */ + { 102, 0x9A, 0, 128, }, /* 7: 800 0 */ + { 154, 0x9A, 0, 85, }, /* 8: 800 3.5 */ + { 154, 0x9A, 1, 128, }, /* 9: 1200 0 */ }; static const struct bxt_ddi_buf_trans bxt_ddi_translations_edp[] = { /* Idx NT mV diff db */ - { 26, 0, 0, 128, false }, /* 0: 200 0 */ - { 38, 0, 0, 112, false }, /* 1: 200 1.5 */ - { 48, 0, 0, 96, false }, /* 2: 200 4 */ - { 54, 0, 0, 69, false }, /* 3: 200 6 */ - { 32, 0, 0, 128, false }, /* 4: 250 0 */ - { 48, 0, 0, 104, false }, /* 5: 250 1.5 */ - { 54, 0, 0, 85, false }, /* 6: 250 4 */ - { 43, 0, 0, 128, false }, /* 7: 300 0 */ - { 54, 0, 0, 101, false }, /* 8: 300 1.5 */ - { 48, 0, 0, 128, false }, /* 9: 300 0 */ + { 26, 0, 0, 128, }, /* 0: 200 0 */ + { 38, 0, 0, 112, }, /* 1: 200 1.5 */ + { 48, 0, 0, 96, }, /* 2: 200 4 */ + { 54, 0, 0, 69, }, /* 3: 200 6 */ + { 32, 0, 0, 128, }, /* 4: 250 0 */ + { 48, 0, 0, 104, }, /* 5: 250 1.5 */ + { 54, 0, 0, 85, }, /* 6: 250 4 */ + { 43, 0, 0, 128, }, /* 7: 300 0 */ + { 54, 0, 0, 101, }, /* 8: 300 1.5 */ + { 48, 0, 0, 128, }, /* 9: 300 0 */ }; /* BSpec has 2 recommended values - entries 0 and 8. 
@@ -341,24 +340,24 @@ static const struct bxt_ddi_buf_trans bxt_ddi_translations_edp[] = { */ static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = { /* Idx NT mV diff db */ - { 52, 0x9A, 0, 128, false }, /* 0: 400 0 */ - { 52, 0x9A, 0, 85, false }, /* 1: 400 3.5 */ - { 52, 0x9A, 0, 64, false }, /* 2: 400 6 */ - { 42, 0x9A, 0, 43, false }, /* 3: 400 9.5 */ - { 77, 0x9A, 0, 128, false }, /* 4: 600 0 */ - { 77, 0x9A, 0, 85, false }, /* 5: 600 3.5 */ - { 77, 0x9A, 0, 64, false }, /* 6: 600 6 */ - { 102, 0x9A, 0, 128, false }, /* 7: 800 0 */ - { 102, 0x9A, 0, 85, false }, /* 8: 800 3.5 */ - { 154, 0x9A, 1, 128, true }, /* 9: 1200 0 */ + { 52, 0x9A, 0, 128, }, /* 0: 400 0 */ + { 52, 0x9A, 0, 85, }, /* 1: 400 3.5 */ + { 52, 0x9A, 0, 64, }, /* 2: 400 6 */ + { 42, 0x9A, 0, 43, }, /* 3: 400 9.5 */ + { 77, 0x9A, 0, 128, }, /* 4: 600 0 */ + { 77, 0x9A, 0, 85, }, /* 5: 600 3.5 */ + { 77, 0x9A, 0, 64, }, /* 6: 600 6 */ + { 102, 0x9A, 0, 128, }, /* 7: 800 0 */ + { 102, 0x9A, 0, 85, }, /* 8: 800 3.5 */ + { 154, 0x9A, 1, 128, }, /* 9: 1200 0 */ }; struct cnl_ddi_buf_trans { - u32 dw2_swing_sel; - u32 dw7_n_scalar; - u32 dw4_cursor_coeff; - u32 dw4_post_cursor_2; - u32 dw4_post_cursor_1; + u8 dw2_swing_sel; + u8 dw7_n_scalar; + u8 dw4_cursor_coeff; + u8 dw4_post_cursor_2; + u8 dw4_post_cursor_1; }; /* Voltage Swing Programming for VccIO 0.85V for DP */ @@ -493,24 +492,6 @@ static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_1_05V[] = { { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ }; -enum port intel_ddi_get_encoder_port(struct intel_encoder *encoder) -{ - switch (encoder->type) { - case INTEL_OUTPUT_DP_MST: - return enc_to_mst(&encoder->base)->primary->port; - case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_EDP: - case INTEL_OUTPUT_HDMI: - case INTEL_OUTPUT_UNKNOWN: - return enc_to_dig_port(&encoder->base)->port; - case INTEL_OUTPUT_ANALOG: - return PORT_E; - default: - MISSING_CASE(encoder->type); - return PORT_A; - } -} - static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { @@ -588,48 +569,29 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) } } -static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) +static int skl_buf_trans_num_entries(enum port port, int n_entries) { - int n_hdmi_entries; - int hdmi_level; - int hdmi_default_entry; - - hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; - - if (IS_GEN9_LP(dev_priv)) - return hdmi_level; - - if (IS_GEN9_BC(dev_priv)) { - skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); - hdmi_default_entry = 8; - } else if (IS_BROADWELL(dev_priv)) { - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; - } else if (IS_HASWELL(dev_priv)) { - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_default_entry = 6; - } else { - WARN(1, "ddi translation table missing\n"); - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; - } - - /* Choose a good default if VBT is badly populated */ - if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || - hdmi_level >= n_hdmi_entries) - hdmi_level = hdmi_default_entry; - - return hdmi_level; + /* Only DDIA and DDIE can select the 10th register with DP */ + if (port == PORT_A || port == PORT_E) + return min(n_entries, 10); + else + return min(n_entries, 9); } static const struct ddi_buf_trans * intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, - int *n_entries) + enum port port, int *n_entries) { if 
(IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - return kbl_get_buf_trans_dp(dev_priv, n_entries); + const struct ddi_buf_trans *ddi_translations = + kbl_get_buf_trans_dp(dev_priv, n_entries); + *n_entries = skl_buf_trans_num_entries(port, *n_entries); + return ddi_translations; } else if (IS_SKYLAKE(dev_priv)) { - return skl_get_buf_trans_dp(dev_priv, n_entries); + const struct ddi_buf_trans *ddi_translations = + skl_get_buf_trans_dp(dev_priv, n_entries); + *n_entries = skl_buf_trans_num_entries(port, *n_entries); + return ddi_translations; } else if (IS_BROADWELL(dev_priv)) { *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); return bdw_ddi_translations_dp; @@ -644,10 +606,13 @@ intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, static const struct ddi_buf_trans * intel_ddi_get_buf_trans_edp(struct drm_i915_private *dev_priv, - int *n_entries) + enum port port, int *n_entries) { if (IS_GEN9_BC(dev_priv)) { - return skl_get_buf_trans_edp(dev_priv, n_entries); + const struct ddi_buf_trans *ddi_translations = + skl_get_buf_trans_edp(dev_priv, n_entries); + *n_entries = skl_buf_trans_num_entries(port, *n_entries); + return ddi_translations; } else if (IS_BROADWELL(dev_priv)) { return bdw_get_buf_trans_edp(dev_priv, n_entries); } else if (IS_HASWELL(dev_priv)) { @@ -675,50 +640,182 @@ intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, return NULL; } +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_GEN9_BC(dev_priv)) { + return skl_get_buf_trans_hdmi(dev_priv, n_entries); + } else if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + return bdw_ddi_translations_hdmi; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + return hsw_ddi_translations_hdmi; + } + + *n_entries = 0; + return NULL; +} + +static const struct bxt_ddi_buf_trans * +bxt_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) +{ + *n_entries = ARRAY_SIZE(bxt_ddi_translations_dp); + return bxt_ddi_translations_dp; +} + +static const struct bxt_ddi_buf_trans * +bxt_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) +{ + if (dev_priv->vbt.edp.low_vswing) { + *n_entries = ARRAY_SIZE(bxt_ddi_translations_edp); + return bxt_ddi_translations_edp; + } + + return bxt_get_buf_trans_dp(dev_priv, n_entries); +} + +static const struct bxt_ddi_buf_trans * +bxt_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) +{ + *n_entries = ARRAY_SIZE(bxt_ddi_translations_hdmi); + return bxt_ddi_translations_hdmi; +} + +static const struct cnl_ddi_buf_trans * +cnl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) +{ + u32 voltage = I915_READ(CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; + + if (voltage == VOLTAGE_INFO_0_85V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_0_85V); + return cnl_ddi_translations_hdmi_0_85V; + } else if (voltage == VOLTAGE_INFO_0_95V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_0_95V); + return cnl_ddi_translations_hdmi_0_95V; + } else if (voltage == VOLTAGE_INFO_1_05V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_1_05V); + return cnl_ddi_translations_hdmi_1_05V; + } else { + *n_entries = 1; /* shut up gcc */ + MISSING_CASE(voltage); + } + return NULL; +} + +static const struct cnl_ddi_buf_trans * +cnl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries) +{ + u32 voltage = I915_READ(CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; + + if (voltage == 
VOLTAGE_INFO_0_85V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_0_85V); + return cnl_ddi_translations_dp_0_85V; + } else if (voltage == VOLTAGE_INFO_0_95V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_0_95V); + return cnl_ddi_translations_dp_0_95V; + } else if (voltage == VOLTAGE_INFO_1_05V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_1_05V); + return cnl_ddi_translations_dp_1_05V; + } else { + *n_entries = 1; /* shut up gcc */ + MISSING_CASE(voltage); + } + return NULL; +} + +static const struct cnl_ddi_buf_trans * +cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) +{ + u32 voltage = I915_READ(CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; + + if (dev_priv->vbt.edp.low_vswing) { + if (voltage == VOLTAGE_INFO_0_85V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_85V); + return cnl_ddi_translations_edp_0_85V; + } else if (voltage == VOLTAGE_INFO_0_95V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_95V); + return cnl_ddi_translations_edp_0_95V; + } else if (voltage == VOLTAGE_INFO_1_05V) { + *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_1_05V); + return cnl_ddi_translations_edp_1_05V; + } else { + *n_entries = 1; /* shut up gcc */ + MISSING_CASE(voltage); + } + return NULL; + } else { + return cnl_get_buf_trans_dp(dev_priv, n_entries); + } +} + +static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) +{ + int n_entries, level, default_entry; + + level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + + if (IS_CANNONLAKE(dev_priv)) { + cnl_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = n_entries - 1; + } else if (IS_GEN9_LP(dev_priv)) { + bxt_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = n_entries - 1; + } else if (IS_GEN9_BC(dev_priv)) { + intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = 8; + } else if (IS_BROADWELL(dev_priv)) { + intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = 7; + } else if (IS_HASWELL(dev_priv)) { + intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + default_entry = 6; + } else { + WARN(1, "ddi translation table missing\n"); + return 0; + } + + /* Choose a good default if VBT is badly populated */ + if (level == HDMI_LEVEL_SHIFT_UNKNOWN || level >= n_entries) + level = default_entry; + + if (WARN_ON_ONCE(n_entries == 0)) + return 0; + if (WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; + + return level; +} + /* * Starting with Haswell, DDI port buffers must be programmed with correct * values in advance. This function programs the correct values for * DP/eDP/FDI use cases. 
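The rewritten intel_ddi_hdmi_level() trusts the VBT level shift only if it indexes a real table entry, falls back to a platform default otherwise, and clamps as a last line of defence. That fallback-and-clamp pattern stand-alone (the sentinel's numeric value is an assumption here; table sizes are hypothetical):

#include <stdio.h>

#define HDMI_LEVEL_SHIFT_UNKNOWN 0xff	/* assumed sentinel value */

static int pick_hdmi_level(int vbt_level, int n_entries, int default_entry)
{
	int level = vbt_level;

	/* Choose a good default if VBT is badly populated */
	if (level == HDMI_LEVEL_SHIFT_UNKNOWN || level >= n_entries)
		level = default_entry;

	/* Defensive clamp in case the default itself is stale */
	if (level >= n_entries)
		level = n_entries - 1;

	return level;
}

int main(void)
{
	printf("%d\n", pick_hdmi_level(HDMI_LEVEL_SHIFT_UNKNOWN, 10, 8));
	printf("%d\n", pick_hdmi_level(3, 10, 8));
	return 0;
}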
*/ -static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) +static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; int i, n_entries; - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; const struct ddi_buf_trans *ddi_translations; - if (IS_GEN9_LP(dev_priv)) - return; - - switch (encoder->type) { - case INTEL_OUTPUT_EDP: - ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, - &n_entries); - break; - case INTEL_OUTPUT_DP: - ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, - &n_entries); - break; - case INTEL_OUTPUT_ANALOG: + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) ddi_translations = intel_ddi_get_buf_trans_fdi(dev_priv, &n_entries); - break; - default: - MISSING_CASE(encoder->type); - return; - } + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, port, + &n_entries); + else + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, port, + &n_entries); - if (IS_GEN9_BC(dev_priv)) { - /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) - iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; - - if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && - port != PORT_A && port != PORT_E && - n_entries > 9)) - n_entries = 9; - } + /* If we're boosting the current, set bit 31 of trans1 */ + if (IS_GEN9_BC(dev_priv) && + dev_priv->vbt.ddi_port_info[port].dp_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; for (i = 0; i < n_entries; i++) { I915_WRITE(DDI_BUF_TRANS_LO(port, i), @@ -733,42 +830,32 @@ static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) * values in advance. This function programs the correct values for * HDMI/DVI use cases. 
*/ -static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) +static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, + int level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int n_hdmi_entries, hdmi_level; - enum port port = intel_ddi_get_encoder_port(encoder); - const struct ddi_buf_trans *ddi_translations_hdmi; - - if (IS_GEN9_LP(dev_priv)) - return; + int n_entries; + enum port port = encoder->port; + const struct ddi_buf_trans *ddi_translations; - hdmi_level = intel_ddi_hdmi_level(dev_priv, port); + ddi_translations = intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); - if (IS_GEN9_BC(dev_priv)) { - ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + if (WARN_ON_ONCE(!ddi_translations)) + return; + if (WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; - /* If we're boosting the current, set bit 31 of trans1 */ - if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level) - iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; - } else if (IS_BROADWELL(dev_priv)) { - ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - } else if (IS_HASWELL(dev_priv)) { - ddi_translations_hdmi = hsw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - } else { - WARN(1, "ddi translation table missing\n"); - ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - } + /* If we're boosting the current, set bit 31 of trans1 */ + if (IS_GEN9_BC(dev_priv) && + dev_priv->vbt.ddi_port_info[port].hdmi_boost_level) + iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; /* Entry 9 is for HDMI: */ I915_WRITE(DDI_BUF_TRANS_LO(port, 9), - ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit); + ddi_translations[level].trans1 | iboost_bit); I915_WRITE(DDI_BUF_TRANS_HI(port, 9), - ddi_translations_hdmi[hdmi_level].trans2); + ddi_translations[level].trans2); } static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, @@ -785,7 +872,7 @@ static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, DRM_ERROR("Timeout waiting for DDI BUF %c idle bit\n", port_name(port)); } -static uint32_t hsw_pll_to_ddi_pll_sel(struct intel_shared_dpll *pll) +static uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) { switch (pll->id) { case DPLL_ID_WRPLL1: @@ -825,7 +912,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc, for_each_encoder_on_crtc(dev, &crtc->base, encoder) { WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG); - intel_prepare_dp_ddi_buffers(encoder); + intel_prepare_dp_ddi_buffers(encoder, crtc_state); } /* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the @@ -1044,14 +1131,14 @@ static int hsw_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv, } static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv, - uint32_t dpll) + enum intel_dpll_id pll_id) { i915_reg_t cfgcr1_reg, cfgcr2_reg; uint32_t cfgcr1_val, cfgcr2_val; uint32_t p0, p1, p2, dco_freq; - cfgcr1_reg = DPLL_CFGCR1(dpll); - cfgcr2_reg = DPLL_CFGCR2(dpll); + cfgcr1_reg = DPLL_CFGCR1(pll_id); + cfgcr2_reg = DPLL_CFGCR2(pll_id); cfgcr1_val = I915_READ(cfgcr1_reg); cfgcr2_val = I915_READ(cfgcr2_reg); @@ -1104,7 +1191,7 @@ static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv, } static int cnl_calc_wrpll_link(struct drm_i915_private *dev_priv, - uint32_t pll_id) + enum intel_dpll_id pll_id) { uint32_t cfgcr0, cfgcr1; uint32_t p0, p1, p2, dco_freq, ref_clock; @@ 
-1154,7 +1241,10 @@ static int cnl_calc_wrpll_link(struct drm_i915_private *dev_priv, dco_freq = (cfgcr0 & DPLL_CFGCR0_DCO_INTEGER_MASK) * ref_clock; dco_freq += (((cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >> - DPLL_CFGCR0_DCO_FRAC_SHIFT) * ref_clock) / 0x8000; + DPLL_CFGCR0_DCO_FRACTION_SHIFT) * ref_clock) / 0x8000; + + if (WARN_ON(p0 == 0 || p1 == 0 || p2 == 0)) + return 0; return dco_freq / (p0 * p1 * p2 * 5); } @@ -1188,7 +1278,8 @@ static void cnl_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int link_clock = 0; - uint32_t cfgcr0, pll_id; + uint32_t cfgcr0; + enum intel_dpll_id pll_id; pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); @@ -1241,17 +1332,18 @@ static void skl_ddi_clock_get(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int link_clock = 0; - uint32_t dpll_ctl1, dpll; + uint32_t dpll_ctl1; + enum intel_dpll_id pll_id; - dpll = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); + pll_id = intel_get_shared_dpll_id(dev_priv, pipe_config->shared_dpll); dpll_ctl1 = I915_READ(DPLL_CTRL1); - if (dpll_ctl1 & DPLL_CTRL1_HDMI_MODE(dpll)) { - link_clock = skl_calc_wrpll_link(dev_priv, dpll); + if (dpll_ctl1 & DPLL_CTRL1_HDMI_MODE(pll_id)) { + link_clock = skl_calc_wrpll_link(dev_priv, pll_id); } else { - link_clock = dpll_ctl1 & DPLL_CTRL1_LINK_RATE_MASK(dpll); - link_clock >>= DPLL_CTRL1_LINK_RATE_SHIFT(dpll); + link_clock = dpll_ctl1 & DPLL_CTRL1_LINK_RATE_MASK(pll_id); + link_clock >>= DPLL_CTRL1_LINK_RATE_SHIFT(pll_id); switch (link_clock) { case DPLL_CTRL1_LINK_RATE_810: @@ -1331,19 +1423,16 @@ static void hsw_ddi_clock_get(struct intel_encoder *encoder, ddi_dotclock_get(pipe_config); } -static int bxt_calc_pll_link(struct drm_i915_private *dev_priv, - enum intel_dpll_id dpll) +static int bxt_calc_pll_link(struct intel_crtc_state *crtc_state) { - struct intel_shared_dpll *pll; struct intel_dpll_hw_state *state; struct dpll clock; /* For DDI ports we always use a shared PLL. 
*/ - if (WARN_ON(dpll == DPLL_ID_PRIVATE)) + if (WARN_ON(!crtc_state->shared_dpll)) return 0; - pll = &dev_priv->shared_dplls[dpll]; - state = &pll->state.hw_state; + state = &crtc_state->dpll_hw_state; clock.m1 = 2; clock.m2 = (state->pll0 & PORT_PLL_M2_MASK) << 22; @@ -1357,19 +1446,15 @@ static int bxt_calc_pll_link(struct drm_i915_private *dev_priv, } static void bxt_ddi_clock_get(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_ddi_get_encoder_port(encoder); - uint32_t dpll = port; - - pipe_config->port_clock = bxt_calc_pll_link(dev_priv, dpll); + pipe_config->port_clock = bxt_calc_pll_link(pipe_config); ddi_dotclock_get(pipe_config); } -void intel_ddi_clock_get(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) +static void intel_ddi_clock_get(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -1387,33 +1472,34 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - int type = encoder->type; - uint32_t temp; + u32 temp; - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP || type == INTEL_OUTPUT_DP_MST) { - WARN_ON(transcoder_is_dsi(cpu_transcoder)); + if (!intel_crtc_has_dp_encoder(crtc_state)) + return; - temp = TRANS_MSA_SYNC_CLK; - switch (crtc_state->pipe_bpp) { - case 18: - temp |= TRANS_MSA_6_BPC; - break; - case 24: - temp |= TRANS_MSA_8_BPC; - break; - case 30: - temp |= TRANS_MSA_10_BPC; - break; - case 36: - temp |= TRANS_MSA_12_BPC; - break; - default: - BUG(); - } - I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); + WARN_ON(transcoder_is_dsi(cpu_transcoder)); + + temp = TRANS_MSA_SYNC_CLK; + switch (crtc_state->pipe_bpp) { + case 18: + temp |= TRANS_MSA_6_BPC; + break; + case 24: + temp |= TRANS_MSA_8_BPC; + break; + case 30: + temp |= TRANS_MSA_10_BPC; + break; + case 36: + temp |= TRANS_MSA_12_BPC; + break; + default: + MISSING_CASE(crtc_state->pipe_bpp); + break; } + + I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); } void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, @@ -1423,6 +1509,7 @@ void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; uint32_t temp; + temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (state == true) temp |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC; @@ -1438,8 +1525,7 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - enum port port = intel_ddi_get_encoder_port(encoder); - int type = encoder->type; + enum port port = encoder->port; uint32_t temp; /* Enable TRANS_DDI_FUNC_CTL for the pipe to work in HDMI mode */ @@ -1494,7 +1580,7 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) } } - if (type == INTEL_OUTPUT_HDMI) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { if (crtc_state->has_hdmi_sink) temp |= 
TRANS_DDI_MODE_SELECT_HDMI; else @@ -1504,19 +1590,15 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) temp |= TRANS_DDI_HDMI_SCRAMBLING_MASK; if (crtc_state->hdmi_high_tmds_clock_ratio) temp |= TRANS_DDI_HIGH_TMDS_CHAR_RATE; - } else if (type == INTEL_OUTPUT_ANALOG) { + } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) { temp |= TRANS_DDI_MODE_SELECT_FDI; temp |= (crtc_state->fdi_lanes - 1) << 1; - } else if (type == INTEL_OUTPUT_DP || - type == INTEL_OUTPUT_EDP) { - temp |= TRANS_DDI_MODE_SELECT_DP_SST; - temp |= DDI_PORT_WIDTH(crtc_state->lane_count); - } else if (type == INTEL_OUTPUT_DP_MST) { + } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { temp |= TRANS_DDI_MODE_SELECT_DP_MST; temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } else { - WARN(1, "Invalid encoder type %d for pipe %c\n", - encoder->type, pipe_name(pipe)); + temp |= TRANS_DDI_MODE_SELECT_DP_SST; + temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); @@ -1539,7 +1621,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_encoder *encoder = intel_connector->encoder; int type = intel_connector->base.connector_type; - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; enum pipe pipe = 0; enum transcoder cpu_transcoder; uint32_t tmp; @@ -1598,9 +1680,9 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; + enum pipe p; u32 tmp; - int i; bool ret; if (!intel_display_power_get_if_enabled(dev_priv, @@ -1635,15 +1717,17 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, goto out; } - for (i = TRANSCODER_A; i <= TRANSCODER_C; i++) { - tmp = I915_READ(TRANS_DDI_FUNC_CTL(i)); + for_each_pipe(dev_priv, p) { + enum transcoder cpu_transcoder = (enum transcoder) p; + + tmp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if ((tmp & TRANS_DDI_PORT_MASK) == TRANS_DDI_SELECT_PORT(port)) { if ((tmp & TRANS_DDI_MODE_SELECT_MASK) == TRANS_DDI_MODE_SELECT_DP_MST) goto out; - *pipe = i; + *pipe = p; ret = true; goto out; @@ -1683,7 +1767,7 @@ void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) @@ -1715,54 +1799,36 @@ static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp); } -static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) +static void skl_ddi_set_iboost(struct intel_encoder *encoder, + int level, enum intel_output_type type) { struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - enum port port = intel_dig_port->port; - int type = encoder->type; - const struct ddi_buf_trans *ddi_translations; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; uint8_t iboost; - uint8_t dp_iboost, 
hdmi_iboost; - int n_entries; - /* VBT may override standard boost values */ - dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; - hdmi_iboost = dev_priv->vbt.ddi_port_info[port].hdmi_boost_level; + if (type == INTEL_OUTPUT_HDMI) + iboost = dev_priv->vbt.ddi_port_info[port].hdmi_boost_level; + else + iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; + + if (iboost == 0) { + const struct ddi_buf_trans *ddi_translations; + int n_entries; - if (type == INTEL_OUTPUT_DP) { - if (dp_iboost) { - iboost = dp_iboost; - } else { - if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) - ddi_translations = kbl_get_buf_trans_dp(dev_priv, - &n_entries); - else - ddi_translations = skl_get_buf_trans_dp(dev_priv, - &n_entries); - iboost = ddi_translations[level].i_boost; - } - } else if (type == INTEL_OUTPUT_EDP) { - if (dp_iboost) { - iboost = dp_iboost; - } else { - ddi_translations = skl_get_buf_trans_edp(dev_priv, &n_entries); + if (type == INTEL_OUTPUT_HDMI) + ddi_translations = intel_ddi_get_buf_trans_hdmi(dev_priv, &n_entries); + else if (type == INTEL_OUTPUT_EDP) + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, port, &n_entries); + else + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, port, &n_entries); - if (WARN_ON(port != PORT_A && - port != PORT_E && n_entries > 9)) - n_entries = 9; + if (WARN_ON_ONCE(!ddi_translations)) + return; + if (WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; - iboost = ddi_translations[level].i_boost; - } - } else if (type == INTEL_OUTPUT_HDMI) { - if (hdmi_iboost) { - iboost = hdmi_iboost; - } else { - ddi_translations = skl_get_buf_trans_hdmi(dev_priv, &n_entries); - iboost = ddi_translations[level].i_boost; - } - } else { - return; + iboost = ddi_translations[level].i_boost; } /* Make sure that the requested I_boost is valid */ @@ -1777,38 +1843,25 @@ static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) _skl_ddi_set_iboost(dev_priv, PORT_E, iboost); } -static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) +static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder, + int level, enum intel_output_type type) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); const struct bxt_ddi_buf_trans *ddi_translations; - u32 n_entries, i; - - if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { - n_entries = ARRAY_SIZE(bxt_ddi_translations_edp); - ddi_translations = bxt_ddi_translations_edp; - } else if (type == INTEL_OUTPUT_DP - || type == INTEL_OUTPUT_EDP) { - n_entries = ARRAY_SIZE(bxt_ddi_translations_dp); - ddi_translations = bxt_ddi_translations_dp; - } else if (type == INTEL_OUTPUT_HDMI) { - n_entries = ARRAY_SIZE(bxt_ddi_translations_hdmi); - ddi_translations = bxt_ddi_translations_hdmi; - } else { - DRM_DEBUG_KMS("Vswing programming not done for encoder %d\n", - type); - return; - } + enum port port = encoder->port; + int n_entries; - /* Check if default value has to be used */ - if (level >= n_entries || - (type == INTEL_OUTPUT_HDMI && level == HDMI_LEVEL_SHIFT_UNKNOWN)) { - for (i = 0; i < n_entries; i++) { - if (ddi_translations[i].default_index) { - level = i; - break; - } - } - } + if (type == INTEL_OUTPUT_HDMI) + ddi_translations = bxt_get_buf_trans_hdmi(dev_priv, &n_entries); + else if (type == INTEL_OUTPUT_EDP) + ddi_translations = bxt_get_buf_trans_edp(dev_priv, &n_entries); + else + ddi_translations = bxt_get_buf_trans_dp(dev_priv, &n_entries); + + if (WARN_ON_ONCE(!ddi_translations)) + return; + if 
(WARN_ON_ONCE(level >= n_entries)) + level = n_entries - 1; bxt_ddi_phy_set_signal_level(dev_priv, port, ddi_translations[level].margin, @@ -1820,12 +1873,25 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; int n_entries; - if (encoder->type == INTEL_OUTPUT_EDP) - intel_ddi_get_buf_trans_edp(dev_priv, &n_entries); - else - intel_ddi_get_buf_trans_dp(dev_priv, &n_entries); + if (IS_CANNONLAKE(dev_priv)) { + if (encoder->type == INTEL_OUTPUT_EDP) + cnl_get_buf_trans_edp(dev_priv, &n_entries); + else + cnl_get_buf_trans_dp(dev_priv, &n_entries); + } else if (IS_GEN9_LP(dev_priv)) { + if (encoder->type == INTEL_OUTPUT_EDP) + bxt_get_buf_trans_edp(dev_priv, &n_entries); + else + bxt_get_buf_trans_dp(dev_priv, &n_entries); + } else { + if (encoder->type == INTEL_OUTPUT_EDP) + intel_ddi_get_buf_trans_edp(dev_priv, port, &n_entries); + else + intel_ddi_get_buf_trans_dp(dev_priv, port, &n_entries); + } if (WARN_ON(n_entries < 1)) n_entries = 1; @@ -1836,95 +1902,26 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) DP_TRAIN_VOLTAGE_SWING_MASK; } -static const struct cnl_ddi_buf_trans * -cnl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, - u32 voltage, int *n_entries) +static void cnl_ddi_vswing_program(struct intel_encoder *encoder, + int level, enum intel_output_type type) { - if (voltage == VOLTAGE_INFO_0_85V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_0_85V); - return cnl_ddi_translations_hdmi_0_85V; - } else if (voltage == VOLTAGE_INFO_0_95V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_0_95V); - return cnl_ddi_translations_hdmi_0_95V; - } else if (voltage == VOLTAGE_INFO_1_05V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_1_05V); - return cnl_ddi_translations_hdmi_1_05V; - } - return NULL; -} - -static const struct cnl_ddi_buf_trans * -cnl_get_buf_trans_dp(struct drm_i915_private *dev_priv, - u32 voltage, int *n_entries) -{ - if (voltage == VOLTAGE_INFO_0_85V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_0_85V); - return cnl_ddi_translations_dp_0_85V; - } else if (voltage == VOLTAGE_INFO_0_95V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_0_95V); - return cnl_ddi_translations_dp_0_95V; - } else if (voltage == VOLTAGE_INFO_1_05V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_1_05V); - return cnl_ddi_translations_dp_1_05V; - } - return NULL; -} - -static const struct cnl_ddi_buf_trans * -cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, - u32 voltage, int *n_entries) -{ - if (dev_priv->vbt.edp.low_vswing) { - if (voltage == VOLTAGE_INFO_0_85V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_85V); - return cnl_ddi_translations_edp_0_85V; - } else if (voltage == VOLTAGE_INFO_0_95V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_95V); - return cnl_ddi_translations_edp_0_95V; - } else if (voltage == VOLTAGE_INFO_1_05V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_1_05V); - return cnl_ddi_translations_edp_1_05V; - } - return NULL; - } else { - return cnl_get_buf_trans_dp(dev_priv, voltage, n_entries); - } -} - -static void cnl_ddi_vswing_program(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) -{ - const struct cnl_ddi_buf_trans *ddi_translations = NULL; - u32 n_entries, val, voltage; - int ln; - - /* - * Values for each port type are listed in - * voltage swing programming tables. 
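
The rewritten iboost/vswing paths above converge on one lookup discipline: ask a helper for the type-specific translation table together with its length, warn once if no table exists, and clamp an out-of-range level to the last entry rather than aborting the programming sequence. A minimal standalone sketch of that pattern, with invented table contents (the real tables are selected per platform and port):

#include <stdio.h>

#define ARRAY_SIZE(a) ((int)(sizeof(a) / sizeof((a)[0])))

struct buf_trans { int i_boost; };

/* Hypothetical tables; the driver picks these per platform and port. */
static const struct buf_trans hdmi_table[] = { {1}, {3}, {7} };
static const struct buf_trans dp_table[] = { {0}, {1}, {3}, {7} };

static const struct buf_trans *get_buf_trans(int is_hdmi, int *n_entries)
{
        if (is_hdmi) {
                *n_entries = ARRAY_SIZE(hdmi_table);
                return hdmi_table;
        }
        *n_entries = ARRAY_SIZE(dp_table);
        return dp_table;
}

static int lookup_iboost(int is_hdmi, int level)
{
        int n_entries;
        const struct buf_trans *t = get_buf_trans(is_hdmi, &n_entries);

        if (!t)                        /* mirrors WARN_ON_ONCE(!ddi_translations) */
                return -1;
        if (level >= n_entries)        /* mirrors WARN_ON_ONCE(level >= n_entries) */
                level = n_entries - 1; /* clamp instead of bailing out */
        return t[level].i_boost;
}

int main(void)
{
        printf("%d\n", lookup_iboost(0, 10)); /* clamped to last DP entry: 7 */
        return 0;
}
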
- * Vccio voltage found in PORT_COMP_DW3. - */ - voltage = I915_READ(CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + const struct cnl_ddi_buf_trans *ddi_translations; + enum port port = encoder->port; + int n_entries, ln; + u32 val; - if (type == INTEL_OUTPUT_HDMI) { - ddi_translations = cnl_get_buf_trans_hdmi(dev_priv, - voltage, &n_entries); - } else if (type == INTEL_OUTPUT_DP) { - ddi_translations = cnl_get_buf_trans_dp(dev_priv, - voltage, &n_entries); - } else if (type == INTEL_OUTPUT_EDP) { - ddi_translations = cnl_get_buf_trans_edp(dev_priv, - voltage, &n_entries); - } + if (type == INTEL_OUTPUT_HDMI) + ddi_translations = cnl_get_buf_trans_hdmi(dev_priv, &n_entries); + else if (type == INTEL_OUTPUT_EDP) + ddi_translations = cnl_get_buf_trans_edp(dev_priv, &n_entries); + else + ddi_translations = cnl_get_buf_trans_dp(dev_priv, &n_entries); - if (ddi_translations == NULL) { - MISSING_CASE(voltage); + if (WARN_ON_ONCE(!ddi_translations)) return; - } - - if (level >= n_entries) { - DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.", level, n_entries - 1); + if (WARN_ON_ONCE(level >= n_entries)) level = n_entries - 1; - } /* Set PORT_TX_DW5 Scaling Mode Sel to 010b. */ val = I915_READ(CNL_PORT_TX_DW5_LN0(port)); @@ -1942,7 +1939,7 @@ static void cnl_ddi_vswing_program(struct drm_i915_private *dev_priv, val |= RCOMP_SCALAR(0x98); I915_WRITE(CNL_PORT_TX_DW2_GRP(port), val); - /* Program PORT_TX_DW4 */ + /* Program PORT_TX_DW4 */ /* We cannot write to GRP. It would overwrite individual loadgen */ for (ln = 0; ln < 4; ln++) { val = I915_READ(CNL_PORT_TX_DW4_LN(port, ln)); @@ -1954,7 +1951,7 @@ static void cnl_ddi_vswing_program(struct drm_i915_private *dev_priv, I915_WRITE(CNL_PORT_TX_DW4_LN(port, ln), val); } - /* Program PORT_TX_DW5 */ + /* Program PORT_TX_DW5 */ /* All DW5 values are fixed for every table entry */ val = I915_READ(CNL_PORT_TX_DW5_LN0(port)); val &= ~RTERM_SELECT_MASK; @@ -1962,33 +1959,29 @@ static void cnl_ddi_vswing_program(struct drm_i915_private *dev_priv, val |= TAP3_DISABLE; I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); - /* Program PORT_TX_DW7 */ + /* Program PORT_TX_DW7 */ val = I915_READ(CNL_PORT_TX_DW7_LN0(port)); val &= ~N_SCALAR_MASK; val |= N_SCALAR(ddi_translations[level].dw7_n_scalar); I915_WRITE(CNL_PORT_TX_DW7_GRP(port), val); } -static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) +static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, + int level, enum intel_output_type type) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = intel_ddi_get_encoder_port(encoder); - int type = encoder->type; - int width = 0; - int rate = 0; + enum port port = encoder->port; + int width, rate, ln; u32 val; - int ln = 0; - if ((intel_dp) && (type == INTEL_OUTPUT_EDP || type == INTEL_OUTPUT_DP)) { - width = intel_dp->lane_count; - rate = intel_dp->link_rate; - } else if (type == INTEL_OUTPUT_HDMI) { + if (type == INTEL_OUTPUT_HDMI) { width = 4; - /* Rate is always < 6 GHz for HDMI */ + rate = 0; /* Rate is always < 6 GHz for HDMI */ } else { - MISSING_CASE(type); - return; + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + + width = intel_dp->lane_count; + rate = intel_dp->link_rate; } /* @@ -1997,7 +1990,7 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) * else clear to 0b. 
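
The comment kept on the DW4 loop above is the key constraint: a write to the GRP register is broadcast to all four lanes, which would wipe the per-lane loadgen selection programmed earlier in the sequence, so DW4 must be updated lane by lane with read-modify-write. A toy model of that loop, with an array standing in for the per-lane MMIO registers and made-up bit positions:

#include <stdint.h>
#include <stdio.h>

#define LOADGEN_SELECT (1u << 31)   /* per-lane bit that must survive */
#define POST_CURSOR_MASK 0x3fu      /* field this sequence programs */

static uint32_t lane_reg[4];        /* stands in for PORT_TX_DW4_LN(port, ln) */

static void program_dw4(uint32_t post_cursor)
{
        int ln;

        for (ln = 0; ln < 4; ln++) {
                uint32_t val = lane_reg[ln];    /* I915_READ */
                val &= ~POST_CURSOR_MASK;       /* clear only our field */
                val |= post_cursor & POST_CURSOR_MASK;
                lane_reg[ln] = val;             /* I915_WRITE, one lane at a time */
        }
}

int main(void)
{
        lane_reg[2] = LOADGEN_SELECT;   /* pretend lane 2 had loadgen selected */
        program_dw4(0x2a);
        printf("lane2 = 0x%x\n", (unsigned int)lane_reg[2]); /* 0x8000002a */
        return 0;
}

A group write (storing the same value into all four entries) would have lost LOADGEN_SELECT on lane 2, which is exactly why the driver avoids GRP for this register.
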
*/ val = I915_READ(CNL_PORT_PCS_DW1_LN0(port)); - if (type == INTEL_OUTPUT_EDP || type == INTEL_OUTPUT_DP) + if (type != INTEL_OUTPUT_HDMI) val |= COMMON_KEEPER_EN; else val &= ~COMMON_KEEPER_EN; @@ -2032,7 +2025,7 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); /* 5. Program swing and de-emphasis */ - cnl_ddi_vswing_program(dev_priv, level, port, type); + cnl_ddi_vswing_program(encoder, level, type); /* 6. Set training enable to trigger update */ val = I915_READ(CNL_PORT_TX_DW5_LN0(port)); @@ -2055,44 +2048,59 @@ static uint32_t translate_signal_level(int signal_levels) return 0; } -uint32_t ddi_signal_levels(struct intel_dp *intel_dp) +static uint32_t intel_ddi_dp_level(struct intel_dp *intel_dp) { - struct intel_digital_port *dport = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); - struct intel_encoder *encoder = &dport->base; uint8_t train_set = intel_dp->train_set[0]; int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | DP_TRAIN_PRE_EMPHASIS_MASK); - enum port port = dport->port; - uint32_t level; - level = translate_signal_level(signal_levels); + return translate_signal_level(signal_levels); +} + +u32 bxt_signal_levels(struct intel_dp *intel_dp) +{ + struct intel_digital_port *dport = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); + struct intel_encoder *encoder = &dport->base; + int level = intel_ddi_dp_level(intel_dp); + + if (IS_CANNONLAKE(dev_priv)) + cnl_ddi_vswing_sequence(encoder, level, encoder->type); + else + bxt_ddi_vswing_sequence(encoder, level, encoder->type); + + return 0; +} + +uint32_t ddi_signal_levels(struct intel_dp *intel_dp) +{ + struct intel_digital_port *dport = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); + struct intel_encoder *encoder = &dport->base; + int level = intel_ddi_dp_level(intel_dp); if (IS_GEN9_BC(dev_priv)) - skl_ddi_set_iboost(encoder, level); - else if (IS_GEN9_LP(dev_priv)) - bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); - else if (IS_CANNONLAKE(dev_priv)) { - cnl_ddi_vswing_sequence(encoder, level); - /* DDI_BUF_CTL bits 27:24 are reserved on CNL */ - return 0; - } + skl_ddi_set_iboost(encoder, level, encoder->type); + return DDI_BUF_TRANS_SELECT(level); } static void intel_ddi_clk_select(struct intel_encoder *encoder, - struct intel_shared_dpll *pll) + const struct intel_shared_dpll *pll) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; uint32_t val; if (WARN_ON(!pll)) return; + mutex_lock(&dev_priv->dpll_lock); + if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. 
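
On Cannonlake the DPLL-to-port routing for every DDI lives in the one shared DPCLKA_CFGCR0 register, so the select is a read-modify-write of just this port's field, now serialized by dpll_lock so concurrent modesets on different ports cannot clobber each other; the new intel_ddi_clk_disable() counterpart simply sets the per-port clock-off bit. A compact model with an invented register layout, 2 select bits per port and clock-off bits from bit 16 (the sketch takes the lock on both paths for symmetry):

#include <stdint.h>
#include <stdio.h>
#include <pthread.h>

#define CLK_SEL_SHIFT(port) ((port) * 2)
#define CLK_SEL_MASK(port) (3u << CLK_SEL_SHIFT(port))
#define CLK_SEL(pll, port) ((uint32_t)(pll) << CLK_SEL_SHIFT(port))
#define CLK_OFF(port) (1u << (16 + (port)))

static uint32_t cfgcr0;             /* stands in for DPCLKA_CFGCR0 */
static pthread_mutex_t dpll_lock = PTHREAD_MUTEX_INITIALIZER;

static void clk_select(unsigned int pll, unsigned int port)
{
        uint32_t val;

        pthread_mutex_lock(&dpll_lock); /* register is shared by all ports */
        val = cfgcr0;
        val &= ~CLK_SEL_MASK(port);     /* touch only this port's field */
        val |= CLK_SEL(pll, port);
        val &= ~CLK_OFF(port);          /* and ungate its clock */
        cfgcr0 = val;
        pthread_mutex_unlock(&dpll_lock);
}

static void clk_disable(unsigned int port)
{
        pthread_mutex_lock(&dpll_lock);
        cfgcr0 |= CLK_OFF(port);        /* mirrors intel_ddi_clk_disable() */
        pthread_mutex_unlock(&dpll_lock);
}

int main(void)
{
        clk_select(2, 1);
        clk_select(1, 0);
        clk_disable(1);
        printf("cfgcr0 = 0x%x\n", (unsigned int)cfgcr0); /* 0x20009 */
        return 0;
}
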
*/ val = I915_READ(DPCLKA_CFGCR0); + val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->id, port); I915_WRITE(DPCLKA_CFGCR0, val); @@ -2109,7 +2117,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, val = I915_READ(DPLL_CTRL2); val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) | - DPLL_CTRL2_DDI_CLK_SEL_MASK(port)); + DPLL_CTRL2_DDI_CLK_SEL_MASK(port)); val |= (DPLL_CTRL2_DDI_CLK_SEL(pll->id, port) | DPLL_CTRL2_DDI_SEL_OVERRIDE(port)); @@ -2118,110 +2126,130 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, } else if (INTEL_INFO(dev_priv)->gen < 9) { I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll)); } + + mutex_unlock(&dev_priv->dpll_lock); +} + +static void intel_ddi_clk_disable(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; + + if (IS_CANNONLAKE(dev_priv)) + I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | + DPCLKA_CFGCR0_DDI_CLK_OFF(port)); + else if (IS_GEN9_BC(dev_priv)) + I915_WRITE(DPLL_CTRL2, I915_READ(DPLL_CTRL2) | + DPLL_CTRL2_DDI_CLK_OFF(port)); + else if (INTEL_GEN(dev_priv) < 9) + I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); } static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, - int link_rate, uint32_t lane_count, - struct intel_shared_dpll *pll, - bool link_mst) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + int level = intel_ddi_dp_level(intel_dp); - WARN_ON(link_mst && (port == PORT_A || port == PORT_E)); + WARN_ON(is_mst && (port == PORT_A || port == PORT_E)); - intel_dp_set_link_params(intel_dp, link_rate, lane_count, - link_mst); - if (encoder->type == INTEL_OUTPUT_EDP) - intel_edp_panel_on(intel_dp); + intel_dp_set_link_params(intel_dp, crtc_state->port_clock, + crtc_state->lane_count, is_mst); - intel_ddi_clk_select(encoder, pll); + intel_edp_panel_on(intel_dp); + + intel_ddi_clk_select(encoder, crtc_state->shared_dpll); intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - intel_prepare_dp_ddi_buffers(encoder); + if (IS_CANNONLAKE(dev_priv)) + cnl_ddi_vswing_sequence(encoder, level, encoder->type); + else if (IS_GEN9_LP(dev_priv)) + bxt_ddi_vswing_sequence(encoder, level, encoder->type); + else + intel_prepare_dp_ddi_buffers(encoder, crtc_state); + intel_ddi_init_dp_buf_reg(encoder); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) intel_dp_stop_link_train(intel_dp); } static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, - bool has_hdmi_sink, const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state, - struct intel_shared_dpll *pll) + const struct drm_connector_state *conn_state) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct drm_encoder *drm_encoder = 
&encoder->base; - enum port port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; int level = intel_ddi_hdmi_level(dev_priv, port); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); - intel_ddi_clk_select(encoder, pll); + intel_ddi_clk_select(encoder, crtc_state->shared_dpll); intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - intel_prepare_hdmi_ddi_buffers(encoder); - if (IS_GEN9_BC(dev_priv)) - skl_ddi_set_iboost(encoder, level); + if (IS_CANNONLAKE(dev_priv)) + cnl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); else if (IS_GEN9_LP(dev_priv)) - bxt_ddi_vswing_sequence(dev_priv, level, port, - INTEL_OUTPUT_HDMI); - else if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, level); + bxt_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); + else + intel_prepare_hdmi_ddi_buffers(encoder, level); + + if (IS_GEN9_BC(dev_priv)) + skl_ddi_set_iboost(encoder, level, INTEL_OUTPUT_HDMI); - intel_hdmi->set_infoframes(drm_encoder, - has_hdmi_sink, - crtc_state, conn_state); + intel_dig_port->set_infoframes(&encoder->base, + crtc_state->has_infoframe, + crtc_state, conn_state); } static void intel_ddi_pre_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) -{ - int type = encoder->type; - - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - intel_ddi_pre_enable_dp(encoder, - pipe_config->port_clock, - pipe_config->lane_count, - pipe_config->shared_dpll, - intel_crtc_has_type(pipe_config, - INTEL_OUTPUT_DP_MST)); - } - if (type == INTEL_OUTPUT_HDMI) { - intel_ddi_pre_enable_hdmi(encoder, - pipe_config->has_hdmi_sink, - pipe_config, conn_state, - pipe_config->shared_dpll); - } + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + + /* + * When called from DP MST code: + * - conn_state will be NULL + * - encoder will be the main encoder (ie. 
mst->primary) + * - the main connector associated with this port + * won't be active or linked to a crtc + * - crtc_state will be the state of the first stream to + * be activated on this port, and it may not be the same + * stream that will be deactivated last, but each stream + * should have a state that is identical when it comes to + * the DP link parameters + */ + + WARN_ON(crtc_state->has_pch_encoder); + + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + intel_ddi_pre_enable_hdmi(encoder, crtc_state, conn_state); + else + intel_ddi_pre_enable_dp(encoder, crtc_state, conn_state); } -static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) +static void intel_disable_ddi_buf(struct intel_encoder *encoder) { - struct drm_encoder *encoder = &intel_encoder->base; - struct drm_i915_private *dev_priv = to_i915(encoder->dev); - enum port port = intel_ddi_get_encoder_port(intel_encoder); - struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - struct intel_dp *intel_dp = NULL; - int type = intel_encoder->type; - uint32_t val; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; bool wait = false; - - /* old_crtc_state and old_conn_state are NULL when called from DP_MST */ - - if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { - intel_dp = enc_to_intel_dp(encoder); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); - } + u32 val; val = I915_READ(DDI_BUF_CTL(port)); if (val & DDI_BUF_CTL_ENABLE) { @@ -2237,34 +2265,82 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, if (wait) intel_wait_ddi_buf_idle(dev_priv, port); +} - if (intel_dp) { - intel_edp_panel_vdd_on(intel_dp); - intel_edp_panel_off(intel_dp); - } +static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_dp *intel_dp = &dig_port->dp; + bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST); - if (dig_port) - intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + /* + * Power down sink before disabling the port, otherwise we end + * up getting interrupts from the sink on detecting link loss. 
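
intel_disable_ddi_buf(), factored out above so the FDI path can reuse it, keeps the old guard logic: clear the enable bit only if it was actually set, and pay for the idle wait only when a disable really happened. The skeleton, with the register modeled as a plain variable and an invented bit value:

#include <stdbool.h>
#include <stdio.h>

#define DDI_BUF_CTL_ENABLE (1u << 31)

static unsigned int ddi_buf_ctl = DDI_BUF_CTL_ENABLE;

static void disable_ddi_buf(void)
{
        bool wait = false;
        unsigned int val = ddi_buf_ctl;     /* I915_READ */

        if (val & DDI_BUF_CTL_ENABLE) {     /* only write if it was on */
                val &= ~DDI_BUF_CTL_ENABLE;
                ddi_buf_ctl = val;          /* I915_WRITE */
                wait = true;
        }

        if (wait)                           /* idle wait only after a real disable */
                printf("waiting for DDI buffer idle\n");
}

int main(void)
{
        disable_ddi_buf();  /* was enabled: disables and waits */
        disable_ddi_buf();  /* already off: no write, no wait */
        return 0;
}

The split post-disable paths then call it in a fixed order: sink powered down first (per the comment above), buffer disabled, panel and IO power dropped, port clock gated last, the reverse of bring-up.
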
+ */ + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); - if (IS_CANNONLAKE(dev_priv)) - I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | - DPCLKA_CFGCR0_DDI_CLK_OFF(port)); - else if (IS_GEN9_BC(dev_priv)) - I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) | - DPLL_CTRL2_DDI_CLK_OFF(port))); - else if (INTEL_GEN(dev_priv) < 9) - I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); + intel_disable_ddi_buf(encoder); - if (type == INTEL_OUTPUT_HDMI) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + intel_edp_panel_vdd_on(intel_dp); + intel_edp_panel_off(intel_dp); - intel_dp_dual_mode_set_tmds_output(intel_hdmi, false); - } + intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + + intel_ddi_clk_disable(encoder); +} + +static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct intel_hdmi *intel_hdmi = &dig_port->hdmi; + + intel_disable_ddi_buf(encoder); + + dig_port->set_infoframes(&encoder->base, false, + old_crtc_state, old_conn_state); + + intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + + intel_ddi_clk_disable(encoder); + + intel_dp_dual_mode_set_tmds_output(intel_hdmi, false); +} + +static void intel_ddi_post_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + /* + * When called from DP MST code: + * - old_conn_state will be NULL + * - encoder will be the main encoder (ie. mst->primary) + * - the main connector associated with this port + * won't be active or linked to a crtc + * - old_crtc_state will be the state of the last stream to + * be deactivated on this port, and it may not be the same + * stream that was activated last, but each stream + * should have a state that is identical when it comes to + * the DP link parameters + */ + + if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) + intel_ddi_post_disable_hdmi(encoder, + old_crtc_state, old_conn_state); + else + intel_ddi_post_disable_dp(encoder, + old_crtc_state, old_conn_state); } void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); uint32_t val; @@ -2279,7 +2355,8 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, val &= ~FDI_RX_ENABLE; I915_WRITE(FDI_RX_CTL(PIPE_A), val); - intel_ddi_post_disable(encoder, old_crtc_state, old_conn_state); + intel_disable_ddi_buf(encoder); + intel_ddi_clk_disable(encoder); val = I915_READ(FDI_RX_MISC(PIPE_A)); val &= ~(FDI_RX_PWRDN_LANE1_MASK | FDI_RX_PWRDN_LANE0_MASK); @@ -2295,75 +2372,100 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *encoder, I915_WRITE(FDI_RX_CTL(PIPE_A), val); } -static void intel_enable_ddi(struct intel_encoder *intel_encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) +static void intel_enable_ddi_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - struct drm_i915_private *dev_priv = 
to_i915(encoder->dev); - enum port port = intel_ddi_get_encoder_port(intel_encoder); - int type = intel_encoder->type; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + enum port port = encoder->port; - if (type == INTEL_OUTPUT_HDMI) { - struct intel_digital_port *intel_dig_port = - enc_to_dig_port(encoder); - bool clock_ratio = pipe_config->hdmi_high_tmds_clock_ratio; - bool scrambling = pipe_config->hdmi_scrambling; - - intel_hdmi_handle_sink_scrambling(intel_encoder, - conn_state->connector, - clock_ratio, scrambling); - - /* In HDMI/DVI mode, the port width, and swing/emphasis values - * are ignored so nothing special needs to be done besides - * enabling the port. - */ - I915_WRITE(DDI_BUF_CTL(port), - intel_dig_port->saved_port_bits | - DDI_BUF_CTL_ENABLE); - } else if (type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - - if (port == PORT_A && INTEL_GEN(dev_priv) < 9) - intel_dp_stop_link_train(intel_dp); - - intel_edp_backlight_on(pipe_config, conn_state); - intel_psr_enable(intel_dp); - intel_edp_drrs_enable(intel_dp, pipe_config); - } + if (port == PORT_A && INTEL_GEN(dev_priv) < 9) + intel_dp_stop_link_train(intel_dp); + + intel_edp_backlight_on(crtc_state, conn_state); + intel_psr_enable(intel_dp, crtc_state); + intel_edp_drrs_enable(intel_dp, crtc_state); + + if (crtc_state->has_audio) + intel_audio_codec_enable(encoder, crtc_state, conn_state); +} + +static void intel_enable_ddi_hdmi(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + enum port port = encoder->port; + + intel_hdmi_handle_sink_scrambling(encoder, + conn_state->connector, + crtc_state->hdmi_high_tmds_clock_ratio, + crtc_state->hdmi_scrambling); + + /* In HDMI/DVI mode, the port width, and swing/emphasis values + * are ignored so nothing special needs to be done besides + * enabling the port. 
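
The enable path gets the same per-type split as pre-enable and post-disable, and all three now dispatch on the crtc state: intel_crtc_has_type() tests a bit in crtc_state->output_types instead of trusting the mutable encoder->type, which matters for DDI encoders that can drive either HDMI or DP depending on what was plugged in. The membership test is plain bitmask arithmetic (sketch, with invented type values):

#include <stdbool.h>
#include <stdio.h>

enum output_type { OUTPUT_DP, OUTPUT_EDP, OUTPUT_HDMI, OUTPUT_DP_MST };

struct crtc_state {
        unsigned int output_types;  /* bitmask of enum output_type */
};

static bool crtc_has_type(const struct crtc_state *s, enum output_type t)
{
        return s->output_types & (1u << t);
}

static void enable_ddi(const struct crtc_state *s)
{
        if (crtc_has_type(s, OUTPUT_HDMI))
                printf("HDMI enable path\n");
        else
                printf("DP enable path\n");  /* DP, eDP and MST share it */
}

int main(void)
{
        struct crtc_state state = { .output_types = 1u << OUTPUT_HDMI };

        enable_ddi(&state);
        return 0;
}
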
+ */ + I915_WRITE(DDI_BUF_CTL(port), + dig_port->saved_port_bits | DDI_BUF_CTL_ENABLE); + + if (crtc_state->has_audio) + intel_audio_codec_enable(encoder, crtc_state, conn_state); +} - if (pipe_config->has_audio) - intel_audio_codec_enable(intel_encoder, pipe_config, conn_state); +static void intel_enable_ddi(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + intel_enable_ddi_hdmi(encoder, crtc_state, conn_state); + else + intel_enable_ddi_dp(encoder, crtc_state, conn_state); } -static void intel_disable_ddi(struct intel_encoder *intel_encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) +static void intel_disable_ddi_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - int type = intel_encoder->type; + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); if (old_crtc_state->has_audio) - intel_audio_codec_disable(intel_encoder); + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); - if (type == INTEL_OUTPUT_HDMI) { - intel_hdmi_handle_sink_scrambling(intel_encoder, - old_conn_state->connector, - false, false); - } + intel_edp_drrs_disable(intel_dp, old_crtc_state); + intel_psr_disable(intel_dp, old_crtc_state); + intel_edp_backlight_off(old_conn_state); +} - if (type == INTEL_OUTPUT_EDP) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); +static void intel_disable_ddi_hdmi(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + if (old_crtc_state->has_audio) + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); - intel_edp_drrs_disable(intel_dp, old_crtc_state); - intel_psr_disable(intel_dp); - intel_edp_backlight_off(old_conn_state); - } + intel_hdmi_handle_sink_scrambling(encoder, + old_conn_state->connector, + false, false); +} + +static void intel_disable_ddi(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) + intel_disable_ddi_hdmi(encoder, old_crtc_state, old_conn_state); + else + intel_disable_ddi_dp(encoder, old_crtc_state, old_conn_state); } static void bxt_ddi_pre_pll_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { uint8_t mask = pipe_config->lane_lat_optim_mask; @@ -2375,7 +2477,7 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; uint32_t val; bool wait = false; @@ -2416,26 +2518,33 @@ void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) udelay(600); } -bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, - struct intel_crtc *intel_crtc) +static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder) { - u32 temp; + if (cpu_transcoder == TRANSCODER_EDP) + return false; - if 
(intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) { - temp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD); - if (temp & AUDIO_OUTPUT_ENABLE(intel_crtc->pipe)) - return true; - } - return false; + if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) + return false; + + return I915_READ(HSW_AUD_PIN_ELD_CP_VLD) & + AUDIO_OUTPUT_ENABLE(cpu_transcoder); +} + +void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, + struct intel_crtc_state *crtc_state) +{ + if (IS_CANNONLAKE(dev_priv) && crtc_state->port_clock > 594000) + crtc_state->min_voltage_level = 2; } void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; - struct intel_hdmi *intel_hdmi; + struct intel_digital_port *intel_dig_port; u32 temp, flags = 0; /* XXX: DSI transcoder paranoia */ @@ -2474,9 +2583,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder, switch (temp & TRANS_DDI_MODE_SELECT_MASK) { case TRANS_DDI_MODE_SELECT_HDMI: pipe_config->has_hdmi_sink = true; - intel_hdmi = enc_to_intel_hdmi(&encoder->base); + intel_dig_port = enc_to_dig_port(&encoder->base); - if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config)) + if (intel_dig_port->infoframe_enabled(&encoder->base, pipe_config)) pipe_config->has_infoframe = true; if ((temp & TRANS_DDI_HDMI_SCRAMBLING_MASK) == @@ -2486,12 +2595,23 @@ void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->hdmi_high_tmds_clock_ratio = true; /* fall through */ case TRANS_DDI_MODE_SELECT_DVI: + pipe_config->output_types |= BIT(INTEL_OUTPUT_HDMI); pipe_config->lane_count = 4; break; case TRANS_DDI_MODE_SELECT_FDI: + pipe_config->output_types |= BIT(INTEL_OUTPUT_ANALOG); break; case TRANS_DDI_MODE_SELECT_DP_SST: + if (encoder->type == INTEL_OUTPUT_EDP) + pipe_config->output_types |= BIT(INTEL_OUTPUT_EDP); + else + pipe_config->output_types |= BIT(INTEL_OUTPUT_DP); + pipe_config->lane_count = + ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; + intel_dp_get_m_n(intel_crtc, pipe_config); + break; case TRANS_DDI_MODE_SELECT_DP_MST: + pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST); pipe_config->lane_count = ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; intel_dp_get_m_n(intel_crtc, pipe_config); @@ -2501,7 +2621,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder, } pipe_config->has_audio = - intel_ddi_is_audio_enabled(dev_priv, intel_crtc); + intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder); if (encoder->type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.bpp && pipe_config->pipe_bpp > dev_priv->vbt.edp.bpp) { @@ -2528,6 +2648,26 @@ void intel_ddi_get_config(struct intel_encoder *encoder, if (IS_GEN9_LP(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_get_lane_lat_optim_mask(encoder); + + intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); +} + +static enum intel_output_type +intel_ddi_compute_output_type(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + switch (conn_state->connector->connector_type) { + case DRM_MODE_CONNECTOR_HDMIA: + return INTEL_OUTPUT_HDMI; + case DRM_MODE_CONNECTOR_eDP: + return INTEL_OUTPUT_EDP; + case DRM_MODE_CONNECTOR_DisplayPort: + return INTEL_OUTPUT_DP; + default: + 
MISSING_CASE(conn_state->connector->connector_type); + return INTEL_OUTPUT_UNUSED; + } } static bool intel_ddi_compute_config(struct intel_encoder *encoder, @@ -2535,24 +2675,22 @@ static bool intel_ddi_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - int type = encoder->type; - int port = intel_ddi_get_encoder_port(encoder); + enum port port = encoder->port; int ret; - WARN(type == INTEL_OUTPUT_UNKNOWN, "compute_config() on unknown output!\n"); - if (port == PORT_A) pipe_config->cpu_transcoder = TRANSCODER_EDP; - if (type == INTEL_OUTPUT_HDMI) + if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state); else ret = intel_dp_compute_config(encoder, pipe_config, conn_state); if (IS_GEN9_LP(dev_priv) && ret) pipe_config->lane_lat_optim_mask = - bxt_ddi_phy_calc_lane_lat_optim_mask(encoder, - pipe_config->lane_count); + bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); + + intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); return ret; @@ -2567,7 +2705,7 @@ static struct intel_connector * intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port) { struct intel_connector *connector; - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; connector = intel_connector_alloc(); if (!connector) @@ -2586,7 +2724,7 @@ static struct intel_connector * intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) { struct intel_connector *connector; - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; connector = intel_connector_alloc(); if (!connector) @@ -2598,6 +2736,34 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) return connector; } +static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dport) +{ + struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); + + if (dport->base.port != PORT_A) + return false; + + if (dport->saved_port_bits & DDI_A_4_LANES) + return false; + + /* Broxton/Geminilake: Bspec says that DDI_A_4_LANES is the only + * supported configuration + */ + if (IS_GEN9_LP(dev_priv)) + return true; + + /* Cannonlake: Most SKUs don't support DDI_E, and the only + * one that does also has a full A/E split called + * DDI_F, which makes DDI_E useless. However, for this + * case let's trust the VBT info. 
+ */ + if (IS_CANNONLAKE(dev_priv) && + !intel_bios_is_port_present(dev_priv, PORT_E)) + return true; + + return false; +} + void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *intel_dig_port; @@ -2664,6 +2830,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) drm_encoder_init(&dev_priv->drm, encoder, &intel_ddi_funcs, DRM_MODE_ENCODER_TMDS, "DDI %c", port_name(port)); + intel_encoder->compute_output_type = intel_ddi_compute_output_type; intel_encoder->compute_config = intel_ddi_compute_config; intel_encoder->enable = intel_enable_ddi; if (IS_GEN9_LP(dev_priv)) @@ -2676,7 +2843,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->suspend = intel_dp_encoder_suspend; intel_encoder->get_power_domains = intel_ddi_get_power_domains; - intel_dig_port->port = port; intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); @@ -2707,28 +2873,27 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) } /* - * Bspec says that DDI_A_4_LANES is the only supported configuration - * for Broxton. Yet some BIOS fail to set this bit on port A if eDP - * wasn't lit up at boot. Force this bit on in our internal - * configuration so that we use the proper lane count for our - * calculations. + * Some BIOS might fail to set this bit on port A if eDP + * wasn't lit up at boot. Force this bit set when needed + * so we use the proper lane count for our calculations. */ - if (IS_GEN9_LP(dev_priv) && port == PORT_A) { - if (!(intel_dig_port->saved_port_bits & DDI_A_4_LANES)) { - DRM_DEBUG_KMS("BXT BIOS forgot to set DDI_A_4_LANES for port A; fixing\n"); - intel_dig_port->saved_port_bits |= DDI_A_4_LANES; - max_lanes = 4; - } + if (intel_ddi_a_force_4_lanes(intel_dig_port)) { + DRM_DEBUG_KMS("Forcing DDI_A_4_LANES for port A\n"); + intel_dig_port->saved_port_bits |= DDI_A_4_LANES; + max_lanes = 4; } + intel_dig_port->dp.output_reg = INVALID_MMIO_REG; intel_dig_port->max_lanes = max_lanes; - intel_encoder->type = INTEL_OUTPUT_UNKNOWN; + intel_encoder->type = INTEL_OUTPUT_DDI; intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); intel_encoder->cloneable = 0; + intel_infoframe_init(intel_dig_port); + if (init_dp) { if (!intel_ddi_init_dp_connector(intel_dig_port)) goto err; diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 5f91ddc78c7a..d28592e43512 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -22,6 +22,9 @@ * */ +#include <drm/drm_print.h> + +#include "intel_device_info.h" #include "i915_drv.h" #define PLATFORM_NAME(x) [INTEL_##x] = #x @@ -67,21 +70,88 @@ const char *intel_platform_name(enum intel_platform platform) return platform_names[platform]; } -void intel_device_info_dump(struct drm_i915_private *dev_priv) +void intel_device_info_dump_flags(const struct intel_device_info *info, + struct drm_printer *p) { - const struct intel_device_info *info = &dev_priv->info; - - DRM_DEBUG_DRIVER("i915 device info: platform=%s gen=%i pciid=0x%04x rev=0x%02x", - intel_platform_name(info->platform), - info->gen, - dev_priv->drm.pdev->device, - dev_priv->drm.pdev->revision); -#define PRINT_FLAG(name) \ - DRM_DEBUG_DRIVER("i915 device info: " #name ": %s", yesno(info->name)) +#define PRINT_FLAG(name) drm_printf(p, "%s: %s\n", #name, 
yesno(info->name)); DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); #undef PRINT_FLAG } +static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) +{ + drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); + drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); + drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); + drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask); + drm_printf(p, "subslice per slice: %u\n", + hweight8(sseu->subslice_mask)); + drm_printf(p, "EU total: %u\n", sseu->eu_total); + drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); + drm_printf(p, "has slice power gating: %s\n", + yesno(sseu->has_slice_pg)); + drm_printf(p, "has subslice power gating: %s\n", + yesno(sseu->has_subslice_pg)); + drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg)); +} + +void intel_device_info_dump_runtime(const struct intel_device_info *info, + struct drm_printer *p) +{ + sseu_dump(&info->sseu, p); + + drm_printf(p, "CS timestamp frequency: %u kHz\n", + info->cs_timestamp_frequency_khz); +} + +void intel_device_info_dump(const struct intel_device_info *info, + struct drm_printer *p) +{ + struct drm_i915_private *dev_priv = + container_of(info, struct drm_i915_private, info); + + drm_printf(p, "pciid=0x%04x rev=0x%02x platform=%s gen=%i\n", + INTEL_DEVID(dev_priv), + INTEL_REVID(dev_priv), + intel_platform_name(info->platform), + info->gen); + + intel_device_info_dump_flags(info, p); +} + +static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) +{ + struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; + const u32 fuse2 = I915_READ(GEN8_FUSE2); + + sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> + GEN10_F2_S_ENA_SHIFT; + sseu->subslice_mask = (1 << 4) - 1; + sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> + GEN10_F2_SS_DIS_SHIFT); + + sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0)); + sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1)); + sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2)); + sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) & + GEN10_EU_DIS_SS_MASK)); + + /* + * CNL is expected to always have a uniform distribution + * of EU across subslices with the exception that any one + * EU in any one subslice may be fused off for die + * recovery. + */ + sseu->eu_per_subslice = sseu_subslice_total(sseu) ? + DIV_ROUND_UP(sseu->eu_total, + sseu_subslice_total(sseu)) : 0; + + /* No restrictions on Power Gating */ + sseu->has_slice_pg = 1; + sseu->has_subslice_pg = 1; + sseu->has_eu_pg = 1; +} + static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; @@ -202,16 +272,6 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) #define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; - /* - * There is a HW issue in 2x6 fused down parts that requires - * Pooled EU to be enabled as a WA. The pool configuration - * changes depending upon which subslice is fused down. This - * doesn't affect if the device has all 3 subslices enabled. 
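
gen10_sseu_info_init() above derives everything from fuse registers: a disable mask is inverted and popcounted (hweight32), so every cleared fuse bit contributes one live EU, and eu_per_subslice is then a guarded round-up division. The arithmetic in isolation, using the compiler builtin behind the kernel's hweight32():

#include <stdint.h>
#include <stdio.h>

#define hweight32(v) ((unsigned int)__builtin_popcount(v))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        uint32_t eu_disable = 0xff00000f;   /* pretend 12 of 32 EUs fused off */
        unsigned int subslices = 3;         /* pretend 3 subslices survived */

        unsigned int eu_total = hweight32(~eu_disable);          /* 20 */
        unsigned int eu_per = subslices ?
                DIV_ROUND_UP(eu_total, subslices) : 0;           /* 7 */

        printf("eu_total=%u eu_per_subslice=%u\n", eu_total, eu_per);
        return 0;
}
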
- */ - /* WaEnablePooledEuFor2x6:bxt */ - info->has_pooled_eu |= (hweight8(sseu->subslice_mask) == 2 && - IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST)); - sseu->min_eu_in_pool = 0; if (info->has_pooled_eu) { if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0)) @@ -296,7 +356,111 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = 0; } -/* +static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) +{ + u32 ts_override = I915_READ(GEN9_TIMESTAMP_OVERRIDE); + u32 base_freq, frac_freq; + + base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; + base_freq *= 1000; + + frac_freq = ((ts_override & + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); + frac_freq = 1000 / (frac_freq + 1); + + return base_freq + frac_freq; +} + +static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) +{ + u32 f12_5_mhz = 12500; + u32 f19_2_mhz = 19200; + u32 f24_mhz = 24000; + + if (INTEL_GEN(dev_priv) <= 4) { + /* PRMs say: + * + * "The value in this register increments once every 16 + * hclks." (through the “Clocking Configuration” + * (“CLKCFG”) MCHBAR register) + */ + return dev_priv->rawclk_freq / 16; + } else if (INTEL_GEN(dev_priv) <= 8) { + /* PRMs say: + * + * "The PCU TSC counts 10ns increments; this timestamp + * reflects bits 38:3 of the TSC (i.e. 80ns granularity, + * rolling over every 1.5 hours)." + */ + return f12_5_mhz; + } else if (INTEL_GEN(dev_priv) <= 9) { + u32 ctc_reg = I915_READ(CTC_MODE); + u32 freq = 0; + + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(dev_priv); + } else { + freq = IS_GEN9_LP(dev_priv) ? f19_2_mhz : f24_mhz; + + /* Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycles). + */ + freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >> + CTC_SHIFT_PARAMETER_SHIFT); + } + + return freq; + } else if (INTEL_GEN(dev_priv) <= 10) { + u32 ctc_reg = I915_READ(CTC_MODE); + u32 freq = 0; + u32 rpm_config_reg = 0; + + /* First figure out the reference frequency. There are 2 ways + * we can compute the frequency, either through the + * TIMESTAMP_OVERRIDE register or through RPM_CONFIG. CTC_MODE + * tells us which one we should use. + */ + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(dev_priv); + } else { + u32 crystal_clock; + + rpm_config_reg = I915_READ(RPM_CONFIG0); + crystal_clock = (rpm_config_reg & + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + switch (crystal_clock) { + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + freq = f19_2_mhz; + break; + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + freq = f24_mhz; + break; + } + + /* Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycles). + */ + freq >>= 3 - ((rpm_config_reg & + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + } + + return freq; + } + + MISSING_CASE("Unknown gen, unable to read command streamer timestamp frequency\n"); + return 0; +} + +/** + * intel_device_info_runtime_init - initialize runtime info + * @info: intel device info struct + * * Determine various intel_device_info fields at runtime. 
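
The frequency derivation above is worth tracing once with numbers: read_reference_ts_freq() splits TIMESTAMP_OVERRIDE into an integer divider (whole MHz, stored minus one) and a fractional denominator, combines both in kHz, and the command-stream rate is that reference shifted down by the CTC shift parameter. The same arithmetic in plain C, with invented field positions (the real masks live in i915_reg.h):

#include <stdint.h>
#include <stdio.h>

#define DIVIDER_MASK 0x000000ffu        /* invented layout for the sketch */
#define DIVIDER_SHIFT 0
#define DENOMINATOR_MASK 0x00000f00u
#define DENOMINATOR_SHIFT 8

static uint32_t reference_khz(uint32_t ts_override)
{
        uint32_t base = ((ts_override & DIVIDER_MASK) >> DIVIDER_SHIFT) + 1;
        uint32_t frac = (ts_override & DENOMINATOR_MASK) >> DENOMINATOR_SHIFT;

        base *= 1000;               /* whole MHz -> kHz */
        frac = 1000 / (frac + 1);   /* fractional remainder in kHz */
        return base + frac;
}

int main(void)
{
        uint32_t freq = reference_khz(0x412);   /* (18+1) MHz + 1000/5 = 19200 kHz */
        uint32_t shift = 1;                     /* CTC/RPM_CONFIG shift parameter */

        freq >>= 3 - shift;         /* timestamps tick slower than the reference */
        printf("%u kHz\n", freq);   /* 4800 */
        return 0;
}
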
* * Use it when either: @@ -309,12 +473,16 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * - after the PCH has been detected, * - before the first usage of the fields it can tweak. */ -void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) +void intel_device_info_runtime_init(struct intel_device_info *info) { - struct intel_device_info *info = mkwrite_device_info(dev_priv); + struct drm_i915_private *dev_priv = + container_of(info, struct drm_i915_private, info); enum pipe pipe; - if (INTEL_GEN(dev_priv) >= 9) { + if (INTEL_GEN(dev_priv) >= 10) { + for_each_pipe(dev_priv, pipe) + info->num_scalers[pipe] = 2; + } else if (INTEL_GEN(dev_priv) == 9) { info->num_scalers[PIPE_A] = 2; info->num_scalers[PIPE_B] = 2; info->num_scalers[PIPE_C] = 1; @@ -343,7 +511,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->num_sprites[pipe] = 1; } - if (i915.disable_display) { + if (i915_modparams.disable_display) { DRM_INFO("Display disabled (module parameter)\n"); info->num_pipes = 0; } else if (info->num_pipes > 0 && @@ -409,24 +577,11 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) cherryview_sseu_info_init(dev_priv); else if (IS_BROADWELL(dev_priv)) broadwell_sseu_info_init(dev_priv); - else if (INTEL_INFO(dev_priv)->gen >= 9) + else if (INTEL_GEN(dev_priv) == 9) gen9_sseu_info_init(dev_priv); + else if (INTEL_GEN(dev_priv) >= 10) + gen10_sseu_info_init(dev_priv); - info->has_snoop = !info->has_llc; - - DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); - DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); - DRM_DEBUG_DRIVER("subslice total: %u\n", - sseu_subslice_total(&info->sseu)); - DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask); - DRM_DEBUG_DRIVER("subslice per slice: %u\n", - hweight8(info->sseu.subslice_mask)); - DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total); - DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice); - DRM_DEBUG_DRIVER("has slice power gating: %s\n", - info->sseu.has_slice_pg ? "y" : "n"); - DRM_DEBUG_DRIVER("has subslice power gating: %s\n", - info->sseu.has_subslice_pg ? "y" : "n"); - DRM_DEBUG_DRIVER("has EU power gating: %s\n", - info->sseu.has_eu_pg ? "y" : "n"); + /* Initialize command stream timestamp frequency */ + info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h new file mode 100644 index 000000000000..49cb27bd04c1 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -0,0 +1,183 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_DEVICE_INFO_H_ +#define _INTEL_DEVICE_INFO_H_ + +#include "intel_display.h" + +struct drm_printer; +struct drm_i915_private; + +/* Keep in gen based order, and chronological order within a gen */ +enum intel_platform { + INTEL_PLATFORM_UNINITIALIZED = 0, + /* gen2 */ + INTEL_I830, + INTEL_I845G, + INTEL_I85X, + INTEL_I865G, + /* gen3 */ + INTEL_I915G, + INTEL_I915GM, + INTEL_I945G, + INTEL_I945GM, + INTEL_G33, + INTEL_PINEVIEW, + /* gen4 */ + INTEL_I965G, + INTEL_I965GM, + INTEL_G45, + INTEL_GM45, + /* gen5 */ + INTEL_IRONLAKE, + /* gen6 */ + INTEL_SANDYBRIDGE, + /* gen7 */ + INTEL_IVYBRIDGE, + INTEL_VALLEYVIEW, + INTEL_HASWELL, + /* gen8 */ + INTEL_BROADWELL, + INTEL_CHERRYVIEW, + /* gen9 */ + INTEL_SKYLAKE, + INTEL_BROXTON, + INTEL_KABYLAKE, + INTEL_GEMINILAKE, + INTEL_COFFEELAKE, + /* gen10 */ + INTEL_CANNONLAKE, + INTEL_MAX_PLATFORMS +}; + +#define DEV_INFO_FOR_EACH_FLAG(func) \ + func(is_mobile); \ + func(is_lp); \ + func(is_alpha_support); \ + /* Keep has_* in alphabetical order */ \ + func(has_64bit_reloc); \ + func(has_aliasing_ppgtt); \ + func(has_csr); \ + func(has_ddi); \ + func(has_dp_mst); \ + func(has_reset_engine); \ + func(has_fbc); \ + func(has_fpga_dbg); \ + func(has_full_ppgtt); \ + func(has_full_48bit_ppgtt); \ + func(has_gmch_display); \ + func(has_guc); \ + func(has_guc_ct); \ + func(has_hotplug); \ + func(has_l3_dpf); \ + func(has_llc); \ + func(has_logical_ring_contexts); \ + func(has_logical_ring_preemption); \ + func(has_overlay); \ + func(has_pooled_eu); \ + func(has_psr); \ + func(has_rc6); \ + func(has_rc6p); \ + func(has_resource_streamer); \ + func(has_runtime_pm); \ + func(has_snoop); \ + func(unfenced_needs_alignment); \ + func(cursor_needs_physical); \ + func(hws_needs_physical); \ + func(overlay_needs_physical); \ + func(supports_tv); \ + func(has_ipc); + +struct sseu_dev_info { + u8 slice_mask; + u8 subslice_mask; + u8 eu_total; + u8 eu_per_subslice; + u8 min_eu_in_pool; + /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? 
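
DEV_INFO_FOR_EACH_FLAG above is the classic X-macro: the same flag list expands once into one-bit struct fields (DEFINE_FLAG, further down in this header) and once into the drm_printf dump (PRINT_FLAG in intel_device_info.c), so the two can never drift apart. Reduced to essentials:

#include <stdio.h>

#define FOR_EACH_FLAG(func) \
        func(has_llc); \
        func(has_ddi); \
        func(has_psr);

struct device_info {
#define DEFINE_FLAG(name) unsigned char name:1
        FOR_EACH_FLAG(DEFINE_FLAG);
#undef DEFINE_FLAG
};

static void dump_flags(const struct device_info *info)
{
#define PRINT_FLAG(name) printf("%s: %s\n", #name, info->name ? "yes" : "no")
        FOR_EACH_FLAG(PRINT_FLAG);
#undef PRINT_FLAG
}

int main(void)
{
        struct device_info info = { .has_llc = 1, .has_ddi = 1 };

        dump_flags(&info);  /* adding a flag to the list updates both expansions */
        return 0;
}
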
*/ + u8 subslice_7eu[3]; + u8 has_slice_pg:1; + u8 has_subslice_pg:1; + u8 has_eu_pg:1; +}; + +struct intel_device_info { + u16 device_id; + u16 gen_mask; + + u8 gen; + u8 gt; /* GT number, 0 if undefined */ + u8 num_rings; + u8 ring_mask; /* Rings supported by the HW */ + + enum intel_platform platform; + u32 platform_mask; + + u32 display_mmio_offset; + + u8 num_pipes; + u8 num_sprites[I915_MAX_PIPES]; + u8 num_scalers[I915_MAX_PIPES]; + + unsigned int page_sizes; /* page sizes supported by the HW */ + +#define DEFINE_FLAG(name) u8 name:1 + DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); +#undef DEFINE_FLAG + u16 ddb_size; /* in blocks */ + + /* Register offsets for the various display pipes and transcoders */ + int pipe_offsets[I915_MAX_TRANSCODERS]; + int trans_offsets[I915_MAX_TRANSCODERS]; + int palette_offsets[I915_MAX_PIPES]; + int cursor_offsets[I915_MAX_PIPES]; + + /* Slice/subslice/EU info */ + struct sseu_dev_info sseu; + + u32 cs_timestamp_frequency_khz; + + struct color_luts { + u16 degamma_lut_size; + u16 gamma_lut_size; + } color; +}; + +static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) +{ + return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); +} + +const char *intel_platform_name(enum intel_platform platform); + +void intel_device_info_runtime_init(struct intel_device_info *info); +void intel_device_info_dump(const struct intel_device_info *info, + struct drm_printer *p); +void intel_device_info_dump_flags(const struct intel_device_info *info, + struct drm_printer *p); +void intel_device_info_dump_runtime(const struct intel_device_info *info, + struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5ebdb63330dd..f288bcc7be22 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -219,10 +219,8 @@ intel_fdi_link_freq(struct drm_i915_private *dev_priv, { if (HAS_DDI(dev_priv)) return pipe_config->port_clock; /* SPLL */ - else if (IS_GEN5(dev_priv)) - return ((I915_READ(FDI_PLL_BIOS_0) & FDI_PLL_FB_CLOCK_MASK) + 2) * 10000; else - return 270000; + return dev_priv->fdi_pll_freq; } static const struct intel_limit intel_limits_i8xx_dac = { @@ -491,7 +489,7 @@ static const struct intel_limit intel_limits_bxt = { }; static bool -needs_modeset(struct drm_crtc_state *state) +needs_modeset(const struct drm_crtc_state *state) { return drm_atomic_crtc_needs_modeset(state); } @@ -1000,7 +998,8 @@ enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv, return crtc->config->cpu_transcoder; } -static bool pipe_dsl_stopped(struct drm_i915_private *dev_priv, enum pipe pipe) +static bool pipe_scanline_is_moving(struct drm_i915_private *dev_priv, + enum pipe pipe) { i915_reg_t reg = PIPEDSL(pipe); u32 line1, line2; @@ -1015,32 +1014,38 @@ static bool pipe_dsl_stopped(struct drm_i915_private *dev_priv, enum pipe pipe) msleep(5); line2 = I915_READ(reg) & line_mask; - return line1 == line2; + return line1 != line2; } -/* - * intel_wait_for_pipe_off - wait for pipe to turn off - * @crtc: crtc whose pipe to wait for - * - * After disabling a pipe, we can't wait for vblank in the usual way, - * spinning on the vblank interrupt status bit, since we won't actually - * see an interrupt when the pipe is disabled. - * - * On Gen4 and above: - * wait for the pipe register state bit to turn off - * - * Otherwise: - * wait for the display line value to settle (it usually - * ends up stopping at the start of the next frame). 
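
Over in intel_display.c, pipe_dsl_stopped() is inverted into pipe_scanline_is_moving(): sample the scanline counter, sleep long enough that a live pipe must have advanced, sample again, and report movement iff the two reads differ; the pipe-off and the new pipe-start waits then become the same helper with opposite polarity. The detection logic against a fake counter:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static unsigned int fake_dsl;       /* stands in for the PIPEDSL register */
static bool pipe_running = true;

static unsigned int read_dsl(void)
{
        if (pipe_running)
                fake_dsl = (fake_dsl + 100) % 1125; /* counter advances */
        return fake_dsl;
}

static bool scanline_is_moving(void)
{
        unsigned int line1 = read_dsl();

        usleep(5000);               /* mirrors the driver's msleep(5) */
        return read_dsl() != line1; /* moving iff the counter changed */
}

int main(void)
{
        printf("moving: %d\n", scanline_is_moving());   /* 1 */
        pipe_running = false;
        printf("moving: %d\n", scanline_is_moving());   /* 0 */
        return 0;
}
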
- * - */ -static void intel_wait_for_pipe_off(struct intel_crtc *crtc) +static void wait_for_pipe_scanline_moving(struct intel_crtc *crtc, bool state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; enum pipe pipe = crtc->pipe; + /* Wait for the display line to settle/start moving */ + if (wait_for(pipe_scanline_is_moving(dev_priv, pipe) == state, 100)) + DRM_ERROR("pipe %c scanline %s wait timed out\n", + pipe_name(pipe), onoff(state)); +} + +static void intel_wait_for_pipe_scanline_stopped(struct intel_crtc *crtc) +{ + wait_for_pipe_scanline_moving(crtc, false); +} + +static void intel_wait_for_pipe_scanline_moving(struct intel_crtc *crtc) +{ + wait_for_pipe_scanline_moving(crtc, true); +} + +static void +intel_wait_for_pipe_off(const struct intel_crtc_state *old_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + if (INTEL_GEN(dev_priv) >= 4) { + enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; i915_reg_t reg = PIPECONF(cpu_transcoder); /* Wait for the Pipe State to go off */ @@ -1049,9 +1054,7 @@ static void intel_wait_for_pipe_off(struct intel_crtc *crtc) 100)) WARN(1, "pipe_off wait timed out\n"); } else { - /* Wait for the display line to settle */ - if (wait_for(pipe_dsl_stopped(dev_priv, pipe), 100)) - WARN(1, "pipe_off wait timed out\n"); + intel_wait_for_pipe_scanline_stopped(crtc); } } @@ -1192,23 +1195,6 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, enum pipe pipe) pipe_name(pipe)); } -static void assert_cursor(struct drm_i915_private *dev_priv, - enum pipe pipe, bool state) -{ - bool cur_state; - - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - cur_state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; - else - cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; - - I915_STATE_WARN(cur_state != state, - "cursor on pipe %c assertion failure (expected %s, current %s)\n", - pipe_name(pipe), onoff(state), onoff(cur_state)); -} -#define assert_cursor_enabled(d, p) assert_cursor(d, p, true) -#define assert_cursor_disabled(d, p) assert_cursor(d, p, false) - void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state) { @@ -1236,77 +1222,25 @@ void assert_pipe(struct drm_i915_private *dev_priv, pipe_name(pipe), onoff(state), onoff(cur_state)); } -static void assert_plane(struct drm_i915_private *dev_priv, - enum plane plane, bool state) +static void assert_plane(struct intel_plane *plane, bool state) { - u32 val; - bool cur_state; + bool cur_state = plane->get_hw_state(plane); - val = I915_READ(DSPCNTR(plane)); - cur_state = !!(val & DISPLAY_PLANE_ENABLE); I915_STATE_WARN(cur_state != state, - "plane %c assertion failure (expected %s, current %s)\n", - plane_name(plane), onoff(state), onoff(cur_state)); + "%s assertion failure (expected %s, current %s)\n", + plane->base.name, onoff(state), onoff(cur_state)); } -#define assert_plane_enabled(d, p) assert_plane(d, p, true) -#define assert_plane_disabled(d, p) assert_plane(d, p, false) +#define assert_plane_enabled(p) assert_plane(p, true) +#define assert_plane_disabled(p) assert_plane(p, false) -static void assert_planes_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe) -{ - int i; - - /* Primary planes are fixed to pipes on gen4+ */ - if (INTEL_GEN(dev_priv) >= 4) { - u32 val = I915_READ(DSPCNTR(pipe)); - I915_STATE_WARN(val & DISPLAY_PLANE_ENABLE, - "plane %c assertion failure, should be disabled 
but not\n", - plane_name(pipe)); - return; - } - - /* Need to check both planes against the pipe */ - for_each_pipe(dev_priv, i) { - u32 val = I915_READ(DSPCNTR(i)); - enum pipe cur_pipe = (val & DISPPLANE_SEL_PIPE_MASK) >> - DISPPLANE_SEL_PIPE_SHIFT; - I915_STATE_WARN((val & DISPLAY_PLANE_ENABLE) && pipe == cur_pipe, - "plane %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(i), pipe_name(pipe)); - } -} - -static void assert_sprites_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe) +static void assert_planes_disabled(struct intel_crtc *crtc) { - int sprite; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_plane *plane; - if (INTEL_GEN(dev_priv) >= 9) { - for_each_sprite(dev_priv, pipe, sprite) { - u32 val = I915_READ(PLANE_CTL(pipe, sprite)); - I915_STATE_WARN(val & PLANE_CTL_ENABLE, - "plane %d assertion failure, should be off on pipe %c but is still active\n", - sprite, pipe_name(pipe)); - } - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - for_each_sprite(dev_priv, pipe, sprite) { - u32 val = I915_READ(SPCNTR(pipe, PLANE_SPRITE0 + sprite)); - I915_STATE_WARN(val & SP_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - sprite_name(pipe, sprite), pipe_name(pipe)); - } - } else if (INTEL_GEN(dev_priv) >= 7) { - u32 val = I915_READ(SPRCTL(pipe)); - I915_STATE_WARN(val & SPRITE_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(pipe), pipe_name(pipe)); - } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { - u32 val = I915_READ(DVSCNTR(pipe)); - I915_STATE_WARN(val & DVS_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(pipe), pipe_name(pipe)); - } + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) + assert_plane_disabled(plane); } static void assert_vblank_disabled(struct drm_crtc *crtc) @@ -1539,7 +1473,7 @@ static void chv_enable_pll(struct intel_crtc *crtc, * DPLLCMD is AWOL. Use chicken bits to propagate * the value from DPLLBMD to either pipe B or C. */ - I915_WRITE(CBR4_VLV, pipe == PIPE_B ? CBR_DPLLBMD_PIPE_B : CBR_DPLLBMD_PIPE_C); + I915_WRITE(CBR4_VLV, CBR_DPLLBMD_PIPE(pipe)); I915_WRITE(DPLL_MD(PIPE_B), pipe_config->dpll_hw_state.dpll_md); I915_WRITE(CBR4_VLV, 0); dev_priv->chv_dpll_md[pipe] = pipe_config->dpll_hw_state.dpll_md; @@ -1568,11 +1502,12 @@ static int intel_num_dvo_pipes(struct drm_i915_private *dev_priv) return count; } -static void i9xx_enable_pll(struct intel_crtc *crtc) +static void i9xx_enable_pll(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); i915_reg_t reg = DPLL(crtc->pipe); - u32 dpll = crtc->config->dpll_hw_state.dpll; + u32 dpll = crtc_state->dpll_hw_state.dpll; int i; assert_pipe_disabled(dev_priv, crtc->pipe); @@ -1609,7 +1544,7 @@ static void i9xx_enable_pll(struct intel_crtc *crtc) if (INTEL_GEN(dev_priv) >= 4) { I915_WRITE(DPLL_MD(crtc->pipe), - crtc->config->dpll_hw_state.dpll_md); + crtc_state->dpll_hw_state.dpll_md); } else { /* The pixel multiplier can only be updated once the * DPLL is enabled and the clocks are stable. @@ -1627,15 +1562,6 @@ static void i9xx_enable_pll(struct intel_crtc *crtc) } } -/** - * i9xx_disable_pll - disable a PLL - * @dev_priv: i915 private structure - * @pipe: pipe PLL to disable - * - * Disable the PLL for @pipe, making sure the pipe is off first. - * - * Note! This is for pre-ILK only. 
- */ static void i9xx_disable_pll(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1711,7 +1637,7 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, u32 port_mask; i915_reg_t dpll_reg; - switch (dport->port) { + switch (dport->base.port) { case PORT_B: port_mask = DPLL_PORTB_READY_MASK; dpll_reg = DPLL(0); @@ -1733,7 +1659,8 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, dpll_reg, port_mask, expected_mask, 1000)) WARN(1, "timed out waiting for port %c ready: got 0x%x, expected 0x%x\n", - port_name(dport->port), I915_READ(dpll_reg) & port_mask, expected_mask); + port_name(dport->base.port), + I915_READ(dpll_reg) & port_mask, expected_mask); } static void ironlake_enable_pch_transcoder(struct drm_i915_private *dev_priv, @@ -1881,35 +1808,24 @@ enum pipe intel_crtc_pch_transcoder(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - WARN_ON(!crtc->config->has_pch_encoder); - if (HAS_PCH_LPT(dev_priv)) return PIPE_A; else return crtc->pipe; } -/** - * intel_enable_pipe - enable a pipe, asserting requirements - * @crtc: crtc responsible for the pipe - * - * Enable @crtc's pipe, making sure that various hardware specific requirements - * are met, if applicable, e.g. PLL enabled, LVDS pairs enabled, etc. - */ -static void intel_enable_pipe(struct intel_crtc *crtc) +static void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; enum pipe pipe = crtc->pipe; - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; i915_reg_t reg; u32 val; DRM_DEBUG_KMS("enabling pipe %c\n", pipe_name(pipe)); - assert_planes_disabled(dev_priv, pipe); - assert_cursor_disabled(dev_priv, pipe); - assert_sprites_disabled(dev_priv, pipe); + assert_planes_disabled(crtc); /* * A pipe without a PLL won't actually be able to drive bits from @@ -1917,12 +1833,12 @@ static void intel_enable_pipe(struct intel_crtc *crtc) * need the check. */ if (HAS_GMCH_DISPLAY(dev_priv)) { - if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI)) + if (intel_crtc_has_type(new_crtc_state, INTEL_OUTPUT_DSI)) assert_dsi_pll_enabled(dev_priv); else assert_pll_enabled(dev_priv, pipe); } else { - if (crtc->config->has_pch_encoder) { + if (new_crtc_state->has_pch_encoder) { /* if driving the PCH, we need FDI enabled */ assert_fdi_rx_pll_enabled(dev_priv, intel_crtc_pch_transcoder(crtc)); @@ -1944,31 +1860,21 @@ static void intel_enable_pipe(struct intel_crtc *crtc) POSTING_READ(reg); /* - * Until the pipe starts DSL will read as 0, which would cause - * an apparent vblank timestamp jump, which messes up also the - * frame count when it's derived from the timestamps. So let's - * wait for the pipe to start properly before we call - * drm_crtc_vblank_on() + * Until the pipe starts PIPEDSL reads will return a stale value, + * which causes an apparent vblank timestamp jump when PIPEDSL + * resets to its proper value. That also messes up the frame count + * when it's derived from the timestamps. 
So let's wait for the + * pipe to start properly before we call drm_crtc_vblank_on() */ - if (dev->max_vblank_count == 0 && - wait_for(intel_get_crtc_scanline(crtc) != crtc->scanline_offset, 50)) - DRM_ERROR("pipe %c didn't start\n", pipe_name(pipe)); + if (dev_priv->drm.max_vblank_count == 0) + intel_wait_for_pipe_scanline_moving(crtc); } -/** - * intel_disable_pipe - disable a pipe, asserting requirements - * @crtc: crtc whose pipes is to be disabled - * - * Disable the pipe of @crtc, making sure that various hardware - * specific requirements are met, if applicable, e.g. plane - * disabled, panel fitter off, etc. - * - * Will wait until the pipe has shut down before returning. - */ -static void intel_disable_pipe(struct intel_crtc *crtc) +static void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 val; @@ -1979,9 +1885,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc) * Make sure planes won't keep trying to pump pixels to us, * or we might hang the display. */ - assert_planes_disabled(dev_priv, pipe); - assert_cursor_disabled(dev_priv, pipe); - assert_sprites_disabled(dev_priv, pipe); + assert_planes_disabled(crtc); reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); @@ -1992,7 +1896,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc) * Double wide has implications for planes * so best keep it disabled when not needed. */ - if (crtc->config->double_wide) + if (old_crtc_state->double_wide) val &= ~PIPECONF_DOUBLE_WIDE; /* Don't disable pipe or pipe PLLs if needed */ @@ -2001,7 +1905,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc) I915_WRITE(reg, val); if ((val & PIPECONF_ENABLE) == 0) - intel_wait_for_pipe_off(crtc); + intel_wait_for_pipe_off(old_crtc_state); } static unsigned int intel_tile_size(const struct drm_i915_private *dev_priv) @@ -2219,8 +2123,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) * something and try to run the system in a "less than optimal" * mode that matches the user configuration. */ - if (i915_vma_get_fence(vma) == 0) - i915_vma_pin_fence(vma); + i915_vma_pin_fence(vma); } i915_vma_get(vma); @@ -2736,7 +2639,6 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_i915_gem_object *obj = NULL; struct drm_mode_fb_cmd2 mode_cmd = { 0 }; struct drm_framebuffer *fb = &plane_config->fb->base; @@ -2752,7 +2654,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, /* If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other * features. 
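The scanline-wait helpers above boil down to a bounded poll on pipe_scanline_is_moving(). A user-space sketch of the same contract, using a hypothetical poll_until() in place of the kernel's wait_for() macro (which additionally handles jiffies and backoff):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Stand-in for pipe_scanline_is_moving(): here, a simple countdown. */
static int ticks_until_moving = 5;

static bool scanline_is_moving(void)
{
    return --ticks_until_moving <= 0;
}

/* Poll the predicate once per millisecond until it matches @state or
 * @timeout_ms expires, matching the shape of the driver's wait loop. */
static bool poll_until(bool (*pred)(void), bool state, int timeout_ms)
{
    struct timespec ms = { .tv_sec = 0, .tv_nsec = 1000 * 1000 };

    while (timeout_ms-- > 0) {
        if (pred() == state)
            return true;
        nanosleep(&ms, NULL);
    }
    return pred() == state; /* one final check after the deadline */
}

int main(void)
{
    if (!poll_until(scanline_is_moving, true, 100))
        fprintf(stderr, "scanline wait timed out\n");
    else
        printf("scanline started\n");
    return 0;
}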
*/ - if (size_aligned * 2 > ggtt->stolen_usable_size) + if (size_aligned * 2 > dev_priv->stolen_usable_size) return false; mutex_lock(&dev->struct_mutex); @@ -2811,6 +2713,23 @@ intel_set_plane_visible(struct intel_crtc_state *crtc_state, crtc_state->active_planes); } +static void intel_plane_disable_noatomic(struct intel_crtc *crtc, + struct intel_plane *plane) +{ + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + intel_set_plane_visible(crtc_state, plane_state, false); + + if (plane->id == PLANE_PRIMARY) + intel_pre_disable_primary_noatomic(&crtc->base); + + trace_intel_disable_plane(&plane->base, crtc); + plane->disable_plane(plane, crtc); +} + static void intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) @@ -2856,7 +2775,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, if (intel_plane_ggtt_offset(state) == plane_config->base) { fb = c->primary->fb; - drm_framebuffer_reference(fb); + drm_framebuffer_get(fb); goto valid_fb; } } @@ -2868,12 +2787,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. */ - intel_set_plane_visible(to_intel_crtc_state(crtc_state), - to_intel_plane_state(plane_state), - false); - intel_pre_disable_primary_noatomic(&intel_crtc->base); - trace_intel_disable_plane(primary, intel_crtc); - intel_plane->disable_plane(intel_plane, intel_crtc); + intel_plane_disable_noatomic(intel_crtc, intel_plane); return; @@ -2887,7 +2801,7 @@ valid_fb: intel_crtc->pipe, PTR_ERR(intel_state->vma)); intel_state->vma = NULL; - drm_framebuffer_unreference(fb); + drm_framebuffer_put(fb); return; } @@ -2908,7 +2822,7 @@ valid_fb: if (i915_gem_object_is_tiled(obj)) dev_priv->preserve_bios_swizzle = true; - drm_framebuffer_reference(fb); + drm_framebuffer_get(fb); primary->fb = primary->state->fb = fb; primary->crtc = primary->state->crtc = &intel_crtc->base; @@ -3159,6 +3073,12 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) unsigned int rotation = plane_state->base.rotation; int ret; + if (rotation & DRM_MODE_REFLECT_X && + fb->modifier == DRM_FORMAT_MOD_LINEAR) { + DRM_DEBUG_KMS("horizontal flip is not supported with linear surface formats\n"); + return -EINVAL; + } + if (!plane_state->base.visible) return 0; @@ -3293,30 +3213,27 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) return 0; } -static void i9xx_update_primary_plane(struct intel_plane *primary, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static void i9xx_update_plane(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(primary->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_framebuffer *fb = plane_state->base.fb; - enum plane plane = primary->plane; + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; u32 linear_offset; u32 dspcntr = plane_state->ctl; - i915_reg_t reg = DSPCNTR(plane); + i915_reg_t reg = DSPCNTR(i9xx_plane); int x = plane_state->main.x; int y = plane_state->main.y; unsigned long irqflags; + u32 dspaddr_offset; linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); if 
(INTEL_GEN(dev_priv) >= 4) - crtc->dspaddr_offset = plane_state->main.offset; + dspaddr_offset = plane_state->main.offset; else - crtc->dspaddr_offset = linear_offset; - - crtc->adjusted_x = x; - crtc->adjusted_y = y; + dspaddr_offset = linear_offset; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -3324,61 +3241,85 @@ static void i9xx_update_primary_plane(struct intel_plane *primary, /* pipesrc and dspsize control the size that is scaled from, * which should always be the user's requested size. */ - I915_WRITE_FW(DSPSIZE(plane), + I915_WRITE_FW(DSPSIZE(i9xx_plane), ((crtc_state->pipe_src_h - 1) << 16) | (crtc_state->pipe_src_w - 1)); - I915_WRITE_FW(DSPPOS(plane), 0); - } else if (IS_CHERRYVIEW(dev_priv) && plane == PLANE_B) { - I915_WRITE_FW(PRIMSIZE(plane), + I915_WRITE_FW(DSPPOS(i9xx_plane), 0); + } else if (IS_CHERRYVIEW(dev_priv) && i9xx_plane == PLANE_B) { + I915_WRITE_FW(PRIMSIZE(i9xx_plane), ((crtc_state->pipe_src_h - 1) << 16) | (crtc_state->pipe_src_w - 1)); - I915_WRITE_FW(PRIMPOS(plane), 0); - I915_WRITE_FW(PRIMCNSTALPHA(plane), 0); + I915_WRITE_FW(PRIMPOS(i9xx_plane), 0); + I915_WRITE_FW(PRIMCNSTALPHA(i9xx_plane), 0); } I915_WRITE_FW(reg, dspcntr); - I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]); + I915_WRITE_FW(DSPSTRIDE(i9xx_plane), fb->pitches[0]); if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - I915_WRITE_FW(DSPSURF(plane), + I915_WRITE_FW(DSPSURF(i9xx_plane), intel_plane_ggtt_offset(plane_state) + - crtc->dspaddr_offset); - I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); + dspaddr_offset); + I915_WRITE_FW(DSPOFFSET(i9xx_plane), (y << 16) | x); } else if (INTEL_GEN(dev_priv) >= 4) { - I915_WRITE_FW(DSPSURF(plane), + I915_WRITE_FW(DSPSURF(i9xx_plane), intel_plane_ggtt_offset(plane_state) + - crtc->dspaddr_offset); - I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x); - I915_WRITE_FW(DSPLINOFF(plane), linear_offset); + dspaddr_offset); + I915_WRITE_FW(DSPTILEOFF(i9xx_plane), (y << 16) | x); + I915_WRITE_FW(DSPLINOFF(i9xx_plane), linear_offset); } else { - I915_WRITE_FW(DSPADDR(plane), + I915_WRITE_FW(DSPADDR(i9xx_plane), intel_plane_ggtt_offset(plane_state) + - crtc->dspaddr_offset); + dspaddr_offset); } POSTING_READ_FW(reg); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void i9xx_disable_primary_plane(struct intel_plane *primary, - struct intel_crtc *crtc) +static void i9xx_disable_plane(struct intel_plane *plane, + struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(primary->base.dev); - enum plane plane = primary->plane; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - I915_WRITE_FW(DSPCNTR(plane), 0); - if (INTEL_INFO(dev_priv)->gen >= 4) - I915_WRITE_FW(DSPSURF(plane), 0); + I915_WRITE_FW(DSPCNTR(i9xx_plane), 0); + if (INTEL_GEN(dev_priv) >= 4) + I915_WRITE_FW(DSPSURF(i9xx_plane), 0); else - I915_WRITE_FW(DSPADDR(plane), 0); - POSTING_READ_FW(DSPCNTR(plane)); + I915_WRITE_FW(DSPADDR(i9xx_plane), 0); + POSTING_READ_FW(DSPCNTR(i9xx_plane)); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool i9xx_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; + enum pipe pipe = plane->pipe; + bool ret; + + /* + * Not 100% correct for planes that can move between pipes, + * but that's only the case for 
gen2-4 which don't have any + * display power wells. + */ + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(DSPCNTR(i9xx_plane)) & DISPLAY_PLANE_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane) { @@ -3445,20 +3386,11 @@ static u32 skl_plane_ctl_format(uint32_t pixel_format) case DRM_FORMAT_RGB565: return PLANE_CTL_FORMAT_RGB_565; case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX; case DRM_FORMAT_XRGB8888: - return PLANE_CTL_FORMAT_XRGB_8888; - /* - * XXX: For ARBG/ABGR formats we default to expecting scanout buffers - * to be already pre-multiplied. We need to add a knob (or a different - * DRM_FORMAT) for user-space to configure that. - */ - case DRM_FORMAT_ABGR8888: - return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX | - PLANE_CTL_ALPHA_SW_PREMULTIPLY; case DRM_FORMAT_ARGB8888: - return PLANE_CTL_FORMAT_XRGB_8888 | - PLANE_CTL_ALPHA_SW_PREMULTIPLY; + return PLANE_CTL_FORMAT_XRGB_8888; case DRM_FORMAT_XRGB2101010: return PLANE_CTL_FORMAT_XRGB_2101010; case DRM_FORMAT_XBGR2101010: @@ -3478,6 +3410,33 @@ static u32 skl_plane_ctl_format(uint32_t pixel_format) return 0; } +/* + * XXX: For ARBG/ABGR formats we default to expecting scanout buffers + * to be already pre-multiplied. We need to add a knob (or a different + * DRM_FORMAT) for user-space to configure that. + */ +static u32 skl_plane_ctl_alpha(uint32_t pixel_format) +{ + switch (pixel_format) { + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_ARGB8888: + return PLANE_CTL_ALPHA_SW_PREMULTIPLY; + default: + return PLANE_CTL_ALPHA_DISABLE; + } +} + +static u32 glk_plane_color_ctl_alpha(uint32_t pixel_format) +{ + switch (pixel_format) { + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_ARGB8888: + return PLANE_COLOR_ALPHA_SW_PREMULTIPLY; + default: + return PLANE_COLOR_ALPHA_DISABLE; + } +} + static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) { switch (fb_modifier) { @@ -3500,9 +3459,9 @@ static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) return 0; } -static u32 skl_plane_ctl_rotation(unsigned int rotation) +static u32 skl_plane_ctl_rotate(unsigned int rotate) { - switch (rotation) { + switch (rotate) { case DRM_MODE_ROTATE_0: break; /* @@ -3516,7 +3475,22 @@ static u32 skl_plane_ctl_rotation(unsigned int rotation) case DRM_MODE_ROTATE_270: return PLANE_CTL_ROTATE_90; default: - MISSING_CASE(rotation); + MISSING_CASE(rotate); + } + + return 0; +} + +static u32 cnl_plane_ctl_flip(unsigned int reflect) +{ + switch (reflect) { + case 0: + break; + case DRM_MODE_REFLECT_X: + return PLANE_CTL_FLIP_HORIZONTAL; + case DRM_MODE_REFLECT_Y: + default: + MISSING_CASE(reflect); } return 0; @@ -3534,7 +3508,8 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, plane_ctl = PLANE_CTL_ENABLE; - if (!IS_GEMINILAKE(dev_priv) && !IS_CANNONLAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) < 10 && !IS_GEMINILAKE(dev_priv)) { + plane_ctl |= skl_plane_ctl_alpha(fb->format->format); plane_ctl |= PLANE_CTL_PIPE_GAMMA_ENABLE | PLANE_CTL_PIPE_CSC_ENABLE | @@ -3543,7 +3518,11 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= skl_plane_ctl_rotation(rotation); + plane_ctl |= skl_plane_ctl_rotate(rotation & DRM_MODE_ROTATE_MASK); + + if (INTEL_GEN(dev_priv) >= 
10) + plane_ctl |= cnl_plane_ctl_flip(rotation & + DRM_MODE_REFLECT_MASK); if (key->flags & I915_SET_COLORKEY_DESTINATION) plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION; @@ -3553,98 +3532,18 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, return plane_ctl; } -static void skylake_update_primary_plane(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); const struct drm_framebuffer *fb = plane_state->base.fb; - enum plane_id plane_id = plane->id; - enum pipe pipe = plane->pipe; - u32 plane_ctl = plane_state->ctl; - unsigned int rotation = plane_state->base.rotation; - u32 stride = skl_plane_stride(fb, 0, rotation); - u32 aux_stride = skl_plane_stride(fb, 1, rotation); - u32 surf_addr = plane_state->main.offset; - int scaler_id = plane_state->scaler_id; - int src_x = plane_state->main.x; - int src_y = plane_state->main.y; - int src_w = drm_rect_width(&plane_state->base.src) >> 16; - int src_h = drm_rect_height(&plane_state->base.src) >> 16; - int dst_x = plane_state->base.dst.x1; - int dst_y = plane_state->base.dst.y1; - int dst_w = drm_rect_width(&plane_state->base.dst); - int dst_h = drm_rect_height(&plane_state->base.dst); - unsigned long irqflags; - - /* Sizes are 0 based */ - src_w--; - src_h--; - dst_w--; - dst_h--; - - crtc->dspaddr_offset = surf_addr; + u32 plane_color_ctl = 0; - crtc->adjusted_x = src_x; - crtc->adjusted_y = src_y; + plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; + plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE; + plane_color_ctl |= glk_plane_color_ctl_alpha(fb->format->format); - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - - if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { - I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), - PLANE_COLOR_PIPE_GAMMA_ENABLE | - PLANE_COLOR_PIPE_CSC_ENABLE | - PLANE_COLOR_PLANE_GAMMA_DISABLE); - } - - I915_WRITE_FW(PLANE_CTL(pipe, plane_id), plane_ctl); - I915_WRITE_FW(PLANE_OFFSET(pipe, plane_id), (src_y << 16) | src_x); - I915_WRITE_FW(PLANE_STRIDE(pipe, plane_id), stride); - I915_WRITE_FW(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w); - I915_WRITE_FW(PLANE_AUX_DIST(pipe, plane_id), - (plane_state->aux.offset - surf_addr) | aux_stride); - I915_WRITE_FW(PLANE_AUX_OFFSET(pipe, plane_id), - (plane_state->aux.y << 16) | plane_state->aux.x); - - if (scaler_id >= 0) { - uint32_t ps_ctrl = 0; - - WARN_ON(!dst_w || !dst_h); - ps_ctrl = PS_SCALER_EN | PS_PLANE_SEL(plane_id) | - crtc_state->scaler_state.scalers[scaler_id].mode; - I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), ps_ctrl); - I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0); - I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (dst_x << 16) | dst_y); - I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), (dst_w << 16) | dst_h); - I915_WRITE_FW(PLANE_POS(pipe, plane_id), 0); - } else { - I915_WRITE_FW(PLANE_POS(pipe, plane_id), (dst_y << 16) | dst_x); - } - - I915_WRITE_FW(PLANE_SURF(pipe, plane_id), - intel_plane_ggtt_offset(plane_state) + surf_addr); - - POSTING_READ_FW(PLANE_SURF(pipe, plane_id)); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); -} - -static void skylake_disable_primary_plane(struct intel_plane *primary, - struct intel_crtc *crtc) -{ - struct drm_i915_private 
*dev_priv = to_i915(primary->base.dev); - enum plane_id plane_id = primary->id; - enum pipe pipe = primary->pipe; - unsigned long irqflags; - - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - - I915_WRITE_FW(PLANE_CTL(pipe, plane_id), 0); - I915_WRITE_FW(PLANE_SURF(pipe, plane_id), 0); - POSTING_READ_FW(PLANE_SURF(pipe, plane_id)); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + return plane_color_ctl; } static int @@ -3701,7 +3600,7 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) /* reset doesn't touch the display */ - if (!i915.force_reset_modeset_test && + if (!i915_modparams.force_reset_modeset_test && !gpu_reset_clobbers_display(dev_priv)) return; @@ -3757,7 +3656,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) int ret; /* reset doesn't touch the display */ - if (!i915.force_reset_modeset_test && + if (!i915_modparams.force_reset_modeset_test && !gpu_reset_clobbers_display(dev_priv)) return; @@ -3770,8 +3669,8 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) if (!gpu_reset_clobbers_display(dev_priv)) { /* for testing only restore the display */ ret = __intel_display_resume(dev, state, ctx); - if (ret) - DRM_ERROR("Restoring old state failed with %i\n", ret); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); } else { /* * The display has been reset as well, @@ -3782,6 +3681,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) intel_pps_unlock_regs_wa(dev_priv); intel_modeset_init_hw(dev); + intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); if (dev_priv->display.hpd_irq_setup) @@ -3804,15 +3704,14 @@ unlock: clear_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags); } -static void intel_update_pipe_config(struct intel_crtc *crtc, - struct intel_crtc_state *old_crtc_state) +static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_crtc_state *pipe_config = - to_intel_crtc_state(crtc->base.state); /* drm_atomic_helper_update_legacy_modeset_state might not be called. 
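intel_update_pipe_config() now receives an explicit old/new crtc state pair instead of digging the new state out of crtc->base.state. A trimmed sketch of why both states are needed, anticipating the panel-fitter branch that continues just below (the one-field struct is invented for the demo):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical slice of the crtc state; the driver's struct is far bigger. */
struct crtc_state {
    bool pfit_enabled;
};

/* Enable when the new state wants the fitter, but only take the disable
 * path when the old state actually had it on. */
static void update_pfit(const struct crtc_state *old_state,
                        const struct crtc_state *new_state)
{
    if (new_state->pfit_enabled)
        printf("pfit: program and enable\n");
    else if (old_state->pfit_enabled)
        printf("pfit: disable\n");
    /* off before, off after: nothing to do */
}

int main(void)
{
    const struct crtc_state off = { .pfit_enabled = false };
    const struct crtc_state on = { .pfit_enabled = true };

    update_pfit(&off, &on);  /* enable */
    update_pfit(&on, &off);  /* disable */
    update_pfit(&off, &off); /* no-op */
    return 0;
}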
*/ - crtc->base.mode = crtc->base.state->mode; + crtc->base.mode = new_crtc_state->base.mode; /* * Update pipe size and adjust fitter if needed: the reason for this is @@ -3824,17 +3723,17 @@ static void intel_update_pipe_config(struct intel_crtc *crtc, */ I915_WRITE(PIPESRC(crtc->pipe), - ((pipe_config->pipe_src_w - 1) << 16) | - (pipe_config->pipe_src_h - 1)); + ((new_crtc_state->pipe_src_w - 1) << 16) | + (new_crtc_state->pipe_src_h - 1)); /* on skylake this is done by detaching scalers */ if (INTEL_GEN(dev_priv) >= 9) { skl_detach_scalers(crtc); - if (pipe_config->pch_pfit.enabled) + if (new_crtc_state->pch_pfit.enabled) skylake_pfit_enable(crtc); } else if (HAS_PCH_SPLIT(dev_priv)) { - if (pipe_config->pch_pfit.enabled) + if (new_crtc_state->pch_pfit.enabled) ironlake_pfit_enable(crtc); else if (old_crtc_state->pch_pfit.enabled) ironlake_pfit_disable(crtc, true); @@ -4589,7 +4488,7 @@ intel_trans_dp_port_sel(struct intel_crtc *crtc) for_each_encoder_on_crtc(dev, &crtc->base, encoder) { if (encoder->type == INTEL_OUTPUT_DP || encoder->type == INTEL_OUTPUT_EDP) - return enc_to_dig_port(&encoder->base)->port; + return encoder->port; } return -1; @@ -4940,12 +4839,13 @@ static void ironlake_pfit_enable(struct intel_crtc *crtc) } } -void hsw_enable_ips(struct intel_crtc *crtc) +void hsw_enable_ips(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (!crtc->config->ips_enabled) + if (!crtc_state->ips_enabled) return; /* @@ -4953,12 +4853,13 @@ void hsw_enable_ips(struct intel_crtc *crtc) * This function is called from post_plane_update, which is run after * a vblank wait. */ + WARN_ON(!(crtc_state->active_planes & ~BIT(PLANE_CURSOR))); - assert_plane_enabled(dev_priv, crtc->plane); if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); - WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0xc0000000)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); + WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, + IPS_ENABLE | IPS_PCODE_CONTROL)); + mutex_unlock(&dev_priv->pcu_lock); /* Quoting Art Runyan: "its not safe to expect any particular * value in IPS_CTL bit 31 after enabling IPS through the * mailbox." Moreover, the mailbox may return a bogus state, @@ -4978,19 +4879,19 @@ void hsw_enable_ips(struct intel_crtc *crtc) } } -void hsw_disable_ips(struct intel_crtc *crtc) +void hsw_disable_ips(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (!crtc->config->ips_enabled) + if (!crtc_state->ips_enabled) return; - assert_plane_enabled(dev_priv, crtc->plane); if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* wait for pcode to finish disabling IPS, which may take up to 42ms */ if (intel_wait_for_register(dev_priv, IPS_CTL, IPS_ENABLE, 0, @@ -5031,7 +4932,8 @@ static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) * completely hide the primary plane. 
*/ static void -intel_post_enable_primary(struct drm_crtc *crtc) +intel_post_enable_primary(struct drm_crtc *crtc, + const struct intel_crtc_state *new_crtc_state) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -5039,14 +4941,6 @@ intel_post_enable_primary(struct drm_crtc *crtc) int pipe = intel_crtc->pipe; /* - * FIXME IPS should be fine as long as one plane is - * enabled, but in practice it seems to have problems - * when going from primary only to sprite only and vice - * versa. - */ - hsw_enable_ips(intel_crtc); - - /* * Gen2 reports pipe underruns whenever all planes are disabled. * So don't enable underrun reporting before at least some planes * are enabled. @@ -5061,9 +4955,9 @@ intel_post_enable_primary(struct drm_crtc *crtc) intel_check_pch_fifo_underruns(dev_priv); } -/* FIXME move all this to pre_plane_update() with proper state tracking */ +/* FIXME get rid of this and use pre_plane_update */ static void -intel_pre_disable_primary(struct drm_crtc *crtc) +intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -5072,32 +4966,12 @@ intel_pre_disable_primary(struct drm_crtc *crtc) /* * Gen2 reports pipe underruns whenever all planes are disabled. - * So diasble underrun reporting before all the planes get disabled. - * FIXME: Need to fix the logic to work when we turn off all planes - * but leave the pipe running. + * So disable underrun reporting before all the planes get disabled. */ if (IS_GEN2(dev_priv)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); - /* - * FIXME IPS should be fine as long as one plane is - * enabled, but in practice it seems to have problems - * when going from primary only to sprite only and vice - * versa. - */ - hsw_disable_ips(intel_crtc); -} - -/* FIXME get rid of this and use pre_plane_update */ -static void -intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - - intel_pre_disable_primary(crtc); + hsw_disable_ips(to_intel_crtc_state(crtc->state)); /* * Vblank time updates from the shadow to live plane control register @@ -5113,12 +4987,45 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) intel_wait_for_vblank(dev_priv, pipe); } +static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + if (!old_crtc_state->ips_enabled) + return false; + + if (needs_modeset(&new_crtc_state->base)) + return true; + + return !new_crtc_state->ips_enabled; +} + +static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + if (!new_crtc_state->ips_enabled) + return false; + + if (needs_modeset(&new_crtc_state->base)) + return true; + + /* + * We can't read out IPS on broadwell, assume the worst and + * forcibly enable IPS on the first fastset. 
+ */ + if (new_crtc_state->update_pipe && + old_crtc_state->base.adjusted_mode.private_flags & I915_MODE_FLAG_INHERITED) + return true; + + return !old_crtc_state->ips_enabled; +} + static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); struct drm_atomic_state *old_state = old_crtc_state->base.state; struct intel_crtc_state *pipe_config = - to_intel_crtc_state(crtc->base.state); + intel_atomic_get_new_crtc_state(to_intel_atomic_state(old_state), + crtc); struct drm_plane *primary = crtc->base.primary; struct drm_plane_state *old_pri_state = drm_atomic_get_existing_plane_state(old_state, primary); @@ -5128,9 +5035,13 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) if (pipe_config->update_wm_post && pipe_config->base.active) intel_update_watermarks(crtc); + if (hsw_post_update_enable_ips(old_crtc_state, pipe_config)) + hsw_enable_ips(pipe_config); + if (old_pri_state) { struct intel_plane_state *primary_state = - to_intel_plane_state(primary->state); + intel_atomic_get_new_plane_state(to_intel_atomic_state(old_state), + to_intel_plane(primary)); struct intel_plane_state *old_primary_state = to_intel_plane_state(old_pri_state); @@ -5139,7 +5050,7 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) if (primary_state->base.visible && (needs_modeset(&pipe_config->base) || !old_primary_state->base.visible)) - intel_post_enable_primary(&crtc->base); + intel_post_enable_primary(&crtc->base, pipe_config); } } @@ -5157,17 +5068,24 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); + if (hsw_pre_update_disable_ips(old_crtc_state, pipe_config)) + hsw_disable_ips(old_crtc_state); + if (old_pri_state) { struct intel_plane_state *primary_state = - to_intel_plane_state(primary->state); + intel_atomic_get_new_plane_state(old_intel_state, + to_intel_plane(primary)); struct intel_plane_state *old_primary_state = to_intel_plane_state(old_pri_state); intel_fbc_pre_update(crtc, pipe_config, primary_state); - - if (old_primary_state->base.visible && + /* + * Gen2 reports pipe underruns whenever all planes are disabled. + * So disable underrun reporting before all the planes get disabled. 
+ */ + if (IS_GEN2(dev_priv) && old_primary_state->base.visible && (modeset || !primary_state->base.visible)) - intel_pre_disable_primary(&crtc->base); + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); } /* @@ -5430,7 +5348,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, intel_crtc->config); - intel_enable_pipe(intel_crtc); + intel_enable_pipe(pipe_config); if (intel_crtc->config->has_pch_encoder) ironlake_pch_enable(pipe_config); @@ -5456,6 +5374,20 @@ static bool hsw_crtc_supports_ips(struct intel_crtc *crtc) return HAS_IPS(to_i915(crtc->base.dev)) && crtc->pipe == PIPE_A; } +static void glk_pipe_scaler_clock_gating_wa(struct drm_i915_private *dev_priv, + enum pipe pipe, bool apply) +{ + u32 val = I915_READ(CLKGATE_DIS_PSL(pipe)); + u32 mask = DPF_GATING_DIS | DPF_RAM_GATING_DIS | DPFR_GATING_DIS; + + if (apply) + val |= mask; + else + val &= ~mask; + + I915_WRITE(CLKGATE_DIS_PSL(pipe), val); +} + static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { @@ -5466,13 +5398,11 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); + bool psl_clkgate_wa; if (WARN_ON(intel_crtc->active)) return; - if (intel_crtc->config->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); - intel_encoders_pre_pll_enable(crtc, pipe_config, old_state); if (intel_crtc->config->shared_dpll) @@ -5506,19 +5436,17 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_crtc->active = true; - if (intel_crtc->config->has_pch_encoder) - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); - else - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - intel_encoders_pre_enable(crtc, pipe_config, old_state); - if (intel_crtc->config->has_pch_encoder) - dev_priv->display.fdi_link_train(intel_crtc, pipe_config); - if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_pipe_clock(pipe_config); + /* Display WA #1180: WaDisableScalarClockGating: glk, cnl */ + psl_clkgate_wa = (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) && + intel_crtc->config->pch_pfit.enabled; + if (psl_clkgate_wa) + glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, true); + if (INTEL_GEN(dev_priv) >= 9) skylake_pfit_enable(intel_crtc); else @@ -5539,7 +5467,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, /* XXX: Do the pipe assertions at the right place for BXT DSI. 
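glk_pipe_scaler_clock_gating_wa() above is a symmetric read-modify-write: one mask and one bool selecting apply or undo, so the enable and disable paths cannot drift apart. A sketch of that shape (bit positions are placeholders; only the names come from the workaround):

#include <stdint.h>
#include <stdio.h>

#define DPF_GATING_DIS     (1u << 8)
#define DPF_RAM_GATING_DIS (1u << 9)
#define DPFR_GATING_DIS    (1u << 10)

static uint32_t clkgate_dis_psl; /* stands in for CLKGATE_DIS_PSL(pipe) */

/* Same read-modify-write shape as the workaround helper above. */
static void scaler_clkgate_wa(int apply)
{
    const uint32_t mask = DPF_GATING_DIS | DPF_RAM_GATING_DIS |
                          DPFR_GATING_DIS;
    uint32_t val = clkgate_dis_psl;

    if (apply)
        val |= mask;
    else
        val &= ~mask;

    clkgate_dis_psl = val;
}

int main(void)
{
    scaler_clkgate_wa(1);
    printf("WA applied: %#x\n", clkgate_dis_psl);
    scaler_clkgate_wa(0);
    printf("WA undone:  %#x\n", clkgate_dis_psl);
    return 0;
}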
*/ if (!transcoder_is_dsi(cpu_transcoder)) - intel_enable_pipe(intel_crtc); + intel_enable_pipe(pipe_config); if (intel_crtc->config->has_pch_encoder) lpt_pch_enable(pipe_config); @@ -5552,11 +5480,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_enable(crtc, pipe_config, old_state); - if (intel_crtc->config->has_pch_encoder) { - intel_wait_for_vblank(dev_priv, pipe); + if (psl_clkgate_wa) { intel_wait_for_vblank(dev_priv, pipe); - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); + glk_pipe_scaler_clock_gating_wa(dev_priv, pipe, false); } /* If we change the relative order between pipe/planes enabling, we need @@ -5607,7 +5533,7 @@ static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state, drm_crtc_vblank_off(crtc); assert_vblank_disabled(crtc); - intel_disable_pipe(intel_crtc); + intel_disable_pipe(old_crtc_state); ironlake_pfit_disable(intel_crtc, false); @@ -5652,9 +5578,6 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; - if (intel_crtc->config->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false); - intel_encoders_disable(crtc, old_crtc_state, old_state); drm_crtc_vblank_off(crtc); @@ -5662,7 +5585,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, /* XXX: Do the pipe assertions at the right place for BXT DSI. */ if (!transcoder_is_dsi(cpu_transcoder)) - intel_disable_pipe(intel_crtc); + intel_disable_pipe(old_crtc_state); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) intel_ddi_set_vc_payload_alloc(intel_crtc->config, false); @@ -5679,9 +5602,6 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); - - if (old_crtc_state->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } static void i9xx_pfit_enable(struct intel_crtc *crtc) @@ -5741,8 +5661,8 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc, if (!crtc_state->base.active) return 0; - mask = BIT(POWER_DOMAIN_PIPE(pipe)); - mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder)); + mask = BIT_ULL(POWER_DOMAIN_PIPE(pipe)); + mask |= BIT_ULL(POWER_DOMAIN_TRANSCODER(transcoder)); if (crtc_state->pch_pfit.enabled || crtc_state->pch_pfit.force_thru) mask |= BIT_ULL(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); @@ -5754,7 +5674,7 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc, } if (HAS_DDI(dev_priv) && crtc_state->has_audio) - mask |= BIT(POWER_DOMAIN_AUDIO); + mask |= BIT_ULL(POWER_DOMAIN_AUDIO); if (crtc_state->shared_dpll) mask |= BIT_ULL(POWER_DOMAIN_PLLS); @@ -5843,7 +5763,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, dev_priv->display.initial_watermarks(old_intel_state, pipe_config); - intel_enable_pipe(intel_crtc); + intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5891,7 +5811,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_pre_enable(crtc, pipe_config, old_state); - i9xx_enable_pll(intel_crtc); + i9xx_enable_pll(intel_crtc, pipe_config); i9xx_pfit_enable(intel_crtc); @@ -5902,7 +5822,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_crtc->config); else 
intel_update_watermarks(intel_crtc); - intel_enable_pipe(intel_crtc); + intel_enable_pipe(pipe_config); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5946,7 +5866,7 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, drm_crtc_vblank_off(crtc); assert_vblank_disabled(crtc); - intel_disable_pipe(intel_crtc); + intel_disable_pipe(old_crtc_state); i9xx_pfit_disable(intel_crtc); @@ -5981,6 +5901,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum intel_display_power_domain domain; + struct intel_plane *plane; u64 domains; struct drm_atomic_state *state; struct intel_crtc_state *crtc_state; @@ -5989,11 +5910,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, if (!intel_crtc->active) return; - if (crtc->primary->state->visible) { - intel_pre_disable_primary_noatomic(crtc); + for_each_intel_plane_on_crtc(&dev_priv->drm, intel_crtc, plane) { + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); - intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); - crtc->primary->state->visible = false; + if (plane_state->base.visible) + intel_plane_disable_noatomic(intel_crtc, plane); } state = drm_atomic_state_alloc(crtc->dev); @@ -6038,7 +5960,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, intel_crtc->enabled_power_domains = 0; dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe); - dev_priv->min_pixclk[intel_crtc->pipe] = 0; + dev_priv->min_cdclk[intel_crtc->pipe] = 0; + dev_priv->min_voltage_level[intel_crtc->pipe] = 0; } /* @@ -6143,6 +6066,19 @@ struct intel_connector *intel_connector_alloc(void) return connector; } +/* + * Free the bits allocated by intel_connector_alloc. + * This should only be used after intel_connector_alloc has returned + * successfully, and before drm_connector_init returns successfully. + * Otherwise the destroy callbacks for the connector and the state should + * take care of proper cleanup/free + */ +void intel_connector_free(struct intel_connector *connector) +{ + kfree(to_intel_digital_connector_state(connector->base.state)); + kfree(connector); +} + /* Simple connector->get_hw_state implementation for encoders that support only * one connector and no cloning and hence the encoder state determines the state * of the connector. */ @@ -6280,15 +6216,20 @@ retry: return ret; } -static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv, - struct intel_crtc_state *pipe_config) +bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state) { - if (pipe_config->pipe_bpp > 24) + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + /* IPS only exists on ULT machines and is tied to pipe A. */ + if (!hsw_crtc_supports_ips(crtc)) return false; - /* HSW can handle pixel rate up to cdclk? 
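The BIT() to BIT_ULL() conversions in get_crtc_power_domains() above matter because the domain mask is a u64 and power-domain indices can exceed 31. A quick demonstration of the width pitfall:

#include <stdio.h>

#define BIT(n)     (1UL << (n))   /* 32 bits on ILP32 targets: unsafe for u64 masks */
#define BIT_ULL(n) (1ULL << (n))  /* always 64 bits wide */

int main(void)
{
    int domain = 35; /* a power-domain index past bit 31 */

    /* BIT(domain) would be undefined behaviour where long is 32-bit;
     * BIT_ULL(domain) is well-defined everywhere. */
    printf("BIT_ULL(%d) = %#llx\n", domain, (unsigned long long)BIT_ULL(domain));
    printf("sizeof(1UL)=%zu sizeof(1ULL)=%zu\n", sizeof(1UL), sizeof(1ULL));
    return 0;
}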
*/ - if (IS_HASWELL(dev_priv)) - return true; + if (!i915_modparams.enable_ips) + return false; + + if (crtc_state->pipe_bpp > 24) + return false; /* * We compare against max which means we must take @@ -6297,19 +6238,36 @@ static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv, * * Should measure whether using a lower cdclk w/o IPS */ - return pipe_config->pixel_rate <= - dev_priv->max_cdclk_freq * 95 / 100; + if (IS_BROADWELL(dev_priv) && + crtc_state->pixel_rate > dev_priv->max_cdclk_freq * 95 / 100) + return false; + + return true; } -static void hsw_compute_ips_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static bool hsw_compute_ips_config(struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(crtc_state->base.state); + + if (!hsw_crtc_state_ips_capable(crtc_state)) + return false; + + if (crtc_state->ips_force_disable) + return false; - pipe_config->ips_enabled = i915.enable_ips && - hsw_crtc_supports_ips(crtc) && - pipe_config_supports_ips(dev_priv, pipe_config); + /* IPS should be fine as long as at least one plane is enabled. */ + if (!(crtc_state->active_planes & ~BIT(PLANE_CURSOR))) + return false; + + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (IS_BROADWELL(dev_priv) && + crtc_state->pixel_rate > intel_state->cdclk.logical.cdclk * 95 / 100) + return false; + + return true; } static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) @@ -6427,9 +6385,6 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, intel_crtc_compute_pixel_rate(pipe_config); - if (HAS_IPS(dev_priv)) - hsw_compute_ips_config(crtc, pipe_config); - if (pipe_config->has_pch_encoder) return ironlake_fdi_compute_config(crtc, pipe_config); @@ -6488,8 +6443,8 @@ intel_link_compute_m_n(int bits_per_pixel, int nlanes, static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) { - if (i915.panel_use_ssc >= 0) - return i915.panel_use_ssc != 0; + if (i915_modparams.panel_use_ssc >= 0) + return i915_modparams.panel_use_ssc != 0; return dev_priv->vbt.lvds_use_ssc && !(dev_priv->quirks & QUIRK_LVDS_SSC_DISABLE); } @@ -6523,11 +6478,9 @@ static void i9xx_update_pll_dividers(struct intel_crtc *crtc, crtc_state->dpll_hw_state.fp0 = fp; - crtc->lowfreq_avail = false; if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_LVDS) && reduced_clock) { crtc_state->dpll_hw_state.fp1 = fp2; - crtc->lowfreq_avail = true; } else { crtc_state->dpll_hw_state.fp1 = fp; } @@ -7222,15 +7175,6 @@ static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc) } } - if (HAS_PIPE_CXSR(dev_priv)) { - if (intel_crtc->lowfreq_avail) { - DRM_DEBUG_KMS("enabling CxSR downclocking\n"); - pipeconf |= PIPECONF_CXSR_DOWNCLOCK; - } else { - DRM_DEBUG_KMS("disabling CxSR downclocking\n"); - } - } - if (intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) { if (INTEL_GEN(dev_priv) < 4 || intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_SDVO)) @@ -7497,15 +7441,16 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_plane *plane = to_intel_plane(crtc->base.primary); + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; + enum pipe pipe = crtc->pipe; u32 val, base, offset; - int pipe = crtc->pipe, plane = 
crtc->plane; int fourcc, pixel_format; unsigned int aligned_height; struct drm_framebuffer *fb; struct intel_framebuffer *intel_fb; - val = I915_READ(DSPCNTR(plane)); - if (!(val & DISPLAY_PLANE_ENABLE)) + if (!plane->get_hw_state(plane)) return; intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); @@ -7518,6 +7463,8 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, fb->dev = dev; + val = I915_READ(DSPCNTR(i9xx_plane)); + if (INTEL_GEN(dev_priv) >= 4) { if (val & DISPPLANE_TILED) { plane_config->tiling = I915_TILING_X; @@ -7529,14 +7476,17 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, fourcc = i9xx_format_to_fourcc(pixel_format); fb->format = drm_format_info(fourcc); - if (INTEL_GEN(dev_priv) >= 4) { + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { + offset = I915_READ(DSPOFFSET(i9xx_plane)); + base = I915_READ(DSPSURF(i9xx_plane)) & 0xfffff000; + } else if (INTEL_GEN(dev_priv) >= 4) { if (plane_config->tiling) - offset = I915_READ(DSPTILEOFF(plane)); + offset = I915_READ(DSPTILEOFF(i9xx_plane)); else - offset = I915_READ(DSPLINOFF(plane)); - base = I915_READ(DSPSURF(plane)) & 0xfffff000; + offset = I915_READ(DSPLINOFF(i9xx_plane)); + base = I915_READ(DSPSURF(i9xx_plane)) & 0xfffff000; } else { - base = I915_READ(DSPADDR(plane)); + base = I915_READ(DSPADDR(i9xx_plane)); } plane_config->base = base; @@ -7544,15 +7494,15 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, fb->width = ((val >> 16) & 0xfff) + 1; fb->height = ((val >> 0) & 0xfff) + 1; - val = I915_READ(DSPSTRIDE(pipe)); + val = I915_READ(DSPSTRIDE(i9xx_plane)); fb->pitches[0] = val & 0xffffffc0; aligned_height = intel_fb_align_height(fb, 0, fb->height); plane_config->size = fb->pitches[0] * aligned_height; - DRM_DEBUG_KMS("pipe/plane %c/%d with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n", - pipe_name(pipe), plane, fb->width, fb->height, + DRM_DEBUG_KMS("%s/%s with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n", + crtc->base.name, plane->base.name, fb->width, fb->height, fb->format->cpp[0] * 8, base, fb->pitches[0], plane_config->size); @@ -7728,7 +7678,7 @@ static void ironlake_init_pch_refclk(struct drm_i915_private *dev_priv) break; case INTEL_OUTPUT_EDP: has_panel = true; - if (enc_to_dig_port(&encoder->base)->port == PORT_A) + if (encoder->port == PORT_A) has_cpu_edp = true; break; default: @@ -8366,8 +8316,6 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, memset(&crtc_state->dpll_hw_state, 0, sizeof(crtc_state->dpll_hw_state)); - crtc->lowfreq_avail = false; - /* CPU eDP is the only output that doesn't need a PCH PLL of its own. 
*/ if (!crtc_state->has_pch_encoder) return 0; @@ -8523,13 +8471,18 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, base, offset, stride_mult, tiling; - int pipe = crtc->pipe; + struct intel_plane *plane = to_intel_plane(crtc->base.primary); + enum plane_id plane_id = plane->id; + enum pipe pipe = crtc->pipe; + u32 val, base, offset, stride_mult, tiling, alpha; int fourcc, pixel_format; unsigned int aligned_height; struct drm_framebuffer *fb; struct intel_framebuffer *intel_fb; + if (!plane->get_hw_state(plane)) + return; + intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); if (!intel_fb) { DRM_DEBUG_KMS("failed to alloc fb\n"); @@ -8540,14 +8493,19 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, fb->dev = dev; - val = I915_READ(PLANE_CTL(pipe, 0)); - if (!(val & PLANE_CTL_ENABLE)) - goto error; + val = I915_READ(PLANE_CTL(pipe, plane_id)); pixel_format = val & PLANE_CTL_FORMAT_MASK; + + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + alpha = I915_READ(PLANE_COLOR_CTL(pipe, plane_id)); + alpha &= PLANE_COLOR_ALPHA_MASK; + } else { + alpha = val & PLANE_CTL_ALPHA_MASK; + } + fourcc = skl_format_to_fourcc(pixel_format, - val & PLANE_CTL_ORDER_RGBX, - val & PLANE_CTL_ALPHA_MASK); + val & PLANE_CTL_ORDER_RGBX, alpha); fb->format = drm_format_info(fourcc); tiling = val & PLANE_CTL_TILED_MASK; @@ -8576,16 +8534,16 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, goto error; } - base = I915_READ(PLANE_SURF(pipe, 0)) & 0xfffff000; + base = I915_READ(PLANE_SURF(pipe, plane_id)) & 0xfffff000; plane_config->base = base; - offset = I915_READ(PLANE_OFFSET(pipe, 0)); + offset = I915_READ(PLANE_OFFSET(pipe, plane_id)); - val = I915_READ(PLANE_SIZE(pipe, 0)); + val = I915_READ(PLANE_SIZE(pipe, plane_id)); fb->height = ((val >> 16) & 0xfff) + 1; fb->width = ((val >> 0) & 0x1fff) + 1; - val = I915_READ(PLANE_STRIDE(pipe, 0)); + val = I915_READ(PLANE_STRIDE(pipe, plane_id)); stride_mult = intel_fb_stride_alignment(fb, 0); fb->pitches[0] = (val & 0x3ff) * stride_mult; @@ -8593,8 +8551,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, plane_config->size = fb->pitches[0] * aligned_height; - DRM_DEBUG_KMS("pipe %c with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n", - pipe_name(pipe), fb->width, fb->height, + DRM_DEBUG_KMS("%s/%s with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n", + crtc->base.name, plane->base.name, fb->width, fb->height, fb->format->cpp[0] * 8, base, fb->pitches[0], plane_config->size); @@ -8629,74 +8587,6 @@ static void ironlake_get_pfit_config(struct intel_crtc *crtc, } } -static void -ironlake_get_initial_plane_config(struct intel_crtc *crtc, - struct intel_initial_plane_config *plane_config) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, base, offset; - int pipe = crtc->pipe; - int fourcc, pixel_format; - unsigned int aligned_height; - struct drm_framebuffer *fb; - struct intel_framebuffer *intel_fb; - - val = I915_READ(DSPCNTR(pipe)); - if (!(val & DISPLAY_PLANE_ENABLE)) - return; - - intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); - if (!intel_fb) { - DRM_DEBUG_KMS("failed to alloc fb\n"); - return; - } - - fb = &intel_fb->base; - - fb->dev = dev; - - if (INTEL_GEN(dev_priv) >= 4) { - if (val & DISPPLANE_TILED) { - plane_config->tiling = I915_TILING_X; - fb->modifier = I915_FORMAT_MOD_X_TILED; - } - } - - pixel_format = val & 
DISPPLANE_PIXFORMAT_MASK; - fourcc = i9xx_format_to_fourcc(pixel_format); - fb->format = drm_format_info(fourcc); - - base = I915_READ(DSPSURF(pipe)) & 0xfffff000; - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - offset = I915_READ(DSPOFFSET(pipe)); - } else { - if (plane_config->tiling) - offset = I915_READ(DSPTILEOFF(pipe)); - else - offset = I915_READ(DSPLINOFF(pipe)); - } - plane_config->base = base; - - val = I915_READ(PIPESRC(pipe)); - fb->width = ((val >> 16) & 0xfff) + 1; - fb->height = ((val >> 0) & 0xfff) + 1; - - val = I915_READ(DSPSTRIDE(pipe)); - fb->pitches[0] = val & 0xffffffc0; - - aligned_height = intel_fb_align_height(fb, 0, fb->height); - - plane_config->size = fb->pitches[0] * aligned_height; - - DRM_DEBUG_KMS("pipe %c with fb: size=%dx%d@%d, offset=%x, pitch %d, size 0x%x\n", - pipe_name(pipe), fb->width, fb->height, - fb->format->cpp[0] * 8, base, fb->pitches[0], - plane_config->size); - - plane_config->fb = intel_fb; -} - static bool ironlake_get_pipe_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -8840,11 +8730,11 @@ static uint32_t hsw_read_dcomp(struct drm_i915_private *dev_priv) static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val) { if (IS_HASWELL(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP, val)) DRM_DEBUG_KMS("Failed to write to D_COMP\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } else { I915_WRITE(D_COMP_BDW, val); POSTING_READ(D_COMP_BDW); @@ -8954,7 +8844,9 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) } intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + intel_update_cdclk(dev_priv); + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); } /* @@ -9026,8 +8918,6 @@ static int haswell_crtc_compute_clock(struct intel_crtc *crtc, } } - crtc->lowfreq_avail = false; - return 0; } @@ -9039,7 +8929,7 @@ static void cannonlake_get_ddi_pll(struct drm_i915_private *dev_priv, u32 temp; temp = I915_READ(DPCLKA_CFGCR0) & DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); - id = temp >> (port * 2); + id = temp >> DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port); if (WARN_ON(id < SKL_DPLL0 || id > SKL_DPLL2)) return; @@ -9304,11 +9194,11 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, pipe_config->gamma_mode = I915_READ(GAMMA_MODE(crtc->pipe)) & GAMMA_MODE_MODE_MASK; - if (IS_BROADWELL(dev_priv) || dev_priv->info.gen >= 9) { + if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) { u32 tmp = I915_READ(PIPEMISC(crtc->pipe)); bool clrspace_yuv = tmp & PIPEMISC_OUTPUT_COLORSPACE_YUV; - if (IS_GEMINILAKE(dev_priv) || dev_priv->info.gen >= 10) { + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) { bool blend_mode_420 = tmp & PIPEMISC_YUV420_MODE_FULL_BLEND; @@ -9330,9 +9220,18 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, ironlake_get_pfit_config(crtc, pipe_config); } - if (IS_HASWELL(dev_priv)) - pipe_config->ips_enabled = hsw_crtc_supports_ips(crtc) && - (I915_READ(IPS_CTL) & IPS_ENABLE); + if (hsw_crtc_supports_ips(crtc)) { + if (IS_HASWELL(dev_priv)) + pipe_config->ips_enabled = I915_READ(IPS_CTL) & IPS_ENABLE; + else { + /* + * We cannot readout IPS state on broadwell, set to + * true so we can set it to a defined state on first + * commit. 
+ */ + pipe_config->ips_enabled = true; + } + } if (pipe_config->cpu_transcoder != TRANSCODER_EDP && !transcoder_is_dsi(pipe_config->cpu_transcoder)) { @@ -9413,11 +9312,12 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, u32 offset; int ret; - ret = drm_plane_helper_check_state(&plane_state->base, - &plane_state->clip, - DRM_PLANE_HELPER_NO_SCALING, - DRM_PLANE_HELPER_NO_SCALING, - true, true); + ret = drm_atomic_helper_check_plane_state(&plane_state->base, + &crtc_state->base, + &plane_state->clip, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, true); if (ret) return ret; @@ -9558,6 +9458,23 @@ static void i845_disable_cursor(struct intel_plane *plane, i845_update_cursor(plane, NULL, NULL); } +static bool i845_cursor_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(PIPE_A); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -9751,9 +9668,31 @@ static void i9xx_disable_cursor(struct intel_plane *plane, i9xx_update_cursor(plane, NULL, NULL); } +static bool i9xx_cursor_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + /* + * Not 100% correct for planes that can move between pipes, + * but that's only the case for gen2-3 which don't have any + * display power wells. 
+ */ + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} /* VESA 640x480x72Hz mode to set on the pipe */ -static struct drm_display_mode load_detect_mode = { +static const struct drm_display_mode load_detect_mode = { DRM_MODE("640x480", DRM_MODE_TYPE_DEFAULT, 31500, 640, 664, 704, 832, 0, 480, 489, 491, 520, 0, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC), }; @@ -9780,117 +9719,33 @@ err: return ERR_PTR(ret); } -static u32 -intel_framebuffer_pitch_for_width(int width, int bpp) -{ - u32 pitch = DIV_ROUND_UP(width * bpp, 8); - return ALIGN(pitch, 64); -} - -static u32 -intel_framebuffer_size_for_mode(struct drm_display_mode *mode, int bpp) -{ - u32 pitch = intel_framebuffer_pitch_for_width(mode->hdisplay, bpp); - return PAGE_ALIGN(pitch * mode->vdisplay); -} - -static struct drm_framebuffer * -intel_framebuffer_create_for_mode(struct drm_device *dev, - struct drm_display_mode *mode, - int depth, int bpp) -{ - struct drm_framebuffer *fb; - struct drm_i915_gem_object *obj; - struct drm_mode_fb_cmd2 mode_cmd = { 0 }; - - obj = i915_gem_object_create(to_i915(dev), - intel_framebuffer_size_for_mode(mode, bpp)); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - mode_cmd.width = mode->hdisplay; - mode_cmd.height = mode->vdisplay; - mode_cmd.pitches[0] = intel_framebuffer_pitch_for_width(mode_cmd.width, - bpp); - mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth); - - fb = intel_framebuffer_create(obj, &mode_cmd); - if (IS_ERR(fb)) - i915_gem_object_put(obj); - - return fb; -} - -static struct drm_framebuffer * -mode_fits_in_fbdev(struct drm_device *dev, - struct drm_display_mode *mode) -{ -#ifdef CONFIG_DRM_FBDEV_EMULATION - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj; - struct drm_framebuffer *fb; - - if (!dev_priv->fbdev) - return NULL; - - if (!dev_priv->fbdev->fb) - return NULL; - - obj = dev_priv->fbdev->fb->obj; - BUG_ON(!obj); - - fb = &dev_priv->fbdev->fb->base; - if (fb->pitches[0] < intel_framebuffer_pitch_for_width(mode->hdisplay, - fb->format->cpp[0] * 8)) - return NULL; - - if (obj->base.size < mode->vdisplay * fb->pitches[0]) - return NULL; - - drm_framebuffer_reference(fb); - return fb; -#else - return NULL; -#endif -} - -static int intel_modeset_setup_plane_state(struct drm_atomic_state *state, - struct drm_crtc *crtc, - struct drm_display_mode *mode, - struct drm_framebuffer *fb, - int x, int y) +static int intel_modeset_disable_planes(struct drm_atomic_state *state, + struct drm_crtc *crtc) { + struct drm_plane *plane; struct drm_plane_state *plane_state; - int hdisplay, vdisplay; - int ret; - - plane_state = drm_atomic_get_plane_state(state, crtc->primary); - if (IS_ERR(plane_state)) - return PTR_ERR(plane_state); - - if (mode) - drm_mode_get_hv_timing(mode, &hdisplay, &vdisplay); - else - hdisplay = vdisplay = 0; + int ret, i; - ret = drm_atomic_set_crtc_for_plane(plane_state, fb ? 
crtc : NULL); + ret = drm_atomic_add_affected_planes(state, crtc); if (ret) return ret; - drm_atomic_set_fb_for_plane(plane_state, fb); - plane_state->crtc_x = 0; - plane_state->crtc_y = 0; - plane_state->crtc_w = hdisplay; - plane_state->crtc_h = vdisplay; - plane_state->src_x = x << 16; - plane_state->src_y = y << 16; - plane_state->src_w = hdisplay << 16; - plane_state->src_h = vdisplay << 16; + + for_each_new_plane_in_state(state, plane, plane_state, i) { + if (plane_state->crtc != crtc) + continue; + + ret = drm_atomic_set_crtc_for_plane(plane_state, NULL); + if (ret) + return ret; + + drm_atomic_set_fb_for_plane(plane_state, NULL); + } return 0; } int intel_get_load_detect_pipe(struct drm_connector *connector, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx) { @@ -9902,7 +9757,6 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, struct drm_crtc *crtc = NULL; struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_framebuffer *fb; struct drm_mode_config *config = &dev->mode_config; struct drm_atomic_state *state = NULL, *restore_state = NULL; struct drm_connector_state *connector_state; @@ -9970,10 +9824,6 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, found: intel_crtc = to_intel_crtc(crtc); - ret = drm_modeset_lock(&crtc->primary->mutex, ctx); - if (ret) - goto fail; - state = drm_atomic_state_alloc(dev); restore_state = drm_atomic_state_alloc(dev); if (!state || !restore_state) { @@ -10005,40 +9855,17 @@ found: if (!mode) mode = &load_detect_mode; - /* We need a framebuffer large enough to accommodate all accesses - * that the plane may generate whilst we perform load detection. - * We can not rely on the fbcon either being present (we get called - * during its initialisation to detect all boot displays, or it may - * not even exist) or that it is large enough to satisfy the - * requested mode. - */ - fb = mode_fits_in_fbdev(dev, mode); - if (fb == NULL) { - DRM_DEBUG_KMS("creating tmp fb for load-detection\n"); - fb = intel_framebuffer_create_for_mode(dev, mode, 24, 32); - } else - DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n"); - if (IS_ERR(fb)) { - DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n"); - ret = PTR_ERR(fb); - goto fail; - } - - ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0); + ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); if (ret) goto fail; - drm_framebuffer_unreference(fb); - - ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); + ret = intel_modeset_disable_planes(state, crtc); if (ret) goto fail; ret = PTR_ERR_OR_ZERO(drm_atomic_get_connector_state(restore_state, connector)); if (!ret) ret = PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, crtc)); - if (!ret) - ret = PTR_ERR_OR_ZERO(drm_atomic_get_plane_state(restore_state, crtc->primary)); if (ret) { DRM_DEBUG_KMS("Failed to create a copy of old state to restore: %i\n", ret); goto fail; @@ -10218,7 +10045,7 @@ int intel_dotclock_calculate(int link_freq, if (!m_n->link_n) return 0; - return div_u64((u64)m_n->link_m * link_freq, m_n->link_n); + return div_u64(mul_u32_u32(m_n->link_m, link_freq), m_n->link_n); } static void ironlake_pch_clock_get(struct intel_crtc *crtc, @@ -10239,62 +10066,44 @@ static void ironlake_pch_clock_get(struct intel_crtc *crtc, &pipe_config->fdi_m_n); } -/** Returns the currently programmed mode of the given pipe. 
*/ -struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, - struct drm_crtc *crtc) +/* Returns the currently programmed mode of the given encoder. */ +struct drm_display_mode * +intel_encoder_current_mode(struct intel_encoder *encoder) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc_state *crtc_state; struct drm_display_mode *mode; - struct intel_crtc_state *pipe_config; - u32 htot, hsync, vtot, vsync; - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc; + enum pipe pipe; + + if (!encoder->get_hw_state(encoder, &pipe)) + return NULL; + + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); mode = kzalloc(sizeof(*mode), GFP_KERNEL); if (!mode) return NULL; - pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL); - if (!pipe_config) { + crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL); + if (!crtc_state) { kfree(mode); return NULL; } - /* - * Construct a pipe_config sufficient for getting the clock info - * back out of crtc_clock_get. - * - * Note, if LVDS ever uses a non-1 pixel multiplier, we'll need - * to use a real value here instead. - */ - pipe_config->cpu_transcoder = (enum transcoder) pipe; - pipe_config->pixel_multiplier = 1; - pipe_config->dpll_hw_state.dpll = I915_READ(DPLL(pipe)); - pipe_config->dpll_hw_state.fp0 = I915_READ(FP0(pipe)); - pipe_config->dpll_hw_state.fp1 = I915_READ(FP1(pipe)); - i9xx_crtc_clock_get(intel_crtc, pipe_config); - - mode->clock = pipe_config->port_clock / pipe_config->pixel_multiplier; - - cpu_transcoder = pipe_config->cpu_transcoder; - htot = I915_READ(HTOTAL(cpu_transcoder)); - hsync = I915_READ(HSYNC(cpu_transcoder)); - vtot = I915_READ(VTOTAL(cpu_transcoder)); - vsync = I915_READ(VSYNC(cpu_transcoder)); - - mode->hdisplay = (htot & 0xffff) + 1; - mode->htotal = ((htot & 0xffff0000) >> 16) + 1; - mode->hsync_start = (hsync & 0xffff) + 1; - mode->hsync_end = ((hsync & 0xffff0000) >> 16) + 1; - mode->vdisplay = (vtot & 0xffff) + 1; - mode->vtotal = ((vtot & 0xffff0000) >> 16) + 1; - mode->vsync_start = (vsync & 0xffff) + 1; - mode->vsync_end = ((vsync & 0xffff0000) >> 16) + 1; + crtc_state->base.crtc = &crtc->base; - drm_mode_set_name(mode); + if (!dev_priv->display.get_pipe_config(crtc, crtc_state)) { + kfree(crtc_state); + kfree(mode); + return NULL; + } - kfree(pipe_config); + encoder->get_config(encoder, crtc_state); + + intel_mode_from_pipe_config(mode, crtc_state); + + kfree(crtc_state); return mode; } @@ -10341,7 +10150,7 @@ static bool intel_wm_need_update(struct drm_plane *plane, return false; } -static bool needs_scaling(struct intel_plane_state *state) +static bool needs_scaling(const struct intel_plane_state *state) { int src_w = drm_rect_width(&state->base.src) >> 16; int src_h = drm_rect_height(&state->base.src) >> 16; @@ -10351,7 +10160,9 @@ static bool needs_scaling(struct intel_plane_state *state) return (src_w != dst_w || src_h != dst_h); } -int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, +int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state, + struct drm_crtc_state *crtc_state, + const struct intel_plane_state *old_plane_state, struct drm_plane_state *plane_state) { struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc_state); @@ -10360,10 +10171,8 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, struct intel_plane *plane = 
to_intel_plane(plane_state->plane); struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane_state *old_plane_state = - to_intel_plane_state(plane->base.state); bool mode_changed = needs_modeset(crtc_state); - bool was_crtc_enabled = crtc->state->active; + bool was_crtc_enabled = old_crtc_state->base.active; bool is_crtc_enabled = crtc_state->active; bool turn_off, turn_on, visible, was_visible; struct drm_framebuffer *fb = plane_state->fb; @@ -10565,6 +10374,9 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, pipe_config); } + if (HAS_IPS(dev_priv)) + pipe_config->ips_enabled = hsw_compute_ips_config(pipe_config); + return ret; } @@ -10681,6 +10493,52 @@ intel_dump_m_n_config(struct intel_crtc_state *pipe_config, char *id, m_n->link_m, m_n->link_n, m_n->tu); } +#define OUTPUT_TYPE(x) [INTEL_OUTPUT_ ## x] = #x + +static const char * const output_type_str[] = { + OUTPUT_TYPE(UNUSED), + OUTPUT_TYPE(ANALOG), + OUTPUT_TYPE(DVO), + OUTPUT_TYPE(SDVO), + OUTPUT_TYPE(LVDS), + OUTPUT_TYPE(TVOUT), + OUTPUT_TYPE(HDMI), + OUTPUT_TYPE(DP), + OUTPUT_TYPE(EDP), + OUTPUT_TYPE(DSI), + OUTPUT_TYPE(DDI), + OUTPUT_TYPE(DP_MST), +}; + +#undef OUTPUT_TYPE + +static void snprintf_output_types(char *buf, size_t len, + unsigned int output_types) +{ + char *str = buf; + int i; + + str[0] = '\0'; + + for (i = 0; i < ARRAY_SIZE(output_type_str); i++) { + int r; + + if ((output_types & BIT(i)) == 0) + continue; + + r = snprintf(str, len, "%s%s", + str != buf ? "," : "", output_type_str[i]); + if (r >= len) + break; + str += r; + len -= r; + + output_types &= ~BIT(i); + } + + WARN_ON_ONCE(output_types != 0); +} + static void intel_dump_pipe_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, const char *context) @@ -10691,10 +10549,15 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, struct intel_plane *intel_plane; struct intel_plane_state *state; struct drm_framebuffer *fb; + char buf[64]; DRM_DEBUG_KMS("[CRTC:%d:%s]%s\n", crtc->base.base.id, crtc->base.name, context); + snprintf_output_types(buf, sizeof(buf), pipe_config->output_types); + DRM_DEBUG_KMS("output_types: %s (0x%x)\n", + buf, pipe_config->output_types); + DRM_DEBUG_KMS("cpu_transcoder: %s, pipe bpp: %i, dithering: %i\n", transcoder_name(pipe_config->cpu_transcoder), pipe_config->pipe_bpp, pipe_config->dither); @@ -10814,13 +10677,13 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) switch (encoder->type) { unsigned int port_mask; - case INTEL_OUTPUT_UNKNOWN: + case INTEL_OUTPUT_DDI: if (WARN_ON(!HAS_DDI(to_i915(dev)))) break; case INTEL_OUTPUT_DP: case INTEL_OUTPUT_HDMI: case INTEL_OUTPUT_EDP: - port_mask = 1 << enc_to_dig_port(&encoder->base)->port; + port_mask = 1 << encoder->port; /* the same port mustn't appear more than once */ if (used_ports & port_mask) @@ -10830,7 +10693,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) break; case INTEL_OUTPUT_DP_MST: used_mst_ports |= - 1 << enc_to_mst(&encoder->base)->primary->port; + 1 << encoder->port; break; default: break; @@ -10854,7 +10717,7 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) struct intel_dpll_hw_state dpll_hw_state; struct intel_shared_dpll *shared_dpll; struct intel_crtc_wm_state wm_state; - bool force_thru; + bool force_thru, ips_force_disable; /* FIXME: before the switch to atomic started, a new pipe_config was * kzalloc'd. 
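The snprintf_output_types() helper added above turns the output_types bitmask into a readable comma-separated list for the state dump. A minimal usage sketch, with an illustrative mask value (not taken from this patch):

	char buf[64];

	snprintf_output_types(buf, sizeof(buf),
			      BIT(INTEL_OUTPUT_HDMI) | BIT(INTEL_OUTPUT_DP));
	/* buf now holds "HDMI,DP"; names come out in ascending enum order. */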
Code that depends on any field being zero should be @@ -10865,6 +10728,7 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) shared_dpll = crtc_state->shared_dpll; dpll_hw_state = crtc_state->dpll_hw_state; force_thru = crtc_state->pch_pfit.force_thru; + ips_force_disable = crtc_state->ips_force_disable; if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) wm_state = crtc_state->wm; @@ -10878,6 +10742,7 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) crtc_state->shared_dpll = shared_dpll; crtc_state->dpll_hw_state = dpll_hw_state; crtc_state->pch_pfit.force_thru = force_thru; + crtc_state->ips_force_disable = ips_force_disable; if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) crtc_state->wm = wm_state; @@ -10945,7 +10810,12 @@ intel_modeset_pipe_config(struct drm_crtc *crtc, * Determine output_types before calling the .compute_config() * hooks so that the hooks can use this information safely. */ - pipe_config->output_types |= 1 << encoder->type; + if (encoder->compute_output_type) + pipe_config->output_types |= + BIT(encoder->compute_output_type(encoder, pipe_config, + connector_state)); + else + pipe_config->output_types |= BIT(encoder->type); } encoder_retry: @@ -11009,31 +10879,6 @@ fail: return ret; } -static void -intel_modeset_update_crtc_state(struct drm_atomic_state *state) -{ - struct drm_crtc *crtc; - struct drm_crtc_state *new_crtc_state; - int i; - - /* Double check state. */ - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { - to_intel_crtc(crtc)->config = to_intel_crtc_state(new_crtc_state); - - /* - * Update legacy state to satisfy fbc code. This can - * be removed when fbc uses the atomic state. - */ - if (drm_atomic_get_existing_plane_state(state, crtc->primary)) { - struct drm_plane_state *plane_state = crtc->primary->state; - - crtc->primary->fb = plane_state->fb; - crtc->x = plane_state->src_x >> 16; - crtc->y = plane_state->src_y >> 16; - } - } -} - static bool intel_fuzzy_clock_check(int clock1, int clock2) { int diff; @@ -11134,6 +10979,9 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, bool adjust) { bool ret = true; + bool fixup_inherited = adjust && + (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && + !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); #define PIPE_CONF_CHECK_X(name) \ if (current_config->name != pipe_config->name) { \ @@ -11153,6 +11001,31 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, ret = false; \ } +#define PIPE_CONF_CHECK_BOOL(name) \ + if (current_config->name != pipe_config->name) { \ + pipe_config_err(adjust, __stringify(name), \ + "(expected %s, found %s)\n", \ + yesno(current_config->name), \ + yesno(pipe_config->name)); \ + ret = false; \ + } + +/* + * Checks state where we only read out the enabling, but not the entire + * state itself (like full infoframes or ELD for audio). These states + * require a full modeset on bootup to fix up. 
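Expressed as a plain function, the PIPE_CONF_CHECK_BOOL_INCOMPLETE() macro defined just below reduces to roughly the following (an illustrative sketch; the function name is made up):

	static bool bool_incomplete_matches(bool fixup_inherited,
					    bool sw_state, bool hw_state)
	{
		/*
		 * An exact compare is only valid if the state was not
		 * inherited from the BIOS, or if both sides agree the
		 * feature is off (then nothing could have been read out
		 * incompletely).
		 */
		if (!fixup_inherited || (!sw_state && !hw_state))
			return sw_state == hw_state;

		/*
		 * Otherwise only the enable bit was read out, so the state
		 * cannot be verified exactly; report a mismatch to force a
		 * full modeset on this pipe.
		 */
		return false;
	}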
+ */ +#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) \ + if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \ + PIPE_CONF_CHECK_BOOL(name); \ + } else { \ + pipe_config_err(adjust, __stringify(name), \ + "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)\n", \ + yesno(current_config->name), \ + yesno(pipe_config->name)); \ + ret = false; \ + } + #define PIPE_CONF_CHECK_P(name) \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ @@ -11238,7 +11111,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_I(cpu_transcoder); - PIPE_CONF_CHECK_I(has_pch_encoder); + PIPE_CONF_CHECK_BOOL(has_pch_encoder); PIPE_CONF_CHECK_I(fdi_lanes); PIPE_CONF_CHECK_M_N(fdi_m_n); @@ -11270,17 +11143,17 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_vsync_end); PIPE_CONF_CHECK_I(pixel_multiplier); - PIPE_CONF_CHECK_I(has_hdmi_sink); + PIPE_CONF_CHECK_BOOL(has_hdmi_sink); if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - PIPE_CONF_CHECK_I(limited_color_range); + PIPE_CONF_CHECK_BOOL(limited_color_range); - PIPE_CONF_CHECK_I(hdmi_scrambling); - PIPE_CONF_CHECK_I(hdmi_high_tmds_clock_ratio); - PIPE_CONF_CHECK_I(has_infoframe); - PIPE_CONF_CHECK_I(ycbcr420); + PIPE_CONF_CHECK_BOOL(hdmi_scrambling); + PIPE_CONF_CHECK_BOOL(hdmi_high_tmds_clock_ratio); + PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_infoframe); + PIPE_CONF_CHECK_BOOL(ycbcr420); - PIPE_CONF_CHECK_I(has_audio); + PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_audio); PIPE_CONF_CHECK_FLAGS(base.adjusted_mode.flags, DRM_MODE_FLAG_INTERLACE); @@ -11306,7 +11179,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_I(pipe_src_w); PIPE_CONF_CHECK_I(pipe_src_h); - PIPE_CONF_CHECK_I(pch_pfit.enabled); + PIPE_CONF_CHECK_BOOL(pch_pfit.enabled); if (current_config->pch_pfit.enabled) { PIPE_CONF_CHECK_X(pch_pfit.pos); PIPE_CONF_CHECK_X(pch_pfit.size); @@ -11316,11 +11189,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_CLOCK_FUZZY(pixel_rate); } - /* BDW+ don't expose a synchronous way to read the state */ - if (IS_HASWELL(dev_priv)) - PIPE_CONF_CHECK_I(ips_enabled); - - PIPE_CONF_CHECK_I(double_wide); + PIPE_CONF_CHECK_BOOL(double_wide); PIPE_CONF_CHECK_P(shared_dpll); PIPE_CONF_CHECK_X(dpll_hw_state.dpll); @@ -11332,6 +11201,18 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_X(dpll_hw_state.ctrl1); PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr1); PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr2); + PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr0); + PIPE_CONF_CHECK_X(dpll_hw_state.ebb0); + PIPE_CONF_CHECK_X(dpll_hw_state.ebb4); + PIPE_CONF_CHECK_X(dpll_hw_state.pll0); + PIPE_CONF_CHECK_X(dpll_hw_state.pll1); + PIPE_CONF_CHECK_X(dpll_hw_state.pll2); + PIPE_CONF_CHECK_X(dpll_hw_state.pll3); + PIPE_CONF_CHECK_X(dpll_hw_state.pll6); + PIPE_CONF_CHECK_X(dpll_hw_state.pll8); + PIPE_CONF_CHECK_X(dpll_hw_state.pll9); + PIPE_CONF_CHECK_X(dpll_hw_state.pll10); + PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12); PIPE_CONF_CHECK_X(dsi_pll.ctrl); PIPE_CONF_CHECK_X(dsi_pll.div); @@ -11342,8 +11223,12 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_CLOCK_FUZZY(base.adjusted_mode.crtc_clock); PIPE_CONF_CHECK_CLOCK_FUZZY(port_clock); + PIPE_CONF_CHECK_I(min_voltage_level); + #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I +#undef PIPE_CONF_CHECK_BOOL +#undef 
PIPE_CONF_CHECK_BOOL_INCOMPLETE #undef PIPE_CONF_CHECK_P #undef PIPE_CONF_CHECK_FLAGS #undef PIPE_CONF_CHECK_CLOCK_FUZZY @@ -11610,10 +11495,8 @@ verify_crtc_state(struct drm_crtc *crtc, "Encoder connected to wrong pipe %c\n", pipe_name(pipe)); - if (active) { - pipe_config->output_types |= 1 << encoder->type; + if (active) encoder->get_config(encoder, pipe_config); - } } intel_crtc_compute_pixel_rate(pipe_config); @@ -11635,6 +11518,18 @@ verify_crtc_state(struct drm_crtc *crtc, } static void +intel_verify_planes(struct intel_atomic_state *state) +{ + struct intel_plane *plane; + const struct intel_plane_state *plane_state; + int i; + + for_each_new_intel_plane_in_state(state, plane, + plane_state, i) + assert_plane(plane, plane_state->base.visible); +} + +static void verify_single_dpll_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct drm_crtc *crtc, @@ -11984,16 +11879,16 @@ static int intel_modeset_checks(struct drm_atomic_state *state) * holding all the crtc locks, even if we don't end up * touching the hardware */ - if (!intel_cdclk_state_compare(&dev_priv->cdclk.logical, - &intel_state->cdclk.logical)) { + if (intel_cdclk_changed(&dev_priv->cdclk.logical, + &intel_state->cdclk.logical)) { ret = intel_lock_all_pipes(state); if (ret < 0) return ret; } /* All pipes must be switched off while we change the cdclk. */ - if (!intel_cdclk_state_compare(&dev_priv->cdclk.actual, - &intel_state->cdclk.actual)) { + if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, + &intel_state->cdclk.actual)) { ret = intel_modeset_all_pipes(state); if (ret < 0) return ret; @@ -12002,6 +11897,9 @@ static int intel_modeset_checks(struct drm_atomic_state *state) DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", intel_state->cdclk.logical.cdclk, intel_state->cdclk.actual.cdclk); + DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n", + intel_state->cdclk.logical.voltage_level, + intel_state->cdclk.actual.voltage_level); } else { to_intel_atomic_state(state)->cdclk.logical = dev_priv->cdclk.logical; } @@ -12080,7 +11978,7 @@ static int intel_atomic_check(struct drm_device *dev, return ret; } - if (i915.fastboot && + if (i915_modparams.fastboot && intel_pipe_config_compare(dev_priv, to_intel_crtc_state(old_crtc_state), pipe_config, true)) { @@ -12113,7 +12011,7 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) return ret; - intel_fbc_choose_crtc(dev_priv, state); + intel_fbc_choose_crtc(dev_priv, intel_state); return calc_watermark_data(state); } @@ -12133,73 +12031,10 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) return dev->driver->get_vblank_counter(dev, crtc->pipe); } -static void intel_atomic_wait_for_vblanks(struct drm_device *dev, - struct drm_i915_private *dev_priv, - unsigned crtc_mask) -{ - unsigned last_vblank_count[I915_MAX_PIPES]; - enum pipe pipe; - int ret; - - if (!crtc_mask) - return; - - for_each_pipe(dev_priv, pipe) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, - pipe); - - if (!((1 << pipe) & crtc_mask)) - continue; - - ret = drm_crtc_vblank_get(&crtc->base); - if (WARN_ON(ret != 0)) { - crtc_mask &= ~(1 << pipe); - continue; - } - - last_vblank_count[pipe] = drm_crtc_vblank_count(&crtc->base); - } - - for_each_pipe(dev_priv, pipe) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, - pipe); - long lret; - - if (!((1 << pipe) & crtc_mask)) - continue; - - lret = wait_event_timeout(dev->vblank[pipe].queue, - last_vblank_count[pipe] != - 
drm_crtc_vblank_count(&crtc->base), - msecs_to_jiffies(50)); - - WARN(!lret, "pipe %c vblank wait timed out\n", pipe_name(pipe)); - - drm_crtc_vblank_put(&crtc->base); - } -} - -static bool needs_vblank_wait(struct intel_crtc_state *crtc_state) -{ - /* fb updated, need to unpin old fb */ - if (crtc_state->fb_changed) - return true; - - /* wm changes, need vblank before final wm's */ - if (crtc_state->update_wm_post) - return true; - - if (crtc_state->wm.need_postvbl_update) - return true; - - return false; -} - static void intel_update_crtc(struct drm_crtc *crtc, struct drm_atomic_state *state, struct drm_crtc_state *old_crtc_state, - struct drm_crtc_state *new_crtc_state, - unsigned int *crtc_vblank_mask) + struct drm_crtc_state *new_crtc_state) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -12222,13 +12057,9 @@ static void intel_update_crtc(struct drm_crtc *crtc, } drm_atomic_helper_commit_planes_on_crtc(old_crtc_state); - - if (needs_vblank_wait(pipe_config)) - *crtc_vblank_mask |= drm_crtc_mask(crtc); } -static void intel_update_crtcs(struct drm_atomic_state *state, - unsigned int *crtc_vblank_mask) +static void intel_update_crtcs(struct drm_atomic_state *state) { struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; @@ -12239,12 +12070,11 @@ static void intel_update_crtcs(struct drm_atomic_state *state, continue; intel_update_crtc(crtc, state, old_crtc_state, - new_crtc_state, crtc_vblank_mask); + new_crtc_state); } } -static void skl_update_crtcs(struct drm_atomic_state *state, - unsigned int *crtc_vblank_mask) +static void skl_update_crtcs(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); @@ -12278,13 +12108,16 @@ static void skl_update_crtcs(struct drm_atomic_state *state, unsigned int cmask = drm_crtc_mask(crtc); intel_crtc = to_intel_crtc(crtc); - cstate = to_intel_crtc_state(crtc->state); + cstate = to_intel_crtc_state(new_crtc_state); pipe = intel_crtc->pipe; if (updated & cmask || !cstate->base.active) continue; - if (skl_ddb_allocation_overlaps(entries, &cstate->wm.skl.ddb, i)) + if (skl_ddb_allocation_overlaps(dev_priv, + entries, + &cstate->wm.skl.ddb, + i)) continue; updated |= cmask; @@ -12303,7 +12136,7 @@ static void skl_update_crtcs(struct drm_atomic_state *state, vbl_wait = true; intel_update_crtc(crtc, state, old_crtc_state, - new_crtc_state, crtc_vblank_mask); + new_crtc_state); if (vbl_wait) intel_wait_for_vblank(dev_priv, pipe); @@ -12364,7 +12197,6 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_crtc *crtc; struct intel_crtc_state *intel_cstate; u64 put_domains[I915_MAX_PIPES] = {}; - unsigned crtc_vblank_mask = 0; int i; intel_atomic_commit_fence_wait(intel_state); @@ -12405,7 +12237,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_check_cpu_fifo_underruns(dev_priv); intel_check_pch_fifo_underruns(dev_priv); - if (!crtc->state->active) { + if (!new_crtc_state->active) { /* * Make sure we don't call initial_watermarks * for ILK-style watermark updates. 
@@ -12414,14 +12246,14 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) */ if (INTEL_GEN(dev_priv) >= 9) dev_priv->display.initial_watermarks(intel_state, - to_intel_crtc_state(crtc->state)); + to_intel_crtc_state(new_crtc_state)); } } } - /* Only after disabling all output pipelines that will be changed can we - * update the the output configuration. */ - intel_modeset_update_crtc_state(state); + /* FIXME: Eventually get rid of our intel_crtc->config pointer */ + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) + to_intel_crtc(crtc)->config = to_intel_crtc_state(new_crtc_state); if (intel_state->modeset) { drm_atomic_helper_update_legacy_modeset_state(state->dev, state); @@ -12453,7 +12285,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) } /* Now enable the clocks, plane, pipe, and connectors that we set up. */ - dev_priv->display.update_crtcs(state, &crtc_vblank_mask); + dev_priv->display.update_crtcs(state); /* FIXME: We should call drm_atomic_helper_commit_hw_done() here * already, but still need the state for the delayed optimization. To @@ -12464,8 +12296,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * - switch over to the vblank wait helper in the core after that since * we don't need out special handling any more. */ - if (!state->legacy_cursor_update) - intel_atomic_wait_for_vblanks(dev, dev_priv, crtc_vblank_mask); + drm_atomic_helper_wait_for_flip_done(dev, state); /* * Now that the vblank has passed, we can go ahead and program the @@ -12491,6 +12322,9 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_modeset_verify_crtc(crtc, state, old_crtc_state, new_crtc_state); } + if (intel_state->modeset) + intel_verify_planes(intel_state); + if (intel_state->modeset && intel_can_enable_sagv(state)) intel_enable_sagv(dev_priv); @@ -12581,21 +12415,10 @@ static int intel_atomic_commit(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; - ret = drm_atomic_helper_setup_commit(state, nonblock); - if (ret) - return ret; - drm_atomic_state_get(state); i915_sw_fence_init(&intel_state->commit_ready, intel_atomic_commit_ready); - ret = intel_atomic_prepare_commit(dev, state); - if (ret) { - DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); - i915_sw_fence_commit(&intel_state->commit_ready); - return ret; - } - /* * The intel_legacy_cursor_update() fast path takes care * of avoiding the vblank waits for simple cursor @@ -12604,19 +12427,37 @@ static int intel_atomic_commit(struct drm_device *dev, * updates happen during the correct frames. Gen9+ have * double buffered watermarks and so shouldn't need this. * - * Do this after drm_atomic_helper_setup_commit() and - * intel_atomic_prepare_commit() because we still want - * to skip the flip and fb cleanup waits. Although that - * does risk yanking the mapping from under the display - * engine. + * Unset state->legacy_cursor_update before the call to + * drm_atomic_helper_setup_commit() because otherwise + * drm_atomic_helper_wait_for_flip_done() is a noop and + * we get FIFO underruns because we didn't wait + * for vblank. * * FIXME doing watermarks and fb cleanup from a vblank worker * (assuming we had any) would solve these problems. 
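For clarity, the commit ordering that results from this change, condensed into a sketch; maybe_clear_legacy_cursor_update() is a hypothetical stand-in for the open-coded watermark check below, not a real function:

	static int intel_atomic_commit_order_sketch(struct drm_device *dev,
						    struct drm_atomic_state *state,
						    bool nonblock)
	{
		int ret;

		/*
		 * 1. Decide up front whether the cursor fast path may really
		 *    skip the vblank wait; clearing legacy_cursor_update here
		 *    keeps drm_atomic_helper_wait_for_flip_done() from
		 *    turning into a no-op later.
		 */
		maybe_clear_legacy_cursor_update(state);	/* hypothetical */

		/* 2. Pin framebuffers and wait for rendering. */
		ret = intel_atomic_prepare_commit(dev, state);
		if (ret)
			return ret;

		/* 3. Only then publish the commit and swap in the new state. */
		ret = drm_atomic_helper_setup_commit(state, nonblock);
		if (!ret)
			ret = drm_atomic_helper_swap_state(state, true);

		return ret;
	}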
*/ - if (INTEL_GEN(dev_priv) < 9) - state->legacy_cursor_update = false; + if (INTEL_GEN(dev_priv) < 9 && state->legacy_cursor_update) { + struct intel_crtc_state *new_crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_new_intel_crtc_in_state(intel_state, crtc, new_crtc_state, i) + if (new_crtc_state->wm.need_postvbl_update || + new_crtc_state->update_wm_post) + state->legacy_cursor_update = false; + } + + ret = intel_atomic_prepare_commit(dev, state); + if (ret) { + DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); + i915_sw_fence_commit(&intel_state->commit_ready); + return ret; + } + + ret = drm_atomic_helper_setup_commit(state, nonblock); + if (!ret) + ret = drm_atomic_helper_swap_state(state, true); - ret = drm_atomic_helper_swap_state(state, true); if (ret) { i915_sw_fence_commit(&intel_state->commit_ready); @@ -12628,8 +12469,11 @@ static int intel_atomic_commit(struct drm_device *dev, intel_atomic_track_fbs(state); if (intel_state->modeset) { - memcpy(dev_priv->min_pixclk, intel_state->min_pixclk, - sizeof(intel_state->min_pixclk)); + memcpy(dev_priv->min_cdclk, intel_state->min_cdclk, + sizeof(intel_state->min_cdclk)); + memcpy(dev_priv->min_voltage_level, + intel_state->min_voltage_level, + sizeof(intel_state->min_voltage_level)); dev_priv->active_crtcs = intel_state->active_crtcs; dev_priv->cdclk.logical = intel_state->cdclk.logical; dev_priv->cdclk.actual = intel_state->cdclk.actual; @@ -12639,11 +12483,15 @@ static int intel_atomic_commit(struct drm_device *dev, INIT_WORK(&state->commit_work, intel_atomic_commit_work); i915_sw_fence_commit(&intel_state->commit_ready); - if (nonblock) + if (nonblock && intel_state->modeset) { + queue_work(dev_priv->modeset_wq, &state->commit_work); + } else if (nonblock) { queue_work(system_unbound_wq, &state->commit_work); - else + } else { + if (intel_state->modeset) + flush_workqueue(dev_priv->modeset_wq); intel_atomic_commit_tail(state); - + } return 0; } @@ -12658,6 +12506,58 @@ static const struct drm_crtc_funcs intel_crtc_funcs = { .set_crc_source = intel_crtc_set_crc_source, }; +struct wait_rps_boost { + struct wait_queue_entry wait; + + struct drm_crtc *crtc; + struct drm_i915_gem_request *request; +}; + +static int do_rps_boost(struct wait_queue_entry *_wait, + unsigned mode, int sync, void *key) +{ + struct wait_rps_boost *wait = container_of(_wait, typeof(*wait), wait); + struct drm_i915_gem_request *rq = wait->request; + + gen6_rps_boost(rq, NULL); + i915_gem_request_put(rq); + + drm_crtc_vblank_put(wait->crtc); + + list_del(&wait->wait.entry); + kfree(wait); + return 1; +} + +static void add_rps_boost_after_vblank(struct drm_crtc *crtc, + struct dma_fence *fence) +{ + struct wait_rps_boost *wait; + + if (!dma_fence_is_i915(fence)) + return; + + if (INTEL_GEN(to_i915(crtc->dev)) < 6) + return; + + if (drm_crtc_vblank_get(crtc)) + return; + + wait = kmalloc(sizeof(*wait), GFP_KERNEL); + if (!wait) { + drm_crtc_vblank_put(crtc); + return; + } + + wait->request = to_request(dma_fence_get(fence)); + wait->crtc = crtc; + + wait->wait.func = do_rps_boost; + wait->wait.flags = 0; + + add_wait_queue(drm_crtc_vblank_waitqueue(crtc), &wait->wait); +} + /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for @@ -12755,12 +12655,22 @@ intel_prepare_plane_fb(struct drm_plane *plane, return ret; if (!new_state->fence) { /* implicit fencing */ + struct dma_fence *fence; + ret = i915_sw_fence_await_reservation(&intel_state->commit_ready, obj->resv, NULL, false, I915_FENCE_TIMEOUT, 
GFP_KERNEL); if (ret < 0) return ret; + + fence = reservation_object_get_excl_rcu(obj->resv); + if (fence) { + add_rps_boost_after_vblank(new_state->crtc, fence); + dma_fence_put(fence); + } + } else { + add_rps_boost_after_vblank(new_state->crtc, new_state->fence); } return 0; @@ -12805,7 +12715,7 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; max_dotclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; - if (IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) max_dotclk *= 2; if (WARN_ON_ONCE(!crtc_clock || max_dotclk < crtc_clock)) @@ -12844,10 +12754,11 @@ intel_check_primary_plane(struct intel_plane *plane, can_position = true; } - ret = drm_plane_helper_check_state(&state->base, - &state->clip, - min_scale, max_scale, - can_position, true); + ret = drm_atomic_helper_check_plane_state(&state->base, + &crtc_state->base, + &state->clip, + min_scale, max_scale, + can_position, true); if (ret) return ret; @@ -12868,6 +12779,9 @@ intel_check_primary_plane(struct intel_plane *plane, state->ctl = i9xx_plane_ctl(crtc_state, state); } + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + state->color_ctl = glk_plane_color_ctl(crtc_state, state); + return 0; } @@ -12877,29 +12791,29 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_crtc_state *intel_cstate = - to_intel_crtc_state(crtc->state); struct intel_crtc_state *old_intel_cstate = to_intel_crtc_state(old_crtc_state); struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_crtc_state->state); - bool modeset = needs_modeset(crtc->state); + struct intel_crtc_state *intel_cstate = + intel_atomic_get_new_crtc_state(old_intel_state, intel_crtc); + bool modeset = needs_modeset(&intel_cstate->base); if (!modeset && (intel_cstate->base.color_mgmt_changed || intel_cstate->update_pipe)) { - intel_color_set_csc(crtc->state); - intel_color_load_luts(crtc->state); + intel_color_set_csc(&intel_cstate->base); + intel_color_load_luts(&intel_cstate->base); } /* Perform vblank evasion around commit operation */ - intel_pipe_update_start(intel_crtc); + intel_pipe_update_start(intel_cstate); if (modeset) goto out; if (intel_cstate->update_pipe) - intel_update_pipe_config(intel_crtc, old_intel_cstate); + intel_update_pipe_config(old_intel_cstate, intel_cstate); else if (INTEL_GEN(dev_priv) >= 9) skl_detach_scalers(intel_crtc); @@ -12912,9 +12826,28 @@ out: static void intel_finish_crtc_commit(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { + struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_crtc_state->state); + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(old_intel_state, intel_crtc); - intel_pipe_update_end(intel_crtc); + intel_pipe_update_end(new_crtc_state); + + if (new_crtc_state->update_pipe && + !needs_modeset(&new_crtc_state->base) && + old_crtc_state->mode.private_flags & I915_MODE_FLAG_INHERITED) { + if (!IS_GEN2(dev_priv)) + intel_set_cpu_fifo_underrun_reporting(dev_priv, intel_crtc->pipe, true); + + if (new_crtc_state->has_pch_encoder) { + enum pipe pch_transcoder = + intel_crtc_pch_transcoder(intel_crtc); + + 
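The do_rps_boost()/add_rps_boost_after_vblank() pair added earlier in this hunk is an instance of a reusable idiom: a self-removing wait_queue_entry whose callback performs deferred work on wakeup. A generic, illustrative reduction of the pattern (all names here are made up):

	#include <linux/wait.h>
	#include <linux/slab.h>

	struct one_shot_wait {
		struct wait_queue_entry wait;
		void *payload;	/* whatever the callback needs */
	};

	/* Runs in wakeup context when the waitqueue is woken. */
	static int one_shot_cb(struct wait_queue_entry *_wait,
			       unsigned mode, int sync, void *key)
	{
		struct one_shot_wait *w = container_of(_wait, typeof(*w), wait);

		/* ... do the deferred work with w->payload ... */

		list_del(&w->wait.entry);	/* detach: fire exactly once */
		kfree(w);
		return 1;			/* count as a woken waiter */
	}

	static void arm_one_shot(struct wait_queue_head *wq, void *payload)
	{
		struct one_shot_wait *w = kmalloc(sizeof(*w), GFP_KERNEL);

		if (!w)
			return;

		w->payload = payload;
		w->wait.flags = 0;
		w->wait.func = one_shot_cb;
		add_wait_queue(wq, &w->wait);
	}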
intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); + } + } } /** @@ -13063,6 +12996,14 @@ intel_legacy_cursor_update(struct drm_plane *plane, goto slow; old_plane_state = plane->state; + /* + * Don't do an async update if there is an outstanding commit modifying + * the plane. This prevents our async update's changes from getting + * overridden by a previous synchronous update's state. + */ + if (old_plane_state->commit && + !try_wait_for_completion(&old_plane_state->commit->hw_done)) + goto slow; /* * If any parameters change that may affect watermarks, @@ -13093,6 +13034,8 @@ intel_legacy_cursor_update(struct drm_plane *plane, new_plane_state->crtc_h = crtc_h; ret = intel_plane_atomic_check_with_state(to_intel_crtc_state(crtc->state), + to_intel_crtc_state(crtc->state), /* FIXME need a new crtc state? */ + to_intel_plane_state(plane->state), to_intel_plane_state(new_plane_state)); if (ret) goto out_free; @@ -13122,17 +13065,12 @@ intel_legacy_cursor_update(struct drm_plane *plane, } old_fb = old_plane_state->fb; - old_vma = to_intel_plane_state(old_plane_state)->vma; i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(fb), intel_plane->frontbuffer_bit); /* Swap plane state */ - new_plane_state->fence = old_plane_state->fence; - *to_intel_plane_state(old_plane_state) = *to_intel_plane_state(new_plane_state); - new_plane_state->fence = NULL; - new_plane_state->fb = old_fb; - to_intel_plane_state(new_plane_state)->vma = NULL; + plane->state = new_plane_state; if (plane->state->visible) { trace_intel_update_plane(plane, to_intel_crtc(crtc)); @@ -13144,13 +13082,17 @@ intel_legacy_cursor_update(struct drm_plane *plane, intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc)); } + old_vma = fetch_and_zero(&to_intel_plane_state(old_plane_state)->vma); if (old_vma) intel_unpin_fb_vma(old_vma); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); out_free: - intel_plane_destroy_state(plane, new_plane_state); + if (ret) + intel_plane_destroy_state(plane, new_plane_state); + else + intel_plane_destroy_state(plane, old_plane_state); return ret; slow: @@ -13207,20 +13149,21 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) * port is hooked to pipe B. Hence we want plane A feeding pipe B. 
*/ if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) < 4) - primary->plane = (enum plane) !pipe; + primary->i9xx_plane = (enum i9xx_plane_id) !pipe; else - primary->plane = (enum plane) pipe; + primary->i9xx_plane = (enum i9xx_plane_id) pipe; primary->id = PLANE_PRIMARY; primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe); primary->check_plane = intel_check_primary_plane; - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 10) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); modifiers = skl_format_modifiers_ccs; - primary->update_plane = skylake_update_primary_plane; - primary->disable_plane = skylake_disable_primary_plane; + primary->update_plane = skl_update_plane; + primary->disable_plane = skl_disable_plane; + primary->get_hw_state = skl_plane_get_hw_state; } else if (INTEL_GEN(dev_priv) >= 9) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); @@ -13229,22 +13172,25 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) else modifiers = skl_format_modifiers_noccs; - primary->update_plane = skylake_update_primary_plane; - primary->disable_plane = skylake_disable_primary_plane; + primary->update_plane = skl_update_plane; + primary->disable_plane = skl_disable_plane; + primary->get_hw_state = skl_plane_get_hw_state; } else if (INTEL_GEN(dev_priv) >= 4) { intel_primary_formats = i965_primary_formats; num_formats = ARRAY_SIZE(i965_primary_formats); modifiers = i9xx_format_modifiers; - primary->update_plane = i9xx_update_primary_plane; - primary->disable_plane = i9xx_disable_primary_plane; + primary->update_plane = i9xx_update_plane; + primary->disable_plane = i9xx_disable_plane; + primary->get_hw_state = i9xx_plane_get_hw_state; } else { intel_primary_formats = i8xx_primary_formats; num_formats = ARRAY_SIZE(i8xx_primary_formats); modifiers = i9xx_format_modifiers; - primary->update_plane = i9xx_update_primary_plane; - primary->disable_plane = i9xx_disable_primary_plane; + primary->update_plane = i9xx_update_plane; + primary->disable_plane = i9xx_disable_plane; + primary->get_hw_state = i9xx_plane_get_hw_state; } if (INTEL_GEN(dev_priv) >= 9) @@ -13267,11 +13213,17 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) intel_primary_formats, num_formats, modifiers, DRM_PLANE_TYPE_PRIMARY, - "plane %c", plane_name(primary->plane)); + "plane %c", + plane_name(primary->i9xx_plane)); if (ret) goto fail; - if (INTEL_GEN(dev_priv) >= 9) { + if (INTEL_GEN(dev_priv) >= 10) { + supported_rotations = + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270 | + DRM_MODE_REFLECT_X; + } else if (INTEL_GEN(dev_priv) >= 9) { supported_rotations = DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; @@ -13327,17 +13279,19 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, cursor->can_scale = false; cursor->max_downscale = 1; cursor->pipe = pipe; - cursor->plane = pipe; + cursor->i9xx_plane = (enum i9xx_plane_id) pipe; cursor->id = PLANE_CURSOR; cursor->frontbuffer_bit = INTEL_FRONTBUFFER_CURSOR(pipe); if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { cursor->update_plane = i845_update_cursor; cursor->disable_plane = i845_disable_cursor; + cursor->get_hw_state = i845_cursor_get_hw_state; cursor->check_plane = i845_check_cursor; } else { cursor->update_plane = i9xx_update_cursor; cursor->disable_plane = i9xx_disable_cursor; + cursor->get_hw_state = 
i9xx_cursor_get_hw_state; cursor->check_plane = i9xx_check_cursor; } @@ -13453,14 +13407,13 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) goto fail; intel_crtc->pipe = pipe; - intel_crtc->plane = primary->plane; /* initialize shared scalers */ intel_crtc_init_scalers(intel_crtc, crtc_state); BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || - dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); - dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = intel_crtc; + dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] != NULL); + dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] = intel_crtc; dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = intel_crtc; drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); @@ -13501,7 +13454,7 @@ int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, struct drm_crtc *drmmode_crtc; struct intel_crtc *crtc; - drmmode_crtc = drm_crtc_find(dev, pipe_from_crtc_id->crtc_id); + drmmode_crtc = drm_crtc_find(dev, file, pipe_from_crtc_id->crtc_id); if (!drmmode_crtc) return -ENOENT; @@ -13665,7 +13618,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) } else if (HAS_PCH_SPLIT(dev_priv)) { int found; - dpd_is_edp = intel_dp_is_edp(dev_priv, PORT_D); + dpd_is_edp = intel_dp_is_port_edp(dev_priv, PORT_D); if (has_edp_a(dev_priv)) intel_dp_init(dev_priv, DP_A, PORT_A); @@ -13708,14 +13661,14 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) * trust the port type the VBT declares as we've seen at least * HDMI ports that the VBT claim are DP or eDP. */ - has_edp = intel_dp_is_edp(dev_priv, PORT_B); + has_edp = intel_dp_is_port_edp(dev_priv, PORT_B); has_port = intel_bios_is_port_present(dev_priv, PORT_B); if (I915_READ(VLV_DP_B) & DP_DETECTED || has_port) has_edp &= intel_dp_init(dev_priv, VLV_DP_B, PORT_B); if ((I915_READ(VLV_HDMIB) & SDVO_DETECTED || has_port) && !has_edp) intel_hdmi_init(dev_priv, VLV_HDMIB, PORT_B); - has_edp = intel_dp_is_edp(dev_priv, PORT_C); + has_edp = intel_dp_is_port_edp(dev_priv, PORT_C); has_port = intel_bios_is_port_present(dev_priv, PORT_C); if (I915_READ(VLV_DP_C) & DP_DETECTED || has_port) has_edp &= intel_dp_init(dev_priv, VLV_DP_C, PORT_C); @@ -14140,7 +14093,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } else if (HAS_DDI(dev_priv)) { dev_priv->display.get_pipe_config = haswell_get_pipe_config; dev_priv->display.get_initial_plane_config = - ironlake_get_initial_plane_config; + i9xx_get_initial_plane_config; dev_priv->display.crtc_compute_clock = haswell_crtc_compute_clock; dev_priv->display.crtc_enable = haswell_crtc_enable; @@ -14148,7 +14101,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } else if (HAS_PCH_SPLIT(dev_priv)) { dev_priv->display.get_pipe_config = ironlake_get_pipe_config; dev_priv->display.get_initial_plane_config = - ironlake_get_initial_plane_config; + i9xx_get_initial_plane_config; dev_priv->display.crtc_compute_clock = ironlake_crtc_compute_clock; dev_priv->display.crtc_enable = ironlake_crtc_enable; @@ -14208,7 +14161,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.fdi_link_train = hsw_fdi_link_train; } - if (dev_priv->info.gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) dev_priv->display.update_crtcs = skl_update_crtcs; else dev_priv->display.update_crtcs = intel_update_crtcs; @@ -14387,9 +14340,8 @@ void intel_modeset_init_hw(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); 
intel_update_cdclk(dev_priv); + intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; - - intel_init_clock_gating(dev_priv); } /* @@ -14468,6 +14420,8 @@ retry: cs->wm.need_postvbl_update = true; dev_priv->display.optimize_watermarks(intel_state, cs); + + to_intel_crtc_state(crtc->state)->wm = cs->wm; } put_state: @@ -14477,6 +14431,22 @@ fail: drm_modeset_acquire_fini(&ctx); } +static void intel_update_fdi_pll_freq(struct drm_i915_private *dev_priv) +{ + if (IS_GEN5(dev_priv)) { + u32 fdi_pll_clk = + I915_READ(FDI_PLL_BIOS_0) & FDI_PLL_FB_CLOCK_MASK; + + dev_priv->fdi_pll_freq = (fdi_pll_clk + 2) * 10000; + } else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) { + dev_priv->fdi_pll_freq = 270000; + } else { + return; + } + + DRM_DEBUG_DRIVER("FDI PLL freq=%d\n", dev_priv->fdi_pll_freq); +} + int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -14484,6 +14454,8 @@ int intel_modeset_init(struct drm_device *dev) enum pipe pipe; struct intel_crtc *crtc; + dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); + drm_mode_config_init(dev); dev->mode_config.min_width = 0; @@ -14547,7 +14519,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.cursor_height = MAX_CURSOR_HEIGHT; } - dev->mode_config.fb_base = ggtt->mappable_base; + dev->mode_config.fb_base = ggtt->gmadr.start; DRM_DEBUG_KMS("%d display pipe%s available.\n", INTEL_INFO(dev_priv)->num_pipes, @@ -14564,6 +14536,7 @@ int intel_modeset_init(struct drm_device *dev) } intel_shared_dpll_init(dev); + intel_update_fdi_pll_freq(dev_priv); intel_update_czclk(dev_priv); intel_modeset_init_hw(dev); @@ -14615,6 +14588,7 @@ int intel_modeset_init(struct drm_device *dev) void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) { + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); /* 640x480@60Hz, ~25175 kHz */ struct dpll clock = { .m1 = 18, @@ -14678,42 +14652,62 @@ void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) I915_WRITE(PIPECONF(pipe), PIPECONF_ENABLE | PIPECONF_PROGRESSIVE); POSTING_READ(PIPECONF(pipe)); + + intel_wait_for_pipe_scanline_moving(crtc); } void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) { + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + DRM_DEBUG_KMS("disabling pipe %c due to force quirk\n", pipe_name(pipe)); - assert_plane_disabled(dev_priv, PLANE_A); - assert_plane_disabled(dev_priv, PLANE_B); + WARN_ON(I915_READ(DSPCNTR(PLANE_A)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(DSPCNTR(PLANE_B)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(DSPCNTR(PLANE_C)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(CURCNTR(PIPE_A)) & CURSOR_MODE); + WARN_ON(I915_READ(CURCNTR(PIPE_B)) & CURSOR_MODE); I915_WRITE(PIPECONF(pipe), 0); POSTING_READ(PIPECONF(pipe)); - if (wait_for(pipe_dsl_stopped(dev_priv, pipe), 100)) - DRM_ERROR("pipe %c off wait timed out\n", pipe_name(pipe)); + intel_wait_for_pipe_scanline_stopped(crtc); I915_WRITE(DPLL(pipe), DPLL_VGA_MODE_DIS); POSTING_READ(DPLL(pipe)); } -static bool -intel_check_plane_mapping(struct intel_crtc *crtc) +static bool intel_plane_mapping_ok(struct intel_crtc *crtc, + struct intel_plane *plane) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 val; + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; + u32 val = I915_READ(DSPCNTR(i9xx_plane)); - if (INTEL_INFO(dev_priv)->num_pipes == 1) - return true; + return (val & 
DISPLAY_PLANE_ENABLE) == 0 || + (val & DISPPLANE_SEL_PIPE_MASK) == DISPPLANE_SEL_PIPE(crtc->pipe); +} - val = I915_READ(DSPCNTR(!crtc->plane)); +static void +intel_sanitize_plane_mapping(struct drm_i915_private *dev_priv) +{ + struct intel_crtc *crtc; - if ((val & DISPLAY_PLANE_ENABLE) && - (!!(val & DISPPLANE_SEL_PIPE_MASK) == crtc->pipe)) - return false; + if (INTEL_GEN(dev_priv) >= 4) + return; - return true; + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + + if (intel_plane_mapping_ok(crtc, plane)) + continue; + + DRM_DEBUG_KMS("%s attached to the wrong pipe, disabling plane\n", + plane->base.name); + intel_plane_disable_noatomic(crtc, plane); + } } static bool intel_crtc_has_encoders(struct intel_crtc *crtc) @@ -14739,10 +14733,10 @@ static struct intel_connector *intel_encoder_find_connector(struct intel_encoder } static bool has_pch_trancoder(struct drm_i915_private *dev_priv, - enum transcoder pch_transcoder) + enum pipe pch_transcoder) { return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || - (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A); + (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == PIPE_A); } static void intel_sanitize_crtc(struct intel_crtc *crtc, @@ -14753,7 +14747,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; /* Clear any frame start delays used for debugging left by the BIOS */ - if (!transcoder_is_dsi(cpu_transcoder)) { + if (crtc->active && !transcoder_is_dsi(cpu_transcoder)) { i915_reg_t reg = PIPECONF(cpu_transcoder); I915_WRITE(reg, @@ -14769,33 +14763,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, /* Disable everything but the primary plane */ for_each_intel_plane_on_crtc(dev, crtc, plane) { - if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) - continue; + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); - trace_intel_disable_plane(&plane->base, crtc); - plane->disable_plane(plane, crtc); + if (plane_state->base.visible && + plane->base.type != DRM_PLANE_TYPE_PRIMARY) + intel_plane_disable_noatomic(crtc, plane); } } - /* We need to sanitize the plane -> pipe mapping first because this will - * disable the crtc (and hence change the state) if it is wrong. Note - * that gen4+ has a fixed plane -> pipe mapping. */ - if (INTEL_GEN(dev_priv) < 4 && !intel_check_plane_mapping(crtc)) { - bool plane; - - DRM_DEBUG_KMS("[CRTC:%d:%s] wrong plane connection detected!\n", - crtc->base.base.id, crtc->base.name); - - /* Pipe has the wrong plane attached and the plane is active. - * Temporarily change the plane mapping and disable everything - * ... */ - plane = crtc->plane; - crtc->base.primary->state->visible = true; - crtc->plane = !plane; - intel_crtc_disable_noatomic(&crtc->base, ctx); - crtc->plane = plane; - } - /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. */ if (crtc->active && !intel_crtc_has_encoders(crtc)) @@ -14825,7 +14801,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, * PCH transcoders B and C would prevent enabling the south * error interrupt (see cpt_can_enable_serr_int()). 
*/ - if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe)) + if (has_pch_trancoder(dev_priv, crtc->pipe)) crtc->pch_fifo_underrun_disabled = true; } } @@ -14869,8 +14845,6 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) connector->base.dpms = DRM_MODE_DPMS_OFF; connector->base.encoder = NULL; } - /* Enabled encoders without active connectors will be fixed in - * the crtc fixup. */ } void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv) @@ -14900,24 +14874,21 @@ void i915_redisable_vga(struct drm_i915_private *dev_priv) intel_display_power_put(dev_priv, POWER_DOMAIN_VGA); } -static bool primary_get_hw_state(struct intel_plane *plane) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - - return I915_READ(DSPCNTR(plane->plane)) & DISPLAY_PLANE_ENABLE; -} - /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct intel_crtc *crtc) { - struct intel_plane *primary = to_intel_plane(crtc->base.primary); - bool visible; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane; - visible = crtc->active && primary_get_hw_state(primary); + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + bool visible = plane->get_hw_state(plane); - intel_set_plane_visible(to_intel_crtc_state(crtc->base.state), - to_intel_plane_state(primary->base.state), - visible); + intel_set_plane_visible(crtc_state, plane_state, visible); + } } static void intel_modeset_readout_hw_state(struct drm_device *dev) @@ -14986,7 +14957,6 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) crtc_state = to_intel_crtc_state(crtc->base.state); encoder->base.crtc = &crtc->base; - crtc_state->output_types |= 1 << encoder->type; encoder->get_config(encoder, crtc_state); } else { encoder->base.crtc = NULL; @@ -15032,7 +15002,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); - int pixclk = 0; + int min_cdclk = 0; memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); if (crtc_state->base.active) { @@ -15053,22 +15023,20 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) intel_crtc_compute_pixel_rate(crtc_state); - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv) || - IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - pixclk = crtc_state->pixel_rate; - else - WARN_ON(dev_priv->display.modeset_calc_cdclk); - - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixclk = DIV_ROUND_UP(pixclk * 100, 95); + if (dev_priv->display.modeset_calc_cdclk) { + min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); + if (WARN_ON(min_cdclk < 0)) + min_cdclk = 0; + } drm_calc_timestamping_constants(&crtc->base, &crtc_state->base.adjusted_mode); update_scanline_offset(crtc); } - dev_priv->min_pixclk[crtc->pipe] = pixclk; + dev_priv->min_cdclk[crtc->pipe] = min_cdclk; + dev_priv->min_voltage_level[crtc->pipe] = + crtc_state->min_voltage_level; intel_pipe_config_sanity_check(dev_priv, crtc_state); } @@ -15092,6 +15060,23 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) } } +static void intel_early_display_was(struct drm_i915_private *dev_priv) +{ + /* Display WA #1185 WaDisableDARBFClkGating:cnl,glk */ + if 
(IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + DARBF_GATING_DIS); + + if (IS_HASWELL(dev_priv)) { + /* + * WaRsPkgCStateDisplayPMReq:hsw + * System hang if this isn't done before disabling all planes! + */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); + } +} + /* Scan out the current hw modeset state, * and sanitizes it to the current state */ @@ -15105,11 +15090,14 @@ intel_modeset_setup_hw_state(struct drm_device *dev, struct intel_encoder *encoder; int i; + intel_early_display_was(dev_priv); intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. */ get_encoder_power_domains(dev_priv); + intel_sanitize_plane_mapping(dev_priv); + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } @@ -15186,6 +15174,7 @@ void intel_display_resume(struct drm_device *dev) if (!ret) ret = __intel_display_resume(dev, state, &ctx); + intel_enable_ipc(dev_priv); drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); @@ -15195,15 +15184,6 @@ void intel_display_resume(struct drm_device *dev) drm_atomic_state_put(state); } -void intel_modeset_gem_init(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - intel_init_gt_powersave(dev_priv); - - intel_setup_overlay(dev_priv); -} - int intel_connector_register(struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); @@ -15232,10 +15212,7 @@ static void intel_hpd_poll_fini(struct drm_device *dev) struct intel_connector *connector; struct drm_connector_list_iter conn_iter; - /* First disable polling... */ - drm_kms_helper_poll_fini(dev); - - /* Then kill the work that may have been queued by hpd. */ + /* Kill all the work that may have been queued by hpd. */ drm_connector_list_iter_begin(dev, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->modeset_retry_work.func) @@ -15283,6 +15260,8 @@ void intel_modeset_cleanup(struct drm_device *dev) intel_cleanup_gt_powersave(dev_priv); intel_teardown_gmbus(dev_priv); + + destroy_workqueue(dev_priv->modeset_wq); } void intel_connector_attach_encoder(struct intel_connector *connector, diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h new file mode 100644 index 000000000000..a0d2b6169361 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_display.h @@ -0,0 +1,321 @@ +/* + * Copyright © 2006-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_DISPLAY_H_ +#define _INTEL_DISPLAY_H_ + +enum pipe { + INVALID_PIPE = -1, + + PIPE_A = 0, + PIPE_B, + PIPE_C, + _PIPE_EDP, + + I915_MAX_PIPES = _PIPE_EDP +}; + +#define pipe_name(p) ((p) + 'A') + +enum transcoder { + TRANSCODER_A = 0, + TRANSCODER_B, + TRANSCODER_C, + TRANSCODER_EDP, + TRANSCODER_DSI_A, + TRANSCODER_DSI_C, + + I915_MAX_TRANSCODERS +}; + +static inline const char *transcoder_name(enum transcoder transcoder) +{ + switch (transcoder) { + case TRANSCODER_A: + return "A"; + case TRANSCODER_B: + return "B"; + case TRANSCODER_C: + return "C"; + case TRANSCODER_EDP: + return "EDP"; + case TRANSCODER_DSI_A: + return "DSI A"; + case TRANSCODER_DSI_C: + return "DSI C"; + default: + return "<invalid>"; + } +} + +static inline bool transcoder_is_dsi(enum transcoder transcoder) +{ + return transcoder == TRANSCODER_DSI_A || transcoder == TRANSCODER_DSI_C; +} + +/* + * Global legacy plane identifier. Valid only for primary/sprite + * planes on pre-g4x, and only for primary planes on g4x-bdw. + */ +enum i9xx_plane_id { + PLANE_A, + PLANE_B, + PLANE_C, +}; + +#define plane_name(p) ((p) + 'A') +#define sprite_name(p, s) ((p) * INTEL_INFO(dev_priv)->num_sprites[(p)] + (s) + 'A') + +/* + * Per-pipe plane identifier. + * I915_MAX_PLANES in the enum below is the maximum (across all platforms) + * number of planes per CRTC. Not all platforms really have this many planes, + * which means some arrays of size I915_MAX_PLANES may have unused entries + * between the topmost sprite plane and the cursor plane. + * + * This is expected to be passed to various register macros + * (eg. PLANE_CTL(), PS_PLANE_SEL(), etc.) so adjust with care. 
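+ *
+ * E.g. with two sprites on a pipe, crtc->plane_ids_mask covers
+ * PLANE_PRIMARY, PLANE_SPRITE0, PLANE_SPRITE1 and PLANE_CURSOR, so
+ *
+ *	for_each_plane_id_on_crtc(crtc, plane_id)
+ *		...
+ *
+ * (defined below) never visits the unused PLANE_SPRITE2 slot.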
+ */ +enum plane_id { + PLANE_PRIMARY, + PLANE_SPRITE0, + PLANE_SPRITE1, + PLANE_SPRITE2, + PLANE_CURSOR, + + I915_MAX_PLANES, +}; + +#define for_each_plane_id_on_crtc(__crtc, __p) \ + for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ + for_each_if((__crtc)->plane_ids_mask & BIT(__p)) + +enum port { + PORT_NONE = -1, + + PORT_A = 0, + PORT_B, + PORT_C, + PORT_D, + PORT_E, + + I915_MAX_PORTS +}; + +#define port_name(p) ((p) + 'A') + +enum dpio_channel { + DPIO_CH0, + DPIO_CH1 +}; + +enum dpio_phy { + DPIO_PHY0, + DPIO_PHY1, + DPIO_PHY2, +}; + +#define I915_NUM_PHYS_VLV 2 + +enum intel_display_power_domain { + POWER_DOMAIN_PIPE_A, + POWER_DOMAIN_PIPE_B, + POWER_DOMAIN_PIPE_C, + POWER_DOMAIN_PIPE_A_PANEL_FITTER, + POWER_DOMAIN_PIPE_B_PANEL_FITTER, + POWER_DOMAIN_PIPE_C_PANEL_FITTER, + POWER_DOMAIN_TRANSCODER_A, + POWER_DOMAIN_TRANSCODER_B, + POWER_DOMAIN_TRANSCODER_C, + POWER_DOMAIN_TRANSCODER_EDP, + POWER_DOMAIN_TRANSCODER_DSI_A, + POWER_DOMAIN_TRANSCODER_DSI_C, + POWER_DOMAIN_PORT_DDI_A_LANES, + POWER_DOMAIN_PORT_DDI_B_LANES, + POWER_DOMAIN_PORT_DDI_C_LANES, + POWER_DOMAIN_PORT_DDI_D_LANES, + POWER_DOMAIN_PORT_DDI_E_LANES, + POWER_DOMAIN_PORT_DDI_A_IO, + POWER_DOMAIN_PORT_DDI_B_IO, + POWER_DOMAIN_PORT_DDI_C_IO, + POWER_DOMAIN_PORT_DDI_D_IO, + POWER_DOMAIN_PORT_DDI_E_IO, + POWER_DOMAIN_PORT_DSI, + POWER_DOMAIN_PORT_CRT, + POWER_DOMAIN_PORT_OTHER, + POWER_DOMAIN_VGA, + POWER_DOMAIN_AUDIO, + POWER_DOMAIN_PLLS, + POWER_DOMAIN_AUX_A, + POWER_DOMAIN_AUX_B, + POWER_DOMAIN_AUX_C, + POWER_DOMAIN_AUX_D, + POWER_DOMAIN_GMBUS, + POWER_DOMAIN_MODESET, + POWER_DOMAIN_GT_IRQ, + POWER_DOMAIN_INIT, + + POWER_DOMAIN_NUM, +}; + +#define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A) +#define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \ + ((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER) +#define POWER_DOMAIN_TRANSCODER(tran) \ + ((tran) == TRANSCODER_EDP ? 
POWER_DOMAIN_TRANSCODER_EDP : \ + (tran) + POWER_DOMAIN_TRANSCODER_A) + +/* Used by dp and fdi links */ +struct intel_link_m_n { + u32 tu; + u32 gmch_m; + u32 gmch_n; + u32 link_m; + u32 link_n; +}; + +#define for_each_pipe(__dev_priv, __p) \ + for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) + +#define for_each_pipe_masked(__dev_priv, __p, __mask) \ + for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ + for_each_if((__mask) & BIT(__p)) + +#define for_each_universal_plane(__dev_priv, __pipe, __p) \ + for ((__p) = 0; \ + (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \ + (__p)++) + +#define for_each_sprite(__dev_priv, __p, __s) \ + for ((__s) = 0; \ + (__s) < INTEL_INFO(__dev_priv)->num_sprites[(__p)]; \ + (__s)++) + +#define for_each_port_masked(__port, __ports_mask) \ + for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++) \ + for_each_if((__ports_mask) & BIT(__port)) + +#define for_each_crtc(dev, crtc) \ + list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head) + +#define for_each_intel_plane(dev, intel_plane) \ + list_for_each_entry(intel_plane, \ + &(dev)->mode_config.plane_list, \ + base.head) + +#define for_each_intel_plane_mask(dev, intel_plane, plane_mask) \ + list_for_each_entry(intel_plane, \ + &(dev)->mode_config.plane_list, \ + base.head) \ + for_each_if((plane_mask) & \ + BIT(drm_plane_index(&intel_plane->base))) + +#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \ + list_for_each_entry(intel_plane, \ + &(dev)->mode_config.plane_list, \ + base.head) \ + for_each_if((intel_plane)->pipe == (intel_crtc)->pipe) + +#define for_each_intel_crtc(dev, intel_crtc) \ + list_for_each_entry(intel_crtc, \ + &(dev)->mode_config.crtc_list, \ + base.head) + +#define for_each_intel_crtc_mask(dev, intel_crtc, crtc_mask) \ + list_for_each_entry(intel_crtc, \ + &(dev)->mode_config.crtc_list, \ + base.head) \ + for_each_if((crtc_mask) & BIT(drm_crtc_index(&intel_crtc->base))) + +#define for_each_intel_encoder(dev, intel_encoder) \ + list_for_each_entry(intel_encoder, \ + &(dev)->mode_config.encoder_list, \ + base.head) + +#define for_each_intel_connector_iter(intel_connector, iter) \ + while ((intel_connector = to_intel_connector(drm_connector_list_iter_next(iter)))) + +#define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \ + list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \ + for_each_if((intel_encoder)->base.crtc == (__crtc)) + +#define for_each_connector_on_encoder(dev, __encoder, intel_connector) \ + list_for_each_entry((intel_connector), &(dev)->mode_config.connector_list, base.head) \ + for_each_if((intel_connector)->base.encoder == (__encoder)) + +#define for_each_power_domain(domain, mask) \ + for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ + for_each_if(BIT_ULL(domain) & (mask)) + +#define for_each_power_well(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells; \ + (__power_well) - (__dev_priv)->power_domains.power_wells < \ + (__dev_priv)->power_domains.power_well_count; \ + (__power_well)++) + +#define for_each_power_well_rev(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells + \ + (__dev_priv)->power_domains.power_well_count - 1; \ + (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \ + (__power_well)--) + +#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well(__dev_priv, __power_well) \ + 
for_each_if((__power_well)->domains & (__domain_mask)) + +#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well_rev(__dev_priv, __power_well) \ + for_each_if((__power_well)->domains & (__domain_mask)) + +#define for_each_new_intel_plane_in_state(__state, plane, new_plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_total_plane && \ + ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ + (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \ + (__i)++) \ + for_each_if(plane) + +#define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_crtc && \ + ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ + (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ + (__i)++) \ + for_each_if(crtc) + +#define for_each_oldnew_intel_plane_in_state(__state, plane, old_plane_state, new_plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_total_plane && \ + ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ + (old_plane_state) = to_intel_plane_state((__state)->base.planes[__i].old_state), \ + (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \ + (__i)++) \ + for_each_if(plane) + +void intel_link_compute_m_n(int bpp, int nlanes, + int pixel_clock, int link_clock, + struct intel_link_m_n *m_n, + bool reduce_m_n); + +#endif diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 09f274419eea..35c5299feab6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -42,6 +42,7 @@ #include "i915_drv.h" #define DP_LINK_CHECK_TIMEOUT (10 * 1000) +#define DP_DPRX_ESI_LEN 14 /* Compliance test status bits */ #define INTEL_DP_RESOLUTION_SHIFT_MASK 0 @@ -103,13 +104,13 @@ static const int cnl_rates[] = { 162000, 216000, 270000, static const int default_rates[] = { 162000, 270000, 540000 }; /** - * is_edp - is the given port attached to an eDP panel (either CPU or PCH) + * intel_dp_is_edp - is the given port attached to an eDP panel (either CPU or PCH) * @intel_dp: DP struct * * If a CPU or PCH DP output is attached to an eDP panel, this function * will return true, and false otherwise. 
*/ -static bool is_edp(struct intel_dp *intel_dp) +bool intel_dp_is_edp(struct intel_dp *intel_dp) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); @@ -128,40 +129,30 @@ static struct intel_dp *intel_attached_dp(struct drm_connector *connector) return enc_to_intel_dp(&intel_attached_encoder(connector)->base); } -static void intel_dp_link_down(struct intel_dp *intel_dp); +static void intel_dp_link_down(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state); static bool edp_panel_vdd_on(struct intel_dp *intel_dp); static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync); -static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp); -static void vlv_steal_power_sequencer(struct drm_device *dev, +static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv, enum pipe pipe); static void intel_dp_unset_edid(struct intel_dp *intel_dp); -static int intel_dp_num_rates(u8 link_bw_code) -{ - switch (link_bw_code) { - default: - WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n", - link_bw_code); - case DP_LINK_BW_1_62: - return 1; - case DP_LINK_BW_2_7: - return 2; - case DP_LINK_BW_5_4: - return 3; - } -} - /* update sink rates from dpcd */ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) { - int i, num_rates; + int i, max_rate; - num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]); + max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); - for (i = 0; i < num_rates; i++) + for (i = 0; i < ARRAY_SIZE(default_rates); i++) { + if (default_rates[i] > max_rate) + break; intel_dp->sink_rates[i] = default_rates[i]; + } - intel_dp->num_sink_rates = num_rates; + intel_dp->num_sink_rates = i; } /* Theoretical max between source and sink */ @@ -232,7 +223,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->port; + enum port port = dig_port->base.port; const int *source_rates; int size; u32 voltage; @@ -253,15 +244,15 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) } else if (IS_GEN9_BC(dev_priv)) { source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); - } else { + } else if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || + IS_BROADWELL(dev_priv)) { source_rates = default_rates; size = ARRAY_SIZE(default_rates); + } else { + source_rates = default_rates; + size = ARRAY_SIZE(default_rates) - 1; } - /* This depends on the fact that 5.4 is last value in the array */ - if (!intel_dp_source_supports_hbr2(intel_dp)) - size--; - intel_dp->source_rates = source_rates; intel_dp->num_source_rates = size; } @@ -388,7 +379,7 @@ intel_dp_mode_valid(struct drm_connector *connector, max_dotclk = intel_dp_downstream_max_dotclock(intel_dp); - if (is_edp(intel_dp) && fixed_mode) { + if (intel_dp_is_edp(intel_dp) && fixed_mode) { if (mode->hdisplay > fixed_mode->hdisplay) return MODE_PANEL; @@ -438,24 +429,19 @@ static void intel_dp_unpack_aux(uint32_t src, uint8_t *dst, int dst_bytes) } static void -intel_dp_init_panel_power_sequencer(struct drm_device *dev, - struct intel_dp *intel_dp); +intel_dp_init_panel_power_sequencer(struct intel_dp *intel_dp); static void -intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, - struct intel_dp *intel_dp, 
+intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp, bool force_disable_vdd); static void -intel_dp_pps_init(struct drm_device *dev, struct intel_dp *intel_dp); +intel_dp_pps_init(struct intel_dp *intel_dp); static void pps_lock(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *encoder = &intel_dig_port->base; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); /* - * See vlv_power_sequencer_reset() why we need + * See intel_power_sequencer_reset() why we need * a power domain reference here. */ intel_display_power_get(dev_priv, intel_dp->aux_power_domain); @@ -465,10 +451,7 @@ static void pps_lock(struct intel_dp *intel_dp) static void pps_unlock(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *encoder = &intel_dig_port->base; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); mutex_unlock(&dev_priv->pps_mutex); @@ -478,8 +461,8 @@ static void pps_unlock(struct intel_dp *intel_dp) static void vlv_power_sequencer_kick(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum pipe pipe = intel_dp->pps_pipe; bool pll_enabled, release_cl_override = false; enum dpio_phy phy = DPIO_PHY(pipe); @@ -488,11 +471,11 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) if (WARN(I915_READ(intel_dp->output_reg) & DP_PORT_EN, "skipping pipe %c power sequencer kick due to port %c being active\n", - pipe_name(pipe), port_name(intel_dig_port->port))) return; DRM_DEBUG_KMS("kicking pipe %c power sequencer for port %c\n", - pipe_name(pipe), port_name(intel_dig_port->port)); + pipe_name(pipe), port_name(intel_dig_port->base.port)); /* Preserve the BIOS-computed detected bit. This is * supposed to be read-only.
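The hunks above repeatedly collapse the old dig_port->base.base.dev chain into a single helper call; a minimal sketch of the resulting pattern, where example() is a hypothetical caller and not a function from this commit:

	static void example(struct intel_dp *intel_dp)
	{
		/* one-step dev_priv lookup, as in pps_lock()/pps_unlock() above */
		struct drm_i915_private *dev_priv =
			to_i915(intel_dp_to_dev(intel_dp));

		lockdep_assert_held(&dev_priv->pps_mutex);
	}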
@@ -589,15 +572,14 @@ static enum pipe vlv_find_free_pps(struct drm_i915_private *dev_priv) static enum pipe vlv_power_sequencer_pipe(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe; lockdep_assert_held(&dev_priv->pps_mutex); /* We should never land here with regular DP ports */ - WARN_ON(!is_edp(intel_dp)); + WARN_ON(!intel_dp_is_edp(intel_dp)); WARN_ON(intel_dp->active_pipe != INVALID_PIPE && intel_dp->active_pipe != intel_dp->pps_pipe); @@ -614,16 +596,16 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) if (WARN_ON(pipe == INVALID_PIPE)) pipe = PIPE_A; - vlv_steal_power_sequencer(dev, pipe); + vlv_steal_power_sequencer(dev_priv, pipe); intel_dp->pps_pipe = pipe; DRM_DEBUG_KMS("picked pipe %c power sequencer for port %c\n", pipe_name(intel_dp->pps_pipe), - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); /* init power sequencer on this pipe and port */ - intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, true); + intel_dp_init_panel_power_sequencer(intel_dp); + intel_dp_init_panel_power_sequencer_registers(intel_dp, true); /* * Even vdd force doesn't work until we've made @@ -637,14 +619,12 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) static int bxt_power_sequencer_idx(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); lockdep_assert_held(&dev_priv->pps_mutex); /* We should never land here with regular DP ports */ - WARN_ON(!is_edp(intel_dp)); + WARN_ON(!intel_dp_is_edp(intel_dp)); /* * TODO: BXT has 2 PPS instances. The correct port->PPS instance @@ -660,7 +640,7 @@ bxt_power_sequencer_idx(struct intel_dp *intel_dp) * Only the HW needs to be reprogrammed, the SW state is fixed and * has been setup during connector init. 
*/ - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); + intel_dp_init_panel_power_sequencer_registers(intel_dp, false); return 0; } @@ -712,10 +692,9 @@ vlv_initial_pps_pipe(struct drm_i915_private *dev_priv, static void vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; lockdep_assert_held(&dev_priv->pps_mutex); @@ -742,13 +721,12 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) DRM_DEBUG_KMS("initial power sequencer for port %c: pipe %c\n", port_name(port), pipe_name(intel_dp->pps_pipe)); - intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); + intel_dp_init_panel_power_sequencer(intel_dp); + intel_dp_init_panel_power_sequencer_registers(intel_dp, false); } void intel_power_sequencer_reset(struct drm_i915_private *dev_priv) { - struct drm_device *dev = &dev_priv->drm; struct intel_encoder *encoder; if (WARN_ON(!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv) && @@ -765,15 +743,20 @@ void intel_power_sequencer_reset(struct drm_i915_private *dev_priv) * should use them always. */ - for_each_intel_encoder(dev, encoder) { + for_each_intel_encoder(&dev_priv->drm, encoder) { struct intel_dp *intel_dp; if (encoder->type != INTEL_OUTPUT_DP && - encoder->type != INTEL_OUTPUT_EDP) + encoder->type != INTEL_OUTPUT_EDP && + encoder->type != INTEL_OUTPUT_DDI) continue; intel_dp = enc_to_intel_dp(&encoder->base); + /* Skip pure DVI/HDMI DDI encoders */ + if (!i915_mmio_reg_valid(intel_dp->output_reg)) + continue; + WARN_ON(intel_dp->active_pipe != INVALID_PIPE); if (encoder->type != INTEL_OUTPUT_EDP) @@ -794,10 +777,10 @@ struct pps_registers { i915_reg_t pp_div; }; -static void intel_pps_get_registers(struct drm_i915_private *dev_priv, - struct intel_dp *intel_dp, +static void intel_pps_get_registers(struct intel_dp *intel_dp, struct pps_registers *regs) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); int pps_idx = 0; memset(regs, 0, sizeof(*regs)); @@ -820,8 +803,7 @@ _pp_ctrl_reg(struct intel_dp *intel_dp) { struct pps_registers regs; - intel_pps_get_registers(to_i915(intel_dp_to_dev(intel_dp)), intel_dp, - ®s); + intel_pps_get_registers(intel_dp, ®s); return regs.pp_ctrl; } @@ -831,8 +813,7 @@ _pp_stat_reg(struct intel_dp *intel_dp) { struct pps_registers regs; - intel_pps_get_registers(to_i915(intel_dp_to_dev(intel_dp)), intel_dp, - ®s); + intel_pps_get_registers(intel_dp, ®s); return regs.pp_stat; } @@ -844,10 +825,9 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code, { struct intel_dp *intel_dp = container_of(this, typeof(* intel_dp), edp_notifier); - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - if (!is_edp(intel_dp) || code != SYS_RESTART) + if (!intel_dp_is_edp(intel_dp) || code != SYS_RESTART) return 0; pps_lock(intel_dp); @@ -875,8 +855,7 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code, static bool edp_have_panel_power(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct 
drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); lockdep_assert_held(&dev_priv->pps_mutex); @@ -889,8 +868,7 @@ static bool edp_have_panel_power(struct intel_dp *intel_dp) static bool edp_have_panel_vdd(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); lockdep_assert_held(&dev_priv->pps_mutex); @@ -904,10 +882,9 @@ static bool edp_have_panel_vdd(struct intel_dp *intel_dp) static void intel_dp_check_edp(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; if (!edp_have_panel_power(intel_dp) && !edp_have_panel_vdd(intel_dp)) { @@ -921,9 +898,7 @@ intel_dp_check_edp(struct intel_dp *intel_dp) static uint32_t intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg; uint32_t status; bool done; @@ -970,7 +945,7 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * like to run at 2MHz. So, take the cdclk or PCH rawclk value and * divide by 2000 and use that */ - if (intel_dig_port->port == PORT_A) + if (intel_dig_port->base.port == PORT_A) return DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.cdclk, 2000); else return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); @@ -981,7 +956,7 @@ static uint32_t hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - if (intel_dig_port->port != PORT_A && HAS_PCH_LPT_H(dev_priv)) { + if (intel_dig_port->base.port != PORT_A && HAS_PCH_LPT_H(dev_priv)) { /* Workaround for non-ULT HSW */ switch (index) { case 0: return 63; @@ -1018,7 +993,7 @@ static uint32_t g4x_get_aux_send_ctl(struct intel_dp *intel_dp, else precharge = 5; - if (IS_BROADWELL(dev_priv) && intel_dig_port->port == PORT_A) + if (IS_BROADWELL(dev_priv)) timeout = DP_AUX_CH_CTL_TIME_OUT_600us; else timeout = DP_AUX_CH_CTL_TIME_OUT_400us; @@ -1043,7 +1018,7 @@ static uint32_t skl_get_aux_send_ctl(struct intel_dp *intel_dp, DP_AUX_CH_CTL_DONE | (has_aux_irq ? 
DP_AUX_CH_CTL_INTERRUPT : 0) | DP_AUX_CH_CTL_TIME_OUT_ERROR | - DP_AUX_CH_CTL_TIME_OUT_1600us | + DP_AUX_CH_CTL_TIME_OUT_MAX | DP_AUX_CH_CTL_RECEIVE_ERROR | (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) | DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(32) | @@ -1451,7 +1426,7 @@ static void intel_aux_reg_init(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); enum port port = intel_aux_port(dev_priv, - dp_to_dig_port(intel_dp)->port); + dp_to_dig_port(intel_dp)->base.port); int i; intel_dp->aux_ch_ctl_reg = intel_aux_ctl_reg(dev_priv, port); @@ -1469,7 +1444,7 @@ static void intel_dp_aux_init(struct intel_dp *intel_dp) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; intel_aux_reg_init(intel_dp); drm_dp_aux_init(&intel_dp->aux); @@ -1481,22 +1456,16 @@ intel_dp_aux_init(struct intel_dp *intel_dp) bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp) { - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + int max_rate = intel_dp->source_rates[intel_dp->num_source_rates - 1]; - if ((IS_HASWELL(dev_priv) && !IS_HSW_ULX(dev_priv)) || - IS_BROADWELL(dev_priv) || (INTEL_GEN(dev_priv) >= 9)) - return true; - else - return false; + return max_rate >= 540000; } static void intel_dp_set_clock(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); const struct dp_link_dpll *divisor = NULL; int i, count = 0; @@ -1644,7 +1613,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = encoder->port; struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_connector *intel_connector = intel_dp->attached_connector; struct intel_digital_connector_state *intel_conn_state = @@ -1674,14 +1643,14 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = true; pipe_config->has_drrs = false; - if (port == PORT_A) + if (IS_G4X(dev_priv) || port == PORT_A) pipe_config->has_audio = false; else if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) pipe_config->has_audio = intel_dp->has_audio; else pipe_config->has_audio = intel_conn_state->force_audio == HDMI_AUDIO_ON; - if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) { + if (intel_dp_is_edp(intel_dp) && intel_connector->panel.fixed_mode) { struct drm_display_mode *panel_mode = intel_connector->panel.alt_fixed_mode; struct drm_display_mode *req_mode = &pipe_config->base.mode; @@ -1708,6 +1677,10 @@ intel_dp_compute_config(struct intel_encoder *encoder, conn_state->scaling_mode); } + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) + return false; + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; @@ -1736,7 +1709,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Walk through all bpp values. Luckily they're all nicely spaced with 2 * bpc in between. 
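 * (Three channels per pixel, so a step of 2 bpc is a step of 6 bpp:
 * e.g. 18, 24 and 30 bpp for 6, 8 and 10 bpc.)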
*/ bpp = intel_dp_compute_bpp(intel_dp, pipe_config); - if (is_edp(intel_dp)) { + if (intel_dp_is_edp(intel_dp)) { /* Get bpp from vbt only for panels that dont have bpp in edid */ if (intel_connector->base.display_info.bpc == 0 && @@ -1829,7 +1802,7 @@ found: * DPLL0 VCO may need to be adjusted to get the correct * clock for eDP. This will affect cdclk as well. */ - if (is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { + if (intel_dp_is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { int vco; switch (pipe_config->port_clock / 2) { @@ -1848,6 +1821,8 @@ found: if (!HAS_DDI(dev_priv)) intel_dp_set_clock(encoder, pipe_config); + intel_psr_compute_config(intel_dp, pipe_config); + return true; } @@ -1861,13 +1836,12 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp, } static void intel_dp_prepare(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + const struct intel_crtc_state *pipe_config) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = dp_to_dig_port(intel_dp)->port; - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + enum port port = encoder->port; + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; intel_dp_set_link_params(intel_dp, pipe_config->port_clock, @@ -1954,20 +1928,18 @@ static void intel_dp_prepare(struct intel_encoder *encoder, #define IDLE_CYCLE_MASK (PP_ON | PP_SEQUENCE_MASK | PP_CYCLE_DELAY_ACTIVE | PP_SEQUENCE_STATE_MASK) #define IDLE_CYCLE_VALUE (0 | PP_SEQUENCE_NONE | 0 | PP_SEQUENCE_STATE_OFF_IDLE) -static void intel_pps_verify_state(struct drm_i915_private *dev_priv, - struct intel_dp *intel_dp); +static void intel_pps_verify_state(struct intel_dp *intel_dp); static void wait_panel_status(struct intel_dp *intel_dp, u32 mask, u32 value) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); i915_reg_t pp_stat_reg, pp_ctrl_reg; lockdep_assert_held(&dev_priv->pps_mutex); - intel_pps_verify_state(dev_priv, intel_dp); + intel_pps_verify_state(intel_dp); pp_stat_reg = _pp_stat_reg(intel_dp); pp_ctrl_reg = _pp_ctrl_reg(intel_dp); @@ -2038,8 +2010,7 @@ static void edp_wait_backlight_off(struct intel_dp *intel_dp) static u32 ironlake_get_pp_control(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 control; lockdep_assert_held(&dev_priv->pps_mutex); @@ -2060,16 +2031,15 @@ static u32 ironlake_get_pp_control(struct intel_dp *intel_dp) */ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; bool need_to_disable = !intel_dp->want_panel_vdd; lockdep_assert_held(&dev_priv->pps_mutex); - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return false; cancel_delayed_work(&intel_dp->panel_vdd_work); @@ -2081,7 +2051,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) 
intel_display_power_get(dev_priv, intel_dp->aux_power_domain); DRM_DEBUG_KMS("Turning eDP port %c VDD on\n", - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); if (!edp_have_panel_power(intel_dp)) wait_panel_power_cycle(intel_dp); @@ -2101,7 +2071,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) */ if (!edp_have_panel_power(intel_dp)) { DRM_DEBUG_KMS("eDP port %c panel power wasn't enabled\n", - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); msleep(intel_dp->panel_power_up_delay); } @@ -2119,7 +2089,7 @@ void intel_edp_panel_vdd_on(struct intel_dp *intel_dp) { bool vdd; - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; pps_lock(intel_dp); @@ -2127,13 +2097,12 @@ void intel_edp_panel_vdd_on(struct intel_dp *intel_dp) pps_unlock(intel_dp); I915_STATE_WARN(!vdd, "eDP port %c VDD already requested on\n", - port_name(dp_to_dig_port(intel_dp)->port)); + port_name(dp_to_dig_port(intel_dp)->base.port)); } static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); u32 pp; @@ -2147,7 +2116,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) return; DRM_DEBUG_KMS("Turning eDP port %c VDD off\n", - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); pp = ironlake_get_pp_control(intel_dp); pp &= ~EDP_FORCE_VDD; @@ -2203,11 +2172,11 @@ static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync) lockdep_assert_held(&dev_priv->pps_mutex); - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; I915_STATE_WARN(!intel_dp->want_panel_vdd, "eDP port %c VDD not forced on", - port_name(dp_to_dig_port(intel_dp)->port)); + port_name(dp_to_dig_port(intel_dp)->base.port)); intel_dp->want_panel_vdd = false; @@ -2219,22 +2188,21 @@ static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync) static void edp_panel_on(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp; i915_reg_t pp_ctrl_reg; lockdep_assert_held(&dev_priv->pps_mutex); - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; DRM_DEBUG_KMS("Turn eDP port %c panel power on\n", - port_name(dp_to_dig_port(intel_dp)->port)); + port_name(dp_to_dig_port(intel_dp)->base.port)); if (WARN(edp_have_panel_power(intel_dp), "eDP port %c panel power already on\n", - port_name(dp_to_dig_port(intel_dp)->port))) + port_name(dp_to_dig_port(intel_dp)->base.port))) return; wait_panel_power_cycle(intel_dp); @@ -2267,7 +2235,7 @@ static void edp_panel_on(struct intel_dp *intel_dp) void intel_edp_panel_on(struct intel_dp *intel_dp) { - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; pps_lock(intel_dp); @@ -2278,21 +2246,20 @@ void intel_edp_panel_on(struct intel_dp *intel_dp) static void edp_panel_off(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp; i915_reg_t pp_ctrl_reg; lockdep_assert_held(&dev_priv->pps_mutex); - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; DRM_DEBUG_KMS("Turn eDP port %c panel power 
off\n", - port_name(dp_to_dig_port(intel_dp)->port)); + port_name(dp_to_dig_port(intel_dp)->base.port)); WARN(!intel_dp->want_panel_vdd, "Need eDP port %c VDD to turn off panel\n", - port_name(dp_to_dig_port(intel_dp)->port)); + port_name(dp_to_dig_port(intel_dp)->base.port)); pp = ironlake_get_pp_control(intel_dp); /* We need to switch off panel power _and_ force vdd, for otherwise some @@ -2316,7 +2283,7 @@ static void edp_panel_off(struct intel_dp *intel_dp) void intel_edp_panel_off(struct intel_dp *intel_dp) { - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; pps_lock(intel_dp); @@ -2327,9 +2294,7 @@ void intel_edp_panel_off(struct intel_dp *intel_dp) /* Enable backlight in the panel power control. */ static void _intel_edp_backlight_on(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp; i915_reg_t pp_ctrl_reg; @@ -2360,7 +2325,7 @@ void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, { struct intel_dp *intel_dp = enc_to_intel_dp(conn_state->best_encoder); - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; DRM_DEBUG_KMS("\n"); @@ -2372,12 +2337,11 @@ void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, /* Disable backlight in the panel power control. */ static void _intel_edp_backlight_off(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp; i915_reg_t pp_ctrl_reg; - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; pps_lock(intel_dp); @@ -2401,7 +2365,7 @@ void intel_edp_backlight_off(const struct drm_connector_state *old_conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(old_conn_state->best_encoder); - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; DRM_DEBUG_KMS("\n"); @@ -2444,7 +2408,7 @@ static void assert_dp_port(struct intel_dp *intel_dp, bool state) I915_STATE_WARN(cur_state != state, "DP port %c state assertion failure (expected %s, current %s)\n", - port_name(dig_port->port), + port_name(dig_port->base.port), onoff(state), onoff(cur_state)); } #define assert_dp_port_disabled(d) assert_dp_port((d), false) @@ -2461,7 +2425,7 @@ static void assert_edp_pll(struct drm_i915_private *dev_priv, bool state) #define assert_edp_pll_disabled(d) assert_edp_pll((d), false) static void ironlake_edp_pll_on(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config) + const struct intel_crtc_state *pipe_config) { struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -2500,10 +2464,10 @@ static void ironlake_edp_pll_on(struct intel_dp *intel_dp, udelay(200); } -static void ironlake_edp_pll_off(struct intel_dp *intel_dp) +static void ironlake_edp_pll_off(struct intel_dp *intel_dp, + const struct intel_crtc_state *old_crtc_state) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); assert_pipe_disabled(dev_priv, crtc->pipe); @@ -2519,6 +2483,21 @@ static void ironlake_edp_pll_off(struct 
intel_dp *intel_dp) udelay(200); } +static bool downstream_hpd_needs_d0(struct intel_dp *intel_dp) +{ + /* + * DPCD 1.2+ should support BRANCH_DEVICE_CTRL, and thus + * be capable of signalling downstream hpd with a long pulse. + * Whether or not that means D3 is safe to use is not clear, + * but let's assume so until proven otherwise. + * + * FIXME should really check all downstream ports... + */ + return intel_dp->dpcd[DP_DPCD_REV] == 0x11 && + intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT && + intel_dp->downstream_ports[0] & DP_DS_PORT_HPD; +} + /* If the sink supports it, try to set the power state appropriately */ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode) { @@ -2529,6 +2508,9 @@ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode) return; if (mode != DRM_MODE_DPMS_ON) { + if (downstream_hpd_needs_d0(intel_dp)) + return; + ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, DP_SET_POWER_D3); } else { @@ -2558,10 +2540,9 @@ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode) static bool intel_dp_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = dp_to_dig_port(intel_dp)->port; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + enum port port = encoder->port; u32 tmp; bool ret; @@ -2610,12 +2591,16 @@ out: static void intel_dp_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); u32 tmp, flags = 0; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = dp_to_dig_port(intel_dp)->port; - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + enum port port = encoder->port; + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); + + if (encoder->type == INTEL_OUTPUT_EDP) + pipe_config->output_types |= BIT(INTEL_OUTPUT_EDP); + else + pipe_config->output_types |= BIT(INTEL_OUTPUT_DP); tmp = I915_READ(intel_dp->output_reg); @@ -2666,7 +2651,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder, intel_dotclock_calculate(pipe_config->port_clock, &pipe_config->dp_m_n); - if (is_edp(intel_dp) && dev_priv->vbt.edp.bpp && + if (intel_dp_is_edp(intel_dp) && dev_priv->vbt.edp.bpp && pipe_config->pipe_bpp > dev_priv->vbt.edp.bpp) { /* * This is a big fat ugly hack. @@ -2688,17 +2673,14 @@ static void intel_dp_get_config(struct intel_encoder *encoder, } static void intel_disable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder); - - if (HAS_PSR(dev_priv) && !HAS_DDI(dev_priv)) - intel_psr_disable(intel_dp); + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); /* Make sure the panel is off before trying to change the mode. But also * ensure that we have vdd while we switch off the panel. 
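 * (I.e. keep vdd asserted across the backlight-off, sink-DPMS-off and
 * panel-off sequence that follows.)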
*/ @@ -2706,49 +2688,69 @@ static void intel_disable_dp(struct intel_encoder *encoder, intel_edp_backlight_off(old_conn_state); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); intel_edp_panel_off(intel_dp); +} + +static void g4x_disable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + intel_disable_dp(encoder, old_crtc_state, old_conn_state); /* disable the port before the pipe on g4x */ - if (INTEL_GEN(dev_priv) < 5) - intel_dp_link_down(intel_dp); + intel_dp_link_down(encoder, old_crtc_state); +} + +static void ilk_disable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + intel_disable_dp(encoder, old_crtc_state, old_conn_state); +} + +static void vlv_disable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + + intel_psr_disable(intel_dp, old_crtc_state); + + intel_disable_dp(encoder, old_crtc_state, old_conn_state); } static void ilk_post_disable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = encoder->port; - intel_dp_link_down(intel_dp); + intel_dp_link_down(encoder, old_crtc_state); /* Only ilk+ has port A */ if (port == PORT_A) - ironlake_edp_pll_off(intel_dp); + ironlake_edp_pll_off(intel_dp, old_crtc_state); } static void vlv_post_disable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - - intel_dp_link_down(intel_dp); + intel_dp_link_down(encoder, old_crtc_state); } static void chv_post_disable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { - struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - intel_dp_link_down(intel_dp); + intel_dp_link_down(encoder, old_crtc_state); mutex_lock(&dev_priv->sb_lock); /* Assert data lane reset */ - chv_data_lane_soft_reset(encoder, true); + chv_data_lane_soft_reset(encoder, old_crtc_state, true); mutex_unlock(&dev_priv->sb_lock); } @@ -2758,10 +2760,9 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, uint32_t *DP, uint8_t dp_train_pat) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; if (dp_train_pat & DP_TRAINING_PATTERN_MASK) DRM_DEBUG_KMS("Using DP training pattern TPS%d\n", @@ -2842,10 +2843,9 @@ _intel_dp_set_link_train(struct intel_dp 
*intel_dp, } static void intel_dp_enable_port(struct intel_dp *intel_dp, - struct intel_crtc_state *old_crtc_state) + const struct intel_crtc_state *old_crtc_state) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); /* enable with pattern 1 (as per spec) */ @@ -2866,13 +2866,12 @@ static void intel_dp_enable_port(struct intel_dp *intel_dp, } static void intel_enable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); uint32_t dp_reg = I915_READ(intel_dp->output_reg); enum pipe pipe = crtc->pipe; @@ -2882,7 +2881,7 @@ static void intel_enable_dp(struct intel_encoder *encoder, pps_lock(intel_dp); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - vlv_init_panel_power_sequencer(intel_dp); + vlv_init_panel_power_sequencer(encoder, pipe_config); intel_dp_enable_port(intel_dp, pipe_config); @@ -2914,29 +2913,29 @@ static void intel_enable_dp(struct intel_encoder *encoder, } static void g4x_enable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { intel_enable_dp(encoder, pipe_config, conn_state); intel_edp_backlight_on(pipe_config, conn_state); } static void vlv_enable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); intel_edp_backlight_on(pipe_config, conn_state); - intel_psr_enable(intel_dp); + intel_psr_enable(intel_dp, pipe_config); } static void g4x_pre_enable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = encoder->port; intel_dp_prepare(encoder, pipe_config); @@ -2969,22 +2968,21 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) * from a port. 
*/ DRM_DEBUG_KMS("detaching pipe %c power sequencer from port %c\n", - pipe_name(pipe), port_name(intel_dig_port->port)); + pipe_name(pipe), port_name(intel_dig_port->base.port)); I915_WRITE(pp_on_reg, 0); POSTING_READ(pp_on_reg); intel_dp->pps_pipe = INVALID_PIPE; } -static void vlv_steal_power_sequencer(struct drm_device *dev, +static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_encoder *encoder; lockdep_assert_held(&dev_priv->pps_mutex); - for_each_intel_encoder(dev, encoder) { + for_each_intel_encoder(&dev_priv->drm, encoder) { struct intel_dp *intel_dp; enum port port; @@ -2993,7 +2991,7 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, continue; intel_dp = enc_to_intel_dp(&encoder->base); - port = dp_to_dig_port(intel_dp)->port; + port = dp_to_dig_port(intel_dp)->base.port; WARN(intel_dp->active_pipe == pipe, "stealing pipe %c power sequencer from active (e)DP port %c\n", @@ -3010,13 +3008,12 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, } } -static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp) +static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *encoder = &intel_dig_port->base; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); lockdep_assert_held(&dev_priv->pps_mutex); @@ -3036,47 +3033,47 @@ static void vlv_init_panel_power_sequencer(struct intel_dp *intel_dp) * We may be stealing the power * sequencer from another port. 
*/ - vlv_steal_power_sequencer(dev, crtc->pipe); + vlv_steal_power_sequencer(dev_priv, crtc->pipe); intel_dp->active_pipe = crtc->pipe; - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; /* now it's all ours */ intel_dp->pps_pipe = crtc->pipe; DRM_DEBUG_KMS("initializing pipe %c power sequencer for port %c\n", - pipe_name(intel_dp->pps_pipe), port_name(intel_dig_port->port)); + pipe_name(intel_dp->pps_pipe), port_name(encoder->port)); /* init power sequencer on this pipe and port */ - intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, true); + intel_dp_init_panel_power_sequencer(intel_dp); + intel_dp_init_panel_power_sequencer_registers(intel_dp, true); } static void vlv_pre_enable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { - vlv_phy_pre_encoder_enable(encoder); + vlv_phy_pre_encoder_enable(encoder, pipe_config); intel_enable_dp(encoder, pipe_config, conn_state); } static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { intel_dp_prepare(encoder, pipe_config); - vlv_phy_pre_pll_enable(encoder); + vlv_phy_pre_pll_enable(encoder, pipe_config); } static void chv_pre_enable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { - chv_phy_pre_encoder_enable(encoder); + chv_phy_pre_encoder_enable(encoder, pipe_config); intel_enable_dp(encoder, pipe_config, conn_state); @@ -3085,19 +3082,19 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder, } static void chv_dp_pre_pll_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { intel_dp_prepare(encoder, pipe_config); - chv_phy_pre_pll_enable(encoder); + chv_phy_pre_pll_enable(encoder, pipe_config); } static void chv_dp_post_pll_disable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { - chv_phy_post_pll_disable(encoder); + chv_phy_post_pll_disable(encoder, old_crtc_state); } /* @@ -3145,11 +3142,9 @@ uint8_t intel_dp_voltage_max(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = dp_to_dig_port(intel_dp)->base.port; - if (IS_GEN9_LP(dev_priv)) - return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; - else if (INTEL_GEN(dev_priv) >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; return intel_ddi_dp_voltage_max(encoder); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) @@ -3166,7 +3161,7 @@ uint8_t intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = dp_to_dig_port(intel_dp)->base.port; if 
(INTEL_GEN(dev_priv) >= 9) { switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { @@ -3499,20 +3494,17 @@ gen7_edp_signal_levels(uint8_t train_set) void intel_dp_set_signal_levels(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - enum port port = intel_dig_port->port; - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + enum port port = intel_dig_port->base.port; uint32_t signal_levels, mask = 0; uint8_t train_set = intel_dp->train_set[0]; - if (HAS_DDI(dev_priv)) { + if (IS_GEN9_LP(dev_priv) || IS_CANNONLAKE(dev_priv)) { + signal_levels = bxt_signal_levels(intel_dp); + } else if (HAS_DDI(dev_priv)) { signal_levels = ddi_signal_levels(intel_dp); - - if (IS_GEN9_LP(dev_priv) || IS_CANNONLAKE(dev_priv)) - signal_levels = 0; - else - mask = DDI_BUF_EMP_MASK; + mask = DDI_BUF_EMP_MASK; } else if (IS_CHERRYVIEW(dev_priv)) { signal_levels = chv_signal_levels(intel_dp); } else if (IS_VALLEYVIEW(dev_priv)) { @@ -3559,10 +3551,9 @@ intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; uint32_t val; if (!HAS_DDI(dev_priv)) @@ -3591,13 +3582,13 @@ void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) } static void -intel_dp_link_down(struct intel_dp *intel_dp) +intel_dp_link_down(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc); - enum port port = intel_dig_port->port; - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + enum port port = encoder->port; uint32_t DP = intel_dp->DP; if (WARN_ON(HAS_DDI(dev_priv))) @@ -3743,11 +3734,11 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == sizeof(intel_dp->edp_dpcd)) - DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), + DRM_DEBUG_KMS("eDP DPCD: %*ph\n", (int) sizeof(intel_dp->edp_dpcd), intel_dp->edp_dpcd); - /* Intermediate frequency support */ - if (intel_dp->edp_dpcd[0] >= 0x03) { /* eDp v1.4 or higher */ + /* Read the eDP 1.4+ supported link rates. */ + if (intel_dp->edp_dpcd[0] >= DP_EDP_14) { __le16 sink_rates[DP_MAX_SUPPORTED_RATES]; int i; @@ -3771,6 +3762,10 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) intel_dp->num_sink_rates = i; } + /* + * Use DP_LINK_RATE_SET if DP_SUPPORTED_LINK_RATES are available, + * default to DP_MAX_LINK_RATE and DP_LINK_BW_SET otherwise. + */ if (intel_dp->num_sink_rates) intel_dp->use_rate_select = true; else @@ -3791,7 +3786,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) return false; /* Don't clobber cached eDP rates. 
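 * (The cached rates were parsed from DP_SUPPORTED_LINK_RATES by
 * intel_edp_init_dpcd() above and must survive later DPCD re-reads.)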
*/ - if (!is_edp(intel_dp)) { + if (!intel_dp_is_edp(intel_dp)) { intel_dp_set_sink_rates(intel_dp); intel_dp_set_common_rates(intel_dp); } @@ -3813,7 +3808,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) * downstream port information. So, an early return here saves * time from performing other operations which are not required. */ - if (!is_edp(intel_dp) && !intel_dp->sink_count) + if (!intel_dp_is_edp(intel_dp) && !intel_dp->sink_count) return false; if (!drm_dp_is_branch(intel_dp->dpcd)) @@ -3835,7 +3830,7 @@ intel_dp_can_mst(struct intel_dp *intel_dp) { u8 mstm_cap; - if (!i915.enable_dp_mst) + if (!i915_modparams.enable_dp_mst) return false; if (!intel_dp->can_mst) @@ -3853,7 +3848,7 @@ intel_dp_can_mst(struct intel_dp *intel_dp) static void intel_dp_configure_mst(struct intel_dp *intel_dp) { - if (!i915.enable_dp_mst) + if (!i915_modparams.enable_dp_mst) return; if (!intel_dp->can_mst) @@ -3870,11 +3865,12 @@ intel_dp_configure_mst(struct intel_dp *intel_dp) intel_dp->is_mst); } -static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) +static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state, bool disable_wa) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); u8 buf; int ret = 0; int count = 0; @@ -3910,15 +3906,17 @@ static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) } out: - hsw_enable_ips(intel_crtc); + if (disable_wa) + hsw_enable_ips(crtc_state); return ret; } -static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) +static int intel_dp_sink_crc_start(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); u8 buf; int ret; @@ -3932,16 +3930,16 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) return -EIO; if (buf & DP_TEST_SINK_START) { - ret = intel_dp_sink_crc_stop(intel_dp); + ret = intel_dp_sink_crc_stop(intel_dp, crtc_state, false); if (ret) return ret; } - hsw_disable_ips(intel_crtc); + hsw_disable_ips(crtc_state); if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK, buf | DP_TEST_SINK_START) < 0) { - hsw_enable_ips(intel_crtc); + hsw_enable_ips(crtc_state); return -EIO; } @@ -3949,16 +3947,16 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) return 0; } -int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc) +int intel_dp_sink_crc(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, u8 *crc) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); u8 buf; int count, ret; int attempts = 6; - ret = intel_dp_sink_crc_start(intel_dp); + ret = intel_dp_sink_crc_start(intel_dp, crtc_state); if (ret) return ret; @@ -3986,7 +3984,7 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc) } stop: - intel_dp_sink_crc_stop(intel_dp); + intel_dp_sink_crc_stop(intel_dp, crtc_state, true); return ret; } @@ -4000,15 +3998,9 @@ 
intel_dp_get_sink_irq(struct intel_dp *intel_dp, u8 *sink_irq_vector) static bool intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) { - int ret; - - ret = drm_dp_dpcd_read(&intel_dp->aux, - DP_SINK_COUNT_ESI, - sink_irq_vector, 14); - if (ret != 14) - return false; - - return true; + return drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT_ESI, + sink_irq_vector, DP_DPRX_ESI_LEN) == + DP_DPRX_ESI_LEN; } static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) @@ -4208,7 +4200,7 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp) bool bret; if (intel_dp->is_mst) { - u8 esi[16] = { 0 }; + u8 esi[DP_DPRX_ESI_LEN] = { 0 }; int ret = 0; int retry; bool handled; @@ -4287,21 +4279,29 @@ intel_dp_retrain_link(struct intel_dp *intel_dp) static void intel_dp_check_link_status(struct intel_dp *intel_dp) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; - struct drm_device *dev = intel_dp_to_dev(intel_dp); + struct drm_connector_state *conn_state = + intel_dp->attached_connector->base.state; u8 link_status[DP_LINK_STATUS_SIZE]; - WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); + WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); if (!intel_dp_get_link_status(intel_dp, link_status)) { DRM_ERROR("Failed to get link status\n"); return; } - if (!intel_encoder->base.crtc) + if (!conn_state->crtc) + return; + + WARN_ON(!drm_modeset_is_locked(&conn_state->crtc->mutex)); + + if (!conn_state->crtc->state->active) return; - if (!to_intel_crtc(intel_encoder->base.crtc)->active) + if (conn_state->commit && + !try_wait_for_completion(&conn_state->commit->hw_done)) return; /* @@ -4337,8 +4337,7 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) static bool intel_dp_short_pulse(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u8 sink_irq_vector = 0; u8 old_sink_count = intel_dp->sink_count; bool ret; @@ -4377,13 +4376,12 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n"); } - drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); intel_dp_check_link_status(intel_dp); - drm_modeset_unlock(&dev->mode_config.connection_mutex); + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); /* Send a Hotplug Uevent to userspace to start modeset */ - drm_kms_helper_hotplug_event(intel_encoder->base.dev); + drm_kms_helper_hotplug_event(&dev_priv->drm); } return true; @@ -4403,7 +4401,7 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) if (!intel_dp_get_dpcd(intel_dp)) return connector_status_disconnected; - if (is_edp(intel_dp)) + if (intel_dp_is_edp(intel_dp)) return connector_status_connected; /* if there's no downstream port, we're done */ @@ -4447,8 +4445,7 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) static enum drm_connector_status edp_detect(struct intel_dp *intel_dp) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); enum drm_connector_status status; status = intel_panel_detect(dev_priv); @@ -4463,7 +4460,7 @@ static bool ibx_digital_port_connected(struct drm_i915_private *dev_priv, { u32 
bit; - switch (port->port) { + switch (port->base.port) { case PORT_B: bit = SDE_PORTB_HOTPLUG; break; @@ -4474,7 +4471,7 @@ static bool ibx_digital_port_connected(struct drm_i915_private *dev_priv, bit = SDE_PORTD_HOTPLUG; break; default: - MISSING_CASE(port->port); + MISSING_CASE(port->base.port); return false; } @@ -4486,7 +4483,7 @@ static bool cpt_digital_port_connected(struct drm_i915_private *dev_priv, { u32 bit; - switch (port->port) { + switch (port->base.port) { case PORT_B: bit = SDE_PORTB_HOTPLUG_CPT; break; @@ -4497,7 +4494,7 @@ static bool cpt_digital_port_connected(struct drm_i915_private *dev_priv, bit = SDE_PORTD_HOTPLUG_CPT; break; default: - MISSING_CASE(port->port); + MISSING_CASE(port->base.port); return false; } @@ -4509,7 +4506,7 @@ static bool spt_digital_port_connected(struct drm_i915_private *dev_priv, { u32 bit; - switch (port->port) { + switch (port->base.port) { case PORT_A: bit = SDE_PORTA_HOTPLUG_SPT; break; @@ -4528,7 +4525,7 @@ static bool g4x_digital_port_connected(struct drm_i915_private *dev_priv, { u32 bit; - switch (port->port) { + switch (port->base.port) { case PORT_B: bit = PORTB_HOTPLUG_LIVE_STATUS_G4X; break; @@ -4539,7 +4536,7 @@ static bool g4x_digital_port_connected(struct drm_i915_private *dev_priv, bit = PORTD_HOTPLUG_LIVE_STATUS_G4X; break; default: - MISSING_CASE(port->port); + MISSING_CASE(port->base.port); return false; } @@ -4551,7 +4548,7 @@ static bool gm45_digital_port_connected(struct drm_i915_private *dev_priv, { u32 bit; - switch (port->port) { + switch (port->base.port) { case PORT_B: bit = PORTB_HOTPLUG_LIVE_STATUS_GM45; break; @@ -4562,7 +4559,7 @@ static bool gm45_digital_port_connected(struct drm_i915_private *dev_priv, bit = PORTD_HOTPLUG_LIVE_STATUS_GM45; break; default: - MISSING_CASE(port->port); + MISSING_CASE(port->base.port); return false; } @@ -4572,7 +4569,7 @@ static bool gm45_digital_port_connected(struct drm_i915_private *dev_priv, static bool ilk_digital_port_connected(struct drm_i915_private *dev_priv, struct intel_digital_port *port) { - if (port->port == PORT_A) + if (port->base.port == PORT_A) return I915_READ(DEISR) & DE_DP_A_HOTPLUG; else return ibx_digital_port_connected(dev_priv, port); @@ -4581,7 +4578,7 @@ static bool ilk_digital_port_connected(struct drm_i915_private *dev_priv, static bool snb_digital_port_connected(struct drm_i915_private *dev_priv, struct intel_digital_port *port) { - if (port->port == PORT_A) + if (port->base.port == PORT_A) return I915_READ(DEISR) & DE_DP_A_HOTPLUG; else return cpt_digital_port_connected(dev_priv, port); @@ -4590,7 +4587,7 @@ static bool snb_digital_port_connected(struct drm_i915_private *dev_priv, static bool ivb_digital_port_connected(struct drm_i915_private *dev_priv, struct intel_digital_port *port) { - if (port->port == PORT_A) + if (port->base.port == PORT_A) return I915_READ(DEISR) & DE_DP_A_HOTPLUG_IVB; else return cpt_digital_port_connected(dev_priv, port); @@ -4599,7 +4596,7 @@ static bool ivb_digital_port_connected(struct drm_i915_private *dev_priv, static bool bdw_digital_port_connected(struct drm_i915_private *dev_priv, struct intel_digital_port *port) { - if (port->port == PORT_A) + if (port->base.port == PORT_A) return I915_READ(GEN8_DE_PORT_ISR) & GEN8_PORT_DP_A_HOTPLUG; else return cpt_digital_port_connected(dev_priv, port); @@ -4704,24 +4701,21 @@ intel_dp_unset_edid(struct intel_dp *intel_dp) } static int -intel_dp_long_pulse(struct intel_connector *intel_connector) +intel_dp_long_pulse(struct intel_connector *connector) { - struct drm_connector 
*connector = &intel_connector->base; - struct intel_dp *intel_dp = intel_attached_dp(connector); - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; - struct drm_device *dev = connector->dev; + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_dp *intel_dp = intel_attached_dp(&connector->base); enum drm_connector_status status; u8 sink_irq_vector = 0; - WARN_ON(!drm_modeset_is_locked(&connector->dev->mode_config.connection_mutex)); + WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); - intel_display_power_get(to_i915(dev), intel_dp->aux_power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); /* Can't disconnect eDP, but you can close the lid... */ - if (is_edp(intel_dp)) + if (intel_dp_is_edp(intel_dp)) status = edp_detect(intel_dp); - else if (intel_digital_port_connected(to_i915(dev), + else if (intel_digital_port_connected(dev_priv, dp_to_dig_port(intel_dp))) status = intel_dp_detect_dpcd(intel_dp); else @@ -4742,13 +4736,6 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) goto out; } - if (intel_encoder->type != INTEL_OUTPUT_EDP) - intel_encoder->type = INTEL_OUTPUT_DP; - - DRM_DEBUG_KMS("Display Port TPS3 support: source %s, sink %s\n", - yesno(intel_dp_source_supports_hbr2(intel_dp)), - yesno(drm_dp_tps3_supported(intel_dp->dpcd))); - if (intel_dp->reset_link_params) { /* Initial max link lane count */ intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp); @@ -4799,7 +4786,7 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) intel_dp->aux.i2c_defer_count = 0; intel_dp_set_edid(intel_dp); - if (is_edp(intel_dp) || intel_connector->detect_edid) + if (intel_dp_is_edp(intel_dp) || connector->detect_edid) status = connector_status_connected; intel_dp->detect_done = true; @@ -4822,7 +4809,7 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); - intel_display_power_put(to_i915(dev), intel_dp->aux_power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); return status; } @@ -4838,8 +4825,19 @@ intel_dp_detect(struct drm_connector *connector, connector->base.id, connector->name); /* If full detect is not performed yet, do a full detect */ - if (!intel_dp->detect_done) + if (!intel_dp->detect_done) { + struct drm_crtc *crtc; + int ret; + + crtc = connector->state->crtc; + if (crtc) { + ret = drm_modeset_lock(&crtc->mutex, ctx); + if (ret) + return ret; + } + status = intel_dp_long_pulse(intel_dp->attached_connector); + } intel_dp->detect_done = false; @@ -4865,9 +4863,6 @@ intel_dp_force(struct drm_connector *connector) intel_dp_set_edid(intel_dp); intel_display_power_put(dev_priv, intel_dp->aux_power_domain); - - if (intel_encoder->type != INTEL_OUTPUT_EDP) - intel_encoder->type = INTEL_OUTPUT_DP; } static int intel_dp_get_modes(struct drm_connector *connector) @@ -4883,7 +4878,7 @@ static int intel_dp_get_modes(struct drm_connector *connector) } /* if eDP has no EDID, fall back to fixed mode */ - if (is_edp(intel_attached_dp(connector)) && + if (intel_dp_is_edp(intel_attached_dp(connector)) && intel_connector->panel.fixed_mode) { struct drm_display_mode *mode; @@ -4934,8 +4929,10 @@ intel_dp_connector_destroy(struct drm_connector *connector) if (!IS_ERR_OR_NULL(intel_connector->edid)) kfree(intel_connector->edid); - /* Can't call is_edp() since the encoder may have been destroyed - * already. 
*/ + /* + * Can't call intel_dp_is_edp() since the encoder may have been + * destroyed already. + */ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) intel_panel_fini(&intel_connector->panel); @@ -4949,7 +4946,7 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder) struct intel_dp *intel_dp = &intel_dig_port->dp; intel_dp_mst_encoder_cleanup(intel_dig_port); - if (is_edp(intel_dp)) { + if (intel_dp_is_edp(intel_dp)) { cancel_delayed_work_sync(&intel_dp->panel_vdd_work); /* * vdd might still be enabled do to the delayed vdd off. @@ -4975,7 +4972,7 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base); - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return; /* @@ -4990,9 +4987,7 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); lockdep_assert_held(&dev_priv->pps_mutex); @@ -5043,9 +5038,9 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) intel_dp->active_pipe = vlv_active_pipe(intel_dp); - if (is_edp(intel_dp)) { + if (intel_dp_is_edp(intel_dp)) { /* Reinit the power sequencer, in case BIOS did something with it. */ - intel_dp_pps_init(encoder->dev, intel_dp); + intel_dp_pps_init(intel_dp); intel_edp_panel_vdd_sanitize(intel_dp); } @@ -5080,14 +5075,9 @@ enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) { struct intel_dp *intel_dp = &intel_dig_port->dp; - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); enum irqreturn ret = IRQ_NONE; - if (intel_dig_port->base.type != INTEL_OUTPUT_EDP && - intel_dig_port->base.type != INTEL_OUTPUT_HDMI) - intel_dig_port->base.type = INTEL_OUTPUT_DP; - if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) { /* * vdd off can generate a long pulse on eDP which * @@ -5096,12 +5086,12 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) * "vdd off -> long hpd -> vdd on -> detect -> vdd off -> ..." */ DRM_DEBUG_KMS("ignoring long hpd on eDP port %c\n", - port_name(intel_dig_port->port)); + port_name(intel_dig_port->base.port)); return IRQ_HANDLED; } DRM_DEBUG_KMS("got hpd irq on port %c - %s\n", - port_name(intel_dig_port->port), + port_name(intel_dig_port->base.port), long_hpd ?
"long" : "short"); if (long_hpd) { @@ -5129,7 +5119,38 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) } if (!intel_dp->is_mst) { - if (!intel_dp_short_pulse(intel_dp)) { + struct drm_modeset_acquire_ctx ctx; + struct drm_connector *connector = &intel_dp->attached_connector->base; + struct drm_crtc *crtc; + int iret; + bool handled = false; + + drm_modeset_acquire_init(&ctx, 0); +retry: + iret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, &ctx); + if (iret) + goto err; + + crtc = connector->state->crtc; + if (crtc) { + iret = drm_modeset_lock(&crtc->mutex, &ctx); + if (iret) + goto err; + } + + handled = intel_dp_short_pulse(intel_dp); + +err: + if (iret == -EDEADLK) { + drm_modeset_backoff(&ctx); + goto retry; + } + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + WARN(iret, "Acquiring modeset locks failed with %i\n", iret); + + if (!handled) { intel_dp->detect_done = false; goto put_power; } @@ -5144,7 +5165,7 @@ put_power: } /* check the VBT to see whether the eDP is on another port */ -bool intel_dp_is_edp(struct drm_i915_private *dev_priv, enum port port) +bool intel_dp_is_port_edp(struct drm_i915_private *dev_priv, enum port port) { /* * eDP not supported on g4x. so bail out early just @@ -5163,11 +5184,14 @@ static void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->dev); + enum port port = dp_to_dig_port(intel_dp)->base.port; + + if (!IS_G4X(dev_priv) && port != PORT_A) + intel_attach_force_audio_property(connector); - intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); - if (is_edp(intel_dp)) { + if (intel_dp_is_edp(intel_dp)) { u32 allowed_scalers; allowed_scalers = BIT(DRM_MODE_SCALE_ASPECT) | BIT(DRM_MODE_SCALE_FULLSCREEN); @@ -5189,13 +5213,13 @@ static void intel_dp_init_panel_power_timestamps(struct intel_dp *intel_dp) } static void -intel_pps_readout_hw_state(struct drm_i915_private *dev_priv, - struct intel_dp *intel_dp, struct edp_power_seq *seq) +intel_pps_readout_hw_state(struct intel_dp *intel_dp, struct edp_power_seq *seq) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp_on, pp_off, pp_div = 0, pp_ctl = 0; struct pps_registers regs; - intel_pps_get_registers(dev_priv, intel_dp, &regs); + intel_pps_get_registers(intel_dp, &regs); /* Workaround: Need to write PP_CONTROL with the unlock key as * the very first thing.
*/ @@ -5239,13 +5263,12 @@ intel_pps_dump_state(const char *state_name, const struct edp_power_seq *seq) } static void -intel_pps_verify_state(struct drm_i915_private *dev_priv, - struct intel_dp *intel_dp) +intel_pps_verify_state(struct intel_dp *intel_dp) { struct edp_power_seq hw; struct edp_power_seq *sw = &intel_dp->pps_delays; - intel_pps_readout_hw_state(dev_priv, intel_dp, &hw); + intel_pps_readout_hw_state(intel_dp, &hw); if (hw.t1_t3 != sw->t1_t3 || hw.t8 != sw->t8 || hw.t9 != sw->t9 || hw.t10 != sw->t10 || hw.t11_t12 != sw->t11_t12) { @@ -5256,10 +5279,9 @@ intel_pps_verify_state(struct drm_i915_private *dev_priv, } static void -intel_dp_init_panel_power_sequencer(struct drm_device *dev, - struct intel_dp *intel_dp) +intel_dp_init_panel_power_sequencer(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); struct edp_power_seq cur, vbt, spec, *final = &intel_dp->pps_delays; @@ -5269,7 +5291,7 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, if (final->t11_t12 != 0) return; - intel_pps_readout_hw_state(dev_priv, intel_dp, &cur); + intel_pps_readout_hw_state(intel_dp, &cur); intel_pps_dump_state("cur", &cur); @@ -5340,23 +5362,28 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, */ final->t8 = 1; final->t9 = 1; + + /* + * HW has only a 100msec granularity for t11_t12 so round it up + * accordingly. + */ + final->t11_t12 = roundup(final->t11_t12, 100 * 10); } static void -intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, - struct intel_dp *intel_dp, +intel_dp_init_panel_power_sequencer_registers(struct intel_dp *intel_dp, bool force_disable_vdd) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); u32 pp_on, pp_off, pp_div, port_sel = 0; int div = dev_priv->rawclk_freq / 1000; struct pps_registers regs; - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = dp_to_dig_port(intel_dp)->base.port; const struct edp_power_seq *seq = &intel_dp->pps_delays; lockdep_assert_held(&dev_priv->pps_mutex); - intel_pps_get_registers(dev_priv, intel_dp, &regs); + intel_pps_get_registers(intel_dp, &regs); /* * On some VLV machines the BIOS can leave the VDD @@ -5428,16 +5455,15 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, I915_READ(regs.pp_div)); } -static void intel_dp_pps_init(struct drm_device *dev, - struct intel_dp *intel_dp) +static void intel_dp_pps_init(struct intel_dp *intel_dp) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_initial_power_sequencer_setup(intel_dp); } else { - intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, false); + intel_dp_init_panel_power_sequencer(intel_dp); + intel_dp_init_panel_power_sequencer_registers(intel_dp, false); } } @@ -5455,7 +5481,7 @@ static void intel_dp_pps_init(struct drm_device *dev, * The caller of this function needs to take a lock on dev_priv->drrs. */ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, - struct intel_crtc_state *crtc_state, + const struct intel_crtc_state *crtc_state, int refresh_rate) { struct intel_encoder *encoder;
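
/*
 * Illustrative sketch, not part of the patch: the intel_dp_hpd_pulse()
 * hunk above is an instance of the standard DRM modeset-lock retry
 * pattern, where every lock is taken through one drm_modeset_acquire_ctx
 * and -EDEADLK means "back off, drop everything and retry". A minimal
 * self-contained version of the same dance; probe_one() is a
 * hypothetical stand-in for intel_dp_short_pulse():
 */
#include <drm/drmP.h>	/* 4.15-era catch-all, pulls in drm_modeset_lock.h */

static bool probe_one(struct drm_connector *connector)
{
	return true;	/* stub for illustration */
}

static bool probe_under_modeset_locks(struct drm_device *dev,
				      struct drm_connector *connector)
{
	struct drm_modeset_acquire_ctx ctx;
	bool handled = false;
	int ret;

	drm_modeset_acquire_init(&ctx, 0);
retry:
	ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx);
	/* connector->state is stable once connection_mutex is held */
	if (!ret && connector->state->crtc)
		ret = drm_modeset_lock(&connector->state->crtc->mutex, &ctx);
	if (!ret)
		handled = probe_one(connector);

	if (ret == -EDEADLK) {
		drm_modeset_backoff(&ctx);
		goto retry;
	}

	drm_modeset_drop_locks(&ctx);
	drm_modeset_acquire_fini(&ctx);
	return handled;
}
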
@@ -5474,14 +5500,8 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, return; } - /* - * FIXME: This needs proper synchronization with psr state for some - * platforms that cannot have PSR and DRRS enabled at the same time. - */ - dig_port = dp_to_dig_port(intel_dp); encoder = &dig_port->base; - intel_crtc = to_intel_crtc(encoder->base.crtc); if (!intel_crtc) { DRM_DEBUG_KMS("DRRS: intel_crtc not initialized\n"); @@ -5552,16 +5572,20 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, * Initializes frontbuffer_bits and drrs.dp */ void intel_edp_drrs_enable(struct intel_dp *intel_dp, - struct intel_crtc_state *crtc_state) + const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (!crtc_state->has_drrs) { DRM_DEBUG_KMS("Panel doesn't support DRRS\n"); return; } + if (dev_priv->psr.enabled) { + DRM_DEBUG_KMS("PSR enabled. Not enabling DRRS.\n"); + return; + } + mutex_lock(&dev_priv->drrs.mutex); if (WARN_ON(dev_priv->drrs.dp)) { DRM_ERROR("DRRS already enabled\n"); @@ -5583,10 +5607,9 @@ unlock: * */ void intel_edp_drrs_disable(struct intel_dp *intel_dp, - struct intel_crtc_state *old_crtc_state) + const struct intel_crtc_state *old_crtc_state) { - struct drm_device *dev = intel_dp_to_dev(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); if (!old_crtc_state->has_drrs) return; @@ -5769,7 +5792,7 @@ void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, /** * intel_dp_drrs_init - Init basic DRRS work and mutex. - * @intel_connector: eDP connector + * @connector: eDP connector * @fixed_mode: preferred mode of panel * * This function is called only once at driver load to initialize basic @@ -5781,12 +5804,10 @@ void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, * from VBT setting). */ static struct drm_display_mode * -intel_dp_drrs_init(struct intel_connector *intel_connector, - struct drm_display_mode *fixed_mode) +intel_dp_drrs_init(struct intel_connector *connector, + struct drm_display_mode *fixed_mode) { - struct drm_connector *connector = &intel_connector->base; - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct drm_display_mode *downclock_mode = NULL; INIT_DELAYED_WORK(&dev_priv->drrs.work, intel_edp_drrs_downclock_work); @@ -5802,8 +5823,8 @@ intel_dp_drrs_init(struct intel_connector *intel_connector, return NULL; } - downclock_mode = intel_find_panel_downclock - (dev_priv, fixed_mode, connector); + downclock_mode = intel_find_panel_downclock(dev_priv, fixed_mode, &connector->base); if (!downclock_mode) { DRM_DEBUG_KMS("Downclock mode is not found.
DRRS not supported\n"); @@ -5820,11 +5841,9 @@ intel_dp_drrs_init(struct intel_connector *intel_connector, static bool intel_edp_init_connector(struct intel_dp *intel_dp, struct intel_connector *intel_connector) { - struct drm_connector *connector = &intel_connector->base; - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; - struct drm_device *dev = intel_encoder->base.dev; + struct drm_device *dev = intel_dp_to_dev(intel_dp); struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_connector *connector = &intel_connector->base; struct drm_display_mode *fixed_mode = NULL; struct drm_display_mode *alt_fixed_mode = NULL; struct drm_display_mode *downclock_mode = NULL; @@ -5833,7 +5852,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, struct edid *edid; enum pipe pipe = INVALID_PIPE; - if (!is_edp(intel_dp)) + if (!intel_dp_is_edp(intel_dp)) return true; /* @@ -5842,7 +5861,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, * eDP and LVDS bail out early in this case to prevent interfering * with an already powered-on LVDS power sequencer. */ - if (intel_get_lvds_encoder(dev)) { + if (intel_get_lvds_encoder(&dev_priv->drm)) { WARN_ON(!(HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv))); DRM_INFO("LVDS was detected, not registering eDP\n"); @@ -5852,7 +5871,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, pps_lock(intel_dp); intel_dp_init_panel_power_timestamps(intel_dp); - intel_dp_pps_init(dev, intel_dp); + intel_dp_pps_init(intel_dp); intel_edp_panel_vdd_sanitize(intel_dp); pps_unlock(intel_dp); @@ -5872,7 +5891,6 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, if (drm_add_edid_modes(connector, edid)) { drm_mode_connector_update_edid_property(connector, edid); - drm_edid_to_eld(connector, edid); } else { kfree(edid); edid = ERR_PTR(-EINVAL); @@ -5953,9 +5971,9 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) struct intel_encoder *encoder = &intel_dig_port->base; struct intel_dp *intel_dp = &intel_dig_port->dp; - encoder->hpd_pin = intel_hpd_pin(intel_dig_port->port); + encoder->hpd_pin = intel_hpd_pin(encoder->port); - switch (intel_dig_port->port) { + switch (encoder->port) { case PORT_A: intel_dp->aux_power_domain = POWER_DOMAIN_AUX_A; break; @@ -5973,7 +5991,7 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; break; default: - MISSING_CASE(intel_dig_port->port); + MISSING_CASE(encoder->port); } } @@ -6009,7 +6027,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_dig_port->port; + enum port port = intel_encoder->port; int type; /* Initialize the work for modeset in case of link train failure */ @@ -6049,7 +6067,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dp->DP = I915_READ(intel_dp->output_reg); intel_dp->attached_connector = intel_connector; - if (intel_dp_is_edp(dev_priv, port)) + if (intel_dp_is_port_edp(dev_priv, port)) type = DRM_MODE_CONNECTOR_eDP; else type = DRM_MODE_CONNECTOR_DisplayPort; @@ -6067,7 +6085,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, /* eDP only on port B and/or C on vlv/chv */ if (WARN_ON((IS_VALLEYVIEW(dev_priv) || 
IS_CHERRYVIEW(dev_priv)) && - is_edp(intel_dp) && port != PORT_B && port != PORT_C)) + intel_dp_is_edp(intel_dp) && + port != PORT_B && port != PORT_C)) return false; DRM_DEBUG_KMS("Adding %s connector on port %c\n", @@ -6077,7 +6096,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, drm_connector_init(dev, connector, &intel_dp_connector_funcs, type); drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs); - connector->interlace_allowed = true; + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) + connector->interlace_allowed = true; connector->doublescan_allowed = 0; intel_dp_init_connector_port_info(intel_dig_port); @@ -6095,7 +6115,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_connector->get_hw_state = intel_connector_get_hw_state; /* init MST on ports that can support it */ - if (HAS_DP_MST(dev_priv) && !is_edp(intel_dp) && + if (HAS_DP_MST(dev_priv) && !intel_dp_is_edp(intel_dp) && (port == PORT_B || port == PORT_C || port == PORT_D)) intel_dp_mst_encoder_init(intel_dig_port, intel_connector->base.base.id); @@ -6151,7 +6171,6 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, goto err_encoder_init; intel_encoder->compute_config = intel_dp_compute_config; - intel_encoder->disable = intel_disable_dp; intel_encoder->get_hw_state = intel_dp_get_hw_state; intel_encoder->get_config = intel_dp_get_config; intel_encoder->suspend = intel_dp_encoder_suspend; @@ -6159,21 +6178,26 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder->pre_pll_enable = chv_dp_pre_pll_enable; intel_encoder->pre_enable = chv_pre_enable_dp; intel_encoder->enable = vlv_enable_dp; + intel_encoder->disable = vlv_disable_dp; intel_encoder->post_disable = chv_post_disable_dp; intel_encoder->post_pll_disable = chv_dp_post_pll_disable; } else if (IS_VALLEYVIEW(dev_priv)) { intel_encoder->pre_pll_enable = vlv_dp_pre_pll_enable; intel_encoder->pre_enable = vlv_pre_enable_dp; intel_encoder->enable = vlv_enable_dp; + intel_encoder->disable = vlv_disable_dp; intel_encoder->post_disable = vlv_post_disable_dp; + } else if (INTEL_GEN(dev_priv) >= 5) { + intel_encoder->pre_enable = g4x_pre_enable_dp; + intel_encoder->enable = g4x_enable_dp; + intel_encoder->disable = ilk_disable_dp; + intel_encoder->post_disable = ilk_post_disable_dp; } else { intel_encoder->pre_enable = g4x_pre_enable_dp; intel_encoder->enable = g4x_enable_dp; - if (INTEL_GEN(dev_priv) >= 5) - intel_encoder->post_disable = ilk_post_disable_dp; + intel_encoder->disable = g4x_disable_dp; } - intel_dig_port->port = port; intel_dig_port->dp.output_reg = output_reg; intel_dig_port->max_lanes = 4; @@ -6193,6 +6217,9 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_dig_port->hpd_pulse = intel_dp_hpd_pulse; dev_priv->hotplug.irq_port[port] = intel_dig_port; + if (port != PORT_A) + intel_infoframe_init(intel_dig_port); + if (!intel_dp_init_connector(intel_dig_port, intel_connector)) goto err_init_connector; diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c index d2830ba3162e..2bb2ceb9d463 100644 --- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c @@ -264,7 +264,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector) { struct intel_panel *panel = &intel_connector->panel; - if (!i915.enable_dpcd_backlight) + if (!i915_modparams.enable_dpcd_backlight) return -ENODEV; if (!intel_dp_aux_display_control_capable(intel_connector)) diff 
--git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index 05907fa8a553..cf8fef8b6f58 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -328,14 +328,22 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) return; failure_handling: - DRM_DEBUG_KMS("[CONNECTOR:%d:%s] Link Training failed at link rate = %d, lane count = %d", - intel_connector->base.base.id, - intel_connector->base.name, - intel_dp->link_rate, intel_dp->lane_count); - if (!intel_dp_get_link_train_fallback_values(intel_dp, - intel_dp->link_rate, - intel_dp->lane_count)) - /* Schedule a Hotplug Uevent to userspace to start modeset */ - schedule_work(&intel_connector->modeset_retry_work); + /* Don't fall back and prune modes if it's eDP */ + if (!intel_dp_is_edp(intel_dp)) { + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] Link Training failed at link rate = %d, lane count = %d", + intel_connector->base.base.id, + intel_connector->base.name, + intel_dp->link_rate, intel_dp->lane_count); + if (!intel_dp_get_link_train_fallback_values(intel_dp, + intel_dp->link_rate, + intel_dp->lane_count)) + /* Schedule a Hotplug Uevent to userspace to start modeset */ + schedule_work(&intel_connector->modeset_retry_work); + } else { + DRM_ERROR("[CONNECTOR:%d:%s] Link Training failed at link rate = %d, lane count = %d", + intel_connector->base.base.id, + intel_connector->base.name, + intel_dp->link_rate, intel_dp->lane_count); + } return; } diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 93fc8ab9bb31..c3de0918ee13 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -34,6 +34,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; @@ -87,6 +88,12 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->dp_m_n.tu = slots; + if (IS_GEN9_LP(dev_priv)) + pipe_config->lane_lat_optim_mask = + bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); + + intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); + return true; } @@ -123,8 +130,8 @@ static int intel_dp_mst_atomic_check(struct drm_connector *connector, } static void intel_mst_disable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; @@ -133,7 +140,7 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, to_intel_connector(old_conn_state->connector); int ret; - DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); + DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); drm_dp_mst_reset_vcpi_slots(&intel_dp->mst_mgr, connector->port); @@ -142,12 +149,13 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, DRM_ERROR("failed to update payload %d\n", ret); } if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder); + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); }
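
/*
 * Illustrative sketch, not part of the patch: the intel_dp_mst.c hunks
 * around here reorder MST teardown so the sink's path is powered down
 * before the port is disabled (see the comment added in
 * intel_mst_post_disable_dp() below). The resulting call order,
 * condensed into one hypothetical helper built on the drm_dp_mst
 * topology manager:
 */
#include <drm/drm_dp_mst_helper.h>

static void mst_stream_teardown(struct drm_dp_mst_topology_mgr *mgr,
				struct drm_dp_mst_port *port)
{
	/* 1. Stop the stream: free the VCPI slots, update payload IDs. */
	drm_dp_mst_reset_vcpi_slots(mgr, port);
	drm_dp_update_payload_part1(mgr);

	/*
	 * 2. Wait for the allocation change to take (ACT), finish the
	 * payload update and release the VCPI; both steps can fail on a
	 * dying link, which the driver tolerates.
	 */
	drm_dp_check_act_status(mgr);
	drm_dp_update_payload_part2(mgr);
	drm_dp_mst_deallocate_vcpi(mgr, port);

	/*
	 * 3. Power down the path first, so the sink does not raise
	 * link-loss interrupts once the port itself is shut off.
	 */
	drm_dp_send_power_updown_phy(mgr, port, false);
}
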
static void intel_mst_post_disable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; @@ -155,8 +163,6 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, struct intel_connector *connector = to_intel_connector(old_conn_state->connector); - DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); - /* this can fail */ drm_dp_check_act_status(&intel_dp->mst_mgr); /* and this can also fail */ @@ -164,26 +170,46 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, drm_dp_mst_deallocate_vcpi(&intel_dp->mst_mgr, connector->port); + /* + * Power down mst path before disabling the port, otherwise we end + * up getting interrupts from the sink upon detecting link loss. + */ + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, + false); + intel_dp->active_mst_links--; intel_mst->connector = NULL; - if (intel_dp->active_mst_links == 0) { + if (intel_dp->active_mst_links == 0) intel_dig_port->base.post_disable(&intel_dig_port->base, - NULL, NULL); + old_crtc_state, NULL); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); - } + DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); +} + +static void intel_mst_pre_pll_enable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) +{ + struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); + struct intel_digital_port *intel_dig_port = intel_mst->primary; + struct intel_dp *intel_dp = &intel_dig_port->dp; + + if (intel_dp->active_mst_links == 0 && + intel_dig_port->base.pre_pll_enable) + intel_dig_port->base.pre_pll_enable(&intel_dig_port->base, + pipe_config, NULL); } static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; struct intel_connector *connector = to_intel_connector(conn_state->connector); int ret; @@ -195,8 +221,9 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, connector->encoder = encoder; intel_mst->connector = connector; - DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); + DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true); if (intel_dp->active_mst_links == 0) intel_dig_port->base.pre_enable(&intel_dig_port->base, pipe_config, NULL); @@ -219,17 +246,17 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, } static void intel_mst_enable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = 
intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_dig_port->port; + enum port port = intel_dig_port->base.port; int ret; - DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); + DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); if (intel_wait_for_register(dev_priv, DP_TP_STATUS(port), @@ -260,48 +287,8 @@ static void intel_dp_mst_enc_get_config(struct intel_encoder *encoder, { struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); struct intel_digital_port *intel_dig_port = intel_mst->primary; - struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; - u32 temp, flags = 0; - - pipe_config->has_audio = - intel_ddi_is_audio_enabled(dev_priv, crtc); - - temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); - if (temp & TRANS_DDI_PHSYNC) - flags |= DRM_MODE_FLAG_PHSYNC; - else - flags |= DRM_MODE_FLAG_NHSYNC; - if (temp & TRANS_DDI_PVSYNC) - flags |= DRM_MODE_FLAG_PVSYNC; - else - flags |= DRM_MODE_FLAG_NVSYNC; - - switch (temp & TRANS_DDI_BPC_MASK) { - case TRANS_DDI_BPC_6: - pipe_config->pipe_bpp = 18; - break; - case TRANS_DDI_BPC_8: - pipe_config->pipe_bpp = 24; - break; - case TRANS_DDI_BPC_10: - pipe_config->pipe_bpp = 30; - break; - case TRANS_DDI_BPC_12: - pipe_config->pipe_bpp = 36; - break; - default: - break; - } - pipe_config->base.adjusted_mode.flags |= flags; - - pipe_config->lane_count = - ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; - intel_dp_get_m_n(crtc, pipe_config); - - intel_ddi_clock_get(&intel_dig_port->base, pipe_config); + intel_ddi_get_config(&intel_dig_port->base, pipe_config); } static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) @@ -449,32 +436,52 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo struct intel_dp *intel_dp = container_of(mgr, struct intel_dp, mst_mgr); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_connector *intel_connector; struct drm_connector *connector; - int i; + enum pipe pipe; + int ret; intel_connector = intel_connector_alloc(); if (!intel_connector) return NULL; connector = &intel_connector->base; - drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); + ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, + DRM_MODE_CONNECTOR_DisplayPort); + if (ret) { + intel_connector_free(intel_connector); + return NULL; + } + drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs); intel_connector->get_hw_state = intel_dp_mst_get_hw_state; intel_connector->mst_port = intel_dp; intel_connector->port = port; - for (i = PIPE_A; i <= PIPE_C; i++) { - drm_mode_connector_attach_encoder(&intel_connector->base, - &intel_dp->mst_encoders[i]->base.base); + for_each_pipe(dev_priv, pipe) { + struct drm_encoder *enc = + &intel_dp->mst_encoders[pipe]->base.base; + + ret = drm_mode_connector_attach_encoder(&intel_connector->base, + enc); + if (ret) + goto err; } drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0); drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0); - drm_mode_connector_set_path_property(connector, pathprop); + ret = 
drm_mode_connector_set_path_property(connector, pathprop); + if (ret) + goto err; + return connector; + +err: + drm_connector_cleanup(connector); + return NULL; } static void intel_dp_register_mst_connector(struct drm_connector *connector) @@ -494,6 +501,7 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_i915_private *dev_priv = to_i915(connector->dev); + DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); drm_connector_unregister(connector); if (dev_priv->fbdev) @@ -505,7 +513,6 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_modeset_unlock(&connector->dev->mode_config.connection_mutex); drm_connector_unreference(connector); - DRM_DEBUG_KMS("\n"); } static void intel_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) @@ -545,13 +552,14 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->type = INTEL_OUTPUT_DP_MST; intel_encoder->power_domain = intel_dig_port->base.power_domain; - intel_encoder->port = intel_dig_port->port; + intel_encoder->port = intel_dig_port->base.port; intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; intel_encoder->compute_config = intel_dp_mst_compute_config; intel_encoder->disable = intel_mst_disable_dp; intel_encoder->post_disable = intel_mst_post_disable_dp; + intel_encoder->pre_pll_enable = intel_mst_pre_pll_enable_dp; intel_encoder->pre_enable = intel_mst_pre_enable_dp; intel_encoder->enable = intel_mst_enable_dp; intel_encoder->get_hw_state = intel_dp_mst_enc_get_hw_state; @@ -564,11 +572,12 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum static bool intel_dp_create_fake_mst_encoders(struct intel_digital_port *intel_dig_port) { - int i; struct intel_dp *intel_dp = &intel_dig_port->dp; + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + enum pipe pipe; - for (i = PIPE_A; i <= PIPE_C; i++) - intel_dp->mst_encoders[i] = intel_dp_create_fake_mst_encoder(intel_dig_port, i); + for_each_pipe(dev_priv, pipe) + intel_dp->mst_encoders[pipe] = intel_dp_create_fake_mst_encoder(intel_dig_port, pipe); return true; } diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index de38d014ed39..76473e9836c6 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -466,21 +466,21 @@ void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy) lockdep_assert_held(&dev_priv->power_domains.lock); - if (rcomp_phy != -1) { + was_enabled = true; + if (rcomp_phy != -1) was_enabled = bxt_ddi_phy_is_enabled(dev_priv, rcomp_phy); - /* - * We need to copy the GRC calibration value from rcomp_phy, - * so make sure it's powered up. - */ - if (!was_enabled) - _bxt_ddi_phy_init(dev_priv, rcomp_phy); - } + /* + * We need to copy the GRC calibration value from rcomp_phy, + * so make sure it's powered up. 
+ */ + if (!was_enabled) + _bxt_ddi_phy_init(dev_priv, rcomp_phy); _bxt_ddi_phy_init(dev_priv, phy); - if (rcomp_phy != -1 && !was_enabled) - bxt_ddi_phy_uninit(dev_priv, phy_info->rcomp_phy); + if (!was_enabled) + bxt_ddi_phy_uninit(dev_priv, rcomp_phy); } static bool __printf(6, 7) @@ -567,8 +567,7 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, } uint8_t -bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder, - uint8_t lane_count) +bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count) { switch (lane_count) { case 1: @@ -587,9 +586,8 @@ bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder, void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, uint8_t lane_lat_optim_mask) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); - enum port port = dport->port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; enum dpio_phy phy; enum dpio_channel ch; int lane; @@ -614,9 +612,8 @@ void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, uint8_t bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); - enum port port = dport->port; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; enum dpio_phy phy; enum dpio_channel ch; int lane; @@ -642,7 +639,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct intel_crtc *intel_crtc = to_intel_crtc(dport->base.base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); enum pipe pipe = intel_crtc->pipe; u32 val; @@ -734,11 +731,12 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, } void chv_data_lane_soft_reset(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, bool reset) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base)); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum pipe pipe = crtc->pipe; uint32_t val; @@ -777,17 +775,16 @@ void chv_data_lane_soft_reset(struct intel_encoder *encoder, } } -void chv_phy_pre_pll_enable(struct intel_encoder *encoder) +void chv_phy_pre_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = - to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); - enum pipe pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; unsigned int lane_mask = - intel_dp_unused_lane_mask(intel_crtc->config->lane_count); + intel_dp_unused_lane_mask(crtc_state->lane_count); u32 val; /* @@ -803,7 +800,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder) mutex_lock(&dev_priv->sb_lock); /* Assert data lane 
reset */ - chv_data_lane_soft_reset(encoder, true); + chv_data_lane_soft_reset(encoder, crtc_state, true); /* program left/right clock distribution */ if (pipe != PIPE_B) { @@ -833,7 +830,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder) val |= CHV_PCS_USEDCLKCHANNEL; vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW8(ch), val); - if (intel_crtc->config->lane_count > 2) { + if (crtc_state->lane_count > 2) { val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW8(ch)); val |= CHV_PCS_USEDCLKCHANNEL_OVRRIDE; if (pipe != PIPE_B) @@ -858,16 +855,15 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->sb_lock); } -void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) +void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_digital_port *dport = dp_to_dig_port(intel_dp); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = - to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum dpio_channel ch = vlv_dport_to_channel(dport); - int pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; int data, i, stagger; u32 val; @@ -878,16 +874,16 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) val &= ~DPIO_LANEDESKEW_STRAP_OVRD; vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW11(ch), val); - if (intel_crtc->config->lane_count > 2) { + if (crtc_state->lane_count > 2) { val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch)); val &= ~DPIO_LANEDESKEW_STRAP_OVRD; vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val); } /* Program Tx lane latency optimal setting*/ - for (i = 0; i < intel_crtc->config->lane_count; i++) { + for (i = 0; i < crtc_state->lane_count; i++) { /* Set the upar bit */ - if (intel_crtc->config->lane_count == 1) + if (crtc_state->lane_count == 1) data = 0x0; else data = (i == 1) ? 
0x0 : 0x1; @@ -896,13 +892,13 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) } /* Data lane stagger programming */ - if (intel_crtc->config->port_clock > 270000) + if (crtc_state->port_clock > 270000) stagger = 0x18; - else if (intel_crtc->config->port_clock > 135000) + else if (crtc_state->port_clock > 135000) stagger = 0xd; - else if (intel_crtc->config->port_clock > 67500) + else if (crtc_state->port_clock > 67500) stagger = 0x7; - else if (intel_crtc->config->port_clock > 33750) + else if (crtc_state->port_clock > 33750) stagger = 0x4; else stagger = 0x2; @@ -911,7 +907,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) val |= DPIO_TX2_STAGGER_MASK(0x1f); vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW11(ch), val); - if (intel_crtc->config->lane_count > 2) { + if (crtc_state->lane_count > 2) { val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW11(ch)); val |= DPIO_TX2_STAGGER_MASK(0x1f); vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW11(ch), val); @@ -924,7 +920,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) DPIO_TX1_STAGGER_MULT(6) | DPIO_TX2_STAGGER_MULT(0)); - if (intel_crtc->config->lane_count > 2) { + if (crtc_state->lane_count > 2) { vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW12(ch), DPIO_LANESTAGGER_STRAP(stagger) | DPIO_LANESTAGGER_STRAP_OVRD | @@ -934,7 +930,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder) } /* Deassert data lane reset */ - chv_data_lane_soft_reset(encoder, false); + chv_data_lane_soft_reset(encoder, crtc_state, false); mutex_unlock(&dev_priv->sb_lock); } @@ -950,10 +946,11 @@ void chv_phy_release_cl2_override(struct intel_encoder *encoder) } } -void chv_phy_post_pll_disable(struct intel_encoder *encoder) +void chv_phy_post_pll_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum pipe pipe = to_intel_crtc(encoder->base.crtc)->pipe; + enum pipe pipe = to_intel_crtc(old_crtc_state->base.crtc)->pipe; u32 val; mutex_lock(&dev_priv->sb_lock); @@ -991,7 +988,7 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); enum dpio_channel port = vlv_dport_to_channel(dport); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; mutex_lock(&dev_priv->sb_lock); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000); @@ -1009,15 +1006,14 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, mutex_unlock(&dev_priv->sb_lock); } -void vlv_phy_pre_pll_enable(struct intel_encoder *encoder) +void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = - to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum dpio_channel port = vlv_dport_to_channel(dport); - int pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; /* Program Tx lane resets to default */ mutex_lock(&dev_priv->sb_lock); @@ -1037,15 +1033,15 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->sb_lock); } -void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder) +void 
vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct intel_digital_port *dport = dp_to_dig_port(intel_dp); - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); enum dpio_channel port = vlv_dport_to_channel(dport); - int pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; u32 val; mutex_lock(&dev_priv->sb_lock); @@ -1067,14 +1063,14 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder) mutex_unlock(&dev_priv->sb_lock); } -void vlv_phy_reset_lanes(struct intel_encoder *encoder) +void vlv_phy_reset_lanes(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = - to_intel_crtc(encoder->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); enum dpio_channel port = vlv_dport_to_channel(dport); - int pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; mutex_lock(&dev_priv->sb_lock); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000); diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index df808a94c511..51c5ae4e9116 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -813,15 +813,11 @@ hsw_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, memset(&crtc_state->dpll_hw_state, 0, sizeof(crtc_state->dpll_hw_state)); - if (encoder->type == INTEL_OUTPUT_HDMI) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { pll = hsw_ddi_hdmi_get_dpll(clock, crtc, crtc_state); - - } else if (encoder->type == INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_DP_MST || - encoder->type == INTEL_OUTPUT_EDP) { + } else if (intel_crtc_has_dp_encoder(crtc_state)) { pll = hsw_ddi_dp_get_dpll(encoder, clock); - - } else if (encoder->type == INTEL_OUTPUT_ANALOG) { + } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) { if (WARN_ON(crtc_state->port_clock / 2 != 135000)) return NULL; @@ -1369,15 +1365,13 @@ skl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, memset(&dpll_hw_state, 0, sizeof(dpll_hw_state)); - if (encoder->type == INTEL_OUTPUT_HDMI) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { bret = skl_ddi_hdmi_pll_dividers(crtc, crtc_state, clock); if (!bret) { DRM_DEBUG_KMS("Could not get HDMI pll dividers.\n"); return NULL; } - } else if (encoder->type == INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_DP_MST || - encoder->type == INTEL_OUTPUT_EDP) { + } else if (intel_crtc_has_dp_encoder(crtc_state)) { bret = skl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state); if (!bret) { DRM_DEBUG_KMS("Could not set DP dpll HW state.\n"); @@ -1388,7 +1382,7 @@ skl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, return NULL; } - if (encoder->type == INTEL_OUTPUT_EDP) + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) pll = intel_find_shared_dpll(crtc, crtc_state, DPLL_ID_SKL_DPLL0, DPLL_ID_SKL_DPLL0); @@ -1808,18 +1802,15 @@ bxt_get_dpll(struct intel_crtc *crtc, { struct intel_dpll_hw_state dpll_hw_state = { }; struct drm_i915_private 
*dev_priv = to_i915(crtc->base.dev); - struct intel_digital_port *intel_dig_port; struct intel_shared_dpll *pll; int i, clock = crtc_state->port_clock; - if (encoder->type == INTEL_OUTPUT_HDMI && + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) && !bxt_ddi_hdmi_set_dpll_hw_state(crtc, crtc_state, clock, &dpll_hw_state)) return NULL; - if ((encoder->type == INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_EDP || - encoder->type == INTEL_OUTPUT_DP_MST) && + if (intel_crtc_has_dp_encoder(crtc_state) && !bxt_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) return NULL; @@ -1828,15 +1819,8 @@ bxt_get_dpll(struct intel_crtc *crtc, crtc_state->dpll_hw_state = dpll_hw_state; - if (encoder->type == INTEL_OUTPUT_DP_MST) { - struct intel_dp_mst_encoder *intel_mst = enc_to_mst(&encoder->base); - - intel_dig_port = intel_mst->primary; - } else - intel_dig_port = enc_to_dig_port(&encoder->base); - /* 1:1 mapping between ports and PLLs */ - i = (enum intel_dpll_id) intel_dig_port->port; + i = (enum intel_dpll_id) encoder->port; pll = intel_get_shared_dpll_by_id(dev_priv, i); DRM_DEBUG_KMS("[CRTC:%d:%s] using pre-allocated %s\n", @@ -2008,8 +1992,8 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, * requirement, follow the Display Voltage Frequency Switching * Sequence Before Frequency Change * - * FIXME: (DVFS) is used to adjust the display voltage to match the - * display clock frequencies + * Note: DVFS is actually handled via the cdclk code paths, + * hence we do nothing here. */ /* 6. Enable DPLL in DPLL_ENABLE. */ @@ -2030,8 +2014,8 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, * requirement, follow the Display Voltage Frequency Switching * Sequence After Frequency Change * - * FIXME: (DVFS) is used to adjust the display voltage to match the - * display clock frequencies + * Note: DVFS is actually handled via the cdclk code paths, + * hence we do nothing here. */ /* @@ -2055,8 +2039,8 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, * requirement, follow the Display Voltage Frequency Switching * Sequence Before Frequency Change * - * FIXME: (DVFS) is used to adjust the display voltage to match the - * display clock frequencies + * Note: DVFS is actually handled via the cdclk code paths, + * hence we do nothing here. */ /* 3. Disable DPLL through DPLL_ENABLE. */ @@ -2077,8 +2061,8 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, * requirement, follow the Display Voltage Frequency Switching * Sequence After Frequency Change * - * FIXME: (DVFS) is used to adjust the display voltage to match the - * display clock frequencies + * Note: DVFS is actually handled via the cdclk code paths, + * hence we do nothing here. */ /* 6. Disable DPLL power in DPLL_ENABLE. 
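Note on the get_dpll() conversions in the hunks above and below: checks on encoder->type are replaced with queries on crtc_state->output_types, a bitmask of every output type driven by the pipe, which stays meaningful for DP MST where the encoder type alone is ambiguous. A minimal sketch of the helpers relied upon, paraphrased from intel_drv.h:

static inline bool
intel_crtc_has_type(const struct intel_crtc_state *crtc_state,
                    enum intel_output_type type)
{
        /* output_types is filled in by compute_config/get_config */
        return crtc_state->output_types & (1 << type);
}

static inline bool
intel_crtc_has_dp_encoder(const struct intel_crtc_state *crtc_state)
{
        /* covers DP, DP MST and eDP in one check */
        return crtc_state->output_types &
                ((1 << INTEL_OUTPUT_DP) |
                 (1 << INTEL_OUTPUT_DP_MST) |
                 (1 << INTEL_OUTPUT_EDP));
}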
*/ @@ -2126,10 +2110,8 @@ out: return ret; } -static void cnl_wrpll_get_multipliers(unsigned int bestdiv, - unsigned int *pdiv, - unsigned int *qdiv, - unsigned int *kdiv) +static void cnl_wrpll_get_multipliers(int bestdiv, int *pdiv, + int *qdiv, int *kdiv) { /* even dividers */ if (bestdiv % 2 == 0) { @@ -2167,10 +2149,12 @@ static void cnl_wrpll_get_multipliers(unsigned int bestdiv, } } -static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, uint32_t dco_freq, - uint32_t ref_freq, uint32_t pdiv, uint32_t qdiv, - uint32_t kdiv) +static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, + u32 dco_freq, u32 ref_freq, + int pdiv, int qdiv, int kdiv) { + u32 dco; + switch (kdiv) { case 1: params->kdiv = 1; @@ -2202,39 +2186,35 @@ static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, uint32_t WARN(1, "Incorrect PDiv\n"); } - if (kdiv != 2) - qdiv = 1; + WARN_ON(kdiv != 2 && qdiv != 1); params->qdiv_ratio = qdiv; params->qdiv_mode = (qdiv == 1) ? 0 : 1; - params->dco_integer = div_u64(dco_freq, ref_freq); - params->dco_fraction = div_u64((div_u64((uint64_t)dco_freq<<15, (uint64_t)ref_freq) - - ((uint64_t)params->dco_integer<<15)) * 0x8000, 0x8000); + dco = div_u64((u64)dco_freq << 15, ref_freq); + + params->dco_integer = dco >> 15; + params->dco_fraction = dco & 0x7fff; } static bool -cnl_ddi_calculate_wrpll(int clock /* in Hz */, +cnl_ddi_calculate_wrpll(int clock, struct drm_i915_private *dev_priv, struct skl_wrpll_params *wrpll_params) { - uint64_t afe_clock = clock * 5 / KHz(1); /* clocks in kHz */ - unsigned int dco_min = 7998 * KHz(1); - unsigned int dco_max = 10000 * KHz(1); - unsigned int dco_mid = (dco_min + dco_max) / 2; - + u32 afe_clock = clock * 5; + u32 dco_min = 7998000; + u32 dco_max = 10000000; + u32 dco_mid = (dco_min + dco_max) / 2; static const int dividers[] = { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 76, 78, 80, 84, 88, 90, 92, 96, 98, 100, 102, 3, 5, 7, 9, 15, 21 }; - unsigned int d, dco; - unsigned int dco_centrality = 0; - unsigned int best_dco_centrality = 999999; - unsigned int best_div = 0; - unsigned int best_dco = 0; - unsigned int pdiv = 0, qdiv = 0, kdiv = 0; + u32 dco, best_dco = 0, dco_centrality = 0; + u32 best_dco_centrality = U32_MAX; /* Spec meaning of 999999 MHz */ + int d, best_div = 0, pdiv = 0, qdiv = 0, kdiv = 0; for (d = 0; d < ARRAY_SIZE(dividers); d++) { dco = afe_clock * dividers[d]; @@ -2271,7 +2251,7 @@ static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, cfgcr0 = DPLL_CFGCR0_HDMI_MODE; - if (!cnl_ddi_calculate_wrpll(clock * 1000, dev_priv, &wrpll_params)) + if (!cnl_ddi_calculate_wrpll(clock, dev_priv, &wrpll_params)) return false; cfgcr0 |= DPLL_CFGCR0_DCO_FRACTION(wrpll_params.dco_fraction) | @@ -2281,7 +2261,6 @@ static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, DPLL_CFGCR1_QDIV_MODE(wrpll_params.qdiv_mode) | DPLL_CFGCR1_KDIV(wrpll_params.kdiv) | DPLL_CFGCR1_PDIV(wrpll_params.pdiv) | - wrpll_params.central_freq | DPLL_CFGCR1_CENTRAL_FREQ; memset(&crtc_state->dpll_hw_state, 0, @@ -2345,15 +2324,13 @@ cnl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, memset(&dpll_hw_state, 0, sizeof(dpll_hw_state)); - if (encoder->type == INTEL_OUTPUT_HDMI) { + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { bret = cnl_ddi_hdmi_pll_dividers(crtc, crtc_state, clock); if (!bret) { DRM_DEBUG_KMS("Could not get HDMI pll dividers.\n"); return NULL; } - } else if (encoder->type == 
INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_DP_MST || - encoder->type == INTEL_OUTPUT_EDP) { + } else if (intel_crtc_has_dp_encoder(crtc_state)) { bret = cnl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state); if (!bret) { DRM_DEBUG_KMS("Could not set DP dpll HW state.\n"); @@ -2361,8 +2338,8 @@ cnl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, } crtc_state->dpll_hw_state = dpll_hw_state; } else { - DRM_DEBUG_KMS("Skip DPLL setup for encoder %d\n", - encoder->type); + DRM_DEBUG_KMS("Skip DPLL setup for output_types 0x%x\n", + crtc_state->output_types); return NULL; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 79fbaf78f604..30f791f89d64 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -47,14 +47,12 @@ * contexts. Note that it's important that we check the condition again after * having timed out, since the timeout could be due to preemption or similar and * we've never had a chance to check the condition before the timeout. - * - * TODO: When modesetting has fully transitioned to atomic, the below - * drm_can_sleep() can be removed and in_atomic()/!in_atomic() asserts - * added. */ -#define _wait_for(COND, US, W) ({ \ +#define _wait_for(COND, US, Wmin, Wmax) ({ \ unsigned long timeout__ = jiffies + usecs_to_jiffies(US) + 1; \ + long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ int ret__; \ + might_sleep(); \ for (;;) { \ bool expired__ = time_after(jiffies, timeout__); \ if (COND) { \ @@ -65,16 +63,14 @@ ret__ = -ETIMEDOUT; \ break; \ } \ - if ((W) && drm_can_sleep()) { \ - usleep_range((W), (W)*2); \ - } else { \ - cpu_relax(); \ - } \ + usleep_range(wait__, wait__ * 2); \ + if (wait__ < (Wmax)) \ + wait__ <<= 1; \ } \ ret__; \ }) -#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 1000) +#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. 
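The reworked _wait_for() above replaces the fixed poll interval with an exponential backoff: sleeping starts at Wmin microseconds and doubles each iteration up to Wmax. With wait_for(COND, MS) that means a first recheck after ~10 us, backing off to 1 ms steps, so short waits return quickly without burning CPU. A hedged usage sketch (the register and bit are illustrative, not taken from this patch):

        /* poll a PLL lock bit for up to 10 ms; rechecks at 10 us, 20 us,
         * 40 us, ... capped at 1 ms between reads */
        if (wait_for(I915_READ(DPLL(pipe)) & DPLL_LOCK_VLV, 10))
                DRM_ERROR("PLL %c failed to lock\n", pipe_name(pipe));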
*/ #if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) @@ -123,7 +119,7 @@ int ret__; \ BUILD_BUG_ON(!__builtin_constant_p(US)); \ if ((US) > 10) \ - ret__ = _wait_for((COND), (US), 10); \ + ret__ = _wait_for((COND), (US), 10, 10); \ else \ ret__ = _wait_for_atomic((COND), (US), 0); \ ret__; \ }) @@ -173,7 +169,7 @@ enum intel_output_type { INTEL_OUTPUT_DP = 7, INTEL_OUTPUT_EDP = 8, INTEL_OUTPUT_DSI = 9, - INTEL_OUTPUT_UNKNOWN = 10, + INTEL_OUTPUT_DDI = 10, INTEL_OUTPUT_DP_MST = 11, }; @@ -216,27 +212,30 @@ struct intel_encoder { enum port port; unsigned int cloneable; void (*hot_plug)(struct intel_encoder *); + enum intel_output_type (*compute_output_type)(struct intel_encoder *, + struct intel_crtc_state *, + struct drm_connector_state *); bool (*compute_config)(struct intel_encoder *, struct intel_crtc_state *, struct drm_connector_state *); void (*pre_pll_enable)(struct intel_encoder *, - struct intel_crtc_state *, - struct drm_connector_state *); + const struct intel_crtc_state *, + const struct drm_connector_state *); void (*pre_enable)(struct intel_encoder *, - struct intel_crtc_state *, - struct drm_connector_state *); + const struct intel_crtc_state *, + const struct drm_connector_state *); void (*enable)(struct intel_encoder *, - struct intel_crtc_state *, - struct drm_connector_state *); + const struct intel_crtc_state *, + const struct drm_connector_state *); void (*disable)(struct intel_encoder *, - struct intel_crtc_state *, - struct drm_connector_state *); + const struct intel_crtc_state *, + const struct drm_connector_state *); void (*post_disable)(struct intel_encoder *, - struct intel_crtc_state *, - struct drm_connector_state *); + const struct intel_crtc_state *, + const struct drm_connector_state *); void (*post_pll_disable)(struct intel_encoder *, - struct intel_crtc_state *, - struct drm_connector_state *); + const struct intel_crtc_state *, + const struct drm_connector_state *); /* Read out the current hw state of this connector, returning true if * the encoder is active. If the encoder is enabled it also sets the pipe * it is connected to in the pipe parameter. */ @@ -384,7 +383,10 @@ struct intel_atomic_state { unsigned int active_pipe_changes; unsigned int active_crtcs; - unsigned int min_pixclk[I915_MAX_PIPES]; + /* minimum acceptable cdclk for each pipe */ + int min_cdclk[I915_MAX_PIPES]; + /* minimum acceptable voltage level for each pipe */ + u8 min_voltage_level[I915_MAX_PIPES]; struct intel_shared_dpll_state shared_dpll[I915_NUM_PLLS]; @@ -419,6 +421,9 @@ struct intel_plane_state { /* plane control register */ u32 ctl; + /* plane color control register */ + u32 color_ctl; + /* * scaler_id * = -1 : not using a scaler @@ -493,6 +498,8 @@ struct intel_crtc_scaler_state { /* drm_mode->private_flags */ #define I915_MODE_FLAG_INHERITED 1 +/* Flag to get scanline using frame time stamps */ +#define I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP (1<<1) struct intel_pipe_wm { struct intel_wm_level wm[5]; @@ -714,6 +721,9 @@ struct intel_crtc_state { struct intel_link_m_n dp_m2_n2; bool has_drrs; + bool has_psr; + bool has_psr2; + /* * Frequency the dpll for the port should run at. Differs from the * adjusted dotclock e.g. for DP or 12bpc hdmi mode.
This is also @@ -732,6 +742,9 @@ struct intel_crtc_state { */ uint8_t lane_lat_optim_mask; + /* minimum acceptable voltage level */ + u8 min_voltage_level; + /* Panel fitter controls for gen2-gen4 + VLV */ struct { u32 control; @@ -752,6 +765,7 @@ struct intel_crtc_state { struct intel_link_m_n fdi_m_n; bool ips_enabled; + bool ips_force_disable; bool enable_fbc; @@ -788,25 +802,16 @@ struct intel_crtc_state { struct intel_crtc { struct drm_crtc base; enum pipe pipe; - enum plane plane; /* * Whether the crtc and the connected output pipeline is active. Implies * that crtc->enabled is set, i.e. the current mode configuration has * some outputs connected to this crtc. */ bool active; - bool lowfreq_avail; u8 plane_ids_mask; unsigned long long enabled_power_domains; struct intel_overlay *overlay; - /* Display surface base address adjustement for pageflips. Note that on - * gen4+ this only adjusts up to a tile, offsets within a tile are - * handled in the hw itself (with the TILEOFF register). */ - u32 dspaddr_offset; - int adjusted_x; - int adjusted_y; - struct intel_crtc_state *config; /* global reset count when the last flip was submitted */ @@ -841,7 +846,7 @@ struct intel_crtc { struct intel_plane { struct drm_plane base; - u8 plane; + enum i9xx_plane_id i9xx_plane; enum plane_id id; enum pipe pipe; bool can_scale; @@ -863,6 +868,7 @@ struct intel_plane { const struct intel_plane_state *plane_state); void (*disable_plane)(struct intel_plane *plane, struct intel_crtc *crtc); + bool (*get_hw_state)(struct intel_plane *plane); int (*check_plane)(struct intel_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state); @@ -908,16 +914,6 @@ struct intel_hdmi { bool has_audio; bool rgb_quant_range_selectable; struct intel_connector *attached_connector; - void (*write_infoframe)(struct drm_encoder *encoder, - const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, - const void *frame, ssize_t len); - void (*set_infoframes)(struct drm_encoder *encoder, - bool enable, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state); - bool (*infoframe_enabled)(struct drm_encoder *encoder, - const struct intel_crtc_state *pipe_config); }; struct intel_dp_mst_encoder; @@ -1059,7 +1055,6 @@ struct intel_lspcon { struct intel_digital_port { struct intel_encoder base; - enum port port; u32 saved_port_bits; struct intel_dp dp; struct intel_hdmi hdmi; @@ -1068,6 +1063,17 @@ struct intel_digital_port { bool release_cl2_override; uint8_t max_lanes; enum intel_display_power_domain ddi_io_power_domain; + + void (*write_infoframe)(struct drm_encoder *encoder, + const struct intel_crtc_state *crtc_state, + unsigned int type, + const void *frame, ssize_t len); + void (*set_infoframes)(struct drm_encoder *encoder, + bool enable, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); + bool (*infoframe_enabled)(struct drm_encoder *encoder, + const struct intel_crtc_state *pipe_config); }; struct intel_dp_mst_encoder { @@ -1080,7 +1086,7 @@ struct intel_dp_mst_encoder { static inline enum dpio_channel vlv_dport_to_channel(struct intel_digital_port *dport) { - switch (dport->port) { + switch (dport->base.port) { case PORT_B: case PORT_D: return DPIO_CH0; @@ -1094,7 +1100,7 @@ vlv_dport_to_channel(struct intel_digital_port *dport) static inline enum dpio_phy vlv_dport_to_phy(struct intel_digital_port *dport) { - switch (dport->port) { + switch (dport->base.port) { case PORT_B: case PORT_C: return DPIO_PHY0; 
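With the infoframe vfuncs moved from struct intel_hdmi to struct intel_digital_port above, the HDMI and LSPCON/DP code paths can share one dispatch point instead of each carrying their own. A sketch of a caller, under the assumption that the frame has already been built (the helper name is hypothetical):

static void send_infoframe(struct intel_digital_port *dig_port,
                           const struct intel_crtc_state *crtc_state,
                           union hdmi_infoframe *frame)
{
        u8 buf[VIDEO_DIP_DATA_SIZE];
        ssize_t len;

        len = hdmi_infoframe_pack(frame, buf, sizeof(buf));
        if (len < 0)
                return;

        /* dispatch through the per-port vfunc installed at encoder init */
        dig_port->write_infoframe(&dig_port->base.base, crtc_state,
                                  frame->any.type, buf, len);
}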
@@ -1126,7 +1132,7 @@ intel_get_crtc_for_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) } static inline struct intel_crtc * -intel_get_crtc_for_plane(struct drm_i915_private *dev_priv, enum plane plane) +intel_get_crtc_for_plane(struct drm_i915_private *dev_priv, enum i9xx_plane_id plane) { return dev_priv->plane_to_crtc_mapping[plane]; } @@ -1147,7 +1153,7 @@ enc_to_dig_port(struct drm_encoder *encoder) struct intel_encoder *intel_encoder = to_intel_encoder(encoder); switch (intel_encoder->type) { - case INTEL_OUTPUT_UNKNOWN: + case INTEL_OUTPUT_DDI: WARN_ON(!HAS_DDI(to_i915(encoder->dev))); case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: @@ -1188,6 +1194,30 @@ hdmi_to_dig_port(struct intel_hdmi *intel_hdmi) return container_of(intel_hdmi, struct intel_digital_port, hdmi); } +static inline struct intel_plane_state * +intel_atomic_get_new_plane_state(struct intel_atomic_state *state, + struct intel_plane *plane) +{ + return to_intel_plane_state(drm_atomic_get_new_plane_state(&state->base, + &plane->base)); +} + +static inline struct intel_crtc_state * +intel_atomic_get_old_crtc_state(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + return to_intel_crtc_state(drm_atomic_get_old_crtc_state(&state->base, + &crtc->base)); +} + +static inline struct intel_crtc_state * +intel_atomic_get_new_crtc_state(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + return to_intel_crtc_state(drm_atomic_get_new_crtc_state(&state->base, + &crtc->base)); +} + /* intel_fifo_underrun.c */ bool intel_set_cpu_fifo_underrun_reporting(struct drm_i915_private *dev_priv, enum pipe pipe, bool enable); @@ -1204,11 +1234,8 @@ void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv); /* i915_irq.c */ void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); -void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 mask); void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); -void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); -void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv); @@ -1216,7 +1243,7 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv); static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, u32 mask) { - return mask & ~i915->rps.pm_intrmsk_mbz; + return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz; } void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); @@ -1227,7 +1254,7 @@ static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv) * We only use drm_irq_uninstall() at unload and VT switch, so * this is the only thing we need to check. 
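The intel_atomic_get_{old,new}_*_state() wrappers added above only layer type safety over the core drm_atomic accessors; a typical caller in a commit hook looks like this (illustrative snippet):

        struct intel_crtc_state *old_crtc_state =
                intel_atomic_get_old_crtc_state(state, crtc);
        struct intel_crtc_state *new_crtc_state =
                intel_atomic_get_new_crtc_state(state, crtc);

        /* e.g. detect a pipe being turned on or off in this commit */
        bool active_changed =
                old_crtc_state->base.active != new_crtc_state->base.active;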
*/ - return dev_priv->pm.irqs_enabled; + return dev_priv->runtime_pm.irqs_enabled; } int intel_get_crtc_scanline(struct intel_crtc *crtc); @@ -1245,12 +1272,11 @@ void intel_crt_reset(struct drm_encoder *encoder); /* intel_ddi.c */ void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state); + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state); void hsw_fdi_link_train(struct intel_crtc *crtc, const struct intel_crtc_state *crtc_state); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); -enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state); void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, @@ -1262,15 +1288,14 @@ intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state); void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state); void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); -bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, - struct intel_crtc *intel_crtc); void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); -void intel_ddi_clock_get(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, bool state); +void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, + struct intel_crtc_state *crtc_state); +u32 bxt_signal_levels(struct intel_dp *intel_dp); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); @@ -1282,13 +1307,16 @@ void intel_init_audio_hooks(struct drm_i915_private *dev_priv); void intel_audio_codec_enable(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); -void intel_audio_codec_disable(struct intel_encoder *encoder); +void intel_audio_codec_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state); void i915_audio_component_init(struct drm_i915_private *dev_priv); void i915_audio_component_cleanup(struct drm_i915_private *dev_priv); void intel_audio_init(struct drm_i915_private *dev_priv); void intel_audio_deinit(struct drm_i915_private *dev_priv); /* intel_cdclk.c */ +int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state); void skl_init_cdclk(struct drm_i915_private *dev_priv); void skl_uninit_cdclk(struct drm_i915_private *dev_priv); void cnl_init_cdclk(struct drm_i915_private *dev_priv); @@ -1299,10 +1327,14 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); void intel_update_max_cdclk(struct drm_i915_private *dev_priv); void intel_update_cdclk(struct drm_i915_private *dev_priv); void intel_update_rawclk(struct drm_i915_private *dev_priv); -bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, +bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b); +bool intel_cdclk_changed(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b); void intel_set_cdclk(struct drm_i915_private *dev_priv, const struct 
intel_cdclk_state *cdclk_state); +void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state, + const char *context); /* intel_display.c */ void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe); @@ -1331,11 +1363,13 @@ void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv); void intel_encoder_destroy(struct drm_encoder *encoder); int intel_connector_init(struct intel_connector *); struct intel_connector *intel_connector_alloc(void); +void intel_connector_free(struct intel_connector *connector); bool intel_connector_get_hw_state(struct intel_connector *connector); void intel_connector_attach_encoder(struct intel_connector *connector, struct intel_encoder *encoder); -struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, - struct drm_crtc *crtc); +struct drm_display_mode * +intel_encoder_current_mode(struct intel_encoder *encoder); + enum pipe intel_get_pipe_from_connector(struct intel_connector *connector); int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -1376,7 +1410,7 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, struct intel_digital_port *dport, unsigned int expected_mask); int intel_get_load_detect_pipe(struct drm_connector *connector, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx); void intel_release_load_detect_pipe(struct drm_connector *connector, @@ -1400,7 +1434,9 @@ int intel_plane_atomic_set_property(struct drm_plane *plane, struct drm_plane_state *state, struct drm_property *property, uint64_t val); -int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, +int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state, + struct drm_crtc_state *crtc_state, + const struct intel_plane_state *old_plane_state, struct drm_plane_state *plane_state); void assert_pch_transcoder_disabled(struct drm_i915_private *dev_priv, @@ -1450,8 +1486,9 @@ bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, int target_clock, int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); bool intel_crtc_active(struct intel_crtc *crtc); -void hsw_enable_ips(struct intel_crtc *crtc); -void hsw_disable_ips(struct intel_crtc *crtc); +bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state); +void hsw_enable_ips(const struct intel_crtc_state *crtc_state); +void hsw_disable_ips(const struct intel_crtc_state *crtc_state); enum intel_display_power_domain intel_port_to_power_domain(enum port port); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); @@ -1464,6 +1501,8 @@ static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) return i915_ggtt_offset(state->vma); } +u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state); u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, @@ -1494,11 +1533,13 @@ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode); void intel_dp_encoder_reset(struct drm_encoder *encoder); void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder); void intel_dp_encoder_destroy(struct drm_encoder *encoder); -int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc); +int intel_dp_sink_crc(struct intel_dp *intel_dp, + struct intel_crtc_state 
*crtc_state, u8 *crc); bool intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state); -bool intel_dp_is_edp(struct drm_i915_private *dev_priv, enum port port); +bool intel_dp_is_edp(struct intel_dp *intel_dp); +bool intel_dp_is_port_edp(struct drm_i915_private *dev_priv, enum port port); enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd); void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, @@ -1517,9 +1558,9 @@ void intel_power_sequencer_reset(struct drm_i915_private *dev_priv); uint32_t intel_dp_pack_aux(const uint8_t *src, int src_bytes); void intel_plane_destroy(struct drm_plane *plane); void intel_edp_drrs_enable(struct intel_dp *intel_dp, - struct intel_crtc_state *crtc_state); + const struct intel_crtc_state *crtc_state); void intel_edp_drrs_disable(struct intel_dp *intel_dp, - struct intel_crtc_state *crtc_state); + const struct intel_crtc_state *crtc_state); void intel_edp_drrs_invalidate(struct drm_i915_private *dev_priv, unsigned int frontbuffer_bits); void intel_edp_drrs_flush(struct drm_i915_private *dev_priv, @@ -1612,7 +1653,7 @@ static inline void intel_fbdev_restore_mode(struct drm_device *dev) /* intel_fbc.c */ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, - struct drm_atomic_state *state); + struct intel_atomic_state *state); bool intel_fbc_is_active(struct drm_i915_private *dev_priv); void intel_fbc_pre_update(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, @@ -1647,6 +1688,7 @@ void intel_hdmi_handle_sink_scrambling(struct intel_encoder *intel_encoder, bool high_tmds_clock_ratio, bool scrambling); void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable); +void intel_infoframe_init(struct intel_digital_port *intel_dig_port); /* intel_lvds.c */ @@ -1707,7 +1749,7 @@ extern struct drm_display_mode *intel_find_panel_downclock( int intel_backlight_device_register(struct intel_connector *connector); void intel_backlight_device_unregister(struct intel_connector *connector); #else /* CONFIG_BACKLIGHT_CLASS_DEVICE */ -static int intel_backlight_device_register(struct intel_connector *connector) +static inline int intel_backlight_device_register(struct intel_connector *connector) { return 0; } @@ -1718,8 +1760,10 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con /* intel_psr.c */ -void intel_psr_enable(struct intel_dp *intel_dp); -void intel_psr_disable(struct intel_dp *intel_dp); +void intel_psr_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state); +void intel_psr_disable(struct intel_dp *intel_dp, + const struct intel_crtc_state *old_crtc_state); void intel_psr_invalidate(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); void intel_psr_flush(struct drm_i915_private *dev_priv, @@ -1728,6 +1772,8 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, void intel_psr_init(struct drm_i915_private *dev_priv); void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); +void intel_psr_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state); /* intel_runtime_pm.c */ int intel_power_domains_init(struct drm_i915_private *); @@ -1755,7 +1801,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, static inline void assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) { - WARN_ONCE(dev_priv->pm.suspended, + 
WARN_ONCE(dev_priv->runtime_pm.suspended, "Device suspended during HW access\n"); } @@ -1763,7 +1809,7 @@ static inline void assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) { assert_rpm_device_not_suspended(dev_priv); - WARN_ONCE(!atomic_read(&dev_priv->pm.wakeref_count), + WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count), "RPM wakelock ref not held during HW access"); } @@ -1788,7 +1834,7 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) static inline void disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) { - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); } /** @@ -1805,7 +1851,7 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) static inline void enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv) { - atomic_dec(&dev_priv->pm.wakeref_count); + atomic_dec(&dev_priv->runtime_pm.wakeref_count); } void intel_runtime_pm_get(struct drm_i915_private *dev_priv); @@ -1835,7 +1881,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv); void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); -void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); void gen6_rps_busy(struct drm_i915_private *dev_priv); @@ -1843,7 +1888,6 @@ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); void gen6_rps_boost(struct drm_i915_gem_request *rq, struct intel_rps_client *rps); -void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req); void g4x_wm_get_hw_state(struct drm_device *dev); void vlv_wm_get_hw_state(struct drm_device *dev); void ilk_wm_get_hw_state(struct drm_device *dev); @@ -1859,17 +1903,15 @@ int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); bool skl_wm_level_equals(const struct skl_wm_level *l1, const struct skl_wm_level *l2); -bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, +bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, + const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore); bool ilk_disable_lp_wm(struct drm_device *dev); -int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, struct intel_crtc_state *cstate); -static inline int intel_enable_rc6(void) -{ - return i915.enable_rc6; -} +void intel_init_ipc(struct drm_i915_private *dev_priv); +void intel_enable_ipc(struct drm_i915_private *dev_priv); /* intel_sdvo.c */ bool intel_sdvo_init(struct drm_i915_private *dev_priv, @@ -1883,8 +1925,13 @@ struct intel_plane *intel_sprite_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, int plane); int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); -void intel_pipe_update_start(struct intel_crtc *crtc); -void intel_pipe_update_end(struct intel_crtc *crtc); +void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state); +void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state); +void skl_update_plane(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state 
*plane_state); +void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc); +bool skl_plane_get_hw_state(struct intel_plane *plane); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); @@ -1956,7 +2003,9 @@ struct drm_plane_state *intel_plane_duplicate_state(struct drm_plane *plane); void intel_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state); extern const struct drm_plane_helper_funcs intel_plane_helper_funcs; -int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state, +int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *crtc_state, + const struct intel_plane_state *old_plane_state, struct intel_plane_state *intel_state); /* intel_color.c */ diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 7442891762be..f67d321376e4 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -263,7 +263,7 @@ static int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs, /* XXX: old code skips write if control unchanged */ if (cmd == I915_READ(MIPI_DPI_CONTROL(port))) - DRM_ERROR("Same special packet %02x twice in a row.\n", cmd); + DRM_DEBUG_KMS("Same special packet %02x twice in a row.\n", cmd); I915_WRITE(MIPI_DPI_CONTROL(port), cmd); @@ -330,6 +330,10 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, adjusted_mode->flags = 0; if (IS_GEN9_LP(dev_priv)) { + /* Enable Frame time stamp based scanline reporting */ + adjusted_mode->private_flags |= + I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP; + /* Dual link goes to DSI transcoder A. */ if (intel_dsi->ports == BIT(PORT_C)) pipe_config->cpu_transcoder = TRANSCODER_DSI_C; @@ -658,11 +662,11 @@ static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) } } -static void intel_dsi_port_enable(struct intel_encoder *encoder) +static void intel_dsi_port_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; @@ -701,7 +705,7 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder) if (IS_BROXTON(dev_priv)) temp |= LANE_CONFIGURATION_DUAL_LINK_A; else - temp |= intel_crtc->pipe ? + temp |= crtc->pipe ? 
LANE_CONFIGURATION_DUAL_LINK_B : LANE_CONFIGURATION_DUAL_LINK_A; } @@ -731,7 +735,7 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) } static void intel_dsi_prepare(struct intel_encoder *intel_encoder, - struct intel_crtc_state *pipe_config); + const struct intel_crtc_state *pipe_config); static void intel_dsi_unprepare(struct intel_encoder *encoder); static void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec) @@ -783,17 +787,22 @@ static void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec) */ static void intel_dsi_pre_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; enum port port; u32 val; bool glk_cold_boot = false; DRM_DEBUG_KMS("\n"); + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + /* * The BIOS may leave the PLL in a wonky state where it doesn't * lock. It needs to be fully powered down to fix it. @@ -866,7 +875,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); - intel_dsi_port_enable(encoder); + intel_dsi_port_enable(encoder, pipe_config); } intel_panel_enable_backlight(pipe_config, conn_state); @@ -878,8 +887,8 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, * the pre_enable hook. */ static void intel_dsi_enable_nop(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { DRM_DEBUG_KMS("\n"); } @@ -889,8 +898,8 @@ static void intel_dsi_enable_nop(struct intel_encoder *encoder, * the post_disable hook. 
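The DSI hunks here follow the pattern applied throughout this series: the enable/disable hooks now take const crtc/connector states and derive the pipe from the state they are handed, instead of chasing encoder->base.crtc, which may be stale under atomic modesets. Schematically (illustrative only):

static void example_enable(struct intel_encoder *encoder,
                           const struct intel_crtc_state *pipe_config,
                           const struct drm_connector_state *conn_state)
{
        struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
        enum pipe pipe = crtc->pipe;    /* not via encoder->base.crtc */

        /* program the hardware for 'pipe' using only the passed-in state */
}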
*/ static void intel_dsi_disable(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; @@ -925,8 +934,8 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) } static void intel_dsi_post_disable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); @@ -1066,14 +1075,14 @@ out_put_power: } static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; struct drm_display_mode *adjusted_mode_sw; - struct intel_crtc *intel_crtc; + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); unsigned int lane_count = intel_dsi->lane_count; unsigned int bpp, fmt; @@ -1084,8 +1093,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, crtc_hblank_start_sw, crtc_hblank_end_sw; /* FIXME: hw readout should not depend on SW state */ - intel_crtc = to_intel_crtc(encoder->base.crtc); - adjusted_mode_sw = &intel_crtc->config->base.adjusted_mode; + adjusted_mode_sw = &crtc->config->base.adjusted_mode; /* * At least one port is active as encoder->get_config is called only if * @@ -1102,6 +1110,10 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, pixel_format_from_register_bits(fmt)); bpp = pipe_config->pipe_bpp; + /* Enable Frame time stamp based scanline reporting */ + adjusted_mode->private_flags |= + I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP; + /* In terms of pixels */ adjusted_mode->crtc_hdisplay = I915_READ(BXT_MIPI_TRANS_HACTIVE(port)); @@ -1230,6 +1242,8 @@ static void intel_dsi_get_config(struct intel_encoder *encoder, u32 pclk; DRM_DEBUG_KMS("\n"); + pipe_config->output_types |= BIT(INTEL_OUTPUT_DSI); + if (IS_GEN9_LP(dev_priv)) bxt_dsi_get_pipe_config(encoder, pipe_config); @@ -1370,7 +1384,7 @@ static u32 pixel_format_to_reg(enum mipi_dsi_pixel_format fmt) } static void intel_dsi_prepare(struct intel_encoder *intel_encoder, - struct intel_crtc_state *pipe_config) + const struct intel_crtc_state *pipe_config) { struct drm_encoder *encoder = &intel_encoder->base; struct drm_device *dev = encoder->dev; @@ -1652,6 +1666,27 @@ static const struct drm_connector_funcs intel_dsi_connector_funcs = { .atomic_duplicate_state = intel_digital_connector_duplicate_state, }; +static int intel_dsi_get_panel_orientation(struct intel_connector *connector) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + int orientation = DRM_MODE_PANEL_ORIENTATION_NORMAL; + enum i9xx_plane_id plane; + u32 val; + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + if (connector->encoder->crtc_mask == BIT(PIPE_B)) + plane = PLANE_B; + else + plane = PLANE_A; + + val = I915_READ(DSPCNTR(plane)); + if (val & DISPPLANE_ROTATE_180) + orientation = DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP; + } + + return orientation;
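/*
 * On I915_MODE_FLAG_GET_SCANLINE_FROM_TIMESTAMP, set for GEN9_LP DSI in the
 * compute_config and get_pipe_config hunks above: with that flag the current
 * scanline is derived from hardware frame timestamps rather than the PIPEDSL
 * counter. Conceptually, under assumed variable names (this is a sketch, not
 * the actual i915_irq.c implementation):
 *
 *	u64 elapsed = ktime_to_ns(ktime_sub(now, frame_start));
 *	u64 frame_ns = div_u64((u64)mode->crtc_vtotal *
 *			       mode->crtc_htotal * 1000000,
 *			       mode->crtc_clock);	// crtc_clock in kHz
 *	int scanline = div64_u64(elapsed * mode->crtc_vtotal, frame_ns) %
 *		       mode->crtc_vtotal;
 */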
+} + static void intel_dsi_add_properties(struct intel_connector *connector) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); @@ -1667,6 +1702,13 @@ static void intel_dsi_add_properties(struct intel_connector *connector) allowed_scalers); connector->base.state->scaling_mode = DRM_MODE_SCALE_ASPECT; + + connector->base.display_info.panel_orientation = + intel_dsi_get_panel_orientation(connector); + drm_connector_init_panel_orientation_property( + &connector->base, + connector->panel.fixed_mode->hdisplay, + connector->panel.fixed_mode->vdisplay); } } @@ -1738,42 +1780,13 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) else intel_encoder->crtc_mask = BIT(PIPE_B); - if (dev_priv->vbt.dsi.config->dual_link) { + if (dev_priv->vbt.dsi.config->dual_link) intel_dsi->ports = BIT(PORT_A) | BIT(PORT_C); - - switch (dev_priv->vbt.dsi.config->dl_dcs_backlight_ports) { - case DL_DCS_PORT_A: - intel_dsi->dcs_backlight_ports = BIT(PORT_A); - break; - case DL_DCS_PORT_C: - intel_dsi->dcs_backlight_ports = BIT(PORT_C); - break; - default: - case DL_DCS_PORT_A_AND_C: - intel_dsi->dcs_backlight_ports = BIT(PORT_A) | BIT(PORT_C); - break; - } - - switch (dev_priv->vbt.dsi.config->dl_dcs_cabc_ports) { - case DL_DCS_PORT_A: - intel_dsi->dcs_cabc_ports = BIT(PORT_A); - break; - case DL_DCS_PORT_C: - intel_dsi->dcs_cabc_ports = BIT(PORT_C); - break; - default: - case DL_DCS_PORT_A_AND_C: - intel_dsi->dcs_cabc_ports = BIT(PORT_A) | BIT(PORT_C); - break; - } - } else { + else intel_dsi->ports = BIT(port); - intel_dsi->dcs_backlight_ports = BIT(port); - intel_dsi->dcs_cabc_ports = BIT(port); - } - if (!dev_priv->vbt.dsi.config->cabc_supported) - intel_dsi->dcs_cabc_ports = 0; + intel_dsi->dcs_backlight_ports = dev_priv->vbt.dsi.bl_ports; + intel_dsi->dcs_cabc_ports = dev_priv->vbt.dsi.cabc_ports; /* Create a DSI host (and a device) for each port. 
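The dual-link DCS backlight/CABC port selection deleted above has not vanished: it is resolved once at VBT parse time and cached in dev_priv->vbt.dsi.bl_ports and .cabc_ports. Roughly, on the intel_bios.c side (a sketch mirroring the removed switch):

        switch (config->dl_dcs_backlight_ports) {
        case DL_DCS_PORT_A:
                dev_priv->vbt.dsi.bl_ports = BIT(PORT_A);
                break;
        case DL_DCS_PORT_C:
                dev_priv->vbt.dsi.bl_ports = BIT(PORT_C);
                break;
        default:
        case DL_DCS_PORT_A_AND_C:
                dev_priv->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(PORT_C);
                break;
        }
        /* likewise for cabc_ports, cleared when !config->cabc_supported */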
*/ for_each_dsi_port(port, intel_dsi->ports) { diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index c0a027274c06..754baa00bea9 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -159,6 +159,8 @@ static void intel_dvo_get_config(struct intel_encoder *encoder, struct intel_dvo *intel_dvo = enc_to_dvo(encoder); u32 tmp, flags = 0; + pipe_config->output_types |= BIT(INTEL_OUTPUT_DVO); + tmp = I915_READ(intel_dvo->dev.dvo_reg); if (tmp & DVO_HSYNC_ACTIVE_HIGH) flags |= DRM_MODE_FLAG_PHSYNC; @@ -175,8 +177,8 @@ static void intel_dvo_get_config(struct intel_encoder *encoder, } static void intel_disable_dvo(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dvo *intel_dvo = enc_to_dvo(encoder); @@ -189,8 +191,8 @@ static void intel_disable_dvo(struct intel_encoder *encoder, } static void intel_enable_dvo(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dvo *intel_dvo = enc_to_dvo(encoder); @@ -258,8 +260,8 @@ static bool intel_dvo_compute_config(struct intel_encoder *encoder, } static void intel_dvo_pre_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); @@ -379,32 +381,15 @@ static const struct drm_encoder_funcs intel_dvo_enc_funcs = { * chip being on DVOB/C and having multiple pipes. */ static struct drm_display_mode * -intel_dvo_get_current_mode(struct drm_connector *connector) +intel_dvo_get_current_mode(struct intel_encoder *encoder) { - struct drm_device *dev = connector->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dvo *intel_dvo = intel_attached_dvo(connector); - uint32_t dvo_val = I915_READ(intel_dvo->dev.dvo_reg); - struct drm_display_mode *mode = NULL; + struct drm_display_mode *mode; - /* If the DVO port is active, that'll be the LVDS, so we can pull out - * its timings to get how the BIOS set up the panel. - */ - if (dvo_val & DVO_ENABLE) { - struct intel_crtc *crtc; - int pipe = (dvo_val & DVO_PIPE_B_SELECT) ? 1 : 0; - - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - if (crtc) { - mode = intel_crtc_mode_get(dev, &crtc->base); - if (mode) { - mode->type |= DRM_MODE_TYPE_PREFERRED; - if (dvo_val & DVO_HSYNC_ACTIVE_HIGH) - mode->flags |= DRM_MODE_FLAG_PHSYNC; - if (dvo_val & DVO_VSYNC_ACTIVE_HIGH) - mode->flags |= DRM_MODE_FLAG_PVSYNC; - } - } + mode = intel_encoder_current_mode(encoder); + if (mode) { + DRM_DEBUG_KMS("using current (BIOS) mode: "); + drm_mode_debug_printmodeline(mode); + mode->type |= DRM_MODE_TYPE_PREFERRED; } return mode; @@ -551,7 +536,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) * mode being output through DVO. 
*/ intel_panel_init(&intel_connector->panel, - intel_dvo_get_current_mode(connector), + intel_dvo_get_current_mode(intel_encoder), NULL, NULL); intel_dvo->panel_wants_dither = true; } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 3c2d9cf22ed5..fa960cfd2764 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -22,7 +22,10 @@ * */ +#include <drm/drm_print.h> + #include "i915_drv.h" +#include "i915_vgpu.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -34,11 +37,10 @@ * Resource Streamer, is 66944 bytes, which rounds to 17 pages. */ #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) -/* Same as Haswell, but 72064 bytes now. */ -#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) +#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE) #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) @@ -46,6 +48,8 @@ struct engine_class_info { const char *name; int (*init_legacy)(struct intel_engine_cs *engine); int (*init_execlists)(struct intel_engine_cs *engine); + + u8 uabi_class; }; static const struct engine_class_info intel_engine_classes[] = { @@ -53,21 +57,25 @@ static const struct engine_class_info intel_engine_classes[] = { .name = "rcs", .init_execlists = logical_render_ring_init, .init_legacy = intel_init_render_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_RENDER, }, [COPY_ENGINE_CLASS] = { .name = "bcs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_blt_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_COPY, }, [VIDEO_DECODE_CLASS] = { .name = "vcs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_bsd_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_VIDEO, }, [VIDEO_ENHANCEMENT_CLASS] = { .name = "vecs", .init_execlists = logical_xcs_ring_init, .init_legacy = intel_init_vebox_ring_buffer, + .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE, }, }; @@ -150,12 +158,11 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) default: MISSING_CASE(INTEL_GEN(dev_priv)); case 10: + return GEN10_LR_CONTEXT_RENDER_SIZE; case 9: return GEN9_LR_CONTEXT_RENDER_SIZE; case 8: - return i915.enable_execlists ? 
- GEN8_LR_CONTEXT_RENDER_SIZE : - GEN8_CXT_TOTAL_SIZE; + return GEN8_LR_CONTEXT_RENDER_SIZE; case 7: if (IS_HASWELL(dev_priv)) return HSW_CXT_TOTAL_SIZE; @@ -198,6 +205,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv, GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); class_info = &intel_engine_classes[info->class]; + if (GEM_WARN_ON(info->class > MAX_ENGINE_CLASS)) + return -EINVAL; + + if (GEM_WARN_ON(info->instance > MAX_ENGINE_INSTANCE)) + return -EINVAL; + + if (GEM_WARN_ON(dev_priv->engine_class[info->class][info->instance])) + return -EINVAL; + GEM_BUG_ON(dev_priv->engine[id]); engine = kzalloc(sizeof(*engine), GFP_KERNEL); if (!engine) @@ -208,13 +224,15 @@ intel_engine_setup(struct drm_i915_private *dev_priv, WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", class_info->name, info->instance) >= sizeof(engine->name)); - engine->uabi_id = info->uabi_id; engine->hw_id = engine->guc_id = info->hw_id; engine->mmio_base = info->mmio_base; engine->irq_shift = info->irq_shift; engine->class = info->class; engine->instance = info->instance; + engine->uabi_id = info->uabi_id; + engine->uabi_class = class_info->uabi_class; + engine->context_size = __intel_engine_context_size(dev_priv, engine->class); if (WARN_ON(engine->context_size > BIT(20))) @@ -223,8 +241,11 @@ intel_engine_setup(struct drm_i915_private *dev_priv, /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; + spin_lock_init(&engine->stats.lock); + ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); + dev_priv->engine_class[info->class][info->instance] = engine; dev_priv->engine[id] = engine; return 0; } @@ -276,6 +297,8 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv) device_info->num_rings = hweight32(mask); + i915_check_and_clear_faults(dev_priv); + return 0; cleanup: @@ -301,7 +324,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv) &intel_engine_classes[engine->class]; int (*init)(struct intel_engine_cs *engine); - if (i915.enable_execlists) + if (HAS_EXECLISTS(dev_priv)) init = class_info->init_execlists; else init = class_info->init_legacy; @@ -351,18 +374,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) if (HAS_VEBOX(dev_priv)) I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); } - if (dev_priv->semaphore) { - struct page *page = i915_vma_first_page(dev_priv->semaphore); - void *semaphores; - - /* Semaphores are in noncoherent memory, flush to be safe */ - semaphores = kmap_atomic(page); - memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), - 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); - drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), - I915_NUM_ENGINES * gen8_semaphore_seqno_size); - kunmap_atomic(semaphores); - } intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); @@ -380,6 +391,37 @@ static void intel_engine_init_timeline(struct intel_engine_cs *engine) engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id]; } +static bool csb_force_mmio(struct drm_i915_private *i915) +{ + /* + * IOMMU adds unpredictable latency causing the CSB write (from the + * GPU into the HWSP) to only be visible some time after the interrupt + * (missed breadcrumb syndrome). 
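The dev_priv->engine_class[][] table populated in intel_engine_setup() above, together with the new GEM_WARN_ON bounds checks, enables O(1) engine lookup by uABI class/instance. A hypothetical accessor built on it:

static struct intel_engine_cs *
engine_lookup(struct drm_i915_private *i915, u8 class, u8 instance)
{
        if (class > MAX_ENGINE_CLASS || instance > MAX_ENGINE_INSTANCE)
                return NULL;

        return i915->engine_class[class][instance];
}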
+ */ + if (intel_vtd_active()) + return true; + + /* Older GVT emulation depends upon intercepting CSB mmio */ + if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) + return true; + + return false; +} + +static void intel_engine_init_execlist(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + execlists->csb_use_mmio = csb_force_mmio(engine->i915); + + execlists->port_mask = 1; + BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists)); + GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); + + execlists->queue = RB_ROOT; + execlists->first = NULL; +} + /** * intel_engines_setup_common - setup engine state not requiring hw access * @engine: Engine to setup. @@ -391,8 +433,7 @@ static void intel_engine_init_timeline(struct intel_engine_cs *engine) */ void intel_engine_setup_common(struct intel_engine_cs *engine) { - engine->execlist_queue = RB_ROOT; - engine->execlist_first = NULL; + intel_engine_init_execlist(engine); intel_engine_init_timeline(engine); intel_engine_init_hangcheck(engine); @@ -442,6 +483,116 @@ static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) i915_vma_unpin_and_release(&engine->scratch); } +static void cleanup_phys_status_page(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + if (!dev_priv->status_page_dmah) + return; + + drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah); + engine->status_page.page_addr = NULL; +} + +static void cleanup_status_page(struct intel_engine_cs *engine) +{ + struct i915_vma *vma; + struct drm_i915_gem_object *obj; + + vma = fetch_and_zero(&engine->status_page.vma); + if (!vma) + return; + + obj = vma->obj; + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_unpin_map(obj); + __i915_gem_object_release_unless_active(obj); +} + +static int init_status_page(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags; + void *vaddr; + int ret; + + obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate status page\n"); + return PTR_ERR(obj); + } + + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + if (ret) + goto err; + + vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; + } + + flags = PIN_GLOBAL; + if (!HAS_LLC(engine->i915)) + /* On g33, we cannot place HWS above 256MiB, so + * restrict its pinning to the low mappable arena. + * Though this restriction is not documented for + * gen4, gen5, or byt, they also behave similarly + * and hang if the HWS is placed at the top of the + * GTT. To generalise, it appears that all !llc + * platforms have issues with us placing the HWS + * above the mappable region (even though we never + * actually map it). 
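intel_engine_init_execlist() above sizes the ELSP submission ports through a mask: port_mask = num_ports - 1, with num_ports required to be a power of two (hence the BUILD_BUG_ON). The accessor is simply, paraphrased from intel_ringbuffer.h:

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
        return execlists->port_mask + 1;
}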
+ */ + flags |= PIN_MAPPABLE; + else + flags |= PIN_HIGH; + ret = i915_vma_pin(vma, 0, 4096, flags); + if (ret) + goto err; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto err_unpin; + } + + engine->status_page.vma = vma; + engine->status_page.ggtt_offset = i915_ggtt_offset(vma); + engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); + + DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", + engine->name, i915_ggtt_offset(vma)); + return 0; + +err_unpin: + i915_vma_unpin(vma); +err: + i915_gem_object_put(obj); + return ret; +} + +static int init_phys_status_page(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + GEM_BUG_ON(engine->id != RCS); + + dev_priv->status_page_dmah = + drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); + if (!dev_priv->status_page_dmah) + return -ENOMEM; + + engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr; + memset(engine->status_page.page_addr, 0, PAGE_SIZE); + + return 0; +} + /** * intel_engines_init_common - initialize engine state which might require hw access * @engine: Engine to initialize. @@ -471,17 +622,38 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (IS_ERR(ring)) return PTR_ERR(ring); + /* + * Similarly the preempt context must always be available so that + * we can interrupt the engine at any time. + */ + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { + ring = engine->context_pin(engine, + engine->i915->preempt_context); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); + goto err_unpin_kernel; + } + } + ret = intel_engine_init_breadcrumbs(engine); if (ret) - goto err_unpin; + goto err_unpin_preempt; - ret = i915_gem_render_state_init(engine); + if (HWS_NEEDS_PHYSICAL(engine->i915)) + ret = init_phys_status_page(engine); + else + ret = init_status_page(engine); if (ret) - goto err_unpin; + goto err_breadcrumbs; return 0; -err_unpin: +err_breadcrumbs: + intel_engine_fini_breadcrumbs(engine); +err_unpin_preempt: + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + engine->context_unpin(engine, engine->i915->preempt_context); +err_unpin_kernel: engine->context_unpin(engine, engine->i915->kernel_context); return ret; } @@ -497,11 +669,20 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { intel_engine_cleanup_scratch(engine); - i915_gem_render_state_fini(engine); + if (HWS_NEEDS_PHYSICAL(engine->i915)) + cleanup_phys_status_page(engine); + else + cleanup_status_page(engine); + intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + if (engine->default_state) + i915_gem_object_put(engine->default_state); + + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + engine->context_unpin(engine, engine->i915->preempt_context); engine->context_unpin(engine, engine->i915->kernel_context); } @@ -672,11 +853,6 @@ static int wa_add(struct drm_i915_private *dev_priv, #define WA_SET_FIELD_MASKED(addr, mask, value) \ WA_REG(addr, mask, _MASKED_FIELD(mask, value)) -#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) -#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) - -#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) - static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, i915_reg_t reg) { @@ -687,8 +863,8 @@ static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) return -EINVAL; -
WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); + I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), + i915_mmio_reg_offset(reg)); wa->hw_whitelist_count[engine->id]++; return 0; @@ -812,6 +988,23 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | ECOCHK_DIS_TLB); + if (HAS_LLC(dev_priv)) { + /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl + * + * Must match Display Engine. See + * WaCompressedResourceDisplayNewHashMode. + */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN9_PBE_COMPRESSED_HASH_SELECTION); + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); + + I915_WRITE(MMCD_MISC_CTRL, + I915_READ(MMCD_MISC_CTRL) | + MMCD_PCLA | + MMCD_HOTSPOT_EN); + } + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, @@ -823,22 +1016,6 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_DG_MIRROR_FIX_ENABLE); - - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, - GEN9_RHWO_OPTIMIZATION_DISABLE); - /* - * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set - * but we do that in per ctx batchbuffer as there is an issue - * with this register not getting restored on ctx restore - */ - } - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, @@ -854,11 +1031,6 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, GEN9_CCS_TLB_PREFETCH_ENABLE); - /* WaDisableMaskBasedCammingInRCC:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, - PIXEL_MASK_CAMMING_DISABLE); - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | @@ -888,25 +1060,53 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv) || - IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + IS_COFFEELAKE(dev_priv)) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ + if (IS_GEN9_LP(dev_priv)) { + u32 val = I915_READ(GEN8_L3SQCREG1); + + val &= ~L3_PRIO_CREDITS_MASK; + val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); + I915_WRITE(GEN8_L3SQCREG1, val); + } + /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES)); + /* + * Supporting preemption with fine-granularity requires changes in the + * batch buffer programming. Since we can't break old userspace, we + * need to set our default preemption level to safe value. 
Userspace is
+	 * still able to use more fine-grained preemption levels, since in
+	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
+	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
+	 * not real HW workarounds, but merely a way to start using preemption
+	 * while maintaining old contract with userspace.
+	 */
+
+	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
 	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
 	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
 	if (ret)
 		return ret;
 
-	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl */
-	ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
+	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
+	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
 	if (ret)
 		return ret;
 
@@ -968,25 +1168,19 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
-	/*
-	 * Actual WA is to disable percontext preemption granularity control
-	 * until D0 which is the default case so this is equivalent to
-	 * !WaDisablePerCtxtPreemptionGranularityControl:skl
-	 */
-	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
-
 	/* WaEnableGapsTsvCreditFix:skl */
 	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
 				   GEN9_GAPS_TSV_CREDIT_DISABLE));
 
 	/* WaDisableGafsUnitClkGating:skl */
-	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
+				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
 
 	/* WaInPlaceDecompressionHang:skl */
 	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
-		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
-			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
 
 	/* WaDisableLSQCROPERFforOCL:skl */
 	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
@@ -1005,65 +1199,81 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
-	/* WaStoreMultiplePTEenable:bxt */
-	/* This is a requirement according to Hardware specification */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
-		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
-
-	/* WaSetClckGatingDisableMedia:bxt */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
-		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
-					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
-	}
-
 	/* WaDisableThreadStallDopClockGating:bxt */
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 			  STALL_DOP_GATING_DISABLE);
 
 	/* WaDisablePooledEuLoadBalancingFix:bxt */
-	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
-		WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2,
-				  GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
-	}
+	I915_WRITE(FF_SLICE_CS_CHICKEN2,
		   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
 
-	/* WaDisableSbeCacheDispatchPortSharing:bxt */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
-		WA_SET_BIT_MASKED(
-			GEN7_HALF_SLICE_CHICKEN1,
-			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-	}
+	/* WaToEnableHwFixForPushConstHWBug:bxt */
+	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
-	/* 
WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
-	/* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
-	/* WaDisableObjectLevelPreemtionForInstanceId:bxt */
-	/* WaDisableLSQCROPERFforOCL:bxt */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
-		ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
-		if (ret)
-			return ret;
-
-		ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
-		if (ret)
-			return ret;
-	}
+	/* WaInPlaceDecompressionHang:bxt */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
 
-	/* WaProgramL3SqcReg1DefaultForPerf:bxt */
-	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
-		u32 val = I915_READ(GEN8_L3SQCREG1);
-		val &= ~L3_PRIO_CREDITS_MASK;
-		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
-		I915_WRITE(GEN8_L3SQCREG1, val);
-	}
+	return 0;
+}
 
-	/* WaToEnableHwFixForPushConstHWBug:bxt */
-	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
+static int cnl_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
+	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
+		I915_WRITE(GAMT_CHKN_BIT_REG,
+			   (I915_READ(GAMT_CHKN_BIT_REG) |
+			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT));
+
+	/* WaForceContextSaveRestoreNonCoherent:cnl */
+	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
+
+	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl (pre-prod) */
+	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
+		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
+
+	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
+	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
+	if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
 
-	/* WaInPlaceDecompressionHang:bxt */
-	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
-		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
-			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+	/* WaInPlaceDecompressionHang:cnl */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
+
+	/* WaPushConstantDereferenceHoldDisable:cnl */
+	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
+
+	/* FtrEnableFastAnisoL1BankingFix:cnl */
+	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
+
+	/* WaDisable3DMidCmdPreemption:cnl */
+	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+	/* WaDisableGPGPUMidCmdPreemption:cnl */
+	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
+	/* WaEnablePreemptionGranularityControlByUMD:cnl */
+	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
+	if (ret)
+		return ret;
+
+	/* WaDisableEarlyEOT:cnl */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
 
 	return 0;
 }
@@ -1083,8 +1293,9 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine)
 	/* WaDisableDynamicCreditSharing:kbl */
 	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
-		WA_SET_BIT(GAMT_CHKN_BIT_REG,
-			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
+		I915_WRITE(GAMT_CHKN_BIT_REG,
+			   (I915_READ(GAMT_CHKN_BIT_REG) |
+
GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING)); /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) @@ -1097,7 +1308,8 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableGafsUnitClkGating:kbl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) | + GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE)); /* WaDisableSbeCacheDispatchPortSharing:kbl */ WA_SET_BIT_MASKED( @@ -1105,8 +1317,9 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); /* WaInPlaceDecompressionHang:kbl */ - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); /* WaDisableLSQCROPERFforOCL:kbl */ ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); @@ -1125,6 +1338,11 @@ static int glk_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; + /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ + ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1); + if (ret) + return ret; + /* WaToEnableHwFixForPushConstHWBug:glk */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -1150,7 +1368,8 @@ static int cfl_init_workarounds(struct intel_engine_cs *engine) GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableGafsUnitClkGating:cfl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) | + GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE)); /* WaDisableSbeCacheDispatchPortSharing:cfl */ WA_SET_BIT_MASKED( @@ -1158,8 +1377,9 @@ static int cfl_init_workarounds(struct intel_engine_cs *engine) GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); /* WaInPlaceDecompressionHang:cfl */ - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); return 0; } @@ -1188,6 +1408,8 @@ int init_workarounds_ring(struct intel_engine_cs *engine) err = glk_init_workarounds(engine); else if (IS_COFFEELAKE(dev_priv)) err = cfl_init_workarounds(engine); + else if (IS_CANNONLAKE(dev_priv)) + err = cnl_init_workarounds(engine); else err = 0; if (err) @@ -1236,7 +1458,9 @@ static bool ring_is_idle(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; bool idle = true; - intel_runtime_pm_get(dev_priv); + /* If the whole device is asleep, the engine must be idle */ + if (!intel_runtime_pm_get_if_in_use(dev_priv)) + return true; /* First check that no commands are left in the ring */ if ((I915_READ_HEAD(engine) & HEAD_ADDR) != @@ -1279,12 +1503,12 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) return false; - /* Both ports drained, no more ELSP submission? */ - if (port_request(&engine->execlist_port[0])) + /* Waiting to drain ELSP? */ + if (READ_ONCE(engine->execlists.active)) return false; /* ELSP is empty, but there are ready requests? */ - if (READ_ONCE(engine->execlist_first)) + if (READ_ONCE(engine->execlists.first)) return false; /* Ring stopped? 
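	 * (i.e. has ring_is_idle() above seen RING_HEAD catch up with
	 * the ring's tail while the device was awake?)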
*/ @@ -1299,10 +1523,8 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; - if (READ_ONCE(dev_priv->gt.active_requests)) - return false; - - /* If the driver is wedged, HW state may be very inconsistent and + /* + * If the driver is wedged, HW state may be very inconsistent and * report that it is still busy, even though we have stopped using it. */ if (i915_terminally_wedged(&dev_priv->gpu_error)) @@ -1316,6 +1538,34 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) return true; } +/** + * intel_engine_has_kernel_context: + * @engine: the engine + * + * Returns true if the last context to be executed on this engine, or has been + * executed if the engine is already idle, is the kernel context + * (#i915.kernel_context). + */ +bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) +{ + const struct i915_gem_context * const kernel_context = + engine->i915->kernel_context; + struct drm_i915_gem_request *rq; + + lockdep_assert_held(&engine->i915->drm.struct_mutex); + + /* + * Check the last context seen by the engine. If active, it will be + * the last request that remains in the timeline. When idle, it is + * the last executed context as tracked by retirement. + */ + rq = __i915_gem_active_peek(&engine->timeline->last_request); + if (rq) + return rq->ctx == kernel_context; + else + return engine->last_retired_context == kernel_context; +} + void intel_engines_reset_default_submission(struct drm_i915_private *i915) { struct intel_engine_cs *engine; @@ -1325,17 +1575,467 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) engine->set_default_submission(engine); } -void intel_engines_mark_idle(struct drm_i915_private *i915) +/** + * intel_engines_park: called when the GT is transitioning from busy->idle + * @i915: the i915 device + * + * The GT is now idle and about to go to sleep (maybe never to wake again?). + * Time for us to tidy and put away our toys (release resources back to the + * system). + */ +void intel_engines_park(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, i915, id) { + /* Flush the residual irq tasklets first. */ intel_engine_disarm_breadcrumbs(engine); + tasklet_kill(&engine->execlists.tasklet); + + /* + * We are committed now to parking the engines, make sure there + * will be no more interrupts arriving later and the engines + * are truly idle. + */ + if (wait_for(intel_engine_is_idle(engine), 10)) { + struct drm_printer p = drm_debug_printer(__func__); + + dev_err(i915->drm.dev, + "%s is not idle before parking\n", + engine->name); + intel_engine_dump(engine, &p, NULL); + } + + if (engine->park) + engine->park(engine); + i915_gem_batch_pool_fini(&engine->batch_pool); - tasklet_kill(&engine->irq_tasklet); - engine->no_priolist = false; + engine->execlists.no_priolist = false; + } +} + +/** + * intel_engines_unpark: called when the GT is transitioning from idle->busy + * @i915: the i915 device + * + * The GT was idle and now about to fire up with some new user requests. 
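+ *
+ * This is the counterpart of intel_engines_park(). Backends opt in via the
+ * optional park/unpark hooks, along the lines of (a sketch only; the hook
+ * names here are hypothetical):
+ *
+ *	engine->park = execlists_park;
+ *	engine->unpark = gen6_ring_unpark;
+ *
+ * and each hook is invoked below only when non-NULL.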
+ */ +void intel_engines_unpark(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + if (engine->unpark) + engine->unpark(engine); + } +} + +bool intel_engine_can_store_dword(struct intel_engine_cs *engine) +{ + switch (INTEL_GEN(engine->i915)) { + case 2: + return false; /* uses physical not virtual addresses */ + case 3: + /* maybe only uses physical not virtual addresses */ + return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915)); + case 6: + return engine->class != VIDEO_DECODE_CLASS; /* b0rked */ + default: + return true; + } +} + +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int which; + + which = 0; + for_each_engine(engine, i915, id) + if (engine->default_state) + which |= BIT(engine->uabi_class); + + return which; +} + +static void print_request(struct drm_printer *m, + struct drm_i915_gem_request *rq, + const char *prefix) +{ + drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %dms: %s\n", prefix, + rq->global_seqno, + i915_gem_request_completed(rq) ? "!" : "", + rq->ctx->hw_id, rq->fence.seqno, + rq->priotree.priority, + jiffies_to_msecs(jiffies - rq->emitted_jiffies), + rq->timeline->common->name); +} + +static void hexdump(struct drm_printer *m, const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + drm_printf(m, "*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + drm_printf(m, "%08zx %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + +void intel_engine_dump(struct intel_engine_cs *engine, + struct drm_printer *m, + const char *header, ...) 
+{ + struct intel_breadcrumbs * const b = &engine->breadcrumbs; + const struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_gpu_error * const error = &engine->i915->gpu_error; + struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_gem_request *rq; + struct rb_node *rb; + char hdr[80]; + u64 addr; + + if (header) { + va_list ap; + + va_start(ap, header); + drm_vprintf(m, header, &ap); + va_end(ap); + } + + if (i915_terminally_wedged(&engine->i915->gpu_error)) + drm_printf(m, "*** WEDGED ***\n"); + + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + intel_engine_get_seqno(engine), + intel_engine_last_submit(engine), + engine->hangcheck.seqno, + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), + engine->timeline->inflight_seqnos); + drm_printf(m, "\tReset count: %d (global %d)\n", + i915_reset_engine_count(error, engine), + i915_reset_count(error)); + + rcu_read_lock(); + + drm_printf(m, "\tRequests:\n"); + + rq = list_first_entry(&engine->timeline->requests, + struct drm_i915_gem_request, link); + if (&rq->link != &engine->timeline->requests) + print_request(m, rq, "\t\tfirst "); + + rq = list_last_entry(&engine->timeline->requests, + struct drm_i915_gem_request, link); + if (&rq->link != &engine->timeline->requests) + print_request(m, rq, "\t\tlast "); + + rq = i915_gem_find_active_request(engine); + if (rq) { + print_request(m, rq, "\t\tactive "); + drm_printf(m, + "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", + rq->head, rq->postfix, rq->tail, + rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, + rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); + } + + drm_printf(m, "\tRING_START: 0x%08x [0x%08x]\n", + I915_READ(RING_START(engine->mmio_base)), + rq ? i915_ggtt_offset(rq->ring->vma) : 0); + drm_printf(m, "\tRING_HEAD: 0x%08x [0x%08x]\n", + I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR, + rq ? rq->ring->head : 0); + drm_printf(m, "\tRING_TAIL: 0x%08x [0x%08x]\n", + I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR, + rq ? rq->ring->tail : 0); + drm_printf(m, "\tRING_CTL: 0x%08x%s\n", + I915_READ(RING_CTL(engine->mmio_base)), + I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : ""); + if (INTEL_GEN(engine->i915) > 2) { + drm_printf(m, "\tRING_MODE: 0x%08x%s\n", + I915_READ(RING_MI_MODE(engine->mmio_base)), + I915_READ(RING_MI_MODE(engine->mmio_base)) & (MODE_IDLE) ? 
" [idle]" : ""); + } + if (HAS_LEGACY_SEMAPHORES(dev_priv)) { + drm_printf(m, "\tSYNC_0: 0x%08x\n", + I915_READ(RING_SYNC_0(engine->mmio_base))); + drm_printf(m, "\tSYNC_1: 0x%08x\n", + I915_READ(RING_SYNC_1(engine->mmio_base))); + if (HAS_VEBOX(dev_priv)) + drm_printf(m, "\tSYNC_2: 0x%08x\n", + I915_READ(RING_SYNC_2(engine->mmio_base))); + } + + rcu_read_unlock(); + + addr = intel_engine_get_active_head(engine); + drm_printf(m, "\tACTHD: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + addr = intel_engine_get_last_batch_head(engine); + drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + if (INTEL_GEN(dev_priv) >= 8) + addr = I915_READ64_2x32(RING_DMA_FADD(engine->mmio_base), + RING_DMA_FADD_UDW(engine->mmio_base)); + else if (INTEL_GEN(dev_priv) >= 4) + addr = I915_READ(RING_DMA_FADD(engine->mmio_base)); + else + addr = I915_READ(DMA_FADD_I8XX); + drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + if (INTEL_GEN(dev_priv) >= 4) { + drm_printf(m, "\tIPEIR: 0x%08x\n", + I915_READ(RING_IPEIR(engine->mmio_base))); + drm_printf(m, "\tIPEHR: 0x%08x\n", + I915_READ(RING_IPEHR(engine->mmio_base))); + } else { + drm_printf(m, "\tIPEIR: 0x%08x\n", I915_READ(IPEIR)); + drm_printf(m, "\tIPEHR: 0x%08x\n", I915_READ(IPEHR)); + } + + if (HAS_EXECLISTS(dev_priv)) { + const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + u32 ptr, read, write; + unsigned int idx; + + drm_printf(m, "\tExeclist status: 0x%08x %08x\n", + I915_READ(RING_EXECLIST_STATUS_LO(engine)), + I915_READ(RING_EXECLIST_STATUS_HI(engine))); + + ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); + read = GEN8_CSB_READ_PTR(ptr); + write = GEN8_CSB_WRITE_PTR(ptr); + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n", + read, execlists->csb_head, + write, + intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), + yesno(test_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted))); + if (read >= GEN8_CSB_ENTRIES) + read = 0; + if (write >= GEN8_CSB_ENTRIES) + write = 0; + if (read > write) + write += GEN8_CSB_ENTRIES; + while (read < write) { + idx = ++read % GEN8_CSB_ENTRIES; + drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n", + idx, + I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), + hws[idx * 2], + I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)), + hws[idx * 2 + 1]); + } + + rcu_read_lock(); + for (idx = 0; idx < execlists_num_ports(execlists); idx++) { + unsigned int count; + + rq = port_unpack(&execlists->port[idx], &count); + if (rq) { + snprintf(hdr, sizeof(hdr), + "\t\tELSP[%d] count=%d, rq: ", + idx, count); + print_request(m, rq, hdr); + } else { + drm_printf(m, "\t\tELSP[%d] idle\n", idx); + } + } + drm_printf(m, "\t\tHW active? 
0x%x\n", execlists->active); + rcu_read_unlock(); + } else if (INTEL_GEN(dev_priv) > 6) { + drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", + I915_READ(RING_PP_DIR_BASE(engine))); + drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", + I915_READ(RING_PP_DIR_BASE_READ(engine))); + drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", + I915_READ(RING_PP_DIR_DCLV(engine))); + } + + spin_lock_irq(&engine->timeline->lock); + list_for_each_entry(rq, &engine->timeline->requests, link) + print_request(m, rq, "\t\tE "); + for (rb = execlists->first; rb; rb = rb_next(rb)) { + struct i915_priolist *p = + rb_entry(rb, typeof(*p), node); + + list_for_each_entry(rq, &p->requests, priotree.link) + print_request(m, rq, "\t\tQ "); + } + spin_unlock_irq(&engine->timeline->lock); + + spin_lock_irq(&b->rb_lock); + for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { + struct intel_wait *w = rb_entry(rb, typeof(*w), node); + + drm_printf(m, "\t%s [%d] waiting for %x\n", + w->tsk->comm, w->tsk->pid, w->seqno); + } + spin_unlock_irq(&b->rb_lock); + + if (INTEL_GEN(dev_priv) >= 6) { + drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine)); + } + + drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n", + engine->irq_posted, + yesno(test_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted)), + yesno(test_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted))); + + drm_printf(m, "HWSP:\n"); + hexdump(m, engine->status_page.page_addr, PAGE_SIZE); + + drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); +} + +static u8 user_class_map[] = { + [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS, + [I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS, + [I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS, + [I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS, +}; + +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) +{ + if (class >= ARRAY_SIZE(user_class_map)) + return NULL; + + class = user_class_map[class]; + + GEM_BUG_ON(class > MAX_ENGINE_CLASS); + + if (instance > MAX_ENGINE_INSTANCE) + return NULL; + + return i915->engine_class[class][instance]; +} + +/** + * intel_enable_engine_stats() - Enable engine busy tracking on engine + * @engine: engine to enable stats collection + * + * Start collecting the engine busyness data for @engine. + * + * Returns 0 on success or a negative error code. + */ +int intel_enable_engine_stats(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *execlists = &engine->execlists; + unsigned long flags; + int err = 0; + + if (!intel_engine_supports_stats(engine)) + return -ENODEV; + + tasklet_disable(&execlists->tasklet); + spin_lock_irqsave(&engine->stats.lock, flags); + + if (unlikely(engine->stats.enabled == ~0)) { + err = -EBUSY; + goto unlock; + } + + if (engine->stats.enabled++ == 0) { + const struct execlist_port *port = execlists->port; + unsigned int num_ports = execlists_num_ports(execlists); + + engine->stats.enabled_at = ktime_get(); + + /* XXX submission method oblivious? */ + while (num_ports-- && port_isset(port)) { + engine->stats.active++; + port++; + } + + if (engine->stats.active) + engine->stats.start = engine->stats.enabled_at; + } + +unlock: + spin_unlock_irqrestore(&engine->stats.lock, flags); + tasklet_enable(&execlists->tasklet); + + return err; +} + +static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) +{ + ktime_t total = engine->stats.total; + + /* + * If the engine is executing something at the moment + * add it to the total. 
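+	 *
+	 * e.g. (illustrative numbers only) with stats.total at 5ms and a
+	 * request running since stats.start 2ms ago, the busy time
+	 * returned here is 7ms.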
+ */ + if (engine->stats.active) + total = ktime_add(total, + ktime_sub(ktime_get(), engine->stats.start)); + + return total; +} + +/** + * intel_engine_get_busy_time() - Return current accumulated engine busyness + * @engine: engine to report on + * + * Returns accumulated time @engine was busy since engine stats were enabled. + */ +ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) +{ + ktime_t total; + unsigned long flags; + + spin_lock_irqsave(&engine->stats.lock, flags); + total = __intel_engine_get_busy_time(engine); + spin_unlock_irqrestore(&engine->stats.lock, flags); + + return total; +} + +/** + * intel_disable_engine_stats() - Disable engine busy tracking on engine + * @engine: engine to disable stats collection + * + * Stops collecting the engine busyness data for @engine. + */ +void intel_disable_engine_stats(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (!intel_engine_supports_stats(engine)) + return; + + spin_lock_irqsave(&engine->stats.lock, flags); + WARN_ON_ONCE(engine->stats.enabled == 0); + if (--engine->stats.enabled == 0) { + engine->stats.total = __intel_engine_get_busy_time(engine); + engine->stats.active = 0; } + spin_unlock_irqrestore(&engine->stats.lock, flags); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 8c8ead2276e0..f88c1b5dae4c 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -69,9 +69,9 @@ static inline bool no_fbc_on_multiple_pipes(struct drm_i915_private *dev_priv) * address we program because it starts at the real start of the buffer, so we * have to take this into consideration here. */ -static unsigned int get_crtc_fence_y_offset(struct intel_crtc *crtc) +static unsigned int get_crtc_fence_y_offset(struct intel_fbc *fbc) { - return crtc->base.y - crtc->adjusted_y; + return fbc->state_cache.plane.y - fbc->state_cache.plane.adjusted_y; } /* @@ -151,7 +151,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) /* Set it up... */ fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE; - fbc_ctl2 |= FBC_CTL_PLANE(params->crtc.plane); + fbc_ctl2 |= FBC_CTL_PLANE(params->crtc.i9xx_plane); I915_WRITE(FBC_CONTROL2, fbc_ctl2); I915_WRITE(FBC_FENCE_OFF, params->crtc.fence_y_offset); } @@ -177,7 +177,7 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv) struct intel_fbc_reg_params *params = &dev_priv->fbc.params; u32 dpfc_ctl; - dpfc_ctl = DPFC_CTL_PLANE(params->crtc.plane) | DPFC_SR_EN; + dpfc_ctl = DPFC_CTL_PLANE(params->crtc.i9xx_plane) | DPFC_SR_EN; if (params->fb.format->cpp[0] == 2) dpfc_ctl |= DPFC_CTL_LIMIT_2X; else @@ -224,7 +224,7 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) u32 dpfc_ctl; int threshold = dev_priv->fbc.threshold; - dpfc_ctl = DPFC_CTL_PLANE(params->crtc.plane); + dpfc_ctl = DPFC_CTL_PLANE(params->crtc.i9xx_plane); if (params->fb.format->cpp[0] == 2) threshold++; @@ -291,9 +291,22 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) u32 dpfc_ctl; int threshold = dev_priv->fbc.threshold; + /* Display WA #0529: skl, kbl, bxt. 
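+	 * The override value programmed below, gen9_wa_cfb_stride, is
+	 * precomputed in intel_fbc_get_reg_params() as
+	 * DIV_ROUND_UP(src_w, 32 * threshold) * 8; e.g. (illustrative)
+	 * a 3840 pixel wide non-X-tiled plane at threshold 1 gives
+	 * 120 * 8 = 960.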
*/ + if (IS_GEN9(dev_priv) && !IS_GEMINILAKE(dev_priv)) { + u32 val = I915_READ(CHICKEN_MISC_4); + + val &= ~(FBC_STRIDE_OVERRIDE | FBC_STRIDE_MASK); + + if (i915_gem_object_get_tiling(params->vma->obj) != + I915_TILING_X) + val |= FBC_STRIDE_OVERRIDE | params->gen9_wa_cfb_stride; + + I915_WRITE(CHICKEN_MISC_4, val); + } + dpfc_ctl = 0; if (IS_IVYBRIDGE(dev_priv)) - dpfc_ctl |= IVB_DPFC_CTL_PLANE(params->crtc.plane); + dpfc_ctl |= IVB_DPFC_CTL_PLANE(params->crtc.i9xx_plane); if (params->fb.format->cpp[0] == 2) threshold++; @@ -518,7 +531,6 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, int size, int fb_cpp) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; int compression_threshold = 1; int ret; u64 end; @@ -528,7 +540,7 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. */ if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv)) - end = ggtt->stolen_size - 8 * 1024 * 1024; + end = resource_size(&dev_priv->dsm) - 8 * 1024 * 1024; else end = U64_MAX; @@ -602,10 +614,16 @@ static int intel_fbc_alloc_cfb(struct intel_crtc *crtc) fbc->compressed_llb = compressed_llb; + GEM_BUG_ON(range_overflows_t(u64, dev_priv->dsm.start, + fbc->compressed_fb.start, + U32_MAX)); + GEM_BUG_ON(range_overflows_t(u64, dev_priv->dsm.start, + fbc->compressed_llb->start, + U32_MAX)); I915_WRITE(FBC_CFB_BASE, - dev_priv->mm.stolen_base + fbc->compressed_fb.start); + dev_priv->dsm.start + fbc->compressed_fb.start); I915_WRITE(FBC_LL_BASE, - dev_priv->mm.stolen_base + compressed_llb->start); + dev_priv->dsm.start + compressed_llb->start); } DRM_DEBUG_KMS("reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n", @@ -714,8 +732,8 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) intel_fbc_get_plane_source_size(&fbc->state_cache, &effective_w, &effective_h); - effective_w += crtc->adjusted_x; - effective_h += crtc->adjusted_y; + effective_w += fbc->state_cache.plane.adjusted_x; + effective_h += fbc->state_cache.plane.adjusted_y; return effective_w <= max_w && effective_h <= max_h; } @@ -744,6 +762,9 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; cache->plane.src_h = drm_rect_height(&plane_state->base.src) >> 16; cache->plane.visible = plane_state->base.visible; + cache->plane.adjusted_x = plane_state->main.x; + cache->plane.adjusted_y = plane_state->main.y; + cache->plane.y = plane_state->base.src.y1 >> 16; if (!cache->plane.visible) return; @@ -846,7 +867,7 @@ static bool intel_fbc_can_enable(struct drm_i915_private *dev_priv) return false; } - if (!i915.enable_fbc) { + if (!i915_modparams.enable_fbc) { fbc->no_fbc_reason = "disabled per module param or by default"; return false; } @@ -874,13 +895,17 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc, params->vma = cache->vma; params->crtc.pipe = crtc->pipe; - params->crtc.plane = crtc->plane; - params->crtc.fence_y_offset = get_crtc_fence_y_offset(crtc); + params->crtc.i9xx_plane = to_intel_plane(crtc->base.primary)->i9xx_plane; + params->crtc.fence_y_offset = get_crtc_fence_y_offset(fbc); params->fb.format = cache->fb.format; params->fb.stride = cache->fb.stride; params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache); + + if (IS_GEN9(dev_priv) && !IS_GEMINILAKE(dev_priv)) + params->gen9_wa_cfb_stride = DIV_ROUND_UP(cache->plane.src_w, + 32 * 
fbc->threshold) * 8; } static bool intel_fbc_reg_params_equal(struct intel_fbc_reg_params *params1, @@ -1034,11 +1059,11 @@ out: * enable FBC for the chosen CRTC. If it does, it will set dev_priv->fbc.crtc. */ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, - struct drm_atomic_state *state) + struct intel_atomic_state *state) { struct intel_fbc *fbc = &dev_priv->fbc; - struct drm_plane *plane; - struct drm_plane_state *plane_state; + struct intel_plane *plane; + struct intel_plane_state *plane_state; bool crtc_chosen = false; int i; @@ -1046,7 +1071,7 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, /* Does this atomic commit involve the CRTC currently tied to FBC? */ if (fbc->crtc && - !drm_atomic_get_existing_crtc_state(state, &fbc->crtc->base)) + !intel_atomic_get_new_crtc_state(state, fbc->crtc)) goto out; if (!intel_fbc_can_enable(dev_priv)) @@ -1056,25 +1081,22 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, * plane. We could go for fancier schemes such as checking the plane * size, but this would just affect the few platforms that don't tie FBC * to pipe or plane A. */ - for_each_new_plane_in_state(state, plane, plane_state, i) { - struct intel_plane_state *intel_plane_state = - to_intel_plane_state(plane_state); - struct intel_crtc_state *intel_crtc_state; - struct intel_crtc *crtc = to_intel_crtc(plane_state->crtc); + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc); - if (!intel_plane_state->base.visible) + if (!plane_state->base.visible) continue; if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A) continue; - if (fbc_on_plane_a_only(dev_priv) && crtc->plane != PLANE_A) + if (fbc_on_plane_a_only(dev_priv) && plane->i9xx_plane != PLANE_A) continue; - intel_crtc_state = to_intel_crtc_state( - drm_atomic_get_existing_crtc_state(state, &crtc->base)); + crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - intel_crtc_state->enable_fbc = true; + crtc_state->enable_fbc = true; crtc_chosen = true; break; } @@ -1293,8 +1315,8 @@ void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv) */ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) { - if (i915.enable_fbc >= 0) - return !!i915.enable_fbc; + if (i915_modparams.enable_fbc >= 0) + return !!i915_modparams.enable_fbc; if (!HAS_FBC(dev_priv)) return 0; @@ -1338,8 +1360,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) if (need_fbc_vtd_wa(dev_priv)) mkwrite_device_info(dev_priv)->has_fbc = false; - i915.enable_fbc = intel_sanitize_fbc_option(dev_priv); - DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc); + i915_modparams.enable_fbc = intel_sanitize_fbc_option(dev_priv); + DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", + i915_modparams.enable_fbc); if (!HAS_FBC(dev_priv)) { fbc->no_fbc_reason = "unsupported by this chipset"; diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 262e75c00dd2..da48af11eb6b 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -115,7 +115,6 @@ static int intelfb_alloc(struct drm_fb_helper *helper, struct drm_framebuffer *fb; struct drm_device *dev = helper->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_mode_fb_cmd2 mode_cmd = {}; struct drm_i915_gem_object *obj; int size, ret; @@ -139,7 +138,7 @@ static int intelfb_alloc(struct 
drm_fb_helper *helper, * important and we should probably use that space with FBC or other * features. */ obj = NULL; - if (size * 2 < ggtt->stolen_usable_size) + if (size * 2 < dev_priv->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (obj == NULL) obj = i915_gem_object_create(dev_priv, size); @@ -189,7 +188,7 @@ static int intelfb_create(struct drm_fb_helper *helper, " releasing it\n", intel_fb->base.width, intel_fb->base.height, sizes->fb_width, sizes->fb_height); - drm_framebuffer_unreference(&intel_fb->base); + drm_framebuffer_put(&intel_fb->base); intel_fb = ifbdev->fb = NULL; } if (!intel_fb || WARN_ON(!intel_fb->obj)) { @@ -206,6 +205,7 @@ static int intelfb_create(struct drm_fb_helper *helper, } mutex_lock(&dev->struct_mutex); + intel_runtime_pm_get(dev_priv); /* Pin the GGTT vma for our access via info->screen_base. * This also validates that any existing fb inherited from the @@ -269,6 +269,7 @@ static int intelfb_create(struct drm_fb_helper *helper, fb->width, fb->height, i915_ggtt_offset(vma)); ifbdev->vma = vma; + intel_runtime_pm_put(dev_priv); mutex_unlock(&dev->struct_mutex); vga_switcheroo_client_fb_set(pdev, info); return 0; @@ -276,6 +277,7 @@ static int intelfb_create(struct drm_fb_helper *helper, out_unpin: intel_unpin_fb_vma(vma); out_unlock: + intel_runtime_pm_put(dev_priv); mutex_unlock(&dev->struct_mutex); return ret; } @@ -624,7 +626,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, ifbdev->preferred_bpp = fb->base.format->cpp[0] * 8; ifbdev->fb = fb; - drm_framebuffer_reference(&ifbdev->fb->base); + drm_framebuffer_get(&ifbdev->fb->base); /* Final pass to check if any active pipes don't have fbs */ for_each_crtc(dev, crtc) { @@ -694,10 +696,8 @@ static void intel_fbdev_initial_config(void *data, async_cookie_t cookie) /* Due to peculiar init order wrt to hpd handling this is separate. 
*/ if (drm_fb_helper_initial_config(&ifbdev->helper, - ifbdev->preferred_bpp)) { + ifbdev->preferred_bpp)) intel_fbdev_unregister(to_i915(ifbdev->helper.dev)); - intel_fbdev_fini(to_i915(ifbdev->helper.dev)); - } } void intel_fbdev_initial_config_async(struct drm_device *dev) @@ -797,7 +797,11 @@ void intel_fbdev_output_poll_changed(struct drm_device *dev) { struct intel_fbdev *ifbdev = to_i915(dev)->fbdev; - if (ifbdev) + if (!ifbdev) + return; + + intel_fbdev_sync(ifbdev); + if (ifbdev->vma) drm_fb_helper_hotplug_event(&ifbdev->helper); } diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index 04689600e337..77c123cc8817 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -88,14 +88,15 @@ static void i9xx_check_fifo_underruns(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); i915_reg_t reg = PIPESTAT(crtc->pipe); - u32 pipestat = I915_READ(reg) & 0xffff0000; + u32 enable_mask; lockdep_assert_held(&dev_priv->irq_lock); - if ((pipestat & PIPE_FIFO_UNDERRUN_STATUS) == 0) + if ((I915_READ(reg) & PIPE_FIFO_UNDERRUN_STATUS) == 0) return; - I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS); + enable_mask = i915_pipestat_enable_mask(dev_priv, crtc->pipe); + I915_WRITE(reg, enable_mask | PIPE_FIFO_UNDERRUN_STATUS); POSTING_READ(reg); trace_intel_cpu_fifo_underrun(dev_priv, crtc->pipe); @@ -108,15 +109,16 @@ static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); i915_reg_t reg = PIPESTAT(pipe); - u32 pipestat = I915_READ(reg) & 0xffff0000; lockdep_assert_held(&dev_priv->irq_lock); if (enable) { - I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS); + u32 enable_mask = i915_pipestat_enable_mask(dev_priv, pipe); + + I915_WRITE(reg, enable_mask | PIPE_FIFO_UNDERRUN_STATUS); POSTING_READ(reg); } else { - if (old && pipestat & PIPE_FIFO_UNDERRUN_STATUS) + if (old && I915_READ(reg) & PIPE_FIFO_UNDERRUN_STATUS) DRM_ERROR("pipe %c underrun\n", pipe_name(pipe)); } } diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c new file mode 100644 index 000000000000..3c6bf5a34c3c --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -0,0 +1,491 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *
+ */
+
+#include "intel_guc.h"
+#include "intel_guc_submission.h"
+#include "i915_drv.h"
+
+static void gen8_guc_raise_irq(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+	I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
+}
+
+static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i)
+{
+	GEM_BUG_ON(!guc->send_regs.base);
+	GEM_BUG_ON(!guc->send_regs.count);
+	GEM_BUG_ON(i >= guc->send_regs.count);
+
+	return _MMIO(guc->send_regs.base + 4 * i);
+}
+
+void intel_guc_init_send_regs(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	enum forcewake_domains fw_domains = 0;
+	unsigned int i;
+
+	guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0));
+	guc->send_regs.count = SOFT_SCRATCH_COUNT - 1;
+
+	for (i = 0; i < guc->send_regs.count; i++) {
+		fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
+					guc_send_reg(guc, i),
+					FW_REG_READ | FW_REG_WRITE);
+	}
+	guc->send_regs.fw_domains = fw_domains;
+}
+
+void intel_guc_init_early(struct intel_guc *guc)
+{
+	intel_guc_fw_init_early(guc);
+	intel_guc_ct_init_early(&guc->ct);
+
+	mutex_init(&guc->send_mutex);
+	guc->send = intel_guc_send_nop;
+	guc->notify = gen8_guc_raise_irq;
+}
+
+int intel_guc_init_wq(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+	/*
+	 * The GuC log buffer flush work item has to do register access to
+	 * send the ack to GuC, and this work item, if not synced before
+	 * suspend, can potentially get executed after the GFX device is
+	 * suspended.
+	 * By marking the WQ as freezable, we don't have to bother flushing
+	 * this work item from the suspend hooks; the pending work item, if
+	 * any, will either be executed before the suspend or scheduled
+	 * later on resume. This way the handling of the work item can be
+	 * kept the same between system suspend & rpm suspend.
+	 */
+	guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
+						WQ_HIGHPRI | WQ_FREEZABLE);
+	if (!guc->log.runtime.flush_wq)
+		return -ENOMEM;
+
+	/*
+	 * Even though both sending a GuC action and adding a new workitem to
+	 * the GuC workqueue are serialized (each with its own locking), since
+	 * we're using multiple engines, it's possible that we're going to
+	 * issue a preempt request with two (or more, one for each engine)
+	 * workitems in the GuC queue. In this situation, GuC may submit
+	 * all of them, which will make us very confused.
+	 * Our preemption contexts may even already be complete - before we
+	 * even had the chance to send the preempt action to GuC! Rather
+	 * than introducing yet another lock, we can just use an ordered
+	 * workqueue to make sure we're always sending a single preemption
+	 * request with a single workitem.
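+	 * (alloc_ordered_workqueue() returns an unbound workqueue with
+	 * max_active of one, so at most a single preempt workitem is ever
+	 * in flight, executed strictly in queueing order.)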
+ */ + if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) && + USES_GUC_SUBMISSION(dev_priv)) { + guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt", + WQ_HIGHPRI); + if (!guc->preempt_wq) { + destroy_workqueue(guc->log.runtime.flush_wq); + return -ENOMEM; + } + } + + return 0; +} + +void intel_guc_fini_wq(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) && + USES_GUC_SUBMISSION(dev_priv)) + destroy_workqueue(guc->preempt_wq); + + destroy_workqueue(guc->log.runtime.flush_wq); +} + +static int guc_shared_data_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma); + return PTR_ERR(vaddr); + } + + guc->shared_data = vma; + guc->shared_data_vaddr = vaddr; + + return 0; +} + +static void guc_shared_data_destroy(struct intel_guc *guc) +{ + i915_gem_object_unpin_map(guc->shared_data->obj); + i915_vma_unpin_and_release(&guc->shared_data); +} + +int intel_guc_init(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + int ret; + + ret = guc_shared_data_create(guc); + if (ret) + return ret; + GEM_BUG_ON(!guc->shared_data); + + /* We need to notify the guc whenever we change the GGTT */ + i915_ggtt_enable_guc(dev_priv); + + return 0; +} + +void intel_guc_fini(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + i915_ggtt_disable_guc(dev_priv); + guc_shared_data_destroy(guc); +} + +static u32 get_gt_type(struct drm_i915_private *dev_priv) +{ + /* XXX: GT type based on PCI device ID? field seems unused by fw */ + return 0; +} + +static u32 get_core_family(struct drm_i915_private *dev_priv) +{ + u32 gen = INTEL_GEN(dev_priv); + + switch (gen) { + case 9: + return GUC_CORE_FAMILY_GEN9; + + default: + MISSING_CASE(gen); + return GUC_CORE_FAMILY_UNKNOWN; + } +} + +/* + * Initialise the GuC parameter block before starting the firmware + * transfer. These parameters are read by the firmware on startup + * and cannot be changed thereafter. + */ +void intel_guc_init_params(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 params[GUC_CTL_MAX_DWORDS]; + int i; + + memset(params, 0, sizeof(params)); + + params[GUC_CTL_DEVICE_INFO] |= + (get_gt_type(dev_priv) << GUC_CTL_GT_TYPE_SHIFT) | + (get_core_family(dev_priv) << GUC_CTL_CORE_FAMILY_SHIFT); + + /* + * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one + * second. 
The quantum in ARAT units is calculated by:
+	 * Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10
+	 */
+	params[GUC_CTL_ARAT_HIGH] = 0;
+	params[GUC_CTL_ARAT_LOW] = 100000000;
+
+	params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER;
+
+	params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER |
+			GUC_CTL_VCS2_ENABLED;
+
+	params[GUC_CTL_LOG_PARAMS] = guc->log.flags;
+
+	if (i915_modparams.guc_log_level >= 0) {
+		params[GUC_CTL_DEBUG] =
+			i915_modparams.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
+	} else {
+		params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED;
+	}
+
+	/* If GuC submission is enabled, set up additional parameters here */
+	if (USES_GUC_SUBMISSION(dev_priv)) {
+		u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT;
+		u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool);
+		u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16;
+
+		params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
+		params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
+
+		pgs >>= PAGE_SHIFT;
+		params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) |
+			(ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT);
+
+		params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS;
+
+		/* Unmask this bit to enable the GuC's internal scheduler */
+		params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER;
+	}
+
+	/*
+	 * All SOFT_SCRATCH registers are in FORCEWAKE_BLITTER domain and
+	 * they are power context saved so it's ok to release forcewake
+	 * when we are done here and take it again at xfer time.
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER);
+
+	I915_WRITE(SOFT_SCRATCH(0), 0);
+
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		I915_WRITE(SOFT_SCRATCH(1 + i), params[i]);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER);
+}
+
+int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len)
+{
+	WARN(1, "Unexpected send: action=%#x\n", *action);
+	return -ENODEV;
+}
+
+/*
+ * This function implements the MMIO based host to GuC interface.
+ */
+int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	u32 status;
+	int i;
+	int ret;
+
+	GEM_BUG_ON(!len);
+	GEM_BUG_ON(len > guc->send_regs.count);
+
+	/* If CT is available, we expect to use MMIO only during init/fini */
+	GEM_BUG_ON(HAS_GUC_CT(dev_priv) &&
+		*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
+		*action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
+
+	mutex_lock(&guc->send_mutex);
+	intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains);
+
+	for (i = 0; i < len; i++)
+		I915_WRITE(guc_send_reg(guc, i), action[i]);
+
+	POSTING_READ(guc_send_reg(guc, i - 1));
+
+	intel_guc_notify(guc);
+
+	/*
+	 * No GuC command should ever take longer than 10ms.
+	 * Fast commands should still complete in 10us.
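+	 * That is what the (10, 10) pair passed below encodes: busy-wait
+	 * up to 10us first, then poll for up to a further 10ms for the
+	 * GuC to ack via INTEL_GUC_RECV_MASK in SOFT_SCRATCH(0).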
+ */ + ret = __intel_wait_for_register_fw(dev_priv, + guc_send_reg(guc, 0), + INTEL_GUC_RECV_MASK, + INTEL_GUC_RECV_MASK, + 10, 10, &status); + if (status != INTEL_GUC_STATUS_SUCCESS) { + /* + * Either the GuC explicitly returned an error (which + * we convert to -EIO here) or no response at all was + * received within the timeout limit (-ETIMEDOUT) + */ + if (ret != -ETIMEDOUT) + ret = -EIO; + + DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" + " ret=%d status=0x%08X response=0x%08X\n", + action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); + } + + intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); + mutex_unlock(&guc->send_mutex); + + return ret; +} + +int intel_guc_sample_forcewake(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 action[2]; + + action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; + /* WaRsDisableCoarsePowerGating:skl,bxt */ + if (!HAS_RC6(dev_priv) || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) + action[1] = 0; + else + /* bit 0 and 1 are for Render and Media domain separately */ + action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +/** + * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode + * @guc: intel_guc structure + * @rsa_offset: rsa offset w.r.t ggtt base of huc vma + * + * Triggers a HuC firmware authentication request to the GuC via intel_guc_send + * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by + * intel_huc_auth(). + * + * Return: non-zero code on error + */ +int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) +{ + u32 action[] = { + INTEL_GUC_ACTION_AUTHENTICATE_HUC, + rsa_offset + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +/** + * intel_guc_suspend() - notify GuC entering suspend state + * @dev_priv: i915 device private + */ +int intel_guc_suspend(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + u32 data[3]; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + gen9_disable_guc_interrupts(dev_priv); + + data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; + /* any value greater than GUC_POWER_D0 */ + data[1] = GUC_POWER_D1; + data[2] = guc_ggtt_offset(guc->shared_data); + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); +} + +/** + * intel_guc_reset_engine() - ask GuC to reset an engine + * @guc: intel_guc structure + * @engine: engine to be reset + */ +int intel_guc_reset_engine(struct intel_guc *guc, + struct intel_engine_cs *engine) +{ + u32 data[7]; + + GEM_BUG_ON(!guc->execbuf_client); + + data[0] = INTEL_GUC_ACTION_REQUEST_ENGINE_RESET; + data[1] = engine->guc_id; + data[2] = 0; + data[3] = 0; + data[4] = 0; + data[5] = guc->execbuf_client->stage_id; + data[6] = guc_ggtt_offset(guc->shared_data); + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); +} + +/** + * intel_guc_resume() - notify GuC resuming from suspend state + * @dev_priv: i915 device private + */ +int intel_guc_resume(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + u32 data[3]; + + if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + if (i915_modparams.guc_log_level >= 0) + gen9_enable_guc_interrupts(dev_priv); + + data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; + data[1] = GUC_POWER_D0; + data[2] = guc_ggtt_offset(guc->shared_data); + + return intel_guc_send(guc, data, ARRAY_SIZE(data)); +} + +/** + * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage + * @guc: the guc + * @size: size of 
area to allocate (both virtual space and memory)
+ *
+ * This is a wrapper to create an object for use with the GuC. In order to
+ * use it inside the GuC, an object needs to be pinned for its lifetime, so
+ * we allocate both some backing storage and a range inside the Global GTT.
+ * We must pin it in the GGTT somewhere other than [0, GUC_WOPCM_TOP)
+ * because that range is reserved inside GuC.
+ *
+ * Return: An i915_vma if successful, otherwise an ERR_PTR.
+ */
+struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int ret;
+
+	obj = i915_gem_object_create(dev_priv, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
+	if (IS_ERR(vma))
+		goto err;
+
+	ret = i915_vma_pin(vma, 0, PAGE_SIZE,
+			   PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+	if (ret) {
+		vma = ERR_PTR(ret);
+		goto err;
+	}
+
+	return vma;
+
+err:
+	i915_gem_object_put(obj);
+	return vma;
+}
+
+u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv)
+{
+	u32 wopcm_size = GUC_WOPCM_TOP;
+
+	/* On BXT, the top of WOPCM is reserved for RC6 context */
+	if (IS_GEN9_LP(dev_priv))
+		wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED;
+
+	return wopcm_size;
+}
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
new file mode 100644
index 000000000000..52856a97477d
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright © 2014-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _INTEL_GUC_H_
+#define _INTEL_GUC_H_
+
+#include "intel_uncore.h"
+#include "intel_guc_fw.h"
+#include "intel_guc_fwif.h"
+#include "intel_guc_ct.h"
+#include "intel_guc_log.h"
+#include "intel_guc_reg.h"
+#include "intel_uc_fw.h"
+#include "i915_vma.h"
+
+struct guc_preempt_work {
+	struct work_struct work;
+	struct intel_engine_cs *engine;
+};
+
+/*
+ * Top level structure of GuC. It handles firmware loading and manages client
+ * pool and doorbells. intel_guc owns an intel_guc_client to replace the
+ * legacy ExecList submission.
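+ *
+ * The transport is hidden behind the ->send()/->notify() vfuncs below:
+ * intel_guc_send_mmio() writes the SOFT_SCRATCH registers around init/fini,
+ * and the CT-buffer based path takes over once registered. Callers only see
+ * the intel_guc_send() wrapper, e.g. (as intel_guc_auth_huc() does):
+ *
+ *	u32 action[] = { INTEL_GUC_ACTION_AUTHENTICATE_HUC, rsa_offset };
+ *	err = intel_guc_send(guc, action, ARRAY_SIZE(action));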
+ */ +struct intel_guc { + struct intel_uc_fw fw; + struct intel_guc_log log; + struct intel_guc_ct ct; + + /* Log snapshot if GuC errors during load */ + struct drm_i915_gem_object *load_err_log; + + /* intel_guc_recv interrupt related state */ + bool interrupts_enabled; + + struct i915_vma *ads_vma; + struct i915_vma *stage_desc_pool; + void *stage_desc_pool_vaddr; + struct ida stage_ids; + struct i915_vma *shared_data; + void *shared_data_vaddr; + + struct intel_guc_client *execbuf_client; + struct intel_guc_client *preempt_client; + + struct guc_preempt_work preempt_work[I915_NUM_ENGINES]; + struct workqueue_struct *preempt_wq; + + DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); + /* Cyclic counter mod pagesize */ + u32 db_cacheline; + + /* GuC's FW specific registers used in MMIO send */ + struct { + u32 base; + unsigned int count; + enum forcewake_domains fw_domains; + } send_regs; + + /* To serialize the intel_guc_send actions */ + struct mutex send_mutex; + + /* GuC's FW specific send function */ + int (*send)(struct intel_guc *guc, const u32 *data, u32 len); + + /* GuC's FW specific notify function */ + void (*notify)(struct intel_guc *guc); +}; + +static +inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) +{ + return guc->send(guc, action, len); +} + +static inline void intel_guc_notify(struct intel_guc *guc) +{ + guc->notify(guc); } + +/* + * GuC does not allow any gfx GGTT address that falls into the range + * [0, WOPCM_TOP), which is reserved for Boot ROM, SRAM and WOPCM. Currently + * this top address is 512K. In order to exclude the 0-512K address space from + * GGTT, all gfx objects used by GuC are pinned with PIN_OFFSET_BIAS along + * with the size of WOPCM. + */ +static inline u32 guc_ggtt_offset(struct i915_vma *vma) +{ + u32 offset = i915_ggtt_offset(vma); + + GEM_BUG_ON(offset < GUC_WOPCM_TOP); + GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); + + return offset; +} + +void intel_guc_init_early(struct intel_guc *guc); +void intel_guc_init_send_regs(struct intel_guc *guc); +void intel_guc_init_params(struct intel_guc *guc); +int intel_guc_init_wq(struct intel_guc *guc); +void intel_guc_fini_wq(struct intel_guc *guc); +int intel_guc_init(struct intel_guc *guc); +void intel_guc_fini(struct intel_guc *guc); +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); +int intel_guc_sample_forcewake(struct intel_guc *guc); +int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); +int intel_guc_suspend(struct drm_i915_private *dev_priv); +int intel_guc_resume(struct drm_i915_private *dev_priv); +struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); +u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); + +#endif
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c index c4cbec140101..24ad55752396 100644 --- a/drivers/gpu/drm/i915/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/intel_guc_ct.c @@ -198,6 +198,7 @@ static int ctch_open(struct intel_guc *guc, err = ctch_init(guc, ctch); if (unlikely(err)) goto err_out; + GEM_BUG_ON(!ctch->vma); } /* vma should be already allocated and map'ed */
diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c new file mode 100644 index 000000000000..3b0932942857 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -0,0 +1,287 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of
charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Vinit Azad <vinit.azad@intel.com> + * Ben Widawsky <ben@bwidawsk.net> + * Dave Gordon <david.s.gordon@intel.com> + * Alex Dai <yu.dai@intel.com> + */ + +#include "intel_guc_fw.h" +#include "i915_drv.h" + +#define SKL_FW_MAJOR 9 +#define SKL_FW_MINOR 33 + +#define BXT_FW_MAJOR 9 +#define BXT_FW_MINOR 29 + +#define KBL_FW_MAJOR 9 +#define KBL_FW_MINOR 39 + +#define GUC_FW_PATH(platform, major, minor) \ + "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin" + +#define I915_SKL_GUC_UCODE GUC_FW_PATH(skl, SKL_FW_MAJOR, SKL_FW_MINOR) +MODULE_FIRMWARE(I915_SKL_GUC_UCODE); + +#define I915_BXT_GUC_UCODE GUC_FW_PATH(bxt, BXT_FW_MAJOR, BXT_FW_MINOR) +MODULE_FIRMWARE(I915_BXT_GUC_UCODE); + +#define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) +MODULE_FIRMWARE(I915_KBL_GUC_UCODE); + +static void guc_fw_select(struct intel_uc_fw *guc_fw) +{ + struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + GEM_BUG_ON(guc_fw->type != INTEL_UC_FW_TYPE_GUC); + + if (!HAS_GUC(dev_priv)) + return; + + if (i915_modparams.guc_firmware_path) { + guc_fw->path = i915_modparams.guc_firmware_path; + guc_fw->major_ver_wanted = 0; + guc_fw->minor_ver_wanted = 0; + } else if (IS_SKYLAKE(dev_priv)) { + guc_fw->path = I915_SKL_GUC_UCODE; + guc_fw->major_ver_wanted = SKL_FW_MAJOR; + guc_fw->minor_ver_wanted = SKL_FW_MINOR; + } else if (IS_BROXTON(dev_priv)) { + guc_fw->path = I915_BXT_GUC_UCODE; + guc_fw->major_ver_wanted = BXT_FW_MAJOR; + guc_fw->minor_ver_wanted = BXT_FW_MINOR; + } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + guc_fw->path = I915_KBL_GUC_UCODE; + guc_fw->major_ver_wanted = KBL_FW_MAJOR; + guc_fw->minor_ver_wanted = KBL_FW_MINOR; + } else { + DRM_WARN("%s: No firmware known for this platform!\n", + intel_uc_fw_type_repr(guc_fw->type)); + } +} + +/** + * intel_guc_fw_init_early() - initializes GuC firmware struct + * @guc: intel_guc struct + * + * On platforms with GuC selects firmware for uploading + */ +void intel_guc_fw_init_early(struct intel_guc *guc) +{ + struct intel_uc_fw *guc_fw = &guc->fw; + + intel_uc_fw_init(guc_fw, INTEL_UC_FW_TYPE_GUC); + guc_fw_select(guc_fw); +} + +static void guc_prepare_xfer(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + /* Must program this register before loading the ucode with DMA */ + I915_WRITE(GUC_SHIM_CONTROL, 
GUC_DISABLE_SRAM_INIT_TO_ZEROES | + GUC_ENABLE_READ_CACHE_LOGIC | + GUC_ENABLE_MIA_CACHING | + GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | + GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | + GUC_ENABLE_MIA_CLOCK_GATING); + + if (IS_GEN9_LP(dev_priv)) + I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); + else + I915_WRITE(GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE); + + if (IS_GEN9(dev_priv)) { + /* DOP Clock Gating Enable for GuC clocks */ + I915_WRITE(GEN7_MISCCPCTL, (GEN8_DOP_CLOCK_GATE_GUC_ENABLE | + I915_READ(GEN7_MISCCPCTL))); + + /* allows for 5us (in 10ns units) before GT can go to RC6 */ + I915_WRITE(GUC_ARAT_C6DIS, 0x1FF); + } +} + +/* Copy RSA signature from the fw image to HW for verification */ +static int guc_xfer_rsa(struct intel_guc *guc, struct i915_vma *vma) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_uc_fw *guc_fw = &guc->fw; + struct sg_table *sg = vma->pages; + u32 rsa[UOS_RSA_SCRATCH_COUNT]; + int i; + + if (sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, sizeof(rsa), + guc_fw->rsa_offset) != sizeof(rsa)) + return -EINVAL; + + for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) + I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]); + + return 0; +} + +/* + * Transfer the firmware image to RAM for execution by the microcontroller. + * + * Architecturally, the DMA engine is bidirectional, and can potentially even + * transfer between GTT locations. This functionality is left out of the API + * for now as there is no need for it. + */ +static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_uc_fw *guc_fw = &guc->fw; + unsigned long offset; + u32 status; + int ret; + + /* + * The header plus uCode will be copied to WOPCM via DMA, excluding any + * other components + */ + I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); + + /* Set the source address for the new blob */ + offset = guc_ggtt_offset(vma) + guc_fw->header_offset; + I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); + I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); + + /* + * Set the DMA destination. Current uCode expects the code to be + * loaded at 8k; locations below this are used for the stack. + */ + I915_WRITE(DMA_ADDR_1_LOW, 0x2000); + I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); + + /* Finally start the DMA */ + I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA)); + + /* Wait for DMA to finish */ + ret = __intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0, + 2, 100, &status); + DRM_DEBUG_DRIVER("GuC DMA status %#x\n", status); + + return ret; +} + +/* + * Read the GuC status register (GUC_STATUS) and store it in the + * specified location; then return a boolean indicating whether + * the value matches either of two values representing completion + * of the GuC boot process. + * + * This is used for polling the GuC status in a wait_for() + * loop below. + */ +static inline bool guc_ready(struct intel_guc *guc, u32 *status) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 val = I915_READ(GUC_STATUS); + u32 uk_val = val & GS_UKERNEL_MASK; + + *status = val; + return (uk_val == GS_UKERNEL_READY) || + ((val & GS_MIA_CORE_STATE) && (uk_val == GS_UKERNEL_LAPIC_DONE)); +} + +static int guc_wait_ucode(struct intel_guc *guc) +{ + u32 status; + int ret; + + /* + * Wait for the GuC to start up. + * NB: Docs recommend not using the interrupt for completion. 
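+ * As a hedged illustration only, the wait_for() call below behaves
+ * roughly like this open-coded poll (simplified; the real macro also
+ * handles backoff and a final re-check itself):
+ *
+ *	unsigned long end = jiffies + msecs_to_jiffies(100);
+ *
+ *	while (!guc_ready(guc, &status)) {
+ *		if (time_after(jiffies, end))
+ *			return -ETIMEDOUT;
+ *		usleep_range(1000, 2000);
+ *	}
+ *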
+ * Measurements indicate this should take no more than 20ms, so a + * timeout here indicates that the GuC has failed and is unusable. + * (Higher levels of the driver will attempt to fall back to + * execlist mode if this happens.) + */ + ret = wait_for(guc_ready(guc, &status), 100); + DRM_DEBUG_DRIVER("GuC status %#x\n", status); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { + DRM_ERROR("GuC firmware signature verification failed\n"); + ret = -ENOEXEC; + } + + return ret; +} + +/* + * Load the GuC firmware blob into the MinuteIA. + */ +static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) +{ + struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); + struct drm_i915_private *dev_priv = guc_to_i915(guc); + int ret; + + GEM_BUG_ON(guc_fw->type != INTEL_UC_FW_TYPE_GUC); + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + guc_prepare_xfer(guc); + + /* + * Note that GuC needs the CSS header plus uKernel code to be copied + * by the DMA engine in one operation, whereas the RSA signature is + * loaded via MMIO. + */ + ret = guc_xfer_rsa(guc, vma); + if (ret) + DRM_WARN("GuC firmware signature xfer error %d\n", ret); + + ret = guc_xfer_ucode(guc, vma); + if (ret) + DRM_WARN("GuC firmware code xfer error %d\n", ret); + + ret = guc_wait_ucode(guc); + if (ret) + DRM_ERROR("GuC firmware xfer error %d\n", ret); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + return ret; +} + +/** + * intel_guc_fw_upload() - finish preparing the GuC for activity + * @guc: intel_guc structure + * + * Called during driver loading and also after a GPU reset. + * + * The main action required here is to load the GuC uCode into the device. + * The firmware image should have already been fetched into memory by the + * earlier call to intel_guc_init(), so here we need only check that + * worked, and then transfer the image to the h/w. + * + * Return: non-zero code on error + */ +int intel_guc_fw_upload(struct intel_guc *guc) +{ + return intel_uc_fw_upload(&guc->fw, guc_fw_xfer); +}
diff --git a/drivers/gpu/drm/i915/intel_guc_fw.h b/drivers/gpu/drm/i915/intel_guc_fw.h new file mode 100644 index 000000000000..4ec5d3d9e2b0 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_fw.h @@ -0,0 +1,33 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE.
+ * + */ + +#ifndef _INTEL_GUC_FW_H_ +#define _INTEL_GUC_FW_H_ + +struct intel_guc; + +void intel_guc_fw_init_early(struct intel_guc *guc); +int intel_guc_fw_upload(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 5fa286074811..6a10aa6f04d3 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -56,10 +56,6 @@ #define WQ_LEN_SHIFT 16 #define WQ_NO_WCFLUSH_WAIT (1 << 27) #define WQ_PRESENT_WORKLOAD (1 << 28) -#define WQ_WORKLOAD_SHIFT 29 -#define WQ_WORKLOAD_GENERAL (0 << WQ_WORKLOAD_SHIFT) -#define WQ_WORKLOAD_GPGPU (1 << WQ_WORKLOAD_SHIFT) -#define WQ_WORKLOAD_TOUCH (2 << WQ_WORKLOAD_SHIFT) #define WQ_RING_TAIL_SHIFT 20 #define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */ @@ -86,8 +82,8 @@ #define GUC_CTL_ARAT_LOW 2 #define GUC_CTL_DEVICE_INFO 3 -#define GUC_CTL_GTTYPE_SHIFT 0 -#define GUC_CTL_COREFAMILY_SHIFT 7 +#define GUC_CTL_GT_TYPE_SHIFT 0 +#define GUC_CTL_CORE_FAMILY_SHIFT 7 #define GUC_CTL_LOG_PARAMS 4 #define GUC_LOG_VALID (1 << 0) @@ -182,49 +178,49 @@ */ struct uc_css_header { - uint32_t module_type; + u32 module_type; /* header_size includes all non-uCode bits, including css_header, rsa * key, modulus key and exponent data. */ - uint32_t header_size_dw; - uint32_t header_version; - uint32_t module_id; - uint32_t module_vendor; + u32 header_size_dw; + u32 header_version; + u32 module_id; + u32 module_vendor; union { struct { - uint8_t day; - uint8_t month; - uint16_t year; + u8 day; + u8 month; + u16 year; }; - uint32_t date; + u32 date; }; - uint32_t size_dw; /* uCode plus header_size_dw */ - uint32_t key_size_dw; - uint32_t modulus_size_dw; - uint32_t exponent_size_dw; + u32 size_dw; /* uCode plus header_size_dw */ + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; union { struct { - uint8_t hour; - uint8_t min; - uint16_t sec; + u8 hour; + u8 min; + u16 sec; }; - uint32_t time; + u32 time; }; char username[8]; char buildnumber[12]; union { struct { - uint32_t branch_client_version; - uint32_t sw_version; + u32 branch_client_version; + u32 sw_version; } guc; struct { - uint32_t sw_version; - uint32_t reserved; + u32 sw_version; + u32 reserved; } huc; }; - uint32_t prod_preprod_fw; - uint32_t reserved[12]; - uint32_t header_info; + u32 prod_preprod_fw; + u32 reserved[12]; + u32 header_info; } __packed; struct guc_doorbell_info { @@ -388,7 +384,11 @@ struct guc_ct_buffer_desc { /* Preempt to idle on quantum expiry */ #define POLICY_PREEMPT_TO_IDLE (1<<1) -#define POLICY_MAX_NUM_WI 15 +#define POLICY_MAX_NUM_WI 15 +#define POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 +#define POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 +#define POLICY_DEFAULT_PREEMPTION_TIME_US 500000 +#define POLICY_DEFAULT_FAULT_TIME_US 250000 struct guc_policy { /* Time for one workload to execute. 
(in micro seconds) */ @@ -544,9 +544,37 @@ union guc_log_control { u32 value; } __packed; +struct guc_ctx_report { + u32 report_return_status; + u32 reserved1[64]; + u32 affected_count; + u32 reserved2[2]; +} __packed; + +/* GuC Shared Context Data Struct */ +struct guc_shared_ctx_data { + u32 addr_of_last_preempted_data_low; + u32 addr_of_last_preempted_data_high; + u32 addr_of_last_preempted_data_high_tmp; + u32 padding; + u32 is_mapped_to_proxy; + u32 proxy_ctx_id; + u32 engine_reset_ctx_id; + u32 media_reset_count; + u32 reserved1[8]; + u32 uk_last_ctx_switch_reason; + u32 was_reset; + u32 lrca_gpu_addr; + u64 execlist_ctx; + u32 reserved2[66]; + struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM]; +} __packed; + /* This Action will be programmed in C180 - SOFT_SCRATCH_O_REG */ enum intel_guc_action { INTEL_GUC_ACTION_DEFAULT = 0x0, + INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2, + INTEL_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3, INTEL_GUC_ACTION_SAMPLE_FORCEWAKE = 0x6, INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10, INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, @@ -562,6 +590,18 @@ enum intel_guc_action { INTEL_GUC_ACTION_LIMIT }; +enum intel_guc_preempt_options { + INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, + INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, +}; + +enum intel_guc_report_status { + INTEL_GUC_REPORT_STATUS_UNKNOWN = 0x0, + INTEL_GUC_REPORT_STATUS_ACKED = 0x1, + INTEL_GUC_REPORT_STATUS_ERROR = 0x2, + INTEL_GUC_REPORT_STATUS_COMPLETE = 0x4, +}; + /* * The GuC sends its response to a command by overwriting the * command in SS0. The response is distinguishable from a command diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c deleted file mode 100644 index 8b0ae7fce7f2..000000000000 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Vinit Azad <vinit.azad@intel.com> - * Ben Widawsky <ben@bwidawsk.net> - * Dave Gordon <david.s.gordon@intel.com> - * Alex Dai <yu.dai@intel.com> - */ -#include "i915_drv.h" -#include "intel_uc.h" - -/** - * DOC: GuC-specific firmware loader - * - * intel_guc: - * Top level structure of guc. It handles firmware loading and manages client - * pool and doorbells. intel_guc owns a i915_guc_client to replace the legacy - * ExecList submission. 
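Both the removed loader below and its replacement in intel_guc_fw.c build firmware paths with the same GUC_FW_PATH() stringification macro; a hedged illustration of its expansion (the example_kbl_path name is hypothetical):

	/* __stringify() expands, then stringifies, each argument */
	static const char example_kbl_path[] =
		GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR);
	/* with the new 9.39 definitions: "i915/kbl_guc_ver9_39.bin" */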
- * - * Firmware versioning: - * The firmware build process will generate a version header file with major and - * minor version defined. The versions are built into CSS header of firmware. - * i915 kernel driver set the minimal firmware version required per platform. - * The firmware installation package will install (symbolic link) proper version - * of firmware. - * - * GuC address space: - * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP), - * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is - * 512K. In order to exclude 0-512K address space from GGTT, all gfx objects - * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM. - * - */ - -#define SKL_FW_MAJOR 6 -#define SKL_FW_MINOR 1 - -#define BXT_FW_MAJOR 8 -#define BXT_FW_MINOR 7 - -#define KBL_FW_MAJOR 9 -#define KBL_FW_MINOR 14 - -#define GLK_FW_MAJOR 10 -#define GLK_FW_MINOR 56 - -#define GUC_FW_PATH(platform, major, minor) \ - "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin" - -#define I915_SKL_GUC_UCODE GUC_FW_PATH(skl, SKL_FW_MAJOR, SKL_FW_MINOR) -MODULE_FIRMWARE(I915_SKL_GUC_UCODE); - -#define I915_BXT_GUC_UCODE GUC_FW_PATH(bxt, BXT_FW_MAJOR, BXT_FW_MINOR) -MODULE_FIRMWARE(I915_BXT_GUC_UCODE); - -#define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) -MODULE_FIRMWARE(I915_KBL_GUC_UCODE); - -#define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR) - - -static u32 get_gttype(struct drm_i915_private *dev_priv) -{ - /* XXX: GT type based on PCI device ID? field seems unused by fw */ - return 0; -} - -static u32 get_core_family(struct drm_i915_private *dev_priv) -{ - u32 gen = INTEL_GEN(dev_priv); - - switch (gen) { - case 9: - return GUC_CORE_FAMILY_GEN9; - - default: - MISSING_CASE(gen); - return GUC_CORE_FAMILY_UNKNOWN; - } -} - -/* - * Initialise the GuC parameter block before starting the firmware - * transfer. These parameters are read by the firmware on startup - * and cannot be changed thereafter. - */ -static void guc_params_init(struct drm_i915_private *dev_priv) -{ - struct intel_guc *guc = &dev_priv->guc; - u32 params[GUC_CTL_MAX_DWORDS]; - int i; - - memset(¶ms, 0, sizeof(params)); - - params[GUC_CTL_DEVICE_INFO] |= - (get_gttype(dev_priv) << GUC_CTL_GTTYPE_SHIFT) | - (get_core_family(dev_priv) << GUC_CTL_COREFAMILY_SHIFT); - - /* - * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one - * second. 
This ARAR is calculated by: - * Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10 - */ - params[GUC_CTL_ARAT_HIGH] = 0; - params[GUC_CTL_ARAT_LOW] = 100000000; - - params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER; - - params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER | - GUC_CTL_VCS2_ENABLED; - - params[GUC_CTL_LOG_PARAMS] = guc->log.flags; - - if (i915.guc_log_level >= 0) { - params[GUC_CTL_DEBUG] = - i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; - } else - params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED; - - /* If GuC submission is enabled, set up additional parameters here */ - if (i915.enable_guc_submission) { - u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; - u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); - u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; - - params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; - params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; - - pgs >>= PAGE_SHIFT; - params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) | - (ctx_in_16 << GUC_CTL_CTXNUM_IN16_SHIFT); - - params[GUC_CTL_FEATURE] |= GUC_CTL_KERNEL_SUBMISSIONS; - - /* Unmask this bit to enable the GuC's internal scheduler */ - params[GUC_CTL_FEATURE] &= ~GUC_CTL_DISABLE_SCHEDULER; - } - - I915_WRITE(SOFT_SCRATCH(0), 0); - - for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - I915_WRITE(SOFT_SCRATCH(1 + i), params[i]); -} - -/* - * Read the GuC status register (GUC_STATUS) and store it in the - * specified location; then return a boolean indicating whether - * the value matches either of two values representing completion - * of the GuC boot process. - * - * This is used for polling the GuC status in a wait_for() - * loop below. - */ -static inline bool guc_ucode_response(struct drm_i915_private *dev_priv, - u32 *status) -{ - u32 val = I915_READ(GUC_STATUS); - u32 uk_val = val & GS_UKERNEL_MASK; - *status = val; - return (uk_val == GS_UKERNEL_READY || - ((val & GS_MIA_CORE_STATE) && uk_val == GS_UKERNEL_LAPIC_DONE)); -} - -/* - * Transfer the firmware image to RAM for execution by the microcontroller. - * - * Architecturally, the DMA engine is bidirectional, and can potentially even - * transfer between GTT locations. This functionality is left out of the API - * for now as there is no need for it. - * - * Note that GuC needs the CSS header plus uKernel code to be copied by the - * DMA engine in one operation, whereas the RSA signature is loaded via MMIO. - */ -static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv, - struct i915_vma *vma) -{ - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; - unsigned long offset; - struct sg_table *sg = vma->pages; - u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT]; - int i, ret = 0; - - /* where RSA signature starts */ - offset = guc_fw->rsa_offset; - - /* Copy RSA signature from the fw image to HW for verification */ - sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, sizeof(rsa), offset); - for (i = 0; i < UOS_RSA_SCRATCH_MAX_COUNT; i++) - I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]); - - /* The header plus uCode will be copied to WOPCM via DMA, excluding any - * other components */ - I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); - - /* Set the source address for the new blob */ - offset = guc_ggtt_offset(vma) + guc_fw->header_offset; - I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); - I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); - - /* - * Set the DMA destination. Current uCode expects the code to be - * loaded at 8k; locations below this are used for the stack. 
- */ - I915_WRITE(DMA_ADDR_1_LOW, 0x2000); - I915_WRITE(DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM); - - /* Finally start the DMA */ - I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA)); - - /* - * Wait for the DMA to complete & the GuC to start up. - * NB: Docs recommend not using the interrupt for completion. - * Measurements indicate this should take no more than 20ms, so a - * timeout here indicates that the GuC has failed and is unusable. - * (Higher levels of the driver will attempt to fall back to - * execlist mode if this happens.) - */ - ret = wait_for(guc_ucode_response(dev_priv, &status), 100); - - DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n", - I915_READ(DMA_CTRL), status); - - if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - DRM_ERROR("GuC firmware signature verification failed\n"); - ret = -ENOEXEC; - } - - DRM_DEBUG_DRIVER("returning %d\n", ret); - - return ret; -} - -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv) -{ - u32 wopcm_size = GUC_WOPCM_TOP; - - /* On BXT, the top of WOPCM is reserved for RC6 context */ - if (IS_GEN9_LP(dev_priv)) - wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED; - - return wopcm_size; -} - -/* - * Load the GuC firmware blob into the MinuteIA. - */ -static int guc_ucode_xfer(struct drm_i915_private *dev_priv) -{ - struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; - struct i915_vma *vma; - int ret; - - ret = i915_gem_object_set_to_gtt_domain(guc_fw->obj, false); - if (ret) { - DRM_DEBUG_DRIVER("set-domain failed %d\n", ret); - return ret; - } - - vma = i915_gem_object_ggtt_pin(guc_fw->obj, NULL, 0, 0, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); - if (IS_ERR(vma)) { - DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma)); - return PTR_ERR(vma); - } - - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - - /* Enable MIA caching. GuC clock gating is disabled. */ - I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE); - - /* WaDisableMinuteIaClockGating:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GUC_SHIM_CONTROL, (I915_READ(GUC_SHIM_CONTROL) & - ~GUC_ENABLE_MIA_CLOCK_GATING)); - } - - /* WaC6DisallowByGfxPause:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) - I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); - - if (IS_GEN9_LP(dev_priv)) - I915_WRITE(GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE); - else - I915_WRITE(GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE); - - if (IS_GEN9(dev_priv)) { - /* DOP Clock Gating Enable for GuC clocks */ - I915_WRITE(GEN7_MISCCPCTL, (GEN8_DOP_CLOCK_GATE_GUC_ENABLE | - I915_READ(GEN7_MISCCPCTL))); - - /* allows for 5us (in 10ns units) before GT can go to RC6 */ - I915_WRITE(GUC_ARAT_C6DIS, 0x1FF); - } - - guc_params_init(dev_priv); - - ret = guc_ucode_xfer_dma(dev_priv, vma); - - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - - /* - * We keep the object pages for reuse during resume. But we can unpin it - * now that DMA has completed, so it doesn't continue to take up space. - */ - i915_vma_unpin(vma); - - return ret; -} - -/** - * intel_guc_init_hw() - finish preparing the GuC for activity - * @guc: intel_guc structure - * - * Called during driver loading and also after a GPU reset. - * - * The main action required here it to load the GuC uCode into the device. - * The firmware image should have already been fetched into memory by the - * earlier call to intel_guc_init(), so here we need only check that - * worked, and then transfer the image to the h/w. 
- * - * Return: non-zero code on error - */ -int intel_guc_init_hw(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - const char *fw_path = guc->fw.path; - int ret; - - DRM_DEBUG_DRIVER("GuC fw status: path %s, fetch %s, load %s\n", - fw_path, - intel_uc_fw_status_repr(guc->fw.fetch_status), - intel_uc_fw_status_repr(guc->fw.load_status)); - - if (guc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return -EIO; - - guc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; - - DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", - intel_uc_fw_status_repr(guc->fw.fetch_status), - intel_uc_fw_status_repr(guc->fw.load_status)); - - ret = guc_ucode_xfer(dev_priv); - - if (ret) - return -EAGAIN; - - guc->fw.load_status = INTEL_UC_FIRMWARE_SUCCESS; - - DRM_INFO("GuC %s (firmware %s [version %u.%u])\n", - i915.enable_guc_submission ? "submission enabled" : "loaded", - guc->fw.path, - guc->fw.major_ver_found, guc->fw.minor_ver_found); - - return 0; -} - -/** - * intel_guc_select_fw() - selects GuC firmware for loading - * @guc: intel_guc struct - * - * Return: zero when we know firmware, non-zero in other case - */ -int intel_guc_select_fw(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - guc->fw.path = NULL; - guc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; - guc->fw.load_status = INTEL_UC_FIRMWARE_NONE; - guc->fw.type = INTEL_UC_FW_TYPE_GUC; - - if (i915.guc_firmware_path) { - guc->fw.path = i915.guc_firmware_path; - guc->fw.major_ver_wanted = 0; - guc->fw.minor_ver_wanted = 0; - } else if (IS_SKYLAKE(dev_priv)) { - guc->fw.path = I915_SKL_GUC_UCODE; - guc->fw.major_ver_wanted = SKL_FW_MAJOR; - guc->fw.minor_ver_wanted = SKL_FW_MINOR; - } else if (IS_BROXTON(dev_priv)) { - guc->fw.path = I915_BXT_GUC_UCODE; - guc->fw.major_ver_wanted = BXT_FW_MAJOR; - guc->fw.minor_ver_wanted = BXT_FW_MINOR; - } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - guc->fw.path = I915_KBL_GUC_UCODE; - guc->fw.major_ver_wanted = KBL_FW_MAJOR; - guc->fw.minor_ver_wanted = KBL_FW_MINOR; - } else if (IS_GEMINILAKE(dev_priv)) { - guc->fw.path = I915_GLK_GUC_UCODE; - guc->fw.major_ver_wanted = GLK_FW_MAJOR; - guc->fw.minor_ver_wanted = GLK_FW_MINOR; - } else { - DRM_ERROR("No GuC firmware known for platform with GuC!\n"); - return -ENOENT; - } - - return 0; -} diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 16d3b8719cab..eaedd63e3819 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -21,8 +21,11 @@ * IN THE SOFTWARE. * */ + #include <linux/debugfs.h> #include <linux/relay.h> + +#include "intel_guc_log.h" #include "i915_drv.h" static void guc_log_capture_logs(struct intel_guc *guc); @@ -144,7 +147,7 @@ static int guc_log_relay_file_create(struct intel_guc *guc) struct dentry *log_dir; int ret; - if (i915.guc_log_level < 0) + if (i915_modparams.guc_log_level < 0) return 0; /* For now create the log file in /sys/kernel/debug/dri/0 dir */ @@ -408,30 +411,8 @@ static int guc_log_runtime_create(struct intel_guc *guc) guc->log.runtime.relay_chan = guc_log_relay_chan; INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work); - - /* - * GuC log buffer flush work item has to do register access to - * send the ack to GuC and this work item, if not synced before - * suspend, can potentially get executed after the GFX device is - * suspended. 
- * By marking the WQ as freezable, we don't have to bother about - * flushing of this work item from the suspend hooks, the pending - * work item if any will be either executed before the suspend - * or scheduled later on resume. This way the handling of work - * item can be kept same between system suspend & rpm suspend. - */ - guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", - WQ_HIGHPRI | WQ_FREEZABLE); - if (!guc->log.runtime.flush_wq) { - DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); - ret = -ENOMEM; - goto err_relaychan; - } - return 0; -err_relaychan: - relay_close(guc->log.runtime.relay_chan); err_vaddr: i915_gem_object_unpin_map(guc->log.vma->obj); guc->log.runtime.buf_addr = NULL; @@ -447,7 +428,6 @@ static void guc_log_runtime_destroy(struct intel_guc *guc) if (!guc_log_has_runtime(guc)) return; - destroy_workqueue(guc->log.runtime.flush_wq); relay_close(guc->log.runtime.relay_chan); i915_gem_object_unpin_map(guc->log.vma->obj); guc->log.runtime.buf_addr = NULL; @@ -480,7 +460,7 @@ err_runtime: guc_log_runtime_destroy(guc); err: /* logging will remain off */ - i915.guc_log_level = -1; + i915_modparams.guc_log_level = -1; return ret; } @@ -502,7 +482,8 @@ static void guc_flush_logs(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - if (!i915.enable_guc_submission || (i915.guc_log_level < 0)) + if (!USES_GUC_SUBMISSION(dev_priv) || + (i915_modparams.guc_log_level < 0)) return; /* First disable the interrupts, will be renabled afterwards */ @@ -524,13 +505,14 @@ int intel_guc_log_create(struct intel_guc *guc) { struct i915_vma *vma; unsigned long offset; - uint32_t size, flags; + u32 flags; + u32 size; int ret; GEM_BUG_ON(guc->log.vma); - if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) - i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; + if (i915_modparams.guc_log_level > GUC_LOG_VERBOSITY_MAX) + i915_modparams.guc_log_level = GUC_LOG_VERBOSITY_MAX; /* The first page is to save log buffer state. 
Allocate one * extra page for others in case for overlap */ @@ -555,7 +537,7 @@ int intel_guc_log_create(struct intel_guc *guc) guc->log.vma = vma; - if (i915.guc_log_level >= 0) { + if (i915_modparams.guc_log_level >= 0) { ret = guc_log_runtime_create(guc); if (ret < 0) goto err_vma; @@ -576,7 +558,7 @@ err_vma: i915_vma_unpin_and_release(&guc->log.vma); err: /* logging will be off */ - i915.guc_log_level = -1; + i915_modparams.guc_log_level = -1; return ret; } @@ -600,7 +582,7 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) return -EINVAL; /* This combination doesn't make sense & won't have any effect */ - if (!log_param.logging_enabled && (i915.guc_log_level < 0)) + if (!log_param.logging_enabled && (i915_modparams.guc_log_level < 0)) return 0; ret = guc_log_control(guc, log_param.value); @@ -610,7 +592,7 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) } if (log_param.logging_enabled) { - i915.guc_log_level = log_param.verbosity; + i915_modparams.guc_log_level = log_param.verbosity; /* If log_level was set as -1 at boot time, then the relay channel file * wouldn't have been created by now and interrupts also would not have @@ -633,7 +615,7 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) guc_flush_logs(guc); /* As logging is disabled, update log level to reflect that */ - i915.guc_log_level = -1; + i915_modparams.guc_log_level = -1; } return ret; @@ -641,7 +623,8 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) void i915_guc_log_register(struct drm_i915_private *dev_priv) { - if (!i915.enable_guc_submission || i915.guc_log_level < 0) + if (!USES_GUC_SUBMISSION(dev_priv) || + (i915_modparams.guc_log_level < 0)) return; mutex_lock(&dev_priv->drm.struct_mutex); @@ -651,7 +634,7 @@ void i915_guc_log_register(struct drm_i915_private *dev_priv) void i915_guc_log_unregister(struct drm_i915_private *dev_priv) { - if (!i915.enable_guc_submission) + if (!USES_GUC_SUBMISSION(dev_priv)) return; mutex_lock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h new file mode 100644 index 000000000000..f512cf79339b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_log.h @@ -0,0 +1,59 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef _INTEL_GUC_LOG_H_ +#define _INTEL_GUC_LOG_H_ + +#include <linux/workqueue.h> + +#include "intel_guc_fwif.h" + +struct drm_i915_private; +struct intel_guc; + +struct intel_guc_log { + u32 flags; + struct i915_vma *vma; + /* The runtime stuff gets created only when GuC logging gets enabled */ + struct { + void *buf_addr; + struct workqueue_struct *flush_wq; + struct work_struct flush_work; + struct rchan *relay_chan; + } runtime; + /* logging related stats */ + u32 capture_miss_count; + u32 flush_interrupt_count; + u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; + u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; + u32 flush_count[GUC_MAX_LOG_BUFFER]; +}; + +int intel_guc_log_create(struct intel_guc *guc); +void intel_guc_log_destroy(struct intel_guc *guc); +int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); +void i915_guc_log_register(struct drm_i915_private *dev_priv); +void i915_guc_log_unregister(struct drm_i915_private *dev_priv); + +#endif
diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/intel_guc_reg.h index 35cf9918d09a..19a9247c5664 100644 --- a/drivers/gpu/drm/i915/i915_guc_reg.h +++ b/drivers/gpu/drm/i915/intel_guc_reg.h @@ -21,8 +21,8 @@ * IN THE SOFTWARE. * */ -#ifndef _I915_GUC_REG_H_ -#define _I915_GUC_REG_H_ +#ifndef _INTEL_GUC_REG_H_ +#define _INTEL_GUC_REG_H_ /* Definitions of GuC H/W registers, bits, etc */ @@ -52,7 +52,8 @@ #define SOFT_SCRATCH_COUNT 16 #define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4) -#define UOS_RSA_SCRATCH_MAX_COUNT 64 +#define UOS_RSA_SCRATCH_COUNT 64 + #define DMA_ADDR_0_LOW _MMIO(0xc300) #define DMA_ADDR_0_HIGH _MMIO(0xc304) #define DMA_ADDR_1_LOW _MMIO(0xc308) @@ -102,13 +103,6 @@ #define GUC_ENABLE_MIA_CLOCK_GATING (1<<15) #define GUC_GEN10_SHIM_WC_ENABLE (1<<21) -#define GUC_SHIM_CONTROL_VALUE (GUC_DISABLE_SRAM_INIT_TO_ZEROES | \ - GUC_ENABLE_READ_CACHE_LOGIC | \ - GUC_ENABLE_MIA_CACHING | \ - GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | \ - GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA | \ - GUC_ENABLE_MIA_CLOCK_GATING) - #define GUC_SEND_INTERRUPT _MMIO(0xc4c8) #define GUC_SEND_TRIGGER (1<<0)
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 48a1e9349a2c..4d2409466a3a 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -21,26 +21,28 @@ * IN THE SOFTWARE. * */ -#include <linux/circ_buf.h> -#include "i915_drv.h" -#include "intel_uc.h" +#include <linux/circ_buf.h> #include <trace/events/dma_fence.h> +#include "intel_guc_submission.h" +#include "i915_drv.h" + /** * DOC: GuC-based command submission * * GuC client: - * A i915_guc_client refers to a submission path through GuC. Currently, there - * is only one of these (the execbuf_client) and this one is charged with all - * submissions to the GuC. This struct is the owner of a doorbell, a process - * descriptor and a workqueue (all of them inside a single gem object that - * contains all required pages for these elements). + * An intel_guc_client refers to a submission path through GuC. Currently, there + * are two clients. One of them (the execbuf_client) is charged with all + * submissions to the GuC; the other one (preempt_client) is responsible for + * preempting the execbuf_client. This struct is the owner of a doorbell, a + * process descriptor and a workqueue (all of them inside a single gem object + * that contains all required pages for these elements).
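A hedged sketch of the address math this single-object layout implies (the helper name is hypothetical; guc_wq_item_append() later in this file performs the same computation inline):

	static void *example_wq_item_addr(struct intel_guc_client *client,
					  u32 wq_off)
	{
		/* the doorbell and process descriptor occupy the first
		 * GUC_DB_SIZE bytes of the object; the workqueue follows */
		return client->vaddr + GUC_DB_SIZE + wq_off;
	}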
 * * GuC stage descriptor: * During initialization, the driver allocates a static pool of 1024 such * descriptors, and shares them with the GuC. - * Currently, there exists a 1:1 mapping between a i915_guc_client and a + * Currently, there exists a 1:1 mapping between an intel_guc_client and a * guc_stage_desc (via the client's stage_id), so effectively only one * gets used. This stage descriptor lets the GuC know about the doorbell, * workqueue and process descriptor. Theoretically, it also lets the GuC @@ -69,7 +71,7 @@ * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which * represents in-order queue. The kernel driver packs ring tail pointer and an * ELSP context descriptor dword into Work Item. - * See guc_wq_item_append() + * See guc_add_request() * * ADS: * The Additional Data Struct (ADS) has pointers for different buffers used by @@ -80,12 +82,13 @@ * */ -static inline bool is_high_priority(struct i915_guc_client* client) +static inline bool is_high_priority(struct intel_guc_client *client) { - return client->priority <= GUC_CLIENT_PRIORITY_HIGH; + return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH || + client->priority == GUC_CLIENT_PRIORITY_HIGH); } -static int __reserve_doorbell(struct i915_guc_client *client) +static int reserve_doorbell(struct intel_guc_client *client) { unsigned long offset; unsigned long end; @@ -99,7 +102,7 @@ static int __reserve_doorbell(struct i915_guc_client *client) * priority contexts, the second half for high-priority ones. */ offset = 0; - end = GUC_NUM_DOORBELLS/2; + end = GUC_NUM_DOORBELLS / 2; if (is_high_priority(client)) { offset = end; end += offset; @@ -117,7 +120,7 @@ static int __reserve_doorbell(struct i915_guc_client *client) return 0; } -static void __unreserve_doorbell(struct i915_guc_client *client) +static void unreserve_doorbell(struct intel_guc_client *client) { GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); @@ -149,7 +152,7 @@ static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static struct guc_stage_desc *__get_stage_desc(struct i915_guc_client *client) +static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client) { struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr; @@ -163,7 +166,7 @@ static struct guc_stage_desc *__get_stage_desc(struct i915_guc_client *client) * client object which contains the page being used for the doorbell */ -static void __update_doorbell_desc(struct i915_guc_client *client, u16 new_id) +static void __update_doorbell_desc(struct intel_guc_client *client, u16 new_id) { struct guc_stage_desc *desc; @@ -172,12 +175,12 @@ static void __update_doorbell_desc(struct i915_guc_client *client, u16 new_id) desc->db_id = new_id; } -static struct guc_doorbell_info *__get_doorbell(struct i915_guc_client *client) +static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client) { return client->vaddr + client->doorbell_offset; } -static bool has_doorbell(struct i915_guc_client *client) +static bool has_doorbell(struct intel_guc_client *client) { if (client->doorbell_id == GUC_DOORBELL_INVALID) return false; @@ -185,30 +188,21 @@ static bool has_doorbell(struct i915_guc_client *client) return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); } -static int __create_doorbell(struct i915_guc_client *client) +static void __create_doorbell(struct intel_guc_client *client) { struct guc_doorbell_info *doorbell; - int err; doorbell = __get_doorbell(client);
doorbell->db_status = GUC_DOORBELL_ENABLED; - doorbell->cookie = client->doorbell_cookie; - - err = __guc_allocate_doorbell(client->guc, client->stage_id); - if (err) { - doorbell->db_status = GUC_DOORBELL_DISABLED; - doorbell->cookie = 0; - } - return err; + doorbell->cookie = 0; } -static int __destroy_doorbell(struct i915_guc_client *client) +static void __destroy_doorbell(struct intel_guc_client *client) { struct drm_i915_private *dev_priv = guc_to_i915(client->guc); struct guc_doorbell_info *doorbell; u16 db_id = client->doorbell_id; - GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID); doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_DISABLED; @@ -216,56 +210,49 @@ static int __destroy_doorbell(struct i915_guc_client *client) /* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit * to go to zero after updating db_status before we call the GuC to - * release the doorbell */ + * release the doorbell + */ if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10)) WARN_ONCE(true, "Doorbell never became invalid after disable\n"); - - return __guc_deallocate_doorbell(client->guc, client->stage_id); } -static int create_doorbell(struct i915_guc_client *client) +static int create_doorbell(struct intel_guc_client *client) { int ret; - ret = __reserve_doorbell(client); - if (ret) - return ret; - __update_doorbell_desc(client, client->doorbell_id); + __create_doorbell(client); - ret = __create_doorbell(client); - if (ret) - goto err; + ret = __guc_allocate_doorbell(client->guc, client->stage_id); + if (ret) { + __destroy_doorbell(client); + __update_doorbell_desc(client, GUC_DOORBELL_INVALID); + DRM_ERROR("Couldn't create client %u doorbell: %d\n", + client->stage_id, ret); + return ret; + } return 0; - -err: - __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - __unreserve_doorbell(client); - return ret; } -static int destroy_doorbell(struct i915_guc_client *client) +static int destroy_doorbell(struct intel_guc_client *client) { - int err; + int ret; GEM_BUG_ON(!has_doorbell(client)); - /* XXX: wait for any interrupts */ - /* XXX: wait for workqueue to drain */ - - err = __destroy_doorbell(client); - if (err) - return err; + __destroy_doorbell(client); + ret = __guc_deallocate_doorbell(client->guc, client->stage_id); + if (ret) + DRM_ERROR("Couldn't destroy client %u doorbell: %d\n", + client->stage_id, ret); __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - __unreserve_doorbell(client); - - return 0; + return ret; } -static unsigned long __select_cacheline(struct intel_guc* guc) +static unsigned long __select_cacheline(struct intel_guc *guc) { unsigned long offset; @@ -276,12 +263,12 @@ static unsigned long __select_cacheline(struct intel_guc* guc) guc->db_cacheline += cache_line_size(); DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n", - offset, guc->db_cacheline, cache_line_size()); + offset, guc->db_cacheline, cache_line_size()); return offset; } static inline struct guc_process_desc * -__get_process_desc(struct i915_guc_client *client) +__get_process_desc(struct intel_guc_client *client) { return client->vaddr + client->proc_desc_offset; } @@ -290,7 +277,7 @@ __get_process_desc(struct i915_guc_client *client) * Initialise the process descriptor shared with the GuC firmware. 
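For orientation, __select_cacheline() above gives each client's doorbell its own cache line within a page, advancing cyclically; a hedged restatement (the helper name is hypothetical):

	static unsigned long example_next_db_cacheline(unsigned long line)
	{
		/* one cache line per doorbell, wrapping at the page boundary
		 * ("Cyclic counter mod pagesize" in struct intel_guc) */
		return (line + cache_line_size()) % PAGE_SIZE;
	}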
*/ static void guc_proc_desc_init(struct intel_guc *guc, - struct i915_guc_client *client) + struct intel_guc_client *client) { struct guc_process_desc *desc; @@ -306,11 +293,42 @@ static void guc_proc_desc_init(struct intel_guc *guc, desc->db_base_addr = 0; desc->stage_id = client->stage_id; - desc->wq_size_bytes = client->wq_size; + desc->wq_size_bytes = GUC_WQ_SIZE; desc->wq_status = WQ_STATUS_ACTIVE; desc->priority = client->priority; } +static int guc_stage_desc_pool_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, + PAGE_ALIGN(sizeof(struct guc_stage_desc) * + GUC_MAX_STAGE_DESCRIPTORS)); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma); + return PTR_ERR(vaddr); + } + + guc->stage_desc_pool = vma; + guc->stage_desc_pool_vaddr = vaddr; + ida_init(&guc->stage_ids); + + return 0; +} + +static void guc_stage_desc_pool_destroy(struct intel_guc *guc) +{ + ida_destroy(&guc->stage_ids); + i915_gem_object_unpin_map(guc->stage_desc_pool->obj); + i915_vma_unpin_and_release(&guc->stage_desc_pool); +} + /* * Initialise/clear the stage descriptor shared with the GuC firmware. * @@ -319,7 +337,7 @@ static void guc_proc_desc_init(struct intel_guc *guc, * write queue, etc). */ static void guc_stage_desc_init(struct intel_guc *guc, - struct i915_guc_client *client) + struct intel_guc_client *client) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_engine_cs *engine; @@ -331,14 +349,17 @@ static void guc_stage_desc_init(struct intel_guc *guc, desc = __get_stage_desc(client); memset(desc, 0, sizeof(*desc)); - desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | GUC_STAGE_DESC_ATTR_KERNEL; + desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | + GUC_STAGE_DESC_ATTR_KERNEL; + if (is_high_priority(client)) + desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT; desc->stage_id = client->stage_id; desc->priority = client->priority; desc->db_id = client->doorbell_id; for_each_engine_masked(engine, dev_priv, client->engines, tmp) { struct intel_context *ce = &ctx->engine[engine->id]; - uint32_t guc_engine_id = engine->guc_id; + u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; /* TODO: We have a design issue to be solved here. Only when we @@ -356,7 +377,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, * submission or, in other words, not using a direct submission * model) the KMD's LRCA is not used for any work submission. * Instead, the GuC uses the LRCA of the user mode context (see - * guc_wq_item_append below). + * guc_add_request below). */ lrc->context_desc = lower_32_bits(ce->lrc_desc); @@ -365,7 +386,8 @@ static void guc_stage_desc_init(struct intel_guc *guc, guc_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; /* XXX: In direct submission, the GuC wants the HW context id - * here. In proxy submission, it wants the stage id */ + * here. 
In proxy submission, it wants the stage id + */ lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) | (guc_engine_id << GUC_ELC_ENGINE_OFFSET); @@ -378,7 +400,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, } DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", - client->engines, desc->engines_used); + client->engines, desc->engines_used); WARN_ON(desc->engines_used == 0); /* @@ -388,17 +410,17 @@ static void guc_stage_desc_init(struct intel_guc *guc, gfx_addr = guc_ggtt_offset(client->vma); desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; - desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client); + desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client)); desc->db_trigger_uk = gfx_addr + client->doorbell_offset; desc->process_desc = gfx_addr + client->proc_desc_offset; - desc->wq_addr = gfx_addr + client->wq_offset; - desc->wq_size = client->wq_size; + desc->wq_addr = gfx_addr + GUC_DB_SIZE; + desc->wq_size = GUC_WQ_SIZE; - desc->desc_private = (uintptr_t)client; + desc->desc_private = ptr_to_u64(client); } static void guc_stage_desc_fini(struct intel_guc *guc, - struct i915_guc_client *client) + struct intel_guc_client *client) { struct guc_stage_desc *desc; @@ -406,82 +428,19 @@ static void guc_stage_desc_fini(struct intel_guc *guc, memset(desc, 0, sizeof(*desc)); } -/** - * i915_guc_wq_reserve() - reserve space in the GuC's workqueue - * @request: request associated with the commands - * - * Return: 0 if space is available - * -EAGAIN if space is not currently available - * - * This function must be called (and must return 0) before a request - * is submitted to the GuC via i915_guc_submit() below. Once a result - * of 0 has been returned, it must be balanced by a corresponding - * call to submit(). - * - * Reservation allows the caller to determine in advance that space - * will be available for the next submission before committing resources - * to it, and helps avoid late failures with complicated recovery paths. 
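The reservation scheme being retired here, like the guc_wq_item_append() accounting that replaces it, rests on CIRC_SPACE() from linux/circ_buf.h; a hedged sketch (the helper name is hypothetical):

	static bool example_wq_has_room(u32 tail, u32 head)
	{
		/* producer index first, consumer index second; one slot always
		 * stays free so "full" and "empty" remain distinguishable */
		return CIRC_SPACE(tail, head, GUC_WQ_SIZE) >=
		       sizeof(struct guc_wq_item);
	}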
- */ -int i915_guc_wq_reserve(struct drm_i915_gem_request *request) -{ - const size_t wqi_size = sizeof(struct guc_wq_item); - struct i915_guc_client *client = request->i915->guc.execbuf_client; - struct guc_process_desc *desc = __get_process_desc(client); - u32 freespace; - int ret; - - spin_lock_irq(&client->wq_lock); - freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); - freespace -= client->wq_rsvd; - if (likely(freespace >= wqi_size)) { - client->wq_rsvd += wqi_size; - ret = 0; - } else { - client->no_wq_space++; - ret = -EAGAIN; - } - spin_unlock_irq(&client->wq_lock); - - return ret; -} - -static void guc_client_update_wq_rsvd(struct i915_guc_client *client, int size) -{ - unsigned long flags; - - spin_lock_irqsave(&client->wq_lock, flags); - client->wq_rsvd += size; - spin_unlock_irqrestore(&client->wq_lock, flags); -} - -void i915_guc_wq_unreserve(struct drm_i915_gem_request *request) -{ - const int wqi_size = sizeof(struct guc_wq_item); - struct i915_guc_client *client = request->i915->guc.execbuf_client; - - GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size); - guc_client_update_wq_rsvd(client, -wqi_size); -} - /* Construct a Work Item and append it to the GuC's Work Queue */ -static void guc_wq_item_append(struct i915_guc_client *client, - struct drm_i915_gem_request *rq) +static void guc_wq_item_append(struct intel_guc_client *client, + u32 target_engine, u32 context_desc, + u32 ring_tail, u32 fence_id) { /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); - const u32 wqi_len = wqi_size/sizeof(u32) - 1; - struct intel_engine_cs *engine = rq->engine; + const u32 wqi_len = wqi_size / sizeof(u32) - 1; struct guc_process_desc *desc = __get_process_desc(client); struct guc_wq_item *wqi; - u32 freespace, tail, wq_off; + u32 wq_off; - /* Free space is guaranteed, see i915_guc_wq_reserve() above */ - freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); - GEM_BUG_ON(freespace < wqi_size); - - /* The GuC firmware wants the tail index in QWords, not bytes */ - tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3; - GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); + lockdep_assert_held(&client->wq_lock); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we * should not have the case where structure wqi is across page, neither @@ -491,191 +450,278 @@ static void guc_wq_item_append(struct i915_guc_client *client, * workqueue buffer dw by dw. */ BUILD_BUG_ON(wqi_size != 16); - GEM_BUG_ON(client->wq_rsvd < wqi_size); - /* postincrement WQ tail for next time */ - wq_off = client->wq_tail; + /* Free space is guaranteed. 
*/ + wq_off = READ_ONCE(desc->tail); + GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head), + GUC_WQ_SIZE) < wqi_size); GEM_BUG_ON(wq_off & (wqi_size - 1)); - client->wq_tail += wqi_size; - client->wq_tail &= client->wq_size - 1; - client->wq_rsvd -= wqi_size; /* WQ starts from the page after doorbell / process_desc */ wqi = client->vaddr + wq_off + GUC_DB_SIZE; /* Now fill in the 4-word work queue item */ wqi->header = WQ_TYPE_INORDER | - (wqi_len << WQ_LEN_SHIFT) | - (engine->guc_id << WQ_TARGET_SHIFT) | - WQ_NO_WCFLUSH_WAIT; - - /* The GuC wants only the low-order word of the context descriptor */ - wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine); - - wqi->submit_element_info = tail << WQ_RING_TAIL_SHIFT; - wqi->fence_id = rq->global_seqno; + (wqi_len << WQ_LEN_SHIFT) | + (target_engine << WQ_TARGET_SHIFT) | + WQ_NO_WCFLUSH_WAIT; + wqi->context_desc = context_desc; + wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; + GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); + wqi->fence_id = fence_id; + + /* Make the update visible to GuC */ + WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1)); } -static void guc_reset_wq(struct i915_guc_client *client) +static void guc_reset_wq(struct intel_guc_client *client) { struct guc_process_desc *desc = __get_process_desc(client); desc->head = 0; desc->tail = 0; - - client->wq_tail = 0; } -static int guc_ring_doorbell(struct i915_guc_client *client) +static void guc_ring_doorbell(struct intel_guc_client *client) { - struct guc_process_desc *desc = __get_process_desc(client); - union guc_doorbell_qw db_cmp, db_exc, db_ret; - union guc_doorbell_qw *db; - int attempt = 2, ret = -EAGAIN; + struct guc_doorbell_info *db; + u32 cookie; - /* Update the tail so it is visible to GuC */ - desc->tail = client->wq_tail; + lockdep_assert_held(&client->wq_lock); - /* current cookie */ - db_cmp.db_status = GUC_DOORBELL_ENABLED; - db_cmp.cookie = client->doorbell_cookie; + /* pointer of current doorbell cacheline */ + db = __get_doorbell(client); - /* cookie to be updated */ - db_exc.db_status = GUC_DOORBELL_ENABLED; - db_exc.cookie = client->doorbell_cookie + 1; - if (db_exc.cookie == 0) - db_exc.cookie = 1; + /* + * We're not expecting the doorbell cookie to change behind our back, + * we also need to treat 0 as a reserved value. + */ + cookie = READ_ONCE(db->cookie); + WARN_ON_ONCE(xchg(&db->cookie, cookie + 1 ?: cookie + 2) != cookie); - /* pointer of current doorbell cacheline */ - db = (union guc_doorbell_qw *)__get_doorbell(client); - - while (attempt--) { - /* lets ring the doorbell */ - db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db, - db_cmp.value_qw, db_exc.value_qw); - - /* if the exchange was successfully executed */ - if (db_ret.value_qw == db_cmp.value_qw) { - /* db was successfully rung */ - client->doorbell_cookie = db_exc.cookie; - ret = 0; - break; - } + /* XXX: doorbell was lost and need to acquire it again */ + GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED); +} - /* XXX: doorbell was lost and need to acquire it again */ - if (db_ret.db_status == GUC_DOORBELL_DISABLED) - break; +static void guc_add_request(struct intel_guc *guc, + struct drm_i915_gem_request *rq) +{ + struct intel_guc_client *client = guc->execbuf_client; + struct intel_engine_cs *engine = rq->engine; + u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(rq->ctx, + engine)); + u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); - DRM_WARN("Cookie mismatch. 
Expected %d, found %d\n", - db_cmp.cookie, db_ret.cookie); + spin_lock(&client->wq_lock); - /* update the cookie to newly read cookie from GuC */ - db_cmp.cookie = db_ret.cookie; - db_exc.cookie = db_ret.cookie + 1; - if (db_exc.cookie == 0) - db_exc.cookie = 1; - } + guc_wq_item_append(client, engine->guc_id, ctx_desc, + ring_tail, rq->global_seqno); + guc_ring_doorbell(client); - return ret; + client->submissions[engine->id] += 1; + + spin_unlock(&client->wq_lock); } -/** - * __i915_guc_submit() - Submit commands through GuC - * @rq: request associated with the commands - * - * The caller must have already called i915_guc_wq_reserve() above with - * a result of 0 (success), guaranteeing that there is space in the work - * queue for the new request, so enqueuing the item cannot fail. - * - * Bad Things Will Happen if the caller violates this protocol e.g. calls - * submit() when _reserve() says there's no space, or calls _submit() - * a different number of times from (successful) calls to _reserve(). - * - * The only error here arises if the doorbell hardware isn't functioning - * as expected, which really shouln't happen. +/* + * When we're doing submissions using regular execlists backend, writing to + * ELSP from CPU side is enough to make sure that writes to ringbuffer pages + * pinned in mappable aperture portion of GGTT are visible to command streamer. + * Writes done by GuC on our behalf are not guaranteeing such ordering, + * therefore, to ensure the flush, we're issuing a POSTING READ. */ -static void __i915_guc_submit(struct drm_i915_gem_request *rq) +static void flush_ggtt_writes(struct i915_vma *vma) { - struct drm_i915_private *dev_priv = rq->i915; - struct intel_engine_cs *engine = rq->engine; - unsigned int engine_id = engine->id; - struct intel_guc *guc = &rq->i915->guc; - struct i915_guc_client *client = guc->execbuf_client; - unsigned long flags; - int b_ret; - - /* WA to flush out the pending GMADR writes to ring buffer. 
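Worth noting in the rewritten guc_ring_doorbell() above: the old cmpxchg retry loop becomes a single xchg that bumps the cookie while skipping zero, since zero is a reserved value. The GNU "cookie + 1 ?: cookie + 2" shorthand expands to the plain C below; a standalone sketch of the wrap behaviour:

#include <stdint.h>
#include <stdio.h>

/* Any write to the doorbell cacheline rings the bell; the cookie just
 * has to change, and must never become the reserved value 0. */
static uint32_t next_cookie(uint32_t cookie)
{
	return cookie + 1 ? cookie + 1 : cookie + 2;
}

int main(void)
{
	printf("5 -> %u\n", (unsigned int)next_cookie(5));
	/* 0xffffffff + 1 wraps to 0, so the +2 path lands on 1 */
	printf("0xffffffff -> %u\n", (unsigned int)next_cookie(0xffffffffu));
	return 0;
}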
*/ - if (i915_vma_is_map_and_fenceable(rq->ring->vma)) + struct drm_i915_private *dev_priv = to_i915(vma->obj->base.dev); + + if (i915_vma_is_map_and_fenceable(vma)) POSTING_READ_FW(GUC_STATUS); +} + +#define GUC_PREEMPT_FINISHED 0x1 +#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8 +static void inject_preempt_context(struct work_struct *work) +{ + struct guc_preempt_work *preempt_work = + container_of(work, typeof(*preempt_work), work); + struct intel_engine_cs *engine = preempt_work->engine; + struct intel_guc *guc = container_of(preempt_work, typeof(*guc), + preempt_work[engine->id]); + struct intel_guc_client *client = guc->preempt_client; + struct guc_stage_desc *stage_desc = __get_stage_desc(client); + struct intel_ring *ring = client->owner->engine[engine->id].ring; + u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner, + engine)); + u32 *cs = ring->vaddr + ring->tail; + u32 data[7]; + + if (engine->id == RCS) { + cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED, + intel_hws_preempt_done_address(engine)); + } else { + cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED, + intel_hws_preempt_done_address(engine)); + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + } + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; - spin_lock_irqsave(&client->wq_lock, flags); + GEM_BUG_ON(!IS_ALIGNED(ring->size, + GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32))); + GEM_BUG_ON((void *)cs - (ring->vaddr + ring->tail) != + GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32)); - guc_wq_item_append(client, rq); - b_ret = guc_ring_doorbell(client); + ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32); + ring->tail &= (ring->size - 1); - client->submissions[engine_id] += 1; + flush_ggtt_writes(ring->vma); - spin_unlock_irqrestore(&client->wq_lock, flags); + spin_lock_irq(&client->wq_lock); + guc_wq_item_append(client, engine->guc_id, ctx_desc, + ring->tail / sizeof(u64), 0); + spin_unlock_irq(&client->wq_lock); + + /* + * If GuC firmware performs an engine reset while that engine had + * a preemption pending, it will set the terminated attribute bit + * on our preemption stage descriptor. GuC firmware retains all + * pending work items for a high-priority GuC client, unlike the + * normal-priority GuC client where work items are dropped. It + * wants to make sure the preempt-to-idle work doesn't run when + * scheduling resumes, and uses this bit to inform its scheduler + * and presumably us as well. Our job is to clear it for the next + * preemption after reset, otherwise that and future preemptions + * will never complete. We'll just clear it every time. + */ + stage_desc->attribute &= ~GUC_STAGE_DESC_ATTR_TERMINATED; + + data[0] = INTEL_GUC_ACTION_REQUEST_PREEMPTION; + data[1] = client->stage_id; + data[2] = INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q | + INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q; + data[3] = engine->guc_id; + data[4] = guc->execbuf_client->priority; + data[5] = guc->execbuf_client->stage_id; + data[6] = guc_ggtt_offset(guc->shared_data); + + if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) { + execlists_clear_active(&engine->execlists, + EXECLISTS_ACTIVE_PREEMPT); + tasklet_schedule(&engine->execlists.tasklet); + } } -static void i915_guc_submit(struct drm_i915_gem_request *rq) +/* + * We're using user interrupt and HWSP value to mark that preemption has + * finished and GPU is idle. Normally, we could unwind and continue similar to + * execlists submission path. Unfortunately, with GuC we also need to wait for + * it to finish its own postprocessing, before attempting to submit. 
Otherwise + * GuC may silently ignore our submissions, and thus we risk losing request at + * best, executing out-of-order and causing kernel panic at worst. + */ +#define GUC_PREEMPT_POSTPROCESS_DELAY_MS 10 +static void wait_for_guc_preempt_report(struct intel_engine_cs *engine) { - __i915_gem_request_submit(rq); - __i915_guc_submit(rq); + struct intel_guc *guc = &engine->i915->guc; + struct guc_shared_ctx_data *data = guc->shared_data_vaddr; + struct guc_ctx_report *report = + &data->preempt_ctx_report[engine->guc_id]; + + WARN_ON(wait_for_atomic(report->report_return_status == + INTEL_GUC_REPORT_STATUS_COMPLETE, + GUC_PREEMPT_POSTPROCESS_DELAY_MS)); + /* + * GuC is expecting that we're also going to clear the affected context + * counter, let's also reset the return status to not depend on GuC + * resetting it after recieving another preempt action + */ + report->affected_count = 0; + report->report_return_status = INTEL_GUC_REPORT_STATUS_UNKNOWN; } -static void nested_enable_signaling(struct drm_i915_gem_request *rq) +/** + * guc_submit() - Submit commands through GuC + * @engine: engine associated with the commands + * + * The only error here arises if the doorbell hardware isn't functioning + * as expected, which really shouln't happen. + */ +static void guc_submit(struct intel_engine_cs *engine) { - /* If we use dma_fence_enable_sw_signaling() directly, lockdep - * detects an ordering issue between the fence lockclass and the - * global_timeline. This circular dependency can only occur via 2 - * different fences (but same fence lockclass), so we use the nesting - * annotation here to prevent the warn, equivalent to the nesting - * inside i915_gem_request_submit() for when we also enable the - * signaler. - */ + struct intel_guc *guc = &engine->i915->guc; + struct intel_engine_execlists * const execlists = &engine->execlists; + struct execlist_port *port = execlists->port; + unsigned int n; + + for (n = 0; n < execlists_num_ports(execlists); n++) { + struct drm_i915_gem_request *rq; + unsigned int count; - if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &rq->fence.flags)) - return; + rq = port_unpack(&port[n], &count); + if (rq && count == 0) { + port_set(&port[n], port_pack(rq, ++count)); - GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)); - trace_dma_fence_enable_signal(&rq->fence); + flush_ggtt_writes(rq->ring->vma); - spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING); - intel_engine_enable_signaling(rq, true); - spin_unlock(&rq->lock); + guc_add_request(guc, rq); + } + } } static void port_assign(struct execlist_port *port, struct drm_i915_gem_request *rq) { - GEM_BUG_ON(rq == port_request(port)); - - if (port_isset(port)) - i915_gem_request_put(port_request(port)); + GEM_BUG_ON(port_isset(port)); port_set(port, i915_gem_request_get(rq)); - nested_enable_signaling(rq); } -static bool i915_guc_dequeue(struct intel_engine_cs *engine) +static void guc_dequeue(struct intel_engine_cs *engine) { - struct execlist_port *port = engine->execlist_port; - struct drm_i915_gem_request *last = port_request(port); - struct rb_node *rb; + struct intel_engine_execlists * const execlists = &engine->execlists; + struct execlist_port *port = execlists->port; + struct drm_i915_gem_request *last = NULL; + const struct execlist_port * const last_port = + &execlists->port[execlists->port_mask]; bool submit = false; + struct rb_node *rb; spin_lock_irq(&engine->timeline->lock); - rb = engine->execlist_first; - GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb); - while (rb) { 
+ rb = execlists->first; + GEM_BUG_ON(rb_first(&execlists->queue) != rb); + + if (!rb) + goto unlock; + + if (port_isset(port)) { + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { + struct guc_preempt_work *preempt_work = + &engine->i915->guc.preempt_work[engine->id]; + + if (rb_entry(rb, struct i915_priolist, node)->priority > + max(port_request(port)->priotree.priority, 0)) { + execlists_set_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); + queue_work(engine->i915->guc.preempt_wq, + &preempt_work->work); + goto unlock; + } + } + + port++; + if (port_isset(port)) + goto unlock; + } + GEM_BUG_ON(port_isset(port)); + + do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct drm_i915_gem_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { if (last && rq->ctx != last->ctx) { - if (port != engine->execlist_port) { + if (port == last_port) { __list_del_many(&p->requests, &rq->priotree.link); goto done; @@ -687,102 +733,72 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&rq->priotree.link); - rq->priotree.priority = INT_MAX; - i915_guc_submit(rq); - trace_i915_gem_request_in(rq, port_index(port, engine)); + __i915_gem_request_submit(rq); + trace_i915_gem_request_in(rq, + port_index(port, execlists)); last = rq; submit = true; } rb = rb_next(rb); - rb_erase(&p->node, &engine->execlist_queue); + rb_erase(&p->node, &execlists->queue); INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } + } while (rb); done: - engine->execlist_first = rb; - if (submit) + execlists->first = rb; + if (submit) { port_assign(port, last); + execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); + guc_submit(engine); + } +unlock: spin_unlock_irq(&engine->timeline->lock); - - return submit; } -static void i915_guc_irq_handler(unsigned long data) +static void guc_submission_tasklet(unsigned long data) { - struct intel_engine_cs *engine = (struct intel_engine_cs *)data; - struct execlist_port *port = engine->execlist_port; + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + struct intel_engine_execlists * const execlists = &engine->execlists; + struct execlist_port *port = execlists->port; struct drm_i915_gem_request *rq; - bool submit; - do { + rq = port_request(&port[0]); + while (rq && i915_gem_request_completed(rq)) { + trace_i915_gem_request_out(rq); + i915_gem_request_put(rq); + + execlists_port_complete(execlists, port); + rq = port_request(&port[0]); - while (rq && i915_gem_request_completed(rq)) { - trace_i915_gem_request_out(rq); - i915_gem_request_put(rq); + } + if (!rq) + execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); - port[0] = port[1]; - memset(&port[1], 0, sizeof(port[1])); + if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && + intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == + GUC_PREEMPT_FINISHED) { + execlists_cancel_port_requests(&engine->execlists); + execlists_unwind_incomplete_requests(execlists); - rq = port_request(&port[0]); - } + wait_for_guc_preempt_report(engine); - submit = false; - if (!port_count(&port[1])) - submit = i915_guc_dequeue(engine); - } while (submit); + execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT); + intel_write_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX, 0); + } + + if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) + guc_dequeue(engine); } /* * Everything below here is concerned with setup & teardown, and is * therefore not part of the somewhat 
time-critical batch-submission - * path of i915_guc_submit() above. + * path of guc_submit() above. */ -/** - * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage - * @guc: the guc - * @size: size of area to allocate (both virtual space and memory) - * - * This is a wrapper to create an object for use with the GuC. In order to - * use it inside the GuC, an object needs to be pinned lifetime, so we allocate - * both some backing storage and a range inside the Global GTT. We must pin - * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that - * range is reserved inside GuC. - * - * Return: A i915_vma if successful, otherwise an ERR_PTR. - */ -struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int ret; - - obj = i915_gem_object_create(dev_priv, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); - if (IS_ERR(vma)) - goto err; - - ret = i915_vma_pin(vma, 0, PAGE_SIZE, - PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); - if (ret) { - vma = ERR_PTR(ret); - goto err; - } - - return vma; - -err: - i915_gem_object_put(obj); - return vma; -} - /* Check that a doorbell register is in the expected state */ static bool doorbell_ok(struct intel_guc *guc, u16 db_id) { @@ -804,101 +820,67 @@ static bool doorbell_ok(struct intel_guc *guc, u16 db_id) return false; } -/* - * If the GuC thinks that the doorbell is unassigned (e.g. because we reset and - * reloaded the GuC FW) we can use this function to tell the GuC to reassign the - * doorbell to the rightful owner. - */ -static int __reset_doorbell(struct i915_guc_client* client, u16 db_id) +static bool guc_verify_doorbells(struct intel_guc *guc) { - int err; + u16 db_id; - __update_doorbell_desc(client, db_id); - err = __create_doorbell(client); - if (!err) - err = __destroy_doorbell(client); + for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) + if (!doorbell_ok(guc, db_id)) + return false; - return err; + return true; } -/* - * Set up & tear down each unused doorbell in turn, to ensure that all doorbell - * HW is (re)initialised. For that end, we might have to borrow the first - * client. Also, tell GuC about all the doorbells in use by all clients. - * We do this because the KMD, the GuC and the doorbell HW can easily go out of - * sync (e.g. we can reset the GuC, but not the doorbel HW). 
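A compact model of the guc_verify_doorbells()/doorbell_ok() walk introduced above: every doorbell id must agree between the driver's bookkeeping and the (here simulated) hardware enable state. The doorbell count and names are illustrative, not the driver's definitions.

#include <stdbool.h>
#include <stdio.h>

#define NUM_DOORBELLS_SKETCH 256	/* assumed pool size */

static bool sw_allocated[NUM_DOORBELLS_SKETCH];	/* driver bookkeeping */
static bool hw_enabled[NUM_DOORBELLS_SKETCH];	/* simulated HW state */

static bool doorbell_ok_sketch(unsigned int id)
{
	/* valid iff SW and HW agree on whether this doorbell is live */
	return sw_allocated[id] == hw_enabled[id];
}

static bool verify_doorbells_sketch(void)
{
	unsigned int id;

	for (id = 0; id < NUM_DOORBELLS_SKETCH; id++)
		if (!doorbell_ok_sketch(id))
			return false;
	return true;
}

int main(void)
{
	sw_allocated[3] = hw_enabled[3] = true;		/* consistent */
	printf("doorbells %s\n",
	       verify_doorbells_sketch() ? "ok" : "out of sync");
	return 0;
}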
- */ -static int guc_init_doorbell_hw(struct intel_guc *guc) +static int guc_clients_doorbell_init(struct intel_guc *guc) { - struct i915_guc_client *client = guc->execbuf_client; - bool recreate_first_client = false; - u16 db_id; int ret; - /* For unused doorbells, make sure they are disabled */ - for_each_clear_bit(db_id, guc->doorbell_bitmap, GUC_NUM_DOORBELLS) { - if (doorbell_ok(guc, db_id)) - continue; - - if (has_doorbell(client)) { - /* Borrow execbuf_client (we will recreate it later) */ - destroy_doorbell(client); - recreate_first_client = true; - } - - ret = __reset_doorbell(client, db_id); - WARN(ret, "Doorbell %u reset failed, err %d\n", db_id, ret); - } - - if (recreate_first_client) { - ret = __reserve_doorbell(client); - if (unlikely(ret)) { - DRM_ERROR("Couldn't re-reserve first client db: %d\n", ret); - return ret; - } - - __update_doorbell_desc(client, client->doorbell_id); - } + ret = create_doorbell(guc->execbuf_client); + if (ret) + return ret; - /* Now for every client (and not only execbuf_client) make sure their - * doorbells are known by the GuC */ - //for (client = client_list; client != NULL; client = client->next) - { - ret = __create_doorbell(client); - if (ret) { - DRM_ERROR("Couldn't recreate client %u doorbell: %d\n", - client->stage_id, ret); - return ret; - } + ret = create_doorbell(guc->preempt_client); + if (ret) { + destroy_doorbell(guc->execbuf_client); + return ret; } - /* Read back & verify all (used & unused) doorbell registers */ - for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) - WARN_ON(!doorbell_ok(guc, db_id)); - return 0; } +static void guc_clients_doorbell_fini(struct intel_guc *guc) +{ + /* + * By the time we're here, GuC has already been reset. + * Instead of trying (in vain) to communicate with it, let's just + * clean up the doorbell HW and our internal state. + */ + __destroy_doorbell(guc->preempt_client); + __update_doorbell_desc(guc->preempt_client, GUC_DOORBELL_INVALID); + __destroy_doorbell(guc->execbuf_client); + __update_doorbell_desc(guc->execbuf_client, GUC_DOORBELL_INVALID); +} + /** - * guc_client_alloc() - Allocate an i915_guc_client + * guc_client_alloc() - Allocate an intel_guc_client * @dev_priv: driver private data structure * @engines: The set of engines to enable for this client * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW - * The kernel client to replace ExecList submission is created with - * NORMAL priority. Priority of a client for scheduler can be HIGH, - * while a preemption context can use CRITICAL. + * The kernel client to replace ExecList submission is created with + * NORMAL priority. Priority of a client for scheduler can be HIGH, + * while a preemption context can use CRITICAL. * @ctx: the context that owns the client (we use the default render - * context) + * context) * - * Return: An i915_guc_client object if success, else NULL. + * Return: An intel_guc_client object on success, otherwise an ERR_PTR.
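The two-stage init in guc_clients_doorbell_init() above follows the usual unwind rule: if the preempt client's doorbell cannot be created, the execbuf client's doorbell must be torn down before the error propagates. A self-contained sketch of that shape, with stubbed create/destroy helpers standing in for the real ones:

#include <stdio.h>

struct client_sketch { const char *name; int has_db; };

static int create_doorbell_sketch(struct client_sketch *c)
{
	c->has_db = 1;
	printf("created doorbell for %s\n", c->name);
	return 0;	/* pretend success; a real path returns -errno */
}

static void destroy_doorbell_sketch(struct client_sketch *c)
{
	c->has_db = 0;
	printf("destroyed doorbell for %s\n", c->name);
}

static int doorbells_init_sketch(struct client_sketch *execbuf,
				 struct client_sketch *preempt)
{
	int ret;

	ret = create_doorbell_sketch(execbuf);
	if (ret)
		return ret;

	ret = create_doorbell_sketch(preempt);
	if (ret) {
		destroy_doorbell_sketch(execbuf);	/* unwind stage one */
		return ret;
	}

	return 0;
}

int main(void)
{
	struct client_sketch execbuf = { "execbuf", 0 };
	struct client_sketch preempt = { "preempt", 0 };

	return doorbells_init_sketch(&execbuf, &preempt);
}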
*/ -static struct i915_guc_client * +static struct intel_guc_client * guc_client_alloc(struct drm_i915_private *dev_priv, - uint32_t engines, - uint32_t priority, + u32 engines, + u32 priority, struct i915_gem_context *ctx) { - struct i915_guc_client *client; + struct intel_guc_client *client; struct intel_guc *guc = &dev_priv->guc; struct i915_vma *vma; void *vaddr; @@ -913,12 +895,10 @@ guc_client_alloc(struct drm_i915_private *dev_priv, client->engines = engines; client->priority = priority; client->doorbell_id = GUC_DOORBELL_INVALID; - client->wq_offset = GUC_DB_SIZE; - client->wq_size = GUC_WQ_SIZE; spin_lock_init(&client->wq_lock); ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS, - GFP_KERNEL); + GFP_KERNEL); if (ret < 0) goto err_client; @@ -956,7 +936,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv, guc_proc_desc_init(guc, client); guc_stage_desc_init(guc, client); - ret = create_doorbell(client); + ret = reserve_doorbell(client); if (ret) goto err_vaddr; @@ -978,17 +958,9 @@ err_client: return ERR_PTR(ret); } -static void guc_client_free(struct i915_guc_client *client) +static void guc_client_free(struct intel_guc_client *client) { - /* - * XXX: wait for any outstanding submissions before freeing memory. - * Be sure to drop any locks - */ - - /* FIXME: in many cases, by the time we get here the GuC has been - * reset, so we cannot destroy the doorbell properly. Ignore the - * error message for now */ - destroy_doorbell(client); + unreserve_doorbell(client); guc_stage_desc_fini(client->guc, client); i915_gem_object_unpin_map(client->vma->obj); i915_vma_unpin_and_release(&client->vma); @@ -996,28 +968,83 @@ static void guc_client_free(struct i915_guc_client *client) kfree(client); } +static int guc_clients_create(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_guc_client *client; + + GEM_BUG_ON(guc->execbuf_client); + GEM_BUG_ON(guc->preempt_client); + + client = guc_client_alloc(dev_priv, + INTEL_INFO(dev_priv)->ring_mask, + GUC_CLIENT_PRIORITY_KMD_NORMAL, + dev_priv->kernel_context); + if (IS_ERR(client)) { + DRM_ERROR("Failed to create GuC client for submission!\n"); + return PTR_ERR(client); + } + guc->execbuf_client = client; + + client = guc_client_alloc(dev_priv, + INTEL_INFO(dev_priv)->ring_mask, + GUC_CLIENT_PRIORITY_KMD_HIGH, + dev_priv->preempt_context); + if (IS_ERR(client)) { + DRM_ERROR("Failed to create GuC client for preemption!\n"); + guc_client_free(guc->execbuf_client); + guc->execbuf_client = NULL; + return PTR_ERR(client); + } + guc->preempt_client = client; + + return 0; +} + +static void guc_clients_destroy(struct intel_guc *guc) +{ + struct intel_guc_client *client; + + client = fetch_and_zero(&guc->execbuf_client); + guc_client_free(client); + + client = fetch_and_zero(&guc->preempt_client); + guc_client_free(client); +} + +static void guc_policy_init(struct guc_policy *policy) +{ + policy->execution_quantum = POLICY_DEFAULT_EXECUTION_QUANTUM_US; + policy->preemption_time = POLICY_DEFAULT_PREEMPTION_TIME_US; + policy->fault_time = POLICY_DEFAULT_FAULT_TIME_US; + policy->policy_flags = 0; +} + static void guc_policies_init(struct guc_policies *policies) { struct guc_policy *policy; u32 p, i; - policies->dpc_promote_time = 500000; + policies->dpc_promote_time = POLICY_DEFAULT_DPC_PROMOTE_TIME_US; policies->max_num_work_items = POLICY_MAX_NUM_WI; for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) { for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) { policy = &policies->policy[p][i]; 
- policy->execution_quantum = 1000000; - policy->preemption_time = 500000; - policy->fault_time = 250000; - policy->policy_flags = 0; + guc_policy_init(policy); } } policies->is_valid = 1; } +/* + * The first 80 dwords of the register state context, containing the + * execlists and ppgtt registers. + */ +#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) + static int guc_ads_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -1032,6 +1059,8 @@ static int guc_ads_create(struct intel_guc *guc) } __packed *blob; struct intel_engine_cs *engine; enum intel_engine_id id; + const u32 skipped_offset = LRC_HEADER_PAGES * PAGE_SIZE; + const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; u32 base; GEM_BUG_ON(guc->ads_vma); @@ -1062,13 +1091,22 @@ static int guc_ads_create(struct intel_guc *guc) * engines after a reset. Here we use the Render ring default * context, which must already exist and be pinned in the GGTT, * so its address won't change after we've told the GuC where - * to find it. + * to find it. Note that we have to skip our header (1 page), + * because our GuC shared data is there. */ blob->ads.golden_context_lrca = - dev_priv->engine[RCS]->status_page.ggtt_offset; + guc_ggtt_offset(dev_priv->kernel_context->engine[RCS].state) + + skipped_offset; + /* + * The GuC expects us to exclude the portion of the context image that + * it skips from the size it is to read. It starts reading from after + * the execlist context (so skipping the first page [PPHWSP] and 80 + * dwords). Weird guc is weird. + */ for_each_engine(engine, dev_priv, id) - blob->ads.eng_state_size[engine->guc_id] = engine->context_size; + blob->ads.eng_state_size[engine->guc_id] = + engine->context_size - skipped_size; base = guc_ggtt_offset(vma); blob->ads.scheduler_policies = base + ptr_offset(blob, policies); @@ -1089,71 +1127,80 @@ static void guc_ads_destroy(struct intel_guc *guc) * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. */ -int i915_guc_submission_init(struct drm_i915_private *dev_priv) +int intel_guc_submission_init(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; - struct i915_vma *vma; - void *vaddr; + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; int ret; if (guc->stage_desc_pool) return 0; - vma = intel_guc_allocate_vma(guc, - PAGE_ALIGN(sizeof(struct guc_stage_desc) * - GUC_MAX_STAGE_DESCRIPTORS)); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - guc->stage_desc_pool = vma; - - vaddr = i915_gem_object_pin_map(guc->stage_desc_pool->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err_vma; - } - - guc->stage_desc_pool_vaddr = vaddr; + ret = guc_stage_desc_pool_create(guc); + if (ret) + return ret; + /* + * Keep static analysers happy, let them know that we allocated the + * vma after testing that it didn't exist earlier. 
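The golden-context bookkeeping above reduces to two small sums, shown here with assumed constants (one header page, one PPHWSP page, 4 KiB pages) and a made-up engine context size; only LR_HW_CONTEXT_SIZE (80 dwords) is taken directly from the hunk:

#include <stdio.h>

#define PAGE_SIZE_SKETCH	4096u
#define LRC_HEADER_PAGES_SKETCH	1u	/* assumed: header page holds GuC shared data */
#define LRC_PPHWSP_SZ_SKETCH	1u	/* assumed: one per-process HWSP page */
#define LR_HW_CONTEXT_SIZE	(80u * 4u)	/* execlists + ppgtt regs */

int main(void)
{
	unsigned int context_size = 20u * PAGE_SIZE_SKETCH;	/* illustrative */
	unsigned int skipped_offset = LRC_HEADER_PAGES_SKETCH * PAGE_SIZE_SKETCH;
	unsigned int skipped_size =
		LRC_PPHWSP_SZ_SKETCH * PAGE_SIZE_SKETCH + LR_HW_CONTEXT_SIZE;

	printf("golden_context_lrca = state + %u bytes\n", skipped_offset);
	printf("eng_state_size = %u - %u = %u bytes\n",
	       context_size, skipped_size, context_size - skipped_size);
	return 0;
}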
+ */ + GEM_BUG_ON(!guc->stage_desc_pool); ret = intel_guc_log_create(guc); if (ret < 0) - goto err_vaddr; + goto err_stage_desc_pool; ret = guc_ads_create(guc); if (ret < 0) goto err_log; + GEM_BUG_ON(!guc->ads_vma); - ida_init(&guc->stage_ids); + WARN_ON(!guc_verify_doorbells(guc)); + ret = guc_clients_create(guc); + if (ret) + return ret; + + for_each_engine(engine, dev_priv, id) { + guc->preempt_work[id].engine = engine; + INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context); + } return 0; err_log: intel_guc_log_destroy(guc); -err_vaddr: - i915_gem_object_unpin_map(guc->stage_desc_pool->obj); -err_vma: - i915_vma_unpin_and_release(&guc->stage_desc_pool); +err_stage_desc_pool: + guc_stage_desc_pool_destroy(guc); return ret; } -void i915_guc_submission_fini(struct drm_i915_private *dev_priv) +void intel_guc_submission_fini(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) + cancel_work_sync(&guc->preempt_work[id].work); + + guc_clients_destroy(guc); + WARN_ON(!guc_verify_doorbells(guc)); - ida_destroy(&guc->stage_ids); guc_ads_destroy(guc); intel_guc_log_destroy(guc); - i915_gem_object_unpin_map(guc->stage_desc_pool->obj); - i915_vma_unpin_and_release(&guc->stage_desc_pool); + guc_stage_desc_pool_destroy(guc); } static void guc_interrupts_capture(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct intel_engine_cs *engine; enum intel_engine_id id; int irqs; - /* tell all command streamers to forward interrupts (but not vblank) to GuC */ + /* tell all command streamers to forward interrupts (but not vblank) + * to GuC + */ irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); for_each_engine(engine, dev_priv, id) I915_WRITE(RING_MODE_GEN7(engine), irqs); @@ -1185,12 +1232,13 @@ static void guc_interrupts_capture(struct drm_i915_private *dev_priv) * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will * result in the register bit being left SET! 
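The GFX_INTERRUPT_STEERING writes in guc_interrupts_capture() above rely on the i915 masked-register convention: the top 16 bits of the written value select which of the bottom 16 bits take effect, so no read-modify-write cycle is needed. A compact model of the _MASKED_BIT_ENABLE/_MASKED_BIT_DISABLE helpers (the bit position is illustrative):

#include <stdint.h>
#include <stdio.h>

static uint32_t masked_bit_enable(uint32_t bit)
{
	return (bit << 16) | bit;	/* mask in the high half, value in the low */
}

static uint32_t masked_bit_disable(uint32_t bit)
{
	return bit << 16;		/* mask set, value cleared */
}

int main(void)
{
	uint32_t steering = 1u << 4;	/* illustrative bit */

	printf("enable:  %08x\n", (unsigned int)masked_bit_enable(steering));
	printf("disable: %08x\n", (unsigned int)masked_bit_disable(steering));
	return 0;
}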
*/ - dev_priv->rps.pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; - dev_priv->rps.pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; + rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; } static void guc_interrupts_release(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; struct intel_engine_cs *engine; enum intel_engine_id id; int irqs; @@ -1209,134 +1257,83 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv) I915_WRITE(GUC_VCS2_VCS1_IER, 0); I915_WRITE(GUC_WD_VECS_IER, 0); - dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; - dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; + rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK; } -int i915_guc_submission_enable(struct drm_i915_private *dev_priv) +static void guc_submission_park(struct intel_engine_cs *engine) { - struct intel_guc *guc = &dev_priv->guc; - struct i915_guc_client *client = guc->execbuf_client; + intel_engine_unpin_breadcrumbs_irq(engine); +} + +static void guc_submission_unpark(struct intel_engine_cs *engine) +{ + intel_engine_pin_breadcrumbs_irq(engine); +} + +int intel_guc_submission_enable(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_engine_cs *engine; enum intel_engine_id id; int err; - if (!client) { - client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, - GUC_CLIENT_PRIORITY_KMD_NORMAL, - dev_priv->kernel_context); - if (IS_ERR(client)) { - DRM_ERROR("Failed to create GuC client for execbuf!\n"); - return PTR_ERR(client); - } + /* + * We're using GuC work items for submitting work through GuC. Since + * we're coalescing multiple requests from a single context into a + * single work item prior to assigning it to execlist_port, we can + * never have more work items than the total number of ports (for all + * engines). The GuC firmware is controlling the HEAD of work queue, + * and it is guaranteed that it will remove the work item from the + * queue before our request is completed. + */ + BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.port) * + sizeof(struct guc_wq_item) * + I915_NUM_ENGINES > GUC_WQ_SIZE); - guc->execbuf_client = client; - } + GEM_BUG_ON(!guc->execbuf_client); + + guc_reset_wq(guc->execbuf_client); + guc_reset_wq(guc->preempt_client); err = intel_guc_sample_forcewake(guc); if (err) - goto err_execbuf_client; - - guc_reset_wq(client); + return err; - err = guc_init_doorbell_hw(guc); + err = guc_clients_doorbell_init(guc); if (err) - goto err_execbuf_client; + return err; /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(dev_priv); for_each_engine(engine, dev_priv, id) { - const int wqi_size = sizeof(struct guc_wq_item); - struct drm_i915_gem_request *rq; + struct intel_engine_execlists * const execlists = + &engine->execlists; - /* The tasklet was initialised by execlists, and may be in - * a state of flux (across a reset) and so we just want to - * take over the callback without changing any other state - * in the tasklet. 
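The BUILD_BUG_ON() in intel_guc_submission_enable() above is a compile-time sizing proof: with at most one coalesced work item per execlist port per engine in flight, the queue cannot overflow. A worked instance, using the 16-byte item size asserted earlier and assumed counts of 2 ports and 5 engines against the two-page work queue:

#include <stdio.h>

#define EXECLIST_PORTS_SKETCH	2	/* assumed */
#define WQ_ITEM_BYTES		16	/* BUILD_BUG_ON(wqi_size != 16) above */
#define NUM_ENGINES_SKETCH	5	/* assumed */
#define GUC_WQ_SIZE_SKETCH	(2 * 4096)	/* "workqueue buffer is 2 pages" */

_Static_assert(EXECLIST_PORTS_SKETCH * WQ_ITEM_BYTES * NUM_ENGINES_SKETCH <=
	       GUC_WQ_SIZE_SKETCH, "work queue could overflow");

int main(void)
{
	printf("worst case in flight: %d of %d bytes\n",
	       EXECLIST_PORTS_SKETCH * WQ_ITEM_BYTES * NUM_ENGINES_SKETCH,
	       GUC_WQ_SIZE_SKETCH);
	return 0;
}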
- */ - engine->irq_tasklet.func = i915_guc_irq_handler; - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - - /* Replay the current set of previously submitted requests */ - spin_lock_irq(&engine->timeline->lock); - list_for_each_entry(rq, &engine->timeline->requests, link) { - guc_client_update_wq_rsvd(client, wqi_size); - __i915_guc_submit(rq); - } - spin_unlock_irq(&engine->timeline->lock); + execlists->tasklet.func = guc_submission_tasklet; + engine->park = guc_submission_park; + engine->unpark = guc_submission_unpark; + + engine->flags &= ~I915_ENGINE_SUPPORTS_STATS; } return 0; - -err_execbuf_client: - guc_client_free(guc->execbuf_client); - guc->execbuf_client = NULL; - return err; } -void i915_guc_submission_disable(struct drm_i915_private *dev_priv) +void intel_guc_submission_disable(struct intel_guc *guc) { - struct intel_guc *guc = &dev_priv->guc; + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + GEM_BUG_ON(dev_priv->gt.awake); /* GT should be parked first */ guc_interrupts_release(dev_priv); + guc_clients_doorbell_fini(guc); /* Revert back to manual ELSP submission */ intel_engines_reset_default_submission(dev_priv); - - guc_client_free(guc->execbuf_client); - guc->execbuf_client = NULL; } -/** - * intel_guc_suspend() - notify GuC entering suspend state - * @dev_priv: i915 device private - */ -int intel_guc_suspend(struct drm_i915_private *dev_priv) -{ - struct intel_guc *guc = &dev_priv->guc; - struct i915_gem_context *ctx; - u32 data[3]; - - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - gen9_disable_guc_interrupts(dev_priv); - - ctx = dev_priv->kernel_context; - - data[0] = INTEL_GUC_ACTION_ENTER_S_STATE; - /* any value greater than GUC_POWER_D0 */ - data[1] = GUC_POWER_D1; - /* first page is shared data with GuC */ - data[2] = guc_ggtt_offset(ctx->engine[RCS].state); - - return intel_guc_send(guc, data, ARRAY_SIZE(data)); -} - -/** - * intel_guc_resume() - notify GuC resuming from suspend state - * @dev_priv: i915 device private - */ -int intel_guc_resume(struct drm_i915_private *dev_priv) -{ - struct intel_guc *guc = &dev_priv->guc; - struct i915_gem_context *ctx; - u32 data[3]; - - if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return 0; - - if (i915.guc_log_level >= 0) - gen9_enable_guc_interrupts(dev_priv); - - ctx = dev_priv->kernel_context; - - data[0] = INTEL_GUC_ACTION_EXIT_S_STATE; - data[1] = GUC_POWER_D0; - /* first page is shared data with GuC */ - data[2] = guc_ggtt_offset(ctx->engine[RCS].state); - - return intel_guc_send(guc, data, ARRAY_SIZE(data)); -} +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_guc.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h new file mode 100644 index 000000000000..fb081cefef93 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_submission.h @@ -0,0 +1,83 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial 
portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_GUC_SUBMISSION_H_ +#define _INTEL_GUC_SUBMISSION_H_ + +#include <linux/spinlock.h> + +#include "i915_gem.h" + +struct drm_i915_private; + +/* + * This structure primarily describes the GEM object shared with the GuC. + * The specs sometimes refer to this object as a "GuC context", but we use + * the term "client" to avoid confusion with hardware contexts. This + * GEM object is held for the entire lifetime of our interaction with + * the GuC, being allocated before the GuC is loaded with its firmware. + * Because there's no way to update the address used by the GuC after + * initialisation, the shared object must stay pinned into the GGTT as + * long as the GuC is in use. We also keep the first page (only) mapped + * into kernel address space, as it includes shared data that must be + * updated on every request submission. + * + * The single GEM object described here is actually made up of several + * separate areas, as far as the GuC is concerned. The first page (kept + * kmap'd) includes the "process descriptor" which holds sequence data for + * the doorbell, and one cacheline which actually *is* the doorbell; a + * write to this will "ring the doorbell" (i.e. send an interrupt to the + * GuC). The subsequent pages of the client object constitute the work + * queue (a circular array of work items), again described in the process + * descriptor. Work queue pages are mapped momentarily as required. 
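A minimal model of the circular work queue this comment describes: the driver advances the tail, the GuC consumes from the head, and free space follows the kernel's CIRC_SPACE() convention for power-of-two sizes, with one slot always kept free so that full and empty are distinguishable. The two-page size comes from the process-descriptor comment earlier in this patch.

#include <stdint.h>
#include <stdio.h>

#define WQ_SIZE_SKETCH	(2 * 4096)
#define ITEM_SIZE	16

/* matches CIRC_SPACE(prod, cons, size) for power-of-two sizes */
static uint32_t wq_space(uint32_t prod, uint32_t cons)
{
	return (cons - prod - 1) & (WQ_SIZE_SKETCH - 1);
}

int main(void)
{
	uint32_t head = 0, tail = 0;	/* head: GuC, tail: driver */

	/* producer side: append one item if it fits, then wrap the tail */
	if (wq_space(tail, head) >= ITEM_SIZE)
		tail = (tail + ITEM_SIZE) & (WQ_SIZE_SKETCH - 1);

	printf("tail=%u, space left=%u\n",
	       (unsigned int)tail, (unsigned int)wq_space(tail, head));
	return 0;
}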
+ */ +struct intel_guc_client { + struct i915_vma *vma; + void *vaddr; + struct i915_gem_context *owner; + struct intel_guc *guc; + + /* bitmap of (host) engine ids */ + u32 engines; + u32 priority; + u32 stage_id; + u32 proc_desc_offset; + + u16 doorbell_id; + unsigned long doorbell_offset; + + /* Protects GuC client's WQ access */ + spinlock_t wq_lock; + /* Per-engine counts of GuC submissions */ + u64 submissions[I915_NUM_ENGINES]; +}; + +int intel_guc_submission_init(struct intel_guc *guc); +int intel_guc_submission_enable(struct intel_guc *guc); +void intel_guc_submission_disable(struct intel_guc *guc); +void intel_guc_submission_fini(struct intel_guc *guc); +int intel_guc_preempt_work_create(struct intel_guc *guc); +void intel_guc_preempt_work_destroy(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index c17ed0e62b67..a2fe7c8d4477 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -58,7 +58,7 @@ static bool is_supported_device(struct drm_i915_private *dev_priv) */ void intel_gvt_sanitize_options(struct drm_i915_private *dev_priv) { - if (!i915.enable_gvt) + if (!i915_modparams.enable_gvt) return; if (intel_vgpu_active(dev_priv)) { @@ -73,7 +73,7 @@ void intel_gvt_sanitize_options(struct drm_i915_private *dev_priv) return; bail: - i915.enable_gvt = 0; + i915_modparams.enable_gvt = 0; } /** @@ -90,17 +90,12 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) { int ret; - if (!i915.enable_gvt) { + if (!i915_modparams.enable_gvt) { DRM_DEBUG_DRIVER("GVT-g is disabled by kernel params\n"); return 0; } - if (!i915.enable_execlists) { - DRM_ERROR("i915 GVT-g loading failed due to disabled execlists mode\n"); - return -EIO; - } - - if (i915.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) { DRM_ERROR("i915 GVT-g loading failed due to Graphics virtualization is not yet supported with GuC submission\n"); return -EIO; } @@ -123,7 +118,7 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) return 0; bail: - i915.enable_gvt = 0; + i915_modparams.enable_gvt = 0; return 0; } diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index d9d87d96fb69..348a4f7ffb67 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -27,13 +27,9 @@ static bool ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) { - if (INTEL_GEN(engine->i915) >= 8) { - return (ipehr >> 23) == 0x1c; - } else { - ipehr &= ~MI_SEMAPHORE_SYNC_MASK; - return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | - MI_SEMAPHORE_REGISTER); - } + ipehr &= ~MI_SEMAPHORE_SYNC_MASK; + return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | + MI_SEMAPHORE_REGISTER); } static struct intel_engine_cs * @@ -41,31 +37,20 @@ semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr, u64 offset) { struct drm_i915_private *dev_priv = engine->i915; + u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK; struct intel_engine_cs *signaller; enum intel_engine_id id; - if (INTEL_GEN(dev_priv) >= 8) { - for_each_engine(signaller, dev_priv, id) { - if (engine == signaller) - continue; - - if (offset == signaller->semaphore.signal_ggtt[engine->hw_id]) - return signaller; - } - } else { - u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK; + for_each_engine(signaller, dev_priv, id) { + if (engine == signaller) + continue; - for_each_engine(signaller, dev_priv, id) { - if(engine == signaller) - continue; - - if (sync_bits == 
signaller->semaphore.mbox.wait[engine->hw_id]) - return signaller; - } + if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id]) + return signaller; } - DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x, offset 0x%016llx\n", - engine->name, ipehr, offset); + DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n", + engine->name, ipehr); return ERR_PTR(-ENODEV); } @@ -135,11 +120,6 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) return NULL; *seqno = ioread32(vaddr + head + 4) + 1; - if (INTEL_GEN(dev_priv) >= 8) { - offset = ioread32(vaddr + head + 12); - offset <<= 32; - offset |= ioread32(vaddr + head + 8); - } return semaphore_wait_to_signaller_ring(engine, ipehr, offset); } @@ -273,7 +253,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) return ENGINE_WAIT_KICK; } - if (INTEL_GEN(dev_priv) >= 6 && tmp & RING_WAIT_SEMAPHORE) { + if (IS_GEN(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) { switch (semaphore_passed(engine)) { default: return ENGINE_DEAD; @@ -369,13 +349,18 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, case ENGINE_ACTIVE_HEAD: case ENGINE_ACTIVE_SUBUNITS: - /* Seqno stuck with still active engine gets leeway, + /* + * Seqno stuck with still active engine gets leeway, * in hopes that it is just a long shader. */ timeout = I915_SEQNO_DEAD_TIMEOUT; break; case ENGINE_DEAD: + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer("hangcheck"); + intel_engine_dump(engine, &p, "%s", engine->name); + } break; default: @@ -426,9 +411,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int hung = 0, stuck = 0; - int busy_count = 0; - if (!i915.enable_hangcheck) + if (!i915_modparams.enable_hangcheck) return; if (!READ_ONCE(dev_priv->gt.awake)) @@ -444,30 +428,26 @@ static void i915_hangcheck_elapsed(struct work_struct *work) intel_uncore_arm_unclaimed_mmio_detection(dev_priv); for_each_engine(engine, dev_priv, id) { - struct intel_engine_hangcheck cur_state, *hc = &cur_state; - const bool busy = intel_engine_has_waiter(engine); + struct intel_engine_hangcheck hc; semaphore_clear_deadlocks(dev_priv); - hangcheck_load_sample(engine, hc); - hangcheck_accumulate_sample(engine, hc); - hangcheck_store_sample(engine, hc); + hangcheck_load_sample(engine, &hc); + hangcheck_accumulate_sample(engine, &hc); + hangcheck_store_sample(engine, &hc); if (engine->hangcheck.stalled) { hung |= intel_engine_flag(engine); - if (hc->action != ENGINE_DEAD) + if (hc.action != ENGINE_DEAD) stuck |= intel_engine_flag(engine); } - - busy_count += busy; } if (hung) hangcheck_declare_hang(dev_priv, hung, stuck); /* Reset timer in case GPU hangs without another request being added */ - if (busy_count) - i915_queue_hangcheck(dev_priv); + i915_queue_hangcheck(dev_priv); } void intel_engine_init_hangcheck(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index e8abea7594ec..179d0ad3889d 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -70,7 +70,7 @@ static struct intel_hdmi *intel_attached_hdmi(struct drm_connector *connector) return enc_to_intel_hdmi(&intel_attached_encoder(connector)->base); } -static u32 g4x_infoframe_index(enum hdmi_infoframe_type type) +static u32 g4x_infoframe_index(unsigned int type) { switch (type) { case HDMI_INFOFRAME_TYPE_AVI: @@ -85,7 +85,7 @@ static u32 g4x_infoframe_index(enum 
hdmi_infoframe_type type) } } -static u32 g4x_infoframe_enable(enum hdmi_infoframe_type type) +static u32 g4x_infoframe_enable(unsigned int type) { switch (type) { case HDMI_INFOFRAME_TYPE_AVI: @@ -100,9 +100,11 @@ static u32 g4x_infoframe_enable(enum hdmi_infoframe_type type) } } -static u32 hsw_infoframe_enable(enum hdmi_infoframe_type type) +static u32 hsw_infoframe_enable(unsigned int type) { switch (type) { + case DP_SDP_VSC: + return VIDEO_DIP_ENABLE_VSC_HSW; case HDMI_INFOFRAME_TYPE_AVI: return VIDEO_DIP_ENABLE_AVI_HSW; case HDMI_INFOFRAME_TYPE_SPD: @@ -118,10 +120,12 @@ static u32 hsw_infoframe_enable(enum hdmi_infoframe_type type) static i915_reg_t hsw_dip_data_reg(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder, - enum hdmi_infoframe_type type, + unsigned int type, int i) { switch (type) { + case DP_SDP_VSC: + return HSW_TVIDEO_DIP_VSC_DATA(cpu_transcoder, i); case HDMI_INFOFRAME_TYPE_AVI: return HSW_TVIDEO_DIP_AVI_DATA(cpu_transcoder, i); case HDMI_INFOFRAME_TYPE_SPD: @@ -136,7 +140,7 @@ hsw_dip_data_reg(struct drm_i915_private *dev_priv, static void g4x_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -182,7 +186,7 @@ static bool g4x_infoframe_enabled(struct drm_encoder *encoder, if ((val & VIDEO_DIP_ENABLE) == 0) return false; - if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->port)) + if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->base.port)) return false; return val & (VIDEO_DIP_ENABLE_AVI | @@ -191,7 +195,7 @@ static bool g4x_infoframe_enabled(struct drm_encoder *encoder, static void ibx_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -241,7 +245,7 @@ static bool ibx_infoframe_enabled(struct drm_encoder *encoder, if ((val & VIDEO_DIP_ENABLE) == 0) return false; - if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->port)) + if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->base.port)) return false; return val & (VIDEO_DIP_ENABLE_AVI | @@ -251,7 +255,7 @@ static bool ibx_infoframe_enabled(struct drm_encoder *encoder, static void cpt_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -309,7 +313,7 @@ static bool cpt_infoframe_enabled(struct drm_encoder *encoder, static void vlv_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -358,7 +362,7 @@ static bool vlv_infoframe_enabled(struct drm_encoder *encoder, if ((val & VIDEO_DIP_ENABLE) == 0) return false; - if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->port)) + if ((val & VIDEO_DIP_PORT_MASK) != VIDEO_DIP_PORT(intel_dig_port->base.port)) return false; return val & (VIDEO_DIP_ENABLE_AVI | @@ -368,7 +372,7 @@ static bool vlv_infoframe_enabled(struct drm_encoder *encoder, static void hsw_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, - enum hdmi_infoframe_type type, + unsigned int type, const void *frame, ssize_t len) { const uint32_t *data = frame; @@ -377,6 
+381,8 @@ static void hsw_write_infoframe(struct drm_encoder *encoder, enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; i915_reg_t ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder); i915_reg_t data_reg; + int data_size = type == DP_SDP_VSC ? + VIDEO_DIP_VSC_DATA_SIZE : VIDEO_DIP_DATA_SIZE; int i; u32 val = I915_READ(ctl_reg); @@ -392,7 +398,7 @@ static void hsw_write_infoframe(struct drm_encoder *encoder, data++; } /* Write every possible data byte to force correct ECC calculation. */ - for (; i < VIDEO_DIP_DATA_SIZE; i += 4) + for (; i < data_size; i += 4) I915_WRITE(hsw_dip_data_reg(dev_priv, cpu_transcoder, type, i >> 2), 0); mmiowb(); @@ -434,7 +440,7 @@ static void intel_write_infoframe(struct drm_encoder *encoder, const struct intel_crtc_state *crtc_state, union hdmi_infoframe *frame) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); uint8_t buffer[VIDEO_DIP_DATA_SIZE]; ssize_t len; @@ -450,7 +456,7 @@ static void intel_write_infoframe(struct drm_encoder *encoder, buffer[3] = 0; len++; - intel_hdmi->write_infoframe(encoder, crtc_state, frame->any.type, buffer, len); + intel_dig_port->write_infoframe(encoder, crtc_state, frame->any.type, buffer, len); } static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder, @@ -481,7 +487,8 @@ static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder, crtc_state->limited_color_range ? HDMI_QUANTIZATION_RANGE_LIMITED : HDMI_QUANTIZATION_RANGE_FULL, - intel_hdmi->rgb_quant_range_selectable); + intel_hdmi->rgb_quant_range_selectable, + is_hdmi2_sink); /* TODO: handle pixel repetition for YCBCR420 outputs */ intel_write_infoframe(encoder, crtc_state, &frame); @@ -506,12 +513,14 @@ static void intel_hdmi_set_spd_infoframe(struct drm_encoder *encoder, static void intel_hdmi_set_hdmi_infoframe(struct drm_encoder *encoder, - const struct intel_crtc_state *crtc_state) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { union hdmi_infoframe frame; int ret; ret = drm_hdmi_vendor_infoframe_from_display_mode(&frame.vendor.hdmi, + conn_state->connector, &crtc_state->base.adjusted_mode); if (ret < 0) return; @@ -529,7 +538,7 @@ static void g4x_set_infoframes(struct drm_encoder *encoder, struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; i915_reg_t reg = VIDEO_DIP_CTL; u32 val = I915_READ(reg); - u32 port = VIDEO_DIP_PORT(intel_dig_port->port); + u32 port = VIDEO_DIP_PORT(intel_dig_port->base.port); assert_hdmi_port_disabled(intel_hdmi); @@ -578,7 +587,7 @@ static void g4x_set_infoframes(struct drm_encoder *encoder, intel_hdmi_set_avi_infoframe(encoder, crtc_state); intel_hdmi_set_spd_infoframe(encoder, crtc_state); - intel_hdmi_set_hdmi_infoframe(encoder, crtc_state); + intel_hdmi_set_hdmi_infoframe(encoder, crtc_state, conn_state); } static bool hdmi_sink_is_deep_color(const struct drm_connector_state *conn_state) @@ -680,7 +689,7 @@ static void ibx_set_infoframes(struct drm_encoder *encoder, struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi; i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); - u32 port = VIDEO_DIP_PORT(intel_dig_port->port); + u32 port = VIDEO_DIP_PORT(intel_dig_port->base.port); assert_hdmi_port_disabled(intel_hdmi); @@ -719,7 +728,7 @@ static void ibx_set_infoframes(struct drm_encoder *encoder, intel_hdmi_set_avi_infoframe(encoder, crtc_state); intel_hdmi_set_spd_infoframe(encoder, crtc_state); - intel_hdmi_set_hdmi_infoframe(encoder, crtc_state); + 
intel_hdmi_set_hdmi_infoframe(encoder, crtc_state, conn_state); } static void cpt_set_infoframes(struct drm_encoder *encoder, @@ -762,7 +771,7 @@ static void cpt_set_infoframes(struct drm_encoder *encoder, intel_hdmi_set_avi_infoframe(encoder, crtc_state); intel_hdmi_set_spd_infoframe(encoder, crtc_state); - intel_hdmi_set_hdmi_infoframe(encoder, crtc_state); + intel_hdmi_set_hdmi_infoframe(encoder, crtc_state, conn_state); } static void vlv_set_infoframes(struct drm_encoder *encoder, @@ -776,7 +785,7 @@ static void vlv_set_infoframes(struct drm_encoder *encoder, struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe); u32 val = I915_READ(reg); - u32 port = VIDEO_DIP_PORT(intel_dig_port->port); + u32 port = VIDEO_DIP_PORT(intel_dig_port->base.port); assert_hdmi_port_disabled(intel_hdmi); @@ -815,7 +824,7 @@ static void vlv_set_infoframes(struct drm_encoder *encoder, intel_hdmi_set_avi_infoframe(encoder, crtc_state); intel_hdmi_set_spd_infoframe(encoder, crtc_state); - intel_hdmi_set_hdmi_infoframe(encoder, crtc_state); + intel_hdmi_set_hdmi_infoframe(encoder, crtc_state, conn_state); } static void hsw_set_infoframes(struct drm_encoder *encoder, @@ -848,7 +857,7 @@ static void hsw_set_infoframes(struct drm_encoder *encoder, intel_hdmi_set_avi_infoframe(encoder, crtc_state); intel_hdmi_set_spd_infoframe(encoder, crtc_state); - intel_hdmi_set_hdmi_infoframe(encoder, crtc_state); + intel_hdmi_set_hdmi_infoframe(encoder, crtc_state, conn_state); } void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable) @@ -945,11 +954,14 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_digital_port *intel_dig_port = hdmi_to_dig_port(intel_hdmi); struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); u32 tmp, flags = 0; int dotclock; + pipe_config->output_types |= BIT(INTEL_OUTPUT_HDMI); + tmp = I915_READ(intel_hdmi->hdmi_reg); if (tmp & SDVO_HSYNC_ACTIVE_HIGH) @@ -965,7 +977,7 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, if (tmp & HDMI_MODE_SELECT_HDMI) pipe_config->has_hdmi_sink = true; - if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config)) + if (intel_dig_port->infoframe_enabled(&encoder->base, pipe_config)) pipe_config->has_infoframe = true; if (tmp & SDVO_AUDIO_ENABLE) @@ -991,8 +1003,8 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, } static void intel_enable_hdmi_audio(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); @@ -1003,8 +1015,8 @@ static void intel_enable_hdmi_audio(struct intel_encoder *encoder, } static void g4x_enable_hdmi(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1025,8 +1037,8 @@ static void g4x_enable_hdmi(struct intel_encoder *encoder, } static void ibx_enable_hdmi(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct 
intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1075,8 +1087,8 @@ static void ibx_enable_hdmi(struct intel_encoder *encoder, } static void cpt_enable_hdmi(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1130,18 +1142,20 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder, } static void vlv_enable_hdmi(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { } static void intel_disable_hdmi(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_digital_port *intel_dig_port = + hdmi_to_dig_port(intel_hdmi); struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); u32 temp; @@ -1184,54 +1198,67 @@ static void intel_disable_hdmi(struct intel_encoder *encoder, intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } - intel_hdmi->set_infoframes(&encoder->base, false, old_crtc_state, old_conn_state); + intel_dig_port->set_infoframes(&encoder->base, false, + old_crtc_state, old_conn_state); intel_dp_dual_mode_set_tmds_output(intel_hdmi, false); } static void g4x_disable_hdmi(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder); + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); intel_disable_hdmi(encoder, old_crtc_state, old_conn_state); } static void pch_disable_hdmi(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder); + intel_audio_codec_disable(encoder, + old_crtc_state, old_conn_state); } static void pch_post_disable_hdmi(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { intel_disable_hdmi(encoder, old_crtc_state, old_conn_state); } -static int intel_hdmi_source_max_tmds_clock(struct drm_i915_private *dev_priv) +static int intel_hdmi_source_max_tmds_clock(struct intel_encoder *encoder) { - if (IS_G4X(dev_priv)) - return 165000; - else if (IS_GEMINILAKE(dev_priv)) - return 594000; - else if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8) - return 300000; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + const struct ddi_vbt_port_info *info = + &dev_priv->vbt.ddi_port_info[encoder->port]; + int max_tmds_clock; + + 
if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + max_tmds_clock = 594000; + else if (INTEL_GEN(dev_priv) >= 8 || IS_HASWELL(dev_priv)) + max_tmds_clock = 300000; + else if (INTEL_GEN(dev_priv) >= 5) + max_tmds_clock = 225000; else - return 225000; + max_tmds_clock = 165000; + + if (info->max_tmds_clock) + max_tmds_clock = min(max_tmds_clock, info->max_tmds_clock); + + return max_tmds_clock; } static int hdmi_port_clock_limit(struct intel_hdmi *hdmi, bool respect_downstream_limits, bool force_dvi) { - struct drm_device *dev = intel_hdmi_to_dev(hdmi); - int max_tmds_clock = intel_hdmi_source_max_tmds_clock(to_i915(dev)); + struct intel_encoder *encoder = &hdmi_to_dig_port(hdmi)->base; + int max_tmds_clock = intel_hdmi_source_max_tmds_clock(encoder); if (respect_downstream_limits) { struct intel_connector *connector = hdmi->attached_connector; @@ -1314,7 +1341,7 @@ intel_hdmi_mode_valid(struct drm_connector *connector, return status; } -static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state) +static bool hdmi_12bpc_possible(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); @@ -1326,6 +1353,12 @@ static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state) if (HAS_GMCH_DISPLAY(dev_priv)) return false; + if (crtc_state->pipe_bpp <= 8*3) + return false; + + if (!crtc_state->has_hdmi_sink) + return false; + /* * HDMI 12bpc affects the clocks, so it's only possible * when not cloning with other encoder types. @@ -1350,7 +1383,7 @@ static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state) } } - /* Display Wa #1139 */ + /* Display WA #1139: glk */ if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1) && crtc_state->base.adjusted_mode.htotal > 5460) return false; @@ -1451,9 +1484,8 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, * outputs. We also need to check that the higher clock still fits * within limits. 
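To make the clock limits concrete, here is a simplified model of intel_hdmi_source_max_tmds_clock() plus the 12bpc headroom check referenced above. The platform tests are reduced to a bare gen number and the VBT clamp to a plain integer, so treat this as a sketch rather than the driver's exact logic; clocks are in kHz, and a 12bpc stream needs 1.5x the 8bpc port clock.

#include <stdio.h>

static int max_tmds_clock_sketch(int gen, int vbt_limit)
{
	int max = gen >= 10 ? 594000 :
		  gen >=  8 ? 300000 :
		  gen >=  5 ? 225000 : 165000;

	if (vbt_limit && vbt_limit < max)	/* VBT may clamp further */
		max = vbt_limit;
	return max;
}

int main(void)
{
	int clock = 297000;			/* e.g. 4K@30 pixel clock */
	int clock_12bpc = clock * 3 / 2;	/* 445500 kHz */
	int limit = max_tmds_clock_sketch(9, 0);

	printf("12bpc needs %d kHz, limit %d kHz -> %s\n",
	       clock_12bpc, limit,
	       clock_12bpc <= limit ? "ok" : "fall back to 8bpc");
	return 0;
}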
*/ - if (pipe_config->pipe_bpp > 8*3 && pipe_config->has_hdmi_sink && !force_dvi && - hdmi_port_clock_valid(intel_hdmi, clock_12bpc, true, force_dvi) == MODE_OK && - hdmi_12bpc_possible(pipe_config)) { + if (hdmi_12bpc_possible(pipe_config) && + hdmi_port_clock_valid(intel_hdmi, clock_12bpc, true, force_dvi) == MODE_OK) { DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n"); desired_bpp = 12*3; @@ -1482,7 +1514,8 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, pipe_config->lane_count = 4; - if (scdc->scrambling.supported && IS_GEMINILAKE(dev_priv)) { + if (scdc->scrambling.supported && (INTEL_GEN(dev_priv) >= 10 || + IS_GEMINILAKE(dev_priv))) { if (scdc->scrambling.low_rates) pipe_config->hdmi_scrambling = true; @@ -1516,7 +1549,7 @@ intel_hdmi_dp_dual_mode_detect(struct drm_connector *connector, bool has_edid) { struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_hdmi *hdmi = intel_attached_hdmi(connector); - enum port port = hdmi_to_dig_port(hdmi)->port; + enum port port = hdmi_to_dig_port(hdmi)->base.port; struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus); enum drm_dp_dual_mode_type type = drm_dp_dual_mode_detect(adapter); @@ -1562,12 +1595,20 @@ intel_hdmi_set_edid(struct drm_connector *connector) struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); struct edid *edid; bool connected = false; + struct i2c_adapter *i2c; intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); - edid = drm_get_edid(connector, - intel_gmbus_get_adapter(dev_priv, - intel_hdmi->ddc_bus)); + i2c = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); + + edid = drm_get_edid(connector, i2c); + + if (!edid && !intel_gmbus_is_forced_bit(i2c)) { + DRM_DEBUG_KMS("HDMI GMBUS EDID read failed, retry using GPIO bit-banging\n"); + intel_gmbus_force_bit(i2c, true); + edid = drm_get_edid(connector, i2c); + intel_gmbus_force_bit(i2c, false); + } intel_hdmi_dp_dual_mode_detect(connector, edid != NULL); @@ -1600,12 +1641,9 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) intel_hdmi_unset_edid(connector); - if (intel_hdmi_set_edid(connector)) { - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); - - hdmi_to_dig_port(intel_hdmi)->base.type = INTEL_OUTPUT_HDMI; + if (intel_hdmi_set_edid(connector)) status = connector_status_connected; - } else + else status = connector_status_disconnected; intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); @@ -1616,8 +1654,6 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) static void intel_hdmi_force(struct drm_connector *connector) { - struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); - DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -1627,7 +1663,6 @@ intel_hdmi_force(struct drm_connector *connector) return; intel_hdmi_set_edid(connector); - hdmi_to_dig_port(intel_hdmi)->base.type = INTEL_OUTPUT_HDMI; } static int intel_hdmi_get_modes(struct drm_connector *connector) @@ -1642,36 +1677,35 @@ static int intel_hdmi_get_modes(struct drm_connector *connector) } static void intel_hdmi_pre_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + struct intel_digital_port *intel_dig_port = + enc_to_dig_port(&encoder->base); intel_hdmi_prepare(encoder, pipe_config); - 
intel_hdmi->set_infoframes(&encoder->base, - pipe_config->has_hdmi_sink, - pipe_config, conn_state); + intel_dig_port->set_infoframes(&encoder->base, + pipe_config->has_infoframe, + pipe_config, conn_state); } static void vlv_hdmi_pre_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct intel_hdmi *intel_hdmi = &dport->hdmi; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - vlv_phy_pre_encoder_enable(encoder); + vlv_phy_pre_encoder_enable(encoder, pipe_config); /* HDMI 1.0V-2dB */ vlv_set_phy_signal_level(encoder, 0x2b245f5f, 0x00002000, 0x5578b83a, 0x2b247878); - intel_hdmi->set_infoframes(&encoder->base, - pipe_config->has_hdmi_sink, - pipe_config, conn_state); + dport->set_infoframes(&encoder->base, + pipe_config->has_infoframe, + pipe_config, conn_state); g4x_enable_hdmi(encoder, pipe_config, conn_state); @@ -1679,41 +1713,41 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder, } static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { intel_hdmi_prepare(encoder, pipe_config); - vlv_phy_pre_pll_enable(encoder); + vlv_phy_pre_pll_enable(encoder, pipe_config); } static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { intel_hdmi_prepare(encoder, pipe_config); - chv_phy_pre_pll_enable(encoder); + chv_phy_pre_pll_enable(encoder, pipe_config); } static void chv_hdmi_post_pll_disable(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { - chv_phy_post_pll_disable(encoder); + chv_phy_post_pll_disable(encoder, old_crtc_state); } static void vlv_hdmi_post_disable(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { /* Reset lanes to avoid HDMI flicker (VLV w/a) */ - vlv_phy_reset_lanes(encoder); + vlv_phy_reset_lanes(encoder, old_crtc_state); } static void chv_hdmi_post_disable(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1721,29 +1755,28 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder, mutex_lock(&dev_priv->sb_lock); /* Assert data lane reset */ - chv_data_lane_soft_reset(encoder, true); + chv_data_lane_soft_reset(encoder, old_crtc_state, true); mutex_unlock(&dev_priv->sb_lock); } static void chv_hdmi_pre_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct 
intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct intel_hdmi *intel_hdmi = &dport->hdmi; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - chv_phy_pre_encoder_enable(encoder); + chv_phy_pre_encoder_enable(encoder, pipe_config); /* FIXME: Program the support xxx V-dB */ /* Use 800mV-0dB */ chv_set_phy_signal_level(encoder, 128, 102, false); - intel_hdmi->set_infoframes(&encoder->base, - pipe_config->has_hdmi_sink, - pipe_config, conn_state); + dport->set_infoframes(&encoder->base, + pipe_config->has_infoframe, + pipe_config, conn_state); g4x_enable_hdmi(encoder, pipe_config, conn_state); @@ -1958,6 +1991,34 @@ static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv, return ddc_pin; } +void intel_infoframe_init(struct intel_digital_port *intel_dig_port) +{ + struct drm_i915_private *dev_priv = + to_i915(intel_dig_port->base.base.dev); + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + intel_dig_port->write_infoframe = vlv_write_infoframe; + intel_dig_port->set_infoframes = vlv_set_infoframes; + intel_dig_port->infoframe_enabled = vlv_infoframe_enabled; + } else if (IS_G4X(dev_priv)) { + intel_dig_port->write_infoframe = g4x_write_infoframe; + intel_dig_port->set_infoframes = g4x_set_infoframes; + intel_dig_port->infoframe_enabled = g4x_infoframe_enabled; + } else if (HAS_DDI(dev_priv)) { + intel_dig_port->write_infoframe = hsw_write_infoframe; + intel_dig_port->set_infoframes = hsw_set_infoframes; + intel_dig_port->infoframe_enabled = hsw_infoframe_enabled; + } else if (HAS_PCH_IBX(dev_priv)) { + intel_dig_port->write_infoframe = ibx_write_infoframe; + intel_dig_port->set_infoframes = ibx_set_infoframes; + intel_dig_port->infoframe_enabled = ibx_infoframe_enabled; + } else { + intel_dig_port->write_infoframe = cpt_write_infoframe; + intel_dig_port->set_infoframes = cpt_set_infoframes; + intel_dig_port->infoframe_enabled = cpt_infoframe_enabled; + } +} + void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector) { @@ -1966,7 +2027,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum port port = intel_dig_port->port; + enum port port = intel_encoder->port; DRM_DEBUG_KMS("Adding HDMI connector on port %c\n", port_name(port)); @@ -1984,7 +2045,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, connector->doublescan_allowed = 0; connector->stereo_allowed = 1; - if (IS_GEMINILAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) connector->ycbcr_420_allowed = true; intel_hdmi->ddc_bus = intel_hdmi_ddc_pin(dev_priv, port); @@ -1993,28 +2054,6 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, return; intel_encoder->hpd_pin = intel_hpd_pin(port); - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - intel_hdmi->write_infoframe = vlv_write_infoframe; - intel_hdmi->set_infoframes = vlv_set_infoframes; - intel_hdmi->infoframe_enabled = vlv_infoframe_enabled; - } else if (IS_G4X(dev_priv)) { - intel_hdmi->write_infoframe = g4x_write_infoframe; - intel_hdmi->set_infoframes = g4x_set_infoframes; - intel_hdmi->infoframe_enabled = g4x_infoframe_enabled; - } else if (HAS_DDI(dev_priv)) { - 
intel_hdmi->write_infoframe = hsw_write_infoframe; - intel_hdmi->set_infoframes = hsw_set_infoframes; - intel_hdmi->infoframe_enabled = hsw_infoframe_enabled; - } else if (HAS_PCH_IBX(dev_priv)) { - intel_hdmi->write_infoframe = ibx_write_infoframe; - intel_hdmi->set_infoframes = ibx_set_infoframes; - intel_hdmi->infoframe_enabled = ibx_infoframe_enabled; - } else { - intel_hdmi->write_infoframe = cpt_write_infoframe; - intel_hdmi->set_infoframes = cpt_set_infoframes; - intel_hdmi->infoframe_enabled = cpt_infoframe_enabled; - } - if (HAS_DDI(dev_priv)) intel_connector->get_hw_state = intel_ddi_connector_get_hw_state; else @@ -2108,10 +2147,11 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, if (IS_G4X(dev_priv)) intel_encoder->cloneable |= 1 << INTEL_OUTPUT_HDMI; - intel_dig_port->port = port; intel_dig_port->hdmi.hdmi_reg = hdmi_reg; intel_dig_port->dp.output_reg = INVALID_MMIO_REG; intel_dig_port->max_lanes = 4; + intel_infoframe_init(intel_dig_port); + intel_hdmi_init_connector(intel_dig_port, intel_connector); } diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 6145fa0d6773..8ed05182f944 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -21,9 +21,11 @@ * IN THE SOFTWARE. * */ -#include <linux/firmware.h> + +#include <linux/types.h> + +#include "intel_huc.h" #include "i915_drv.h" -#include "intel_uc.h" /** * DOC: HuC Firmware @@ -52,10 +54,6 @@ #define KBL_HUC_FW_MINOR 00 #define KBL_BLD_NUM 1810 -#define GLK_HUC_FW_MAJOR 02 -#define GLK_HUC_FW_MINOR 00 -#define GLK_BLD_NUM 1748 - #define HUC_FW_PATH(platform, major, minor, bld_num) \ "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ __stringify(minor) "_" __stringify(bld_num) ".bin" @@ -72,8 +70,51 @@ MODULE_FIRMWARE(I915_BXT_HUC_UCODE); KBL_HUC_FW_MINOR, KBL_BLD_NUM) MODULE_FIRMWARE(I915_KBL_HUC_UCODE); -#define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \ - GLK_HUC_FW_MINOR, GLK_BLD_NUM) +static void huc_fw_select(struct intel_uc_fw *huc_fw) +{ + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + struct drm_i915_private *dev_priv = huc_to_i915(huc); + + GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); + + if (!HAS_HUC(dev_priv)) + return; + + if (i915_modparams.huc_firmware_path) { + huc_fw->path = i915_modparams.huc_firmware_path; + huc_fw->major_ver_wanted = 0; + huc_fw->minor_ver_wanted = 0; + } else if (IS_SKYLAKE(dev_priv)) { + huc_fw->path = I915_SKL_HUC_UCODE; + huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR; + } else if (IS_BROXTON(dev_priv)) { + huc_fw->path = I915_BXT_HUC_UCODE; + huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR; + } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { + huc_fw->path = I915_KBL_HUC_UCODE; + huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR; + } else { + DRM_WARN("%s: No firmware known for this platform!\n", + intel_uc_fw_type_repr(huc_fw->type)); + } +} + +/** + * intel_huc_init_early() - initializes HuC struct + * @huc: intel_huc struct + * + * On platforms with HuC selects firmware for uploading + */ +void intel_huc_init_early(struct intel_huc *huc) +{ + struct intel_uc_fw *huc_fw = &huc->fw; + + intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC); + huc_fw_select(huc_fw); +} /** * huc_ucode_xfer() - DMA's the firmware @@ -83,26 +124,15 @@ MODULE_FIRMWARE(I915_KBL_HUC_UCODE); * * Return: 0 on success, non-zero on failure */ -static 
int huc_ucode_xfer(struct drm_i915_private *dev_priv) +static int huc_ucode_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) { - struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; - struct i915_vma *vma; + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); + struct drm_i915_private *dev_priv = huc_to_i915(huc); unsigned long offset = 0; u32 size; int ret; - ret = i915_gem_object_set_to_gtt_domain(huc_fw->obj, false); - if (ret) { - DRM_DEBUG_DRIVER("set-domain failed %d\n", ret); - return ret; - } - - vma = i915_gem_object_ggtt_pin(huc_fw->obj, NULL, 0, 0, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); - if (IS_ERR(vma)) { - DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma)); - return PTR_ERR(vma); - } + GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -124,7 +154,7 @@ static int huc_ucode_xfer(struct drm_i915_private *dev_priv) I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(HUC_UKERNEL | START_DMA)); /* Wait for DMA to finish */ - ret = wait_for((I915_READ(DMA_CTRL) & START_DMA) == 0, 100); + ret = intel_wait_for_register_fw(dev_priv, DMA_CTRL, START_DMA, 0, 100); DRM_DEBUG_DRIVER("HuC DMA transfer wait over with ret %d\n", ret); @@ -133,140 +163,68 @@ static int huc_ucode_xfer(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - /* - * We keep the object pages for reuse during resume. But we can unpin it - * now that DMA has completed, so it doesn't continue to take up space. - */ - i915_vma_unpin(vma); - return ret; } /** - * intel_huc_select_fw() - selects HuC firmware for loading - * @huc: intel_huc struct - */ -void intel_huc_select_fw(struct intel_huc *huc) -{ - struct drm_i915_private *dev_priv = huc_to_i915(huc); - - huc->fw.path = NULL; - huc->fw.fetch_status = INTEL_UC_FIRMWARE_NONE; - huc->fw.load_status = INTEL_UC_FIRMWARE_NONE; - huc->fw.type = INTEL_UC_FW_TYPE_HUC; - - if (i915.huc_firmware_path) { - huc->fw.path = i915.huc_firmware_path; - huc->fw.major_ver_wanted = 0; - huc->fw.minor_ver_wanted = 0; - } else if (IS_SKYLAKE(dev_priv)) { - huc->fw.path = I915_SKL_HUC_UCODE; - huc->fw.major_ver_wanted = SKL_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = SKL_HUC_FW_MINOR; - } else if (IS_BROXTON(dev_priv)) { - huc->fw.path = I915_BXT_HUC_UCODE; - huc->fw.major_ver_wanted = BXT_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = BXT_HUC_FW_MINOR; - } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - huc->fw.path = I915_KBL_HUC_UCODE; - huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; - } else if (IS_GEMINILAKE(dev_priv)) { - huc->fw.path = I915_GLK_HUC_UCODE; - huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR; - } else { - DRM_ERROR("No HuC firmware known for platform with HuC!\n"); - return; - } -} - -/** * intel_huc_init_hw() - load HuC uCode to device * @huc: intel_huc structure * - * Called from guc_setup() during driver loading and also after a GPU reset. - * Be note that HuC loading must be done before GuC loading. + * Called from intel_uc_init_hw() during driver loading and also after a GPU + * reset. Be note that HuC loading must be done before GuC loading. * * The firmware image should have already been fetched into memory by the - * earlier call to intel_huc_init(), so here we need only check that + * earlier call to intel_uc_init_fw(), so here we need only check that * is succeeded, and then transfer the image to the h/w. 
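 *
 * For reference, firmware paths are produced by the HUC_FW_PATH()
 * stringification macro above; e.g. the (now removed) GLK triple would
 * expand as:
 *
 *	HUC_FW_PATH(glk, 02, 00, 1748)
 *		=> "i915/glk_huc_ver02_00_1748.bin"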
* */ -void intel_huc_init_hw(struct intel_huc *huc) +int intel_huc_init_hw(struct intel_huc *huc) { - struct drm_i915_private *dev_priv = huc_to_i915(huc); - int err; - - DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", - huc->fw.path, - intel_uc_fw_status_repr(huc->fw.fetch_status), - intel_uc_fw_status_repr(huc->fw.load_status)); - - if (huc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return; - - huc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; - - err = huc_ucode_xfer(dev_priv); - - huc->fw.load_status = err ? - INTEL_UC_FIRMWARE_FAIL : INTEL_UC_FIRMWARE_SUCCESS; - - DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", - huc->fw.path, - intel_uc_fw_status_repr(huc->fw.fetch_status), - intel_uc_fw_status_repr(huc->fw.load_status)); - - if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err); - - return; + return intel_uc_fw_upload(&huc->fw, huc_ucode_xfer); } /** - * intel_guc_auth_huc() - authenticate ucode - * @dev_priv: the drm_i915_device + * intel_huc_auth() - Authenticate HuC uCode + * @huc: intel_huc structure * - * Triggers a HuC fw authentication request to the GuC via intel_guc_action_ - * authenticate_huc interface. + * Called after HuC and GuC firmware loading during intel_uc_init_hw(). + * + * This function pins HuC firmware image object into GGTT. + * Then it invokes GuC action to authenticate passing the offset to RSA + * signature through intel_guc_auth_huc(). It then waits for 50ms for + * firmware verification ACK and unpins the object. */ -void intel_guc_auth_huc(struct drm_i915_private *dev_priv) +int intel_huc_auth(struct intel_huc *huc) { - struct intel_guc *guc = &dev_priv->guc; - struct intel_huc *huc = &dev_priv->huc; + struct drm_i915_private *i915 = huc_to_i915(huc); + struct intel_guc *guc = &i915->guc; struct i915_vma *vma; int ret; - u32 data[2]; if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return; + return -ENOEXEC; vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0, PIN_OFFSET_BIAS | GUC_WOPCM_TOP); if (IS_ERR(vma)) { - DRM_ERROR("failed to pin huc fw object %d\n", - (int)PTR_ERR(vma)); - return; + ret = PTR_ERR(vma); + DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret); + return ret; } - /* Specify auth action and where public signature is. 
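 * The two dwords that follow (the action code plus the GGTT offset of
 * the RSA signature) are exactly what the new intel_guc_auth_huc()
 * wrapper sends. Verification is then polled with the usual mask/value
 * helper, i.e. wait until (I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED)
 * equals HUC_FW_VERIFIED or the 50ms timeout expires:
 *
 *	ret = intel_wait_for_register(i915, HUC_STATUS2,
 *				      HUC_FW_VERIFIED, HUC_FW_VERIFIED, 50);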
*/ - data[0] = INTEL_GUC_ACTION_AUTHENTICATE_HUC; - data[1] = guc_ggtt_offset(vma) + huc->fw.rsa_offset; - - ret = intel_guc_send(guc, data, ARRAY_SIZE(data)); + ret = intel_guc_auth_huc(guc, + guc_ggtt_offset(vma) + huc->fw.rsa_offset); if (ret) { DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); goto out; } /* Check authentication status, it should be done by now */ - ret = intel_wait_for_register(dev_priv, - HUC_STATUS2, - HUC_FW_VERIFIED, - HUC_FW_VERIFIED, - 50); - + ret = intel_wait_for_register(i915, + HUC_STATUS2, + HUC_FW_VERIFIED, + HUC_FW_VERIFIED, + 50); if (ret) { DRM_ERROR("HuC: Authentication failed %d\n", ret); goto out; @@ -274,5 +232,5 @@ void intel_guc_auth_huc(struct drm_i915_private *dev_priv) out: i915_vma_unpin(vma); + return ret; } - diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h new file mode 100644 index 000000000000..40039db59e04 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -0,0 +1,41 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef _INTEL_HUC_H_ +#define _INTEL_HUC_H_ + +#include "intel_uc_fw.h" + +struct intel_huc { + /* Generic uC firmware management */ + struct intel_uc_fw fw; + + /* HuC-specific additions */ +}; + +void intel_huc_init_early(struct intel_huc *huc); +int intel_huc_init_hw(struct intel_huc *huc); +int intel_huc_auth(struct intel_huc *huc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index eb5827110d8f..ef9f91a0b0c9 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -128,22 +128,46 @@ intel_i2c_reset(struct drm_i915_private *dev_priv) I915_WRITE(GMBUS4, 0); } -static void intel_i2c_quirk_set(struct drm_i915_private *dev_priv, bool enable) +static void pnv_gmbus_clock_gating(struct drm_i915_private *dev_priv, + bool enable) { u32 val; /* When using bit bashing for I2C, this bit needs to be set to 1 */ - if (!IS_PINEVIEW(dev_priv)) - return; - val = I915_READ(DSPCLK_GATE_D); - if (enable) - val |= DPCUNIT_CLOCK_GATE_DISABLE; + if (!enable) + val |= PNV_GMBUSUNIT_CLOCK_GATE_DISABLE; else - val &= ~DPCUNIT_CLOCK_GATE_DISABLE; + val &= ~PNV_GMBUSUNIT_CLOCK_GATE_DISABLE; I915_WRITE(DSPCLK_GATE_D, val); } +static void pch_gmbus_clock_gating(struct drm_i915_private *dev_priv, + bool enable) +{ + u32 val; + + val = I915_READ(SOUTH_DSPCLK_GATE_D); + if (!enable) + val |= PCH_GMBUSUNIT_CLOCK_GATE_DISABLE; + else + val &= ~PCH_GMBUSUNIT_CLOCK_GATE_DISABLE; + I915_WRITE(SOUTH_DSPCLK_GATE_D, val); +} + +static void bxt_gmbus_clock_gating(struct drm_i915_private *dev_priv, + bool enable) +{ + u32 val; + + val = I915_READ(GEN9_CLKGATE_DIS_4); + if (!enable) + val |= BXT_GMBUS_GATING_DIS; + else + val &= ~BXT_GMBUS_GATING_DIS; + I915_WRITE(GEN9_CLKGATE_DIS_4, val); +} + static u32 get_reserved(struct intel_gmbus *bus) { struct drm_i915_private *dev_priv = bus->dev_priv; @@ -221,7 +245,10 @@ intel_gpio_pre_xfer(struct i2c_adapter *adapter) struct drm_i915_private *dev_priv = bus->dev_priv; intel_i2c_reset(dev_priv); - intel_i2c_quirk_set(dev_priv, true); + + if (IS_PINEVIEW(dev_priv)) + pnv_gmbus_clock_gating(dev_priv, false); + set_data(bus, 1); set_clock(bus, 1); udelay(I2C_RISEFALL_TIME); @@ -238,7 +265,9 @@ intel_gpio_post_xfer(struct i2c_adapter *adapter) set_data(bus, 1); set_clock(bus, 1); - intel_i2c_quirk_set(dev_priv, false); + + if (IS_PINEVIEW(dev_priv)) + pnv_gmbus_clock_gating(dev_priv, true); } static void @@ -438,7 +467,9 @@ static bool gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && - !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 && + msgs[i].addr == msgs[i + 1].addr && + !(msgs[i].flags & I2C_M_RD) && + (msgs[i].len == 1 || msgs[i].len == 2) && (msgs[i + 1].flags & I2C_M_RD)); } @@ -479,6 +510,13 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) int i = 0, inc, try = 0; int ret = 0; + /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ + if (IS_GEN9_LP(dev_priv)) + bxt_gmbus_clock_gating(dev_priv, false); + else if (HAS_PCH_SPT(dev_priv) || + HAS_PCH_KBP(dev_priv) || HAS_PCH_CNP(dev_priv)) + pch_gmbus_clock_gating(dev_priv, false); + retry: I915_WRITE_FW(GMBUS0, bus->reg0); @@ -580,6 +618,13 @@ timeout: ret = -EAGAIN; out: + /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ + if (IS_GEN9_LP(dev_priv)) + bxt_gmbus_clock_gating(dev_priv, true); + else if (HAS_PCH_SPT(dev_priv) || + HAS_PCH_KBP(dev_priv) || HAS_PCH_CNP(dev_priv)) + pch_gmbus_clock_gating(dev_priv, true); + return ret; } diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c 
b/drivers/gpu/drm/i915/intel_lpe_audio.c index 3bf65288ffff..5809b29044fc 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv) }; if (!pci_dev_present(atom_hdaudio_ids)) { - DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n"); + DRM_INFO("HDaudio controller not detected, using LPE audio instead\n"); lpe_present = true; } } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6f972e6ec663..7ece2f061b9e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -136,6 +136,7 @@ #include <drm/drmP.h> #include <drm/i915_drm.h> #include "i915_drv.h" +#include "i915_gem_render_state.h" #include "intel_mocs.h" #define RING_EXECLIST_QFULL (1 << 0x2) @@ -153,9 +154,7 @@ #define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) #define GEN8_CTX_STATUS_COMPLETED_MASK \ - (GEN8_CTX_STATUS_ACTIVE_IDLE | \ - GEN8_CTX_STATUS_PREEMPTED | \ - GEN8_CTX_STATUS_ELEMENT_SWITCH) + (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) #define CTX_LRI_HEADER_0 0x01 #define CTX_CONTEXT_CONTROL 0x02 @@ -208,8 +207,9 @@ /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ - #define WA_TAIL_DWORDS 2 +#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) +#define PREEMPT_ID 0x1 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -219,38 +219,6 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_ring *ring); /** - * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists - * @dev_priv: i915 device private - * @enable_execlists: value of i915.enable_execlists module parameter. - * - * Only certain platforms support Execlists (the prerequisites being - * support for Logical Ring Contexts and Aliasing PPGTT or better). - * - * Return: 1 if Execlists is supported and has to be enabled. - */ -int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists) -{ - /* On platforms with execlist available, vGPU will only - * support execlist mode, no ring buffer mode. 
- */ - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && intel_vgpu_active(dev_priv)) - return 1; - - if (INTEL_GEN(dev_priv) >= 9) - return 1; - - if (enable_execlists == 0) - return 0; - - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv) && - USES_PPGTT(dev_priv) && - i915.use_mmio_flip >= 0) - return 1; - - return 0; -} - -/** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context * @ctx: Context to work on @@ -279,17 +247,121 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<<GEN8_CTX_ID_WIDTH)); desc = ctx->desc_template; /* bits 0-11 */ - desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE; + desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; /* bits 12-31 */ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ ce->lrc_desc = desc; } -uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static struct i915_priolist * +lookup_priolist(struct intel_engine_cs *engine, + struct i915_priotree *pt, + int prio) { - return ctx->engine[engine->id].lrc_desc; + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_priolist *p; + struct rb_node **parent, *rb; + bool first = true; + + if (unlikely(execlists->no_priolist)) + prio = I915_PRIORITY_NORMAL; + +find_priolist: + /* most positive priority is scheduled first, equal priorities fifo */ + rb = NULL; + parent = &execlists->queue.rb_node; + while (*parent) { + rb = *parent; + p = rb_entry(rb, typeof(*p), node); + if (prio > p->priority) { + parent = &rb->rb_left; + } else if (prio < p->priority) { + parent = &rb->rb_right; + first = false; + } else { + return p; + } + } + + if (prio == I915_PRIORITY_NORMAL) { + p = &execlists->default_priolist; + } else { + p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC); + /* Convert an allocation failure to a priority bump */ + if (unlikely(!p)) { + prio = I915_PRIORITY_NORMAL; /* recurses just once */ + + /* To maintain ordering with all rendering, after an + * allocation failure we have to disable all scheduling. + * Requests will then be executed in fifo, and schedule + * will ensure that dependencies are emitted in fifo. + * There will be still some reordering with existing + * requests, so if userspace lied about their + * dependencies that reordering may be visible. 
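 *
 * A sketch of the intended caller pattern (the low bit packed into the
 * returned pointer records whether this priolist became the new head of
 * the queue; see insert_request() further below):
 *
 *	p = lookup_priolist(engine, pt, prio);
 *	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
 *	if (ptr_unmask_bits(p, 1))
 *		tasklet_hi_schedule(&engine->execlists.tasklet);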
+ */ + execlists->no_priolist = true; + goto find_priolist; + } + } + + p->priority = prio; + INIT_LIST_HEAD(&p->requests); + rb_link_node(&p->node, rb, parent); + rb_insert_color(&p->node, &execlists->queue); + + if (first) + execlists->first = &p->node; + + return ptr_pack_bits(p, first, 1); +} + +static void unwind_wa_tail(struct drm_i915_gem_request *rq) +{ + rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); + assert_ring_tail_valid(rq->ring, rq->tail); +} + +static void __unwind_incomplete_requests(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_request *rq, *rn; + struct i915_priolist *uninitialized_var(p); + int last_prio = I915_PRIORITY_INVALID; + + lockdep_assert_held(&engine->timeline->lock); + + list_for_each_entry_safe_reverse(rq, rn, + &engine->timeline->requests, + link) { + if (i915_gem_request_completed(rq)) + return; + + __i915_gem_request_unsubmit(rq); + unwind_wa_tail(rq); + + GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID); + if (rq->priotree.priority != last_prio) { + p = lookup_priolist(engine, + &rq->priotree, + rq->priotree.priority); + p = ptr_mask_bits(p, 1); + + last_prio = rq->priotree.priority; + } + + list_add(&rq->priotree.link, &p->requests); + } +} + +void +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) +{ + struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + + spin_lock_irq(&engine->timeline->lock); + __unwind_incomplete_requests(engine); + spin_unlock_irq(&engine->timeline->lock); } static inline void @@ -307,6 +379,20 @@ execlists_context_status_change(struct drm_i915_gem_request *rq, status, rq); } +static inline void +execlists_context_schedule_in(struct drm_i915_gem_request *rq) +{ + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + intel_engine_context_in(rq->engine); +} + +static inline void +execlists_context_schedule_out(struct drm_i915_gem_request *rq) +{ + intel_engine_context_out(rq->engine); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); +} + static void execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) { @@ -336,14 +422,18 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) return ce->lrc_desc; } +static inline void elsp_write(u64 desc, u32 __iomem *elsp) +{ + writel(upper_32_bits(desc), elsp); + writel(lower_32_bits(desc), elsp); +} + static void execlists_submit_ports(struct intel_engine_cs *engine) { - struct execlist_port *port = engine->execlist_port; - u32 __iomem *elsp = - engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + struct execlist_port *port = engine->execlists.port; unsigned int n; - for (n = ARRAY_SIZE(engine->execlist_port); n--; ) { + for (n = execlists_num_ports(&engine->execlists); n--; ) { struct drm_i915_gem_request *rq; unsigned int count; u64 desc; @@ -352,18 +442,23 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) if (rq) { GEM_BUG_ON(count > !n); if (!count++) - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + execlists_context_schedule_in(rq); port_set(&port[n], port_pack(rq, count)); desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); + + GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n", + engine->name, n, + port[n].context_id, count, + rq->global_seqno); } else { GEM_BUG_ON(!n); desc = 0; } - writel(upper_32_bits(desc), elsp); - writel(lower_32_bits(desc), elsp); + elsp_write(desc, engine->execlists.elsp); } + 
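	/*
	 * Summary of the ELSP write discipline used above: each descriptor
	 * is two dwords, written upper half first,
	 *
	 *	writel(upper_32_bits(desc), elsp);
	 *	writel(lower_32_bits(desc), elsp);
	 *
	 * and the ports are walked in reverse order (n--), so the
	 * submission takes effect when port[0], the active slot, is
	 * written last.
	 */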
execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); } static bool ctx_single_port_submission(const struct i915_gem_context *ctx) @@ -395,25 +490,38 @@ static void port_assign(struct execlist_port *port, port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); } +static void inject_preempt_context(struct intel_engine_cs *engine) +{ + struct intel_context *ce = + &engine->i915->preempt_context->engine[engine->id]; + unsigned int n; + + GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID); + GEM_BUG_ON(!IS_ALIGNED(ce->ring->size, WA_TAIL_BYTES)); + + memset(ce->ring->vaddr + ce->ring->tail, 0, WA_TAIL_BYTES); + ce->ring->tail += WA_TAIL_BYTES; + ce->ring->tail &= (ce->ring->size - 1); + ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; + + GEM_TRACE("%s\n", engine->name); + for (n = execlists_num_ports(&engine->execlists); --n; ) + elsp_write(0, engine->execlists.elsp); + + elsp_write(ce->lrc_desc, engine->execlists.elsp); + execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *last; - struct execlist_port *port = engine->execlist_port; + struct intel_engine_execlists * const execlists = &engine->execlists; + struct execlist_port *port = execlists->port; + const struct execlist_port * const last_port = + &execlists->port[execlists->port_mask]; + struct drm_i915_gem_request *last = port_request(port); struct rb_node *rb; bool submit = false; - last = port_request(port); - if (last) - /* WaIdleLiteRestore:bdw,skl - * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL - * as we resubmit the request. See gen8_emit_breadcrumb() - * for where we prepare the padding after the end of the - * request. - */ - last->tail = last->wa_tail; - - GEM_BUG_ON(port_isset(&port[1])); - /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is * static for a context, and unique to each, so we only execute @@ -436,9 +544,79 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ spin_lock_irq(&engine->timeline->lock); - rb = engine->execlist_first; - GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb); - while (rb) { + rb = execlists->first; + GEM_BUG_ON(rb_first(&execlists->queue) != rb); + if (!rb) + goto unlock; + + if (last) { + /* + * Don't resubmit or switch until all outstanding + * preemptions (lite-restore) are seen. Then we + * know the next preemption status we see corresponds + * to this ELSP update. + */ + GEM_BUG_ON(!port_count(&port[0])); + if (port_count(&port[0]) > 1) + goto unlock; + + /* + * If we write to ELSP a second time before the HW has had + * a chance to respond to the previous write, we can confuse + * the HW and hit "undefined behaviour". After writing to ELSP, + * we must then wait until we see a context-switch event from + * the HW to indicate that it has had a chance to respond. + */ + if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) + goto unlock; + + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915) && + rb_entry(rb, struct i915_priolist, node)->priority > + max(last->priotree.priority, 0)) { + /* + * Switch to our empty preempt context so + * the state of the GPU is known (idle). 
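 * (Sketch of the decision above: preemption is injected only when
 *
 *	queue_head_priority > max(last->priotree.priority, 0)
 *
 * so ordinary-priority work never pays for the idle-context round
 * trip.)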
+ */ + inject_preempt_context(engine); + execlists_set_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); + goto unlock; + } else { + /* + * In theory, we could coalesce more requests onto + * the second port (the first port is active, with + * no preemptions pending). However, that means we + * then have to deal with the possible lite-restore + * of the second port (as we submit the ELSP, there + * may be a context-switch) but also we may complete + * the resubmission before the context-switch. Ergo, + * coalescing onto the second port will cause a + * preemption event, but we cannot predict whether + * that will affect port[0] or port[1]. + * + * If the second port is already active, we can wait + * until the next context-switch before contemplating + * new requests. The GPU will be busy and we should be + * able to resubmit the new ELSP before it idles, + * avoiding pipeline bubbles (momentary pauses where + * the driver is unable to keep up the supply of new + * work). + */ + if (port_count(&port[1])) + goto unlock; + + /* WaIdleLiteRestore:bdw,skl + * Apply the wa NOOPs to prevent + * ring:HEAD == req:TAIL as we resubmit the + * request. See gen8_emit_breadcrumb() for + * where we prepare the padding after the + * end of the request. + */ + last->tail = last->wa_tail; + } + } + + do { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct drm_i915_gem_request *rq, *rn; @@ -460,7 +638,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * combine this request with the last, then we * are done. */ - if (port != engine->execlist_port) { + if (port == last_port) { __list_del_many(&p->requests, &rq->priotree.link); goto done; @@ -485,48 +663,120 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (submit) port_assign(port, last); port++; + + GEM_BUG_ON(port_isset(port)); } INIT_LIST_HEAD(&rq->priotree.link); - rq->priotree.priority = INT_MAX; - __i915_gem_request_submit(rq); - trace_i915_gem_request_in(rq, port_index(port, engine)); + trace_i915_gem_request_in(rq, port_index(port, execlists)); last = rq; submit = true; } rb = rb_next(rb); - rb_erase(&p->node, &engine->execlist_queue); + rb_erase(&p->node, &execlists->queue); INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); - } + } while (rb); done: - engine->execlist_first = rb; + execlists->first = rb; if (submit) port_assign(port, last); +unlock: spin_unlock_irq(&engine->timeline->lock); - if (submit) + if (submit) { + execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); execlists_submit_ports(engine); + } } -static bool execlists_elsp_ready(const struct intel_engine_cs *engine) +void +execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) { - const struct execlist_port *port = engine->execlist_port; + struct execlist_port *port = execlists->port; + unsigned int num_ports = execlists_num_ports(execlists); + + while (num_ports-- && port_isset(port)) { + struct drm_i915_gem_request *rq = port_request(port); + + GEM_BUG_ON(!execlists->active); + intel_engine_context_out(rq->engine); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED); + i915_gem_request_put(rq); - return port_count(&port[0]) + port_count(&port[1]) < 2; + memset(port, 0, sizeof(*port)); + port++; + } +} + +static void execlists_cancel_requests(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct drm_i915_gem_request *rq, *rn; + struct rb_node *rb; + unsigned long 
flags; + + spin_lock_irqsave(&engine->timeline->lock, flags); + + /* Cancel the requests on the HW and clear the ELSP tracker. */ + execlists_cancel_port_requests(execlists); + + /* Mark all executing requests as skipped. */ + list_for_each_entry(rq, &engine->timeline->requests, link) { + GEM_BUG_ON(!rq->global_seqno); + if (!i915_gem_request_completed(rq)) + dma_fence_set_error(&rq->fence, -EIO); + } + + /* Flush the queued requests to the timeline list (for retiring). */ + rb = execlists->first; + while (rb) { + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + + list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + INIT_LIST_HEAD(&rq->priotree.link); + + dma_fence_set_error(&rq->fence, -EIO); + __i915_gem_request_submit(rq); + } + + rb = rb_next(rb); + rb_erase(&p->node, &execlists->queue); + INIT_LIST_HEAD(&p->requests); + if (p->priority != I915_PRIORITY_NORMAL) + kmem_cache_free(engine->i915->priorities, p); + } + + /* Remaining _unready_ requests will be nop'ed when submitted */ + + + execlists->queue = RB_ROOT; + execlists->first = NULL; + GEM_BUG_ON(port_isset(execlists->port)); + + /* + * The port is checked prior to scheduling a tasklet, but + * just in case we have suspended the tasklet to do the + * wedging make sure that when it wakes, it decides there + * is no work to do by clearing the irq_posted bit. + */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + + spin_unlock_irqrestore(&engine->timeline->lock, flags); } /* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. */ -static void intel_lrc_irq_handler(unsigned long data) +static void execlists_submission_tasklet(unsigned long data) { - struct intel_engine_cs *engine = (struct intel_engine_cs *)data; - struct execlist_port *port = engine->execlist_port; + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + struct intel_engine_execlists * const execlists = &engine->execlists; + struct execlist_port * const port = execlists->port; struct drm_i915_private *dev_priv = engine->i915; /* We can skip acquiring intel_runtime_pm_get() here as it was taken @@ -538,19 +788,24 @@ static void intel_lrc_irq_handler(unsigned long data) */ GEM_BUG_ON(!dev_priv->gt.awake); - intel_uncore_forcewake_get(dev_priv, engine->fw_domains); + intel_uncore_forcewake_get(dev_priv, execlists->fw_domains); /* Prefer doing test_and_clear_bit() as a two stage operation to avoid * imposing the cost of a locked atomic transaction when submitting a * new request (outside of the context-switch interrupt). */ while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { - u32 __iomem *csb_mmio = - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); - u32 __iomem *buf = - dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)); + /* The HWSP contains a (cacheable) mirror of the CSB */ + const u32 *buf = + &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; unsigned int head, tail; + if (unlikely(execlists->csb_use_mmio)) { + buf = (u32 * __force) + (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); + execlists->csb_head = -1; /* force mmio read of CSB ptrs */ + } + /* The write will be ordered by the uncached read (itself * a memory barrier), so we do not need another in the form * of a locked instruction. The race between the interrupt @@ -562,9 +817,24 @@ static void intel_lrc_irq_handler(unsigned long data) * is set and we do a new loop. 
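 *
 * Condensed, the loop below is a ring walk over the (cacheable) HWSP
 * mirror of the context status buffer; the entry count is assumed to be
 * GEN8_CSB_ENTRIES (6 on these parts), and each status dword is read
 * with READ_ONCE() since buf may alias mmio:
 *
 *	head = execlists->csb_head;
 *	tail = READ_ONCE(buf[write_idx]);
 *	while (head != tail) {
 *		if (++head == GEN8_CSB_ENTRIES)
 *			head = 0;
 *		status = READ_ONCE(buf[2 * head]);
 *		... handle completion/preemption events ...
 *	}
 *	execlists->csb_head = head;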
*/ __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - head = readl(csb_mmio); - tail = GEN8_CSB_WRITE_PTR(head); - head = GEN8_CSB_READ_PTR(head); + if (unlikely(execlists->csb_head == -1)) { /* following a reset */ + head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); + tail = GEN8_CSB_WRITE_PTR(head); + head = GEN8_CSB_READ_PTR(head); + execlists->csb_head = head; + } else { + const int write_idx = + intel_hws_csb_write_index(dev_priv) - + I915_HWS_CSB_BUF0_INDEX; + + head = execlists->csb_head; + tail = READ_ONCE(buf[write_idx]); + } + GEM_TRACE("%s cs-irq head=%d [%d], tail=%d [%d]\n", + engine->name, + head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), + tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))))); + while (head != tail) { struct drm_i915_gem_request *rq; unsigned int status; @@ -590,26 +860,67 @@ static void intel_lrc_irq_handler(unsigned long data) * status notifier. */ - status = readl(buf + 2 * head); + status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", + engine->name, head, + status, buf[2*head + 1], + execlists->active); + + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | + GEN8_CTX_STATUS_PREEMPTED)) + execlists_set_active(execlists, + EXECLISTS_ACTIVE_HWACK); + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE) + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_HWACK); + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; + /* We should never get a COMPLETED | IDLE_ACTIVE! */ + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE); + + if (status & GEN8_CTX_STATUS_COMPLETE && + buf[2*head + 1] == PREEMPT_ID) { + GEM_TRACE("%s preempt-idle\n", engine->name); + + execlists_cancel_port_requests(execlists); + execlists_unwind_incomplete_requests(execlists); + + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)); + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_PREEMPT); + continue; + } + + if (status & GEN8_CTX_STATUS_PREEMPTED && + execlists_is_active(execlists, + EXECLISTS_ACTIVE_PREEMPT)) + continue; + + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); + /* Check the context/desc id for this event matches */ - GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) != - port->context_id); + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); rq = port_unpack(port, &count); + GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n", + engine->name, + port->context_id, count, + rq ? 
rq->global_seqno : 0); GEM_BUG_ON(count == 0); if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); + GEM_BUG_ON(port_isset(&port[1]) && + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); GEM_BUG_ON(!i915_gem_request_completed(rq)); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); - + execlists_context_schedule_out(rq); trace_i915_gem_request_out(rq); i915_gem_request_put(rq); - port[0] = port[1]; - memset(&port[1], 0, sizeof(port[1])); + execlists_port_complete(execlists, port); } else { port_set(port, port_pack(rq, count)); } @@ -617,80 +928,33 @@ static void intel_lrc_irq_handler(unsigned long data) /* After the final element, the hw should be idle */ GEM_BUG_ON(port_count(port) == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + if (port_count(port) == 0) + execlists_clear_active(execlists, + EXECLISTS_ACTIVE_USER); } - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), - csb_mmio); + if (head != execlists->csb_head) { + execlists->csb_head = head; + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), + dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); + } } - if (execlists_elsp_ready(engine)) + if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) execlists_dequeue(engine); - intel_uncore_forcewake_put(dev_priv, engine->fw_domains); + intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); } -static bool -insert_request(struct intel_engine_cs *engine, - struct i915_priotree *pt, - int prio) +static void insert_request(struct intel_engine_cs *engine, + struct i915_priotree *pt, + int prio) { - struct i915_priolist *p; - struct rb_node **parent, *rb; - bool first = true; - - if (unlikely(engine->no_priolist)) - prio = I915_PRIORITY_NORMAL; - -find_priolist: - /* most positive priority is scheduled first, equal priorities fifo */ - rb = NULL; - parent = &engine->execlist_queue.rb_node; - while (*parent) { - rb = *parent; - p = rb_entry(rb, typeof(*p), node); - if (prio > p->priority) { - parent = &rb->rb_left; - } else if (prio < p->priority) { - parent = &rb->rb_right; - first = false; - } else { - list_add_tail(&pt->link, &p->requests); - return false; - } - } - - if (prio == I915_PRIORITY_NORMAL) { - p = &engine->default_priolist; - } else { - p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC); - /* Convert an allocation failure to a priority bump */ - if (unlikely(!p)) { - prio = I915_PRIORITY_NORMAL; /* recurses just once */ - - /* To maintain ordering with all rendering, after an - * allocation failure we have to disable all scheduling. - * Requests will then be executed in fifo, and schedule - * will ensure that dependencies are emitted in fifo. - * There will be still some reordering with existing - * requests, so if userspace lied about their - * dependencies that reordering may be visible. 
- */ - engine->no_priolist = true; - goto find_priolist; - } - } - - p->priority = prio; - rb_link_node(&p->node, rb, parent); - rb_insert_color(&p->node, &engine->execlist_queue); + struct i915_priolist *p = lookup_priolist(engine, pt, prio); - INIT_LIST_HEAD(&p->requests); - list_add_tail(&pt->link, &p->requests); - - if (first) - engine->execlist_first = &p->node; - - return first; + list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); + if (ptr_unmask_bits(p, 1)) + tasklet_hi_schedule(&engine->execlists.tasklet); } static void execlists_submit_request(struct drm_i915_gem_request *request) @@ -701,24 +965,23 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->timeline->lock, flags); - if (insert_request(engine, - &request->priotree, - request->priotree.priority)) { - if (execlists_elsp_ready(engine)) - tasklet_hi_schedule(&engine->irq_tasklet); - } + insert_request(engine, &request->priotree, request->priotree.priority); - GEM_BUG_ON(!engine->execlist_first); + GEM_BUG_ON(!engine->execlists.first); GEM_BUG_ON(list_empty(&request->priotree.link)); spin_unlock_irqrestore(&engine->timeline->lock, flags); } +static struct drm_i915_gem_request *pt_to_request(struct i915_priotree *pt) +{ + return container_of(pt, struct drm_i915_gem_request, priotree); +} + static struct intel_engine_cs * pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine = - container_of(pt, struct drm_i915_gem_request, priotree)->engine; + struct intel_engine_cs *engine = pt_to_request(pt)->engine; GEM_BUG_ON(!locked); @@ -737,6 +1000,11 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) struct i915_dependency stack; LIST_HEAD(dfs); + GEM_BUG_ON(prio == I915_PRIORITY_INVALID); + + if (i915_gem_request_completed(request)) + return; + if (prio <= READ_ONCE(request->priotree.priority)) return; @@ -772,6 +1040,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * engines. */ list_for_each_entry(p, &pt->signalers_list, signal_link) { + if (i915_gem_request_completed(pt_to_request(p->signaler))) + continue; + GEM_BUG_ON(p->signaler->priority < pt->priority); if (prio > READ_ONCE(p->signaler->priority)) list_move_tail(&p->dfs_link, &dfs); @@ -785,7 +1056,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. */ - if (request->priotree.priority == INT_MIN) { + if (request->priotree.priority == I915_PRIORITY_INVALID) { GEM_BUG_ON(!list_empty(&request->priotree.link)); request->priotree.priority = prio; if (stack.dfs_link.next == stack.dfs_link.prev) @@ -815,8 +1086,29 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) } spin_unlock_irq(&engine->timeline->lock); +} + +static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) +{ + unsigned int flags; + int err; + + /* + * Clear this page out of any CPU caches for coherent swap-in/out. + * We only want to do this on the first bind so that we do not stall + * on an active context (which by nature is already on the GPU). + */ + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + if (err) + return err; + } - /* XXX Do we need to preempt to make room for us and our deps? 
*/ + flags = PIN_GLOBAL | PIN_HIGH; + if (ctx->ggtt_offset_bias) + flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias; + + return i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, flags); } static struct intel_ring * @@ -824,7 +1116,6 @@ execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; - unsigned int flags; void *vaddr; int ret; @@ -841,11 +1132,7 @@ execlists_context_pin(struct intel_engine_cs *engine, } GEM_BUG_ON(!ce->state); - flags = PIN_GLOBAL | PIN_HIGH; - if (ctx->ggtt_offset_bias) - flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias; - - ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, flags); + ret = __context_pin(ctx, ce->state); if (ret) goto err; @@ -865,8 +1152,7 @@ execlists_context_pin(struct intel_engine_cs *engine, ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = i915_ggtt_offset(ce->ring->vma); - ce->state->obj->mm.dirty = true; - + ce->state->obj->pin_global++; i915_gem_context_get(ctx); out: return ce->ring; @@ -893,6 +1179,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, intel_ring_unpin(ce->ring); + ce->state->obj->pin_global--; i915_gem_object_unpin_map(ce->state->obj); i915_vma_unpin(ce->state); @@ -903,7 +1190,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; - u32 *cs; int ret; GEM_BUG_ON(!ce->pin_count); @@ -914,30 +1200,9 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) */ request->reserved_space += EXECLISTS_REQUEST_SIZE; - if (i915.enable_guc_submission) { - /* - * Check that the GuC has space for the request before - * going any further, as the i915_add_request() call - * later on mustn't fail ... - */ - ret = i915_guc_wq_reserve(request); - if (ret) - goto err; - } - - cs = intel_ring_begin(request, 0); - if (IS_ERR(cs)) { - ret = PTR_ERR(cs); - goto err_unreserve; - } - - if (!ce->initialised) { - ret = engine->init_context(request); - if (ret) - goto err_unreserve; - - ce->initialised = true; - } + ret = intel_ring_wait_for_space(request->ring, request->reserved_space); + if (ret) + return ret; /* Note that after this point, we have committed to using * this request as it is being used to both track the @@ -948,12 +1213,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) request->reserved_space -= EXECLISTS_REQUEST_SIZE; return 0; - -err_unreserve: - if (i915.enable_guc_submission) - i915_guc_wq_unreserve(request); -err: - return ret; } /* @@ -1031,6 +1290,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES); + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) *batch++ = MI_NOOP; @@ -1044,26 +1305,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) return batch; } -/* - * This batch is started immediately after indirect_ctx batch. Since we ensure - * that indirect_ctx ends on a cacheline this batch is aligned automatically. - * - * The number of DWORDS written are returned using this field. - * - * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding - * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. 
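 *
 * With the per-ctx batch gone, the indirect-ctx batches below bracket
 * arbitration themselves; the net shape, per the following hunks:
 *
 *	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 *	... the workaround flushes ...
 *	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 *	while ((unsigned long)batch % CACHELINE_BYTES)
 *		*batch++ = MI_NOOP;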
- */ -static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - /* WaDisableCtxRestoreArbitration:bdw,chv */ - *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - *batch++ = MI_BATCH_BUFFER_END; - - return batch; -} - static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ batch = gen8_emit_flush_coherentl3_wa(engine, batch); @@ -1109,6 +1354,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) *batch++ = 0; } + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) *batch++ = MI_NOOP; @@ -1116,13 +1363,6 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) return batch; } -static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - *batch++ = MI_BATCH_BUFFER_END; - - return batch; -} - #define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) @@ -1175,13 +1415,15 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return -EINVAL; switch (INTEL_GEN(engine->i915)) { + case 10: + return 0; case 9: wa_bb_fn[0] = gen9_init_indirectctx_bb; - wa_bb_fn[1] = gen9_init_perctx_bb; + wa_bb_fn[1] = NULL; break; case 8: wa_bb_fn[0] = gen8_init_indirectctx_bb; - wa_bb_fn[1] = gen8_init_perctx_bb; + wa_bb_fn[1] = NULL; break; default: MISSING_CASE(INTEL_GEN(engine->i915)); @@ -1208,7 +1450,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) ret = -EINVAL; break; } - batch_ptr = wa_bb_fn[i](engine, batch_ptr); + if (wa_bb_fn[i]) + batch_ptr = wa_bb_fn[i](engine, batch_ptr); wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); } @@ -1232,9 +1475,7 @@ static u8 gtiir[] = { static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - struct execlist_port *port = engine->execlist_port; - unsigned int n; - bool submit; + struct intel_engine_execlists * const execlists = &engine->execlists; int ret; ret = intel_mocs_init_engine(engine); @@ -1267,24 +1508,15 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + execlists->csb_head = -1; + execlists->active = 0; - /* After a GPU reset, we may have requests to replay */ - submit = false; - for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { - if (!port_isset(&port[n])) - break; - - DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n", - engine->name, n, - port_request(&port[n])->global_seqno); - - /* Discard the current inflight count */ - port_set(&port[n], port_request(&port[n])); - submit = true; - } + execlists->elsp = + dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); - if (submit && !i915.enable_guc_submission) - execlists_submit_ports(engine); + /* After a GPU reset, we may have requests to replay */ + if (execlists->first) + tasklet_schedule(&execlists->tasklet); return 0; } @@ -1325,9 +1557,13 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - struct execlist_port *port = engine->execlist_port; + struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_context *ce; - unsigned int n; + unsigned long flags; 
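	/*
	 * A summary sketch of the reset sequence introduced below: under
	 * the timeline lock,
	 *
	 *	execlists_cancel_port_requests(execlists);
	 *	__unwind_incomplete_requests(engine);
	 *
	 * drops the ELSP tracking and requeues every incomplete request,
	 * so that it is replayed once the engine restarts.
	 */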
+ + GEM_TRACE("%s seqno=%x\n", + engine->name, request ? request->global_seqno : 0); + spin_lock_irqsave(&engine->timeline->lock, flags); /* * Catch up with any missed context-switch interrupts. @@ -1338,20 +1574,12 @@ static void reset_common_ring(struct intel_engine_cs *engine, * guessing the missed context-switch events by looking at what * requests were completed. */ - if (!request) { - for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) - i915_gem_request_put(port_request(&port[n])); - memset(engine->execlist_port, 0, sizeof(engine->execlist_port)); - return; - } + execlists_cancel_port_requests(execlists); - if (request->ctx != port_request(port)->ctx) { - i915_gem_request_put(port_request(port)); - port[0] = port[1]; - memset(&port[1], 0, sizeof(port[1])); - } + /* Push back any incomplete requests for replay after the reset. */ + __unwind_incomplete_requests(engine); - GEM_BUG_ON(request->ctx != port_request(port)->ctx); + spin_unlock_irqrestore(&engine->timeline->lock, flags); /* If the request was innocent, we leave the request in the ELSP * and will try to replay it on restarting. The context image may @@ -1363,7 +1591,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, * and have to at least restore the RING register in the context * image back to the expected values to skip over the guilty request. */ - if (request->fence.error != -EIO) + if (!request || request->fence.error != -EIO) return; /* We want a simple context + ring to execute the breadcrumb update. @@ -1386,10 +1614,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, intel_ring_update_space(request->ring); /* Reset WaIdleLiteRestore:bdw,skl as well */ - request->tail = - intel_ring_wrap(request->ring, - request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); - assert_ring_tail_valid(request->ring, request->tail); + unwind_wa_tail(request); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) @@ -1448,13 +1673,31 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, if (IS_ERR(cs)) return PTR_ERR(cs); + /* + * WaDisableCtxRestoreArbitration:bdw,chv + * + * We don't need to perform MI_ARB_ENABLE as often as we do (in + * particular all the gen that do not need the w/a at all!), if we + * took care to make sure that on every switch into this context + * (both ordinary and for preemption) that arbitration was enabled + * we would be fine. However, there doesn't seem to be a downside to + * being paranoid and making sure it is set before each batch and + * every context-switch. + * + * Note that if we fail to enable arbitration before the request + * is complete, then we do not see the context-switch interrupt and + * the engine hangs (with RING_HEAD == RING_TAIL). + * + * That satisfies both the GPGPU w/a and our heavy-handed paranoia. + */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + /* FIXME(BDW): Address space and security selectors. */ *cs++ = MI_BATCH_BUFFER_START_GEN8 | (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) | (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - *cs++ = MI_NOOP; intel_ring_advance(req, cs); return 0; @@ -1583,7 +1826,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, */ static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) { - *cs++ = MI_NOOP; + /* Ensure there's always at least one preemption point per-request.
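The reset path above now calls unwind_wa_tail() instead of open-coding the rewind; reconstructing the helper from the four removed lines (WA_TAIL_DWORDS, intel_ring_wrap() and assert_ring_tail_valid() as used elsewhere in the driver):

static void unwind_wa_tail(struct drm_i915_gem_request *request)
{
	/* Step back over the workaround tail emitted after the breadcrumb
	 * so that a replayed request executes it again. */
	request->tail =
		intel_ring_wrap(request->ring,
				request->wa_tail - WA_TAIL_DWORDS * sizeof(u32));
	assert_ring_tail_valid(request->ring, request->tail);
}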
*/ + *cs++ = MI_ARB_CHECK; *cs++ = MI_NOOP; request->wa_tail = intel_ring_offset(request, cs); } @@ -1593,10 +1837,8 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; - *cs++ = 0; - *cs++ = request->global_seqno; + cs = gen8_emit_ggtt_write(cs, request->global_seqno, + intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); @@ -1604,27 +1846,16 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) gen8_emit_wa_tail(request, cs); } - static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, +static void gen8_emit_breadcrumb_rcs(struct drm_i915_gem_request *request, u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); - /* w/a for post sync ops following a GPGPU operation we - * need a prior CS_STALL, which is emitted by the flush - * following the batch. - */ - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE; - *cs++ = intel_hws_seqno_address(request->engine); - *cs++ = 0; - *cs++ = request->global_seqno; - /* We're thrashing one dword of HWS. */ - *cs++ = 0; + cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno, + intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); @@ -1632,8 +1863,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, gen8_emit_wa_tail(request, cs); } - -static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; +static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct drm_i915_gem_request *req) { @@ -1666,8 +1896,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) * Tasklet cannot be active at this point due to intel_mark_active/idle * so this is just for documentation.
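Both breadcrumb emitters above are rewritten in terms of shared GGTT-write helpers. What gen8_emit_ggtt_write() has to emit can be read off the removed dwords; as a sketch:

/* Sketch of the non-RCS helper, reconstructed from the dwords the old
 * gen8_emit_breadcrumb() emitted by hand: a flushing store of one
 * dword through the GGTT. */
static inline u32 *gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
{
	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = value;
	return cs;
}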
*/ - if (WARN_ON(test_bit(TASKLET_STATE_SCHED, &engine->irq_tasklet.state))) - tasklet_kill(&engine->irq_tasklet); + if (WARN_ON(test_bit(TASKLET_STATE_SCHED, + &engine->execlists.tasklet.state))) + tasklet_kill(&engine->execlists.tasklet); dev_priv = engine->i915; @@ -1678,11 +1909,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) if (engine->cleanup) engine->cleanup(engine); - if (engine->status_page.vma) { - i915_gem_object_unpin_map(engine->status_page.vma->obj); - engine->status_page.vma = NULL; - } - intel_engine_cleanup_common(engine); lrc_destroy_wa_ctx(engine); @@ -1694,8 +1920,14 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) static void execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; + engine->cancel_requests = execlists_cancel_requests; engine->schedule = execlists_schedule; - engine->irq_tasklet.func = intel_lrc_irq_handler; + engine->execlists.tasklet.func = execlists_submission_tasklet; + + engine->park = NULL; + engine->unpark = NULL; + + engine->flags |= I915_ENGINE_SUPPORTS_STATS; } static void @@ -1729,24 +1961,6 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } -static int -lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma) -{ - const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE; - void *hws; - - /* The HWSP is part of the default context object in LRC mode. */ - hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(hws)) - return PTR_ERR(hws); - - engine->status_page.page_addr = hws + hws_offset; - engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset; - engine->status_page.vma = vma; - - return 0; -} - static void logical_ring_setup(struct intel_engine_cs *engine) { @@ -1770,32 +1984,23 @@ logical_ring_setup(struct intel_engine_cs *engine) RING_CONTEXT_STATUS_BUF_BASE(engine), FW_REG_READ); - engine->fw_domains = fw_domains; + engine->execlists.fw_domains = fw_domains; - tasklet_init(&engine->irq_tasklet, - intel_lrc_irq_handler, (unsigned long)engine); + tasklet_init(&engine->execlists.tasklet, + execlists_submission_tasklet, (unsigned long)engine); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); } -static int -logical_ring_init(struct intel_engine_cs *engine) +static int logical_ring_init(struct intel_engine_cs *engine) { - struct i915_gem_context *dctx = engine->i915->kernel_context; int ret; ret = intel_engine_init_common(engine); if (ret) goto error; - /* And setup the hardware status page. 
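execlists_set_default_submission() above explicitly clears engine->park and engine->unpark, which implies the callers treat the hooks as optional. A sketch of such guarded call sites (the wrapper names are hypothetical):

/* Hypothetical wrappers: a NULL hook means the submission backend has
 * nothing to do when the engine idles or wakes. */
static void engine_park(struct intel_engine_cs *engine)
{
	if (engine->park)
		engine->park(engine);
}

static void engine_unpark(struct intel_engine_cs *engine)
{
	if (engine->unpark)
		engine->unpark(engine);
}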
*/ - ret = lrc_setup_hws(engine, dctx->engine[engine->id].state); - if (ret) { - DRM_ERROR("Failed to set up hws %s: %d\n", engine->name, ret); - goto error; - } - return 0; error: @@ -1820,8 +2025,8 @@ int logical_render_ring_init(struct intel_engine_cs *engine) engine->init_hw = gen8_init_render_ring; engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; - engine->emit_breadcrumb = gen8_emit_breadcrumb_render; - engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz; + engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs; + engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz; ret = intel_engine_create_scratch(engine, PAGE_SIZE); if (ret) @@ -1938,7 +2143,6 @@ static void execlists_init_reg_state(u32 *regs, CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine), _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | (HAS_RESOURCE_STREAMER(dev_priv) ? CTX_CTRL_RS_CTX_ENABLE : 0))); CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); @@ -1953,13 +2157,12 @@ static void execlists_init_reg_state(u32 *regs, CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); if (rcs) { - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); + struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; + CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, RING_INDIRECT_CTX_OFFSET(base), 0); - - if (engine->wa_ctx.vma) { - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; + if (wa_ctx->indirect_ctx.size) { u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); regs[CTX_RCS_INDIRECT_CTX + 1] = @@ -1968,6 +2171,11 @@ static void execlists_init_reg_state(u32 *regs, regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = intel_lr_indirect_ctx_offset(engine) << 6; + } + + CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); + if (wa_ctx->per_ctx.size) { + u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); regs[CTX_BB_PER_CTX_PTR + 1] = (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; @@ -2011,6 +2219,7 @@ populate_lr_context(struct i915_gem_context *ctx, struct intel_ring *ring) { void *vaddr; + u32 *regs; int ret; ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); @@ -2027,11 +2236,31 @@ populate_lr_context(struct i915_gem_context *ctx, } ctx_obj->mm.dirty = true; + if (engine->default_state) { + /* + * We only want to copy over the template context state; + * skipping over the headers reserved for GuC communication, + * leaving those as zero. + */ + const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE; + void *defaults; + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(defaults)) + return PTR_ERR(defaults); + + memcpy(vaddr + start, defaults + start, engine->context_size); + i915_gem_object_unpin_map(engine->default_state); + } + /* The second page of the context object contains some fields which must * be set up prior to the first execution. 
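The default-state copy above deliberately starts after the header pages, so the page shared with the GuC stays zeroed and only the template context image is duplicated. The offset arithmetic in isolation, as a sketch:

/* Sketch: copy the template context image, leaving the header pages
 * (see LRC_HEADER_PAGES below) untouched for GuC communication. */
static void copy_default_state(void *dst, const void *defaults,
			       unsigned long context_size)
{
	const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE;

	memcpy(dst + start, defaults + start, context_size);
}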
*/ - - execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, - ctx, engine, ring); + regs = vaddr + LRC_STATE_PN * PAGE_SIZE; + execlists_init_reg_state(regs, ctx, engine, ring); + if (!engine->default_state) + regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); i915_gem_object_unpin_map(ctx_obj); @@ -2052,8 +2281,11 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); - /* One extra page as the sharing data between driver and GuC */ - context_size += PAGE_SIZE * LRC_PPHWSP_PN; + /* + * Before the actual start of the context image, we insert a few pages + * for our own use and for sharing with the GuC. + */ + context_size += LRC_HEADER_PAGES * PAGE_SIZE; ctx_obj = i915_gem_object_create(ctx->i915, context_size); if (IS_ERR(ctx_obj)) { @@ -2081,7 +2313,6 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ce->ring = ring; ce->state = vma; - ce->initialised |= engine->init_context == NULL; return 0; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 57ef5833c427..6d4f9b995a11 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -25,6 +25,7 @@ #define _INTEL_LRC_H_ #include "intel_ringbuffer.h" +#include "i915_gem_context.h" #define GEN8_LR_CONTEXT_ALIGN I915_GTT_MIN_ALIGNMENT @@ -60,6 +61,7 @@ enum { INTEL_CONTEXT_SCHEDULE_IN = 0, INTEL_CONTEXT_SCHEDULE_OUT, + INTEL_CONTEXT_SCHEDULE_PREEMPTED, }; /* Logical Rings */ @@ -69,20 +71,40 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); /* Logical Ring Contexts */ -/* One extra page is added before LRC for GuC as shared data */ +/* + * We allocate a header at the start of the context image for our own + * use, therefore the actual location of the logical state is offset + * from the start of the VMA. The layout is + * + * | [guc] | [hwsp] [logical state] | + * |<- our header ->|<- context image ->| + * + */ +/* The first page is used for sharing data with the GuC */ #define LRC_GUCSHR_PN (0) -#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + 1) -#define LRC_STATE_PN (LRC_PPHWSP_PN + 1) +#define LRC_GUCSHR_SZ (1) +/* At the start of the context image is its per-process HWS page */ +#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ) +#define LRC_PPHWSP_SZ (1) +/* Finally we have the logical state for the context */ +#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ) + +/* + * Currently we include the PPHWSP in __intel_engine_context_size() so + * the size of the header is synonymous with the start of the PPHWSP. 
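Plugging numbers into the page macros above and the LRC_HEADER_PAGES definition that follows: the GuC share is page 0, the PPHWSP page 1, and the register state starts at page 2, while the header resolves to a single page because the PPHWSP is already billed to the context image. A compile-time restatement of that layout, as a sketch:

/* Sketch: the page layout encoded by the LRC_* macros. */
_Static_assert(LRC_GUCSHR_PN == 0, "GuC shared page first");
_Static_assert(LRC_PPHWSP_PN == 1, "per-process HWSP second");
_Static_assert(LRC_STATE_PN == 2, "logical register state third");
_Static_assert(LRC_HEADER_PAGES == 1, "header covers only the GuC page");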
+ */ +#define LRC_HEADER_PAGES LRC_PPHWSP_PN struct drm_i915_private; struct i915_gem_context; void intel_lr_context_resume(struct drm_i915_private *dev_priv); -uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); -/* Execlists */ -int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, - int enable_execlists); +static inline uint64_t +intel_lr_context_descriptor(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + return ctx->engine[engine->id].lrc_desc; +} #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index beb9baaf2f2e..dcbc786479f9 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -56,7 +56,7 @@ static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon) struct i2c_adapter *adapter = &lspcon_to_intel_dp(lspcon)->aux.ddc; if (drm_lspcon_get_mode(adapter, ¤t_mode)) { - DRM_ERROR("Error reading LSPCON mode\n"); + DRM_DEBUG_KMS("Error reading LSPCON mode\n"); return DRM_LSPCON_MODE_INVALID; } return current_mode; @@ -68,16 +68,15 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon, enum drm_lspcon_mode current_mode; current_mode = lspcon_get_current_mode(lspcon); - if (current_mode == mode || current_mode == DRM_LSPCON_MODE_INVALID) + if (current_mode == mode) goto out; DRM_DEBUG_KMS("Waiting for LSPCON mode %s to settle\n", lspcon_mode_name(mode)); - wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode || - current_mode == DRM_LSPCON_MODE_INVALID, 100); + wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, 100); if (current_mode != mode) - DRM_DEBUG_KMS("LSPCON mode hasn't settled\n"); + DRM_ERROR("LSPCON mode hasn't settled\n"); out: DRM_DEBUG_KMS("Current LSPCON mode %s\n", @@ -133,6 +132,7 @@ static bool lspcon_wake_native_aux_ch(struct intel_lspcon *lspcon) static bool lspcon_probe(struct intel_lspcon *lspcon) { + int retry; enum drm_dp_dual_mode_type adaptor_type; struct i2c_adapter *adapter = &lspcon_to_intel_dp(lspcon)->aux.ddc; enum drm_lspcon_mode expected_mode; @@ -141,10 +141,18 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) DRM_LSPCON_MODE_PCON : DRM_LSPCON_MODE_LS; /* Lets probe the adaptor and check its type */ - adaptor_type = drm_dp_dual_mode_detect(adapter); + for (retry = 0; retry < 6; retry++) { + if (retry) + usleep_range(500, 1000); + + adaptor_type = drm_dp_dual_mode_detect(adapter); + if (adaptor_type == DRM_DP_DUAL_MODE_LSPCON) + break; + } + if (adaptor_type != DRM_DP_DUAL_MODE_LSPCON) { DRM_DEBUG_KMS("No LSPCON detected, found %s\n", - drm_dp_get_dual_mode_type_name(adaptor_type)); + drm_dp_get_dual_mode_type_name(adaptor_type)); return false; } diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 8e215777c7f4..ef80499113ee 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -125,6 +125,8 @@ static void intel_lvds_get_config(struct intel_encoder *encoder, struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); u32 tmp, flags = 0; + pipe_config->output_types |= BIT(INTEL_OUTPUT_LVDS); + tmp = I915_READ(lvds_encoder->reg); if (tmp & LVDS_HSYNC_POLARITY) flags |= DRM_MODE_FLAG_NHSYNC; @@ -229,8 +231,8 @@ static void intel_lvds_pps_init_hw(struct drm_i915_private *dev_priv, } static void intel_pre_enable_lvds(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct 
drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -306,8 +308,8 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder, * Sets the power state for the panel. */ static void intel_enable_lvds(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); @@ -324,8 +326,8 @@ static void intel_enable_lvds(struct intel_encoder *encoder, } static void intel_disable_lvds(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -339,8 +341,8 @@ static void intel_disable_lvds(struct intel_encoder *encoder, } static void gmch_disable_lvds(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { intel_panel_disable_backlight(old_conn_state); @@ -349,15 +351,15 @@ static void gmch_disable_lvds(struct intel_encoder *encoder, } static void pch_disable_lvds(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { intel_panel_disable_backlight(old_conn_state); } static void pch_post_disable_lvds(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { intel_disable_lvds(encoder, old_crtc_state, old_conn_state); } @@ -880,8 +882,8 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) struct drm_i915_private *dev_priv = to_i915(dev); /* use the module option value if specified */ - if (i915.lvds_channel_mode > 0) - return i915.lvds_channel_mode == 2; + if (i915_modparams.lvds_channel_mode > 0) + return i915_modparams.lvds_channel_mode == 2; /* single channel LVDS is limited to 112 MHz */ if (lvds_encoder->attached_connector->base.panel.fixed_mode->clock @@ -939,10 +941,8 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) struct drm_display_mode *fixed_mode = NULL; struct drm_display_mode *downclock_mode = NULL; struct edid *edid; - struct intel_crtc *crtc; i915_reg_t lvds_reg; u32 lvds; - int pipe; u8 pin; u32 allowed_scalers; @@ -1113,22 +1113,11 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) * on. If so, assume that whatever is currently programmed is the * correct mode. */ - - /* Ironlake: FIXME if still fail, not try pipe mode now */ - if (HAS_PCH_SPLIT(dev_priv)) - goto failed; - - pipe = (lvds & LVDS_PIPEB_SELECT) ? 
1 : 0; - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - - if (crtc && (lvds & LVDS_PORT_EN)) { - fixed_mode = intel_crtc_mode_get(dev, &crtc->base); - if (fixed_mode) { - DRM_DEBUG_KMS("using current (BIOS) mode: "); - drm_mode_debug_printmodeline(fixed_mode); - fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; - goto out; - } + fixed_mode = intel_encoder_current_mode(intel_encoder); + if (fixed_mode) { + DRM_DEBUG_KMS("using current (BIOS) mode: "); + drm_mode_debug_printmodeline(fixed_mode); + fixed_mode->type |= DRM_MODE_TYPE_PREFERRED; } /* If we still don't have a mode after all that, give up. */ diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c index 28a778b785ac..4e43f873c889 100644 --- a/drivers/gpu/drm/i915/intel_modes.c +++ b/drivers/gpu/drm/i915/intel_modes.c @@ -57,7 +57,6 @@ int intel_connector_update_modes(struct drm_connector *connector, drm_mode_connector_update_edid_property(connector, edid); ret = drm_add_edid_modes(connector, edid); - drm_edid_to_eld(connector, edid); intel_connector_update_eld_conn_type(connector); diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 98154efcb2f4..c58e5f53bab0 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -32,6 +32,8 @@ #include <drm/drmP.h> #include <drm/i915_drm.h> + +#include "intel_opregion.h" #include "i915_drv.h" #include "intel_drv.h" @@ -367,7 +369,7 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, if (intel_encoder->type == INTEL_OUTPUT_DSI) port = 0; else - port = intel_ddi_get_encoder_port(intel_encoder); + port = intel_encoder->port; if (port == PORT_E) { port = 0; @@ -383,7 +385,7 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, case INTEL_OUTPUT_ANALOG: type = DISPLAY_TYPE_CRT; break; - case INTEL_OUTPUT_UNKNOWN: + case INTEL_OUTPUT_DDI: case INTEL_OUTPUT_DP: case INTEL_OUTPUT_HDMI: case INTEL_OUTPUT_DP_MST: @@ -921,7 +923,7 @@ static int intel_load_vbt_firmware(struct drm_i915_private *dev_priv) { struct intel_opregion *opregion = &dev_priv->opregion; const struct firmware *fw = NULL; - const char *name = i915.vbt_firmware; + const char *name = i915_modparams.vbt_firmware; int ret; if (!name || !*name) diff --git a/drivers/gpu/drm/i915/intel_opregion.h b/drivers/gpu/drm/i915/intel_opregion.h new file mode 100644 index 000000000000..e0e437ba9e51 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_opregion.h @@ -0,0 +1,106 @@ +/* + * Copyright © 2008-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_OPREGION_H_ +#define _INTEL_OPREGION_H_ + +#include <linux/workqueue.h> +#include <linux/pci.h> + +struct drm_i915_private; +struct intel_encoder; + +struct opregion_header; +struct opregion_acpi; +struct opregion_swsci; +struct opregion_asle; + +struct intel_opregion { + struct opregion_header *header; + struct opregion_acpi *acpi; + struct opregion_swsci *swsci; + u32 swsci_gbda_sub_functions; + u32 swsci_sbcb_sub_functions; + struct opregion_asle *asle; + void *rvda; + void *vbt_firmware; + const void *vbt; + u32 vbt_size; + u32 *lid_state; + struct work_struct asle_work; +}; + +#define OPREGION_SIZE (8 * 1024) + +#ifdef CONFIG_ACPI + +int intel_opregion_setup(struct drm_i915_private *dev_priv); +void intel_opregion_register(struct drm_i915_private *dev_priv); +void intel_opregion_unregister(struct drm_i915_private *dev_priv); +void intel_opregion_asle_intr(struct drm_i915_private *dev_priv); +int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, + bool enable); +int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, + pci_power_t state); +int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv); + +#else /* CONFIG_ACPI*/ + +static inline int intel_opregion_setup(struct drm_i915_private *dev_priv) +{ + return 0; +} + +static inline void intel_opregion_register(struct drm_i915_private *dev_priv) +{ +} + +static inline void intel_opregion_unregister(struct drm_i915_private *dev_priv) +{ +} + +static inline void intel_opregion_asle_intr(struct drm_i915_private *dev_priv) +{ +} + +static inline int +intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, bool enable) +{ + return 0; +} + +static inline int +intel_opregion_notify_adapter(struct drm_i915_private *dev, pci_power_t state) +{ + return 0; +} + +static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev) +{ + return -ENODEV; +} + +#endif /* CONFIG_ACPI */ + +#endif diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index aace22e7ccac..41e9465d44a8 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -219,7 +219,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay) if (OVERLAY_NEEDS_PHYSICAL(dev_priv)) regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr; else - regs = io_mapping_map_wc(&dev_priv->ggtt.mappable, + regs = io_mapping_map_wc(&dev_priv->ggtt.iomap, overlay->flip_addr, PAGE_SIZE); @@ -1134,7 +1134,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, if (!params) return -ENOMEM; - drmmode_crtc = drm_crtc_find(dev, put_image_rec->crtc_id); + drmmode_crtc = drm_crtc_find(dev, file_priv, put_image_rec->crtc_id); if (!drmmode_crtc) { ret = -ENOENT; goto out_free; @@ -1508,7 +1508,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay *overlay) regs = (struct overlay_registers __iomem *) overlay->reg_bo->phys_handle->vaddr; else - regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.mappable, + regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.iomap, overlay->flip_addr); return regs; diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 3b1c5d783ee7..fa6831f8c004 100644 --- 
a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -379,13 +379,13 @@ enum drm_connector_status intel_panel_detect(struct drm_i915_private *dev_priv) { /* Assume that the BIOS does not lie through the OpRegion... */ - if (!i915.panel_ignore_lid && dev_priv->opregion.lid_state) { + if (!i915_modparams.panel_ignore_lid && dev_priv->opregion.lid_state) { return *dev_priv->opregion.lid_state & 0x1 ? connector_status_connected : connector_status_disconnected; } - switch (i915.panel_ignore_lid) { + switch (i915_modparams.panel_ignore_lid) { case -2: return connector_status_connected; case -1: @@ -465,10 +465,10 @@ static u32 intel_panel_compute_brightness(struct intel_connector *connector, WARN_ON(panel->backlight.max == 0); - if (i915.invert_brightness < 0) + if (i915_modparams.invert_brightness < 0) return val; - if (i915.invert_brightness > 0 || + if (i915_modparams.invert_brightness > 0 || dev_priv->quirks & QUIRK_INVERT_BRIGHTNESS) { return panel->backlight.max - val + panel->backlight.min; } @@ -543,25 +543,6 @@ static u32 pwm_get_backlight(struct intel_connector *connector) return DIV_ROUND_UP(duty_ns * 100, CRC_PMIC_PWM_PERIOD_NS); } -static u32 intel_panel_get_backlight(struct intel_connector *connector) -{ - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - struct intel_panel *panel = &connector->panel; - u32 val = 0; - - mutex_lock(&dev_priv->backlight_lock); - - if (panel->backlight.enabled) { - val = panel->backlight.get(connector); - val = intel_panel_compute_brightness(connector, val); - } - - mutex_unlock(&dev_priv->backlight_lock); - - DRM_DEBUG_DRIVER("get backlight PWM = %d\n", val); - return val; -} - static void lpt_set_backlight(const struct drm_connector_state *conn_state, u32 level) { struct intel_connector *connector = to_intel_connector(conn_state->connector); @@ -649,31 +630,6 @@ intel_panel_actually_set_backlight(const struct drm_connector_state *conn_state, panel->backlight.set(conn_state, level); } -/* set backlight brightness to level in range [0..max], scaling wrt hw min */ -static void intel_panel_set_backlight(const struct drm_connector_state *conn_state, - u32 user_level, u32 user_max) -{ - struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - struct intel_panel *panel = &connector->panel; - u32 hw_level; - - if (!panel->backlight.present) - return; - - mutex_lock(&dev_priv->backlight_lock); - - WARN_ON(panel->backlight.max == 0); - - hw_level = scale_user_to_hw(connector, user_level, user_max); - panel->backlight.level = hw_level; - - if (panel->backlight.enabled) - intel_panel_actually_set_backlight(conn_state, hw_level); - - mutex_unlock(&dev_priv->backlight_lock); -} - /* set backlight brightness to level in range [0..max], assuming hw min is * respected. 
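intel_panel_compute_brightness(), used by the PWM readback above, reflects values when i915_modparams.invert_brightness or QUIRK_INVERT_BRIGHTNESS asks for it. The arithmetic in isolation, as a sketch:

/* Sketch: reflect a brightness value within the panel's hardware
 * [min, max] range; min maps to max and vice versa. */
static u32 invert_brightness(u32 val, u32 min, u32 max)
{
	return max - val + min;
}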
*/ @@ -1182,6 +1138,50 @@ void intel_panel_enable_backlight(const struct intel_crtc_state *crtc_state, } #if IS_ENABLED(CONFIG_BACKLIGHT_CLASS_DEVICE) +static u32 intel_panel_get_backlight(struct intel_connector *connector) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + u32 val = 0; + + mutex_lock(&dev_priv->backlight_lock); + + if (panel->backlight.enabled) { + val = panel->backlight.get(connector); + val = intel_panel_compute_brightness(connector, val); + } + + mutex_unlock(&dev_priv->backlight_lock); + + DRM_DEBUG_DRIVER("get backlight PWM = %d\n", val); + return val; +} + +/* set backlight brightness to level in range [0..max], scaling wrt hw min */ +static void intel_panel_set_backlight(const struct drm_connector_state *conn_state, + u32 user_level, u32 user_max) +{ + struct intel_connector *connector = to_intel_connector(conn_state->connector); + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; + u32 hw_level; + + if (!panel->backlight.present) + return; + + mutex_lock(&dev_priv->backlight_lock); + + WARN_ON(panel->backlight.max == 0); + + hw_level = scale_user_to_hw(connector, user_level, user_max); + panel->backlight.level = hw_level; + + if (panel->backlight.enabled) + intel_panel_actually_set_backlight(conn_state, hw_level); + + mutex_unlock(&dev_priv->backlight_lock); +} + static int intel_backlight_device_update_status(struct backlight_device *bd) { struct intel_connector *connector = bl_get_data(bd); diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 8fbd2bd0877f..1f5cd572a7ff 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -206,11 +206,11 @@ static const char *pipe_crc_source_name(enum intel_pipe_crc_source source) static int display_crc_ctl_show(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = m->private; - int i; + enum pipe pipe; - for (i = 0; i < I915_MAX_PIPES; i++) - seq_printf(m, "%c %s\n", pipe_name(i), - pipe_crc_source_name(dev_priv->pipe_crc[i].source)); + for_each_pipe(dev_priv, pipe) + seq_printf(m, "%c %s\n", pipe_name(pipe), + pipe_crc_source_name(dev_priv->pipe_crc[pipe].source)); return 0; } @@ -269,7 +269,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_i915_private *dev_priv, case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: dig_port = enc_to_dig_port(&encoder->base); - switch (dig_port->port) { + switch (dig_port->base.port) { case PORT_B: *source = INTEL_PIPE_CRC_SOURCE_DP_B; break; @@ -281,7 +281,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_i915_private *dev_priv, break; default: WARN(1, "nonexisting DP port %c\n", - port_name(dig_port->port)); + port_name(dig_port->base.port)); break; } break; @@ -506,8 +506,8 @@ static int ilk_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source, return 0; } -static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, - bool enable) +static void hsw_pipe_A_crc_wa(struct drm_i915_private *dev_priv, + bool enable) { struct drm_device *dev = &dev_priv->drm; struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); @@ -533,10 +533,22 @@ retry: goto put_state; } - pipe_config->pch_pfit.force_thru = enable; - if (pipe_config->cpu_transcoder == TRANSCODER_EDP && - pipe_config->pch_pfit.enabled != enable) - pipe_config->base.connectors_changed = true; + if (HAS_IPS(dev_priv)) { + /* + * When IPS gets enabled, the pipe CRC changes. 
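display_crc_ctl_show() above switches from a raw 0..I915_MAX_PIPES loop to for_each_pipe(), so only pipes the device actually has are listed. Assuming the macro is bounded by the runtime pipe count, it expands to something like:

/* Sketch: iterate the pipes this device reports, not the
 * compile-time maximum. */
#define for_each_pipe(dev_priv, p) \
	for ((p) = 0; (p) < INTEL_INFO(dev_priv)->num_pipes; (p)++)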
Since IPS gets + * enabled and disabled dynamically based on package C states, + * user space can't make reliable use of the CRCs, so let's just + * completely disable it. + */ + pipe_config->ips_force_disable = enable; + } + + if (IS_HASWELL(dev_priv)) { + pipe_config->pch_pfit.force_thru = enable; + if (pipe_config->cpu_transcoder == TRANSCODER_EDP && + pipe_config->pch_pfit.enabled != enable) + pipe_config->base.connectors_changed = true; + } ret = drm_atomic_commit(state); @@ -570,8 +582,9 @@ static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_SPRITE_IVB; break; case INTEL_PIPE_CRC_SOURCE_PF: - if (IS_HASWELL(dev_priv) && pipe == PIPE_A) - hsw_trans_edp_pipe_A_crc_wa(dev_priv, true); + if ((IS_HASWELL(dev_priv) || + IS_BROADWELL(dev_priv)) && pipe == PIPE_A) + hsw_pipe_A_crc_wa(dev_priv, true); *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_PF_IVB; break; @@ -606,7 +619,6 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, enum intel_pipe_crc_source source) { struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe]; - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); enum intel_display_power_domain power_domain; u32 val = 0; /* shut up gcc */ int ret; @@ -643,14 +655,6 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, goto out; } - /* - * When IPS gets enabled, the pipe CRC changes. Since IPS gets - * enabled and disabled dynamically based on package C states, - * user space can't make reliable use of the CRCs, so let's just - * completely disable it. - */ - hsw_disable_ips(crtc); - spin_lock_irq(&pipe_crc->lock); kfree(pipe_crc->entries); pipe_crc->entries = entries; @@ -691,10 +695,9 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, g4x_undo_pipe_scramble_reset(dev_priv, pipe); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_undo_pipe_scramble_reset(dev_priv, pipe); - else if (IS_HASWELL(dev_priv) && pipe == PIPE_A) - hsw_trans_edp_pipe_A_crc_wa(dev_priv, false); - - hsw_enable_ips(crtc); + else if ((IS_HASWELL(dev_priv) || + IS_BROADWELL(dev_priv)) && pipe == PIPE_A) + hsw_pipe_A_crc_wa(dev_priv, false); } ret = 0; @@ -770,11 +773,12 @@ display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o) return -EINVAL; } -static int display_crc_ctl_parse_pipe(const char *buf, enum pipe *pipe) +static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv, + const char *buf, enum pipe *pipe) { const char name = buf[0]; - if (name < 'A' || name >= pipe_name(I915_MAX_PIPES)) + if (name < 'A' || name >= pipe_name(INTEL_INFO(dev_priv)->num_pipes)) return -EINVAL; *pipe = name - 'A'; @@ -823,7 +827,7 @@ static int display_crc_ctl_parse(struct drm_i915_private *dev_priv, return -EINVAL; } - if (display_crc_ctl_parse_pipe(words[1], &pipe) < 0) { + if (display_crc_ctl_parse_pipe(dev_priv, words[1], &pipe) < 0) { DRM_DEBUG_DRIVER("unknown pipe %s\n", words[1]); return -EINVAL; } @@ -914,7 +918,6 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, { struct drm_i915_private *dev_priv = crtc->dev->dev_private; struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index]; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum intel_display_power_domain power_domain; enum intel_pipe_crc_source source; u32 val = 0; /* shut up gcc */ @@ -935,16 +938,6 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, if (ret != 0) goto out; - if (source) { - /* - * When IPS gets enabled, 
the pipe CRC changes. Since IPS gets - * enabled and disabled dynamically based on package C states, - * user space can't make reliable use of the CRCs, so let's just - * completely disable it. - */ - hsw_disable_ips(intel_crtc); - } - I915_WRITE(PIPE_CRC_CTL(crtc->index), val); POSTING_READ(PIPE_CRC_CTL(crtc->index)); @@ -953,10 +946,9 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, g4x_undo_pipe_scramble_reset(dev_priv, crtc->index); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_undo_pipe_scramble_reset(dev_priv, crtc->index); - else if (IS_HASWELL(dev_priv) && crtc->index == PIPE_A) - hsw_trans_edp_pipe_A_crc_wa(dev_priv, false); - - hsw_enable_ips(intel_crtc); + else if ((IS_HASWELL(dev_priv) || + IS_BROADWELL(dev_priv)) && crtc->index == PIPE_A) + hsw_pipe_A_crc_wa(dev_priv, false); } pipe_crc->skipped = 0; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cb950752c346..1a6e699e19e0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -52,33 +52,26 @@ * which brings the most power savings; deeper states save more power, but * require higher latency to switch to and wake up. */ -#define INTEL_RC6_ENABLE (1<<0) -#define INTEL_RC6p_ENABLE (1<<1) -#define INTEL_RC6pp_ENABLE (1<<2) static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) { + if (HAS_LLC(dev_priv)) { + /* + * WaCompressedResourceDisplayNewHashMode:skl,kbl + * Display WA #0390: skl,kbl + * + * Must match Sampler, Pixel Back End, and Media. See + * WaCompressedResourceSamplerPbeMediaNewHashMode. + */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | + SKL_DE_COMPRESSED_HASH_MODE); + } + /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */ I915_WRITE(CHICKEN_PAR1_1, I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP); - /* - * Display WA#0390: skl,bxt,kbl,glk - * - * Must match Sampler, Pixel Back End, and Media - * (0xE194 bit 8, 0x7014 bit 13, 0x4DDC bits 27 and 31). - * - * Including bits outside the page in the hash would - * require 2 (or 4?) MiB alignment of resources. Just - * assume the defaul hashing mode which only uses bits - * within the page. 
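Both CHICKEN_PAR1_1 updates in gen9_init_clock_gating() follow the usual read-modify-write shape for chicken-bit workarounds; factored out as a sketch (the helper name is hypothetical):

/* Sketch: set workaround bits while preserving the rest of a
 * chicken register. */
static void set_chicken_bits(struct drm_i915_private *dev_priv,
			     i915_reg_t reg, u32 bits)
{
	I915_WRITE(reg, I915_READ(reg) | bits);
}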
- */ - I915_WRITE(CHICKEN_PAR1_1, - I915_READ(CHICKEN_PAR1_1) & ~SKL_RC_HASH_OUTSIDE); - - I915_WRITE(GEN8_CONFIG0, - I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); - /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */ I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); @@ -317,7 +310,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); if (enable) @@ -332,14 +325,14 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable) FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) DRM_ERROR("timed out waiting for Punit DDR DVFS request\n"); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) { u32 val; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); if (enable) @@ -348,7 +341,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) val &= ~DSP_MAXFIFO_PM5_ENABLE; vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } #define FW_WM(value, plane) \ @@ -516,38 +509,41 @@ static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) fifo_state->plane[PLANE_CURSOR] = 63; } -static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane) +static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, + enum i9xx_plane_id i9xx_plane) { uint32_t dsparb = I915_READ(DSPARB); int size; size = dsparb & 0x7f; - if (plane) + if (i9xx_plane == PLANE_B) size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size; - DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb, - plane ? "B" : "A", size); + DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } -static int i830_get_fifo_size(struct drm_i915_private *dev_priv, int plane) +static int i830_get_fifo_size(struct drm_i915_private *dev_priv, + enum i9xx_plane_id i9xx_plane) { uint32_t dsparb = I915_READ(DSPARB); int size; size = dsparb & 0x1ff; - if (plane) + if (i9xx_plane == PLANE_B) size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size; size >>= 1; /* Convert to cachelines */ - DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb, - plane ? "B" : "A", size); + DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } -static int i845_get_fifo_size(struct drm_i915_private *dev_priv, int plane) +static int i845_get_fifo_size(struct drm_i915_private *dev_priv, + enum i9xx_plane_id i9xx_plane) { uint32_t dsparb = I915_READ(DSPARB); int size; @@ -555,9 +551,8 @@ static int i845_get_fifo_size(struct drm_i915_private *dev_priv, int plane) size = dsparb & 0x7f; size >>= 2; /* Convert to cachelines */ - DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb, - plane ? "B" : "A", - size); + DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n", + dsparb, plane_name(i9xx_plane), size); return size; } @@ -923,7 +918,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) * and the size of 8 whole lines. This adjustment is always performed * in the actual pixel depth regardless of whether FBC is enabled or not." 
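The retyped FIFO helpers all decode the same DSPARB split: the low field is where plane A's FIFO ends, and plane B owns the span from there up to the cursor-start field. In isolation, following the i9xx variant above:

/* Sketch: plane A's FIFO runs from 0 to the low 7-bit DSPARB field;
 * plane B's share is the distance from there to CSTART. */
static int i9xx_fifo_size_sketch(u32 dsparb, bool plane_b)
{
	int size = dsparb & 0x7f;

	if (plane_b)
		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

	return size;
}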
*/ -static int g4x_tlb_miss_wa(int fifo_size, int width, int cpp) +static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp) { int tlb_miss = fifo_size * 64 - width * cpp * 8; @@ -1100,8 +1095,8 @@ static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; - int clock, htotal, cpp, width, wm; - int latency = dev_priv->wm.pri_latency[level] * 10; + unsigned int latency = dev_priv->wm.pri_latency[level] * 10; + unsigned int clock, htotal, cpp, width, wm; if (latency == 0) return USHRT_MAX; @@ -1140,7 +1135,7 @@ static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state, level == G4X_WM_LEVEL_NORMAL) { wm = intel_wm_method1(clock, cpp, latency); } else { - int small, large; + unsigned int small, large; small = intel_wm_method1(clock, cpp, latency); large = intel_wm_method2(clock, htotal, width, cpp, latency); @@ -1153,7 +1148,7 @@ static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state, wm = DIV_ROUND_UP(wm, 64) + 2; - return min_t(int, wm, USHRT_MAX); + return min_t(unsigned int, wm, USHRT_MAX); } static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state, @@ -1322,21 +1317,21 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) int num_active_planes = hweight32(crtc_state->active_planes & ~BIT(PLANE_CURSOR)); const struct g4x_pipe_wm *raw; - struct intel_plane_state *plane_state; + const struct intel_plane_state *old_plane_state; + const struct intel_plane_state *new_plane_state; struct intel_plane *plane; enum plane_id plane_id; int i, level; unsigned int dirty = 0; - for_each_intel_plane_in_state(state, plane, plane_state, i) { - const struct intel_plane_state *old_plane_state = - to_intel_plane_state(plane->base.state); - - if (plane_state->base.crtc != &crtc->base && + for_each_oldnew_intel_plane_in_state(state, plane, + old_plane_state, + new_plane_state, i) { + if (new_plane_state->base.crtc != &crtc->base && old_plane_state->base.crtc != &crtc->base) continue; - if (g4x_raw_plane_wm_compute(crtc_state, plane_state)) + if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state)) dirty |= BIT(plane->id); } @@ -1404,17 +1399,29 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) static int g4x_compute_intermediate_wm(struct drm_device *dev, struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) -{ - struct g4x_wm_state *intermediate = &crtc_state->wm.g4x.intermediate; - const struct g4x_wm_state *optimal = &crtc_state->wm.g4x.optimal; - const struct g4x_wm_state *active = &crtc->wm.active.g4x; + struct intel_crtc_state *new_crtc_state) +{ + struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate; + const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal; + struct intel_atomic_state *intel_state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(intel_state, crtc); + const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal; enum plane_id plane_id; + if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) { + *intermediate = *optimal; + + intermediate->cxsr = false; + intermediate->hpll_en = false; + goto out; + } + intermediate->cxsr = optimal->cxsr && active->cxsr && - !crtc_state->disable_cxsr; + !new_crtc_state->disable_cxsr; intermediate->hpll_en = optimal->hpll_en && 
active->hpll_en && - !crtc_state->disable_cxsr; + !new_crtc_state->disable_cxsr; intermediate->fbc_en = optimal->fbc_en && active->fbc_en; for_each_plane_id_on_crtc(crtc, plane_id) { @@ -1456,12 +1463,13 @@ static int g4x_compute_intermediate_wm(struct drm_device *dev, WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) && intermediate->fbc_en && intermediate->hpll_en); +out: /* * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0) - crtc_state->wm.need_postvbl_update = true; + new_crtc_state->wm.need_postvbl_update = true; return 0; } @@ -1597,7 +1605,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; - int clock, htotal, cpp, width, wm; + unsigned int clock, htotal, cpp, width, wm; if (dev_priv->wm.pri_latency[level] == 0) return USHRT_MAX; @@ -1623,7 +1631,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, dev_priv->wm.pri_latency[level] * 10); } - return min_t(int, wm, USHRT_MAX); + return min_t(unsigned int, wm, USHRT_MAX); } static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes) @@ -1831,21 +1839,21 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) int num_active_planes = hweight32(crtc_state->active_planes & ~BIT(PLANE_CURSOR)); bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base); - struct intel_plane_state *plane_state; + const struct intel_plane_state *old_plane_state; + const struct intel_plane_state *new_plane_state; struct intel_plane *plane; enum plane_id plane_id; int level, ret, i; unsigned int dirty = 0; - for_each_intel_plane_in_state(state, plane, plane_state, i) { - const struct intel_plane_state *old_plane_state = - to_intel_plane_state(plane->base.state); - - if (plane_state->base.crtc != &crtc->base && + for_each_oldnew_intel_plane_in_state(state, plane, + old_plane_state, + new_plane_state, i) { + if (new_plane_state->base.crtc != &crtc->base && old_plane_state->base.crtc != &crtc->base) continue; - if (vlv_raw_plane_wm_compute(crtc_state, plane_state)) + if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state)) dirty |= BIT(plane->id); } @@ -1864,7 +1872,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) /* cursor changes don't warrant a FIFO recompute */ if (dirty & ~BIT(PLANE_CURSOR)) { const struct intel_crtc_state *old_crtc_state = - to_intel_crtc_state(crtc->base.state); + intel_atomic_get_old_crtc_state(state, crtc); const struct vlv_fifo_state *old_fifo_state = &old_crtc_state->wm.vlv.fifo_state; @@ -2024,16 +2032,27 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, static int vlv_compute_intermediate_wm(struct drm_device *dev, struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) -{ - struct vlv_wm_state *intermediate = &crtc_state->wm.vlv.intermediate; - const struct vlv_wm_state *optimal = &crtc_state->wm.vlv.optimal; - const struct vlv_wm_state *active = &crtc->wm.active.vlv; + struct intel_crtc_state *new_crtc_state) +{ + struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate; + const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal; + struct intel_atomic_state *intel_state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state 
*old_crtc_state = + intel_atomic_get_old_crtc_state(intel_state, crtc); + const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal; int level; + if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) { + *intermediate = *optimal; + + intermediate->cxsr = false; + goto out; + } + intermediate->num_levels = min(optimal->num_levels, active->num_levels); intermediate->cxsr = optimal->cxsr && active->cxsr && - !crtc_state->disable_cxsr; + !new_crtc_state->disable_cxsr; for (level = 0; level < intermediate->num_levels; level++) { enum plane_id plane_id; @@ -2052,12 +2071,13 @@ static int vlv_compute_intermediate_wm(struct drm_device *dev, vlv_invalidate_wms(crtc, intermediate, level); +out: /* * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0) - crtc_state->wm.need_postvbl_update = true; + new_crtc_state->wm.need_postvbl_update = true; return 0; } @@ -2256,8 +2276,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) else wm_info = &i830_a_wm_info; - fifo_size = dev_priv->display.get_fifo_size(dev_priv, 0); - crtc = intel_get_crtc_for_plane(dev_priv, 0); + fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A); + crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A); if (intel_crtc_active(crtc)) { const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; @@ -2283,8 +2303,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) if (IS_GEN2(dev_priv)) wm_info = &i830_bc_wm_info; - fifo_size = dev_priv->display.get_fifo_size(dev_priv, 1); - crtc = intel_get_crtc_for_plane(dev_priv, 1); + fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B); + crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B); if (intel_crtc_active(crtc)) { const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; @@ -2396,7 +2416,7 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) adjusted_mode = &crtc->config->base.adjusted_mode; planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, &i845_wm_info, - dev_priv->display.get_fifo_size(dev_priv, 0), + dev_priv->display.get_fifo_size(dev_priv, PLANE_A), 4, pessimal_latency_ns); fwater_lo = I915_READ(FW_BLC) & ~0xfff; fwater_lo |= (3<<8) | planea_wm; @@ -2785,11 +2805,11 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the first set of memory latencies[0:3] */ val = 0; /* data0 to be programmed to 0 for first set */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); @@ -2806,11 +2826,11 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, /* read the second set of memory latencies[4:7] */ val = 1; /* data0 to be programmed to 1 for second set */ - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_READ_MEM_LATENCY, &val); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("SKL Mailbox read error = %d\n", ret); return; @@ -3119,7 +3139,11 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, struct intel_crtc_state *newstate) { struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate; - struct 
intel_pipe_wm *b = &intel_crtc->wm.active.ilk; + struct intel_atomic_state *intel_state = + to_intel_atomic_state(newstate->base.state); + const struct intel_crtc_state *oldstate = + intel_atomic_get_old_crtc_state(intel_state, intel_crtc); + const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal; int level, max_level = ilk_wm_max_level(to_i915(dev)); /* @@ -3128,6 +3152,9 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, * and after the vblank. */ *a = newstate->wm.ilk.optimal; + if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base)) + return 0; + a->pipe_enabled |= b->pipe_enabled; a->sprites_enabled |= b->sprites_enabled; a->sprites_scaled |= b->sprites_scaled; @@ -3594,13 +3621,13 @@ intel_enable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Enabling the SAGV\n"); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_ENABLE); /* We don't need to wait for the SAGV when enabling */ - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, @@ -3631,14 +3658,14 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) return 0; DRM_DEBUG_KMS("Disabling the SAGV\n"); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); /* bspec says to keep retrying for at least 1 ms */ ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, GEN9_SAGV_DISABLE, GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, 1); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); /* * Some skl systems, pre-release machines in particular, @@ -3918,6 +3945,7 @@ skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, struct intel_crtc_state *cstate) { + struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); struct drm_crtc_state *crtc_state = &cstate->base; struct drm_atomic_state *state = crtc_state->state; struct drm_plane *plane; @@ -3960,7 +3988,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, crtc_clock = crtc_state->adjusted_mode.crtc_clock; dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk; - if (IS_GEMINILAKE(to_i915(intel_crtc->base.dev))) + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10) dotclk *= 2; pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale); @@ -4361,134 +4389,147 @@ skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, downscale_amount); } -static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, - struct intel_crtc_state *cstate, - const struct intel_plane_state *intel_pstate, - uint16_t ddb_allocation, - int level, - uint16_t *out_blocks, /* out */ - uint8_t *out_lines, /* out */ - bool *enabled /* out */) +static int +skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, + struct intel_crtc_state *cstate, + const struct intel_plane_state *intel_pstate, + struct skl_wm_params *wp) { struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); const struct drm_plane_state *pstate = &intel_pstate->base; const struct drm_framebuffer *fb = pstate->fb; - uint32_t latency = dev_priv->wm.skl_latency[level]; - uint_fixed_16_16_t method1, method2; - uint_fixed_16_16_t plane_blocks_per_line; - uint_fixed_16_16_t selected_result; uint32_t interm_pbpl; - uint32_t plane_bytes_per_line; - uint32_t res_blocks, res_lines; - uint8_t cpp; - uint32_t 
width = 0; - uint32_t plane_pixel_rate; - uint_fixed_16_16_t y_tile_minimum; - uint32_t y_min_scanlines; struct intel_atomic_state *state = to_intel_atomic_state(cstate->base.state); bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); - bool y_tiled, x_tiled; - if (latency == 0 || - !intel_wm_plane_visible(cstate, intel_pstate)) { - *enabled = false; + if (!intel_wm_plane_visible(cstate, intel_pstate)) return 0; - } - - y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || - fb->modifier == I915_FORMAT_MOD_Yf_TILED || - fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || - fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; - x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; - - /* Display WA #1141: kbl,cfl */ - if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && - dev_priv->ipc_enabled) - latency += 4; - if (apply_memory_bw_wa && x_tiled) - latency += 15; + wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || + fb->modifier == I915_FORMAT_MOD_Yf_TILED || + fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; + wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; + wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; if (plane->id == PLANE_CURSOR) { - width = intel_pstate->base.crtc_w; + wp->width = intel_pstate->base.crtc_w; } else { /* * Src coordinates are already rotated by 270 degrees for * the 90/270 degree plane rotation cases (to match the * GTT mapping), hence no need to account for rotation here. */ - width = drm_rect_width(&intel_pstate->base.src) >> 16; + wp->width = drm_rect_width(&intel_pstate->base.src) >> 16; } - cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] : - fb->format->cpp[0]; - plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); + wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? 
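/*
 * A standalone sketch (not the driver's actual header) of the
 * uint_fixed_16_16_t helpers that the watermark math here leans on;
 * the names are borrowed from the calls used in this file, but the
 * implementation is an assumption and ignores the driver's overflow
 * checks.
 */
#include <stdint.h>

typedef struct { uint32_t val; } uint_fixed_16_16_t;

static inline uint_fixed_16_16_t u32_to_fixed16(uint32_t v)
{
	return (uint_fixed_16_16_t){ .val = v << 16 };
}

static inline uint32_t fixed16_to_u32_round_up(uint_fixed_16_16_t fp)
{
	return (fp.val + 0xffff) >> 16; /* round any fraction up */
}

static inline uint_fixed_16_16_t div_fixed16(uint32_t num, uint32_t den)
{
	return (uint_fixed_16_16_t){ .val = (uint32_t)(((uint64_t)num << 16) / den) };
}

static inline uint_fixed_16_16_t mul_u32_fixed16(uint32_t m, uint_fixed_16_16_t fp)
{
	return (uint_fixed_16_16_t){ .val = (uint32_t)((uint64_t)m * fp.val) };
}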
fb->format->cpp[1] : + fb->format->cpp[0]; + wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, + intel_pstate); if (drm_rotation_90_or_270(pstate->rotation)) { - switch (cpp) { + switch (wp->cpp) { case 1: - y_min_scanlines = 16; + wp->y_min_scanlines = 16; break; case 2: - y_min_scanlines = 8; + wp->y_min_scanlines = 8; break; case 4: - y_min_scanlines = 4; + wp->y_min_scanlines = 4; break; default: - MISSING_CASE(cpp); + MISSING_CASE(wp->cpp); return -EINVAL; } } else { - y_min_scanlines = 4; + wp->y_min_scanlines = 4; } if (apply_memory_bw_wa) - y_min_scanlines *= 2; + wp->y_min_scanlines *= 2; - plane_bytes_per_line = width * cpp; - if (y_tiled) { - interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * - y_min_scanlines, 512); + wp->plane_bytes_per_line = wp->width * wp->cpp; + if (wp->y_tiled) { + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line * + wp->y_min_scanlines, 512); if (INTEL_GEN(dev_priv) >= 10) interm_pbpl++; - plane_blocks_per_line = div_fixed16(interm_pbpl, - y_min_scanlines); - } else if (x_tiled && INTEL_GEN(dev_priv) == 9) { - interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); - plane_blocks_per_line = u32_to_fixed16(interm_pbpl); + wp->plane_blocks_per_line = div_fixed16(interm_pbpl, + wp->y_min_scanlines); + } else if (wp->x_tiled && IS_GEN9(dev_priv)) { + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512); + wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } else { - interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; - plane_blocks_per_line = u32_to_fixed16(interm_pbpl); + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1; + wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } - method1 = skl_wm_method1(dev_priv, plane_pixel_rate, cpp, latency); - method2 = skl_wm_method2(plane_pixel_rate, + wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines, + wp->plane_blocks_per_line); + wp->linetime_us = fixed16_to_u32_round_up( + intel_get_linetime_us(cstate)); + + return 0; +} + +static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, + struct intel_crtc_state *cstate, + const struct intel_plane_state *intel_pstate, + uint16_t ddb_allocation, + int level, + const struct skl_wm_params *wp, + uint16_t *out_blocks, /* out */ + uint8_t *out_lines, /* out */ + bool *enabled /* out */) +{ + const struct drm_plane_state *pstate = &intel_pstate->base; + uint32_t latency = dev_priv->wm.skl_latency[level]; + uint_fixed_16_16_t method1, method2; + uint_fixed_16_16_t selected_result; + uint32_t res_blocks, res_lines; + struct intel_atomic_state *state = + to_intel_atomic_state(cstate->base.state); + bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); + + if (latency == 0 || + !intel_wm_plane_visible(cstate, intel_pstate)) { + *enabled = false; + return 0; + } + + /* Display WA #1141: kbl,cfl */ + if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || + IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) && + dev_priv->ipc_enabled) + latency += 4; + + if (apply_memory_bw_wa && wp->x_tiled) + latency += 15; + + method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate, + wp->cpp, latency); + method2 = skl_wm_method2(wp->plane_pixel_rate, cstate->base.adjusted_mode.crtc_htotal, latency, - plane_blocks_per_line); - - y_tile_minimum = mul_u32_fixed16(y_min_scanlines, - plane_blocks_per_line); + wp->plane_blocks_per_line); - if (y_tiled) { - selected_result = max_fixed16(method2, y_tile_minimum); + if (wp->y_tiled) { + selected_result = max_fixed16(method2, wp->y_tile_minimum); } else { - uint32_t linetime_us; - 
- linetime_us = fixed16_to_u32_round_up( - intel_get_linetime_us(cstate)); - if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && - (plane_bytes_per_line / 512 < 1)) + if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal / + 512 < 1) && (wp->plane_bytes_per_line / 512 < 1)) selected_result = method2; else if (ddb_allocation >= - fixed16_to_u32_round_up(plane_blocks_per_line)) + fixed16_to_u32_round_up(wp->plane_blocks_per_line)) selected_result = min_fixed16(method1, method2); - else if (latency >= linetime_us) + else if (latency >= wp->linetime_us) selected_result = min_fixed16(method1, method2); else selected_result = method1; @@ -4496,19 +4537,18 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, res_blocks = fixed16_to_u32_round_up(selected_result) + 1; res_lines = div_round_up_fixed16(selected_result, - plane_blocks_per_line); + wp->plane_blocks_per_line); /* Display WA #1125: skl,bxt,kbl,glk */ - if (level == 0 && - (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || - fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS)) - res_blocks += fixed16_to_u32_round_up(y_tile_minimum); + if (level == 0 && wp->rc_surface) + res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum); /* Display WA #1126: skl,bxt,kbl,glk */ if (level >= 1 && level <= 7) { - if (y_tiled) { - res_blocks += fixed16_to_u32_round_up(y_tile_minimum); - res_lines += y_min_scanlines; + if (wp->y_tiled) { + res_blocks += fixed16_to_u32_round_up( + wp->y_tile_minimum); + res_lines += wp->y_min_scanlines; } else { res_blocks++; } @@ -4546,6 +4586,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb, struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, + const struct skl_wm_params *wm_params, struct skl_plane_wm *wm) { struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); @@ -4569,6 +4610,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv, intel_pstate, ddb_blocks, level, + wm_params, &result->plane_res_b, &result->plane_res_l, &result->plane_en); @@ -4594,20 +4636,65 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate) linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); - /* Display WA #1135: bxt. 
*/ - if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled) - linetime_wm = DIV_ROUND_UP(linetime_wm, 2); + /* Display WA #1135: bxt:ALL GLK:ALL */ + if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) && + dev_priv->ipc_enabled) + linetime_wm /= 2; return linetime_wm; } static void skl_compute_transition_wm(struct intel_crtc_state *cstate, + struct skl_wm_params *wp, + struct skl_wm_level *wm_l0, + uint16_t ddb_allocation, struct skl_wm_level *trans_wm /* out */) { + struct drm_device *dev = cstate->base.crtc->dev; + const struct drm_i915_private *dev_priv = to_i915(dev); + uint16_t trans_min, trans_y_tile_min; + const uint16_t trans_amount = 10; /* This is a configurable amount */ + uint16_t trans_offset_b, res_blocks; + if (!cstate->base.active) + goto exit; + + /* Transition WMs are not recommended by the HW team for GEN9 */ + if (INTEL_GEN(dev_priv) <= 9) + goto exit; + + /* Transition WMs don't make any sense if IPC is disabled */ + if (!dev_priv->ipc_enabled) + goto exit; + + if (INTEL_GEN(dev_priv) >= 10) + trans_min = 4; + + trans_offset_b = trans_min + trans_amount; + + if (wp->y_tiled) { + trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2, + wp->y_tile_minimum); + res_blocks = max(wm_l0->plane_res_b, trans_y_tile_min) + + trans_offset_b; + } else { + res_blocks = wm_l0->plane_res_b + trans_offset_b; + + /* WA BUG:1938466 add one block for non y-tile planes */ + if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0)) + res_blocks += 1; + + } + + res_blocks += 1; + + if (res_blocks < ddb_allocation) { + trans_wm->plane_res_b = res_blocks; + trans_wm->plane_en = true; return; + } - /* Until we know more, just disable transition WMs */ +exit: trans_wm->plane_en = false; } @@ -4633,14 +4720,25 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); enum plane_id plane_id = to_intel_plane(plane)->id; + struct skl_wm_params wm_params; + enum pipe pipe = to_intel_crtc(cstate->base.crtc)->pipe; + uint16_t ddb_blocks; wm = &pipe_wm->planes[plane_id]; + ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][plane_id]); + memset(&wm_params, 0, sizeof(struct skl_wm_params)); + + ret = skl_compute_plane_wm_params(dev_priv, cstate, + intel_pstate, &wm_params); + if (ret) + return ret; ret = skl_compute_wm_levels(dev_priv, ddb, cstate, - intel_pstate, wm); + intel_pstate, &wm_params, wm); if (ret) return ret; - skl_compute_transition_wm(cstate, &wm->trans_wm); + skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0], + ddb_blocks, &wm->trans_wm); } pipe_wm->linetime = skl_compute_linetime_wm(cstate); @@ -4736,16 +4834,18 @@ static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, return a->start < b->end && b->start < a->end; } -bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, +bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv, + const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore) { - int i; + enum pipe pipe; - for (i = 0; i < I915_MAX_PIPES; i++) - if (i != ignore && entries[i] && - skl_ddb_entries_overlap(ddb, entries[i])) + for_each_pipe(dev_priv, pipe) { + if (pipe != ignore && entries[pipe] && + skl_ddb_entries_overlap(ddb, entries[pipe])) return true; + } return false; } @@ -5535,7 +5635,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) wm->level = VLV_WM_LEVEL_PM2; if (IS_CHERRYVIEW(dev_priv)) { - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); if 
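/*
 * Boiled-down model of the new transition watermark selection above
 * (gen10+ with IPC enabled): take the larger of the level-0 blocks
 * and twice the Y-tile minimum, add the fixed offset, and only enable
 * the watermark if the result still fits the plane's DDB share. A toy
 * restatement under those assumptions, all values in DDB blocks:
 */
#include <stdbool.h>
#include <stdint.h>

static bool compute_trans_wm(uint16_t wm0_blocks, uint16_t y_tile_min_x2,
			     bool y_tiled, uint16_t ddb_allocation,
			     uint16_t *out_blocks)
{
	const uint16_t trans_min = 4;	  /* gen10+ minimum */
	const uint16_t trans_amount = 10; /* configurable headroom */
	uint16_t res = (y_tiled && y_tile_min_x2 > wm0_blocks ?
			y_tile_min_x2 : wm0_blocks) +
		       trans_min + trans_amount;

	res += 1; /* the extra block added unconditionally above */
	if (res >= ddb_allocation)
		return false; /* doesn't fit: leave the WM disabled */

	*out_blocks = res;
	return true;
}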
(val & DSP_MAXFIFO_PM5_ENABLE) @@ -5565,7 +5665,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) wm->level = VLV_WM_LEVEL_DDR_DVFS; } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } for_each_intel_crtc(dev, crtc) { @@ -5669,12 +5769,30 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->wm.wm_mutex); } +/* + * FIXME should probably kill this and improve + * the real watermark readout/sanitation instead + */ +static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) +{ + I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); + + /* + * Don't touch WM1S_LP_EN here. + * Doing so could cause underruns. + */ +} + void ilk_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct ilk_wm_values *hw = &dev_priv->wm.hw; struct drm_crtc *crtc; + ilk_init_lp_watermarks(dev_priv); + for_each_crtc(dev, crtc) ilk_pipe_wm_get_hw_state(crtc); @@ -5739,6 +5857,36 @@ void intel_update_watermarks(struct intel_crtc *crtc) dev_priv->display.update_wm(crtc); } +void intel_enable_ipc(struct drm_i915_private *dev_priv) +{ + u32 val; + + /* Display WA #0477 WaDisableIPC: skl */ + if (IS_SKYLAKE(dev_priv)) { + dev_priv->ipc_enabled = false; + return; + } + + val = I915_READ(DISP_ARB_CTL2); + + if (dev_priv->ipc_enabled) + val |= DISP_IPC_ENABLE; + else + val &= ~DISP_IPC_ENABLE; + + I915_WRITE(DISP_ARB_CTL2, val); +} + +void intel_init_ipc(struct drm_i915_private *dev_priv) +{ + dev_priv->ipc_enabled = false; + if (!HAS_IPC(dev_priv)) + return; + + dev_priv->ipc_enabled = true; + intel_enable_ipc(dev_priv); +} + /* * Lock protecting IPS related data structures */ @@ -5872,6 +6020,7 @@ static void ironlake_disable_drps(struct drm_i915_private *dev_priv) */ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 limits; /* Only set the down limit when we've reached the lowest level to avoid @@ -5881,13 +6030,13 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) * frequency, if the down threshold expires in that window we will not * receive a down interrupt. 
*/ if (INTEL_GEN(dev_priv) >= 9) { - limits = (dev_priv->rps.max_freq_softlimit) << 23; - if (val <= dev_priv->rps.min_freq_softlimit) - limits |= (dev_priv->rps.min_freq_softlimit) << 14; + limits = (rps->max_freq_softlimit) << 23; + if (val <= rps->min_freq_softlimit) + limits |= (rps->min_freq_softlimit) << 14; } else { - limits = dev_priv->rps.max_freq_softlimit << 24; - if (val <= dev_priv->rps.min_freq_softlimit) - limits |= dev_priv->rps.min_freq_softlimit << 16; + limits = rps->max_freq_softlimit << 24; + if (val <= rps->min_freq_softlimit) + limits |= rps->min_freq_softlimit << 16; } return limits; @@ -5895,39 +6044,40 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int new_power; u32 threshold_up = 0, threshold_down = 0; /* in % */ u32 ei_up = 0, ei_down = 0; - new_power = dev_priv->rps.power; - switch (dev_priv->rps.power) { + new_power = rps->power; + switch (rps->power) { case LOW_POWER: - if (val > dev_priv->rps.efficient_freq + 1 && - val > dev_priv->rps.cur_freq) + if (val > rps->efficient_freq + 1 && + val > rps->cur_freq) new_power = BETWEEN; break; case BETWEEN: - if (val <= dev_priv->rps.efficient_freq && - val < dev_priv->rps.cur_freq) + if (val <= rps->efficient_freq && + val < rps->cur_freq) new_power = LOW_POWER; - else if (val >= dev_priv->rps.rp0_freq && - val > dev_priv->rps.cur_freq) + else if (val >= rps->rp0_freq && + val > rps->cur_freq) new_power = HIGH_POWER; break; case HIGH_POWER: - if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && - val < dev_priv->rps.cur_freq) + if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && + val < rps->cur_freq) new_power = BETWEEN; break; } /* Max/min bins are special */ - if (val <= dev_priv->rps.min_freq_softlimit) + if (val <= rps->min_freq_softlimit) new_power = LOW_POWER; - if (val >= dev_priv->rps.max_freq_softlimit) + if (val >= rps->max_freq_softlimit) new_power = HIGH_POWER; - if (new_power == dev_priv->rps.power) + if (new_power == rps->power) return; /* Note the units here are not exactly 1us, but 1280ns. */ @@ -5990,20 +6140,21 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_DOWN_IDLE_AVG); skip_hw_write: - dev_priv->rps.power = new_power; - dev_priv->rps.up_threshold = threshold_up; - dev_priv->rps.down_threshold = threshold_down; - dev_priv->rps.last_adj = 0; + rps->power = new_power; + rps->up_threshold = threshold_up; + rps->down_threshold = threshold_down; + rps->last_adj = 0; } static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 mask = 0; /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */ - if (val > dev_priv->rps.min_freq_softlimit) + if (val > rps->min_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; - if (val < dev_priv->rps.max_freq_softlimit) + if (val < rps->max_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; mask &= dev_priv->pm_rps_events; @@ -6016,10 +6167,12 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* min/max delay may still have been modified so be sure to * write the limits value. 
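/*
 * The GEN6_RP_INTERRUPT_LIMITS packing used by intel_rps_limits()
 * above, restated as a standalone sketch: gen9 moved the fields (max
 * at bit 23, min at bit 14, versus bits 24/16 before), and the down
 * limit is only armed once we have already reached the lowest level.
 */
#include <stdbool.h>
#include <stdint.h>

static uint32_t pack_rps_limits(bool gen9plus, uint8_t max_soft,
				uint8_t min_soft, uint8_t val)
{
	uint32_t limits;

	if (gen9plus) {
		limits = (uint32_t)max_soft << 23;
		if (val <= min_soft)
			limits |= (uint32_t)min_soft << 14;
	} else {
		limits = (uint32_t)max_soft << 24;
		if (val <= min_soft)
			limits |= (uint32_t)min_soft << 16;
	}
	return limits;
}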
*/ - if (val != dev_priv->rps.cur_freq) { + if (val != rps->cur_freq) { gen6_set_rps_thresholds(dev_priv, val); if (INTEL_GEN(dev_priv) >= 9) @@ -6041,7 +6194,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - dev_priv->rps.cur_freq = val; + rps->cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); return 0; @@ -6057,7 +6210,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - if (val != dev_priv->rps.cur_freq) { + if (val != dev_priv->gt_pm.rps.cur_freq) { err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); if (err) return err; @@ -6065,7 +6218,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) gen6_set_rps_thresholds(dev_priv, val); } - dev_priv->rps.cur_freq = val; + dev_priv->gt_pm.rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); return 0; @@ -6080,10 +6233,11 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) */ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { - u32 val = dev_priv->rps.idle_freq; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u32 val = rps->idle_freq; int err; - if (dev_priv->rps.cur_freq <= val) + if (rps->cur_freq <= val) return; /* The punit delays the write of the frequency and voltage until it @@ -6108,34 +6262,38 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) void gen6_rps_busy(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->rps.hw_lock); - if (dev_priv->rps.enabled) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + mutex_lock(&dev_priv->pcu_lock); + if (rps->enabled) { u8 freq; if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); + gen6_rps_pm_mask(dev_priv, rps->cur_freq)); gen6_enable_rps_interrupts(dev_priv); /* Use the user's desired frequency as a guide, but for better * performance, jump directly to RPe as our starting frequency. */ - freq = max(dev_priv->rps.cur_freq, - dev_priv->rps.efficient_freq); + freq = max(rps->cur_freq, + rps->efficient_freq); if (intel_set_rps(dev_priv, clamp(freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit))) + rps->min_freq_softlimit, + rps->max_freq_softlimit))) DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } void gen6_rps_idle(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* Flush our bottom-half so that it does not race with us * setting the idle frequency and so that it is bounded by * our rpm wakeref. 
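/*
 * gen6_rps_boost() below deliberately peeks at rps->enabled without a
 * lock and lets the RPS worker re-validate; only the request fields
 * are protected, now with irqsave so the helper is callable from any
 * context. A stripped-down, hypothetical model of that pattern:
 */
#include <stdatomic.h>
#include <stdbool.h>

struct boost_state {
	atomic_bool enabled;
	atomic_int num_waiters;
};

static bool try_boost(struct boost_state *st, bool *waitboost)
{
	if (!atomic_load(&st->enabled)) /* intentionally racy peek */
		return false;

	if (*waitboost) /* this waiter already holds a boost */
		return false;

	*waitboost = true;
	atomic_fetch_add(&st->num_waiters, 1);
	return true; /* the worker re-validates before acting */
}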
And then disable the interrupts to stop any @@ -6143,58 +6301,60 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) */ gen6_disable_rps_interrupts(dev_priv); - mutex_lock(&dev_priv->rps.hw_lock); - if (dev_priv->rps.enabled) { + mutex_lock(&dev_priv->pcu_lock); + if (rps->enabled) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_set_rps_idle(dev_priv); else - gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); - dev_priv->rps.last_adj = 0; + gen6_set_rps(dev_priv, rps->idle_freq); + rps->last_adj = 0; I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } void gen6_rps_boost(struct drm_i915_gem_request *rq, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { - struct drm_i915_private *i915 = rq->i915; + struct intel_rps *rps = &rq->i915->gt_pm.rps; + unsigned long flags; bool boost; /* This is intentionally racy! We peek at the state here, then * validate inside the RPS worker. */ - if (!i915->rps.enabled) + if (!rps->enabled) return; boost = false; - spin_lock_irq(&rq->lock); + spin_lock_irqsave(&rq->lock, flags); if (!rq->waitboost && !i915_gem_request_completed(rq)) { - atomic_inc(&i915->rps.num_waiters); + atomic_inc(&rps->num_waiters); rq->waitboost = true; boost = true; } - spin_unlock_irq(&rq->lock); + spin_unlock_irqrestore(&rq->lock, flags); if (!boost) return; - if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq) - schedule_work(&i915->rps.work); + if (READ_ONCE(rps->cur_freq) < rps->boost_freq) + schedule_work(&rps->work); - atomic_inc(rps ? &rps->boosts : &i915->rps.boosts); + atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts); } int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int err; - lockdep_assert_held(&dev_priv->rps.hw_lock); - GEM_BUG_ON(val > dev_priv->rps.max_freq); - GEM_BUG_ON(val < dev_priv->rps.min_freq); + lockdep_assert_held(&dev_priv->pcu_lock); + GEM_BUG_ON(val > rps->max_freq); + GEM_BUG_ON(val < rps->min_freq); - if (!dev_priv->rps.enabled) { - dev_priv->rps.cur_freq = val; + if (!rps->enabled) { + rps->cur_freq = val; return 0; } @@ -6217,21 +6377,30 @@ static void gen9_disable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_CONTROL, 0); } -static void gen6_disable_rps(struct drm_i915_private *dev_priv) +static void gen6_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); +} + +static void gen6_disable_rps(struct drm_i915_private *dev_priv) +{ I915_WRITE(GEN6_RPNSWREQ, 1 << 31); I915_WRITE(GEN6_RP_CONTROL, 0); } -static void cherryview_disable_rps(struct drm_i915_private *dev_priv) +static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); } -static void valleyview_disable_rps(struct drm_i915_private *dev_priv) +static void cherryview_disable_rps(struct drm_i915_private *dev_priv) { - /* we're doing forcewake before Disabling RC6, + I915_WRITE(GEN6_RP_CONTROL, 0); +} + +static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) +{ + /* We're doing forcewake before Disabling RC6, * This is what the BIOS expects when going into suspend */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -6240,29 +6409,13 @@ static void valleyview_disable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode) +static void valleyview_disable_rps(struct 
drm_i915_private *dev_priv) { - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) - mode = GEN6_RC_CTL_RC6_ENABLE; - else - mode = 0; - } - if (HAS_RC6p(dev_priv)) - DRM_DEBUG_DRIVER("Enabling RC6 states: " - "RC6 %s RC6p %s RC6pp %s\n", - onoff(mode & GEN6_RC_CTL_RC6_ENABLE), - onoff(mode & GEN6_RC_CTL_RC6p_ENABLE), - onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE)); - - else - DRM_DEBUG_DRIVER("Enabling RC6 states: RC6 %s\n", - onoff(mode & GEN6_RC_CTL_RC6_ENABLE)); + I915_WRITE(GEN6_RP_CONTROL, 0); } static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; bool enable_rc6 = true; unsigned long rc6_ctx_base; u32 rc_ctl; @@ -6287,9 +6440,8 @@ static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) * for this check. */ rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; - if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) && - (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base + - ggtt->stolen_reserved_size))) { + if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) && + (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) { DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); enable_rc6 = false; } @@ -6322,64 +6474,54 @@ static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) return enable_rc6; } -int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) +static bool sanitize_rc6(struct drm_i915_private *i915) { - /* No RC6 before Ironlake and code is gone for ilk. */ - if (INTEL_INFO(dev_priv)->gen < 6) - return 0; + struct intel_device_info *info = mkwrite_device_info(i915); - if (!enable_rc6) - return 0; + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(i915)) + info->has_rc6 = 0; - if (IS_GEN9_LP(dev_priv) && !bxt_check_bios_rc6_setup(dev_priv)) { + if (info->has_rc6 && + IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) { DRM_INFO("RC6 disabled by BIOS\n"); - return 0; + info->has_rc6 = 0; } - /* Respect the kernel parameter if it is set */ - if (enable_rc6 >= 0) { - int mask; - - if (HAS_RC6p(dev_priv)) - mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | - INTEL_RC6pp_ENABLE; - else - mask = INTEL_RC6_ENABLE; - - if ((enable_rc6 & mask) != enable_rc6) - DRM_DEBUG_DRIVER("Adjusting RC6 mask to %d " - "(requested %d, valid %d)\n", - enable_rc6 & mask, enable_rc6, mask); - - return enable_rc6 & mask; - } - - if (IS_IVYBRIDGE(dev_priv)) - return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); + /* + * We assume that we do not have any deep rc6 levels if we don't + * have the previous rc6 level supported, i.e. we use HAS_RC6() + * as the initial coarse check for rc6 in general, moving on to + * progressively finer/deeper levels. 
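/*
 * Shape of the capability demotion performed by sanitize_rc6() above:
 * deep RC6 levels are only kept if plain RC6 survived the checks. The
 * struct below is a stand-in for intel_device_info, and the flags are
 * assumptions for illustration only.
 */
#include <stdbool.h>

struct rc6_caps {
	bool has_rc6;
	bool has_rc6p;
};

static bool sanitize_rc6_caps(struct rc6_caps *caps, bool in_vm,
			      bool bios_blocks_rc6)
{
	if (in_vm) /* the host owns powersaving inside a VM */
		caps->has_rc6 = false;
	if (caps->has_rc6 && bios_blocks_rc6)
		caps->has_rc6 = false;
	if (!caps->has_rc6) /* no deep level without the shallow one */
		caps->has_rc6p = false;
	return caps->has_rc6;
}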
+ */ + if (!info->has_rc6 && info->has_rc6p) + info->has_rc6p = 0; - return INTEL_RC6_ENABLE; + return info->has_rc6; } static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* All of these values are in units of 50MHz */ /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_GEN9_LP(dev_priv)) { u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); - dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; - dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; + rps->rp0_freq = (rp_state_cap >> 16) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 0) & 0xff; } else { u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; - dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; + rps->rp0_freq = (rp_state_cap >> 0) & 0xff; + rps->rp1_freq = (rp_state_cap >> 8) & 0xff; + rps->min_freq = (rp_state_cap >> 16) & 0xff; } /* hw_max = RP0 until we check for overclocking */ - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + rps->max_freq = rps->rp0_freq; - dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; + rps->efficient_freq = rps->rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { u32 ddcc_status = 0; @@ -6387,33 +6529,34 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) if (sandybridge_pcode_read(dev_priv, HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, &ddcc_status) == 0) - dev_priv->rps.efficient_freq = + rps->efficient_freq = clamp_t(u8, ((ddcc_status >> 8) & 0xff), - dev_priv->rps.min_freq, - dev_priv->rps.max_freq); + rps->min_freq, + rps->max_freq); } if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ - dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; - dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; + rps->rp0_freq *= GEN9_FREQ_SCALER; + rps->rp1_freq *= GEN9_FREQ_SCALER; + rps->min_freq *= GEN9_FREQ_SCALER; + rps->max_freq *= GEN9_FREQ_SCALER; + rps->efficient_freq *= GEN9_FREQ_SCALER; } } static void reset_rps(struct drm_i915_private *dev_priv, int (*set)(struct drm_i915_private *, u8)) { - u8 freq = dev_priv->rps.cur_freq; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + u8 freq = rps->cur_freq; /* force a reset */ - dev_priv->rps.power = -1; - dev_priv->rps.cur_freq = -1; + rps->power = -1; + rps->cur_freq = -1; if (set(dev_priv, freq)) DRM_ERROR("Failed to reset RPS to initial values\n"); @@ -6424,9 +6567,10 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) { intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RC_VIDEO_FREQ, - GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); + /* Program defaults and thresholds for RPS */ + if (IS_GEN9(dev_priv)) + I915_WRITE(GEN6_RC_VIDEO_FREQ, + GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq)); /* 1 second timeout*/ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, @@ -6446,7 +6590,7 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - uint32_t rc6_mask = 0; + u32 rc6_mode; /* 1a: Software RC state - RC0 */ I915_WRITE(GEN6_RC_STATE, 0); @@ 
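/*
 * Sketch of the RP_STATE_CAP decode in gen6_init_rps_frequencies()
 * above: three one-byte frequency fields in 50 MHz units, with the
 * byte order reversed between the BXT register layout and the others.
 */
#include <stdbool.h>
#include <stdint.h>

static void decode_rp_state_cap(uint32_t cap, bool bxt_layout,
				uint8_t *rp0, uint8_t *rp1, uint8_t *rpn)
{
	if (bxt_layout) {
		*rp0 = (cap >> 16) & 0xff;
		*rp1 = (cap >> 8) & 0xff;
		*rpn = (cap >> 0) & 0xff;
	} else {
		*rp0 = (cap >> 0) & 0xff;
		*rp1 = (cap >> 8) & 0xff;
		*rpn = (cap >> 16) & 0xff;
	}
}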
-6459,12 +6603,19 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC_CONTROL, 0); /* 2b: Program RC6 thresholds.*/ - - /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */ - if (IS_SKYLAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10) { + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + } else if (IS_SKYLAKE(dev_priv)) { + /* + * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only + * when CPG is enabled + */ I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); - else + } else { I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); + } + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ for_each_engine(engine, dev_priv, id) @@ -6480,12 +6631,18 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); /* 3a: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) - rc6_mask = GEN6_RC_CTL_RC6_ENABLE; - DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + + /* WaRsUseTimeoutMode:cnl (pre-prod) */ + if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0)) + rc6_mode = GEN7_RC_CTL_TO_MODE; + else + rc6_mode = GEN6_RC_CTL_EI_MODE(1); + I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_EI_MODE(1) | rc6_mask); + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + rc6_mode); /* * 3b: Enable Coarse Power Gating only when RC6 is enabled. @@ -6494,22 +6651,21 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) I915_WRITE(GEN9_PG_ENABLE, 0); else - I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? - (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0); + I915_WRITE(GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void gen8_enable_rps(struct drm_i915_private *dev_priv) +static void gen8_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - uint32_t rc6_mask = 0; /* 1a: Software RC state - RC0 */ I915_WRITE(GEN6_RC_STATE, 0); - /* 1c & 1d: Get forcewake during program sequence. Although the driver + /* 1b: Get forcewake during program sequence. 
Although the driver * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -6523,36 +6679,36 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); I915_WRITE(GEN6_RC_SLEEP, 0); - if (IS_BROADWELL(dev_priv)) - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ /* 3: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) - rc6_mask = GEN6_RC_CTL_RC6_ENABLE; - intel_print_rc6_info(dev_priv, rc6_mask); - if (IS_BROADWELL(dev_priv)) - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); - else - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_EI_MODE(1) | - rc6_mask); - /* 4 Program defaults and thresholds for RPS*/ + I915_WRITE(GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + GEN6_RC_CTL_RC6_ENABLE); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +static void gen8_enable_rps(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* 1 Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + HSW_FREQUENCY(rps->rp1_freq)); I915_WRITE(GEN6_RC_VIDEO_FREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + HSW_FREQUENCY(rps->rp1_freq)); /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ /* Docs recommend 900MHz, and 300 MHz respectively */ I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - dev_priv->rps.max_freq_softlimit << 24 | - dev_priv->rps.min_freq_softlimit << 16); + rps->max_freq_softlimit << 24 | + rps->min_freq_softlimit << 16); I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ @@ -6561,7 +6717,7 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - /* 5: Enable RPS */ + /* 2: Enable RPS */ I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_TURBO | GEN6_RP_MEDIA_HW_NORMAL_MODE | @@ -6570,30 +6726,19 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); - /* 6: Ring frequency + overclocking (our driver does this later */ - reset_rps(dev_priv, gen6_set_rps); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void gen6_enable_rps(struct drm_i915_private *dev_priv) +static void gen6_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 rc6vids, rc6_mask = 0; + u32 rc6vids, rc6_mask; u32 gtfifodbg; - int rc6_mode; int ret; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - - /* Here begins a magic sequence of register writes to enable - * auto-downclocking. - * - * Perhaps there might be some value in exposing these to - * userspace... 
- */ I915_WRITE(GEN6_RC_STATE, 0); /* Clear the DBG now so we don't confuse earlier errors */ @@ -6626,33 +6771,17 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ - /* Check if we are enabling RC6 */ - rc6_mode = intel_enable_rc6(); - if (rc6_mode & INTEL_RC6_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; - /* We don't use those on Haswell */ - if (!IS_HASWELL(dev_priv)) { - if (rc6_mode & INTEL_RC6p_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; - - if (rc6_mode & INTEL_RC6pp_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; - } - - intel_print_rc6_info(dev_priv, rc6_mask); - + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + if (HAS_RC6p(dev_priv)) + rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; + if (HAS_RC6pp(dev_priv)) + rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; I915_WRITE(GEN6_RC_CONTROL, rc6_mask | GEN6_RC_CTL_EI_MODE(1) | GEN6_RC_CTL_HW_ENABLE); - /* Power down if completely idle for over 50ms */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - reset_rps(dev_priv, gen6_set_rps); - rc6vids = 0; ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); if (IS_GEN6(dev_priv) && ret) { @@ -6670,8 +6799,28 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } +static void gen6_enable_rps(struct drm_i915_private *dev_priv) +{ + /* Here begins a magic sequence of register writes to enable + * auto-downclocking. + * + * Perhaps there might be some value in exposing these to + * userspace... + */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* Power down if completely idle for over 50ms */ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + reset_rps(dev_priv, gen6_set_rps); + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; int min_freq = 15; unsigned int gpu_freq; unsigned int max_ia_freq, min_ring_freq; @@ -6679,7 +6828,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) int scaling_factor = 180; struct cpufreq_policy *policy; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); policy = cpufreq_cpu_get(0); if (policy) { @@ -6702,11 +6851,11 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; - max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; + min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER; + max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER; } else { - min_gpu_freq = dev_priv->rps.min_freq; - max_gpu_freq = dev_priv->rps.max_freq; + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; } /* @@ -6869,7 +7018,7 @@ static void valleyview_check_pctx(struct drm_i915_private *dev_priv) { unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - WARN_ON(pctx_addr != dev_priv->mm.stolen_base + + WARN_ON(pctx_addr != dev_priv->dsm.start + dev_priv->vlv_pctx->stolen->start); } @@ -6884,16 +7033,15 @@ static void cherryview_check_pctx(struct drm_i915_private *dev_priv) static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; - unsigned long pctx_paddr, paddr; + resource_size_t pctx_paddr, paddr; + 
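/*
 * Toy version of the CHV PCBR fix-up below: carve the power context
 * out of the top of the stolen range (dsm_start..dsm_end inclusive)
 * and align it down to 4K, since VLV_PCBR takes a page-aligned 32-bit
 * address (hence the GEM_BUG_ON(paddr > U32_MAX) in the patch).
 */
#include <stdint.h>

typedef uint64_t resource_size_t; /* stand-in for the kernel typedef */

static uint32_t place_pctx(resource_size_t dsm_end, resource_size_t pctx_size)
{
	resource_size_t paddr = dsm_end + 1 - pctx_size;

	return (uint32_t)(paddr & ~(resource_size_t)4095);
}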
resource_size_t pctx_size = 32*1024; u32 pcbr; - int pctx_size = 32*1024; pcbr = I915_READ(VLV_PCBR); if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - paddr = (dev_priv->mm.stolen_base + - (ggtt->stolen_size - pctx_size)); + paddr = dev_priv->dsm.end + 1 - pctx_size; + GEM_BUG_ON(paddr > U32_MAX); pctx_paddr = (paddr & (~4095)); I915_WRITE(VLV_PCBR, pctx_paddr); @@ -6905,16 +7053,16 @@ static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *pctx; - unsigned long pctx_paddr; + resource_size_t pctx_paddr; + resource_size_t pctx_size = 24*1024; u32 pcbr; - int pctx_size = 24*1024; pcbr = I915_READ(VLV_PCBR); if (pcbr) { /* BIOS set it up already, grab the pre-alloc'd space */ - int pcbr_offset; + resource_size_t pcbr_offset; - pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; + pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start; pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv, pcbr_offset, I915_GTT_OFFSET_NONE, @@ -6938,7 +7086,11 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) goto out; } - pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; + GEM_BUG_ON(range_overflows_t(u64, + dev_priv->dsm.start, + pctx->stolen->start, + U32_MAX)); + pctx_paddr = dev_priv->dsm.start + pctx->stolen->start; I915_WRITE(VLV_PCBR, pctx_paddr); out: @@ -6957,17 +7109,18 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) { - dev_priv->rps.gpll_ref_freq = + dev_priv->gt_pm.rps.gpll_ref_freq = vlv_get_cck_clock(dev_priv, "GPLL ref", CCK_GPLL_CLOCK_CONTROL, dev_priv->czclk_freq); DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", - dev_priv->rps.gpll_ref_freq); + dev_priv->gt_pm.rps.gpll_ref_freq); } static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; valleyview_setup_pctx(dev_priv); @@ -6989,30 +7142,31 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) } DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + rps->max_freq = valleyview_rps_max_freq(dev_priv); + rps->rp0_freq = rps->max_freq; DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); + intel_gpu_freq(dev_priv, rps->max_freq), + rps->max_freq); - dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); + rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv); DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + intel_gpu_freq(dev_priv, rps->efficient_freq), + rps->efficient_freq); - dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); + rps->rp1_freq = valleyview_rps_guar_freq(dev_priv); DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); + intel_gpu_freq(dev_priv, rps->rp1_freq), + rps->rp1_freq); - dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); + rps->min_freq = valleyview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); + 
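/*
 * The RPS fields above are stored in hardware ticks: 50 MHz steps on
 * most parts, and 16.66 MHz steps once GEN9_FREQ_SCALER (assumed here
 * to be 3, i.e. a third of a tick) has been applied. A hypothetical
 * conversion helper, ignoring the driver's rounding details:
 */
#include <stdbool.h>

#define FREQ_SCALER 3 /* assumption: mirrors GEN9_FREQ_SCALER */

static int ticks_to_mhz(int ticks, bool gen9_units)
{
	return gen9_units ? ticks * 50 / FREQ_SCALER : ticks * 50;
}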
intel_gpu_freq(dev_priv, rps->min_freq), + rps->min_freq); } static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; cherryview_setup_pctx(dev_priv); @@ -7033,31 +7187,29 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) } DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + rps->max_freq = cherryview_rps_max_freq(dev_priv); + rps->rp0_freq = rps->max_freq; DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); + intel_gpu_freq(dev_priv, rps->max_freq), + rps->max_freq); - dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); + rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv); DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + intel_gpu_freq(dev_priv, rps->efficient_freq), + rps->efficient_freq); - dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); + rps->rp1_freq = cherryview_rps_guar_freq(dev_priv); DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); + intel_gpu_freq(dev_priv, rps->rp1_freq), + rps->rp1_freq); - dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); + rps->min_freq = cherryview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); + intel_gpu_freq(dev_priv, rps->min_freq), + rps->min_freq); - WARN_ONCE((dev_priv->rps.max_freq | - dev_priv->rps.efficient_freq | - dev_priv->rps.rp1_freq | - dev_priv->rps.min_freq) & 1, + WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq | + rps->min_freq) & 1, "Odd GPU freq values\n"); } @@ -7066,13 +7218,11 @@ static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) valleyview_cleanup_pctx(dev_priv); } -static void cherryview_enable_rps(struct drm_i915_private *dev_priv) +static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 gtfifodbg, val, rc6_mode = 0, pcbr; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + u32 gtfifodbg, rc6_mode, pcbr; gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | GT_FIFO_FREE_ENTRIES_CHV); @@ -7103,7 +7253,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); - /* allows RC6 residency counter to work */ + /* Allows RC6 residency counter to work */ I915_WRITE(VLV_COUNTER_CONTROL, _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | VLV_MEDIA_RC6_COUNT_EN | @@ -7113,13 +7263,21 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) pcbr = I915_READ(VLV_PCBR); /* 3: Enable RC6 */ - if ((intel_enable_rc6() & INTEL_RC6_ENABLE) && - (pcbr >> VLV_PCBR_ADDR_SHIFT)) + rc6_mode = 0; + if (pcbr >> VLV_PCBR_ADDR_SHIFT) rc6_mode = GEN7_RC_CTL_TO_MODE; - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - /* 4 Program defaults and thresholds for RPS*/ + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +static void cherryview_enable_rps(struct drm_i915_private *dev_priv) +{ + u32 val; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + /* 1: Program defaults and 
thresholds for RPS*/ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); @@ -7128,7 +7286,7 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - /* 5: Enable RPS */ + /* 2: Enable RPS */ I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_HW_NORMAL_MODE | GEN6_RP_MEDIA_IS_GFX | @@ -7155,13 +7313,11 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } -static void valleyview_enable_rps(struct drm_i915_private *dev_priv) +static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; - u32 gtfifodbg, val, rc6_mode = 0; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + u32 gtfifodbg; valleyview_check_pctx(dev_priv); @@ -7172,28 +7328,11 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GTFIFODBG, gtfifodbg); } - /* If VLV, Forcewake all wells, else re-direct to regular path */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); /* Disable RC states. */ I915_WRITE(GEN6_RC_CONTROL, 0); - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_CONT); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); @@ -7203,7 +7342,7 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - /* allows RC6 residency counter to work */ + /* Allows RC6 residency counter to work */ I915_WRITE(VLV_COUNTER_CONTROL, _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | VLV_MEDIA_RC0_COUNT_EN | @@ -7211,12 +7350,33 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv) VLV_MEDIA_RC6_COUNT_EN | VLV_RENDER_RC6_COUNT_EN)); - if (intel_enable_rc6() & INTEL_RC6_ENABLE) - rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; + I915_WRITE(GEN6_RC_CONTROL, + GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); - intel_print_rc6_info(dev_priv, rc6_mode); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); +static void valleyview_enable_rps(struct drm_i915_private *dev_priv) +{ + u32 val; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); + I915_WRITE(GEN6_RP_UP_EI, 66000); + I915_WRITE(GEN6_RP_DOWN_EI, 350000); + + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_CONT); /* Setting Fixed Bias */ val = VLV_OVERRIDE_EN | @@ -7425,7 +7585,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) lockdep_assert_held(&mchdev_lock); - pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); + pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq)); pxvid = (pxvid >> 24) & 0x7f; ext_v = pvid_to_extvid(dev_priv, pxvid); @@ -7712,17 +7872,18 @@ static 
void intel_init_emon(struct drm_i915_private *dev_priv) void intel_init_gt_powersave(struct drm_i915_private *dev_priv) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a * requirement. */ - if (!i915.enable_rc6) { + if (!sanitize_rc6(dev_priv)) { DRM_INFO("RC6 disabled, disabling runtime PM support\n"); intel_runtime_pm_get(dev_priv); } - mutex_lock(&dev_priv->drm.struct_mutex); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) @@ -7733,16 +7894,16 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) gen6_init_rps_frequencies(dev_priv); /* Derive initial user preferences/limits from the hardware limits */ - dev_priv->rps.idle_freq = dev_priv->rps.min_freq; - dev_priv->rps.cur_freq = dev_priv->rps.idle_freq; + rps->idle_freq = rps->min_freq; + rps->cur_freq = rps->idle_freq; - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + rps->max_freq_softlimit = rps->max_freq; + rps->min_freq_softlimit = rps->min_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - dev_priv->rps.min_freq_softlimit = + rps->min_freq_softlimit = max_t(int, - dev_priv->rps.efficient_freq, + rps->efficient_freq, intel_freq_opcode(dev_priv, 450)); /* After setting max-softlimit, find the overclock max freq */ @@ -7753,19 +7914,16 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params); if (params & BIT(31)) { /* OC supported */ DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", - (dev_priv->rps.max_freq & 0xff) * 50, + (rps->max_freq & 0xff) * 50, (params & 0xff) * 50); - dev_priv->rps.max_freq = params & 0xff; + rps->max_freq = params & 0xff; } } /* Finally allow us to boost to max by default */ - dev_priv->rps.boost_freq = dev_priv->rps.max_freq; - - mutex_unlock(&dev_priv->rps.hw_lock); - mutex_unlock(&dev_priv->drm.struct_mutex); + rps->boost_freq = rps->max_freq; - intel_autoenable_gt_powersave(dev_priv); + mutex_unlock(&dev_priv->pcu_lock); } void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) @@ -7773,7 +7931,7 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) if (IS_VALLEYVIEW(dev_priv)) valleyview_cleanup_gt_powersave(dev_priv); - if (!i915.enable_rc6) + if (!HAS_RC6(dev_priv)) intel_runtime_pm_put(dev_priv); } @@ -7790,149 +7948,163 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) < 6) return; - if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) - intel_runtime_pm_put(dev_priv); - /* gen6_rps_idle() will be called later to disable interrupts */ } void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) { - dev_priv->rps.enabled = true; /* force disabling */ + dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ + dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ intel_disable_gt_powersave(dev_priv); gen6_reset_rps_interrupts(dev_priv); } -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) +static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) { - if (!READ_ONCE(dev_priv->rps.enabled)) + lockdep_assert_held(&i915->pcu_lock); + + if (!i915->gt_pm.llc_pstate.enabled) return; - mutex_lock(&dev_priv->rps.hw_lock); + /* Currently there is no HW configuration to be done to disable. 
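/*
 * Shape of the GEN6_READ_OC_PARAMS handling in intel_init_gt_powersave()
 * above: bit 31 advertises overclocking support and the low byte is the
 * overclocked ceiling in 50 MHz ticks, which replaces max_freq.
 */
#include <stdbool.h>
#include <stdint.h>

static bool apply_oc_params(uint32_t params, uint8_t *max_freq)
{
	if (!(params & (1u << 31)))
		return false; /* no overclock support advertised */

	*max_freq = params & 0xff; /* new ceiling, in 50 MHz units */
	return true;
}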
*/ - if (INTEL_GEN(dev_priv) >= 9) { + i915->gt_pm.llc_pstate.enabled = false; +} + +static void intel_disable_rc6(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->pcu_lock); + + if (!dev_priv->gt_pm.rc6.enabled) + return; + + if (INTEL_GEN(dev_priv) >= 9) gen9_disable_rc6(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + cherryview_disable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_disable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_disable_rc6(dev_priv); + + dev_priv->gt_pm.rc6.enabled = false; +} + +static void intel_disable_rps(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->pcu_lock); + + if (!dev_priv->gt_pm.rps.enabled) + return; + + if (INTEL_GEN(dev_priv) >= 9) gen9_disable_rps(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) { + else if (IS_CHERRYVIEW(dev_priv)) cherryview_disable_rps(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv)) { + else if (IS_VALLEYVIEW(dev_priv)) valleyview_disable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 6) { + else if (INTEL_GEN(dev_priv) >= 6) gen6_disable_rps(dev_priv); - } else if (IS_IRONLAKE_M(dev_priv)) { + else if (IS_IRONLAKE_M(dev_priv)) ironlake_disable_drps(dev_priv); - } - dev_priv->rps.enabled = false; - mutex_unlock(&dev_priv->rps.hw_lock); + dev_priv->gt_pm.rps.enabled = false; } -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) +void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { - /* We shouldn't be disabling as we submit, so this should be less - * racy than it appears! - */ - if (READ_ONCE(dev_priv->rps.enabled)) + mutex_lock(&dev_priv->pcu_lock); + + intel_disable_rc6(dev_priv); + intel_disable_rps(dev_priv); + if (HAS_LLC(dev_priv)) + intel_disable_llc_pstate(dev_priv); + + mutex_unlock(&dev_priv->pcu_lock); +} + +static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->pcu_lock); + + if (i915->gt_pm.llc_pstate.enabled) return; - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(dev_priv)) + gen6_update_ring_freq(i915); + + i915->gt_pm.llc_pstate.enabled = true; +} + +static void intel_enable_rc6(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->pcu_lock); + + if (dev_priv->gt_pm.rc6.enabled) return; - mutex_lock(&dev_priv->rps.hw_lock); + if (IS_CHERRYVIEW(dev_priv)) + cherryview_enable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_enable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 9) + gen9_enable_rc6(dev_priv); + else if (IS_BROADWELL(dev_priv)) + gen8_enable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_enable_rc6(dev_priv); + + dev_priv->gt_pm.rc6.enabled = true; +} + +static void intel_enable_rps(struct drm_i915_private *dev_priv) +{ + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + lockdep_assert_held(&dev_priv->pcu_lock); + + if (rps->enabled) + return; if (IS_CHERRYVIEW(dev_priv)) { cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rps(dev_priv); } else if (INTEL_GEN(dev_priv) >= 9) { - gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) - gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); - gen6_update_ring_freq(dev_priv); } else if (INTEL_GEN(dev_priv) >= 6) { gen6_enable_rps(dev_priv); - gen6_update_ring_freq(dev_priv); } else if (IS_IRONLAKE_M(dev_priv)) { ironlake_enable_drps(dev_priv); intel_init_emon(dev_priv); } - 
WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); - WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq); + WARN_ON(rps->max_freq < rps->min_freq); + WARN_ON(rps->idle_freq > rps->max_freq); - WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); - WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); + WARN_ON(rps->efficient_freq < rps->min_freq); + WARN_ON(rps->efficient_freq > rps->max_freq); - dev_priv->rps.enabled = true; - mutex_unlock(&dev_priv->rps.hw_lock); + rps->enabled = true; } -static void __intel_autoenable_gt_powersave(struct work_struct *work) +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), rps.autoenable_work.work); - struct intel_engine_cs *rcs; - struct drm_i915_gem_request *req; - - if (READ_ONCE(dev_priv->rps.enabled)) - goto out; - - rcs = dev_priv->engine[RCS]; - if (rcs->last_retired_context) - goto out; - - if (!rcs->init_context) - goto out; - - mutex_lock(&dev_priv->drm.struct_mutex); - - req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); - if (IS_ERR(req)) - goto unlock; - - if (!i915.enable_execlists && i915_switch_context(req) == 0) - rcs->init_context(req); - - /* Mark the device busy, calling intel_enable_gt_powersave() */ - i915_add_request(req); + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + return; -unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); -out: - intel_runtime_pm_put(dev_priv); -} + mutex_lock(&dev_priv->pcu_lock); -void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) -{ - if (READ_ONCE(dev_priv->rps.enabled)) - return; + if (HAS_RC6(dev_priv)) + intel_enable_rc6(dev_priv); + intel_enable_rps(dev_priv); + if (HAS_LLC(dev_priv)) + intel_enable_llc_pstate(dev_priv); - if (IS_IRONLAKE_M(dev_priv)) { - ironlake_enable_drps(dev_priv); - intel_init_emon(dev_priv); - } else if (INTEL_INFO(dev_priv)->gen >= 6) { - /* - * PCU communication is slow and this doesn't need to be - * done at any specific time, so do this out of our fast path - * to make resume and init faster. - * - * We depend on the HW RC6 power context save/restore - * mechanism when entering D3 through runtime PM suspend. So - * disable RPM until RPS/RC6 is properly setup. We can only - * get here via the driver load/system resume/runtime resume - * paths, so the _noresume version is enough (and in case of - * runtime resume it's necessary). - */ - if (queue_delayed_work(dev_priv->wq, - &dev_priv->rps.autoenable_work, - round_jiffies_up_relative(HZ))) - intel_runtime_pm_get_noresume(dev_priv); - } + mutex_unlock(&dev_priv->pcu_lock); } static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) @@ -7959,19 +8131,7 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) } } -static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) -{ - I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); - - /* - * Don't touch WM1S_LP_EN here. - * Doing so could cause underruns. 
- */ -} - -static void ironlake_init_clock_gating(struct drm_i915_private *dev_priv) +static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) { uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; @@ -8004,8 +8164,6 @@ static void ironlake_init_clock_gating(struct drm_i915_private *dev_priv) (I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS)); - ilk_init_lp_watermarks(dev_priv); - /* * Based on the document from hardware guys the following bits * should be set unconditionally in order to enable FBC. @@ -8118,8 +8276,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_GT_MODE, _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - ilk_init_lp_watermarks(dev_priv); - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); @@ -8257,7 +8413,58 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, I915_WRITE(GEN7_MISCCPCTL, misccpctl); } -static void kabylake_init_clock_gating(struct drm_i915_private *dev_priv) +static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) +{ + if (!HAS_PCH_CNP(dev_priv)) + return; + + /* Display WA #1181: cnp */ + I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) | + CNP_PWM_CGE_GATING_DISABLE); +} + +static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) +{ + u32 val; + cnp_init_clock_gating(dev_priv); + + /* This is not a Wa. Enable for better image quality */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE)); + + /* WaEnableChickenDCPR:cnl */ + I915_WRITE(GEN8_CHICKEN_DCPR_1, + I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); + + /* WaFbcWakeMemOn:cnl */ + I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | + DISP_FBC_MEMORY_WAKE); + + val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE); + /* ReadHitWriteOnlyDisable:cnl */ + val |= RCCUNIT_CLKGATE_DIS; + /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */ + if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) + val |= SARBUNIT_CLKGATE_DIS; + I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); + + /* WaDisableVFclkgate:cnl */ + val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE); + val |= VFUNIT_CLKGATE_DIS; + I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val); +} + +static void cfl_init_clock_gating(struct drm_i915_private *dev_priv) +{ + cnp_init_clock_gating(dev_priv); + gen9_init_clock_gating(dev_priv); + + /* WaFbcNukeOnHostModify:cfl */ + I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | + ILK_DPFC_NUKE_ON_ANY_MODIFICATION); +} + +static void kbl_init_clock_gating(struct drm_i915_private *dev_priv) { gen9_init_clock_gating(dev_priv); @@ -8271,12 +8478,12 @@ static void kabylake_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | GEN6_GAMUNIT_CLOCK_GATE_DISABLE); - /* WaFbcNukeOnHostModify:kbl,cfl */ + /* WaFbcNukeOnHostModify:kbl */ I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | ILK_DPFC_NUKE_ON_ANY_MODIFICATION); } -static void skylake_init_clock_gating(struct drm_i915_private *dev_priv) +static void skl_init_clock_gating(struct drm_i915_private *dev_priv) { gen9_init_clock_gating(dev_priv); @@ -8289,12 +8496,13 @@ static void skylake_init_clock_gating(struct drm_i915_private *dev_priv) ILK_DPFC_NUKE_ON_ANY_MODIFICATION); } -static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv) +static void bdw_init_clock_gating(struct drm_i915_private *dev_priv) { + /* The GTT cache must be disabled if the system is using 2M pages.
*/ + bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv, + I915_GTT_PAGE_SIZE_2M); enum pipe pipe; - ilk_init_lp_watermarks(dev_priv); - /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -8325,12 +8533,8 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv) /* WaProgramL3SqcReg1Default:bdw */ gen8_set_l3sqc_credits(dev_priv, 30, 2); - /* - * WaGttCachingOffByDefault:bdw - * GTT cache may not work with big pages, so if those - * are ever enabled GTT cache may need to be disabled. - */ - I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); + /* WaGttCachingOffByDefault:bdw */ + I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0); /* WaKVMNotificationOnConfigChange:bdw */ I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) @@ -8347,10 +8551,8 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv) I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); } -static void haswell_init_clock_gating(struct drm_i915_private *dev_priv) +static void hsw_init_clock_gating(struct drm_i915_private *dev_priv) { - ilk_init_lp_watermarks(dev_priv); - /* L3 caching of data atomics doesn't work -- disable it. */ I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); I915_WRITE(HSW_ROW_CHICKEN3, @@ -8394,19 +8596,13 @@ static void haswell_init_clock_gating(struct drm_i915_private *dev_priv) /* WaSwitchSolVfFArbitrationPriority:hsw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); - /* WaRsPkgCStateDisplayPMReq:hsw */ - I915_WRITE(CHICKEN_PAR1_1, - I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); - lpt_init_clock_gating(dev_priv); } -static void ivybridge_init_clock_gating(struct drm_i915_private *dev_priv) +static void ivb_init_clock_gating(struct drm_i915_private *dev_priv) { uint32_t snpcr; - ilk_init_lp_watermarks(dev_priv); - I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); /* WaDisableEarlyCull:ivb */ @@ -8498,7 +8694,7 @@ static void ivybridge_init_clock_gating(struct drm_i915_private *dev_priv) gen6_check_mch_setup(dev_priv); } -static void valleyview_init_clock_gating(struct drm_i915_private *dev_priv) +static void vlv_init_clock_gating(struct drm_i915_private *dev_priv) { /* WaDisableEarlyCull:vlv */ I915_WRITE(_3D_CHICKEN3, @@ -8578,7 +8774,7 @@ static void valleyview_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); } -static void cherryview_init_clock_gating(struct drm_i915_private *dev_priv) +static void chv_init_clock_gating(struct drm_i915_private *dev_priv) { /* WaVSRefCountFullforceMissDisable:chv */ /* WaDSRefCountFullforceMissDisable:chv */ @@ -8638,7 +8834,7 @@ static void g4x_init_clock_gating(struct drm_i915_private *dev_priv) g4x_disable_trickle_feed(dev_priv); } -static void crestline_init_clock_gating(struct drm_i915_private *dev_priv) +static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv) { I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); I915_WRITE(RENCLK_GATE_D2, 0); @@ -8652,7 +8848,7 @@ static void crestline_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } -static void broadwater_init_clock_gating(struct drm_i915_private *dev_priv) +static void i965g_init_clock_gating(struct drm_i915_private *dev_priv) { I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | I965_RCC_CLOCK_GATE_DISABLE | @@ -8737,34 +8933,38 @@ static void nop_init_clock_gating(struct 
drm_i915_private *dev_priv) */ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { - if (IS_SKYLAKE(dev_priv)) - dev_priv->display.init_clock_gating = skylake_init_clock_gating; - else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) - dev_priv->display.init_clock_gating = kabylake_init_clock_gating; + if (IS_CANNONLAKE(dev_priv)) + dev_priv->display.init_clock_gating = cnl_init_clock_gating; + else if (IS_COFFEELAKE(dev_priv)) + dev_priv->display.init_clock_gating = cfl_init_clock_gating; + else if (IS_SKYLAKE(dev_priv)) + dev_priv->display.init_clock_gating = skl_init_clock_gating; + else if (IS_KABYLAKE(dev_priv)) + dev_priv->display.init_clock_gating = kbl_init_clock_gating; else if (IS_BROXTON(dev_priv)) dev_priv->display.init_clock_gating = bxt_init_clock_gating; else if (IS_GEMINILAKE(dev_priv)) dev_priv->display.init_clock_gating = glk_init_clock_gating; else if (IS_BROADWELL(dev_priv)) - dev_priv->display.init_clock_gating = broadwell_init_clock_gating; + dev_priv->display.init_clock_gating = bdw_init_clock_gating; else if (IS_CHERRYVIEW(dev_priv)) - dev_priv->display.init_clock_gating = cherryview_init_clock_gating; + dev_priv->display.init_clock_gating = chv_init_clock_gating; else if (IS_HASWELL(dev_priv)) - dev_priv->display.init_clock_gating = haswell_init_clock_gating; + dev_priv->display.init_clock_gating = hsw_init_clock_gating; else if (IS_IVYBRIDGE(dev_priv)) - dev_priv->display.init_clock_gating = ivybridge_init_clock_gating; + dev_priv->display.init_clock_gating = ivb_init_clock_gating; else if (IS_VALLEYVIEW(dev_priv)) - dev_priv->display.init_clock_gating = valleyview_init_clock_gating; + dev_priv->display.init_clock_gating = vlv_init_clock_gating; else if (IS_GEN6(dev_priv)) dev_priv->display.init_clock_gating = gen6_init_clock_gating; else if (IS_GEN5(dev_priv)) - dev_priv->display.init_clock_gating = ironlake_init_clock_gating; + dev_priv->display.init_clock_gating = ilk_init_clock_gating; else if (IS_G4X(dev_priv)) dev_priv->display.init_clock_gating = g4x_init_clock_gating; else if (IS_I965GM(dev_priv)) - dev_priv->display.init_clock_gating = crestline_init_clock_gating; + dev_priv->display.init_clock_gating = i965gm_init_clock_gating; else if (IS_I965G(dev_priv)) - dev_priv->display.init_clock_gating = broadwater_init_clock_gating; + dev_priv->display.init_clock_gating = i965g_init_clock_gating; else if (IS_GEN3(dev_priv)) dev_priv->display.init_clock_gating = gen3_init_clock_gating; else if (IS_I85X(dev_priv) || IS_I865G(dev_priv)) @@ -8907,7 +9107,7 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val { int status; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work @@ -8949,12 +9149,12 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val return 0; } -int sandybridge_pcode_write(struct drm_i915_private *dev_priv, - u32 mbox, u32 val) +int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, + u32 mbox, u32 val, int timeout_us) { int status; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can * use te fw I915_READ variants to reduce the amount of work @@ -8973,7 +9173,7 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, if (__intel_wait_for_register_fw(dev_priv, 
GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, - 500, 0, NULL)) { + timeout_us, 0, NULL)) { DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n", val, mbox, __builtin_return_address(0)); return -ETIMEDOUT; @@ -9031,7 +9231,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, u32 status; int ret; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \ &status) @@ -9046,7 +9246,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, ret = 0; goto out; } - ret = _wait_for(COND, timeout_base_ms * 1000, 10); + ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10); if (!ret) goto out; @@ -9073,31 +9273,39 @@ out: static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * N = val - 0xb7 * Slow = Fast = GPLL ref * N */ - return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000); + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); } static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) { - return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7; + struct intel_rps *rps = &dev_priv->gt_pm.rps; + + return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; } static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* * N = val / 2 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 */ - return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000); + return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); } static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) { + struct intel_rps *rps = &dev_priv->gt_pm.rps; + /* CHV needs even values */ - return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2; + return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; } int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) @@ -9126,65 +9334,27 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); } -struct request_boost { - struct work_struct work; - struct drm_i915_gem_request *req; -}; - -static void __intel_rps_boost_work(struct work_struct *work) -{ - struct request_boost *boost = container_of(work, struct request_boost, work); - struct drm_i915_gem_request *req = boost->req; - - if (!i915_gem_request_completed(req)) - gen6_rps_boost(req, NULL); - - i915_gem_request_put(req); - kfree(boost); -} - -void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) -{ - struct request_boost *boost; - - if (req == NULL || INTEL_GEN(req->i915) < 6) - return; - - if (i915_gem_request_completed(req)) - return; - - boost = kmalloc(sizeof(*boost), GFP_ATOMIC); - if (boost == NULL) - return; - - boost->req = i915_gem_request_get(req); - - INIT_WORK(&boost->work, __intel_rps_boost_work); - queue_work(req->i915->wq, &boost->work); -} - void intel_pm_setup(struct drm_i915_private *dev_priv) { - mutex_init(&dev_priv->rps.hw_lock); + mutex_init(&dev_priv->pcu_lock); - INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, - __intel_autoenable_gt_powersave); - atomic_set(&dev_priv->rps.num_waiters, 0); + atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0); - dev_priv->pm.suspended = false; - atomic_set(&dev_priv->pm.wakeref_count, 0); + dev_priv->runtime_pm.suspended = false; + 
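The byt_gpu_freq()/byt_freq_opcode() pair above are inverses of each other up to rounding, which the following standalone round-trip demonstrates (the DIV_ROUND_CLOSEST macro is simplified for non-negative operands, and the GPLL reference value is purely illustrative; the real one is read from the hardware at init):

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))	/* non-negative only */

/* Same arithmetic as byt_gpu_freq()/byt_freq_opcode() in the hunk above. */
static int byt_gpu_freq(int gpll_ref_freq, int val)
{
	/* N = val - 0xb7, freq = GPLL ref * N */
	return DIV_ROUND_CLOSEST(gpll_ref_freq * (val - 0xb7), 1000);
}

static int byt_freq_opcode(int gpll_ref_freq, int val)
{
	return DIV_ROUND_CLOSEST(1000 * val, gpll_ref_freq) + 0xb7;
}

int main(void)
{
	int gpll_ref_freq = 5400;	/* illustrative value only */
	int opcode = 0xc8;
	int freq = byt_gpu_freq(gpll_ref_freq, opcode);

	/* prints: opcode 0xc8 -> 92 -> opcode 0xc8 */
	printf("opcode 0x%x -> %d -> opcode 0x%x\n",
	       opcode, freq, byt_freq_opcode(gpll_ref_freq, freq));
	return 0;
}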
atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, const i915_reg_t reg) { u32 lower, upper, tmp; + unsigned long flags; int loop = 2; /* The register accessed do not need forcewake. We borrow * uncore lock to prevent concurrent access to range reg. */ - spin_lock_irq(&dev_priv->uncore.lock); + spin_lock_irqsave(&dev_priv->uncore.lock, flags); /* vlv and chv residency counters are 40 bits in width. * With a control bit, we can choose between upper or lower @@ -9215,39 +9385,51 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, * now. */ - spin_unlock_irq(&dev_priv->uncore.lock); + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); return lower | (u64)upper << 8; } -u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, +u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, const i915_reg_t reg) { - u64 time_hw, units, div; + u64 time_hw; + u32 mul, div; - if (!intel_enable_rc6()) + if (!HAS_RC6(dev_priv)) return 0; - intel_runtime_pm_get(dev_priv); - /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - units = 1000; + mul = 1000000; div = dev_priv->czclk_freq; - time_hw = vlv_residency_raw(dev_priv, reg); - } else if (IS_GEN9_LP(dev_priv)) { - units = 1000; - div = 1200; /* 833.33ns */ - - time_hw = I915_READ(reg); } else { - units = 128000; /* 1.28us */ - div = 100000; + /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ + if (IS_GEN9_LP(dev_priv)) { + mul = 10000; + div = 12; + } else { + mul = 1280; + div = 1; + } time_hw = I915_READ(reg); } - intel_runtime_pm_put(dev_priv); - return DIV_ROUND_UP_ULL(time_hw * units, div); + return DIV_ROUND_UP_ULL(time_hw * mul, div); +} + +u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat) +{ + u32 cagf; + + if (INTEL_GEN(dev_priv) >= 9) + cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + else + cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + + return cagf; } diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 1b31ab002dae..2e32615eeada 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -58,6 +58,9 @@ static bool is_edp_psr(struct intel_dp *intel_dp) { + if (!intel_dp_is_edp(intel_dp)) + return false; + return intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED; } @@ -72,90 +75,54 @@ static bool vlv_is_psr_active_on_pipe(struct drm_device *dev, int pipe) (val == VLV_EDP_PSR_ACTIVE_SF_UPDATE); } -static void intel_psr_write_vsc(struct intel_dp *intel_dp, - const struct edp_vsc_psr *vsc_psr) -{ - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; - i915_reg_t ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder); - uint32_t *data = (uint32_t *) vsc_psr; - unsigned int i; - - /* As per BSPec (Pipe Video Data Island Packet), we need to disable - the video DIP being updated before program video DIP data buffer - registers for DIP being updated. 
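On the residency conversion earlier in this hunk: intel_rc6_residency_ns() folds the per-platform counter granularity into a mul/div pair, 833.33ns ticks on Gen9LP (10000/12) and 1.28us ticks elsewhere (1280/1); the VLV/CHV CZ-clock path is omitted from this sketch. A standalone check of the scaling, with invented counter values:

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP_ULL(x, d) (((x) + (d) - 1) / (d))

/* Same scaling as intel_rc6_residency_ns() above (non-VLV/CHV paths). */
static uint64_t rc6_to_ns(uint64_t time_hw, int gen9_lp)
{
	uint32_t mul, div;

	if (gen9_lp) {		/* 833.33 ns per counter tick */
		mul = 10000;
		div = 12;
	} else {		/* 1.28 us per counter tick */
		mul = 1280;
		div = 1;
	}
	return DIV_ROUND_UP_ULL(time_hw * mul, div);
}

int main(void)
{
	/* 1000 ticks: ~833334 ns on Gen9LP, 1280000 ns elsewhere */
	printf("%llu %llu\n",
	       (unsigned long long)rc6_to_ns(1000, 1),
	       (unsigned long long)rc6_to_ns(1000, 0));
	return 0;
}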
*/ - I915_WRITE(ctl_reg, 0); - POSTING_READ(ctl_reg); - - for (i = 0; i < sizeof(*vsc_psr); i += 4) { - I915_WRITE(HSW_TVIDEO_DIP_VSC_DATA(cpu_transcoder, - i >> 2), *data); - data++; - } - for (; i < VIDEO_DIP_VSC_DATA_SIZE; i += 4) - I915_WRITE(HSW_TVIDEO_DIP_VSC_DATA(cpu_transcoder, - i >> 2), 0); - - I915_WRITE(ctl_reg, VIDEO_DIP_ENABLE_VSC_HSW); - POSTING_READ(ctl_reg); -} - -static void vlv_psr_setup_vsc(struct intel_dp *intel_dp) +static void vlv_psr_setup_vsc(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc = intel_dig_port->base.base.crtc; - enum pipe pipe = to_intel_crtc(crtc)->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); uint32_t val; /* VLV auto-generate VSC package as per EDP 1.3 spec, Table 3.10 */ - val = I915_READ(VLV_VSCSDP(pipe)); + val = I915_READ(VLV_VSCSDP(crtc->pipe)); val &= ~VLV_EDP_PSR_SDP_FREQ_MASK; val |= VLV_EDP_PSR_SDP_FREQ_EVFRAME; - I915_WRITE(VLV_VSCSDP(pipe), val); + I915_WRITE(VLV_VSCSDP(crtc->pipe), val); } -static void skl_psr_setup_su_vsc(struct intel_dp *intel_dp) +static void hsw_psr_setup_vsc(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { - struct edp_vsc_psr psr_vsc; struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + struct edp_vsc_psr psr_vsc; - /* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */ - memset(&psr_vsc, 0, sizeof(psr_vsc)); - psr_vsc.sdp_header.HB0 = 0; - psr_vsc.sdp_header.HB1 = 0x7; - if (dev_priv->psr.colorimetry_support && - dev_priv->psr.y_cord_support) { - psr_vsc.sdp_header.HB2 = 0x5; - psr_vsc.sdp_header.HB3 = 0x13; - } else if (dev_priv->psr.y_cord_support) { - psr_vsc.sdp_header.HB2 = 0x4; - psr_vsc.sdp_header.HB3 = 0xe; + if (dev_priv->psr.psr2_support) { + /* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */ + memset(&psr_vsc, 0, sizeof(psr_vsc)); + psr_vsc.sdp_header.HB0 = 0; + psr_vsc.sdp_header.HB1 = 0x7; + if (dev_priv->psr.colorimetry_support && + dev_priv->psr.y_cord_support) { + psr_vsc.sdp_header.HB2 = 0x5; + psr_vsc.sdp_header.HB3 = 0x13; + } else if (dev_priv->psr.y_cord_support) { + psr_vsc.sdp_header.HB2 = 0x4; + psr_vsc.sdp_header.HB3 = 0xe; + } else { + psr_vsc.sdp_header.HB2 = 0x3; + psr_vsc.sdp_header.HB3 = 0xc; + } } else { - psr_vsc.sdp_header.HB2 = 0x3; - psr_vsc.sdp_header.HB3 = 0xc; + /* Prepare VSC packet as per EDP 1.3 spec, Table 3.10 */ + memset(&psr_vsc, 0, sizeof(psr_vsc)); + psr_vsc.sdp_header.HB0 = 0; + psr_vsc.sdp_header.HB1 = 0x7; + psr_vsc.sdp_header.HB2 = 0x2; + psr_vsc.sdp_header.HB3 = 0x8; } - intel_psr_write_vsc(intel_dp, &psr_vsc); -} - -static void hsw_psr_setup_vsc(struct intel_dp *intel_dp) -{ - struct edp_vsc_psr psr_vsc; - - /* Prepare VSC packet as per EDP 1.3 spec, Table 3.10 */ - memset(&psr_vsc, 0, sizeof(psr_vsc)); - psr_vsc.sdp_header.HB0 = 0; - psr_vsc.sdp_header.HB1 = 0x7; - psr_vsc.sdp_header.HB2 = 0x2; - psr_vsc.sdp_header.HB3 = 0x8; - intel_psr_write_vsc(intel_dp, &psr_vsc); + intel_dig_port->write_infoframe(&intel_dig_port->base.base, crtc_state, + DP_SDP_VSC, &psr_vsc, sizeof(psr_vsc)); } static void 
vlv_psr_enable_sink(struct intel_dp *intel_dp) @@ -196,7 +163,7 @@ static void hsw_psr_enable_sink(struct intel_dp *intel_dp) [3] = 1 - 1, [4] = DP_SET_POWER_D0, }; - enum port port = dig_port->port; + enum port port = dig_port->base.port; u32 aux_ctl; int i; @@ -233,16 +200,15 @@ static void hsw_psr_enable_sink(struct intel_dp *intel_dp) I915_WRITE(aux_ctl_reg, aux_ctl); } -static void vlv_psr_enable_source(struct intel_dp *intel_dp) +static void vlv_psr_enable_source(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc = dig_port->base.base.crtc; - enum pipe pipe = to_intel_crtc(crtc)->pipe; + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - /* Transition from PSR_state 0 to PSR_state 1, i.e. PSR Inactive */ - I915_WRITE(VLV_PSRCTL(pipe), + /* Transition from PSR_state 0 (disabled) to PSR_state 1 (inactive) */ + I915_WRITE(VLV_PSRCTL(crtc->pipe), VLV_EDP_PSR_MODE_SW_TIMER | VLV_EDP_PSR_SRC_TRANSMITTER_STATE | VLV_EDP_PSR_ENABLE); @@ -256,16 +222,17 @@ static void vlv_psr_activate(struct intel_dp *intel_dp) struct drm_crtc *crtc = dig_port->base.base.crtc; enum pipe pipe = to_intel_crtc(crtc)->pipe; - /* Let's do the transition from PSR_state 1 to PSR_state 2 - * that is PSR transition to active - static frame transmission. - * Then Hardware is responsible for the transition to PSR_state 3 - * that is PSR active - no Remote Frame Buffer (RFB) update. + /* + * Let's do the transition from PSR_state 1 (inactive) to + * PSR_state 2 (transition to active - static frame transmission). + * Then Hardware is responsible for the transition to + * PSR_state 3 (active - no Remote Frame Buffer (RFB) update). */ I915_WRITE(VLV_PSRCTL(pipe), I915_READ(VLV_PSRCTL(pipe)) | VLV_EDP_PSR_ACTIVE_ENTRY); } -static void intel_enable_source_psr1(struct intel_dp *intel_dp) +static void hsw_activate_psr1(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = dig_port->base.base.dev; @@ -319,7 +286,7 @@ static void intel_enable_source_psr1(struct intel_dp *intel_dp) I915_WRITE(EDP_PSR_CTL, val); } -static void intel_enable_source_psr2(struct intel_dp *intel_dp) +static void hsw_activate_psr2(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = dig_port->base.base.dev; @@ -333,6 +300,7 @@ static void intel_enable_source_psr2(struct intel_dp *intel_dp) */ uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames); uint32_t val; + uint8_t sink_latency; val = idle_frames << EDP_PSR_IDLE_FRAME_SHIFT; @@ -340,8 +308,16 @@ static void intel_enable_source_psr2(struct intel_dp *intel_dp) * mesh at all with our frontbuffer tracking. And the hw alone isn't * good enough. 
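The hsw_psr_setup_vsc() rewrite earlier in this file selects one VSC SDP header per sink capability combination, with the (HB2, HB3) pairs taken from the eDP 1.4 (PSR2) and eDP 1.3 (PSR1) tables quoted in its comments. A compact standalone sketch of that selection:

#include <stdint.h>
#include <stdio.h>

struct vsc_header { uint8_t hb2, hb3; };

/* Header bytes as in hsw_psr_setup_vsc(): eDP 1.4 for PSR2, 1.3 for PSR1. */
static struct vsc_header pick_vsc_header(int psr2, int colorimetry, int y_cord)
{
	if (psr2 && colorimetry && y_cord)
		return (struct vsc_header){ 0x5, 0x13 };
	if (psr2 && y_cord)
		return (struct vsc_header){ 0x4, 0xe };
	if (psr2)
		return (struct vsc_header){ 0x3, 0xc };
	return (struct vsc_header){ 0x2, 0x8 };	/* PSR1 */
}

int main(void)
{
	struct vsc_header h = pick_vsc_header(1, 0, 1);

	printf("HB2=0x%x HB3=0x%x\n", h.hb2, h.hb3);
	return 0;
}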
*/ val |= EDP_PSR2_ENABLE | - EDP_SU_TRACK_ENABLE | - EDP_FRAMES_BEFORE_SU_ENTRY; + EDP_SU_TRACK_ENABLE; + + if (drm_dp_dpcd_readb(&intel_dp->aux, + DP_SYNCHRONIZATION_LATENCY_IN_SINK, + &sink_latency) == 1) { + sink_latency &= DP_MAX_RESYNC_FRAME_COUNT_MASK; + } else { + sink_latency = 0; + } + val |= EDP_PSR2_FRAME_BEFORE_SU(sink_latency + 1); if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 5) val |= EDP_PSR2_TP2_TIME_2500; @@ -355,35 +331,43 @@ static void intel_enable_source_psr2(struct intel_dp *intel_dp) I915_WRITE(EDP_PSR2_CTL, val); } -static void hsw_psr_enable_source(struct intel_dp *intel_dp) +static void hsw_psr_activate(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); + /* On HSW+, after we enable PSR on the source it will activate + * as soon as it matches the configured idle_frame count. So + * we actually enable it here, at activation time. + */ + /* PSR1 and PSR2 are mutually exclusive. */ if (dev_priv->psr.psr2_support) - intel_enable_source_psr2(intel_dp); + hsw_activate_psr2(intel_dp); else - intel_enable_source_psr1(intel_dp); + hsw_activate_psr1(intel_dp); } -static bool intel_psr_match_conditions(struct intel_dp *intel_dp) +void intel_psr_compute_config(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc = dig_port->base.base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); const struct drm_display_mode *adjusted_mode = - &intel_crtc->config->base.adjusted_mode; + &crtc_state->base.adjusted_mode; int psr_setup_time; - lockdep_assert_held(&dev_priv->psr.lock); - WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); - WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); + if (!HAS_PSR(dev_priv)) + return; + + if (!is_edp_psr(intel_dp)) + return; - dev_priv->psr.source_ok = false; + if (!i915_modparams.enable_psr) { + DRM_DEBUG_KMS("PSR disable by flag\n"); + return; + } /* * HSW spec explicitly says PSR is tied to port A. @@ -392,68 +376,72 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp) * ones. Since by Display design transcoder EDP is tied to port A * we can safely escape based on the port A.
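One detail worth calling out in the PSR2 hunk above: the fixed EDP_FRAMES_BEFORE_SU_ENTRY is replaced by the sink-reported resync latency plus one, defaulting to a single frame when the DPCD read fails. A standalone sketch of that fallback (the DPCD read is stubbed out, and the mask is assumed to be the low nibble as in the DP spec):

#include <stdint.h>
#include <stdio.h>

#define DP_MAX_RESYNC_FRAME_COUNT_MASK 0xf	/* assumed: low nibble */

/* Stub standing in for drm_dp_dpcd_readb(); returns 1 on success. */
static int dpcd_readb(uint8_t *out)
{
	*out = 0x32;	/* pretend the sink reports latency 2 */
	return 1;
}

/* Mirrors the sink_latency handling in hsw_activate_psr2() above. */
static uint8_t frames_before_su(void)
{
	uint8_t sink_latency;

	if (dpcd_readb(&sink_latency) == 1)
		sink_latency &= DP_MAX_RESYNC_FRAME_COUNT_MASK;
	else
		sink_latency = 0;

	return sink_latency + 1;
}

int main(void)
{
	printf("frames before SU: %u\n", frames_before_su());	/* 3 */
	return 0;
}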
*/ - if (HAS_DDI(dev_priv) && dig_port->port != PORT_A) { + if (HAS_DDI(dev_priv) && dig_port->base.port != PORT_A) { DRM_DEBUG_KMS("PSR condition failed: Port not supported\n"); - return false; - } - - if (!i915.enable_psr) { - DRM_DEBUG_KMS("PSR disable by flag\n"); - return false; + return; } if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && !dev_priv->psr.link_standby) { DRM_ERROR("PSR condition failed: Link off requested but not supported on this platform\n"); - return false; + return; } if (IS_HASWELL(dev_priv) && - I915_READ(HSW_STEREO_3D_CTL(intel_crtc->config->cpu_transcoder)) & + I915_READ(HSW_STEREO_3D_CTL(crtc_state->cpu_transcoder)) & S3D_ENABLE) { DRM_DEBUG_KMS("PSR condition failed: Stereo 3D is Enabled\n"); - return false; + return; } if (IS_HASWELL(dev_priv) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { DRM_DEBUG_KMS("PSR condition failed: Interlaced is Enabled\n"); - return false; + return; } psr_setup_time = drm_dp_psr_setup_time(intel_dp->psr_dpcd); if (psr_setup_time < 0) { DRM_DEBUG_KMS("PSR condition failed: Invalid PSR setup time (0x%02x)\n", intel_dp->psr_dpcd[1]); - return false; + return; } if (intel_usecs_to_scanlines(adjusted_mode, psr_setup_time) > adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vdisplay - 1) { DRM_DEBUG_KMS("PSR condition failed: PSR setup time (%d us) too long\n", psr_setup_time); - return false; + return; + } + + /* + * FIXME psr2_support is messed up. It's both computed + * dynamically during PSR enable, and extracted from sink + * caps during eDP detection. + */ + if (!dev_priv->psr.psr2_support) { + crtc_state->has_psr = true; + return; } /* PSR2 is restricted to work with panel resolutions upto 3200x2000 */ - if (dev_priv->psr.psr2_support && - (intel_crtc->config->pipe_src_w > 3200 || - intel_crtc->config->pipe_src_h > 2000)) { - dev_priv->psr.psr2_support = false; - return false; + if (adjusted_mode->crtc_hdisplay > 3200 || + adjusted_mode->crtc_vdisplay > 2000) { + DRM_DEBUG_KMS("PSR2 disabled, panel resolution too big\n"); + return; } /* * FIXME:enable psr2 only for y-cordinate psr2 panels * After gtc implementation , remove this restriction. */ - if (!dev_priv->psr.y_cord_support && dev_priv->psr.psr2_support) { + if (!dev_priv->psr.y_cord_support) { DRM_DEBUG_KMS("PSR2 disabled, panel does not support Y coordinate\n"); - return false; + return; } - dev_priv->psr.source_ok = true; - return true; + crtc_state->has_psr = true; + crtc_state->has_psr2 = true; } static void intel_psr_activate(struct intel_dp *intel_dp) @@ -469,160 +457,140 @@ static void intel_psr_activate(struct intel_dp *intel_dp) WARN_ON(dev_priv->psr.active); lockdep_assert_held(&dev_priv->psr.lock); - /* Enable/Re-enable PSR on the host */ - if (HAS_DDI(dev_priv)) - /* On HSW+ after we enable PSR on source it will activate it - * as soon as it match configure idle_frame count. So - * we just actually enable it here on activation time. 
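The net effect of intel_psr_compute_config() above is that PSR eligibility is now decided at atomic-check time and recorded in crtc_state->has_psr/has_psr2, which intel_psr_enable() later consumes without re-checking. A minimal two-phase model of that flow (the eligibility checks are heavily trimmed):

#include <stdbool.h>
#include <stdio.h>

struct crtc_state {
	int hdisplay, vdisplay;
	bool has_psr, has_psr2;
};

/* Compute phase: record what this configuration allows. */
static void psr_compute_config(struct crtc_state *st, bool sink_psr2,
			       bool y_cord)
{
	st->has_psr = true;
	/* PSR2 is limited to panels up to 3200x2000 with Y-coord support */
	if (!sink_psr2 || !y_cord ||
	    st->hdisplay > 3200 || st->vdisplay > 2000)
		return;
	st->has_psr2 = true;
}

/* Enable phase: only consume the precomputed state. */
static void psr_enable(const struct crtc_state *st)
{
	if (!st->has_psr)
		return;
	printf("enabling %s\n", st->has_psr2 ? "PSR2" : "PSR1");
}

int main(void)
{
	struct crtc_state st = { .hdisplay = 1920, .vdisplay = 1080 };

	psr_compute_config(&st, true, true);
	psr_enable(&st);
	return 0;
}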
- */ - hsw_psr_enable_source(intel_dp); - else - vlv_psr_activate(intel_dp); - + dev_priv->psr.activate(intel_dp); dev_priv->psr.active = true; } +static void hsw_psr_enable_source(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_device *dev = dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + u32 chicken; + + if (dev_priv->psr.psr2_support) { + chicken = PSR2_VSC_ENABLE_PROG_HEADER; + if (dev_priv->psr.y_cord_support) + chicken |= PSR2_ADD_VERTICAL_LINE_COUNT; + I915_WRITE(CHICKEN_TRANS(cpu_transcoder), chicken); + + I915_WRITE(EDP_PSR_DEBUG_CTL, + EDP_PSR_DEBUG_MASK_MEMUP | + EDP_PSR_DEBUG_MASK_HPD | + EDP_PSR_DEBUG_MASK_LPSP | + EDP_PSR_DEBUG_MASK_MAX_SLEEP | + EDP_PSR_DEBUG_MASK_DISP_REG_WRITE); + } else { + /* + * Per Spec: Avoid continuous PSR exit by masking MEMUP + * and HPD. also mask LPSP to avoid dependency on other + * drivers that might block runtime_pm besides + * preventing other hw tracking issues now we can rely + * on frontbuffer tracking. + */ + I915_WRITE(EDP_PSR_DEBUG_CTL, + EDP_PSR_DEBUG_MASK_MEMUP | + EDP_PSR_DEBUG_MASK_HPD | + EDP_PSR_DEBUG_MASK_LPSP); + } +} + /** * intel_psr_enable - Enable PSR * @intel_dp: Intel DP + * @crtc_state: new CRTC state * * This function can only be called after the pipe is fully trained and enabled. */ -void intel_psr_enable(struct intel_dp *intel_dp) +void intel_psr_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; - u32 chicken; - if (!HAS_PSR(dev_priv)) { - DRM_DEBUG_KMS("PSR not supported on this platform\n"); + if (!crtc_state->has_psr) return; - } - - if (!is_edp_psr(intel_dp)) { - DRM_DEBUG_KMS("PSR not supported by this panel\n"); - return; - } + WARN_ON(dev_priv->drrs.dp); mutex_lock(&dev_priv->psr.lock); if (dev_priv->psr.enabled) { DRM_DEBUG_KMS("PSR already in use\n"); goto unlock; } - if (!intel_psr_match_conditions(intel_dp)) - goto unlock; + dev_priv->psr.psr2_support = crtc_state->has_psr2; + dev_priv->psr.source_ok = true; dev_priv->psr.busy_frontbuffer_bits = 0; - if (HAS_DDI(dev_priv)) { - if (dev_priv->psr.psr2_support) { - skl_psr_setup_su_vsc(intel_dp); - chicken = PSR2_VSC_ENABLE_PROG_HEADER; - if (dev_priv->psr.y_cord_support) - chicken |= PSR2_ADD_VERTICAL_LINE_COUNT; - I915_WRITE(CHICKEN_TRANS(cpu_transcoder), chicken); - I915_WRITE(EDP_PSR_DEBUG_CTL, - EDP_PSR_DEBUG_MASK_MEMUP | - EDP_PSR_DEBUG_MASK_HPD | - EDP_PSR_DEBUG_MASK_LPSP | - EDP_PSR_DEBUG_MASK_MAX_SLEEP | - EDP_PSR_DEBUG_MASK_DISP_REG_WRITE); - } else { - /* set up vsc header for psr1 */ - hsw_psr_setup_vsc(intel_dp); - /* - * Per Spec: Avoid continuous PSR exit by masking MEMUP - * and HPD. also mask LPSP to avoid dependency on other - * drivers that might block runtime_pm besides - * preventing other hw tracking issues now we can rely - * on frontbuffer tracking. 
- */ - I915_WRITE(EDP_PSR_DEBUG_CTL, - EDP_PSR_DEBUG_MASK_MEMUP | - EDP_PSR_DEBUG_MASK_HPD | - EDP_PSR_DEBUG_MASK_LPSP); - } - - /* Enable PSR on the panel */ - hsw_psr_enable_sink(intel_dp); + dev_priv->psr.setup_vsc(intel_dp, crtc_state); + dev_priv->psr.enable_sink(intel_dp); + dev_priv->psr.enable_source(intel_dp, crtc_state); + dev_priv->psr.enabled = intel_dp; - if (INTEL_GEN(dev_priv) >= 9) - intel_psr_activate(intel_dp); + if (INTEL_GEN(dev_priv) >= 9) { + intel_psr_activate(intel_dp); } else { - vlv_psr_setup_vsc(intel_dp); - - /* Enable PSR on the panel */ - vlv_psr_enable_sink(intel_dp); - - /* On HSW+ enable_source also means go to PSR entry/active - * state as soon as idle_frame achieved and here would be - * to soon. However on VLV enable_source just enable PSR - * but let it on inactive state. So we might do this prior - * to active transition, i.e. here. + /* + * FIXME: Activation should happen immediately since this + * function is just called after pipe is fully trained and + * enabled. + * However on some platforms we face issues when first + * activation follows a modeset so quickly. + * - On VLV/CHV we get a blank screen on first activation + * - On HSW/BDW we get a recoverable frozen screen until + * next exit-activate sequence. */ - vlv_psr_enable_source(intel_dp); - } - - /* - * FIXME: Activation should happen immediately since this function - * is just called after pipe is fully trained and enabled. - * However on every platform we face issues when first activation - * follows a modeset so quickly. - * - On VLV/CHV we get bank screen on first activation - * - On HSW/BDW we get a recoverable frozen screen until next - * exit-activate sequence. - */ - if (INTEL_GEN(dev_priv) < 9) schedule_delayed_work(&dev_priv->psr.work, msecs_to_jiffies(intel_dp->panel_power_cycle_delay * 5)); + } - dev_priv->psr.enabled = intel_dp; unlock: mutex_unlock(&dev_priv->psr.lock); } -static void vlv_psr_disable(struct intel_dp *intel_dp) +static void vlv_psr_disable(struct intel_dp *intel_dp, + const struct intel_crtc_state *old_crtc_state) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = - to_intel_crtc(intel_dig_port->base.base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); uint32_t val; if (dev_priv->psr.active) { - /* Put VLV PSR back to PSR_state 0 that is PSR Disabled. */ + /* Put VLV PSR back to PSR_state 0 (disabled).
*/ if (intel_wait_for_register(dev_priv, - VLV_PSRSTAT(intel_crtc->pipe), + VLV_PSRSTAT(crtc->pipe), VLV_EDP_PSR_IN_TRANS, 0, 1)) WARN(1, "PSR transition took longer than expected\n"); - val = I915_READ(VLV_PSRCTL(intel_crtc->pipe)); + val = I915_READ(VLV_PSRCTL(crtc->pipe)); val &= ~VLV_EDP_PSR_ACTIVE_ENTRY; val &= ~VLV_EDP_PSR_ENABLE; val &= ~VLV_EDP_PSR_MODE_MASK; - I915_WRITE(VLV_PSRCTL(intel_crtc->pipe), val); + I915_WRITE(VLV_PSRCTL(crtc->pipe), val); dev_priv->psr.active = false; } else { - WARN_ON(vlv_is_psr_active_on_pipe(dev, intel_crtc->pipe)); + WARN_ON(vlv_is_psr_active_on_pipe(dev, crtc->pipe)); } } -static void hsw_psr_disable(struct intel_dp *intel_dp) +static void hsw_psr_disable(struct intel_dp *intel_dp, + const struct intel_crtc_state *old_crtc_state) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); if (dev_priv->psr.active) { - i915_reg_t psr_ctl; + i915_reg_t psr_status; u32 psr_status_mask; if (dev_priv->psr.aux_frame_sync) @@ -631,24 +599,24 @@ static void hsw_psr_disable(struct intel_dp *intel_dp) 0); if (dev_priv->psr.psr2_support) { - psr_ctl = EDP_PSR2_CTL; + psr_status = EDP_PSR2_STATUS_CTL; psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; - I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & + I915_WRITE(EDP_PSR2_CTL, + I915_READ(EDP_PSR2_CTL) & ~(EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE)); } else { - psr_ctl = EDP_PSR_STATUS_CTL; + psr_status = EDP_PSR_STATUS_CTL; psr_status_mask = EDP_PSR_STATUS_STATE_MASK; - I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & ~EDP_PSR_ENABLE); + I915_WRITE(EDP_PSR_CTL, + I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE); } /* Wait till PSR is idle */ if (intel_wait_for_register(dev_priv, - psr_ctl, psr_status_mask, 0, + psr_status, psr_status_mask, 0, 2000)) DRM_ERROR("Timed out waiting for PSR Idle State\n"); @@ -664,26 +632,27 @@ static void hsw_psr_disable(struct intel_dp *intel_dp) /** * intel_psr_disable - Disable PSR * @intel_dp: Intel DP + * @old_crtc_state: old CRTC state * * This function needs to be called before disabling pipe. */ -void intel_psr_disable(struct intel_dp *intel_dp) +void intel_psr_disable(struct intel_dp *intel_dp, + const struct intel_crtc_state *old_crtc_state) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); + if (!old_crtc_state->has_psr) + return; + mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { mutex_unlock(&dev_priv->psr.lock); return; } - /* Disable PSR on Source */ - if (HAS_DDI(dev_priv)) - hsw_psr_disable(intel_dp); - else - vlv_psr_disable(intel_dp); + dev_priv->psr.disable_source(intel_dp, old_crtc_state); /* Disable PSR on Sink */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); @@ -783,17 +752,20 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv) } else { val = I915_READ(VLV_PSRCTL(pipe)); - /* Here we do the transition from PSR_state 3 to PSR_state 5 - * directly once PSR State 4 that is active with single frame - * update can be skipped. PSR_state 5 that is PSR exit then - * Hardware is responsible to transition back to PSR_state 1 - * that is PSR inactive. Same state after - * vlv_edp_psr_enable_source. + /* + * Here we do the transition directly from + * PSR_state 3 (active - no Remote Frame Buffer (RFB) update) to + * PSR_state 5 (exit). + * PSR State 4 (active with single frame update) can be skipped.
+ * On PSR_state 5 (exit) the hardware is responsible for transitioning + * back to PSR_state 1 (inactive). + * We are now in the same state as after vlv_psr_enable_source. */ val &= ~VLV_EDP_PSR_ACTIVE_ENTRY; I915_WRITE(VLV_PSRCTL(pipe), val); - /* Send AUX wake up - Spec says after transitioning to PSR + /* + * Send AUX wake up - Spec says after transitioning to PSR * active we have to send AUX wake up by writing 01h in DPCD * 600h of sink device. * XXX: This might slow down the transition, but without this @@ -824,6 +796,9 @@ void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, enum pipe pipe; u32 val; + if (!HAS_PSR(dev_priv)) + return; + /* * Single frame update is already supported on BDW+ but it requires * many W/A and it isn't really needed. @@ -870,6 +845,9 @@ void intel_psr_invalidate(struct drm_i915_private *dev_priv, struct drm_crtc *crtc; enum pipe pipe; + if (!HAS_PSR(dev_priv)) + return; + mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { mutex_unlock(&dev_priv->psr.lock); return; } @@ -907,6 +885,9 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, struct drm_crtc *crtc; enum pipe pipe; + if (!HAS_PSR(dev_priv)) + return; + mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { mutex_unlock(&dev_priv->psr.lock); @@ -939,12 +920,15 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, */ void intel_psr_init(struct drm_i915_private *dev_priv) { + if (!HAS_PSR(dev_priv)) + return; + dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ? HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE; /* Per platform default: all disabled. */ - if (i915.enable_psr == -1) - i915.enable_psr = 0; + if (i915_modparams.enable_psr == -1) + i915_modparams.enable_psr = 0; /* Set link_standby x link_off defaults */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) @@ -958,15 +942,29 @@ void intel_psr_init(struct drm_i915_private *dev_priv) dev_priv->psr.link_standby = dev_priv->vbt.psr.full_link; /* Override link_standby x link_off defaults */ - if (i915.enable_psr == 2 && !dev_priv->psr.link_standby) { + if (i915_modparams.enable_psr == 2 && !dev_priv->psr.link_standby) { DRM_DEBUG_KMS("PSR: Forcing link standby\n"); dev_priv->psr.link_standby = true; } - if (i915.enable_psr == 3 && dev_priv->psr.link_standby) { + if (i915_modparams.enable_psr == 3 && dev_priv->psr.link_standby) { DRM_DEBUG_KMS("PSR: Forcing main link off\n"); dev_priv->psr.link_standby = false; } INIT_DELAYED_WORK(&dev_priv->psr.work, intel_psr_work); mutex_init(&dev_priv->psr.lock); + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + dev_priv->psr.enable_source = vlv_psr_enable_source; + dev_priv->psr.disable_source = vlv_psr_disable; + dev_priv->psr.enable_sink = vlv_psr_enable_sink; + dev_priv->psr.activate = vlv_psr_activate; + dev_priv->psr.setup_vsc = vlv_psr_setup_vsc; + } else { + dev_priv->psr.enable_source = hsw_psr_enable_source; + dev_priv->psr.disable_source = hsw_psr_disable; + dev_priv->psr.enable_sink = hsw_psr_enable_sink; + dev_priv->psr.activate = hsw_psr_activate; + dev_priv->psr.setup_vsc = hsw_psr_setup_vsc; + } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cdf084ef5aae..e2085820b586 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -28,9 +28,12 @@ */ #include <linux/log2.h> + #include <drm/drmP.h> -#include "i915_drv.h" #include <drm/i915_drm.h> + +#include "i915_drv.h" +#include "i915_gem_render_state.h" #include "i915_trace.h" #include "intel_drv.h" @@ -337,50 +340,6 @@ 
gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) return 0; } -static int -gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) -{ - u32 flags; - u32 *cs; - - cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - flags = PIPE_CONTROL_CS_STALL; - - if (mode & EMIT_FLUSH) { - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; - } - if (mode & EMIT_INVALIDATE) { - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; - - /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ - cs = gen8_emit_pipe_control(cs, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD, - 0); - } - - cs = gen8_emit_pipe_control(cs, flags, - i915_ggtt_offset(req->engine->scratch) + - 2 * CACHELINE_BYTES); - - intel_ring_advance(req, cs); - - return 0; -} - static void ring_setup_phys_status_page(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -402,17 +361,18 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine) */ if (IS_GEN7(dev_priv)) { switch (engine->id) { + /* + * No more rings exist on Gen7. Default case is only to shut up + * gcc switch check warning. + */ + default: + GEM_BUG_ON(engine->id); case RCS: mmio = RENDER_HWS_PGA_GEN7; break; case BCS: mmio = BLT_HWS_PGA_GEN7; break; - /* - * VCS2 actually doesn't exist on Gen7. Only shut up - * gcc switch check warning - */ - case VCS2: case VCS: mmio = BSD_HWS_PGA_GEN7; break; @@ -423,20 +383,16 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine) } else if (IS_GEN6(dev_priv)) { mmio = RING_HWS_PGA_GEN6(engine->mmio_base); } else { - /* XXX: gen8 returns to sanity */ mmio = RING_HWS_PGA(engine->mmio_base); } + if (INTEL_GEN(dev_priv) >= 6) + I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); + I915_WRITE(mmio, engine->status_page.ggtt_offset); POSTING_READ(mmio); - /* - * Flush the TLB for this page - * - * FIXME: These two bits have disappeared on gen8, so a question - * arises: do we still need this and if so how should we go about - * invalidating the TLB? 
- */ + /* Flush the TLB for this page */ if (IS_GEN(dev_priv, 6, 7)) { i915_reg_t reg = RING_INSTPM(engine->mmio_base); @@ -476,14 +432,13 @@ static bool stop_ring(struct intel_engine_cs *engine) } } - I915_WRITE_CTL(engine, 0); + I915_WRITE_HEAD(engine, I915_READ_TAIL(engine)); + I915_WRITE_HEAD(engine, 0); I915_WRITE_TAIL(engine, 0); - if (INTEL_GEN(dev_priv) > 2) { - (void)I915_READ_CTL(engine); - I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); - } + /* The ring must be empty before it is disabled */ + I915_WRITE_CTL(engine, 0); return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0; } @@ -566,6 +521,9 @@ static int init_ring_common(struct intel_engine_cs *engine) intel_engine_init_hangcheck(engine); + if (INTEL_GEN(dev_priv) > 2) + I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); + out: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -575,7 +533,16 @@ out: static void reset_ring_common(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - /* Try to restore the logical GPU state to match the continuation + /* + * RC6 must be prevented until the reset is complete and the engine + * reinitialised. If it occurs in the middle of this sequence, the + * state written to/loaded from the power context is ill-defined (e.g. + * the PP_BASE_DIR may be lost). + */ + assert_forcewakes_active(engine->i915, FORCEWAKE_ALL); + + /* + * Try to restore the logical GPU state to match the continuation * of the request queue. If we skip the context/PD restore, then * the next request may try to execute assuming that its context * is valid and loaded on the GPU and so may try to access invalid @@ -593,8 +560,6 @@ static void reset_ring_common(struct intel_engine_cs *engine, struct intel_context *ce = &request->ctx->engine[engine->id]; struct i915_hw_ppgtt *ppgtt; - /* FIXME consider gen8 reset */ - if (ce->state) { I915_WRITE(CCID, i915_ggtt_offset(ce->state) | @@ -626,6 +591,7 @@ static void reset_ring_common(struct intel_engine_cs *engine, request->ring->head = request->postfix; } else { engine->legacy_active_context = NULL; + engine->legacy_active_ppgtt = NULL; } } @@ -695,62 +661,6 @@ static int init_render_ring(struct intel_engine_cs *engine) return init_workarounds_ring(engine); } -static void render_ring_cleanup(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - i915_vma_unpin_and_release(&dev_priv->semaphore); -} - -static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs) -{ - struct drm_i915_private *dev_priv = req->i915; - struct intel_engine_cs *waiter; - enum intel_engine_id id; - - for_each_engine(waiter, dev_priv, id) { - u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; - if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) - continue; - - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL; - *cs++ = lower_32_bits(gtt_offset); - *cs++ = upper_32_bits(gtt_offset); - *cs++ = req->global_seqno; - *cs++ = 0; - *cs++ = MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id); - *cs++ = 0; - } - - return cs; -} - -static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs) -{ - struct drm_i915_private *dev_priv = req->i915; - struct intel_engine_cs *waiter; - enum intel_engine_id id; - - for_each_engine(waiter, dev_priv, id) { - u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; - if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) - continue; - - *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *cs++ = 
lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; - *cs++ = upper_32_bits(gtt_offset); - *cs++ = req->global_seqno; - *cs++ = MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id); - *cs++ = 0; - } - - return cs; -} - static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; @@ -778,6 +688,24 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) return cs; } +static void cancel_requests(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_request *request; + unsigned long flags; + + spin_lock_irqsave(&engine->timeline->lock, flags); + + /* Mark all submitted requests as skipped. */ + list_for_each_entry(request, &engine->timeline->requests, link) { + GEM_BUG_ON(!request->global_seqno); + if (!i915_gem_request_completed(request)) + dma_fence_set_error(&request->fence, -EIO); + } + /* Remaining _unready_ requests will be nop'ed when submitted */ + + spin_unlock_irqrestore(&engine->timeline->lock, flags); +} + static void i9xx_submit_request(struct drm_i915_gem_request *request) { struct drm_i915_private *dev_priv = request->i915; @@ -815,70 +743,6 @@ static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) req->engine->semaphore.signal(req, cs)); } -static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *cs) -{ - struct intel_engine_cs *engine = req->engine; - - if (engine->semaphore.signal) - cs = engine->semaphore.signal(req, cs); - - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE; - *cs++ = intel_hws_seqno_address(engine); - *cs++ = 0; - *cs++ = req->global_seqno; - /* We're thrashing one dword of HWS. */ - *cs++ = 0; - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; - - req->tail = intel_ring_offset(req, cs); - assert_ring_tail_valid(req->ring, req->tail); -} - -static const int gen8_render_emit_breadcrumb_sz = 8; - -/** - * intel_ring_sync - sync the waiter to the signaller on seqno - * - * @waiter - ring that is waiting - * @signaller - ring which has, or will signal - * @seqno - seqno which the waiter will block on - */ - -static int -gen8_ring_sync_to(struct drm_i915_gem_request *req, - struct drm_i915_gem_request *signal) -{ - struct drm_i915_private *dev_priv = req->i915; - u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id); - struct i915_hw_ppgtt *ppgtt; - u32 *cs; - - cs = intel_ring_begin(req, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_SAD_GTE_SDD; - *cs++ = signal->global_seqno; - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); - intel_ring_advance(req, cs); - - /* When the !RCS engines idle waiting upon a semaphore, they lose their - * pagetables and we must reload them before executing the batch. - * We do this on the i915_switch_context() following the wait and - * before the dispatch. 
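The new cancel_requests() above walks the engine timeline and fails every submitted-but-incomplete request with -EIO, leaving completed ones untouched. The same sweep modeled over a plain array instead of the kernel list and fence machinery:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct request {
	int seqno;
	bool completed;
	int fence_error;	/* 0 = no error recorded */
};

/* Mirrors cancel_requests(): mark incomplete submitted work as -EIO. */
static void cancel_requests(struct request *reqs, int n)
{
	for (int i = 0; i < n; i++)
		if (!reqs[i].completed)
			reqs[i].fence_error = -EIO;
}

int main(void)
{
	struct request reqs[] = {
		{ .seqno = 1, .completed = true },
		{ .seqno = 2, .completed = false },
	};

	cancel_requests(reqs, 2);
	for (int i = 0; i < 2; i++)
		printf("seqno %d error %d\n", reqs[i].seqno,
		       reqs[i].fence_error);
	return 0;
}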
- */ - ppgtt = req->ctx->ppgtt; - if (ppgtt && req->engine->id != RCS) - ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine); - return 0; -} - static int gen6_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) @@ -1054,25 +918,6 @@ hsw_vebox_irq_disable(struct intel_engine_cs *engine) gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask); } -static void -gen8_irq_enable(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - I915_WRITE_IMR(engine, - ~(engine->irq_enable_mask | - engine->irq_keep_mask)); - POSTING_READ_FW(RING_IMR(engine->mmio_base)); -} - -static void -gen8_irq_disable(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - I915_WRITE_IMR(engine, ~engine->irq_keep_mask); -} - static int i965_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 length, @@ -1174,113 +1019,7 @@ i915_emit_bb_start(struct drm_i915_gem_request *req, return 0; } -static void cleanup_phys_status_page(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - if (!dev_priv->status_page_dmah) - return; - - drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah); - engine->status_page.page_addr = NULL; -} -static void cleanup_status_page(struct intel_engine_cs *engine) -{ - struct i915_vma *vma; - struct drm_i915_gem_object *obj; - - vma = fetch_and_zero(&engine->status_page.vma); - if (!vma) - return; - - obj = vma->obj; - - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_gem_object_unpin_map(obj); - __i915_gem_object_release_unless_active(obj); -} - -static int init_status_page(struct intel_engine_cs *engine) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - unsigned int flags; - void *vaddr; - int ret; - - obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate status page\n"); - return PTR_ERR(obj); - } - - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - if (ret) - goto err; - - vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err; - } - - flags = PIN_GLOBAL; - if (!HAS_LLC(engine->i915)) - /* On g33, we cannot place HWS above 256MiB, so - * restrict its pinning to the low mappable arena. - * Though this restriction is not documented for - * gen4, gen5, or byt, they also behave similarly - * and hang if the HWS is placed at the top of the - * GTT. To generalise, it appears that all !llc - * platforms have issues with us placing the HWS - * above the mappable region (even though we never - * actualy map it). 
- */ - flags |= PIN_MAPPABLE; - ret = i915_vma_pin(vma, 0, 4096, flags); - if (ret) - goto err; - - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - ret = PTR_ERR(vaddr); - goto err_unpin; - } - - engine->status_page.vma = vma; - engine->status_page.ggtt_offset = i915_ggtt_offset(vma); - engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); - - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", - engine->name, i915_ggtt_offset(vma)); - return 0; - -err_unpin: - i915_vma_unpin(vma); -err: - i915_gem_object_put(obj); - return ret; -} - -static int init_phys_status_page(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - GEM_BUG_ON(engine->id != RCS); - - dev_priv->status_page_dmah = - drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); - if (!dev_priv->status_page_dmah) - return -ENOMEM; - - engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr; - memset(engine->status_page.page_addr, 0, PAGE_SIZE); - - return 0; -} int intel_ring_pin(struct intel_ring *ring, struct drm_i915_private *i915, @@ -1321,6 +1060,8 @@ int intel_ring_pin(struct intel_ring *ring, if (IS_ERR(addr)) goto err; + vma->obj->pin_global++; + ring->vaddr = addr; return 0; @@ -1352,6 +1093,7 @@ void intel_ring_unpin(struct intel_ring *ring) i915_gem_object_unpin_map(ring->vma->obj); ring->vaddr = NULL; + ring->vma->obj->pin_global--; i915_vma_unpin(ring->vma); } @@ -1433,12 +1175,13 @@ static int context_pin(struct i915_gem_context *ctx) struct i915_vma *vma = ctx->engine[RCS].state; int ret; - /* Clear this page out of any CPU caches for coherent swap-in/out. + /* + * Clear this page out of any CPU caches for coherent swap-in/out. * We only want to do this on the first bind so that we do not stall * on an active context (which by nature is already on the GPU). */ if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { - ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); + ret = i915_gem_object_set_to_gtt_domain(vma->obj, true); if (ret) return ret; } @@ -1453,11 +1196,34 @@ alloc_context_vma(struct intel_engine_cs *engine) struct drm_i915_private *i915 = engine->i915; struct drm_i915_gem_object *obj; struct i915_vma *vma; + int err; obj = i915_gem_object_create(i915, engine->context_size); if (IS_ERR(obj)) return ERR_CAST(obj); + if (engine->default_state) { + void *defaults, *vaddr; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(defaults)) { + err = PTR_ERR(defaults); + goto err_map; + } + + memcpy(vaddr, defaults, engine->context_size); + + i915_gem_object_unpin_map(engine->default_state); + i915_gem_object_unpin_map(obj); + } + /* * Try to make the context utilize L3 as well as LLC. * @@ -1479,10 +1245,18 @@ alloc_context_vma(struct intel_engine_cs *engine) } vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); - if (IS_ERR(vma)) - i915_gem_object_put(obj); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } return vma; + +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); + return ERR_PTR(err); } static struct intel_ring * @@ -1515,19 +1289,9 @@ intel_ring_context_pin(struct intel_engine_cs *engine, if (ret) goto err; - ce->state->obj->mm.dirty = true; + ce->state->obj->pin_global++; } - /* The kernel context is only used as a placeholder for flushing the - * active context. 
It is never used for submitting user rendering and - * as such never requires the golden render context, and so we can skip - * emitting it when we switch to the kernel context. This is required - * as during eviction we cannot allocate and pin the renderstate in - * order to initialise the context. - */ - if (i915_gem_context_is_kernel(ctx)) - ce->initialised = true; - i915_gem_context_get(ctx); out: @@ -1550,8 +1314,10 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, if (--ce->pin_count) return; - if (ce->state) + if (ce->state) { + ce->state->obj->pin_global--; i915_vma_unpin(ce->state); + } i915_gem_context_put(ctx); } @@ -1567,17 +1333,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) goto err; - if (HWS_NEEDS_PHYSICAL(engine->i915)) - err = init_phys_status_page(engine); - else - err = init_status_page(engine); - if (err) - goto err; - ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); if (IS_ERR(ring)) { err = PTR_ERR(ring); - goto err_hws; + goto err; } /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ @@ -1592,11 +1351,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) err_ring: intel_ring_free(ring); -err_hws: - if (HWS_NEEDS_PHYSICAL(engine->i915)) - cleanup_phys_status_page(engine); - else - cleanup_status_page(engine); err: intel_engine_cleanup_common(engine); return err; @@ -1615,11 +1369,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) if (engine->cleanup) engine->cleanup(engine); - if (HWS_NEEDS_PHYSICAL(dev_priv)) - cleanup_phys_status_page(engine); - else - cleanup_status_page(engine); - intel_engine_cleanup_common(engine); dev_priv->engine[engine->id] = NULL; @@ -1636,10 +1385,194 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) intel_ring_reset(engine->buffer, 0); } -static int ring_request_alloc(struct drm_i915_gem_request *request) +static inline int mi_set_context(struct drm_i915_gem_request *rq, u32 flags) { + struct drm_i915_private *i915 = rq->i915; + struct intel_engine_cs *engine = rq->engine; + enum intel_engine_id id; + const int num_rings = + /* Use an extended w/a on gen7 if signalling from other rings */ + (HAS_LEGACY_SEMAPHORES(i915) && IS_GEN7(i915)) ? + INTEL_INFO(i915)->num_rings - 1 : + 0; + int len; u32 *cs; + flags |= MI_MM_SPACE_GTT; + if (IS_HASWELL(i915)) + /* These flags are for resource streamer on HSW+ */ + flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; + else + flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; + + len = 4; + if (IS_GEN7(i915)) + len += 2 + (num_rings ? 
4*num_rings + 6 : 0); + + cs = intel_ring_begin(rq, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ + if (IS_GEN7(i915)) { + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (num_rings) { + struct intel_engine_cs *signaller; + + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); + for_each_engine(signaller, i915, id) { + if (signaller == engine) + continue; + + *cs++ = i915_mmio_reg_offset( + RING_PSMI_CTL(signaller->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); + } + } + } + + *cs++ = MI_NOOP; + *cs++ = MI_SET_CONTEXT; + *cs++ = i915_ggtt_offset(rq->ctx->engine[RCS].state) | flags; + /* + * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP + * WaMiSetContext_Hang:snb,ivb,vlv + */ + *cs++ = MI_NOOP; + + if (IS_GEN7(i915)) { + if (num_rings) { + struct intel_engine_cs *signaller; + i915_reg_t last_reg = {}; /* keep gcc quiet */ + + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); + for_each_engine(signaller, i915, id) { + if (signaller == engine) + continue; + + last_reg = RING_PSMI_CTL(signaller->mmio_base); + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = _MASKED_BIT_DISABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); + } + + /* Insert a delay before the next switch! */ + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = MI_NOOP; + } + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static int remap_l3(struct drm_i915_gem_request *rq, int slice) +{ + u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice]; + int i; + + if (!remap_info) + return 0; + + cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Note: We do not worry about the concurrent register cacheline hang + * here because no other code should access these registers other than + * at initialization time. + */ + *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); + for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { + *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); + *cs++ = remap_info[i]; + } + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +static int switch_context(struct drm_i915_gem_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_gem_context *to_ctx = rq->ctx; + struct i915_hw_ppgtt *to_mm = + to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; + struct i915_gem_context *from_ctx = engine->legacy_active_context; + struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt; + u32 hw_flags = 0; + int ret, i; + + lockdep_assert_held(&rq->i915->drm.struct_mutex); + GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); + + if (to_mm != from_mm || + (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) { + trace_switch_mm(engine, to_ctx); + ret = to_mm->switch_mm(to_mm, rq); + if (ret) + goto err; + + to_mm->pd_dirty_rings &= ~intel_engine_flag(engine); + engine->legacy_active_ppgtt = to_mm; + hw_flags = MI_FORCE_RESTORE; + } + + if (to_ctx->engine[engine->id].state && + (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { + GEM_BUG_ON(engine->id != RCS); + + /* + * The kernel context(s) is treated as pure scratch and is not + * expected to retain any state (as we sacrifice it during + * suspend and on resume it may be corrupted). This is ok, + * as nothing actually executes using the kernel context; it + * is purely used for flushing user contexts. 
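+ * + * Hence the MI_RESTORE_INHIBIT flag below: the switch still saves + * the outgoing context image, but skips restoring state from the + * (possibly stale) kernel context image, so nothing bogus is ever + * replayed onto the engine.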
+ */ + if (i915_gem_context_is_kernel(to_ctx)) + hw_flags = MI_RESTORE_INHIBIT; + + ret = mi_set_context(rq, hw_flags); + if (ret) + goto err_mm; + + engine->legacy_active_context = to_ctx; + } + + if (to_ctx->remap_slice) { + for (i = 0; i < MAX_L3_SLICES; i++) { + if (!(to_ctx->remap_slice & BIT(i))) + continue; + + ret = remap_l3(rq, i); + if (ret) + goto err_ctx; + } + + to_ctx->remap_slice = 0; + } + + return 0; + +err_ctx: + engine->legacy_active_context = from_ctx; +err_mm: + engine->legacy_active_ppgtt = from_mm; +err: + return ret; +} + +static int ring_request_alloc(struct drm_i915_gem_request *request) +{ + int ret; + GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); /* Flush enough space to reduce the likelihood of waiting after @@ -1648,37 +1581,28 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) */ request->reserved_space += LEGACY_REQUEST_SIZE; - cs = intel_ring_begin(request, 0); - if (IS_ERR(cs)) - return PTR_ERR(cs); + ret = intel_ring_wait_for_space(request->ring, request->reserved_space); + if (ret) + return ret; + + ret = switch_context(request); + if (ret) + return ret; request->reserved_space -= LEGACY_REQUEST_SIZE; return 0; } -static noinline int wait_for_space(struct drm_i915_gem_request *req, - unsigned int bytes) +static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) { - struct intel_ring *ring = req->ring; struct drm_i915_gem_request *target; long timeout; - lockdep_assert_held(&req->i915->drm.struct_mutex); + lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); if (intel_ring_update_space(ring) >= bytes) return 0; - /* - * Space is reserved in the ringbuffer for finalising the request, - * as that cannot be allowed to fail. During request finalisation, - * reserved_space is set to 0 to stop the overallocation and the - * assumption is that then we never need to wait (which has the - * risk of failing with EINTR). - * - * See also i915_gem_request_alloc() and i915_add_request(). - */ - GEM_BUG_ON(!req->reserved_space); - list_for_each_entry(target, &ring->request_list, ring_link) { /* Would completion of this request free enough space? */ if (bytes <= __intel_ring_space(target->postfix, @@ -1702,6 +1626,22 @@ static noinline int wait_for_space(struct drm_i915_gem_request *req, return 0; } +int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes) +{ + GEM_BUG_ON(bytes > ring->effective_size); + if (unlikely(bytes > ring->effective_size - ring->emit)) + bytes += ring->size - ring->emit; + + if (unlikely(bytes > ring->space)) { + int ret = wait_for_space(ring, bytes); + if (unlikely(ret)) + return ret; + } + + GEM_BUG_ON(ring->space < bytes); + return 0; +} + u32 *intel_ring_begin(struct drm_i915_gem_request *req, unsigned int num_dwords) { @@ -1741,7 +1681,20 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, } if (unlikely(total_bytes > ring->space)) { - int ret = wait_for_space(req, total_bytes); + int ret; + + /* + * Space is reserved in the ringbuffer for finalising the + * request, as that cannot be allowed to fail. During request + * finalisation, reserved_space is set to 0 to stop the + * overallocation and the assumption is that then we never need + * to wait (which has the risk of failing with EINTR). + * + * See also i915_gem_request_alloc() and i915_add_request(). 
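+ * + * A sketch of the failure this reservation avoids: were emitting + * the closing breadcrumb itself forced to block for ring space, + * that wait could be interrupted (-EINTR) and the request could + * then never be completed, so the space is carved out before any + * user payload is emitted.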
+ */ + GEM_BUG_ON(!req->reserved_space); + + ret = wait_for_space(ring, total_bytes); if (unlikely(ret)) return ERR_PTR(ret); } @@ -1836,8 +1789,6 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) return PTR_ERR(cs); cmd = MI_FLUSH_DW; - if (INTEL_GEN(req->i915) >= 8) - cmd += 1; /* We always require a command barrier so that subsequent * commands, such as breadcrumb interrupts, are strictly ordered @@ -1857,38 +1808,9 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) *cs++ = cmd; *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; - if (INTEL_GEN(req->i915) >= 8) { - *cs++ = 0; /* upper addr */ - *cs++ = 0; /* value */ - } else { - *cs++ = 0; - *cs++ = MI_NOOP; - } - intel_ring_advance(req, cs); - return 0; -} - -static int -gen8_emit_bb_start(struct drm_i915_gem_request *req, - u64 offset, u32 len, - unsigned int dispatch_flags) -{ - bool ppgtt = USES_PPGTT(req->i915) && - !(dispatch_flags & I915_DISPATCH_SECURE); - u32 *cs; - - cs = intel_ring_begin(req, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* FIXME(BDW): Address space and security selectors. */ - *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & - I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); + *cs++ = 0; *cs++ = MI_NOOP; intel_ring_advance(req, cs); - return 0; } @@ -1945,8 +1867,6 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) return PTR_ERR(cs); cmd = MI_FLUSH_DW; - if (INTEL_GEN(req->i915) >= 8) - cmd += 1; /* We always require a command barrier so that subsequent * commands, such as breadcrumb interrupts, are strictly ordered @@ -1965,13 +1885,8 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) cmd |= MI_INVALIDATE_TLB; *cs++ = cmd; *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; - if (INTEL_GEN(req->i915) >= 8) { - *cs++ = 0; /* upper addr */ - *cs++ = 0; /* value */ - } else { - *cs++ = 0; - *cs++ = MI_NOOP; - } + *cs++ = 0; + *cs++ = MI_NOOP; intel_ring_advance(req, cs); return 0; @@ -1980,110 +1895,61 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { - struct drm_i915_gem_object *obj; - int ret, i; + int i; - if (!i915.semaphores) + if (!HAS_LEGACY_SEMAPHORES(dev_priv)) return; - if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) { - struct i915_vma *vma; - - obj = i915_gem_object_create(dev_priv, PAGE_SIZE); - if (IS_ERR(obj)) - goto err; + GEM_BUG_ON(INTEL_GEN(dev_priv) < 6); + engine->semaphore.sync_to = gen6_ring_sync_to; + engine->semaphore.signal = gen6_signal; - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); - if (IS_ERR(vma)) - goto err_obj; - - ret = i915_gem_object_set_to_gtt_domain(obj, false); - if (ret) - goto err_obj; - - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (ret) - goto err_obj; - - dev_priv->semaphore = vma; - } - - if (INTEL_GEN(dev_priv) >= 8) { - u32 offset = i915_ggtt_offset(dev_priv->semaphore); - - engine->semaphore.sync_to = gen8_ring_sync_to; - engine->semaphore.signal = gen8_xcs_signal; - - for (i = 0; i < I915_NUM_ENGINES; i++) { - u32 ring_offset; - - if (i != engine->id) - ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i); - else - ring_offset = MI_SEMAPHORE_SYNC_INVALID; - - engine->semaphore.signal_ggtt[i] = ring_offset; - } - } else if (INTEL_GEN(dev_priv) >= 6) { - engine->semaphore.sync_to = 
gen6_ring_sync_to; - engine->semaphore.signal = gen6_signal; - - /* - * The current semaphore is only applied on pre-gen8 - * platform. And there is no VCS2 ring on the pre-gen8 - * platform. So the semaphore between RCS and VCS2 is - * initialized as INVALID. Gen8 will initialize the - * sema between VCS2 and RCS later. - */ - for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) { - static const struct { - u32 wait_mbox; - i915_reg_t mbox_reg; - } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = { - [RCS_HW] = { - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, - }, - [VCS_HW] = { - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, - }, - [BCS_HW] = { - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, - }, - [VECS_HW] = { - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, - }, - }; + /* + * The current semaphore is only applied on pre-gen8 + * platform. And there is no VCS2 ring on the pre-gen8 + * platform. So the semaphore between RCS and VCS2 is + * initialized as INVALID. + */ + for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) { + static const struct { u32 wait_mbox; i915_reg_t mbox_reg; + } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = { + [RCS_HW] = { + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, + }, + [VCS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, + }, + [BCS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, + }, + [VECS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, + }, + }; + u32 wait_mbox; + i915_reg_t mbox_reg; - if (i == engine->hw_id) { - wait_mbox = MI_SEMAPHORE_SYNC_INVALID; - mbox_reg = GEN6_NOSYNC; - } else { - wait_mbox = sem_data[engine->hw_id][i].wait_mbox; - mbox_reg = sem_data[engine->hw_id][i].mbox_reg; - } - - engine->semaphore.mbox.wait[i] = wait_mbox; - engine->semaphore.mbox.signal[i] = mbox_reg; + if (i == engine->hw_id) { + wait_mbox = MI_SEMAPHORE_SYNC_INVALID; + mbox_reg = GEN6_NOSYNC; + } else { + wait_mbox = sem_data[engine->hw_id][i].wait_mbox; + mbox_reg = sem_data[engine->hw_id][i].mbox_reg; } - } - return; - -err_obj: - i915_gem_object_put(obj); -err: - 
DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n"); - i915.semaphores = 0; + engine->semaphore.mbox.wait[i] = wait_mbox; + engine->semaphore.mbox.signal[i] = mbox_reg; + } } static void intel_ring_init_irq(struct drm_i915_private *dev_priv, @@ -2091,11 +1957,7 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv, { engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift; - if (INTEL_GEN(dev_priv) >= 8) { - engine->irq_enable = gen8_irq_enable; - engine->irq_disable = gen8_irq_disable; - engine->irq_seqno_barrier = gen6_seqno_barrier; - } else if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(dev_priv) >= 6) { engine->irq_enable = gen6_irq_enable; engine->irq_disable = gen6_irq_disable; engine->irq_seqno_barrier = gen6_seqno_barrier; @@ -2115,16 +1977,24 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv, static void i9xx_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = i9xx_submit_request; + engine->cancel_requests = cancel_requests; + + engine->park = NULL; + engine->unpark = NULL; } static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) { + i9xx_set_default_submission(engine); engine->submit_request = gen6_bsd_submit_request; } static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { + /* gen8+ are only supported with execlists */ + GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8); + intel_ring_init_irq(dev_priv, engine); intel_ring_init_semaphores(dev_priv, engine); @@ -2138,26 +2008,20 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->emit_breadcrumb = i9xx_emit_breadcrumb; engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; - if (i915.semaphores) { + if (HAS_LEGACY_SEMAPHORES(dev_priv)) { int num_rings; engine->emit_breadcrumb = gen6_sema_emit_breadcrumb; num_rings = INTEL_INFO(dev_priv)->num_rings - 1; - if (INTEL_GEN(dev_priv) >= 8) { - engine->emit_breadcrumb_sz += num_rings * 6; - } else { - engine->emit_breadcrumb_sz += num_rings * 3; - if (num_rings & 1) - engine->emit_breadcrumb_sz++; - } + engine->emit_breadcrumb_sz += num_rings * 3; + if (num_rings & 1) + engine->emit_breadcrumb_sz++; } engine->set_default_submission = i9xx_set_default_submission; - if (INTEL_GEN(dev_priv) >= 8) - engine->emit_bb_start = gen8_emit_bb_start; - else if (INTEL_GEN(dev_priv) >= 6) + if (INTEL_GEN(dev_priv) >= 6) engine->emit_bb_start = gen6_emit_bb_start; else if (INTEL_GEN(dev_priv) >= 4) engine->emit_bb_start = i965_emit_bb_start; @@ -2177,20 +2041,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (HAS_L3_DPF(dev_priv)) engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; - if (INTEL_GEN(dev_priv) >= 8) { - engine->init_context = intel_rcs_ctx_init; - engine->emit_breadcrumb = gen8_render_emit_breadcrumb; - engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz; - engine->emit_flush = gen8_render_ring_flush; - if (i915.semaphores) { - int num_rings; - - engine->semaphore.signal = gen8_rcs_signal; - - num_rings = INTEL_INFO(dev_priv)->num_rings - 1; - engine->emit_breadcrumb_sz += num_rings * 8; - } - } else if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; if (IS_GEN6(dev_priv)) @@ -2209,7 +2060,6 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) engine->emit_bb_start = hsw_emit_bb_start; engine->init_hw = init_render_ring; - engine->cleanup = 
render_ring_cleanup; ret = intel_init_ring_buffer(engine); if (ret) @@ -2239,8 +2089,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) if (IS_GEN6(dev_priv)) engine->set_default_submission = gen6_bsd_set_default_submission; engine->emit_flush = gen6_bsd_ring_flush; - if (INTEL_GEN(dev_priv) < 8) - engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; + engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; } else { engine->mmio_base = BSD_RING_BASE; engine->emit_flush = bsd_ring_flush; @@ -2260,8 +2109,7 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) intel_ring_default_vfuncs(dev_priv, engine); engine->emit_flush = gen6_ring_flush; - if (INTEL_GEN(dev_priv) < 8) - engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; + engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; return intel_init_ring_buffer(engine); } @@ -2273,12 +2121,9 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) intel_ring_default_vfuncs(dev_priv, engine); engine->emit_flush = gen6_ring_flush; - - if (INTEL_GEN(dev_priv) < 8) { - engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; - engine->irq_enable = hsw_vebox_irq_enable; - engine->irq_disable = hsw_vebox_irq_disable; - } + engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; + engine->irq_enable = hsw_vebox_irq_enable; + engine->irq_disable = hsw_vebox_irq_disable; return intel_init_ring_buffer(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6b2067f10824..a0e7a6c2a57c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -6,8 +6,11 @@ #include "i915_gem_batch_pool.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_pmu.h" #include "i915_selftest.h" +struct drm_printer; + #define I915_CMD_HASH_ORDER 9 /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, @@ -45,16 +48,6 @@ struct intel_hw_status_page { /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. */ -#define gen8_semaphore_seqno_size sizeof(uint64_t) -#define GEN8_SEMAPHORE_OFFSET(__from, __to) \ - (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size) -#define GEN8_SIGNAL_OFFSET(__ring, to) \ - (dev_priv->semaphore->node.start + \ - GEN8_SEMAPHORE_OFFSET((__ring)->id, (to))) -#define GEN8_WAIT_OFFSET(__ring, from) \ - (dev_priv->semaphore->node.start + \ - GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) - enum intel_engine_hangcheck_action { ENGINE_IDLE = 0, ENGINE_WAIT, @@ -164,7 +157,6 @@ struct i915_ctx_workarounds { }; struct drm_i915_gem_request; -struct intel_render_state; /* * Engine IDs definitions. @@ -185,16 +177,123 @@ struct i915_priolist { int priority; }; +/** + * struct intel_engine_execlists - execlist submission queue and port state + * + * The struct intel_engine_execlists represents the combined logical state of + * driver and the hardware state for execlist mode of submission. 
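+ * + * Roughly: requests wait in @queue in priority order, are written + * into the ELSP @port slots for the hardware to execute, and are + * retired as context-status events come back through the CSB (see + * @csb_head).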
+ */ +struct intel_engine_execlists { + /** + * @tasklet: softirq tasklet for bottom handler + */ + struct tasklet_struct tasklet; + + /** + * @default_priolist: priority list for I915_PRIORITY_NORMAL + */ + struct i915_priolist default_priolist; + + /** + * @no_priolist: priority lists disabled + */ + bool no_priolist; + + /** + * @elsp: the ExecList Submission Port register + */ + u32 __iomem *elsp; + + /** + * @port: execlist port states + * + * For each hardware ELSP (ExecList Submission Port) we keep + * track of the last request and the number of times we submitted + * that port to hw. We then count the number of times the hw reports + * a context completion or preemption. As only one context can + * be active on hw, we limit resubmission of a context to port[0]. This + * is called a Lite Restore of the context. + */ + struct execlist_port { + /** + * @request_count: combined request and submission count + */ + struct drm_i915_gem_request *request_count; +#define EXECLIST_COUNT_BITS 2 +#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) +#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) +#define port_set(p, packed) ((p)->request_count = (packed)) +#define port_isset(p) ((p)->request_count) +#define port_index(p, execlists) ((p) - (execlists)->port) + + /** + * @context_id: context ID for port + */ + GEM_DEBUG_DECL(u32 context_id); + +#define EXECLIST_MAX_PORTS 2 + } port[EXECLIST_MAX_PORTS]; + + /** + * @active: is the HW active? We consider the HW as active after + * submitting any context for execution and until we have seen the + * last context completion event. After that, we do not expect any + * more events until we submit, and so can park the HW. + * + * As we have a small number of different sources from which we feed + * the HW, we track the state of each inside a single bitfield. 
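+ * + * The EXECLISTS_ACTIVE_* bits below name those sources: user + * submissions in flight, an injected preemption in flight, and the + * hardware's acknowledgement of the most recent ELSP submission.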
+ */ + unsigned int active; +#define EXECLISTS_ACTIVE_USER 0 +#define EXECLISTS_ACTIVE_PREEMPT 1 +#define EXECLISTS_ACTIVE_HWACK 2 + + /** + * @port_mask: number of execlist ports - 1 + */ + unsigned int port_mask; + + /** + * @queue: queue of requests, in priority lists + */ + struct rb_root queue; + + /** + * @first: leftmost level in priority @queue + */ + struct rb_node *first; + + /** + * @fw_domains: forcewake domains for irq tasklet + */ + unsigned int fw_domains; + + /** + * @csb_head: context status buffer head + */ + unsigned int csb_head; + + /** + * @csb_use_mmio: access csb through mmio, instead of hwsp + */ + bool csb_use_mmio; +}; + #define INTEL_ENGINE_CS_MAX_NAME 8 struct intel_engine_cs { struct drm_i915_private *i915; char name[INTEL_ENGINE_CS_MAX_NAME]; + enum intel_engine_id id; - unsigned int uabi_id; unsigned int hw_id; unsigned int guc_id; + u8 uabi_id; + u8 uabi_class; + u8 class; u8 instance; u32 context_size; @@ -204,7 +303,7 @@ struct intel_engine_cs { struct intel_ring *buffer; struct intel_timeline *timeline; - struct intel_render_state *render_state; + struct drm_i915_gem_object *default_state; atomic_t irq_count; unsigned long irq_posted; @@ -240,12 +339,35 @@ struct intel_engine_cs { struct timer_list hangcheck; /* detect missed interrupts */ unsigned int hangcheck_interrupts; + unsigned int irq_enabled; bool irq_armed : 1; - bool irq_enabled : 1; I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; + struct { + /** + * @enable: Bitmask of enable sample events on this engine. + * + * Bits correspond to sample event types, for instance + * I915_SAMPLE_QUEUED is bit 0 etc. + */ + u32 enable; + /** + * @enable_count: Reference count for the enabled samplers. + * + * Index number corresponds to the bit number from @enable. + */ + unsigned int enable_count[I915_PMU_SAMPLE_BITS]; + /** + * @sample: Counter values for sampling events. + * + * Our internal timer stores the current counters in this field. + */ +#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1) + struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX]; + } pmu; + /* * A pool of objects to use as shadow copies of client batch buffers * when the command parser is enabled. Prevents the client from @@ -266,6 +388,9 @@ struct intel_engine_cs { void (*reset_hw)(struct intel_engine_cs *engine, struct drm_i915_gem_request *req); + void (*park)(struct intel_engine_cs *engine); + void (*unpark)(struct intel_engine_cs *engine); + void (*set_default_submission)(struct intel_engine_cs *engine); struct intel_ring *(*context_pin)(struct intel_engine_cs *engine, @@ -307,6 +432,14 @@ struct intel_engine_cs { void (*schedule)(struct drm_i915_gem_request *request, int priority); + /* + * Cancel all requests on the hardware, or queued for execution. + * This should only cancel the ready requests that have been + * submitted to the engine (via the engine->submit_request callback). + * This is called when marking the device as wedged. + */ + void (*cancel_requests)(struct intel_engine_cs *engine); + /* Some chipsets are not quite as coherent as advertised and need * an expensive kick to force a true read of the up-to-date seqno. * However, the up-to-date seqno is not always required and the last @@ -354,18 +487,15 @@ struct intel_engine_cs { * ie. 
transpose of f(x, y) */ struct { - union { #define GEN6_SEMAPHORE_LAST VECS_HW #define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) #define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0) - struct { - /* our mbox written by others */ - u32 wait[GEN6_NUM_SEMAPHORES]; - /* mboxes this ring signals to */ - i915_reg_t signal[GEN6_NUM_SEMAPHORES]; - } mbox; - u64 signal_ggtt[I915_NUM_ENGINES]; - }; + struct { + /* our mbox written by others */ + u32 wait[GEN6_NUM_SEMAPHORES]; + /* mboxes this ring signals to */ + i915_reg_t signal[GEN6_NUM_SEMAPHORES]; + } mbox; /* AKA wait() */ int (*sync_to)(struct drm_i915_gem_request *req, @@ -373,25 +503,7 @@ struct intel_engine_cs { u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); } semaphore; - /* Execlists */ - struct tasklet_struct irq_tasklet; - struct i915_priolist default_priolist; - bool no_priolist; - struct execlist_port { - struct drm_i915_gem_request *request_count; -#define EXECLIST_COUNT_BITS 2 -#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) -#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) -#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) -#define port_set(p, packed) ((p)->request_count = (packed)) -#define port_isset(p) ((p)->request_count) -#define port_index(p, e) ((p) - (e)->execlist_port) - GEM_DEBUG_DECL(u32 context_id); - } execlist_port[2]; - struct rb_root execlist_queue; - struct rb_node *execlist_first; - unsigned int fw_domains; + struct intel_engine_execlists execlists; /* Contexts are pinned whilst they are active on the GPU. The last * context executed remains active whilst the GPU is idle - the @@ -411,13 +523,16 @@ struct intel_engine_cs { * stream (ring). */ struct i915_gem_context *legacy_active_context; + struct i915_hw_ppgtt *legacy_active_ppgtt; /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; struct intel_engine_hangcheck hangcheck; - bool needs_cmd_parser; +#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_SUPPORTS_STATS BIT(1) + unsigned int flags; /* * Table of commands the command parser needs to know about @@ -442,8 +557,96 @@ struct intel_engine_cs { * certain bits to encode the command length in the header). */ u32 (*get_cmd_length_mask)(u32 cmd_header); + + struct { + /** + * @lock: Lock protecting the below fields. + */ + spinlock_t lock; + /** + * @enabled: Reference count indicating number of listeners. + */ + unsigned int enabled; + /** + * @active: Number of contexts currently scheduled in. + */ + unsigned int active; + /** + * @enabled_at: Timestamp when busy stats were enabled. + */ + ktime_t enabled_at; + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. + */ + ktime_t start; + /** + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases + * where engine is currently busy (active > 0). 
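+ * + * A reader wanting an up-to-date value must therefore add the + * time elapsed since @start whenever @active is non-zero, i.e. + * roughly busy = total + (ktime_get() - start); see + * intel_engine_get_busy_time().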
+ */ + ktime_t total; + } stats; }; +static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; +} + +static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_SUPPORTS_STATS; +} + +static inline void +execlists_set_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __set_bit(bit, (unsigned long *)&execlists->active); +} + +static inline void +execlists_clear_active(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + __clear_bit(bit, (unsigned long *)&execlists->active); +} + +static inline bool +execlists_is_active(const struct intel_engine_execlists *execlists, + unsigned int bit) +{ + return test_bit(bit, (unsigned long *)&execlists->active); +} + +void +execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); + +void +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); + +static inline unsigned int +execlists_num_ports(const struct intel_engine_execlists * const execlists) +{ + return execlists->port_mask + 1; +} + +static inline void +execlists_port_complete(struct intel_engine_execlists * const execlists, + struct execlist_port * const port) +{ + const unsigned int m = execlists->port_mask; + + GEM_BUG_ON(port_index(port, execlists) != 0); + GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); + + memmove(port, port + 1, m * sizeof(struct execlist_port)); + memset(port + m, 0, sizeof(struct execlist_port)); +} + static inline unsigned int intel_engine_flag(const struct intel_engine_cs *engine) { @@ -494,9 +697,15 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) */ #define I915_GEM_HWS_INDEX 0x30 #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_GEM_HWS_PREEMPT_INDEX 0x32 +#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) #define I915_GEM_HWS_SCRATCH_INDEX 0x40 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +#define I915_HWS_CSB_BUF0_INDEX 0x10 +#define I915_HWS_CSB_WRITE_INDEX 0x1f +#define CNL_HWS_CSB_WRITE_INDEX 0x2f + struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); int intel_ring_pin(struct intel_ring *ring, @@ -514,6 +723,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); +int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes); u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, unsigned int n); @@ -642,6 +852,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; } +static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine) +{ + return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR; +} + /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); @@ -712,6 +927,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); #define ENGINE_WAKEUP_WAITER BIT(0) #define ENGINE_WAKEUP_ASLEEP BIT(1) +void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); +void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); + void __intel_engine_disarm_breadcrumbs(struct 
intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); @@ -730,22 +948,123 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) return batch + 6; } +static inline u32 * +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset) +{ + /* We're using qword write, offset should be aligned to 8 bytes. */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + /* w/a for post sync ops following a GPGPU operation we + * need a prior CS_STALL, which is emitted by the flush + * following the batch. + */ + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = gtt_offset; + *cs++ = 0; + *cs++ = value; + /* We're thrashing one dword of HWS. */ + *cs++ = 0; + + return cs; +} + +static inline u32 * +gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) +{ + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ + GEM_BUG_ON(gtt_offset & (1 << 5)); + /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = value; + + return cs; +} + bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); -void intel_engines_mark_idle(struct drm_i915_private *i915); +bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine); + +void intel_engines_park(struct drm_i915_private *i915); +void intel_engines_unpark(struct drm_i915_private *i915); + void intel_engines_reset_default_submission(struct drm_i915_private *i915); +unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); -static inline bool -__intel_engine_can_store_dword(unsigned int gen, unsigned int class) +bool intel_engine_can_store_dword(struct intel_engine_cs *engine); + +__printf(3, 4) +void intel_engine_dump(struct intel_engine_cs *engine, + struct drm_printer *m, + const char *header, ...); + +struct intel_engine_cs * +intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); + +static inline void intel_engine_context_in(struct intel_engine_cs *engine) { - if (gen <= 2) - return false; /* uses physical not virtual addresses */ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + spin_lock_irqsave(&engine->stats.lock, flags); - if (gen == 6 && class == VIDEO_DECODE_CLASS) - return false; /* b0rked */ + if (engine->stats.enabled > 0) { + if (engine->stats.active++ == 0) + engine->stats.start = ktime_get(); + GEM_BUG_ON(engine->stats.active == 0); + } + + spin_unlock_irqrestore(&engine->stats.lock, flags); +} + +static inline void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (READ_ONCE(engine->stats.enabled) == 0) + return; + + spin_lock_irqsave(&engine->stats.lock, flags); + + if (engine->stats.enabled > 0) { + ktime_t last; + + if (engine->stats.active && --engine->stats.active == 0) { + /* + * Decrement the active context count and in case GPU + * is now idle add up to the running total. + */ + last = ktime_sub(ktime_get(), engine->stats.start); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } else if (engine->stats.active == 0) { + /* + * After turning on engine stats, context out might be + * the first event in which case we account from the + * time stats gathering was turned on. 
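+ * In that case engine->stats.enabled_at, sampled when the + * counters were switched on, stands in for the missing + * context-in timestamp.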
+ */ + last = ktime_sub(ktime_get(), engine->stats.enabled_at); + + engine->stats.total = ktime_add(engine->stats.total, + last); + } + } - return true; + spin_unlock_irqrestore(&engine->stats.lock, flags); } +int intel_enable_engine_stats(struct intel_engine_cs *engine); +void intel_disable_engine_stats(struct intel_engine_cs *engine); + +ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 49577eba8e7e..d758da6156a8 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -130,6 +130,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "INIT"; case POWER_DOMAIN_MODESET: return "MODESET"; + case POWER_DOMAIN_GT_IRQ: + return "GT_IRQ"; default: MISSING_CASE(domain); return "?"; @@ -187,7 +189,7 @@ bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well; bool is_enabled; - if (dev_priv->pm.suspended) + if (dev_priv->runtime_pm.suspended) return false; is_enabled = true; @@ -598,6 +600,11 @@ void gen9_enable_dc5(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Enabling DC5\n"); + /* Wa Display #1183: skl,kbl,cfl */ + if (IS_GEN9_BC(dev_priv)) + I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | + SKL_SELECT_ALTERNATE_DC_EXIT); + gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5); } @@ -625,6 +632,11 @@ void skl_disable_dc6(struct drm_i915_private *dev_priv) { DRM_DEBUG_KMS("Disabling DC6\n"); + /* Wa Display #1183: skl,kbl,cfl */ + if (IS_GEN9_BC(dev_priv)) + I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | + SKL_SELECT_ALTERNATE_DC_EXIT); + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); } @@ -705,7 +717,8 @@ static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); dev_priv->display.get_cdclk(dev_priv, &cdclk_state); - WARN_ON(!intel_cdclk_state_compare(&dev_priv->cdclk.hw, &cdclk_state)); + /* Can't read out voltage_level so can't use intel_cdclk_changed() */ + WARN_ON(intel_cdclk_needs_modeset(&dev_priv->cdclk.hw, &cdclk_state)); gen9_assert_dbuf_enabled(dev_priv); @@ -785,7 +798,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, state = enable ? 
PUNIT_PWRGT_PWR_ON(power_well_id) : PUNIT_PWRGT_PWR_GATE(power_well_id); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state) @@ -806,7 +819,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, #undef COND out: - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void vlv_power_well_enable(struct drm_i915_private *dev_priv, @@ -833,7 +846,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, mask = PUNIT_PWRGT_MASK(power_well_id); ctrl = PUNIT_PWRGT_PWR_ON(power_well_id); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; /* @@ -852,7 +865,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask; WARN_ON(ctrl != state); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1364,7 +1377,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, bool enabled; u32 state, ctrl; - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe); /* @@ -1381,7 +1394,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe); WARN_ON(ctrl << 16 != state); - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); return enabled; } @@ -1396,7 +1409,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); - mutex_lock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->pcu_lock); #define COND \ ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state) @@ -1417,7 +1430,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, #undef COND out: - mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->pcu_lock); } static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, @@ -1704,6 +1717,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -1722,12 +1736,13 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_A_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ @@ -1784,8 +1799,10 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS ( \ GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ @@ -2413,7 
+2430,7 @@ static uint32_t get_allowed_dc_mask(const struct drm_i915_private *dev_priv, mask = 0; } - if (!i915.disable_power_well) + if (!i915_modparams.disable_power_well) max_dc = 0; if (enable_dc >= 0 && enable_dc <= max_dc) { @@ -2471,10 +2488,11 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) { struct i915_power_domains *power_domains = &dev_priv->power_domains; - i915.disable_power_well = sanitize_disable_power_well_option(dev_priv, - i915.disable_power_well); - dev_priv->csr.allowed_dc_mask = get_allowed_dc_mask(dev_priv, - i915.enable_dc); + i915_modparams.disable_power_well = + sanitize_disable_power_well_option(dev_priv, + i915_modparams.disable_power_well); + dev_priv->csr.allowed_dc_mask = + get_allowed_dc_mask(dev_priv, i915_modparams.enable_dc); BUILD_BUG_ON(POWER_DOMAIN_NUM > 64); @@ -2535,7 +2553,7 @@ void intel_power_domains_fini(struct drm_i915_private *dev_priv) intel_display_set_init_power(dev_priv, true); /* Remove the refcount we took to keep power well support disabled. */ - if (!i915.disable_power_well) + if (!i915_modparams.disable_power_well) intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); /* @@ -2707,30 +2725,67 @@ void bxt_display_core_uninit(struct drm_i915_private *dev_priv) usleep_range(10, 30); /* 10 us delay per Bspec */ } -#define CNL_PROCMON_IDX(val) \ - (((val) & (PROCESS_INFO_MASK | VOLTAGE_INFO_MASK)) >> VOLTAGE_INFO_SHIFT) -#define NUM_CNL_PROCMON \ - (CNL_PROCMON_IDX(VOLTAGE_INFO_MASK | PROCESS_INFO_MASK) + 1) +enum { + PROCMON_0_85V_DOT_0, + PROCMON_0_95V_DOT_0, + PROCMON_0_95V_DOT_1, + PROCMON_1_05V_DOT_0, + PROCMON_1_05V_DOT_1, +}; static const struct cnl_procmon { u32 dw1, dw9, dw10; -} cnl_procmon_values[NUM_CNL_PROCMON] = { - [CNL_PROCMON_IDX(VOLTAGE_INFO_0_85V | PROCESS_INFO_DOT_0)] = - { .dw1 = 0x00 << 16, .dw9 = 0x62AB67BB, .dw10 = 0x51914F96, }, - [CNL_PROCMON_IDX(VOLTAGE_INFO_0_95V | PROCESS_INFO_DOT_0)] = - { .dw1 = 0x00 << 16, .dw9 = 0x86E172C7, .dw10 = 0x77CA5EAB, }, - [CNL_PROCMON_IDX(VOLTAGE_INFO_0_95V | PROCESS_INFO_DOT_1)] = - { .dw1 = 0x00 << 16, .dw9 = 0x93F87FE1, .dw10 = 0x8AE871C5, }, - [CNL_PROCMON_IDX(VOLTAGE_INFO_1_05V | PROCESS_INFO_DOT_0)] = - { .dw1 = 0x00 << 16, .dw9 = 0x98FA82DD, .dw10 = 0x89E46DC1, }, - [CNL_PROCMON_IDX(VOLTAGE_INFO_1_05V | PROCESS_INFO_DOT_1)] = - { .dw1 = 0x44 << 16, .dw9 = 0x9A00AB25, .dw10 = 0x8AE38FF1, }, +} cnl_procmon_values[] = { + [PROCMON_0_85V_DOT_0] = + { .dw1 = 0x00000000, .dw9 = 0x62AB67BB, .dw10 = 0x51914F96, }, + [PROCMON_0_95V_DOT_0] = + { .dw1 = 0x00000000, .dw9 = 0x86E172C7, .dw10 = 0x77CA5EAB, }, + [PROCMON_0_95V_DOT_1] = + { .dw1 = 0x00000000, .dw9 = 0x93F87FE1, .dw10 = 0x8AE871C5, }, + [PROCMON_1_05V_DOT_0] = + { .dw1 = 0x00000000, .dw9 = 0x98FA82DD, .dw10 = 0x89E46DC1, }, + [PROCMON_1_05V_DOT_1] = + { .dw1 = 0x00440000, .dw9 = 0x9A00AB25, .dw10 = 0x8AE38FF1, }, }; +static void cnl_set_procmon_ref_values(struct drm_i915_private *dev_priv) +{ + const struct cnl_procmon *procmon; + u32 val; + + val = I915_READ(CNL_PORT_COMP_DW3); + switch (val & (PROCESS_INFO_MASK | VOLTAGE_INFO_MASK)) { + default: + MISSING_CASE(val); + case VOLTAGE_INFO_0_85V | PROCESS_INFO_DOT_0: + procmon = &cnl_procmon_values[PROCMON_0_85V_DOT_0]; + break; + case VOLTAGE_INFO_0_95V | PROCESS_INFO_DOT_0: + procmon = &cnl_procmon_values[PROCMON_0_95V_DOT_0]; + break; + case VOLTAGE_INFO_0_95V | PROCESS_INFO_DOT_1: + procmon = &cnl_procmon_values[PROCMON_0_95V_DOT_1]; + break; + case VOLTAGE_INFO_1_05V | PROCESS_INFO_DOT_0: + procmon = &cnl_procmon_values[PROCMON_1_05V_DOT_0]; + break; + 
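+ /* + * Of the tabulated bins, only the 1.05V dot-1 entry below + * programs a non-zero compensation value into DW1 (0x44 in + * bits 23:16); every other bin leaves DW1 cleared. + */ + 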
case VOLTAGE_INFO_1_05V | PROCESS_INFO_DOT_1: + procmon = &cnl_procmon_values[PROCMON_1_05V_DOT_1]; + break; + } + + val = I915_READ(CNL_PORT_COMP_DW1); + val &= ~((0xff << 16) | 0xff); + val |= procmon->dw1; + I915_WRITE(CNL_PORT_COMP_DW1, val); + + I915_WRITE(CNL_PORT_COMP_DW9, procmon->dw9); + I915_WRITE(CNL_PORT_COMP_DW10, procmon->dw10); +} + static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume) { struct i915_power_domains *power_domains = &dev_priv->power_domains; - const struct cnl_procmon *procmon; struct i915_power_well *well; u32 val; @@ -2746,18 +2801,7 @@ static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume val &= ~CNL_COMP_PWR_DOWN; I915_WRITE(CHICKEN_MISC_2, val); - val = I915_READ(CNL_PORT_COMP_DW3); - procmon = &cnl_procmon_values[CNL_PROCMON_IDX(val)]; - - WARN_ON(procmon->dw10 == 0); - - val = I915_READ(CNL_PORT_COMP_DW1); - val &= ~((0xff << 16) | 0xff); - val |= procmon->dw1; - I915_WRITE(CNL_PORT_COMP_DW1, val); - - I915_WRITE(CNL_PORT_COMP_DW9, procmon->dw9); - I915_WRITE(CNL_PORT_COMP_DW10, procmon->dw10); + cnl_set_procmon_ref_values(dev_priv); val = I915_READ(CNL_PORT_COMP_DW0); val |= COMP_INIT; @@ -2787,9 +2831,6 @@ static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume intel_csr_load_program(dev_priv); } -#undef CNL_PROCMON_IDX -#undef NUM_CNL_PROCMON - static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) { struct i915_power_domains *power_domains = &dev_priv->power_domains; @@ -2975,7 +3016,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) /* For now, we need the power well to be always enabled. */ intel_display_set_init_power(dev_priv, true); /* Disable power support if the user asked so. */ - if (!i915.disable_power_well) + if (!i915_modparams.disable_power_well) intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); intel_power_domains_sync_hw(dev_priv); power_domains->initializing = false; @@ -2994,7 +3035,7 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv) * Even if power well support was disabled we still want to disable * power wells while we are system suspended. 
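 * Dropping the POWER_DOMAIN_INIT reference taken for * !disable_power_well (see below) is what actually lets those * wells power down across system suspend. 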
*/ - if (!i915.disable_power_well) + if (!i915_modparams.disable_power_well) intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); if (IS_CANNONLAKE(dev_priv)) @@ -3104,7 +3145,7 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv) ret = pm_runtime_get_sync(kdev); WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret); - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); } @@ -3138,7 +3179,7 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv) return false; } - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); assert_rpm_wakelock_held(dev_priv); return true; @@ -3169,7 +3210,7 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv) assert_rpm_wakelock_held(dev_priv); pm_runtime_get_noresume(kdev); - atomic_inc(&dev_priv->pm.wakeref_count); + atomic_inc(&dev_priv->runtime_pm.wakeref_count); } /** @@ -3186,7 +3227,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv) struct device *kdev = &pdev->dev; assert_rpm_wakelock_held(dev_priv); - atomic_dec(&dev_priv->pm.wakeref_count); + atomic_dec(&dev_priv->runtime_pm.wakeref_count); pm_runtime_mark_last_busy(kdev); pm_runtime_put_autosuspend(kdev); diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 29a3b0f5bec7..2b8764897d68 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -201,11 +201,8 @@ to_intel_sdvo_connector(struct drm_connector *connector) return container_of(connector, struct intel_sdvo_connector, base.base); } -static struct intel_sdvo_connector_state * -to_intel_sdvo_connector_state(struct drm_connector_state *conn_state) -{ - return container_of(conn_state, struct intel_sdvo_connector_state, base.base); -} +#define to_intel_sdvo_connector_state(conn_state) \ + container_of((conn_state), struct intel_sdvo_connector_state, base.base) static bool intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, uint16_t flags); @@ -998,7 +995,7 @@ static bool intel_sdvo_write_infoframe(struct intel_sdvo *intel_sdvo, } static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo, - struct intel_crtc_state *pipe_config) + const struct intel_crtc_state *pipe_config) { uint8_t sdvo_data[HDMI_INFOFRAME_SIZE(AVI)]; union hdmi_infoframe frame; @@ -1032,7 +1029,7 @@ static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo, } static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo, - struct drm_connector_state *conn_state) + const struct drm_connector_state *conn_state) { struct intel_sdvo_tv_format format; uint32_t format_map; @@ -1202,9 +1199,9 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, } while (0) static void intel_sdvo_update_props(struct intel_sdvo *intel_sdvo, - struct intel_sdvo_connector_state *sdvo_state) + const struct intel_sdvo_connector_state *sdvo_state) { - struct drm_connector_state *conn_state = &sdvo_state->base.base; + const struct drm_connector_state *conn_state = &sdvo_state->base.base; struct intel_sdvo_connector *intel_sdvo_conn = to_intel_sdvo_connector(conn_state->connector); uint16_t val; @@ -1258,14 +1255,15 @@ static void intel_sdvo_update_props(struct intel_sdvo *intel_sdvo, } static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder, - struct intel_crtc_state *crtc_state, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state 
*conn_state) { struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; - struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(conn_state); - struct drm_display_mode *mode = &crtc_state->base.mode; + const struct intel_sdvo_connector_state *sdvo_state = + to_intel_sdvo_connector_state(conn_state); + const struct drm_display_mode *mode = &crtc_state->base.mode; struct intel_sdvo *intel_sdvo = to_sdvo(intel_encoder); u32 sdvox; struct intel_sdvo_in_out_map in_out; @@ -1431,6 +1429,8 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder, u8 val; bool ret; + pipe_config->output_types |= BIT(INTEL_OUTPUT_SDVO); + sdvox = I915_READ(intel_sdvo->sdvo_reg); ret = intel_sdvo_get_input_timing(intel_sdvo, &dtd); @@ -1507,12 +1507,12 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder, } static void intel_disable_sdvo(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); u32 temp; intel_sdvo_set_active_outputs(intel_sdvo, 0); @@ -1552,26 +1552,26 @@ static void intel_disable_sdvo(struct intel_encoder *encoder, } static void pch_disable_sdvo(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { } static void pch_post_disable_sdvo(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { intel_disable_sdvo(encoder, old_crtc_state, old_conn_state); } static void intel_enable_sdvo(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); u32 temp; bool input1, input2; int i; diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 7d971cb56116..75c872bb8cc9 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -81,7 +81,7 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, @@ -95,7 +95,7 @@ int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) { int err; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); err = vlv_sideband_rw(dev_priv, 
PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, @@ -125,7 +125,7 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr) { u32 val = 0; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); mutex_lock(&dev_priv->sb_lock); vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC, diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 524933b01483..dd485f59eb1d 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -66,12 +66,17 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, 1000 * adjusted_mode->crtc_htotal); } +/* FIXME: We should instead only take spinlocks once for the entire update + * instead of once per mmio. */ +#if IS_ENABLED(CONFIG_PROVE_LOCKING) +#define VBLANK_EVASION_TIME_US 250 +#else #define VBLANK_EVASION_TIME_US 100 +#endif /** * intel_pipe_update_start() - start update of a set of display registers - * @crtc: the crtc of which the registers are going to be updated - * @start_vbl_count: vblank counter return pointer used for error checking + * @new_crtc_state: the new crtc state * * Mark the start of an update to pipe registers that should be updated * atomically regarding vblank. If the next vblank happens within @@ -79,18 +84,18 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, * * After a successful call to this function, interrupts will be disabled * until a subsequent call to intel_pipe_update_end(). That is done to - * avoid random delays. The value written to @start_vbl_count should be - * supplied to intel_pipe_update_end() for error checking. + * avoid random delays. */ -void intel_pipe_update_start(struct intel_crtc *crtc) +void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &new_crtc_state->base.adjusted_mode; long timeout = msecs_to_jiffies_timeout(1); int scanline, min, max, vblank_start; wait_queue_head_t *wq = drm_crtc_vblank_waitqueue(&crtc->base); bool need_vlv_dsi_wa = (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && - intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI); + intel_crtc_has_type(new_crtc_state, INTEL_OUTPUT_DSI); DEFINE_WAIT(wait); vblank_start = adjusted_mode->crtc_vblank_start; @@ -170,15 +175,15 @@ void intel_pipe_update_start(struct intel_crtc *crtc) /** * intel_pipe_update_end() - end update of a set of display registers - * @crtc: the crtc of which the registers were updated - * @start_vbl_count: start vblank counter (used for error checking) + * @new_crtc_state: the new crtc state * * Mark the end of an update started with intel_pipe_update_start(). This * re-enables interrupts and verifies the update was actually completed - * before a vblank using the value of @start_vbl_count. + * before a vblank.
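For reference, the VBLANK_EVASION_TIME_US budget above is converted into scanlines by intel_usecs_to_scanlines(); a standalone sketch of the same arithmetic (crtc_clock is in kHz, as in struct drm_display_mode):

static unsigned int usecs_to_scanlines(unsigned int usecs,
				       unsigned int crtc_clock_khz,
				       unsigned int crtc_htotal)
{
	/*
	 * Pixels scanned out in 'usecs' = crtc_clock_khz * usecs / 1000;
	 * divide by htotal and round up so the evasion window is never
	 * under-estimated.
	 */
	return (usecs * crtc_clock_khz + 1000 * crtc_htotal - 1) /
	       (1000 * crtc_htotal);
}

E.g. a 100 us budget at a 148500 kHz pixel clock with htotal 2200 works out to ceil(6.75) = 7 scanlines of evasion.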
*/ -void intel_pipe_update_end(struct intel_crtc *crtc) +void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); enum pipe pipe = crtc->pipe; int scanline_end = intel_get_crtc_scanline(crtc); u32 end_vbl_count = intel_crtc_get_vblank_counter(crtc); @@ -191,14 +196,14 @@ void intel_pipe_update_end(struct intel_crtc *crtc) * Would be slightly nice to just grab the vblank count and arm the * event outside of the critical section - the spinlock might spin for a * while ... */ - if (crtc->base.state->event) { + if (new_crtc_state->base.event) { WARN_ON(drm_crtc_vblank_get(&crtc->base) != 0); spin_lock(&crtc->base.dev->event_lock); - drm_crtc_arm_vblank_event(&crtc->base, crtc->base.state->event); + drm_crtc_arm_vblank_event(&crtc->base, new_crtc_state->base.event); spin_unlock(&crtc->base.dev->event_lock); - crtc->base.state->event = NULL; + new_crtc_state->base.event = NULL; } local_irq_enable(); @@ -225,7 +230,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc) #endif } -static void +void skl_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -258,13 +263,9 @@ skl_update_plane(struct intel_plane *plane, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) I915_WRITE_FW(PLANE_COLOR_CTL(pipe, plane_id), - PLANE_COLOR_PIPE_GAMMA_ENABLE | - PLANE_COLOR_PIPE_CSC_ENABLE | - PLANE_COLOR_PLANE_GAMMA_DISABLE); - } - + plane_state->color_ctl); if (key->flags) { I915_WRITE_FW(PLANE_KEYVAL(pipe, plane_id), key->min_value); I915_WRITE_FW(PLANE_KEYMAX(pipe, plane_id), key->max_value); @@ -306,7 +307,7 @@ skl_update_plane(struct intel_plane *plane, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void +void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); @@ -324,6 +325,26 @@ skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +bool +skl_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(PLANE_CTL(pipe, plane_id)) & PLANE_CTL_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static void chv_update_csc(struct intel_plane *plane, uint32_t format) { @@ -501,6 +522,26 @@ vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +vlv_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(SPCNTR(pipe, plane_id)) & SP_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const 
struct intel_plane_state *plane_state) { @@ -641,6 +682,25 @@ ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +ivb_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(SPRCTL(pipe)) & SPRITE_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -772,6 +832,25 @@ g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +g4x_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(DVSCNTR(pipe)) & DVS_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static int intel_check_sprite_plane(struct intel_plane *plane, struct intel_crtc_state *crtc_state, @@ -973,6 +1052,9 @@ intel_check_sprite_plane(struct intel_plane *plane, state->ctl = g4x_sprite_ctl(crtc_state, state); } + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + state->color_ctl = glk_plane_color_ctl(crtc_state, state); + return 0; } @@ -995,7 +1077,7 @@ int intel_sprite_set_colorkey(struct drm_device *dev, void *data, set->flags & I915_SET_COLORKEY_DESTINATION) return -EINVAL; - plane = drm_plane_find(dev, set->plane_id); + plane = drm_plane_find(dev, file_priv, set->plane_id); if (!plane || plane->type != DRM_PLANE_TYPE_OVERLAY) return -ENOENT; @@ -1227,6 +1309,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = skl_update_plane; intel_plane->disable_plane = skl_disable_plane; + intel_plane->get_hw_state = skl_plane_get_hw_state; plane_formats = skl_plane_formats; num_plane_formats = ARRAY_SIZE(skl_plane_formats); @@ -1237,6 +1320,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = skl_update_plane; intel_plane->disable_plane = skl_disable_plane; + intel_plane->get_hw_state = skl_plane_get_hw_state; plane_formats = skl_plane_formats; num_plane_formats = ARRAY_SIZE(skl_plane_formats); @@ -1247,6 +1331,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = vlv_update_plane; intel_plane->disable_plane = vlv_disable_plane; + intel_plane->get_hw_state = vlv_plane_get_hw_state; plane_formats = vlv_plane_formats; num_plane_formats = ARRAY_SIZE(vlv_plane_formats); @@ -1262,6 +1347,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = ivb_update_plane; intel_plane->disable_plane = ivb_disable_plane; + intel_plane->get_hw_state = ivb_plane_get_hw_state; plane_formats = snb_plane_formats; num_plane_formats = ARRAY_SIZE(snb_plane_formats); @@ -1272,6 +1358,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = g4x_update_plane; intel_plane->disable_plane = g4x_disable_plane; + 
intel_plane->get_hw_state = g4x_plane_get_hw_state; modifiers = i9xx_plane_format_modifiers; if (IS_GEN6(dev_priv)) { @@ -1297,7 +1384,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, } intel_plane->pipe = pipe; - intel_plane->plane = plane; + intel_plane->i9xx_plane = plane; intel_plane->id = PLANE_SPRITE0 + plane; intel_plane->frontbuffer_bit = INTEL_FRONTBUFFER_SPRITE(pipe, plane); intel_plane->check_plane = intel_check_sprite_plane; diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 906893c006d8..b3dabc219e6a 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -814,23 +814,23 @@ intel_tv_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) static void intel_enable_tv(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); /* Prevents vblank waits from timing out in intel_tv_detect_type() */ intel_wait_for_vblank(dev_priv, - to_intel_crtc(encoder->base.crtc)->pipe); + to_intel_crtc(pipe_config->base.crtc)->pipe); I915_WRITE(TV_CTL, I915_READ(TV_CTL) | TV_ENC_ENABLE); } static void intel_disable_tv(struct intel_encoder *encoder, - struct intel_crtc_state *old_crtc_state, - struct drm_connector_state *old_conn_state) + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -838,7 +838,7 @@ intel_disable_tv(struct intel_encoder *encoder, I915_WRITE(TV_CTL, I915_READ(TV_CTL) & ~TV_ENC_ENABLE); } -static const struct tv_mode *intel_tv_mode_find(struct drm_connector_state *conn_state) +static const struct tv_mode *intel_tv_mode_find(const struct drm_connector_state *conn_state) { int format = conn_state->tv.mode; @@ -868,6 +868,8 @@ static void intel_tv_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT); + pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; } @@ -976,11 +978,11 @@ static void set_color_conversion(struct drm_i915_private *dev_priv, } static void intel_tv_pre_enable(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config, - struct drm_connector_state *conn_state) + const struct intel_crtc_state *pipe_config, + const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); + struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); u32 tv_ctl; @@ -1385,7 +1387,7 @@ intel_tv_get_modes(struct drm_connector *connector) mode_ptr->vsync_end = mode_ptr->vsync_start + 1; mode_ptr->vtotal = vactive_s + 33; - tmp = (u64) tv_mode->refresh * mode_ptr->vtotal; + tmp = mul_u32_u32(tv_mode->refresh, mode_ptr->vtotal); tmp *= mode_ptr->htotal; tmp = div_u64(tmp, 1000000); mode_ptr->clock = (int) tmp; diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 0178ba42a0e5..d82ca0f438f5 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -22,22 +22,10 @@ * */ -#include "i915_drv.h" #include "intel_uc.h" 
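The switch to mul_u32_u32() in the TV mode clock above makes the 32x32->64 widening multiply explicit: the full product refresh * vtotal * htotal easily exceeds 32 bits, so it must be accumulated in a u64 before the final division. A minimal sketch of the same computation (units assumed from the driver's tv_modes table: refresh in millihertz, result in kHz):

static u32 tv_mode_clock_khz(u32 refresh_mhz, u32 vtotal, u32 htotal)
{
	u64 tmp = (u64)refresh_mhz * vtotal;	/* 32x32 -> 64 */

	tmp *= htotal;				/* 64x32, overflow-safe */
	return (u32)div_u64(tmp, 1000000);	/* mHz * pixels -> kHz */
}

E.g. 59940 (59.94 Hz) * 525 * 858 is roughly 2.7e10, already past the 32-bit limit of ~4.3e9 before the divide.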
-#include <linux/firmware.h> - -/* Cleans up uC firmware by releasing the firmware GEM obj. - */ -static void __intel_uc_fw_fini(struct intel_uc_fw *uc_fw) -{ - struct drm_i915_gem_object *obj; - - obj = fetch_and_zero(&uc_fw->obj); - if (obj) - i915_gem_object_put(obj); - - uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; -} +#include "intel_guc_submission.h" +#include "intel_guc.h" +#include "i915_drv.h" /* Reset GuC providing us with fresh state for both GuC and HuC. */ @@ -46,9 +34,9 @@ static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv) int ret; u32 guc_status; - ret = intel_guc_reset(dev_priv); + ret = intel_reset_guc(dev_priv); if (ret) { - DRM_ERROR("GuC reset failed, ret = %d\n", ret); + DRM_ERROR("Failed to reset GuC, ret = %d\n", ret); return ret; } @@ -60,237 +48,111 @@ static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv) return ret; } -void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) +static int __get_platform_enable_guc(struct drm_i915_private *dev_priv) { - if (!HAS_GUC(dev_priv)) { - if (i915.enable_guc_loading > 0 || - i915.enable_guc_submission > 0) - DRM_INFO("Ignoring GuC options, no hardware\n"); - - i915.enable_guc_loading = 0; - i915.enable_guc_submission = 0; - return; - } + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + int enable_guc = 0; - /* A negative value means "use platform default" */ - if (i915.enable_guc_loading < 0) - i915.enable_guc_loading = HAS_GUC_UCODE(dev_priv); + /* Default is to enable GuC/HuC if we know their firmwares */ + if (intel_uc_fw_is_selected(guc_fw)) + enable_guc |= ENABLE_GUC_SUBMISSION; + if (intel_uc_fw_is_selected(huc_fw)) + enable_guc |= ENABLE_GUC_LOAD_HUC; - /* Verify firmware version */ - if (i915.enable_guc_loading) { - if (HAS_HUC_UCODE(dev_priv)) - intel_huc_select_fw(&dev_priv->huc); + /* Any platform specific fine-tuning can be done here */ - if (intel_guc_select_fw(&dev_priv->guc)) - i915.enable_guc_loading = 0; - } + return enable_guc; +} - /* Can't enable guc submission without guc loaded */ - if (!i915.enable_guc_loading) - i915.enable_guc_submission = 0; +/** + * intel_uc_sanitize_options - sanitize uC related modparam options + * @dev_priv: device private + * + * In case of "enable_guc" option this function will attempt to modify + * it only if it was initially set to "auto(-1)". Default value for this + * modparam varies between platforms and it is hardcoded in driver code. + * Any other modparam value is only monitored against availability of the + * related hardware or firmware definitions. + */ +void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) +{ + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; /* A negative value means "use platform default" */ - if (i915.enable_guc_submission < 0) - i915.enable_guc_submission = HAS_GUC_SCHED(dev_priv); -} + if (i915_modparams.enable_guc < 0) + i915_modparams.enable_guc = __get_platform_enable_guc(dev_priv); + + DRM_DEBUG_DRIVER("enable_guc=%d (submission:%s huc:%s)\n", + i915_modparams.enable_guc, + yesno(intel_uc_is_using_guc_submission()), + yesno(intel_uc_is_using_huc())); + + /* Verify GuC firmware availability */ + if (intel_uc_is_using_guc() && !intel_uc_fw_is_selected(guc_fw)) { + DRM_WARN("Incompatible option detected: enable_guc=%d, %s!\n", + i915_modparams.enable_guc, + !HAS_GUC(dev_priv) ? 
"no GuC hardware" : + "no GuC firmware"); + } -static void gen8_guc_raise_irq(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); + /* Verify HuC firmware availability */ + if (intel_uc_is_using_huc() && !intel_uc_fw_is_selected(huc_fw)) { + DRM_WARN("Incompatible option detected: enable_guc=%d, %s!\n", + i915_modparams.enable_guc, + !HAS_HUC(dev_priv) ? "no HuC hardware" : + "no HuC firmware"); + } - I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); + /* Make sure that sanitization was done */ + GEM_BUG_ON(i915_modparams.enable_guc < 0); } void intel_uc_init_early(struct drm_i915_private *dev_priv) { - struct intel_guc *guc = &dev_priv->guc; - - intel_guc_ct_init_early(&guc->ct); - - mutex_init(&guc->send_mutex); - guc->send = intel_guc_send_nop; - guc->notify = gen8_guc_raise_irq; + intel_guc_init_early(&dev_priv->guc); + intel_huc_init_early(&dev_priv->huc); } -static void fetch_uc_fw(struct drm_i915_private *dev_priv, - struct intel_uc_fw *uc_fw) +void intel_uc_init_fw(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_i915_gem_object *obj; - const struct firmware *fw = NULL; - struct uc_css_header *css; - size_t size; - int err; - - if (!uc_fw->path) + if (!USES_GUC(dev_priv)) return; - uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; - - DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n", - intel_uc_fw_status_repr(uc_fw->fetch_status)); - - err = request_firmware(&fw, uc_fw->path, &pdev->dev); - if (err) - goto fail; - if (!fw) - goto fail; - - DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n", - uc_fw->path, fw); - - /* Check the size of the blob before examining buffer contents */ - if (fw->size < sizeof(struct uc_css_header)) { - DRM_NOTE("Firmware header is missing\n"); - goto fail; - } - - css = (struct uc_css_header *)fw->data; - - /* Firmware bits always start from header */ - uc_fw->header_offset = 0; - uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - - css->key_size_dw - css->exponent_size_dw) * sizeof(u32); - - if (uc_fw->header_size != sizeof(struct uc_css_header)) { - DRM_NOTE("CSS header definition mismatch\n"); - goto fail; - } - - /* then, uCode */ - uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; - uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + if (USES_HUC(dev_priv)) + intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); - /* now RSA */ - if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) { - DRM_NOTE("RSA key size is bad\n"); - goto fail; - } - uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); - - /* At least, it should have header, uCode and RSA. Size of all three. */ - size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; - if (fw->size < size) { - DRM_NOTE("Missing firmware components\n"); - goto fail; - } - - /* - * The GuC firmware image has the version number embedded at a - * well-known offset within the firmware blob; note that major / minor - * version are TWO bytes each (i.e. u16), although all pointers and - * offsets are defined in terms of bytes (u8). - */ - switch (uc_fw->type) { - case INTEL_UC_FW_TYPE_GUC: - /* Header and uCode will be loaded to WOPCM. Size of the two. */ - size = uc_fw->header_size + uc_fw->ucode_size; - - /* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). 
*/ - if (size > intel_guc_wopcm_size(dev_priv)) { - DRM_ERROR("Firmware is too large to fit in WOPCM\n"); - goto fail; - } - uc_fw->major_ver_found = css->guc.sw_version >> 16; - uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; - break; - - case INTEL_UC_FW_TYPE_HUC: - uc_fw->major_ver_found = css->huc.sw_version >> 16; - uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; - break; - - default: - DRM_ERROR("Unknown firmware type %d\n", uc_fw->type); - err = -ENOEXEC; - goto fail; - } - - if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { - DRM_NOTE("Skipping %s firmware version check\n", - intel_uc_fw_type_repr(uc_fw->type)); - } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || - uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { - DRM_NOTE("%s firmware version %d.%d, required %d.%d\n", - intel_uc_fw_type_repr(uc_fw->type), - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - err = -ENOEXEC; - goto fail; - } - - DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n", - uc_fw->major_ver_found, uc_fw->minor_ver_found, - uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); - - obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto fail; - } - - uc_fw->obj = obj; - uc_fw->size = fw->size; - - DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n", - uc_fw->obj); - - release_firmware(fw); - uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; - return; - -fail: - DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n", - uc_fw->path, err); - DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", - err, fw, uc_fw->obj); - - release_firmware(fw); /* OK even if fw is NULL */ - uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; -} - -void intel_uc_init_fw(struct drm_i915_private *dev_priv) -{ - fetch_uc_fw(dev_priv, &dev_priv->huc.fw); - fetch_uc_fw(dev_priv, &dev_priv->guc.fw); + intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw); } void intel_uc_fini_fw(struct drm_i915_private *dev_priv) { - __intel_uc_fw_fini(&dev_priv->guc.fw); - __intel_uc_fw_fini(&dev_priv->huc.fw); -} + if (!USES_GUC(dev_priv)) + return; -static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) -{ - GEM_BUG_ON(!guc->send_regs.base); - GEM_BUG_ON(!guc->send_regs.count); - GEM_BUG_ON(i >= guc->send_regs.count); + intel_uc_fw_fini(&dev_priv->guc.fw); - return _MMIO(guc->send_regs.base + 4 * i); + if (USES_HUC(dev_priv)) + intel_uc_fw_fini(&dev_priv->huc.fw); } -static void guc_init_send_regs(struct intel_guc *guc) +/** + * intel_uc_init_mmio - setup uC MMIO access + * + * @dev_priv: device private + * + * Setup minimal state necessary for MMIO accesses later in the + * initialization sequence. 
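Stepping back to the modparam consolidation above: the old enable_guc_loading/enable_guc_submission pair collapses into one enable_guc bitmask, resolved once at load time. A minimal decoding sketch; the two bit values are assumptions here, since the patch only shows the flag names:

#define ENABLE_GUC_SUBMISSION	BIT(0)	/* assumed bit assignment */
#define ENABLE_GUC_LOAD_HUC	BIT(1)	/* assumed bit assignment */

static int enable_guc = -1;	/* -1: "auto", resolved by sanitize */

static void sanitize_enable_guc(int platform_default)
{
	if (enable_guc < 0)	/* auto: take the per-platform default */
		enable_guc = platform_default;
}

static bool uses_guc_submission(void)
{
	return enable_guc & ENABLE_GUC_SUBMISSION;
}

static bool uses_huc(void)
{
	return enable_guc & ENABLE_GUC_LOAD_HUC;
}

After sanitization, the GEM_BUG_ON(i915_modparams.enable_guc < 0) in intel_uc_sanitize_options() guarantees the bit tests are well defined.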
+ */ +void intel_uc_init_mmio(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - enum forcewake_domains fw_domains = 0; - unsigned int i; - - guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); - guc->send_regs.count = SOFT_SCRATCH_COUNT - 1; - - for (i = 0; i < guc->send_regs.count; i++) { - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - guc_send_reg(guc, i), - FW_REG_READ | FW_REG_WRITE); - } - guc->send_regs.fw_domains = fw_domains; + intel_guc_init_send_regs(&dev_priv->guc); } static void guc_capture_load_err_log(struct intel_guc *guc) { - if (!guc->log.vma || i915.guc_log_level < 0) + if (!guc->log.vma || i915_modparams.guc_log_level < 0) return; if (!guc->load_err_log) @@ -309,8 +171,6 @@ static int guc_enable_communication(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - guc_init_send_regs(guc); - if (HAS_GUC_CT(dev_priv)) return intel_guc_enable_ct(guc); @@ -328,30 +188,89 @@ static void guc_disable_communication(struct intel_guc *guc) guc->send = intel_guc_send_nop; } -int intel_uc_init_hw(struct drm_i915_private *dev_priv) +int intel_uc_init_wq(struct drm_i915_private *dev_priv) +{ + int ret; + + if (!USES_GUC(dev_priv)) + return 0; + + ret = intel_guc_init_wq(&dev_priv->guc); + if (ret) { + DRM_ERROR("Couldn't allocate workqueues for GuC\n"); + return ret; + } + + return 0; +} + +void intel_uc_fini_wq(struct drm_i915_private *dev_priv) +{ + if (!USES_GUC(dev_priv)) + return; + + intel_guc_fini_wq(&dev_priv->guc); +} + +int intel_uc_init(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - int ret, attempts; + int ret; - if (!i915.enable_guc_loading) + if (!USES_GUC(dev_priv)) return 0; - guc_disable_communication(guc); - gen9_reset_guc_interrupts(dev_priv); + if (!HAS_GUC(dev_priv)) + return -ENODEV; - /* We need to notify the guc whenever we change the GGTT */ - i915_ggtt_enable_guc(dev_priv); + ret = intel_guc_init(guc); + if (ret) + return ret; - if (i915.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) { /* * This is stuff we need to have available at fw load time * if we are planning to enable submission later */ - ret = i915_guc_submission_init(dev_priv); - if (ret) - goto err_guc; + ret = intel_guc_submission_init(guc); + if (ret) { + intel_guc_fini(guc); + return ret; + } } + return 0; +} + +void intel_uc_fini(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + + if (!USES_GUC(dev_priv)) + return; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + + if (USES_GUC_SUBMISSION(dev_priv)) + intel_guc_submission_fini(guc); + + intel_guc_fini(guc); +} + +int intel_uc_init_hw(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + struct intel_huc *huc = &dev_priv->huc; + int ret, attempts; + + if (!USES_GUC(dev_priv)) + return 0; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + + guc_disable_communication(guc); + gen9_reset_guc_interrupts(dev_priv); + /* init WOPCM */ I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); I915_WRITE(DMA_GUC_WOPCM_OFFSET, @@ -371,10 +290,16 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) */ ret = __intel_uc_reset_hw(dev_priv); if (ret) - goto err_submission; + goto err_out; + + if (USES_HUC(dev_priv)) { + ret = intel_huc_init_hw(huc); + if (ret) + goto err_out; + } - intel_huc_init_hw(&dev_priv->huc); - ret = intel_guc_init_hw(&dev_priv->guc); + intel_guc_init_params(guc); + ret = intel_guc_fw_upload(guc); if (ret == 0 || ret != -EAGAIN) break; @@ -390,150 +315,67 @@ 
int intel_uc_init_hw(struct drm_i915_private *dev_priv) if (ret) goto err_log_capture; - intel_guc_auth_huc(dev_priv); - if (i915.enable_guc_submission) { - if (i915.guc_log_level >= 0) + if (USES_HUC(dev_priv)) { + ret = intel_huc_auth(huc); + if (ret) + goto err_communication; + } + + if (USES_GUC_SUBMISSION(dev_priv)) { + if (i915_modparams.guc_log_level >= 0) gen9_enable_guc_interrupts(dev_priv); - ret = i915_guc_submission_enable(dev_priv); + ret = intel_guc_submission_enable(guc); if (ret) goto err_interrupts; } + dev_info(dev_priv->drm.dev, "GuC firmware version %u.%u\n", + guc->fw.major_ver_found, guc->fw.minor_ver_found); + dev_info(dev_priv->drm.dev, "GuC submission %s\n", + enableddisabled(USES_GUC_SUBMISSION(dev_priv))); + dev_info(dev_priv->drm.dev, "HuC %s\n", + enableddisabled(USES_HUC(dev_priv))); + return 0; /* * We've failed to load the firmware :( - * - * Decide whether to disable GuC submission and fall back to - * execlist mode, and whether to hide the error by returning - * zero or to return -EIO, which the caller will treat as a - * nonfatal error (i.e. it doesn't prevent driver load, but - * marks the GPU as wedged until reset). */ err_interrupts: - guc_disable_communication(guc); gen9_disable_guc_interrupts(dev_priv); +err_communication: + guc_disable_communication(guc); err_log_capture: guc_capture_load_err_log(guc); -err_submission: - if (i915.enable_guc_submission) - i915_guc_submission_fini(dev_priv); -err_guc: - i915_ggtt_disable_guc(dev_priv); - - DRM_ERROR("GuC init failed\n"); - if (i915.enable_guc_loading > 1 || i915.enable_guc_submission > 1) - ret = -EIO; - else - ret = 0; - - if (i915.enable_guc_submission) { - i915.enable_guc_submission = 0; - DRM_NOTE("Falling back from GuC submission to execlist mode\n"); - } - - i915.enable_guc_loading = 0; - DRM_NOTE("GuC firmware loading disabled\n"); +err_out: + /* + * Note that there is no fallback as either user explicitly asked for + * the GuC or driver default option was to run with the GuC enabled. + */ + if (GEM_WARN_ON(ret == -EIO)) + ret = -EINVAL; + dev_err(dev_priv->drm.dev, "GuC initialization failed %d\n", ret); return ret; } void intel_uc_fini_hw(struct drm_i915_private *dev_priv) { - guc_free_load_err_log(&dev_priv->guc); - - if (!i915.enable_guc_loading) - return; - - if (i915.enable_guc_submission) - i915_guc_submission_disable(dev_priv); - - guc_disable_communication(&dev_priv->guc); - - if (i915.enable_guc_submission) { - gen9_disable_guc_interrupts(dev_priv); - i915_guc_submission_fini(dev_priv); - } - - i915_ggtt_disable_guc(dev_priv); -} - -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) -{ - WARN(1, "Unexpected send: action=%#x\n", *action); - return -ENODEV; -} - -/* - * This function implements the MMIO based host to GuC interface. 
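The comment on the removed function above summarizes the MMIO mailbox protocol (the implementation moves out of this file): write the action words into the SOFT_SCRATCH window, ring the GuC interrupt, then poll register 0 for the RECV bits. A condensed sketch of that pattern, with hypothetical reg_write()/reg_read()/guc_notify() accessors standing in for the real register helpers:

static void reg_write(unsigned int i, u32 val);	/* hypothetical */
static u32 reg_read(unsigned int i);		/* hypothetical */
static void guc_notify(void);			/* hypothetical */

static int guc_mmio_send(const u32 *action, unsigned int len)
{
	unsigned int i, timeout_us = 10 * 1000;	/* "no command > 10ms" */
	u32 status;

	for (i = 0; i < len; i++)		/* action words first */
		reg_write(i, action[i]);
	guc_notify();				/* then the doorbell */

	do {					/* poll reg 0 for the ack */
		status = reg_read(0);
		if ((status & INTEL_GUC_RECV_MASK) == INTEL_GUC_RECV_MASK)
			return status == INTEL_GUC_STATUS_SUCCESS ? 0 : -EIO;
		udelay(1);
	} while (timeout_us--);

	return -ETIMEDOUT;	/* no response within the limit */
}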
- */ -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - u32 status; - int i; - int ret; - - GEM_BUG_ON(!len); - GEM_BUG_ON(len > guc->send_regs.count); - - /* If CT is available, we expect to use MMIO only during init/fini */ - GEM_BUG_ON(HAS_GUC_CT(dev_priv) && - *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && - *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); - - mutex_lock(&guc->send_mutex); - intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); - - for (i = 0; i < len; i++) - I915_WRITE(guc_send_reg(guc, i), action[i]); - - POSTING_READ(guc_send_reg(guc, i - 1)); - - intel_guc_notify(guc); - - /* - * No GuC command should ever take longer than 10ms. - * Fast commands should still complete in 10us. - */ - ret = __intel_wait_for_register_fw(dev_priv, - guc_send_reg(guc, 0), - INTEL_GUC_RECV_MASK, - INTEL_GUC_RECV_MASK, - 10, 10, &status); - if (status != INTEL_GUC_STATUS_SUCCESS) { - /* - * Either the GuC explicitly returned an error (which - * we convert to -EIO here) or no response at all was - * received within the timeout limit (-ETIMEDOUT) - */ - if (ret != -ETIMEDOUT) - ret = -EIO; + struct intel_guc *guc = &dev_priv->guc; - DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" - " ret=%d status=0x%08X response=0x%08X\n", - action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); - } + guc_free_load_err_log(guc); - intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); - mutex_unlock(&guc->send_mutex); + if (!USES_GUC(dev_priv)) + return; - return ret; -} + GEM_BUG_ON(!HAS_GUC(dev_priv)); -int intel_guc_sample_forcewake(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - u32 action[2]; + if (USES_GUC_SUBMISSION(dev_priv)) + intel_guc_submission_disable(guc); - action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE; - /* WaRsDisableCoarsePowerGating:skl,bxt */ - if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) - action[1] = 0; - else - /* bit 0 and 1 are for Render and Media domain separately */ - action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA; + guc_disable_communication(guc); - return intel_guc_send(guc, action, ARRAY_SIZE(action)); + if (USES_GUC_SUBMISSION(dev_priv)) + gen9_disable_guc_interrupts(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 22ae52b17b0f..8a7249722ef1 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -24,256 +24,38 @@ #ifndef _INTEL_UC_H_ #define _INTEL_UC_H_ -#include "intel_guc_fwif.h" -#include "i915_guc_reg.h" -#include "intel_ringbuffer.h" -#include "intel_guc_ct.h" -#include "i915_vma.h" +#include "intel_guc.h" +#include "intel_huc.h" +#include "i915_params.h" -struct drm_i915_gem_request; - -/* - * This structure primarily describes the GEM object shared with the GuC. - * The specs sometimes refer to this object as a "GuC context", but we use - * the term "client" to avoid confusion with hardware contexts. This - * GEM object is held for the entire lifetime of our interaction with - * the GuC, being allocated before the GuC is loaded with its firmware. - * Because there's no way to update the address used by the GuC after - * initialisation, the shared object must stay pinned into the GGTT as - * long as the GuC is in use. We also keep the first page (only) mapped - * into kernel address space, as it includes shared data that must be - * updated on every request submission. 
- * - * The single GEM object described here is actually made up of several - * separate areas, as far as the GuC is concerned. The first page (kept - * kmap'd) includes the "process descriptor" which holds sequence data for - * the doorbell, and one cacheline which actually *is* the doorbell; a - * write to this will "ring the doorbell" (i.e. send an interrupt to the - * GuC). The subsequent pages of the client object constitute the work - * queue (a circular array of work items), again described in the process - * descriptor. Work queue pages are mapped momentarily as required. - * - * We also keep a few statistics on failures. Ideally, these should all - * be zero! - * no_wq_space: times that the submission pre-check found no space was - * available in the work queue (note, the queue is shared, - * not per-engine). It is OK for this to be nonzero, but - * it should not be huge! - * b_fail: failed to ring the doorbell. This should never happen, unless - * somehow the hardware misbehaves, or maybe if the GuC firmware - * crashes? We probably need to reset the GPU to recover. - * retcode: errno from last guc_submit() - */ -struct i915_guc_client { - struct i915_vma *vma; - void *vaddr; - struct i915_gem_context *owner; - struct intel_guc *guc; - - uint32_t engines; /* bitmap of (host) engine ids */ - uint32_t priority; - u32 stage_id; - uint32_t proc_desc_offset; - - u16 doorbell_id; - unsigned long doorbell_offset; - u32 doorbell_cookie; - - spinlock_t wq_lock; - uint32_t wq_offset; - uint32_t wq_size; - uint32_t wq_tail; - uint32_t wq_rsvd; - uint32_t no_wq_space; - - /* Per-engine counts of GuC submissions */ - uint64_t submissions[I915_NUM_ENGINES]; -}; - -enum intel_uc_fw_status { - INTEL_UC_FIRMWARE_FAIL = -1, - INTEL_UC_FIRMWARE_NONE = 0, - INTEL_UC_FIRMWARE_PENDING, - INTEL_UC_FIRMWARE_SUCCESS -}; - -/* User-friendly representation of an enum */ -static inline -const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) -{ - switch (status) { - case INTEL_UC_FIRMWARE_FAIL: - return "FAIL"; - case INTEL_UC_FIRMWARE_NONE: - return "NONE"; - case INTEL_UC_FIRMWARE_PENDING: - return "PENDING"; - case INTEL_UC_FIRMWARE_SUCCESS: - return "SUCCESS"; - } - return "<invalid>"; -} - -enum intel_uc_fw_type { - INTEL_UC_FW_TYPE_GUC, - INTEL_UC_FW_TYPE_HUC -}; - -/* User-friendly representation of an enum */ -static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) -{ - switch (type) { - case INTEL_UC_FW_TYPE_GUC: - return "GuC"; - case INTEL_UC_FW_TYPE_HUC: - return "HuC"; - } - return "uC"; -} - -/* - * This structure encapsulates all the data needed during the process - * of fetching, caching, and loading the firmware image into the GuC. 
- */ -struct intel_uc_fw { - const char *path; - size_t size; - struct drm_i915_gem_object *obj; - enum intel_uc_fw_status fetch_status; - enum intel_uc_fw_status load_status; - - uint16_t major_ver_wanted; - uint16_t minor_ver_wanted; - uint16_t major_ver_found; - uint16_t minor_ver_found; - - enum intel_uc_fw_type type; - uint32_t header_size; - uint32_t header_offset; - uint32_t rsa_size; - uint32_t rsa_offset; - uint32_t ucode_size; - uint32_t ucode_offset; -}; - -struct intel_guc_log { - uint32_t flags; - struct i915_vma *vma; - /* The runtime stuff gets created only when GuC logging gets enabled */ - struct { - void *buf_addr; - struct workqueue_struct *flush_wq; - struct work_struct flush_work; - struct rchan *relay_chan; - } runtime; - /* logging related stats */ - u32 capture_miss_count; - u32 flush_interrupt_count; - u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 flush_count[GUC_MAX_LOG_BUFFER]; -}; - -struct intel_guc { - struct intel_uc_fw fw; - struct intel_guc_log log; - struct intel_guc_ct ct; - - /* Log snapshot if GuC errors during load */ - struct drm_i915_gem_object *load_err_log; - - /* intel_guc_recv interrupt related state */ - bool interrupts_enabled; - - struct i915_vma *ads_vma; - struct i915_vma *stage_desc_pool; - void *stage_desc_pool_vaddr; - struct ida stage_ids; - - struct i915_guc_client *execbuf_client; - - DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); - uint32_t db_cacheline; /* Cyclic counter mod pagesize */ - - /* GuC's FW specific registers used in MMIO send */ - struct { - u32 base; - unsigned int count; - enum forcewake_domains fw_domains; - } send_regs; - - /* To serialize the intel_guc_send actions */ - struct mutex send_mutex; - - /* GuC's FW specific send function */ - int (*send)(struct intel_guc *guc, const u32 *data, u32 len); - - /* GuC's FW specific notify function */ - void (*notify)(struct intel_guc *guc); -}; - -struct intel_huc { - /* Generic uC firmware management */ - struct intel_uc_fw fw; - - /* HuC-specific additions */ -}; - -/* intel_uc.c */ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); +void intel_uc_init_mmio(struct drm_i915_private *dev_priv); void intel_uc_init_fw(struct drm_i915_private *dev_priv); void intel_uc_fini_fw(struct drm_i915_private *dev_priv); +int intel_uc_init_wq(struct drm_i915_private *dev_priv); +void intel_uc_fini_wq(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); -int intel_guc_sample_forcewake(struct intel_guc *guc); -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); +int intel_uc_init(struct drm_i915_private *dev_priv); +void intel_uc_fini(struct drm_i915_private *dev_priv); -static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) +static inline bool intel_uc_is_using_guc(void) { - return guc->send(guc, action, len); + GEM_BUG_ON(i915_modparams.enable_guc < 0); + return i915_modparams.enable_guc > 0; } -static inline void intel_guc_notify(struct intel_guc *guc) +static inline bool intel_uc_is_using_guc_submission(void) { - guc->notify(guc); + GEM_BUG_ON(i915_modparams.enable_guc < 0); + return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION; } -/* intel_guc_loader.c */ -int intel_guc_select_fw(struct intel_guc *guc); -int 
intel_guc_init_hw(struct intel_guc *guc); -int intel_guc_suspend(struct drm_i915_private *dev_priv); -int intel_guc_resume(struct drm_i915_private *dev_priv); -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); - -/* i915_guc_submission.c */ -int i915_guc_submission_init(struct drm_i915_private *dev_priv); -int i915_guc_submission_enable(struct drm_i915_private *dev_priv); -int i915_guc_wq_reserve(struct drm_i915_gem_request *rq); -void i915_guc_wq_unreserve(struct drm_i915_gem_request *request); -void i915_guc_submission_disable(struct drm_i915_private *dev_priv); -void i915_guc_submission_fini(struct drm_i915_private *dev_priv); -struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); - -/* intel_guc_log.c */ -int intel_guc_log_create(struct intel_guc *guc); -void intel_guc_log_destroy(struct intel_guc *guc); -int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); -void i915_guc_log_register(struct drm_i915_private *dev_priv); -void i915_guc_log_unregister(struct drm_i915_private *dev_priv); - -static inline u32 guc_ggtt_offset(struct i915_vma *vma) +static inline bool intel_uc_is_using_huc(void) { - u32 offset = i915_ggtt_offset(vma); - GEM_BUG_ON(offset < GUC_WOPCM_TOP); - GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); - return offset; + GEM_BUG_ON(i915_modparams.enable_guc < 0); + return i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC; } -/* intel_huc.c */ -void intel_huc_select_fw(struct intel_huc *huc); -void intel_huc_init_hw(struct intel_huc *huc); -void intel_guc_auth_huc(struct drm_i915_private *dev_priv); - #endif diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c new file mode 100644 index 000000000000..784eff9cdfc8 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -0,0 +1,318 @@ +/* + * Copyright © 2016-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <drm/drm_print.h> + +#include "intel_uc_fw.h" +#include "i915_drv.h" + +/** + * intel_uc_fw_fetch - fetch uC firmware + * + * @dev_priv: device private + * @uc_fw: uC firmware + * + * Fetch uC firmware into GEM obj. 
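The fetch path below drives uc_fw->fetch_status through a small state machine; later stages key off it (intel_uc_fw_upload() refuses anything but SUCCESS). Sketched:

/*
 *   NONE --(path selected, request_firmware)--------> PENDING
 *   PENDING --(blob validated, GEM object created)--> SUCCESS
 *   PENDING --(request or validation failure)-------> FAIL
 *
 * intel_uc_fw_fini() returns the status to NONE when the object is
 * released.
 */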
+ */ +void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + struct drm_i915_gem_object *obj; + const struct firmware *fw = NULL; + struct uc_css_header *css; + size_t size; + int err; + + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + + if (!uc_fw->path) + return; + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->fetch_status)); + + err = request_firmware(&fw, uc_fw->path, &pdev->dev); + if (err) { + DRM_DEBUG_DRIVER("%s fw request_firmware err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto fail; + } + + DRM_DEBUG_DRIVER("%s fw size %zu ptr %p\n", + intel_uc_fw_type_repr(uc_fw->type), fw->size, fw); + + /* Check the size of the blob before examining buffer contents */ + if (fw->size < sizeof(struct uc_css_header)) { + DRM_WARN("%s: Unexpected firmware size (%zu, min %zu)\n", + intel_uc_fw_type_repr(uc_fw->type), + fw->size, sizeof(struct uc_css_header)); + err = -ENODATA; + goto fail; + } + + css = (struct uc_css_header *)fw->data; + + /* Firmware bits always start from header */ + uc_fw->header_offset = 0; + uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw - + css->key_size_dw - css->exponent_size_dw) * + sizeof(u32); + + if (uc_fw->header_size != sizeof(struct uc_css_header)) { + DRM_WARN("%s: Mismatched firmware header definition\n", + intel_uc_fw_type_repr(uc_fw->type)); + err = -ENOEXEC; + goto fail; + } + + /* then, uCode */ + uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; + uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); + + /* Header and uCode will be loaded to WOPCM */ + size = uc_fw->header_size + uc_fw->ucode_size; + if (size > intel_guc_wopcm_size(dev_priv)) { + DRM_WARN("%s: Firmware is too large to fit in WOPCM\n", + intel_uc_fw_type_repr(uc_fw->type)); + err = -E2BIG; + goto fail; + } + + /* now RSA */ + if (css->key_size_dw != UOS_RSA_SCRATCH_COUNT) { + DRM_WARN("%s: Mismatched firmware RSA key size (%u)\n", + intel_uc_fw_type_repr(uc_fw->type), css->key_size_dw); + err = -ENOEXEC; + goto fail; + } + uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size; + uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + + /* At least, it should have header, uCode and RSA. Size of all three. */ + size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size; + if (fw->size < size) { + DRM_WARN("%s: Truncated firmware (%zu, expected %zu)\n", + intel_uc_fw_type_repr(uc_fw->type), fw->size, size); + err = -ENOEXEC; + goto fail; + } + + /* + * The GuC firmware image has the version number embedded at a + * well-known offset within the firmware blob; note that major / minor + * version are TWO bytes each (i.e. u16), although all pointers and + * offsets are defined in terms of bytes (u8). 
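Collecting the offset bookkeeping above in one place: the blob is laid out CSS header first, then uCode, then the RSA signature, with every offset derived from the header's dword counts. A condensed sketch of the same math, plus the version-acceptance rule applied further down (exact major, minimum minor):

static void uc_fw_layout(const struct uc_css_header *css,
			 u32 *ucode_off, u32 *ucode_sz,
			 u32 *rsa_off, u32 *rsa_sz)
{
	u32 header_sz = (css->header_size_dw - css->modulus_size_dw -
			 css->key_size_dw - css->exponent_size_dw) *
			sizeof(u32);

	*ucode_off = header_sz;		/* the header itself starts at 0 */
	*ucode_sz = (css->size_dw - css->header_size_dw) * sizeof(u32);
	*rsa_off = *ucode_off + *ucode_sz;
	*rsa_sz = css->key_size_dw * sizeof(u32);
}

static bool uc_fw_version_ok(u16 major_found, u16 minor_found,
			     u16 major_wanted, u16 minor_wanted)
{
	return major_found == major_wanted && minor_found >= minor_wanted;
}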
+ */ + switch (uc_fw->type) { + case INTEL_UC_FW_TYPE_GUC: + uc_fw->major_ver_found = css->guc.sw_version >> 16; + uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF; + break; + + case INTEL_UC_FW_TYPE_HUC: + uc_fw->major_ver_found = css->huc.sw_version >> 16; + uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF; + break; + + default: + MISSING_CASE(uc_fw->type); + break; + } + + DRM_DEBUG_DRIVER("%s fw version %u.%u (wanted %u.%u)\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + + if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) { + DRM_NOTE("%s: Skipping firmware version check\n", + intel_uc_fw_type_repr(uc_fw->type)); + } else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || + uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { + DRM_NOTE("%s: Wrong firmware version (%u.%u, required %u.%u)\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->major_ver_found, uc_fw->minor_ver_found, + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted); + err = -ENOEXEC; + goto fail; + } + + obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + DRM_DEBUG_DRIVER("%s fw object_create err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto fail; + } + + uc_fw->obj = obj; + uc_fw->size = fw->size; + uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS; + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->fetch_status)); + + release_firmware(fw); + return; + +fail: + uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; + DRM_DEBUG_DRIVER("%s fw fetch %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->fetch_status)); + + DRM_WARN("%s: Failed to fetch firmware %s (error %d)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + DRM_INFO("%s: Firmware can be downloaded from %s\n", + intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); + + release_firmware(fw); /* OK even if fw is NULL */ +} + +/** + * intel_uc_fw_upload - load uC firmware using custom loader + * + * @uc_fw: uC firmware + * @xfer: custom uC firmware loader function + * + * Loads uC firmware using custom loader and updates internal flags. + */ +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, + int (*xfer)(struct intel_uc_fw *uc_fw, + struct i915_vma *vma)) +{ + struct i915_vma *vma; + int err; + + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + + if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return -ENOEXEC; + + uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->load_status)); + + /* Pin object with firmware */ + err = i915_gem_object_set_to_gtt_domain(uc_fw->obj, false); + if (err) { + DRM_DEBUG_DRIVER("%s fw set-domain err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto fail; + } + + vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n", + intel_uc_fw_type_repr(uc_fw->type), err); + goto fail; + } + + /* Call custom loader */ + err = xfer(uc_fw, vma); + + /* + * We keep the object pages for reuse during resume. But we can unpin it + * now that DMA has completed, so it doesn't continue to take up space.
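intel_uc_fw_upload() keeps the pin/unpin bookkeeping common and leaves the hardware-specific DMA to the xfer callback. A usage sketch; guc_xfer_dma() here is hypothetical (the real callback lives in the new intel_guc_fw.c):

/* Hypothetical xfer: program the DMA engine with the GGTT address of
 * the pinned vma and wait for completion. */
static int guc_xfer_dma(struct intel_uc_fw *uc_fw, struct i915_vma *vma)
{
	/* write guc_ggtt_offset(vma) + uc_fw->header_offset to the DMA
	 * source register, set the WOPCM destination, kick and poll */
	return 0;
}

	err = intel_uc_fw_upload(&dev_priv->guc.fw, guc_xfer_dma);

Note the vma is unpinned even on success; the object pages stay around so the blob can be re-uploaded on resume without another fetch.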
+ */ + i915_vma_unpin(vma); + + if (err) + goto fail; + + uc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->load_status)); + + DRM_INFO("%s: Loaded firmware %s (version %u.%u)\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->path, + uc_fw->major_ver_found, uc_fw->minor_ver_found); + + return 0; + +fail: + uc_fw->load_status = INTEL_UC_FIRMWARE_FAIL; + DRM_DEBUG_DRIVER("%s fw load %s\n", + intel_uc_fw_type_repr(uc_fw->type), + intel_uc_fw_status_repr(uc_fw->load_status)); + + DRM_WARN("%s: Failed to load firmware %s (error %d)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + + return err; +} + +/** + * intel_uc_fw_fini - cleanup uC firmware + * + * @uc_fw: uC firmware + * + * Cleans up uC firmware by releasing the firmware GEM obj. + */ +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw) +{ + struct drm_i915_gem_object *obj; + + obj = fetch_and_zero(&uc_fw->obj); + if (obj) + i915_gem_object_put(obj); + + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; +} + +/** + * intel_uc_fw_dump - dump information about uC firmware + * @uc_fw: uC firmware + * @p: the &drm_printer + * + * Pretty printer for uC firmware. + */ +void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) +{ + drm_printf(p, "%s firmware: %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + drm_printf(p, "\tstatus: fetch %s, load %s\n", + intel_uc_fw_status_repr(uc_fw->fetch_status), + intel_uc_fw_status_repr(uc_fw->load_status)); + drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", + uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted, + uc_fw->major_ver_found, uc_fw->minor_ver_found); + drm_printf(p, "\theader: offset %u, size %u\n", + uc_fw->header_offset, uc_fw->header_size); + drm_printf(p, "\tuCode: offset %u, size %u\n", + uc_fw->ucode_offset, uc_fw->ucode_size); + drm_printf(p, "\tRSA: offset %u, size %u\n", + uc_fw->rsa_offset, uc_fw->rsa_size); +} diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h new file mode 100644 index 000000000000..d5fd4609c785 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -0,0 +1,126 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef _INTEL_UC_FW_H_ +#define _INTEL_UC_FW_H_ + +struct drm_printer; +struct drm_i915_private; +struct i915_vma; + +/* Home of GuC, HuC and DMC firmwares */ +#define INTEL_UC_FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" + +enum intel_uc_fw_status { + INTEL_UC_FIRMWARE_FAIL = -1, + INTEL_UC_FIRMWARE_NONE = 0, + INTEL_UC_FIRMWARE_PENDING, + INTEL_UC_FIRMWARE_SUCCESS +}; + +enum intel_uc_fw_type { + INTEL_UC_FW_TYPE_GUC, + INTEL_UC_FW_TYPE_HUC +}; + +/* + * This structure encapsulates all the data needed during the process + * of fetching, caching, and loading the firmware image into the uC. + */ +struct intel_uc_fw { + const char *path; + size_t size; + struct drm_i915_gem_object *obj; + enum intel_uc_fw_status fetch_status; + enum intel_uc_fw_status load_status; + + /* + * The firmware build process generates a version header file with the + * major and minor versions defined. These versions are built into the + * CSS header of the firmware. The i915 kernel driver sets the minimal + * firmware version required per platform. + */ + u16 major_ver_wanted; + u16 minor_ver_wanted; + u16 major_ver_found; + u16 minor_ver_found; + + enum intel_uc_fw_type type; + u32 header_size; + u32 header_offset; + u32 rsa_size; + u32 rsa_offset; + u32 ucode_size; + u32 ucode_offset; +}; + +static inline +const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) +{ + switch (status) { + case INTEL_UC_FIRMWARE_FAIL: + return "FAIL"; + case INTEL_UC_FIRMWARE_NONE: + return "NONE"; + case INTEL_UC_FIRMWARE_PENDING: + return "PENDING"; + case INTEL_UC_FIRMWARE_SUCCESS: + return "SUCCESS"; + } + return "<invalid>"; +} + +static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type) +{ + switch (type) { + case INTEL_UC_FW_TYPE_GUC: + return "GuC"; + case INTEL_UC_FW_TYPE_HUC: + return "HuC"; + } + return "uC"; +} + +static inline +void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) +{ + uc_fw->path = NULL; + uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; + uc_fw->load_status = INTEL_UC_FIRMWARE_NONE; + uc_fw->type = type; +} + +static inline bool intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw) +{ + return uc_fw->path != NULL; +} + +void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, + struct intel_uc_fw *uc_fw); +int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, + int (*xfer)(struct intel_uc_fw *uc_fw, + struct i915_vma *vma)); +void intel_uc_fw_fini(struct intel_uc_fw *uc_fw); +void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 1d7b879cc68c..89547b614aa6 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -69,17 +69,104 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) HRTIMER_MODE_REL); } +static inline int +__wait_for_ack(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d, + const u32 ack, + const u32 value) +{ + return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == value, + FORCEWAKE_ACK_TIMEOUT_MS); +} + +static inline int +wait_ack_clear(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d, + const u32 ack) +{ + return __wait_for_ack(i915, d, ack, 0); +} + +static inline int +wait_ack_set(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d, + const u32 ack) +{ + return __wait_for_ack(i915, d, ack, ack); +} + static inline void
fw_domain_wait_ack_clear(const struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & - FORCEWAKE_KERNEL) == 0, - FORCEWAKE_ACK_TIMEOUT_MS)) + if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL)) DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n", intel_uncore_forcewake_domain_to_str(d->id)); } +enum ack_type { + ACK_CLEAR = 0, + ACK_SET +}; + +static int +fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d, + const enum ack_type type) +{ + const u32 ack_bit = FORCEWAKE_KERNEL; + const u32 value = type == ACK_SET ? ack_bit : 0; + unsigned int pass; + bool ack_detected; + + /* + * There is a possibility of driver's wake request colliding + * with hardware's own wake requests and that can cause + * hardware to not deliver the driver's ack message. + * + * Use a fallback bit toggle to kick the gpu state machine + * in the hope that the original ack will be delivered along with + * the fallback ack. + * + * This workaround is described in HSDES #1604254524 + */ + + pass = 1; + do { + wait_ack_clear(i915, d, FORCEWAKE_KERNEL_FALLBACK); + + __raw_i915_write32(i915, d->reg_set, + _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL_FALLBACK)); + /* Give gt some time to relax before the polling frenzy */ + udelay(10 * pass); + wait_ack_set(i915, d, FORCEWAKE_KERNEL_FALLBACK); + + ack_detected = (__raw_i915_read32(i915, d->reg_ack) & ack_bit) == value; + + __raw_i915_write32(i915, d->reg_set, + _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL_FALLBACK)); + } while (!ack_detected && pass++ < 10); + + DRM_DEBUG_DRIVER("%s had to use fallback to %s ack, 0x%x (passes %u)\n", + intel_uncore_forcewake_domain_to_str(d->id), + type == ACK_SET ? "set" : "clear", + __raw_i915_read32(i915, d->reg_ack), + pass); + + return ack_detected ? 
0 : -ETIMEDOUT; +} + +static inline void +fw_domain_wait_ack_clear_fallback(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) +{ + if (likely(!wait_ack_clear(i915, d, FORCEWAKE_KERNEL))) + return; + + if (fw_domain_wait_ack_with_fallback(i915, d, ACK_CLEAR)) + fw_domain_wait_ack_clear(i915, d); +} + static inline void fw_domain_get(struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) @@ -88,17 +175,26 @@ fw_domain_get(struct drm_i915_private *i915, } static inline void -fw_domain_wait_ack(const struct drm_i915_private *i915, - const struct intel_uncore_forcewake_domain *d) +fw_domain_wait_ack_set(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) { - if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & - FORCEWAKE_KERNEL), - FORCEWAKE_ACK_TIMEOUT_MS)) + if (wait_ack_set(i915, d, FORCEWAKE_KERNEL)) DRM_ERROR("%s: timed out waiting for forcewake ack request.\n", intel_uncore_forcewake_domain_to_str(d->id)); } static inline void +fw_domain_wait_ack_set_fallback(const struct drm_i915_private *i915, + const struct intel_uncore_forcewake_domain *d) +{ + if (likely(!wait_ack_set(i915, d, FORCEWAKE_KERNEL))) + return; + + if (fw_domain_wait_ack_with_fallback(i915, d, ACK_SET)) + fw_domain_wait_ack_set(i915, d); +} + +static inline void fw_domain_put(const struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { @@ -119,7 +215,27 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains) } for_each_fw_domain_masked(d, fw_domains, i915, tmp) - fw_domain_wait_ack(i915, d); + fw_domain_wait_ack_set(i915, d); + + i915->uncore.fw_domains_active |= fw_domains; +} + +static void +fw_domains_get_with_fallback(struct drm_i915_private *i915, + enum forcewake_domains fw_domains) +{ + struct intel_uncore_forcewake_domain *d; + unsigned int tmp; + + GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains); + + for_each_fw_domain_masked(d, fw_domains, i915, tmp) { + fw_domain_wait_ack_clear_fallback(i915, d); + fw_domain_get(i915, d); + } + + for_each_fw_domain_masked(d, fw_domains, i915, tmp) + fw_domain_wait_ack_set_fallback(i915, d); i915->uncore.fw_domains_active |= fw_domains; } @@ -229,6 +345,7 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) return HRTIMER_NORESTART; } +/* Note callers must have acquired the PUNIT->PMIC bus, before calling this. */ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, bool restore) { @@ -237,6 +354,8 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, int retry_count = 100; enum forcewake_domains fw, active_domains; + iosf_mbi_assert_punit_acquired(); + /* Hold uncore.lock across reset to prevent any register access * with forcewake not set correctly. Wait until all pending * timers are run before holding. 
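 *
 * Note the new requirement that callers hold the punit bus across the
 * reset; a minimal sketch of the expected calling pattern, mirroring
 * __intel_uncore_early_sanitize() in the next hunk:
 *
 *	iosf_mbi_punit_acquire();
 *	intel_uncore_forcewake_reset(dev_priv, restore_forcewake);
 *	iosf_mbi_punit_release();
 */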
@@ -416,14 +535,18 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, GT_FIFO_CTL_RC6_POLICY_STALL); } + iosf_mbi_punit_acquire(); intel_uncore_forcewake_reset(dev_priv, restore_forcewake); + iosf_mbi_punit_release(); } void intel_uncore_suspend(struct drm_i915_private *dev_priv) { - iosf_mbi_unregister_pmic_bus_access_notifier( + iosf_mbi_punit_acquire(); + iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( &dev_priv->uncore.pmic_bus_access_nb); intel_uncore_forcewake_reset(dev_priv, false); + iosf_mbi_punit_release(); } void intel_uncore_resume_early(struct drm_i915_private *dev_priv) @@ -434,10 +557,14 @@ void intel_uncore_resume_early(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); } -void intel_uncore_sanitize(struct drm_i915_private *dev_priv) +void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv) { - i915.enable_rc6 = sanitize_rc6_option(dev_priv, i915.enable_rc6); + iosf_mbi_register_pmic_bus_access_notifier( + &dev_priv->uncore.pmic_bus_access_nb); +} +void intel_uncore_sanitize(struct drm_i915_private *dev_priv) +{ /* BIOS often leaves RC6 enabled, but disable it for hw init */ intel_sanitize_gt_powersave(dev_priv); } @@ -490,6 +617,57 @@ void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, } /** + * intel_uncore_forcewake_user_get - claim forcewake on behalf of userspace + * @dev_priv: i915 device instance + * + * This function is a wrapper around intel_uncore_forcewake_get() to acquire + * the GT powerwell and in the process disable our debugging for the + * duration of userspace's bypass. + */ +void intel_uncore_forcewake_user_get(struct drm_i915_private *dev_priv) +{ + spin_lock_irq(&dev_priv->uncore.lock); + if (!dev_priv->uncore.user_forcewake.count++) { + intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL); + + /* Save and disable mmio debugging for the user bypass */ + dev_priv->uncore.user_forcewake.saved_mmio_check = + dev_priv->uncore.unclaimed_mmio_check; + dev_priv->uncore.user_forcewake.saved_mmio_debug = + i915_modparams.mmio_debug; + + dev_priv->uncore.unclaimed_mmio_check = 0; + i915_modparams.mmio_debug = 0; + } + spin_unlock_irq(&dev_priv->uncore.lock); +} + +/** + * intel_uncore_forcewake_user_put - release forcewake on behalf of userspace + * @dev_priv: i915 device instance + * + * This function complements intel_uncore_forcewake_user_get() and releases + * the GT powerwell taken on behalf of the userspace bypass. 
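+ *
+ * A minimal sketch of the intended pairing around a userspace MMIO
+ * bypass (e.g. the debugfs forcewake file):
+ *
+ *	intel_uncore_forcewake_user_get(dev_priv);
+ *	... userspace reads and writes registers directly ...
+ *	intel_uncore_forcewake_user_put(dev_priv);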
+ */ +void intel_uncore_forcewake_user_put(struct drm_i915_private *dev_priv) +{ + spin_lock_irq(&dev_priv->uncore.lock); + if (!--dev_priv->uncore.user_forcewake.count) { + if (intel_uncore_unclaimed_mmio(dev_priv)) + dev_info(dev_priv->drm.dev, + "Invalid mmio detected during user access\n"); + + dev_priv->uncore.unclaimed_mmio_check = + dev_priv->uncore.user_forcewake.saved_mmio_check; + i915_modparams.mmio_debug = + dev_priv->uncore.user_forcewake.saved_mmio_debug; + + intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL); + } + spin_unlock_irq(&dev_priv->uncore.lock); +} + +/** * intel_uncore_forcewake_get__locked - grab forcewake domain references * @dev_priv: i915 device instance * @fw_domains: forcewake domains to get reference on @@ -574,7 +752,23 @@ void assert_forcewakes_inactive(struct drm_i915_private *dev_priv) if (!dev_priv->uncore.funcs.force_wake_get) return; - WARN_ON(dev_priv->uncore.fw_domains_active); + WARN(dev_priv->uncore.fw_domains_active, + "Expected all fw_domains to be inactive, but %08x are still on\n", + dev_priv->uncore.fw_domains_active); +} + +void assert_forcewakes_active(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + if (!dev_priv->uncore.funcs.force_wake_get) + return; + + assert_rpm_wakelock_held(dev_priv); + + fw_domains &= dev_priv->uncore.fw_domains; + WARN(fw_domains & ~dev_priv->uncore.fw_domains_active, + "Expected %08x fw_domains to be active, but %08x are off\n", + fw_domains, fw_domains & ~dev_priv->uncore.fw_domains_active); } /* We give fast paths for the really cool registers */ @@ -790,7 +984,8 @@ __unclaimed_reg_debug(struct drm_i915_private *dev_priv, "Unclaimed %s register 0x%x\n", read ? "read from" : "write to", i915_mmio_reg_offset(reg))) - i915.mmio_debug--; /* Only report the first N failures */ + /* Only report the first N failures */ + i915_modparams.mmio_debug--; } static inline void @@ -799,7 +994,7 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv, const bool read, const bool before) { - if (likely(!i915.mmio_debug)) + if (likely(!i915_modparams.mmio_debug)) return; __unclaimed_reg_debug(dev_priv, reg, read, before); @@ -1073,7 +1268,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) } if (INTEL_GEN(dev_priv) >= 9) { - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; + dev_priv->uncore.funcs.force_wake_get = + fw_domains_get_with_fallback; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER_GEN9, @@ -1171,8 +1367,15 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb, * bus, which will be busy after this notification, leading to: * "render: timed out waiting for forcewake ack request." * errors. + * + * The notifier is unregistered during intel_runtime_suspend(), + * so it's ok to access the HW here without holding a RPM + * wake reference -> disable wakeref asserts for the time of + * the access. 
*/ + disable_rpm_wakeref_asserts(dev_priv); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + enable_rpm_wakeref_asserts(dev_priv); break; case MBI_PMIC_BUS_ACCESS_END: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -1227,116 +1430,122 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) iosf_mbi_register_pmic_bus_access_notifier( &dev_priv->uncore.pmic_bus_access_nb); - - i915_check_and_clear_faults(dev_priv); } void intel_uncore_fini(struct drm_i915_private *dev_priv) { - iosf_mbi_unregister_pmic_bus_access_notifier( - &dev_priv->uncore.pmic_bus_access_nb); - /* Paranoia: make sure we have disabled everything before we exit. */ intel_uncore_sanitize(dev_priv); + + iosf_mbi_punit_acquire(); + iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( + &dev_priv->uncore.pmic_bus_access_nb); intel_uncore_forcewake_reset(dev_priv, false); + iosf_mbi_punit_release(); } -#define GEN_RANGE(l, h) GENMASK((h) - 1, (l) - 1) - -static const struct register_whitelist { - i915_reg_t offset_ldw, offset_udw; - uint32_t size; - /* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */ - uint32_t gen_bitmask; -} whitelist[] = { - { .offset_ldw = RING_TIMESTAMP(RENDER_RING_BASE), - .offset_udw = RING_TIMESTAMP_UDW(RENDER_RING_BASE), - .size = 8, .gen_bitmask = GEN_RANGE(4, 9) }, -}; +static const struct reg_whitelist { + i915_reg_t offset_ldw; + i915_reg_t offset_udw; + u16 gen_mask; + u8 size; +} reg_read_whitelist[] = { { + .offset_ldw = RING_TIMESTAMP(RENDER_RING_BASE), + .offset_udw = RING_TIMESTAMP_UDW(RENDER_RING_BASE), + .gen_mask = INTEL_GEN_MASK(4, 10), + .size = 8 +} }; int i915_reg_read_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_reg_read *reg = data; - struct register_whitelist const *entry = whitelist; - unsigned size; - i915_reg_t offset_ldw, offset_udw; - int i, ret = 0; - - for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) { - if (i915_mmio_reg_offset(entry->offset_ldw) == (reg->offset & -entry->size) && - (INTEL_INFO(dev_priv)->gen_mask & entry->gen_bitmask)) + struct reg_whitelist const *entry; + unsigned int flags; + int remain; + int ret = 0; + + entry = reg_read_whitelist; + remain = ARRAY_SIZE(reg_read_whitelist); + while (remain) { + u32 entry_offset = i915_mmio_reg_offset(entry->offset_ldw); + + GEM_BUG_ON(!is_power_of_2(entry->size)); + GEM_BUG_ON(entry->size > 8); + GEM_BUG_ON(entry_offset & (entry->size - 1)); + + if (INTEL_INFO(dev_priv)->gen_mask & entry->gen_mask && + entry_offset == (reg->offset & -entry->size)) break; + entry++; + remain--; } - if (i == ARRAY_SIZE(whitelist)) + if (!remain) return -EINVAL; - /* We use the low bits to encode extra flags as the register should - * be naturally aligned (and those that are not so aligned merely - * limit the available flags for that register). 
- */ - offset_ldw = entry->offset_ldw; - offset_udw = entry->offset_udw; - size = entry->size; - size |= reg->offset ^ i915_mmio_reg_offset(offset_ldw); + flags = reg->offset & (entry->size - 1); intel_runtime_pm_get(dev_priv); - - switch (size) { - case 8 | 1: - reg->val = I915_READ64_2x32(offset_ldw, offset_udw); - break; - case 8: - reg->val = I915_READ64(offset_ldw); - break; - case 4: - reg->val = I915_READ(offset_ldw); - break; - case 2: - reg->val = I915_READ16(offset_ldw); - break; - case 1: - reg->val = I915_READ8(offset_ldw); - break; - default: + if (entry->size == 8 && flags == I915_REG_READ_8B_WA) + reg->val = I915_READ64_2x32(entry->offset_ldw, + entry->offset_udw); + else if (entry->size == 8 && flags == 0) + reg->val = I915_READ64(entry->offset_ldw); + else if (entry->size == 4 && flags == 0) + reg->val = I915_READ(entry->offset_ldw); + else if (entry->size == 2 && flags == 0) + reg->val = I915_READ16(entry->offset_ldw); + else if (entry->size == 1 && flags == 0) + reg->val = I915_READ8(entry->offset_ldw); + else ret = -EINVAL; - goto out; - } - -out: intel_runtime_pm_put(dev_priv); + return ret; } -static void gen3_stop_rings(struct drm_i915_private *dev_priv) +static void gen3_stop_engine(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + const u32 base = engine->mmio_base; + const i915_reg_t mode = RING_MI_MODE(base); + + I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING)); + if (intel_wait_for_register_fw(dev_priv, + mode, + MODE_IDLE, + MODE_IDLE, + 500)) + DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", + engine->name); + + I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base))); + + I915_WRITE_FW(RING_HEAD(base), 0); + I915_WRITE_FW(RING_TAIL(base), 0); + + /* The ring must be empty before it is disabled */ + I915_WRITE_FW(RING_CTL(base), 0); + + /* Check acts as a post */ + if (I915_READ_FW(RING_HEAD(base)) != 0) + DRM_DEBUG_DRIVER("%s: ring head not parked\n", + engine->name); +} + +static void i915_stop_engines(struct drm_i915_private *dev_priv, + unsigned engine_mask) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, dev_priv, id) { - const u32 base = engine->mmio_base; - const i915_reg_t mode = RING_MI_MODE(base); - - I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING)); - if (intel_wait_for_register_fw(dev_priv, - mode, - MODE_IDLE, - MODE_IDLE, - 500)) - DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", - engine->name); - - I915_WRITE_FW(RING_CTL(base), 0); - I915_WRITE_FW(RING_HEAD(base), 0); - I915_WRITE_FW(RING_TAIL(base), 0); - - /* Check acts as a post */ - if (I915_READ_FW(RING_HEAD(base)) != 0) - DRM_DEBUG_DRIVER("%s: ring head not parked\n", - engine->name); - } + if (INTEL_GEN(dev_priv) < 3) + return; + + for_each_engine_masked(engine, dev_priv, engine_mask, id) + gen3_stop_engine(engine); } static bool i915_reset_complete(struct pci_dev *pdev) @@ -1371,9 +1580,6 @@ static int g33_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { struct pci_dev *pdev = dev_priv->drm.pdev; - /* Stop engines before we reset; see g4x_do_reset() below for why. */ - gen3_stop_rings(dev_priv); - pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); return wait_for(g4x_reset_complete(pdev), 500); } @@ -1388,12 +1594,6 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); POSTING_READ(VDECCLK_GATE_D); - /* We stop engines, otherwise we might get failed reset and a - * dead gpu (on elk). 
- * WaMediaResetMainRingCleanup:ctg,elk (presumably) - */ - gen3_stop_rings(dev_priv); - pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); ret = wait_for(g4x_reset_complete(pdev), 500); @@ -1662,7 +1862,7 @@ typedef int (*reset_func)(struct drm_i915_private *, unsigned engine_mask); static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv) { - if (!i915.reset) + if (!i915_modparams.reset) return NULL; if (INTEL_INFO(dev_priv)->gen >= 8) @@ -1683,22 +1883,34 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv) int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { - reset_func reset; + reset_func reset = intel_get_gpu_reset(dev_priv); int retry; int ret; might_sleep(); - reset = intel_get_gpu_reset(dev_priv); - if (reset == NULL) - return -ENODEV; - /* If the power well sleeps during the reset, the reset * request may be dropped and never completes (causing -EIO). */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); for (retry = 0; retry < 3; retry++) { + + /* We stop the engines first, otherwise we might get a failed reset and + * a dead gpu (on elk). Even a gpu as modern as kbl can suffer a system + * hang if a batchbuffer is still progressing when the reset is issued, + * regardless of the READY_TO_RESET ack. Thus assume it is best to stop + * the engines on all gens where we have a gpu reset. + * + * WaMediaResetMainRingCleanup:ctg,elk (presumably) + * + * FIXME: Wa for more modern gens needs to be validated + */ + i915_stop_engines(dev_priv, engine_mask); + + ret = -ENODEV; + if (reset) + ret = reset(dev_priv, engine_mask); if (ret != -ETIMEDOUT) break; @@ -1714,18 +1926,13 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv) return intel_get_gpu_reset(dev_priv) != NULL; } -/* - * When GuC submission is enabled, GuC manages ELSP and can initiate the - * engine reset too. For now, fall back to full GPU reset if it is enabled. - */ bool intel_has_reset_engine(struct drm_i915_private *dev_priv) { return (dev_priv->info.has_reset_engine && - !dev_priv->guc.execbuf_client && - i915.reset >= 2); + i915_modparams.reset >= 2); } -int intel_guc_reset(struct drm_i915_private *dev_priv) +int intel_reset_guc(struct drm_i915_private *dev_priv) { int ret; @@ -1747,7 +1954,7 @@ bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv) bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv) { - if (unlikely(i915.mmio_debug || + if (unlikely(i915_modparams.mmio_debug || dev_priv->uncore.unclaimed_mmio_check <= 0)) return false; @@ -1755,7 +1962,7 @@ intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv) DRM_DEBUG("Unclaimed register detected, " "enabling oneshot unclaimed register reporting.
" "Please use i915.mmio_debug=N for more information.\n"); - i915.mmio_debug++; + i915_modparams.mmio_debug++; dev_priv->uncore.unclaimed_mmio_check--; return true; } diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 5f90278da461..9ce079b5dd0d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -25,6 +25,12 @@ #ifndef __INTEL_UNCORE_H__ #define __INTEL_UNCORE_H__ +#include <linux/spinlock.h> +#include <linux/notifier.h> +#include <linux/hrtimer.h> + +#include "i915_reg.h" + struct drm_i915_private; enum forcewake_domain_id { @@ -102,6 +108,13 @@ struct intel_uncore { i915_reg_t reg_ack; } fw_domain[FW_DOMAIN_ID_COUNT]; + struct { + unsigned int count; + + int saved_mmio_check; + int saved_mmio_debug; + } user_forcewake; + int unclaimed_mmio_check; }; @@ -121,9 +134,12 @@ bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv void intel_uncore_fini(struct drm_i915_private *dev_priv); void intel_uncore_suspend(struct drm_i915_private *dev_priv); void intel_uncore_resume_early(struct drm_i915_private *dev_priv); +void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv); u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); +void assert_forcewakes_active(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains); const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id); enum forcewake_domains @@ -144,6 +160,9 @@ void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv, void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv, enum forcewake_domains domains); +void intel_uncore_forcewake_user_get(struct drm_i915_private *dev_priv); +void intel_uncore_forcewake_user_put(struct drm_i915_private *dev_priv); + int intel_wait_for_register(struct drm_i915_private *dev_priv, i915_reg_t reg, u32 mask, diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h index a92e7762f596..98dff6058d3c 100644 --- a/drivers/gpu/drm/i915/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/intel_vbt_defs.h @@ -149,16 +149,19 @@ struct bdb_general_features { u8 ssc_freq:1; u8 enable_lfp_on_override:1; u8 disable_ssc_ddt:1; - u8 rsvd7:1; + u8 underscan_vga_timings:1; u8 display_clock_mode:1; - u8 rsvd8:1; /* finish byte */ + u8 vbios_hotplug_support:1; /* bits 3 */ u8 disable_smooth_vision:1; u8 single_dvi:1; - u8 rsvd9:1; + u8 rotate_180:1; /* 181 */ u8 fdi_rx_polarity_inverted:1; - u8 rsvd10:4; /* finish byte */ + u8 vbios_extended_mode:1; /* 160 */ + u8 copy_ilfp_dtd_to_sdvo_lvds_dtd:1; /* 160 */ + u8 panel_best_fit_timing:1; /* 160 */ + u8 ignore_strap_state:1; /* 160 */ /* bits 4 */ u8 legacy_monitor_detect; @@ -167,9 +170,10 @@ struct bdb_general_features { u8 int_crt_support:1; u8 int_tv_support:1; u8 int_efp_support:1; - u8 dp_ssc_enb:1; /* PCH attached eDP supports SSC */ + u8 dp_ssc_enable:1; /* PCH attached eDP supports SSC */ u8 dp_ssc_freq:1; /* SSC freq for PCH attached eDP */ - u8 rsvd11:3; /* finish byte */ + u8 dp_ssc_dongle_supported:1; + u8 rsvd11:2; /* finish byte */ } __packed; /* pre-915 */ @@ -206,6 +210,56 @@ struct bdb_general_features { #define DEVICE_TYPE_LFP_LVDS_DUAL 0x5162 #define DEVICE_TYPE_LFP_LVDS_DUAL_HDCP 0x51e2 +/* Add the device class for LFP, TV, HDMI */ +#define DEVICE_TYPE_INT_LFP 0x1022 +#define DEVICE_TYPE_INT_TV 0x1009 +#define DEVICE_TYPE_HDMI 0x60D2 +#define DEVICE_TYPE_DP 0x68C6 
+#define DEVICE_TYPE_DP_DUAL_MODE 0x60D6 +#define DEVICE_TYPE_eDP 0x78C6 + +#define DEVICE_TYPE_CLASS_EXTENSION (1 << 15) +#define DEVICE_TYPE_POWER_MANAGEMENT (1 << 14) +#define DEVICE_TYPE_HOTPLUG_SIGNALING (1 << 13) +#define DEVICE_TYPE_INTERNAL_CONNECTOR (1 << 12) +#define DEVICE_TYPE_NOT_HDMI_OUTPUT (1 << 11) +#define DEVICE_TYPE_MIPI_OUTPUT (1 << 10) +#define DEVICE_TYPE_COMPOSITE_OUTPUT (1 << 9) +#define DEVICE_TYPE_DUAL_CHANNEL (1 << 8) +#define DEVICE_TYPE_HIGH_SPEED_LINK (1 << 6) +#define DEVICE_TYPE_LVDS_SINGALING (1 << 5) +#define DEVICE_TYPE_TMDS_DVI_SIGNALING (1 << 4) +#define DEVICE_TYPE_VIDEO_SIGNALING (1 << 3) +#define DEVICE_TYPE_DISPLAYPORT_OUTPUT (1 << 2) +#define DEVICE_TYPE_DIGITAL_OUTPUT (1 << 1) +#define DEVICE_TYPE_ANALOG_OUTPUT (1 << 0) + +/* + * Bits we care about when checking for DEVICE_TYPE_eDP. Depending on the + * system, the other bits may or may not be set for eDP outputs. + */ +#define DEVICE_TYPE_eDP_BITS \ + (DEVICE_TYPE_INTERNAL_CONNECTOR | \ + DEVICE_TYPE_MIPI_OUTPUT | \ + DEVICE_TYPE_COMPOSITE_OUTPUT | \ + DEVICE_TYPE_DUAL_CHANNEL | \ + DEVICE_TYPE_LVDS_SINGALING | \ + DEVICE_TYPE_TMDS_DVI_SIGNALING | \ + DEVICE_TYPE_VIDEO_SIGNALING | \ + DEVICE_TYPE_DISPLAYPORT_OUTPUT | \ + DEVICE_TYPE_ANALOG_OUTPUT) + +#define DEVICE_TYPE_DP_DUAL_MODE_BITS \ + (DEVICE_TYPE_INTERNAL_CONNECTOR | \ + DEVICE_TYPE_MIPI_OUTPUT | \ + DEVICE_TYPE_COMPOSITE_OUTPUT | \ + DEVICE_TYPE_LVDS_SINGALING | \ + DEVICE_TYPE_TMDS_DVI_SIGNALING | \ + DEVICE_TYPE_VIDEO_SIGNALING | \ + DEVICE_TYPE_DISPLAYPORT_OUTPUT | \ + DEVICE_TYPE_DIGITAL_OUTPUT | \ + DEVICE_TYPE_ANALOG_OUTPUT) + #define DEVICE_CFG_NONE 0x00 #define DEVICE_CFG_12BIT_DVOB 0x01 #define DEVICE_CFG_12BIT_DVOC 0x02 @@ -226,77 +280,140 @@ struct bdb_general_features { #define DEVICE_WIRE_DVOB_MASTER 0x0d #define DEVICE_WIRE_DVOC_MASTER 0x0e +/* dvo_port pre BDB 155 */ #define DEVICE_PORT_DVOA 0x00 /* none on 845+ */ #define DEVICE_PORT_DVOB 0x01 #define DEVICE_PORT_DVOC 0x02 +/* dvo_port BDB 155+ */ +#define DVO_PORT_HDMIA 0 +#define DVO_PORT_HDMIB 1 +#define DVO_PORT_HDMIC 2 +#define DVO_PORT_HDMID 3 +#define DVO_PORT_LVDS 4 +#define DVO_PORT_TV 5 +#define DVO_PORT_CRT 6 +#define DVO_PORT_DPB 7 +#define DVO_PORT_DPC 8 +#define DVO_PORT_DPD 9 +#define DVO_PORT_DPA 10 +#define DVO_PORT_DPE 11 /* 193 */ +#define DVO_PORT_HDMIE 12 /* 193 */ +#define DVO_PORT_MIPIA 21 /* 171 */ +#define DVO_PORT_MIPIB 22 /* 171 */ +#define DVO_PORT_MIPIC 23 /* 171 */ +#define DVO_PORT_MIPID 24 /* 171 */ + +#define HDMI_MAX_DATA_RATE_PLATFORM 0 /* 204 */ +#define HDMI_MAX_DATA_RATE_297 1 /* 204 */ +#define HDMI_MAX_DATA_RATE_165 2 /* 204 */ + +#define LEGACY_CHILD_DEVICE_CONFIG_SIZE 33 + +/* DDC Bus DDI Type 155+ */ +enum vbt_gmbus_ddi { + DDC_BUS_DDI_B = 0x1, + DDC_BUS_DDI_C, + DDC_BUS_DDI_D, + DDC_BUS_DDI_F, +}; + /* - * We used to keep this struct but without any version control. We should avoid - * using it in the future, but it should be safe to keep using it in the old - * code. Do not change; we rely on its size. + * The child device config, aka the display device data structure, provides a + * description of a port and its configuration on the platform. + * + * The child device config size has been increased, and fields have been added + * and their meaning has changed over time. Care must be taken when accessing + * basically any of the fields to ensure the correct interpretation for the BDB + * version in question. 
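+ *
+ * As an illustrative sketch (assuming the usual bdb->version checks in
+ * intel_bios.c), a field tagged with BDB version 158 must be gated as:
+ *
+ *	if (bdb->version >= 158)
+ *		hdmi = child->hdmi_support;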
+ * + * When we copy the child device configs to dev_priv->vbt.child_dev, we reserve + * space for the full structure below, and initialize the tail not actually + * present in VBT to zeros. Accessing those fields is fine, as long as the + * default zero is taken into account, again according to the BDB version. + * + * BDB versions 155 and below are considered legacy, and version 155 seems to be + * a baseline for some of the VBT documentation. When adding new fields, please + * include the BDB version when the field was added, if it's above that. */ -struct old_child_dev_config { +struct child_device_config { u16 handle; - u16 device_type; - u8 device_id[10]; /* ascii string */ - u16 addin_offset; - u8 dvo_port; /* See Device_PORT_* above */ - u8 i2c_pin; - u8 slave_addr; - u8 ddc_pin; - u16 edid_ptr; - u8 dvo_cfg; /* See DEVICE_CFG_* above */ - u8 dvo2_port; - u8 i2c2_pin; - u8 slave2_addr; - u8 ddc2_pin; - u8 capabilities; - u8 dvo_wiring;/* See DEVICE_WIRE_* above */ - u8 dvo2_wiring; - u16 extended_type; - u8 dvo_function; -} __packed; + u16 device_type; /* See DEVICE_TYPE_* above */ + + union { + u8 device_id[10]; /* ascii string */ + struct { + u8 i2c_speed; + u8 dp_onboard_redriver; /* 158 */ + u8 dp_ondock_redriver; /* 158 */ + u8 hdmi_level_shifter_value:5; /* 169 */ + u8 hdmi_max_data_rate:3; /* 204 */ + u16 dtd_buf_ptr; /* 161 */ + u8 edidless_efp:1; /* 161 */ + u8 compression_enable:1; /* 198 */ + u8 compression_method:1; /* 198 */ + u8 ganged_edp:1; /* 202 */ + u8 reserved0:4; + u8 compression_structure_index:4; /* 198 */ + u8 reserved1:4; + u8 slave_port; /* 202 */ + u8 reserved2; + } __packed; + } __packed; -/* This one contains field offsets that are known to be common for all BDB - * versions. Notice that the meaning of the contents contents may still change, - * but at least the offsets are consistent. */ - -struct common_child_dev_config { - u16 handle; - u16 device_type; - u8 not_common1[12]; - u8 dvo_port; - u8 not_common2[2]; + u16 addin_offset; + u8 dvo_port; /* See DEVICE_PORT_* and DVO_PORT_* above */ + u8 i2c_pin; + u8 slave_addr; u8 ddc_pin; u16 edid_ptr; u8 dvo_cfg; /* See DEVICE_CFG_* above */ - u8 efp_routed:1; - u8 lane_reversal:1; - u8 lspcon:1; - u8 iboost:1; - u8 hpd_invert:1; - u8 flag_reserved:3; - u8 hdmi_support:1; - u8 dp_support:1; - u8 tmds_support:1; - u8 support_reserved:5; - u8 aux_channel; - u8 not_common3[11]; - u8 iboost_level; -} __packed; + union { + struct { + u8 dvo2_port; + u8 i2c2_pin; + u8 slave2_addr; + u8 ddc2_pin; + } __packed; + struct { + u8 efp_routed:1; /* 158 */ + u8 lane_reversal:1; /* 184 */ + u8 lspcon:1; /* 192 */ + u8 iboost:1; /* 196 */ + u8 hpd_invert:1; /* 196 */ + u8 flag_reserved:3; + u8 hdmi_support:1; /* 158 */ + u8 dp_support:1; /* 158 */ + u8 tmds_support:1; /* 158 */ + u8 support_reserved:5; + u8 aux_channel; + u8 dongle_detect; + } __packed; + } __packed; + + u8 pipe_cap:2; + u8 sdvo_stall:1; /* 158 */ + u8 hpd_status:2; + u8 integrated_encoder:1; + u8 capabilities_reserved:2; + u8 dvo_wiring; /* See DEVICE_WIRE_* above */ + + union { + u8 dvo2_wiring; + u8 mipi_bridge_type; /* 171 */ + } __packed; -/* This field changes depending on the BDB version, so the most reliable way to - * read it is by checking the BDB version and reading the raw pointer. */ -union child_device_config { - /* This one is safe to be used anywhere, but the code should still check - * the BDB version. */ - u8 raw[33]; - /* This one should only be kept for legacy code. 
*/ - struct old_child_dev_config old; - /* This one should also be safe to use anywhere, even without version - * checks. */ - struct common_child_dev_config common; + u16 extended_type; + u8 dvo_function; + u8 dp_usb_type_c:1; /* 195 */ + u8 flags2_reserved:7; /* 195 */ + u8 dp_gpio_index; /* 195 */ + u16 dp_gpio_pin_num; /* 195 */ + u8 dp_iboost_level:4; /* 196 */ + u8 hdmi_iboost_level:4; /* 196 */ + u8 dp_max_link_rate:2; /* 216 CNL+ */ + u8 dp_max_link_rate_reserved:6; /* 216 */ } __packed; struct bdb_general_definitions { @@ -585,23 +702,38 @@ struct bdb_driver_features { #define EDP_VSWING_1_2V 3 -struct edp_link_params { +struct edp_fast_link_params { u8 rate:4; u8 lanes:4; u8 preemphasis:4; u8 vswing:4; } __packed; +struct edp_pwm_delays { + u16 pwm_on_to_backlight_enable; + u16 backlight_disable_to_pwm_off; +} __packed; + +struct edp_full_link_params { + u8 preemphasis:4; + u8 vswing:4; +} __packed; + struct bdb_edp { struct edp_power_seq power_seqs[16]; u32 color_depth; - struct edp_link_params link_params[16]; + struct edp_fast_link_params fast_link_params[16]; u32 sdrrs_msa_timing_delay; /* ith bit indicates enabled/disabled for (i+1)th panel */ - u16 edp_s3d_feature; - u16 edp_t3_optimization; - u64 edp_vswing_preemph; /* v173 */ + u16 edp_s3d_feature; /* 162 */ + u16 edp_t3_optimization; /* 165 */ + u64 edp_vswing_preemph; /* 173 */ + u16 fast_link_training; /* 182 */ + u16 dpcd_600h_write_required; /* 185 */ + struct edp_pwm_delays pwm_delays[16]; /* 186 */ + u16 full_link_params_provided; /* 199 */ + struct edp_full_link_params full_link_params[16]; /* 199 */ } __packed; struct psr_table { @@ -745,81 +877,6 @@ struct bdb_psr { #define SWF14_APM_STANDBY 0x1 #define SWF14_APM_RESTORE 0x0 -/* Add the device class for LFP, TV, HDMI */ -#define DEVICE_TYPE_INT_LFP 0x1022 -#define DEVICE_TYPE_INT_TV 0x1009 -#define DEVICE_TYPE_HDMI 0x60D2 -#define DEVICE_TYPE_DP 0x68C6 -#define DEVICE_TYPE_DP_DUAL_MODE 0x60D6 -#define DEVICE_TYPE_eDP 0x78C6 - -#define DEVICE_TYPE_CLASS_EXTENSION (1 << 15) -#define DEVICE_TYPE_POWER_MANAGEMENT (1 << 14) -#define DEVICE_TYPE_HOTPLUG_SIGNALING (1 << 13) -#define DEVICE_TYPE_INTERNAL_CONNECTOR (1 << 12) -#define DEVICE_TYPE_NOT_HDMI_OUTPUT (1 << 11) -#define DEVICE_TYPE_MIPI_OUTPUT (1 << 10) -#define DEVICE_TYPE_COMPOSITE_OUTPUT (1 << 9) -#define DEVICE_TYPE_DUAL_CHANNEL (1 << 8) -#define DEVICE_TYPE_HIGH_SPEED_LINK (1 << 6) -#define DEVICE_TYPE_LVDS_SINGALING (1 << 5) -#define DEVICE_TYPE_TMDS_DVI_SIGNALING (1 << 4) -#define DEVICE_TYPE_VIDEO_SIGNALING (1 << 3) -#define DEVICE_TYPE_DISPLAYPORT_OUTPUT (1 << 2) -#define DEVICE_TYPE_DIGITAL_OUTPUT (1 << 1) -#define DEVICE_TYPE_ANALOG_OUTPUT (1 << 0) - -/* - * Bits we care about when checking for DEVICE_TYPE_eDP - * Depending on the system, the other bits may or may not - * be set for eDP outputs. 
- */ -#define DEVICE_TYPE_eDP_BITS \ - (DEVICE_TYPE_INTERNAL_CONNECTOR | \ - DEVICE_TYPE_MIPI_OUTPUT | \ - DEVICE_TYPE_COMPOSITE_OUTPUT | \ - DEVICE_TYPE_DUAL_CHANNEL | \ - DEVICE_TYPE_LVDS_SINGALING | \ - DEVICE_TYPE_TMDS_DVI_SIGNALING | \ - DEVICE_TYPE_VIDEO_SIGNALING | \ - DEVICE_TYPE_DISPLAYPORT_OUTPUT | \ - DEVICE_TYPE_ANALOG_OUTPUT) - -#define DEVICE_TYPE_DP_DUAL_MODE_BITS \ - (DEVICE_TYPE_INTERNAL_CONNECTOR | \ - DEVICE_TYPE_MIPI_OUTPUT | \ - DEVICE_TYPE_COMPOSITE_OUTPUT | \ - DEVICE_TYPE_LVDS_SINGALING | \ - DEVICE_TYPE_TMDS_DVI_SIGNALING | \ - DEVICE_TYPE_VIDEO_SIGNALING | \ - DEVICE_TYPE_DISPLAYPORT_OUTPUT | \ - DEVICE_TYPE_DIGITAL_OUTPUT | \ - DEVICE_TYPE_ANALOG_OUTPUT) - -/* define the DVO port for HDMI output type */ -#define DVO_B 1 -#define DVO_C 2 -#define DVO_D 3 - -/* Possible values for the "DVO Port" field for versions >= 155: */ -#define DVO_PORT_HDMIA 0 -#define DVO_PORT_HDMIB 1 -#define DVO_PORT_HDMIC 2 -#define DVO_PORT_HDMID 3 -#define DVO_PORT_LVDS 4 -#define DVO_PORT_TV 5 -#define DVO_PORT_CRT 6 -#define DVO_PORT_DPB 7 -#define DVO_PORT_DPC 8 -#define DVO_PORT_DPD 9 -#define DVO_PORT_DPA 10 -#define DVO_PORT_DPE 11 -#define DVO_PORT_HDMIE 12 -#define DVO_PORT_MIPIA 21 -#define DVO_PORT_MIPIB 22 -#define DVO_PORT_MIPIC 23 -#define DVO_PORT_MIPID 24 - /* Block 52 contains MIPI configuration block * 6 * bdb_mipi_config, followed by 6 pps data block * block below diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c index c5c7e8efbdd3..a2632df39173 100644 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -37,8 +37,7 @@ static void huge_free_pages(struct drm_i915_gem_object *obj, kfree(pages); } -static struct sg_table * -huge_get_pages(struct drm_i915_gem_object *obj) +static int huge_get_pages(struct drm_i915_gem_object *obj) { #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) const unsigned long nreal = obj->scratch / PAGE_SIZE; @@ -49,11 +48,11 @@ huge_get_pages(struct drm_i915_gem_object *obj) pages = kmalloc(sizeof(*pages), GFP); if (!pages) - return ERR_PTR(-ENOMEM); + return -ENOMEM; if (sg_alloc_table(pages, npages, GFP)) { kfree(pages); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } sg = pages->sgl; @@ -81,11 +80,14 @@ huge_get_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, pages)) goto err; - return pages; + __i915_gem_object_set_pages(obj, pages, PAGE_SIZE); + + return 0; err: huge_free_pages(obj, pages); - return ERR_PTR(-ENOMEM); + + return -ENOMEM; #undef GFP } diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c new file mode 100644 index 000000000000..2ea69394f428 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -0,0 +1,1769 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include <linux/prime_numbers.h> + +#include "mock_drm.h" +#include "i915_random.h" + +static const unsigned int page_sizes[] = { + I915_GTT_PAGE_SIZE_2M, + I915_GTT_PAGE_SIZE_64K, + I915_GTT_PAGE_SIZE_4K, +}; + +static unsigned int get_largest_page_size(struct drm_i915_private *i915, + u64 rem) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { + unsigned int page_size = page_sizes[i]; + + if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size) + return page_size; + } + + return 0; +} + +static void huge_pages_free_pages(struct sg_table *st) +{ + struct scatterlist *sg; + + for (sg = st->sgl; sg; sg = __sg_next(sg)) { + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + } + + sg_free_table(st); + kfree(st); +} + +static int get_huge_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + unsigned int page_mask = obj->mm.page_mask; + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + u64 rem; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { + kfree(st); + return -ENOMEM; + } + + rem = obj->base.size; + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + + /* + * Our goal here is simple: we want to greedily fill the object from + * largest to smallest page-size, while ensuring that we use *every* + * page-size as per the given page-mask.
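+ *
+ * For instance, a page_mask of 2M | 64K | 4K on a 2M + 64K + 4K
+ * object should yield exactly one sg chunk of each size, filled
+ * in from largest to smallest.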
+ */ + do { + unsigned int bit = ilog2(page_mask); + unsigned int page_size = BIT(bit); + int order = get_order(page_size); + + do { + struct page *page; + + GEM_BUG_ON(order >= MAX_ORDER); + page = alloc_pages(GFP | __GFP_ZERO, order); + if (!page) + goto err; + + sg_set_page(sg, page, page_size, 0); + sg_page_sizes |= page_size; + st->nents++; + + rem -= page_size; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = __sg_next(sg); + } while ((rem - ((page_size-1) & page_mask)) >= page_size); + + page_mask &= (page_size-1); + } while (page_mask); + + if (i915_gem_gtt_prepare_pages(obj, st)) + goto err; + + obj->mm.madv = I915_MADV_DONTNEED; + + GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err: + sg_set_page(sg, NULL, 0, 0); + sg_mark_end(sg); + huge_pages_free_pages(st); + + return -ENOMEM; +} + +static void put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_pages_free_pages(pages); + + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops huge_page_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = get_huge_pages, + .put_pages = put_huge_pages, +}; + +static struct drm_i915_gem_object * +huge_pages_object(struct drm_i915_private *i915, + u64 size, + unsigned int page_mask) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); + + if (size >> PAGE_SHIFT > INT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &huge_page_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + obj->mm.page_mask = page_mask; + + return obj; +} + +static int fake_get_huge_pages(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + const u64 max_len = rounddown_pow_of_two(UINT_MAX); + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + u64 rem; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { + kfree(st); + return -ENOMEM; + } + + /* Use optimal page sized chunks to fill in the sg table */ + rem = obj->base.size; + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + do { + unsigned int page_size = get_largest_page_size(i915, rem); + unsigned int len = min(page_size * div_u64(rem, page_size), + max_len); + + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = len; + sg_dma_len(sg) = len; + sg_dma_address(sg) = page_size; + + sg_page_sizes |= len; + + st->nents++; + + rem -= len; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = sg_next(sg); + } while (1); + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; +} + +static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + unsigned int page_size; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, 1, GFP)) { + kfree(st); + return -ENOMEM; + } + + sg = st->sgl; + 
st->nents = 1; + + page_size = get_largest_page_size(i915, obj->base.size); + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = obj->base.size; + sg_dma_len(sg) = obj->base.size; + sg_dma_address(sg) = page_size; + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg->length); + + return 0; +#undef GFP +} + +static void fake_free_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static void fake_put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + fake_free_huge_pages(obj, pages); + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops fake_ops = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages, + .put_pages = fake_put_huge_pages, +}; + +static const struct drm_i915_gem_object_ops fake_ops_single = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages_single, + .put_pages = fake_put_huge_pages, +}; + +static struct drm_i915_gem_object * +fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + + if (size >> PAGE_SHIFT > UINT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + + if (single) + i915_gem_object_init(obj, &fake_ops_single); + else + i915_gem_object_init(obj, &fake_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + return obj; +} + +static int igt_check_page_sizes(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); + unsigned int supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj = vma->obj; + int err = 0; + + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { + pr_err("unsupported page_sizes.sg=%u, supported=%u\n", + vma->page_sizes.sg & ~supported, supported); + err = -EINVAL; + } + + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) { + pr_err("unsupported page_sizes.gtt=%u, supported=%u\n", + vma->page_sizes.gtt & ~supported, supported); + err = -EINVAL; + } + + if (vma->page_sizes.phys != obj->mm.page_sizes.phys) { + pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n", + vma->page_sizes.phys, obj->mm.page_sizes.phys); + err = -EINVAL; + } + + if (vma->page_sizes.sg != obj->mm.page_sizes.sg) { + pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n", + vma->page_sizes.sg, obj->mm.page_sizes.sg); + err = -EINVAL; + } + + if (obj->mm.page_sizes.gtt) { + pr_err("obj->page_sizes.gtt(%u) should never be set\n", + obj->mm.page_sizes.gtt); + err = -EINVAL; + } + + return err; +} + +static int igt_mock_exhaust_device_supported_pages(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned int saved_mask = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int i, j, single; + int err; + + /* + * Sanity check creating objects with every valid page support + * combination for our mock device. 
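+ *
+ * With page_sizes = { 2M, 64K, 4K } that is seven combinations
+ * (4K, 64K, 64K | 4K, ... 2M | 64K | 4K), each tried both as a
+ * single sg chunk and as one chunk per page size.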
+ */ + + for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { + unsigned int combination = 0; + + for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { + if (i & BIT(j)) + combination |= page_sizes[j]; + } + + mkwrite_device_info(i915)->page_sizes = combination; + + for (single = 0; single <= 1; ++single) { + obj = fake_huge_pages_object(i915, combination, !!single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_device; + } + + if (obj->base.size != combination) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, combination); + err = -EINVAL; + goto out_put; + } + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.sg != combination) { + pr_err("page_sizes.sg=%u, expected=%u\n", + vma->page_sizes.sg, combination); + err = -EINVAL; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + + if (err) + goto out_device; + } + } + + goto out_device; + +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_device: + mkwrite_device_info(i915)->page_sizes = saved_mask; + + return err; +} + +static int igt_mock_ppgtt_misaligned_dma(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + int bit; + int err; + + /* + * Sanity check dma misalignment for huge pages -- the dma addresses we + * insert into the paging structures need to always respect the page + * size alignment. + */ + + bit = ilog2(I915_GTT_PAGE_SIZE_64K); + + for_each_set_bit_from(bit, &supported, + ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + IGT_TIMEOUT(end_time); + unsigned int page_size = BIT(bit); + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int offset; + unsigned int size = + round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1; + struct i915_vma *vma; + + obj = fake_huge_pages_object(i915, size, true); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->base.size != size) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, size); + err = -EINVAL; + goto out_put; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + /* Force the page size for this object */ + obj->mm.page_sizes.sg = page_size; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != page_size) { + pr_err("page_sizes.gtt=%u, expected %u\n", + vma->page_sizes.gtt, page_size); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + /* + * Try all the other valid offsets until the next + * boundary -- should always fall back to using 4K + * pages. 
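+ *
+ * With a 64K page size, for instance, this walks offsets
+ * 4K, 8K, ... 60K; none of those is 64K aligned, so every
+ * binding is expected to come back with 4K gtt pages.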
+ */ + for (offset = 4096; offset < page_size; offset += 4096) { + err = i915_vma_unbind(vma); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags | offset); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) { + pr_err("page_sizes.gtt=%u, expected %lu\n", + vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + if (igt_timeout(end_time, + "%s timed out at offset %x with page-size %x\n", + __func__, offset, page_size)) + break; + } + + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static void close_object_list(struct list_head *objects, + struct i915_hw_ppgtt *ppgtt) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (!IS_ERR(vma)) + i915_vma_close(vma); + + list_del(&obj->st_link); + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } +} + +static int igt_mock_ppgtt_huge_fill(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + unsigned long max_pages = ppgtt->base.total >> PAGE_SHIFT; + unsigned long page_num; + bool single = false; + LIST_HEAD(objects); + IGT_TIMEOUT(end_time); + int err = -ENODEV; + + for_each_prime_number_from(page_num, 1, max_pages) { + struct drm_i915_gem_object *obj; + u64 size = page_num << PAGE_SHIFT; + struct i915_vma *vma; + unsigned int expected_gtt = 0; + int i; + + obj = fake_huge_pages_object(i915, size, single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + if (obj->base.size != size) { + pr_err("obj->base.size=%zd, expected=%llu\n", + obj->base.size, size); + i915_gem_object_put(obj); + err = -EINVAL; + break; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + break; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + break; + + err = igt_check_page_sizes(vma); + if (err) { + i915_vma_unpin(vma); + break; + } + + /* + * Figure out the expected gtt page size knowing that we go from + * largest to smallest page size sg chunks, and that we align to + * the largest page size. 
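+ *
+ * For example, with 2M | 64K | 4K supported, a size of
+ * 2M + 64K + 4K first gives expected_gtt = 2M | 64K | 4K,
+ * and the 64K bit is then dropped below since 64K entries
+ * cannot share a 2M page-table block with 4K entries.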
+ */ + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { + unsigned int page_size = page_sizes[i]; + + if (HAS_PAGE_SIZES(i915, page_size) && + size >= page_size) { + expected_gtt |= page_size; + size &= page_size-1; + } + } + + GEM_BUG_ON(!expected_gtt); + GEM_BUG_ON(size); + + if (expected_gtt & I915_GTT_PAGE_SIZE_4K) + expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; + + i915_vma_unpin(vma); + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + break; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + break; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n", + vma->page_sizes.gtt, expected_gtt, + obj->base.size, yesno(!!single)); + err = -EINVAL; + break; + } + + if (igt_timeout(end_time, + "%s timed out at size %zd\n", + __func__, obj->base.size)) + break; + + single = !single; + } + + close_object_list(&objects, ppgtt); + + if (err == -ENOMEM || err == -ENOSPC) + err = 0; + + return err; +} + +static int igt_mock_ppgtt_64K(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->base.i915; + struct drm_i915_gem_object *obj; + const struct object_info { + unsigned int size; + unsigned int gtt; + unsigned int offset; + } objects[] = { + /* Cases with forced padding/alignment */ + { + .size = SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_64K + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_64K - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + /* Try without any forced padding/alignment */ + { + .size = SZ_64K, + .offset = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + { + .size = SZ_128K, + .offset = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + }; + struct i915_vma *vma; + int i, single; + int err; + + /* + * Sanity check some of the trickiness with 64K pages -- either we can + * safely mark the whole page-table(2M block) as 64K, or we have to + * always fallback to 4K. + */ + + if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K)) + return 0; + + for (i = 0; i < ARRAY_SIZE(objects); ++i) { + unsigned int size = objects[i].size; + unsigned int expected_gtt = objects[i].gtt; + unsigned int offset = objects[i].offset; + unsigned int flags = PIN_USER; + + for (single = 0; single <= 1; single++) { + obj = fake_huge_pages_object(i915, size, !!single); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_object_put; + + /* + * Disable 2M pages -- We only want to use 64K/4K pages + * for this test. 
+ */ + obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object_unpin; + } + + if (offset) + flags |= PIN_OFFSET_FIXED | offset; + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_vma_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_vma_unpin; + + if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + goto out_vma_unpin; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + goto out_vma_unpin; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, i=%d, single=%s\n", + vma->page_sizes.gtt, expected_gtt, i, + yesno(!!single)); + err = -EINVAL; + goto out_vma_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + } + + return 0; + +out_vma_unpin: + i915_vma_unpin(vma); +out_vma_close: + i915_vma_close(vma); +out_object_unpin: + i915_gem_object_unpin_pages(obj); +out_object_put: + i915_gem_object_put(obj); + + return err; +} + +static struct i915_vma * +gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) +{ + struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); + const int gen = INTEL_GEN(vma->vm->i915); + unsigned int count = vma->size >> PAGE_SHIFT; + struct drm_i915_gem_object *obj; + struct i915_vma *batch; + unsigned int size; + u32 *cmd; + int n; + int err; + + size = (1 + 4 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = val; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 
1 << 22 : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = val; + } else { + *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; + *cmd++ = offset; + *cmd++ = val; + } + + offset += PAGE_SIZE; + } + + *cmd = MI_BATCH_BUFFER_END; + + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + batch = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (err) + goto err; + + return batch; + +err: + i915_gem_object_put(obj); + + return ERR_PTR(err); +} + +static int gpu_write(struct i915_vma *vma, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u32 dword, + u32 value) +{ + struct drm_i915_gem_request *rq; + struct i915_vma *batch; + int flags = 0; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + if (err) + return err; + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + batch = gpu_write_dw(vma, dword * sizeof(u32), value); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_request; + } + + i915_vma_move_to_active(batch, rq, 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + + reservation_object_lock(vma->resv, NULL); + reservation_object_add_excl_fence(vma->resv, &rq->fence); + reservation_object_unlock(vma->resv); + +err_request: + __i915_add_request(rq, err == 0); + + return err; +} + +static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned int needs_flush; + unsigned long n; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { + u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); + + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(ptr, PAGE_SIZE); + + if (ptr[dword] != val) { + pr_err("n=%lu ptr[%u]=%u, val=%u\n", + n, dword, ptr[dword], val); + kunmap_atomic(ptr); + err = -EINVAL; + break; + } + + kunmap_atomic(ptr); + } + + i915_gem_obj_finish_shmem_access(obj); + + return err; +} + +static int __igt_write_huge(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, + u64 size, u64 offset, + u32 dword, u32 val) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_unbind(vma); + if (err) + goto out_vma_close; + + err = i915_vma_pin(vma, size, 0, flags | offset); + if (err) { + /* + * The ggtt may have some pages reserved so + * refrain from erroring out. 
+ */ + if (err == -ENOSPC && i915_is_ggtt(vm)) + err = 0; + + goto out_vma_close; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_vma_unpin; + + err = gpu_write(vma, ctx, engine, dword, val); + if (err) { + pr_err("gpu-write failed at offset=%llx\n", offset); + goto out_vma_unpin; + } + + err = cpu_check(obj, dword, val); + if (err) { + pr_err("cpu-check failed at offset=%llx\n", offset); + goto out_vma_unpin; + } + +out_vma_unpin: + i915_vma_unpin(vma); +out_vma_close: + i915_vma_close(vma); + + return err; +} + +static int igt_write_huge(struct i915_gem_context *ctx, + struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + static struct intel_engine_cs *engines[I915_NUM_ENGINES]; + struct intel_engine_cs *engine; + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + unsigned int max_page_size; + unsigned int id; + u64 max; + u64 num; + u64 size; + int *order; + int i, n; + int err = 0; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + + size = obj->base.size; + if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) + size = round_up(size, I915_GTT_PAGE_SIZE_2M); + + max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); + max = div_u64((vm->total - size), max_page_size); + + n = 0; + for_each_engine(engine, i915, id) { + if (!intel_engine_can_store_dword(engine)) { + pr_info("store-dword-imm not supported on engine=%u\n", id); + continue; + } + engines[n++] = engine; + } + + if (!n) + return 0; + + /* + * To keep things interesting when alternating between engines in our + * randomized order, let's also make feeding the same engine a few + * times in succession a possibility by enlarging the permutation array. + */ + order = i915_random_order(n * I915_NUM_ENGINES, &prng); + if (!order) + return -ENOMEM; + + /* + * Try various offsets in an ascending/descending fashion until we + * time out -- we want to avoid issues hidden by effectively always using + * offset = 0. + */ + i = 0; + for_each_prime_number_from(num, 0, max) { + u64 offset_low = num * max_page_size; + u64 offset_high = (max - num) * max_page_size; + u32 dword = offset_in_page(num) / 4; + + engine = engines[order[i] % n]; + i = (i + 1) % (n * I915_NUM_ENGINES); + + err = __igt_write_huge(ctx, engine, obj, size, offset_low, dword, num + 1); + if (err) + break; + + err = __igt_write_huge(ctx, engine, obj, size, offset_high, dword, num + 1); + if (err) + break; + + if (igt_timeout(end_time, + "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n", + __func__, engine->id, offset_low, offset_high, max_page_size)) + break; + } + + kfree(order); + + return err; +} + +static int igt_ppgtt_exhaust_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + static unsigned int pages[ARRAY_SIZE(page_sizes)]; + struct drm_i915_gem_object *obj; + unsigned int size_mask; + unsigned int page_mask; + int n, i; + int err = -ENODEV; + + if (supported == I915_GTT_PAGE_SIZE_4K) + return 0; + + /* + * Sanity check creating objects with a varying mix of page sizes -- + * ensuring that our writes land in the right place. 
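igt_write_huge() above leans on two randomization tricks: an oversized permutation so the same engine may be fed several times in a row, and prime-numbered steps probing offsets from both ends of the address space. A rough userspace sketch of the same selection scheme, with made-up engine and size constants:

#include <stdio.h>
#include <stdlib.h>

#define NUM_ENGINES 5			/* stand-in for I915_NUM_ENGINES */

static int is_prime(unsigned long x)
{
	for (unsigned long d = 2; d * d <= x; d++)
		if (x % d == 0)
			return 0;
	return x >= 2;
}

int main(void)
{
	int n = 3;			/* engines that can store a dword */
	int len = n * NUM_ENGINES;	/* oversized, so repeats are possible */
	int order[NUM_ENGINES * NUM_ENGINES];
	unsigned long max = 64;		/* (vm->total - size) / max_page_size */
	int i, j;

	for (i = 0; i < len; i++)	/* Fisher-Yates shuffle of 0..len-1 */
		order[i] = i;
	for (i = len - 1; i > 0; i--) {
		int t;

		j = rand() % (i + 1);
		t = order[i]; order[i] = order[j]; order[j] = t;
	}

	i = 0;
	for (unsigned long num = 2; num <= max; num++) {
		if (!is_prime(num))
			continue;	/* ~for_each_prime_number_from() */
		/* offsets are in units of max_page_size here */
		printf("engine %d: offset_low=%lu offset_high=%lu\n",
		       order[i] % n, num, max - num);
		i = (i + 1) % len;
	}
	return 0;
}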
+ */ + + n = 0; + for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) + pages[n++] = BIT(i); + + for (size_mask = 2; size_mask < BIT(n); size_mask++) { + unsigned int size = 0; + + for (i = 0; i < n; i++) { + if (size_mask & BIT(i)) + size |= pages[i]; + } + + /* + * For our page mask we want to enumerate all the page-size + * combinations which will fit into our chosen object size. + */ + for (page_mask = 2; page_mask <= size_mask; page_mask++) { + unsigned int page_sizes = 0; + + for (i = 0; i < n; i++) { + if (page_mask & BIT(i)) + page_sizes |= pages[i]; + } + + /* + * Ensure that we can actually fill the given object + * with our chosen page mask. + */ + if (!IS_ALIGNED(size, BIT(__ffs(page_sizes)))) + continue; + + obj = huge_pages_object(i915, size, page_sizes); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_device; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + + if (err == -ENOMEM) { + pr_info("unable to get pages, size=%u, pages=%u\n", + size, page_sizes); + err = 0; + break; + } + + pr_err("pin_pages failed, size_mask=%u, page_mask=%u\n", + size_mask, page_mask); + + goto out_device; + } + + /* Force the page-size for the gtt insertion */ + obj->mm.page_sizes.sg = page_sizes; + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("exhaust write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + } + + goto out_device; + +out_unpin: + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); +out_device: + mkwrite_device_info(i915)->page_sizes = supported; + + return err; +} + +static int igt_ppgtt_internal_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_64K, + SZ_128K, + SZ_256K, + SZ_512K, + SZ_1M, + SZ_2M, + }; + int i; + int err; + + /* + * Sanity check that the HW uses huge pages correctly through internal + * -- ensure that our writes land in the right place. + */ + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { + pr_info("internal unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("internal write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) +{ + return i915->mm.gemfs && has_transparent_hugepage(); +} + +static int igt_ppgtt_gemfs_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_2M, + SZ_4M, + SZ_8M, + SZ_16M, + SZ_32M, + }; + int i; + int err; + + /* + * Sanity check that the HW uses huge pages correctly through gemfs -- + * ensure that our writes land in the right place. 
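The nested size_mask/page_mask walk in igt_ppgtt_exhaust_huge() above is compact but dense; this standalone sketch prints what it enumerates for a hypothetical device supporting 4K, 64K and 2M pages. The modulo test mirrors the IS_ALIGNED(size, BIT(__ffs(page_sizes))) filter:

#include <stdio.h>

int main(void)
{
	/* hypothetical supported page sizes: 4K, 64K, 2M */
	unsigned int pages[] = { 1u << 12, 1u << 16, 1u << 21 };
	int n = 3;
	unsigned int size_mask, page_mask;

	for (size_mask = 2; size_mask < (1u << n); size_mask++) {
		unsigned int size = 0;

		for (int i = 0; i < n; i++)
			if (size_mask & (1u << i))
				size |= pages[i];

		for (page_mask = 2; page_mask <= size_mask; page_mask++) {
			unsigned int sizes = 0;

			for (int i = 0; i < n; i++)
				if (page_mask & (1u << i))
					sizes |= pages[i];

			/* the object must fill with the smallest page used */
			if (size % (sizes & -sizes))
				continue;

			printf("object %u bytes, page sizes %#x\n",
			       size, sizes);
		}
	}
	return 0;
}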
+ */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("gemfs write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_ppgtt_pin_update(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *dev_priv = ctx->i915; + unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + int first, last; + int err; + + /* + * Make sure there's no funny business when doing a PIN_UPDATE -- in the + * past we had a subtle issue with being able to incorrectly do multiple + * alloc va ranges on the same object when doing a PIN_UPDATE, which + * resulted in some pretty nasty bugs, though only when using + * huge-gtt-pages. + */ + + if (!USES_FULL_48BIT_PPGTT(dev_priv)) { + pr_info("48b PPGTT not supported, skipping\n"); + return 0; + } + + first = ilog2(I915_GTT_PAGE_SIZE_64K); + last = ilog2(I915_GTT_PAGE_SIZE_2M); + + for_each_set_bit_from(first, &supported, last + 1) { + unsigned int page_size = BIT(first); + + obj = i915_gem_object_create_internal(dev_priv, page_size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, SZ_2M, 0, flags); + if (err) + goto out_close; + + if (vma->page_sizes.sg < page_size) { + pr_info("Unable to allocate page-size %x, finishing test early\n", + page_size); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + if (vma->page_sizes.gtt != page_size) { + dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0); + + /* + * The only valid reason for this to ever fail would be + * if the dma-mapper screwed us over when we did the + * dma_map_sg(), since it has the final say over the dma + * address. 
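That triage rule -- only blame the driver when the dma address would actually have permitted the bigger GTT page -- fits in a few lines. A sketch with made-up addresses:

#include <stdint.h>
#include <stdio.h>

static const char *triage(uint64_t dma_addr, unsigned int gtt,
			  unsigned int want)
{
	if (gtt == want)
		return "ok";
	if (dma_addr & ((uint64_t)want - 1))
		return "dma address misaligned, finish the test early";
	return "aligned dma but small gtt page: driver bug, -EINVAL";
}

int main(void)
{
	/* wanting a 2M entry: the first address permits it, the second not */
	printf("%s\n", triage(0x40200000, 1u << 12, 1u << 21));
	printf("%s\n", triage(0x40201000, 1u << 12, 1u << 21));
	return 0;
}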
+ */ + if (IS_ALIGNED(addr, page_size)) { + pr_err("page_sizes.gtt=%u, expected=%u\n", + vma->page_sizes.gtt, page_size); + err = -EINVAL; + } else { + pr_info("dma address misaligned, finishing test early\n"); + } + + goto out_unpin; + } + + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + if (err) + goto out_unpin; + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + } + + obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &ppgtt->base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + /* + * Make sure we don't end up in a state where the pde still points at + * the 2M page while the pt we just filled in is left dangling -- we can + * check this by writing to the first page, where the write would then + * land in the now-stale 2M page. + */ + + err = gpu_write(vma, ctx, dev_priv->engine[RCS], 0, 0xdeadbeaf); + if (err) + goto out_unpin; + + err = cpu_check(obj, 0, 0xdeadbeaf); + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_tmpfs_fallback(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct vfsmount *gemfs = i915->mm.gemfs; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *vaddr; + int err = 0; + + /* + * Make sure that we don't burst into a ball of flames upon falling back + * to tmpfs, which we rely on if we encounter a failure when setting up + * gemfs. + */ + + i915->mm.gemfs = NULL; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_restore; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + *vaddr = 0xdeadbeaf; + + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_restore: + i915->mm.gemfs = gemfs; + + return err; +} + +static int igt_shrink_thp(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER; + int err; + + /* + * Sanity check shrinking a huge-paged object -- make sure nothing blows + * up. 
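igt_tmpfs_fallback() above forces that failure by hand; stripped of the GEM details the shape is just save, clear, exercise, restore. A minimal sketch of the pattern (the struct and callback here are hypothetical):

#include <stddef.h>

struct fake_mm {
	void *gemfs;	/* stand-in for i915->mm.gemfs */
};

/* Run fn with gemfs disabled, restoring the mount even on error. */
static int with_gemfs_disabled(struct fake_mm *mm,
			       int (*fn)(struct fake_mm *))
{
	void *saved = mm->gemfs;
	int err;

	mm->gemfs = NULL;	/* object creation must fall back to tmpfs */
	err = fn(mm);
	mm->gemfs = saved;	/* unconditional, like the out_restore label */
	return err;
}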
+ */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + obj = i915_gem_object_create(i915, SZ_2M); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("failed to allocate THP, finishing test early\n"); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + err = gpu_write(vma, ctx, i915->engine[RCS], 0, 0xdeadbeaf); + if (err) + goto out_unpin; + + i915_vma_unpin(vma); + + /* + * Now that the pages are *unpinned* shrink-all should invoke + * shmem to truncate our pages. + */ + i915_gem_shrink_all(i915); + if (i915_gem_object_has_pages(obj)) { + pr_err("shrink-all didn't truncate the pages\n"); + err = -EINVAL; + goto out_close; + } + + if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { + pr_err("residual page-size bits left\n"); + err = -EINVAL; + goto out_close; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + err = cpu_check(obj, 0, 0xdeadbeaf); + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); + + return err; +} + +int i915_gem_huge_page_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_exhaust_device_supported_pages), + SUBTEST(igt_mock_ppgtt_misaligned_dma), + SUBTEST(igt_mock_ppgtt_huge_fill), + SUBTEST(igt_mock_ppgtt_64K), + }; + int saved_ppgtt = i915_modparams.enable_ppgtt; + struct drm_i915_private *dev_priv; + struct pci_dev *pdev; + struct i915_hw_ppgtt *ppgtt; + int err; + + dev_priv = mock_gem_device(); + if (!dev_priv) + return -ENOMEM; + + /* Pretend to be a device which supports the 48b PPGTT */ + i915_modparams.enable_ppgtt = 3; + + pdev = dev_priv->drm.pdev; + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39)); + + mutex_lock(&dev_priv->drm.struct_mutex); + ppgtt = i915_ppgtt_create(dev_priv, ERR_PTR(-ENODEV), "mock"); + if (IS_ERR(ppgtt)) { + err = PTR_ERR(ppgtt); + goto out_unlock; + } + + if (!i915_vm_is_48bit(&ppgtt->base)) { + pr_err("failed to create 48b PPGTT\n"); + err = -EINVAL; + goto out_close; + } + + /* If we ever hit this then it's time to mock the 64K scratch */ + if (!i915_vm_has_scratch_64K(&ppgtt->base)) { + pr_err("PPGTT missing 64K scratch page\n"); + err = -EINVAL; + goto out_close; + } + + err = i915_subtests(tests, ppgtt); + +out_close: + i915_ppgtt_close(&ppgtt->base); + i915_ppgtt_put(ppgtt); + +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + i915_modparams.enable_ppgtt = saved_ppgtt; + + drm_dev_unref(&dev_priv->drm); + + return err; +} + +int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_shrink_thp), + SUBTEST(igt_ppgtt_pin_update), + SUBTEST(igt_tmpfs_fallback), + SUBTEST(igt_ppgtt_exhaust_huge), + SUBTEST(igt_ppgtt_gemfs_huge), + SUBTEST(igt_ppgtt_internal_huge), + }; + struct drm_file *file; + struct i915_gem_context *ctx; + int err; + + if (!USES_PPGTT(dev_priv)) { + pr_info("PPGTT not supported, skipping live-selftests\n"); + return 0; + } + + file = mock_file(dev_priv); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_get(dev_priv); + + ctx = live_context(dev_priv, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + 
goto out_unlock; + } + + err = i915_subtests(tests, ctx); + +out_unlock: + intel_runtime_pm_put(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + + mock_file_free(dev_priv, file); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 35d778d70626..7a0d1e17c1ad 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -33,7 +33,7 @@ static int cpu_set(struct drm_i915_gem_object *obj, { unsigned int needs_clflush; struct page *page; - typeof(v) *map; + u32 *map; int err; err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); @@ -59,7 +59,7 @@ static int cpu_get(struct drm_i915_gem_object *obj, { unsigned int needs_clflush; struct page *page; - typeof(v) map; + u32 *map; int err; err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); @@ -82,7 +82,7 @@ static int gtt_set(struct drm_i915_gem_object *obj, u32 v) { struct i915_vma *vma; - typeof(v) *map; + u32 __iomem *map; int err; err = i915_gem_object_set_to_gtt_domain(obj, true); @@ -98,7 +98,7 @@ static int gtt_set(struct drm_i915_gem_object *obj, if (IS_ERR(map)) return PTR_ERR(map); - map[offset / sizeof(*map)] = v; + iowrite32(v, &map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); return 0; @@ -109,7 +109,7 @@ static int gtt_get(struct drm_i915_gem_object *obj, u32 *v) { struct i915_vma *vma; - typeof(v) map; + u32 __iomem *map; int err; err = i915_gem_object_set_to_gtt_domain(obj, false); @@ -125,7 +125,7 @@ static int gtt_get(struct drm_i915_gem_object *obj, if (IS_ERR(map)) return PTR_ERR(map); - *v = map[offset / sizeof(*map)]; + *v = ioread32(&map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); return 0; @@ -135,7 +135,7 @@ static int wc_set(struct drm_i915_gem_object *obj, unsigned long offset, u32 v) { - typeof(v) *map; + u32 *map; int err; err = i915_gem_object_set_to_wc_domain(obj, true); @@ -156,7 +156,7 @@ static int wc_get(struct drm_i915_gem_object *obj, unsigned long offset, u32 *v) { - typeof(v) map; + u32 *map; int err; err = i915_gem_object_set_to_wc_domain(obj, false); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index fb0a58fc8348..56a803d11916 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -158,14 +158,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj, goto err_batch; } - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - goto err_request; - - err = i915_switch_context(rq); - if (err) - goto err_request; - flags = 0; if (INTEL_GEN(vm->i915) <= 5) flags |= I915_DISPATCH_SECURE; @@ -272,6 +264,23 @@ out_unmap: return err; } +static int file_add_object(struct drm_file *file, + struct drm_i915_gem_object *obj) +{ + int err; + + GEM_BUG_ON(obj->base.handle_count); + + /* tie the object to the drm_file for easy reaping */ + err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL); + if (err < 0) + return err; + + i915_gem_object_get(obj); + obj->base.handle_count++; + return 0; +} + static struct drm_i915_gem_object * create_test_object(struct i915_gem_context *ctx, struct drm_file *file, @@ -281,7 +290,6 @@ create_test_object(struct i915_gem_context *ctx, struct i915_address_space *vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &ctx->i915->ggtt.base; u64 size; - u32 handle; int err; size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); @@ -291,8 +299,7 @@ create_test_object(struct i915_gem_context *ctx, if (IS_ERR(obj)) return obj; - /* tie the handle to the drm_file for easy reaping */ - err = drm_gem_handle_create(file, &obj->base, &handle); + err = file_add_object(file, obj); i915_gem_object_put(obj); if (err) return ERR_PTR(err); @@ -325,7 +332,7 @@ static int igt_ctx_exec(void *arg) LIST_HEAD(objects); unsigned long ncontexts, ndwords, dw; bool first_shared_gtt = true; - int err; + int err = -ENODEV; /* Create a few different contexts (with different mm) and write * through each ctx/mm using the GPU making sure those writes end @@ -369,7 +376,9 @@ static int igt_ctx_exec(void *arg) } } + intel_runtime_pm_get(i915); err = gpu_fill(obj, ctx, engine, dw); + intel_runtime_pm_put(i915); if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), @@ -417,7 +426,7 @@ static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) if (err) return err; - list_for_each_entry(obj, &i915->mm.bound_list, global_link) { + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { struct i915_vma *vma; vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 5ea373221f49..e1ddad635d73 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -24,6 +24,9 @@ #include "../i915_selftest.h" +#include "lib_sw_fence.h" +#include "mock_context.h" +#include "mock_drm.h" #include "mock_gem_device.h" static int populate_ggtt(struct drm_i915_private *i915) @@ -47,7 +50,7 @@ static int populate_ggtt(struct drm_i915_private *i915) if (!list_empty(&i915->mm.unbound_list)) { size = 0; - list_for_each_entry(obj, &i915->mm.unbound_list, global_link) + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) size++; pr_err("Found %lld objects unbound!\n", size); @@ -74,10 +77,10 @@ static void cleanup_objects(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj, *on; - list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, global_link) + list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, mm.link) i915_gem_object_put(obj); - list_for_each_entry_safe(obj, on, &i915->mm.bound_list, global_link) + list_for_each_entry_safe(obj, on, &i915->mm.bound_list, mm.link) i915_gem_object_put(obj); mutex_unlock(&i915->drm.struct_mutex); @@ -149,8 +152,6 @@ static int igt_overcommit(void *arg) goto cleanup; } - list_move(&obj->global_link, &i915->mm.unbound_list); - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); @@ -325,6 +326,150 @@ cleanup: return err; } +static int igt_evict_contexts(void *arg) +{ + const u64 PRETEND_GGTT_SIZE = 16ull << 20; + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct reserved { + struct drm_mm_node node; + struct reserved *next; + } *reserved = NULL; + struct drm_mm_node hole; + unsigned long count; + int err; + + /* + * The purpose of this test is to verify that we will trigger an + * eviction in the GGTT when constructing a request that requires + * additional space in the GGTT for pinning the context. 
This space + * is not directly tied to the request so reclaiming it requires + * extra work. + * + * As such this test is only meaningful for full-ppgtt environments + * where the GTT space of the request is separate from the GGTT + * allocation required to build the request. + */ + if (!USES_FULL_PPGTT(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + intel_runtime_pm_get(i915); + + /* Reserve a block so that we know we have enough to fit a few rq */ + memset(&hole, 0, sizeof(hole)); + err = i915_gem_gtt_insert(&i915->ggtt.base, &hole, + PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE, + 0, i915->ggtt.base.total, + PIN_NOEVICT); + if (err) + goto out_locked; + + /* Make the GGTT appear small by filling it with unevictable nodes */ + count = 0; + do { + struct reserved *r; + + r = kcalloc(1, sizeof(*r), GFP_KERNEL); + if (!r) { + err = -ENOMEM; + goto out_locked; + } + + if (i915_gem_gtt_insert(&i915->ggtt.base, &r->node, + 1ul << 20, 0, I915_COLOR_UNEVICTABLE, + 0, i915->ggtt.base.total, + PIN_NOEVICT)) { + kfree(r); + break; + } + + r->next = reserved; + reserved = r; + + count++; + } while (1); + drm_mm_remove_node(&hole); + mutex_unlock(&i915->drm.struct_mutex); + pr_info("Filled GGTT with %lu 1MiB nodes\n", count); + + /* Overfill the GGTT with context objects and so try to evict one. */ + for_each_engine(engine, i915, id) { + struct i915_sw_fence fence; + struct drm_file *file; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + count = 0; + mutex_lock(&i915->drm.struct_mutex); + onstack_fence_init(&fence); + do { + struct drm_i915_gem_request *rq; + struct i915_gem_context *ctx; + + ctx = live_context(i915, file); + if (!ctx) + break; + + /* We will need some GGTT space for the rq's context */ + igt_evict_ctl.fail_if_busy = true; + rq = i915_gem_request_alloc(engine, ctx); + igt_evict_ctl.fail_if_busy = false; + + if (IS_ERR(rq)) { + /* When full, fail_if_busy will trigger EBUSY */ + if (PTR_ERR(rq) != -EBUSY) { + pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n", + ctx->hw_id, engine->name, + (int)PTR_ERR(rq)); + err = PTR_ERR(rq); + } + break; + } + + /* Keep every request/ctx pinned until we are full */ + err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, + &fence, + GFP_KERNEL); + if (err < 0) + break; + + i915_add_request(rq); + count++; + err = 0; + } while(1); + mutex_unlock(&i915->drm.struct_mutex); + + onstack_fence_fini(&fence); + pr_info("Submitted %lu contexts/requests on %s\n", + count, engine->name); + + mock_file_free(i915, file); + if (err) + break; + } + + mutex_lock(&i915->drm.struct_mutex); +out_locked: + while (reserved) { + struct reserved *next = reserved->next; + + drm_mm_remove_node(&reserved->node); + kfree(reserved); + + reserved = next; + } + if (drm_mm_node_allocated(&hole)) + drm_mm_remove_node(&hole); + intel_runtime_pm_put(i915); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + int i915_gem_evict_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -348,3 +493,12 @@ int i915_gem_evict_mock_selftests(void) drm_dev_unref(&i915->drm); return err; } + +int i915_gem_evict_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_evict_contexts), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 6b132caffa18..4a28d713a7d8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ 
b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -39,25 +39,26 @@ static void fake_free_pages(struct drm_i915_gem_object *obj, kfree(pages); } -static struct sg_table * -fake_get_pages(struct drm_i915_gem_object *obj) +static int fake_get_pages(struct drm_i915_gem_object *obj) { #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) #define PFN_BIAS 0x1000 struct sg_table *pages; struct scatterlist *sg; + unsigned int sg_page_sizes; typeof(obj->base.size) rem; pages = kmalloc(sizeof(*pages), GFP); if (!pages) - return ERR_PTR(-ENOMEM); + return -ENOMEM; rem = round_up(obj->base.size, BIT(31)) >> 31; if (sg_alloc_table(pages, rem, GFP)) { kfree(pages); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } + sg_page_sizes = 0; rem = obj->base.size; for (sg = pages->sgl; sg; sg = sg_next(sg)) { unsigned long len = min_t(typeof(rem), rem, BIT(31)); @@ -66,13 +67,17 @@ fake_get_pages(struct drm_i915_gem_object *obj) sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); sg_dma_address(sg) = page_to_phys(sg_page(sg)); sg_dma_len(sg) = len; + sg_page_sizes |= len; rem -= len; } GEM_BUG_ON(rem); obj->mm.madv = I915_MADV_DONTNEED; - return pages; + + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + + return 0; #undef GFP } @@ -211,13 +216,21 @@ static int lowlevel_hole(struct drm_i915_private *i915, hole_size = (hole_end - hole_start) >> size; if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) hole_size = KMALLOC_MAX_SIZE / sizeof(u32); - count = hole_size; + count = hole_size >> 1; + if (!count) { + pr_debug("%s: hole is too small [%llx - %llx] >> %d: %lld\n", + __func__, hole_start, hole_end, size, hole_size); + break; + } + do { - count >>= 1; order = i915_random_order(count, &prng); - } while (!order && count); - if (!order) - break; + if (order) + break; + } while (count >>= 1); + if (!count) + return -ENOMEM; + GEM_BUG_ON(!order); GEM_BUG_ON(count * BIT_ULL(size) > vm->total); GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end); @@ -262,7 +275,9 @@ static int lowlevel_hole(struct drm_i915_private *i915, mock_vma.node.size = BIT_ULL(size); mock_vma.node.start = addr; + intel_runtime_pm_get(i915); vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0); + intel_runtime_pm_put(i915); } count = n; @@ -692,18 +707,26 @@ static int drunk_hole(struct drm_i915_private *i915, unsigned int *order, count, n; struct i915_vma *vma; u64 hole_size; - int err; + int err = -ENODEV; hole_size = (hole_end - hole_start) >> size; if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) hole_size = KMALLOC_MAX_SIZE / sizeof(u32); - count = hole_size; + count = hole_size >> 1; + if (!count) { + pr_debug("%s: hole is too small [%llx - %llx] >> %d: %lld\n", + __func__, hole_start, hole_end, size, hole_size); + break; + } + do { - count >>= 1; order = i915_random_order(count, &prng); - } while (!order && count); - if (!order) - break; + if (order) + break; + } while (count >>= 1); + if (!count) + return -ENOMEM; + GEM_BUG_ON(!order); /* Ignore allocation failures (i.e. 
don't report them as * a test failure) as we are purposefully allocating very @@ -951,7 +974,7 @@ static int exercise_ggtt(struct drm_i915_private *i915, u64 hole_start, hole_end, last = 0; struct drm_mm_node *node; IGT_TIMEOUT(end_time); - int err; + int err = 0; mutex_lock(&i915->drm.struct_mutex); restart: @@ -1042,6 +1065,7 @@ static int igt_ggtt_page(void *arg) goto out_remove; } + intel_runtime_pm_get(i915); for (n = 0; n < count; n++) { u64 offset = tmp.start + order[n] * PAGE_SIZE; u32 __iomem *vaddr; @@ -1050,7 +1074,7 @@ static int igt_ggtt_page(void *arg) i915_gem_object_get_dma_address(obj, 0), offset, I915_CACHE_NONE, 0); - vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset); iowrite32(n, vaddr + n); io_mapping_unmap_atomic(vaddr); @@ -1068,7 +1092,7 @@ static int igt_ggtt_page(void *arg) i915_gem_object_get_dma_address(obj, 0), offset, I915_CACHE_NONE, 0); - vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset); val = ioread32(vaddr + n); io_mapping_unmap_atomic(vaddr); @@ -1081,6 +1105,7 @@ static int igt_ggtt_page(void *arg) break; } } + intel_runtime_pm_put(i915); kfree(order); out_remove: @@ -1155,7 +1180,7 @@ static int igt_gtt_reserve(void *arg) struct drm_i915_gem_object *obj, *on; LIST_HEAD(objects); u64 total; - int err; + int err = -ENODEV; /* i915_gem_gtt_reserve() tries to reserve the precise range * for the node, and evicts if it has to. So our test checks that @@ -1346,7 +1371,7 @@ static int igt_gtt_insert(void *arg) }, *ii; LIST_HEAD(objects); u64 total; - int err; + int err = -ENODEV; /* i915_gem_gtt_insert() tries to allocate some free space in the GTT * to the node, evicting if required. diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 8f011c447e41..f32aa6bb79e2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -251,14 +251,6 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, return PTR_ERR(io); } - err = i915_vma_get_fence(vma); - if (err) { - pr_err("Failed to get fence for partial view: offset=%lu\n", - page); - i915_vma_unpin_iomap(vma); - return err; - } - iowrite32(page, io + n * PAGE_SIZE/sizeof(*io)); i915_vma_unpin_iomap(vma); @@ -325,6 +317,7 @@ static int igt_partial_tiling(void *arg) } mutex_lock(&i915->drm.struct_mutex); + intel_runtime_pm_get(i915); if (1) { IGT_TIMEOUT(end); @@ -426,6 +419,7 @@ next_tiling: ; } out_unlock: + intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_unpin_pages(obj); out: diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 6664cb2eb0b8..647bf2bbd799 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -215,7 +215,9 @@ static int igt_request_rewind(void *arg) } i915_gem_request_get(vip); i915_add_request(vip); + rcu_read_lock(); request->engine->submit_request(request); + rcu_read_unlock(); mutex_unlock(&i915->drm.struct_mutex); @@ -330,7 +332,7 @@ static int live_nop_request(void *arg) struct intel_engine_cs *engine; struct live_test t; unsigned int id; - int err; + int err = -ENODEV; /* Submit various sized batches of empty requests, to each engine * (individually), and wait for the batch to complete. 
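Several hunks above, igt_ggtt_page() gains a runtime-pm wrapper around its stamp-and-verify loop: one physical page is inserted at randomized GGTT offsets, a distinct dword is written through each mapping, and everything is read back afterwards. The same aliasing pattern in miniature (plain C, sizes shrunk; every fake GGTT slot points at the one physical page, so each pass must stamp a different word):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define COUNT 16

static void shuffle(int *order, int len)
{
	for (int i = len - 1; i > 0; i--) {	/* Fisher-Yates */
		int j = rand() % (i + 1);
		int t = order[i]; order[i] = order[j]; order[j] = t;
	}
}

int main(void)
{
	uint32_t phys[COUNT];	/* the single physical page */
	uint32_t *ggtt[COUNT];	/* fake GGTT slots */
	int order[COUNT];

	for (int i = 0; i < COUNT; i++)
		order[i] = i;

	shuffle(order, COUNT);
	for (int n = 0; n < COUNT; n++) {
		ggtt[order[n]] = phys;	/* insert_page(): same page, new offset */
		ggtt[order[n]][n] = n;	/* iowrite32(n, vaddr + n) */
	}

	shuffle(order, COUNT);	/* fresh order for the read pass */
	for (int n = 0; n < COUNT; n++) {
		ggtt[order[n]] = phys;
		if (ggtt[order[n]][n] != (uint32_t)n) {	/* ioread32() */
			fprintf(stderr, "mismatch at word %d\n", n);
			return 1;
		}
	}
	return 0;
}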
We can check @@ -418,7 +420,10 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) err = PTR_ERR(cmd); goto err; } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + i915_gem_object_unpin_map(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); @@ -454,14 +459,6 @@ empty_request(struct intel_engine_cs *engine, if (IS_ERR(request)) return request; - err = engine->emit_flush(request, EMIT_INVALIDATE); - if (err) - goto out_request; - - err = i915_switch_context(request); - if (err) - goto out_request; - err = engine->emit_bb_start(request, batch->node.start, batch->node.size, @@ -605,8 +602,8 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) *cmd++ = lower_32_bits(vma->node.start); } *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ + i915_gem_chipset_flush(i915); - wmb(); i915_gem_object_unpin_map(obj); return vma; @@ -625,7 +622,7 @@ static int recursive_batch_resolve(struct i915_vma *batch) return PTR_ERR(cmd); *cmd = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(batch->vm->i915); i915_gem_object_unpin_map(batch->obj); @@ -670,12 +667,6 @@ static int live_all_engines(void *arg) goto out_request; } - err = engine->emit_flush(request[id], EMIT_INVALIDATE); - GEM_BUG_ON(err); - - err = i915_switch_context(request[id]); - GEM_BUG_ON(err); - err = engine->emit_bb_start(request[id], batch->node.start, batch->node.size, @@ -792,12 +783,6 @@ static int live_sequential_engines(void *arg) } } - err = engine->emit_flush(request[id], EMIT_INVALIDATE); - GEM_BUG_ON(err); - - err = i915_switch_context(request[id]); - GEM_BUG_ON(err); - err = engine->emit_bb_start(request[id], batch->node.start, batch->node.size, @@ -858,7 +843,8 @@ out_request: I915_MAP_WC); if (!IS_ERR(cmd)) { *cmd = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(i915); + i915_gem_object_unpin_map(request[id]->batch->obj); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c index 7a44dab631b8..3000e6a7d82d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c @@ -79,7 +79,7 @@ static int igt_sync(void *arg) }, *p; struct intel_timeline *tl; int order, offset; - int ret; + int ret = -ENODEV; tl = mock_timeline(0); if (!tl) @@ -121,7 +121,7 @@ out: static unsigned int random_engine(struct rnd_state *rnd) { - return ((u64)prandom_u32_state(rnd) * I915_NUM_ENGINES) >> 32; + return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd); } static int bench_sync(void *arg) diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 1519f1b7841b..088f45bc6199 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -16,5 +16,8 @@ selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) selftest(gtt, i915_gem_gtt_live_selftests) +selftest(evict, i915_gem_evict_live_selftests) +selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(contexts, i915_gem_context_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) +selftest(guc, intel_guc_live_selftest) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index e5a9e5dcf2f3..19c6fce837df 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ 
b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -22,3 +22,4 @@ selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(vma, i915_vma_mock_selftests) selftest(evict, i915_gem_evict_mock_selftests) selftest(gtt, i915_gem_gtt_mock_selftests) +selftest(hugepages, i915_gem_huge_page_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 222c511bea49..2088ae57aa89 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -41,11 +41,6 @@ u64 i915_prandom_u64_state(struct rnd_state *rnd) return x; } -static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) -{ - return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); -} - void i915_random_reorder(unsigned int *order, unsigned int count, struct rnd_state *state) { @@ -62,7 +57,7 @@ unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) { unsigned int *order, i; - order = kmalloc_array(count, sizeof(*order), GFP_KERNEL); + order = kmalloc_array(count, sizeof(*order), GFP_KERNEL | __GFP_NOWARN); if (!order) return order; diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index 6c9379871384..7dffedc501ca 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -43,6 +43,11 @@ u64 i915_prandom_u64_state(struct rnd_state *rnd); +static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) +{ + return upper_32_bits(mul_u32_u32(prandom_u32_state(state), ep_ro)); +} + unsigned int *i915_random_order(unsigned int count, struct rnd_state *state); void i915_random_reorder(unsigned int *order, diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c index 19d145d6bf52..ea01d0fe3ace 100644 --- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c @@ -24,6 +24,7 @@ #include <linux/completion.h> #include <linux/delay.h> +#include <linux/prime_numbers.h> #include "../i915_selftest.h" @@ -565,6 +566,46 @@ err_in: return ret; } +static int test_timer(void *arg) +{ + unsigned long target, delay; + struct timed_fence tf; + + timed_fence_init(&tf, target = jiffies); + if (!i915_sw_fence_done(&tf.fence)) { + pr_err("Fence with immediate expiration not signaled\n"); + goto err; + } + timed_fence_fini(&tf); + + for_each_prime_number(delay, i915_selftest.timeout_jiffies/2) { + timed_fence_init(&tf, target = jiffies + delay); + if (i915_sw_fence_done(&tf.fence)) { + pr_err("Fence with future expiration (%lu jiffies) already signaled\n", delay); + goto err; + } + + i915_sw_fence_wait(&tf.fence); + if (!i915_sw_fence_done(&tf.fence)) { + pr_err("Fence not signaled after wait\n"); + goto err; + } + if (time_before(jiffies, target)) { + pr_err("Fence signaled too early, target=%lu, now=%lu\n", + target, jiffies); + goto err; + } + + timed_fence_fini(&tf); + } + + return 0; + +err: + timed_fence_fini(&tf); + return -EINVAL; +} + int i915_sw_fence_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -576,6 +617,7 @@ int i915_sw_fence_mock_selftests(void) SUBTEST(test_C_AB), SUBTEST(test_chain), SUBTEST(test_ipc), + SUBTEST(test_timer), }; return i915_subtests(tests, NULL); diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c index bcab3d00a785..47f4ae18a1ef 100644 --- 
a/drivers/gpu/drm/i915/selftests/i915_syncmap.c +++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c @@ -333,7 +333,7 @@ static int igt_syncmap_join_below(void *arg) { struct i915_syncmap *sync; unsigned int step, order, idx; - int err; + int err = -ENODEV; i915_syncmap_init(&sync); @@ -402,7 +402,7 @@ static int igt_syncmap_neighbours(void *arg) I915_RND_STATE(prng); IGT_TIMEOUT(end_time); struct i915_syncmap *sync; - int err; + int err = -ENODEV; /* * Each leaf holds KSYNCMAP seqno. Check that when we create KSYNCMAP @@ -447,7 +447,7 @@ static int igt_syncmap_compact(void *arg) { struct i915_syncmap *sync; unsigned int idx, order; - int err; + int err = -ENODEV; i915_syncmap_init(&sync); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 2e86ec136b35..eb89e301b602 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -150,7 +150,7 @@ static int igt_vma_create(void *arg) IGT_TIMEOUT(end_time); LIST_HEAD(contexts); LIST_HEAD(objects); - int err; + int err = -ENOMEM; /* Exercise creating many vma amongst many objects, checking the * vma creation and lookup routines. diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 828904b7d468..54fc571b1102 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -271,13 +271,7 @@ struct igt_wakeup { u32 seqno; }; -static int wait_atomic(atomic_t *p) -{ - schedule(); - return 0; -} - -static int wait_atomic_timeout(atomic_t *p) +static int wait_atomic_timeout(atomic_t *p, unsigned int mode) { return schedule_timeout(10 * HZ) ? 0 : -ETIMEDOUT; } @@ -348,7 +342,7 @@ static void igt_wake_all_sync(atomic_t *ready, atomic_set(ready, 0); wake_up_all(wq); - wait_on_atomic_t(set, wait_atomic, TASK_UNINTERRUPTIBLE); + wait_on_atomic_t(set, atomic_t_wait, TASK_UNINTERRUPTIBLE); atomic_set(ready, count); atomic_set(done, count); } diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c new file mode 100644 index 000000000000..3f9016466dea --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -0,0 +1,353 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "../i915_selftest.h" + +/* max doorbell number + negative test for each client type */ +#define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM) + +static struct intel_guc_client *clients[ATTEMPTS]; + +static bool available_dbs(struct intel_guc *guc, u32 priority) +{ + unsigned long offset; + unsigned long end; + u16 id; + + /* first half is used for normal priority, second half for high */ + offset = 0; + end = GUC_NUM_DOORBELLS / 2; + if (priority <= GUC_CLIENT_PRIORITY_HIGH) { + offset = end; + end += offset; + } + + id = find_next_zero_bit(guc->doorbell_bitmap, end, offset); + if (id < end) + return true; + + return false; +} + +static int check_all_doorbells(struct intel_guc *guc) +{ + u16 db_id; + + pr_info_once("Max number of doorbells: %d\n", GUC_NUM_DOORBELLS); + for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) { + if (!doorbell_ok(guc, db_id)) { + pr_err("doorbell %d not ok\n", db_id); + return -EIO; + } + } + + return 0; +} + +/* + * Basic client sanity check, handy to validate create_clients. + */ +static int validate_client(struct intel_guc_client *client, + int client_priority, + bool is_preempt_client) +{ + struct drm_i915_private *dev_priv = guc_to_i915(client->guc); + struct i915_gem_context *ctx_owner = is_preempt_client ? + dev_priv->preempt_context : dev_priv->kernel_context; + + if (client->owner != ctx_owner || + client->engines != INTEL_INFO(dev_priv)->ring_mask || + client->priority != client_priority || + client->doorbell_id == GUC_DOORBELL_INVALID) + return -EINVAL; + else + return 0; +} + +static bool client_doorbell_in_sync(struct intel_guc_client *client) +{ + return doorbell_ok(client->guc, client->doorbell_id); +} + +/* + * Check that we're able to synchronize guc_clients with their doorbells + * + * We're creating clients and reserving doorbells once, at module load. During + * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to + * GuC being reset. In other words, GuC clients are still around, but the + * status of their doorbells may be incorrect. This is the reason behind + * validating that the doorbell status expected by the driver matches what the + * GuC/HW have. + */ +static int igt_guc_clients(void *args) +{ + struct drm_i915_private *dev_priv = args; + struct intel_guc *guc; + int err = 0; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + mutex_lock(&dev_priv->drm.struct_mutex); + + guc = &dev_priv->guc; + if (!guc) { + pr_err("No guc object!\n"); + err = -EINVAL; + goto unlock; + } + + err = check_all_doorbells(guc); + if (err) + goto unlock; + + /* + * Get rid of clients created during driver load because the test will + * recreate them. 
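available_dbs() above splits the doorbell bitmap in two by priority; GuC counts 0 as the highest priority, so clients at or below GUC_CLIENT_PRIORITY_HIGH take the upper half. A shrunken standalone model of the same lookup:

#include <stdbool.h>
#include <stdio.h>

#define NUM_DOORBELLS	16	/* stand-in for GUC_NUM_DOORBELLS */
#define PRIO_HIGH	1	/* stand-in: 0-1 high, 2-3 normal */

static bool half_has_space(const bool *taken, int priority)
{
	int offset = 0, end = NUM_DOORBELLS / 2;

	if (priority <= PRIO_HIGH) {	/* high priority: upper half */
		offset = end;
		end += offset;
	}

	for (int id = offset; id < end; id++)	/* ~find_next_zero_bit() */
		if (!taken[id])
			return true;
	return false;
}

int main(void)
{
	bool taken[NUM_DOORBELLS] = { false };

	for (int id = 0; id < NUM_DOORBELLS / 2; id++)
		taken[id] = true;	/* exhaust the normal-priority half */

	printf("normal half free? %d\n", half_has_space(taken, 2));
	printf("high half free? %d\n", half_has_space(taken, 1));
	return 0;
}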
+ */ + guc_clients_destroy(guc); + if (guc->execbuf_client || guc->preempt_client) { + pr_err("guc_clients_destroy lied!\n"); + err = -EINVAL; + goto unlock; + } + + err = guc_clients_create(guc); + if (err) { + pr_err("Failed to create clients\n"); + goto unlock; + } + GEM_BUG_ON(!guc->execbuf_client); + GEM_BUG_ON(!guc->preempt_client); + + err = validate_client(guc->execbuf_client, + GUC_CLIENT_PRIORITY_KMD_NORMAL, false); + if (err) { + pr_err("execbuf client validation failed\n"); + goto out; + } + + err = validate_client(guc->preempt_client, + GUC_CLIENT_PRIORITY_KMD_HIGH, true); + if (err) { + pr_err("preempt client validation failed\n"); + goto out; + } + + /* each client should now have reserved a doorbell */ + if (!has_doorbell(guc->execbuf_client) || + !has_doorbell(guc->preempt_client)) { + pr_err("guc_clients_create didn't reserve doorbells\n"); + err = -EINVAL; + goto out; + } + + /* Now create the doorbells */ + guc_clients_doorbell_init(guc); + + /* each client should now have received a doorbell */ + if (!client_doorbell_in_sync(guc->execbuf_client) || + !client_doorbell_in_sync(guc->preempt_client)) { + pr_err("failed to initialize the doorbells\n"); + err = -EINVAL; + goto out; + } + + /* + * Basic test - an attempt to reallocate a valid doorbell to the + * client it is currently assigned to should not cause a failure. + */ + err = guc_clients_doorbell_init(guc); + if (err) + goto out; + + /* + * Negative test - a client with no doorbell (invalid db id). + * After destroying the doorbell, the db id is changed to + * GUC_DOORBELL_INVALID and the firmware will reject any attempt to + * allocate a doorbell with an invalid id (db has to be reserved before + * allocation). + */ + destroy_doorbell(guc->execbuf_client); + if (client_doorbell_in_sync(guc->execbuf_client)) { + pr_err("destroy db did not work\n"); + err = -EINVAL; + goto out; + } + + unreserve_doorbell(guc->execbuf_client); + err = guc_clients_doorbell_init(guc); + if (err != -EIO) { + pr_err("unexpected (err = %d)\n", err); + goto out; + } + + if (!available_dbs(guc, guc->execbuf_client->priority)) { + pr_err("doorbell not available when it should be\n"); + err = -EIO; + goto out; + } + + /* clean after test */ + err = reserve_doorbell(guc->execbuf_client); + if (err) { + pr_err("failed to reserve the doorbell back\n"); + } + err = create_doorbell(guc->execbuf_client); + if (err) { + pr_err("recreate doorbell failed\n"); + goto out; + } + +out: + /* + * Leave a clean state for the other tests; the driver always destroys + * the clients during unload. + */ + destroy_doorbell(guc->execbuf_client); + destroy_doorbell(guc->preempt_client); + guc_clients_destroy(guc); + guc_clients_create(guc); + guc_clients_doorbell_init(guc); +unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + return err; +} + +/* + * Create as many clients as number of doorbells. Note that there's already + * client(s)/doorbell(s) created during driver load, but this test creates + * its own and does not interact with the existing ones. 
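igt_guc_doorbells() below allocates ATTEMPTS clients round-robin across the priorities, so each doorbell half is expected to eventually run dry with -ENOSPC while any other failure is a bug. The control flow in miniature, with shrunken constants:

#include <errno.h>
#include <stdio.h>

#define NUM_DB		8	/* stand-in for GUC_NUM_DOORBELLS */
#define NUM_PRIO	4	/* stand-in for GUC_CLIENT_PRIORITY_NUM */

static int db_taken[NUM_DB];

static int alloc_db(int prio)
{
	int start = prio <= 1 ? NUM_DB / 2 : 0;	/* high prio: upper half */

	for (int id = start; id < start + NUM_DB / 2; id++)
		if (!db_taken[id]) {
			db_taken[id] = 1;
			return id;
		}
	return -ENOSPC;
}

int main(void)
{
	for (int i = 0; i < NUM_DB + NUM_PRIO; i++) {	/* ~ATTEMPTS */
		int id = alloc_db(i % NUM_PRIO);

		if (id == -ENOSPC) {	/* expected once a half fills up */
			printf("[%d] prio %d: out of doorbells, ok\n",
			       i, i % NUM_PRIO);
			continue;
		}
		printf("[%d] prio %d -> doorbell %d\n", i, i % NUM_PRIO, id);
	}
	return 0;
}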
+ */ +static int igt_guc_doorbells(void *arg) +{ + struct drm_i915_private *dev_priv = arg; + struct intel_guc *guc; + int i, err = 0; + u16 db_id; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + mutex_lock(&dev_priv->drm.struct_mutex); + + guc = &dev_priv->guc; + if (!guc) { + pr_err("No guc object!\n"); + err = -EINVAL; + goto unlock; + } + + err = check_all_doorbells(guc); + if (err) + goto unlock; + + for (i = 0; i < ATTEMPTS; i++) { + clients[i] = guc_client_alloc(dev_priv, + INTEL_INFO(dev_priv)->ring_mask, + i % GUC_CLIENT_PRIORITY_NUM, + dev_priv->kernel_context); + + if (!clients[i]) { + pr_err("[%d] No guc client\n", i); + err = -EINVAL; + goto out; + } + + if (IS_ERR(clients[i])) { + if (PTR_ERR(clients[i]) != -ENOSPC) { + pr_err("[%d] unexpected error\n", i); + err = PTR_ERR(clients[i]); + goto out; + } + + if (available_dbs(guc, i % GUC_CLIENT_PRIORITY_NUM)) { + pr_err("[%d] non-db related alloc fail\n", i); + err = -EINVAL; + goto out; + } + + /* expected, ran out of dbs for this client type */ + continue; + } + + /* + * The check below is only valid because we keep a doorbell + * assigned during the whole life of the client. + */ + if (clients[i]->stage_id >= GUC_NUM_DOORBELLS) { + pr_err("[%d] more clients than doorbells (%d >= %d)\n", + i, clients[i]->stage_id, GUC_NUM_DOORBELLS); + err = -EINVAL; + goto out; + } + + err = validate_client(clients[i], + i % GUC_CLIENT_PRIORITY_NUM, false); + if (err) { + pr_err("[%d] client_alloc sanity check failed!\n", i); + err = -EINVAL; + goto out; + } + + db_id = clients[i]->doorbell_id; + + err = create_doorbell(clients[i]); + if (err) { + pr_err("[%d] Failed to create a doorbell\n", i); + goto out; + } + + /* doorbell id shouldn't change, we are holding the mutex */ + if (db_id != clients[i]->doorbell_id) { + pr_err("[%d] doorbell id changed (%d != %d)\n", + i, db_id, clients[i]->doorbell_id); + err = -EINVAL; + goto out; + } + + err = check_all_doorbells(guc); + if (err) + goto out; + } + +out: + for (i = 0; i < ATTEMPTS; i++) + if (!IS_ERR_OR_NULL(clients[i])) { + destroy_doorbell(clients[i]); + guc_client_free(clients[i]); + } +unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + return err; +} + +int intel_guc_live_selftest(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_guc_clients), + SUBTEST(igt_guc_doorbells), + }; + + if (!USES_GUC_SUBMISSION(dev_priv)) + return 0; + + return i915_subtests(tests, dev_priv); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 02e52a146ed8..d1f91a533afa 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -114,14 +114,6 @@ static int emit_recurse_batch(struct hang *h, if (err) goto unpin_vma; - err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - goto unpin_hws; - - err = i915_switch_context(rq); - if (err) - goto unpin_hws; - i915_vma_move_to_active(vma, rq, 0); if (!i915_gem_object_has_active_reference(vma->obj)) { i915_gem_object_get(vma->obj); @@ -140,6 +132,12 @@ static int emit_recurse_batch(struct hang *h, *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = upper_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; *batch++ = lower_32_bits(vma->node.start); *batch++ = upper_32_bits(vma->node.start); @@ -148,6 +146,12 @@ 
static int emit_recurse_batch(struct hang *h, *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 1 << 8; *batch++ = lower_32_bits(vma->node.start); } else if (INTEL_GEN(i915) >= 4) { @@ -155,16 +159,29 @@ static int emit_recurse_batch(struct hang *h, *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 2 << 6; *batch++ = lower_32_bits(vma->node.start); } else { *batch++ = MI_STORE_DWORD_IMM; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1; *batch++ = lower_32_bits(vma->node.start); } *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + i915_gem_chipset_flush(h->i915); flags = 0; if (INTEL_GEN(vm->i915) <= 5) @@ -172,7 +189,6 @@ static int emit_recurse_batch(struct hang *h, err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); -unpin_hws: i915_vma_unpin(hws); unpin_vma: i915_vma_unpin(vma); @@ -231,7 +247,7 @@ static u32 hws_seqno(const struct hang *h, static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(h->i915); i915_gem_object_unpin_map(h->obj); i915_gem_object_put(h->obj); @@ -242,6 +258,16 @@ static void hang_fini(struct hang *h) i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); } +static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +{ + return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 10) && + wait_for(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 1000)); +} + static int igt_hang_sanitycheck(void *arg) { struct drm_i915_private *i915 = arg; @@ -275,6 +301,8 @@ static int igt_hang_sanitycheck(void *arg) i915_gem_request_get(rq); *h.batch = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + __i915_add_request(rq, true); timeout = i915_wait_request(rq, @@ -302,6 +330,9 @@ static void global_reset_lock(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + pr_debug("%s: current gpu_error=%08lx\n", + __func__, i915->gpu_error.flags); + while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) wait_event(i915->gpu_error.reset_queue, !test_bit(I915_RESET_BACKOFF, @@ -359,54 +390,128 @@ static int igt_global_reset(void *arg) return err; } -static int igt_reset_engine(void *arg) +static int __igt_reset_engine(struct drm_i915_private *i915, bool active) { - struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - unsigned int reset_count, reset_engine_count; + struct hang h; int err = 0; - /* Check that we can issue a global GPU and engine reset */ + /* Check that we can issue an engine reset on an idle engine (no-op) */ if (!intel_has_reset_engine(i915)) return 0; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + return err; + } + for_each_engine(engine, i915, id) { - set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags); + unsigned int reset_count, reset_engine_count; + IGT_TIMEOUT(end_time); + + if (active && 
!intel_engine_can_store_dword(engine)) + continue; + reset_count = i915_reset_count(&i915->gpu_error); reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine); - err = i915_reset_engine(engine, I915_RESET_QUIET); - if (err) { - pr_err("i915_reset_engine failed\n"); - break; - } + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + do { + if (active) { + struct drm_i915_gem_request *rq; + + mutex_lock(&i915->drm.struct_mutex); + rq = hang_create_request(&h, engine, + i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + mutex_unlock(&i915->drm.struct_mutex); + break; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + mutex_unlock(&i915->drm.struct_mutex); + + if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + i915_gem_request_put(rq); + err = -EIO; + break; + } - if (i915_reset_count(&i915->gpu_error) != reset_count) { - pr_err("Full GPU reset recorded! (engine reset expected)\n"); - err = -EINVAL; - break; - } + i915_gem_request_put(rq); + } + + engine->hangcheck.stalled = true; + engine->hangcheck.seqno = + intel_engine_get_seqno(engine); + + err = i915_reset_engine(engine, I915_RESET_QUIET); + if (err) { + pr_err("i915_reset_engine failed\n"); + break; + } - if (i915_reset_engine_count(&i915->gpu_error, engine) == - reset_engine_count) { - pr_err("No %s engine reset recorded!\n", engine->name); - err = -EINVAL; + if (i915_reset_count(&i915->gpu_error) != reset_count) { + pr_err("Full GPU reset recorded! (engine reset expected)\n"); + err = -EINVAL; + break; + } + + reset_engine_count += active; + if (i915_reset_engine_count(&i915->gpu_error, engine) != + reset_engine_count) { + pr_err("%s engine reset %srecorded!\n", + engine->name, active ? 
"not " : ""); + err = -EINVAL; + break; + } + + engine->hangcheck.stalled = false; + } while (time_before(jiffies, end_time)); + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + + if (err) break; - } - clear_bit(I915_RESET_ENGINE + engine->id, - &i915->gpu_error.flags); + cond_resched(); } if (i915_terminally_wedged(&i915->gpu_error)) err = -EIO; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + hang_fini(&h); + mutex_unlock(&i915->drm.struct_mutex); + } + return err; } +static int igt_reset_idle_engine(void *arg) +{ + return __igt_reset_engine(arg, false); +} + +static int igt_reset_active_engine(void *arg) +{ + return __igt_reset_engine(arg, true); +} + static int active_engine(void *data) { struct intel_engine_cs *engine = data; @@ -468,11 +573,12 @@ err_file: return err; } -static int igt_reset_active_engines(void *arg) +static int __igt_reset_engine_others(struct drm_i915_private *i915, + bool active) { - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine, *active; + struct intel_engine_cs *engine, *other; enum intel_engine_id id, tmp; + struct hang h; int err = 0; /* Check that issuing a reset on one engine does not interfere @@ -482,24 +588,36 @@ static int igt_reset_active_engines(void *arg) if (!intel_has_reset_engine(i915)) return 0; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + return err; + } + for_each_engine(engine, i915, id) { - struct task_struct *threads[I915_NUM_ENGINES]; + struct task_struct *threads[I915_NUM_ENGINES] = {}; unsigned long resets[I915_NUM_ENGINES]; unsigned long global = i915_reset_count(&i915->gpu_error); + unsigned long count = 0; IGT_TIMEOUT(end_time); + if (active && !intel_engine_can_store_dword(engine)) + continue; + memset(threads, 0, sizeof(threads)); - for_each_engine(active, i915, tmp) { + for_each_engine(other, i915, tmp) { struct task_struct *tsk; - if (active == engine) - continue; - resets[tmp] = i915_reset_engine_count(&i915->gpu_error, - active); + other); + + if (other == engine) + continue; - tsk = kthread_run(active_engine, active, - "igt/%s", active->name); + tsk = kthread_run(active_engine, other, + "igt/%s", other->name); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); goto unwind; @@ -509,20 +627,70 @@ static int igt_reset_active_engines(void *arg) get_task_struct(tsk); } - set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { + if (active) { + struct drm_i915_gem_request *rq; + + mutex_lock(&i915->drm.struct_mutex); + rq = hang_create_request(&h, engine, + i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + mutex_unlock(&i915->drm.struct_mutex); + break; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + mutex_unlock(&i915->drm.struct_mutex); + + if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + i915_gem_request_put(rq); + err = -EIO; + break; + } + + i915_gem_request_put(rq); + } + + engine->hangcheck.stalled = true; + engine->hangcheck.seqno = + intel_engine_get_seqno(engine); + err = i915_reset_engine(engine, I915_RESET_QUIET); if (err) { - pr_err("i915_reset_engine(%s) failed, err=%d\n", - engine->name, err); + pr_err("i915_reset_engine(%s:%s) failed, err=%d\n", + engine->name, active ? 
"active" : "idle", err); break; } + + engine->hangcheck.stalled = false; + count++; } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + engine->id, - &i915->gpu_error.flags); + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + pr_info("i915_reset_engine(%s:%s): %lu resets\n", + engine->name, active ? "active" : "idle", count); + + if (i915_reset_engine_count(&i915->gpu_error, engine) - + resets[engine->id] != (active ? count : 0)) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", + engine->name, active ? "active" : "idle", count, + i915_reset_engine_count(&i915->gpu_error, + engine) - resets[engine->id]); + if (!err) + err = -EINVAL; + } unwind: - for_each_engine(active, i915, tmp) { + for_each_engine(other, i915, tmp) { int ret; if (!threads[tmp]) @@ -530,27 +698,29 @@ unwind: ret = kthread_stop(threads[tmp]); if (ret) { - pr_err("kthread for active engine %s failed, err=%d\n", - active->name, ret); + pr_err("kthread for other engine %s failed, err=%d\n", + other->name, ret); if (!err) err = ret; } put_task_struct(threads[tmp]); if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error, - active)) { + other)) { pr_err("Innocent engine %s was reset (count=%ld)\n", - active->name, + other->name, i915_reset_engine_count(&i915->gpu_error, - active) - resets[tmp]); - err = -EIO; + other) - resets[tmp]); + if (!err) + err = -EINVAL; } } if (global != i915_reset_count(&i915->gpu_error)) { pr_err("Global reset (count=%ld)!\n", i915_reset_count(&i915->gpu_error) - global); - err = -EIO; + if (!err) + err = -EINVAL; } if (err) @@ -562,9 +732,25 @@ unwind: if (i915_terminally_wedged(&i915->gpu_error)) err = -EIO; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + hang_fini(&h); + mutex_unlock(&i915->drm.struct_mutex); + } + return err; } +static int igt_reset_idle_engine_others(void *arg) +{ + return __igt_reset_engine_others(arg, false); +} + +static int igt_reset_active_engine_others(void *arg) +{ + return __igt_reset_engine_others(arg, true); +} + static u32 fake_hangcheck(struct drm_i915_gem_request *rq) { u32 reset_count; @@ -580,16 +766,6 @@ static u32 fake_hangcheck(struct drm_i915_gem_request *rq) return reset_count; } -static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) -{ - return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), - rq->fence.seqno), - 10) && - wait_for(i915_seqno_passed(hws_seqno(h, rq), - rq->fence.seqno), - 1000)); -} - static int igt_wait_reset(void *arg) { struct drm_i915_private *i915 = arg; @@ -621,7 +797,15 @@ static int igt_wait_reset(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, rq)) { - pr_err("Failed to start request %x\n", rq->fence.seqno); + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); + + i915_reset(i915, 0); + i915_gem_set_wedged(i915); + err = -EIO; goto out_rq; } @@ -708,10 +892,19 @@ static int igt_reset_queue(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, prev)) { - pr_err("Failed to start request %x\n", - prev->fence.seqno); + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %x, at %x\n", + __func__, prev->fence.seqno, hws_seqno(&h, prev)); + intel_engine_dump(prev->engine, &p, + "%s\n", prev->engine->name); + i915_gem_request_put(rq); i915_gem_request_put(prev); + + i915_reset(i915, 0); + 
i915_gem_set_wedged(i915); + err = -EIO; goto fini; } @@ -756,7 +949,7 @@ static int igt_reset_queue(void *arg) pr_info("%s: Completed %d resets\n", engine->name, count); *h.batch = MI_BATCH_BUFFER_END; - wmb(); + i915_gem_chipset_flush(i915); i915_gem_request_put(prev); } @@ -806,7 +999,15 @@ static int igt_handle_error(void *arg) __i915_add_request(rq, true); if (!wait_for_hang(&h, rq)) { - pr_err("Failed to start request %x\n", rq->fence.seqno); + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); + + i915_reset(i915, 0); + i915_gem_set_wedged(i915); + err = -EIO; goto err_request; } @@ -843,17 +1044,29 @@ err_unlock: int intel_hangcheck_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(igt_global_reset), /* attempt to recover GPU first */ SUBTEST(igt_hang_sanitycheck), - SUBTEST(igt_global_reset), - SUBTEST(igt_reset_engine), - SUBTEST(igt_reset_active_engines), + SUBTEST(igt_reset_idle_engine), + SUBTEST(igt_reset_active_engine), + SUBTEST(igt_reset_idle_engine_others), + SUBTEST(igt_reset_active_engine_others), SUBTEST(igt_wait_reset), SUBTEST(igt_reset_queue), SUBTEST(igt_handle_error), }; + bool saved_hangcheck; + int err; if (!intel_has_gpu_reset(i915)) return 0; - return i915_subtests(tests, i915); + intel_runtime_pm_get(i915); + saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); + + err = i915_subtests(tests, i915); + + i915_modparams.enable_hangcheck = saved_hangcheck; + intel_runtime_pm_put(i915); + + return err; } diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 3cac22eb47ce..2f6367643171 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -120,10 +120,10 @@ static int intel_uncore_check_forcewake_domains(struct drm_i915_private *dev_pri !IS_CHERRYVIEW(dev_priv)) return 0; - if (IS_VALLEYVIEW(dev_priv)) /* XXX system lockup! */ - return 0; - - if (IS_BROADWELL(dev_priv)) /* XXX random GPU hang afterwards! */ + /* + * This test may lockup the machine or cause GPU hangs afterwards. 
+ */ + if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN)) return 0; valid = kzalloc(BITS_TO_LONGS(FW_RANGE) * sizeof(*valid), @@ -148,7 +148,10 @@ static int intel_uncore_check_forcewake_domains(struct drm_i915_private *dev_pri for_each_set_bit(offset, valid, FW_RANGE) { i915_reg_t reg = { offset }; + iosf_mbi_punit_acquire(); intel_uncore_forcewake_reset(dev_priv, false); + iosf_mbi_punit_release(); + check_for_unclaimed_mmio(dev_priv); (void)I915_READ(reg); diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c new file mode 100644 index 000000000000..b26f07b55d86 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c @@ -0,0 +1,78 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "lib_sw_fence.h" + +/* Small library of different fence types useful for writing tests */ + +static int __i915_sw_fence_call +nop_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + return NOTIFY_DONE; +} + +void __onstack_fence_init(struct i915_sw_fence *fence, + const char *name, + struct lock_class_key *key) +{ + debug_fence_init_onstack(fence); + + __init_waitqueue_head(&fence->wait, name, key); + atomic_set(&fence->pending, 1); + fence->flags = (unsigned long)nop_fence_notify; +} + +void onstack_fence_fini(struct i915_sw_fence *fence) +{ + i915_sw_fence_commit(fence); + i915_sw_fence_fini(fence); +} + +static void timed_fence_wake(struct timer_list *t) +{ + struct timed_fence *tf = from_timer(tf, t, timer); + + i915_sw_fence_commit(&tf->fence); +} + +void timed_fence_init(struct timed_fence *tf, unsigned long expires) +{ + onstack_fence_init(&tf->fence); + + timer_setup_on_stack(&tf->timer, timed_fence_wake, 0); + + if (time_after(expires, jiffies)) + mod_timer(&tf->timer, expires); + else + i915_sw_fence_commit(&tf->fence); +} + +void timed_fence_fini(struct timed_fence *tf) +{ + if (del_timer_sync(&tf->timer)) + i915_sw_fence_commit(&tf->fence); + + destroy_timer_on_stack(&tf->timer); + i915_sw_fence_fini(&tf->fence); +} diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.h b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h new file mode 100644 index 000000000000..474aafb92ae1 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.h @@ -0,0 +1,42 @@ +/* + * lib_sw_fence.h - library routines for testing N:M synchronisation points + * + * Copyright (C) 2017 Intel Corporation + * + * This file is released under the GPLv2. + * + */ + +#ifndef _LIB_SW_FENCE_H_ +#define _LIB_SW_FENCE_H_ + +#include <linux/timer.h> + +#include "../i915_sw_fence.h" + +#ifdef CONFIG_LOCKDEP +#define onstack_fence_init(fence) \ +do { \ + static struct lock_class_key __key; \ + \ + __onstack_fence_init((fence), #fence, &__key); \ +} while (0) +#else +#define onstack_fence_init(fence) \ + __onstack_fence_init((fence), NULL, NULL) +#endif + +void __onstack_fence_init(struct i915_sw_fence *fence, + const char *name, + struct lock_class_key *key); +void onstack_fence_fini(struct i915_sw_fence *fence); + +struct timed_fence { + struct i915_sw_fence fence; + struct timer_list timer; +}; + +void timed_fence_init(struct timed_fence *tf, unsigned long expires); +void timed_fence_fini(struct timed_fence *tf); + +#endif /* _LIB_SW_FENCE_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index 098ce643ad07..bbf80d42e793 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -73,11 +73,7 @@ err_put: void mock_context_close(struct i915_gem_context *ctx) { - i915_gem_context_set_closed(ctx); - - i915_ppgtt_close(&ctx->ppgtt->base); - - i915_gem_context_put(ctx); + context_close(ctx); } void mock_init_contexts(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index fc0fd7498689..55c0e2c15782 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -32,22 +32,37 @@ static struct mock_request *first_request(struct mock_engine *engine) link); } -static void hw_delay_complete(unsigned long data) +static void advance(struct mock_engine *engine, + struct mock_request *request) { - struct mock_engine *engine = 
(typeof(engine))data; + list_del_init(&request->link); + mock_seqno_advance(&engine->base, request->base.global_seqno); +} + +static void hw_delay_complete(struct timer_list *t) +{ + struct mock_engine *engine = from_timer(engine, t, hw_delay); struct mock_request *request; spin_lock(&engine->hw_lock); - request = first_request(engine); - if (request) { - list_del_init(&request->link); - mock_seqno_advance(&engine->base, request->base.global_seqno); - } - + /* Timer fired, first request is complete */ request = first_request(engine); if (request) - mod_timer(&engine->hw_delay, jiffies + request->delay); + advance(engine, request); + + /* + * Also immediately signal any subsequent 0-delay requests, but + * requeue the timer for the next delayed request. + */ + while ((request = first_request(engine))) { + if (request->delay) { + mod_timer(&engine->hw_delay, jiffies + request->delay); + break; + } + + advance(engine, request); + } spin_unlock(&engine->hw_lock); } @@ -98,16 +113,22 @@ static void mock_submit_request(struct drm_i915_gem_request *request) spin_lock_irq(&engine->hw_lock); list_add_tail(&mock->link, &engine->hw_queue); - if (mock->link.prev == &engine->hw_queue) - mod_timer(&engine->hw_delay, jiffies + mock->delay); + if (mock->link.prev == &engine->hw_queue) { + if (mock->delay) + mod_timer(&engine->hw_delay, jiffies + mock->delay); + else + advance(engine, mock); + } spin_unlock_irq(&engine->hw_lock); } static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { - const unsigned long sz = roundup_pow_of_two(sizeof(struct intel_ring)); + const unsigned long sz = PAGE_SIZE / 2; struct intel_ring *ring; + BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz); + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); if (!ring) return NULL; @@ -161,9 +182,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* fake hw queue */ spin_lock_init(&engine->hw_lock); - setup_timer(&engine->hw_delay, - hw_delay_complete, - (unsigned long)engine); + timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); return &engine->base; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 678723430d78..1bc61f3f76fc 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -83,6 +83,10 @@ static void mock_device_release(struct drm_device *dev) kmem_cache_destroy(i915->vmas); kmem_cache_destroy(i915->objects); + i915_gemfs_fini(i915); + + drm_mode_config_cleanup(&i915->drm); + drm_dev_fini(&i915->drm); put_device(&i915->drm.pdev->dev); } @@ -146,6 +150,11 @@ struct drm_i915_private *mock_gem_device(void) dev_set_name(&pdev->dev, "mock"); dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); +#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) + /* hack to disable iommu for the fake device; force identity mapping */ + pdev->dev.archdata.iommu = (void *)-1; +#endif + dev_pm_domain_set(&pdev->dev, &pm_domain); pm_runtime_enable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); @@ -167,20 +176,20 @@ struct drm_i915_private *mock_gem_device(void) mkwrite_device_info(i915)->gen = -1; - spin_lock_init(&i915->mm.object_stat_lock); + mkwrite_device_info(i915)->page_sizes = + I915_GTT_PAGE_SIZE_4K | + I915_GTT_PAGE_SIZE_64K | + I915_GTT_PAGE_SIZE_2M; + mock_uncore_init(i915); + i915_gem_init__mm(i915); init_waitqueue_head(&i915->gpu_error.wait_queue); init_waitqueue_head(&i915->gpu_error.reset_queue); i915->wq = 
alloc_ordered_workqueue("mock", 0); if (!i915->wq) - goto put_device; - - INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); - init_llist_head(&i915->mm.free_list); - INIT_LIST_HEAD(&i915->mm.unbound_list); - INIT_LIST_HEAD(&i915->mm.bound_list); + goto err_drv; mock_init_contexts(i915); @@ -234,8 +243,16 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->kernel_context) goto err_engine; + i915->preempt_context = mock_context(i915, NULL); + if (!i915->preempt_context) + goto err_kernel_context; + + WARN_ON(i915_gemfs_init(i915)); + return i915; +err_kernel_context: + i915_gem_context_put(i915->kernel_context); err_engine: for_each_engine(engine, i915, id) mock_engine_free(engine); @@ -251,6 +268,9 @@ err_objects: kmem_cache_destroy(i915->objects); err_wq: destroy_workqueue(i915->wq); +err_drv: + drm_mode_config_cleanup(&i915->drm); + drm_dev_fini(&i915->drm); put_device: put_device(&pdev->dev); err: diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index f2118cf535a0..e96873f96116 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -43,7 +43,6 @@ static int mock_bind_ppgtt(struct i915_vma *vma, u32 flags) { GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); - vma->pages = vma->obj->mm.pages; vma->flags |= I915_VMA_LOCAL_BIND; return 0; } @@ -84,6 +83,8 @@ mock_ppgtt(struct drm_i915_private *i915, ppgtt->base.insert_entries = mock_insert_entries; ppgtt->base.bind_vma = mock_bind_ppgtt; ppgtt->base.unbind_vma = mock_unbind_ppgtt; + ppgtt->base.set_pages = ppgtt_set_pages; + ppgtt->base.clear_pages = clear_pages; ppgtt->base.cleanup = mock_cleanup; return ppgtt; @@ -93,12 +94,6 @@ static int mock_bind_ggtt(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { - int err; - - err = i915_get_ggtt_vma_pages(vma); - if (err) - return err; - vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; return 0; } @@ -115,8 +110,8 @@ void mock_init_ggtt(struct drm_i915_private *i915) ggtt->base.i915 = i915; - ggtt->mappable_base = 0; - ggtt->mappable_end = 2048 * PAGE_SIZE; + ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE); + ggtt->mappable_end = resource_size(&ggtt->gmadr); ggtt->base.total = 4096 * PAGE_SIZE; ggtt->base.clear_range = nop_clear_range; @@ -124,6 +119,8 @@ void mock_init_ggtt(struct drm_i915_private *i915) ggtt->base.insert_entries = mock_insert_entries; ggtt->base.bind_vma = mock_bind_ggtt; ggtt->base.unbind_vma = mock_unbind_ggtt; + ggtt->base.set_pages = ggtt_set_pages; + ggtt->base.clear_pages = clear_pages; ggtt->base.cleanup = mock_cleanup; i915_address_space_init(&ggtt->base, i915, "global"); diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c index 1cc5d2931753..cd6d2a16071f 100644 --- a/drivers/gpu/drm/i915/selftests/scatterlist.c +++ b/drivers/gpu/drm/i915/selftests/scatterlist.c @@ -189,6 +189,20 @@ static unsigned int random(unsigned long n, return 1 + (prandom_u32_state(rnd) % 1024); } +static unsigned int random_page_size_pages(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + /* 4K, 64K, 2M */ + static unsigned int page_count[] = { + BIT(12) >> PAGE_SHIFT, + BIT(16) >> PAGE_SHIFT, + BIT(21) >> PAGE_SHIFT, + }; + + return page_count[(prandom_u32_state(rnd) % 3)]; +} + static inline bool page_contiguous(struct page *first, struct page *last, unsigned long npages) @@ -252,6 +266,7 @@ static const npages_fn_t npages_funcs[] = { grow, shrink, random, + 
random_page_size_pages, NULL, };
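
A note on random_page_size_pages() just above: the three buckets are the driver's supported GTT page sizes (4K, 64K, 2M) expressed as page counts. A standalone check of that arithmetic, assuming the common PAGE_SHIFT of 12; everything below is illustrative and not part of the patch:

#include <stdio.h>

#define PAGE_SHIFT 12
#define BIT(n) (1ul << (n))

int main(void)
{
	/* 4K, 64K and 2M chunks expressed as page counts */
	printf("%lu %lu %lu\n",
	       BIT(12) >> PAGE_SHIFT,	/* 1   */
	       BIT(16) >> PAGE_SHIFT,	/* 16  */
	       BIT(21) >> PAGE_SHIFT);	/* 512 */
	return 0;
}

On 4KiB-page kernels the selftest therefore feeds the scatterlist builder chunks of 1, 16 and 512 pages.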
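
Back in intel_hangcheck.c, emit_recurse_batch() now brackets a 1KiB MI_NOOP window with MI_ARB_CHECK commands before looping with MI_BATCH_BUFFER_START, giving the command streamer arbitration points at which a per-engine reset can stop the otherwise-infinite batch; the added i915_gem_chipset_flush() makes the CPU writes visible to the GPU. A rough userspace sketch of the batch layout, with placeholder opcodes (the real encodings live in the driver headers):

#include <stdint.h>
#include <string.h>

#define ARB_CHECK	0x05000000u	/* placeholder, not the real MI_ARB_CHECK encoding */
#define BB_START	0x18000000u	/* placeholder MI_BATCH_BUFFER_START */
#define BB_END		0x0a000000u	/* placeholder MI_BATCH_BUFFER_END */

static uint32_t *emit_spin_batch(uint32_t *batch, uint32_t *hws, uint32_t seqno,
				 uint32_t loop_target)
{
	*hws = seqno;			/* stands in for the MI_STORE_DWORD_IMM breadcrumb */
	*batch++ = ARB_CHECK;		/* arbitration point before the pad */
	memset(batch, 0, 1024);		/* MI_NOOP == 0: a preemptible window */
	batch += 1024 / sizeof(*batch);
	*batch++ = ARB_CHECK;		/* ... and again before looping */
	*batch++ = BB_START;
	*batch++ = loop_target;		/* jump back: the batch spins forever */
	*batch++ = BB_END;		/* not reached while hung */
	return batch;
}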
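
wait_for_hang(), moved above its new callers, layers the driver's wait_for_us()/wait_for() macros into a two-stage poll: a 10µs busy-wait for the common case where the spinner starts immediately, then a sleeping poll for up to 1000ms. A minimal userspace analogue of that shape, with all names invented for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Busy-poll briefly, then fall back to a coarse sleeping poll. */
static bool two_stage_wait(bool (*done)(void *arg), void *arg)
{
	const uint64_t start = now_ns();

	while (now_ns() - start < 10 * 1000)		/* ~10us spin */
		if (done(arg))
			return true;

	while (now_ns() - start < 1000ull * 1000 * 1000) { /* ~1s total */
		if (done(arg))
			return true;
		nanosleep(&(struct timespec){ .tv_nsec = 1000000 }, NULL); /* 1ms naps */
	}

	return done(arg);	/* one final check after timeout */
}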
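
The mock_engine.c and lib_sw_fence.c hunks follow the tree-wide timer API conversion: callbacks now take the struct timer_list pointer and recover their container with from_timer(), replacing setup_timer() and the (unsigned long)engine cast. The pattern in isolation, with hypothetical structure names:

#include <linux/timer.h>

struct mock_like {
	struct timer_list hw_delay;
	int pending;
};

static void hw_delay_cb(struct timer_list *t)
{
	/* container_of() under the hood: t points into a struct mock_like */
	struct mock_like *m = from_timer(m, t, hw_delay);

	m->pending = 0;
}

static void mock_like_arm(struct mock_like *m, unsigned long delay)
{
	timer_setup(&m->hw_delay, hw_delay_cb, 0);
	m->pending = 1;
	mod_timer(&m->hw_delay, jiffies + delay);
}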
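
Finally, a hypothetical use of the timed_fence helpers added in lib_sw_fence.c, sketched under the assumption that callers chain further fences onto tf.fence:

#include "lib_sw_fence.h"

static void example_timed_fence(void)
{
	struct timed_fence tf;

	/* The timer commits tf.fence after ~100ms (or immediately if expired). */
	timed_fence_init(&tf, jiffies + msecs_to_jiffies(100));

	/* ... chain waiters via i915_sw_fence_await_sw_fence(..., &tf.fence, ...) ... */

	timed_fence_fini(&tf);	/* del_timer_sync() and commit if still pending */
}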