aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/Makefile5
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_engines.c36
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_engines.h14
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.c42
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt.h39
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt_pm.c601
-rw-r--r--drivers/gpu/drm/i915/gt/debugfs_gt_pm.h14
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.c483
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.h76
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c724
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c66
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c229
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h85
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h25
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c211
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c63
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h30
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_user.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c1486
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c280
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c83
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c51
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c598
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h587
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c647
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc_reg.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c179
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c218
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c149
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6_types.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.c97
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.h17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c142
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c272
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c123
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c91
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline_types.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c49
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c57
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c120
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_cs.c360
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c36
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c19
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c180
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c608
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_mocs.c419
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c203
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.h13
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c6
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c72
-rw-r--r--drivers/gpu/drm/i915/gt/selftests/mock_timeline.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftests/mock_timeline.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/Makefile5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c69
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h46
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c24
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c309
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h52
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c733
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h54
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c143
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.h36
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c58
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c299
82 files changed, 8950 insertions, 2980 deletions
diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile
deleted file mode 100644
index 7e73aa587967..000000000000
--- a/drivers/gpu/drm/i915/gt/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c
new file mode 100644
index 000000000000..6a5e9ab20b94
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "debugfs_engines.h"
+#include "debugfs_gt.h"
+#include "i915_drv.h" /* for_each_engine! */
+#include "intel_engine.h"
+
+static int engines_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct drm_printer p;
+
+ p = drm_seq_file_printer(m);
+ for_each_engine(engine, gt, id)
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(engines);
+
+void debugfs_engines_register(struct intel_gt *gt, struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "engines", &engines_fops },
+ };
+
+ debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.h b/drivers/gpu/drm/i915/gt/debugfs_engines.h
new file mode 100644
index 000000000000..f69257eaa1cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_engines.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_ENGINES_H
+#define DEBUGFS_ENGINES_H
+
+struct intel_gt;
+struct dentry;
+
+void debugfs_engines_register(struct intel_gt *gt, struct dentry *root);
+
+#endif /* DEBUGFS_ENGINES_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c
new file mode 100644
index 000000000000..75255aaacaed
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+
+#include "debugfs_engines.h"
+#include "debugfs_gt.h"
+#include "debugfs_gt_pm.h"
+#include "i915_drv.h"
+
+void debugfs_gt_register(struct intel_gt *gt)
+{
+ struct dentry *root;
+
+ if (!gt->i915->drm.primary->debugfs_root)
+ return;
+
+ root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root);
+ if (IS_ERR(root))
+ return;
+
+ debugfs_engines_register(gt, root);
+ debugfs_gt_pm_register(gt, root);
+}
+
+void debugfs_gt_register_files(struct intel_gt *gt,
+ struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count)
+{
+ while (count--) {
+ if (!files->eval || files->eval(gt))
+ debugfs_create_file(files->name,
+ 0444, root, gt,
+ files->fops);
+
+ files++;
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h
new file mode 100644
index 000000000000..4ea0f06cda8f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GT_H
+#define DEBUGFS_GT_H
+
+#include <linux/file.h>
+
+struct intel_gt;
+
+#define DEFINE_GT_DEBUGFS_ATTRIBUTE(__name) \
+ static int __name ## _open(struct inode *inode, struct file *file) \
+{ \
+ return single_open(file, __name ## _show, inode->i_private); \
+} \
+static const struct file_operations __name ## _fops = { \
+ .owner = THIS_MODULE, \
+ .open = __name ## _open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = single_release, \
+}
+
+void debugfs_gt_register(struct intel_gt *gt);
+
+struct debugfs_gt_file {
+ const char *name;
+ const struct file_operations *fops;
+ bool (*eval)(const struct intel_gt *gt);
+};
+
+void debugfs_gt_register_files(struct intel_gt *gt,
+ struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count);
+
+#endif /* DEBUGFS_GT_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
new file mode 100644
index 000000000000..059c9e5c002e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/seq_file.h>
+
+#include "debugfs_gt.h"
+#include "debugfs_gt_pm.h"
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_llc.h"
+#include "intel_rc6.h"
+#include "intel_rps.h"
+#include "intel_runtime_pm.h"
+#include "intel_sideband.h"
+#include "intel_uncore.h"
+
+static int fw_domains_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_uncore_forcewake_domain *fw_domain;
+ unsigned int tmp;
+
+ seq_printf(m, "user.bypass_count = %u\n",
+ uncore->user_forcewake_count);
+
+ for_each_fw_domain(fw_domain, uncore, tmp)
+ seq_printf(m, "%s.wake_count = %u\n",
+ intel_uncore_forcewake_domain_to_str(fw_domain->id),
+ READ_ONCE(fw_domain->wake_count));
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains);
+
+static void print_rc6_res(struct seq_file *m,
+ const char *title,
+ const i915_reg_t reg)
+{
+ struct intel_gt *gt = m->private;
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ seq_printf(m, "%s %u (%llu us)\n", title,
+ intel_uncore_read(gt->uncore, reg),
+ intel_rc6_residency_us(&gt->rc6, reg));
+}
+
+static int vlv_drpc(struct seq_file *m)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 rcctl1, pw_status;
+
+ pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS);
+ rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+
+ seq_printf(m, "RC6 Enabled: %s\n",
+ yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE |
+ GEN6_RC_CTL_EI_MODE(1))));
+ seq_printf(m, "Render Power Well: %s\n",
+ (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down");
+ seq_printf(m, "Media Power Well: %s\n",
+ (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
+
+ print_rc6_res(m, "Render RC6 residency since boot:", VLV_GT_RENDER_RC6);
+ print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6);
+
+ return fw_domains_show(m, NULL);
+}
+
+static int gen6_drpc(struct seq_file *m)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 gt_core_status, rcctl1, rc6vids = 0;
+ u32 gen9_powergate_enable = 0, gen9_powergate_status = 0;
+
+ gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS);
+
+ rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+ if (INTEL_GEN(i915) >= 9) {
+ gen9_powergate_enable =
+ intel_uncore_read(uncore, GEN9_PG_ENABLE);
+ gen9_powergate_status =
+ intel_uncore_read(uncore, GEN9_PWRGT_DOMAIN_STATUS);
+ }
+
+ if (INTEL_GEN(i915) <= 7)
+ sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
+ &rc6vids, NULL);
+
+ seq_printf(m, "RC1e Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
+ seq_printf(m, "RC6 Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
+ if (INTEL_GEN(i915) >= 9) {
+ seq_printf(m, "Render Well Gating Enabled: %s\n",
+ yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE));
+ seq_printf(m, "Media Well Gating Enabled: %s\n",
+ yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE));
+ }
+ seq_printf(m, "Deep RC6 Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE));
+ seq_printf(m, "Deepest RC6 Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE));
+ seq_puts(m, "Current RC state: ");
+ switch (gt_core_status & GEN6_RCn_MASK) {
+ case GEN6_RC0:
+ if (gt_core_status & GEN6_CORE_CPD_STATE_MASK)
+ seq_puts(m, "Core Power Down\n");
+ else
+ seq_puts(m, "on\n");
+ break;
+ case GEN6_RC3:
+ seq_puts(m, "RC3\n");
+ break;
+ case GEN6_RC6:
+ seq_puts(m, "RC6\n");
+ break;
+ case GEN6_RC7:
+ seq_puts(m, "RC7\n");
+ break;
+ default:
+ seq_puts(m, "Unknown\n");
+ break;
+ }
+
+ seq_printf(m, "Core Power Down: %s\n",
+ yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK));
+ if (INTEL_GEN(i915) >= 9) {
+ seq_printf(m, "Render Power Well: %s\n",
+ (gen9_powergate_status &
+ GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
+ seq_printf(m, "Media Power Well: %s\n",
+ (gen9_powergate_status &
+ GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
+ }
+
+ /* Not exactly sure what this is */
+ print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:",
+ GEN6_GT_GFX_RC6_LOCKED);
+ print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6);
+ print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p);
+ print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp);
+
+ if (INTEL_GEN(i915) <= 7) {
+ seq_printf(m, "RC6 voltage: %dmV\n",
+ GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff)));
+ seq_printf(m, "RC6+ voltage: %dmV\n",
+ GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff)));
+ seq_printf(m, "RC6++ voltage: %dmV\n",
+ GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff)));
+ }
+
+ return fw_domains_show(m, NULL);
+}
+
+static int ilk_drpc(struct seq_file *m)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 rgvmodectl, rstdbyctl;
+ u16 crstandvid;
+
+ rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+ rstdbyctl = intel_uncore_read(uncore, RSTDBYCTL);
+ crstandvid = intel_uncore_read16(uncore, CRSTANDVID);
+
+ seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
+ seq_printf(m, "Boost freq: %d\n",
+ (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >>
+ MEMMODE_BOOST_FREQ_SHIFT);
+ seq_printf(m, "HW control enabled: %s\n",
+ yesno(rgvmodectl & MEMMODE_HWIDLE_EN));
+ seq_printf(m, "SW control enabled: %s\n",
+ yesno(rgvmodectl & MEMMODE_SWMODE_EN));
+ seq_printf(m, "Gated voltage change: %s\n",
+ yesno(rgvmodectl & MEMMODE_RCLK_GATE));
+ seq_printf(m, "Starting frequency: P%d\n",
+ (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT);
+ seq_printf(m, "Max P-state: P%d\n",
+ (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT);
+ seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK));
+ seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f));
+ seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f));
+ seq_printf(m, "Render standby enabled: %s\n",
+ yesno(!(rstdbyctl & RCX_SW_EXIT)));
+ seq_puts(m, "Current RS state: ");
+ switch (rstdbyctl & RSX_STATUS_MASK) {
+ case RSX_STATUS_ON:
+ seq_puts(m, "on\n");
+ break;
+ case RSX_STATUS_RC1:
+ seq_puts(m, "RC1\n");
+ break;
+ case RSX_STATUS_RC1E:
+ seq_puts(m, "RC1E\n");
+ break;
+ case RSX_STATUS_RS1:
+ seq_puts(m, "RS1\n");
+ break;
+ case RSX_STATUS_RS2:
+ seq_puts(m, "RS2 (RC6)\n");
+ break;
+ case RSX_STATUS_RS3:
+ seq_puts(m, "RC3 (RC6+)\n");
+ break;
+ default:
+ seq_puts(m, "unknown\n");
+ break;
+ }
+
+ return 0;
+}
+
+static int drpc_show(struct seq_file *m, void *unused)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ intel_wakeref_t wakeref;
+ int err = -ENODEV;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ err = vlv_drpc(m);
+ else if (INTEL_GEN(i915) >= 6)
+ err = gen6_drpc(m);
+ else
+ err = ilk_drpc(m);
+ }
+
+ return err;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc);
+
+static int frequency_show(struct seq_file *m, void *unused)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_rps *rps = &gt->rps;
+ intel_wakeref_t wakeref;
+
+ wakeref = intel_runtime_pm_get(uncore->rpm);
+
+ if (IS_GEN(i915, 5)) {
+ u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+ u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK);
+
+ seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
+ seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f);
+ seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >>
+ MEMSTAT_VID_SHIFT);
+ seq_printf(m, "Current P-state: %d\n",
+ (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
+ } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+ u32 rpmodectl, freq_sts;
+
+ rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+ seq_printf(m, "Video Turbo Mode: %s\n",
+ yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
+ seq_printf(m, "HW control enabled: %s\n",
+ yesno(rpmodectl & GEN6_RP_ENABLE));
+ seq_printf(m, "SW control enabled: %s\n",
+ yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
+ GEN6_RP_MEDIA_SW_MODE));
+
+ vlv_punit_get(i915);
+ freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ vlv_punit_put(i915);
+
+ seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
+ seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq);
+
+ seq_printf(m, "actual GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, (freq_sts >> 8) & 0xff));
+
+ seq_printf(m, "current GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->cur_freq));
+
+ seq_printf(m, "max GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+
+ seq_printf(m, "min GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->min_freq));
+
+ seq_printf(m, "idle GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->idle_freq));
+
+ seq_printf(m, "efficient (RPe) frequency: %d MHz\n",
+ intel_gpu_freq(rps, rps->efficient_freq));
+ } else if (INTEL_GEN(i915) >= 6) {
+ u32 rp_state_limits;
+ u32 gt_perf_status;
+ u32 rp_state_cap;
+ u32 rpmodectl, rpinclimit, rpdeclimit;
+ u32 rpstat, cagf, reqf;
+ u32 rpupei, rpcurup, rpprevup;
+ u32 rpdownei, rpcurdown, rpprevdown;
+ u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
+ int max_freq;
+
+ rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
+ if (IS_GEN9_LP(i915)) {
+ rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+ gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
+ } else {
+ rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+ gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
+ }
+
+ /* RPSTAT1 is in the GT power well */
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
+ if (INTEL_GEN(i915) >= 9) {
+ reqf >>= 23;
+ } else {
+ reqf &= ~GEN6_TURBO_DISABLE;
+ if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ reqf >>= 24;
+ else
+ reqf >>= 25;
+ }
+ reqf = intel_gpu_freq(rps, reqf);
+
+ rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+ rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+ rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+ rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
+ rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
+ rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
+ rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
+ rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
+ rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
+ rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
+ cagf = intel_rps_read_actual_frequency(rps);
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ if (INTEL_GEN(i915) >= 11) {
+ pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+ pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
+ /*
+ * The equivalent to the PM ISR & IIR cannot be read
+ * without affecting the current state of the system
+ */
+ pm_isr = 0;
+ pm_iir = 0;
+ } else if (INTEL_GEN(i915) >= 8) {
+ pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
+ pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
+ pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
+ pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
+ } else {
+ pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
+ pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
+ pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
+ pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
+ }
+ pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+ seq_printf(m, "Video Turbo Mode: %s\n",
+ yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
+ seq_printf(m, "HW control enabled: %s\n",
+ yesno(rpmodectl & GEN6_RP_ENABLE));
+ seq_printf(m, "SW control enabled: %s\n",
+ yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
+ GEN6_RP_MEDIA_SW_MODE));
+
+ seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
+ pm_ier, pm_imr, pm_mask);
+ if (INTEL_GEN(i915) <= 10)
+ seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n",
+ pm_isr, pm_iir);
+ seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n",
+ rps->pm_intrmsk_mbz);
+ seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
+ seq_printf(m, "Render p-state ratio: %d\n",
+ (gt_perf_status & (INTEL_GEN(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
+ seq_printf(m, "Render p-state VID: %d\n",
+ gt_perf_status & 0xff);
+ seq_printf(m, "Render p-state limit: %d\n",
+ rp_state_limits & 0xff);
+ seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
+ seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl);
+ seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit);
+ seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
+ seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
+ seq_printf(m, "CAGF: %dMHz\n", cagf);
+ seq_printf(m, "RP CUR UP EI: %d (%dus)\n",
+ rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei));
+ seq_printf(m, "RP CUR UP: %d (%dus)\n",
+ rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup));
+ seq_printf(m, "RP PREV UP: %d (%dus)\n",
+ rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup));
+ seq_printf(m, "Up threshold: %d%%\n",
+ rps->power.up_threshold);
+
+ seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
+ rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei));
+ seq_printf(m, "RP CUR DOWN: %d (%dus)\n",
+ rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown));
+ seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
+ rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown));
+ seq_printf(m, "Down threshold: %d%%\n",
+ rps->power.down_threshold);
+
+ max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
+ rp_state_cap >> 16) & 0xff;
+ max_freq *= (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+ seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
+ intel_gpu_freq(rps, max_freq));
+
+ max_freq = (rp_state_cap & 0xff00) >> 8;
+ max_freq *= (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+ seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
+ intel_gpu_freq(rps, max_freq));
+
+ max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 :
+ rp_state_cap >> 0) & 0xff;
+ max_freq *= (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+ seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
+ intel_gpu_freq(rps, max_freq));
+ seq_printf(m, "Max overclocked frequency: %dMHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+
+ seq_printf(m, "Current freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->cur_freq));
+ seq_printf(m, "Actual freq: %d MHz\n", cagf);
+ seq_printf(m, "Idle freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->idle_freq));
+ seq_printf(m, "Min freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->min_freq));
+ seq_printf(m, "Boost freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->boost_freq));
+ seq_printf(m, "Max freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+ seq_printf(m,
+ "efficient (RPe) frequency: %d MHz\n",
+ intel_gpu_freq(rps, rps->efficient_freq));
+ } else {
+ seq_puts(m, "no P-state info available\n");
+ }
+
+ seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk);
+ seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq);
+ seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq);
+
+ intel_runtime_pm_put(uncore->rpm, wakeref);
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency);
+
+static int llc_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ const bool edram = INTEL_GEN(i915) > 8;
+ struct intel_rps *rps = &gt->rps;
+ unsigned int max_gpu_freq, min_gpu_freq;
+ intel_wakeref_t wakeref;
+ int gpu_freq, ia_freq;
+
+ seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(i915)));
+ seq_printf(m, "%s: %uMB\n", edram ? "eDRAM" : "eLLC",
+ i915->edram_size_mb);
+
+ min_gpu_freq = rps->min_freq;
+ max_gpu_freq = rps->max_freq;
+ if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+ /* Convert GT frequency to 50 HZ units */
+ min_gpu_freq /= GEN9_FREQ_SCALER;
+ max_gpu_freq /= GEN9_FREQ_SCALER;
+ }
+
+ seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+ for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
+ ia_freq = gpu_freq;
+ sandybridge_pcode_read(i915,
+ GEN6_PCODE_READ_MIN_FREQ_TABLE,
+ &ia_freq, NULL);
+ seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
+ intel_gpu_freq(rps,
+ (gpu_freq *
+ (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ?
+ GEN9_FREQ_SCALER : 1))),
+ ((ia_freq >> 0) & 0xff) * 100,
+ ((ia_freq >> 8) & 0xff) * 100);
+ }
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+
+ return 0;
+}
+
+static bool llc_eval(const struct intel_gt *gt)
+{
+ return HAS_LLC(gt->i915);
+}
+
+DEFINE_GT_DEBUGFS_ATTRIBUTE(llc);
+
+static const char *rps_power_to_str(unsigned int power)
+{
+ static const char * const strings[] = {
+ [LOW_POWER] = "low power",
+ [BETWEEN] = "mixed",
+ [HIGH_POWER] = "high power",
+ };
+
+ if (power >= ARRAY_SIZE(strings) || !strings[power])
+ return "unknown";
+
+ return strings[power];
+}
+
+static int rps_boost_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_rps *rps = &gt->rps;
+
+ seq_printf(m, "RPS enabled? %d\n", rps->enabled);
+ seq_printf(m, "GPU busy? %s\n", yesno(gt->awake));
+ seq_printf(m, "Boosts outstanding? %d\n",
+ atomic_read(&rps->num_waiters));
+ seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
+ seq_printf(m, "Frequency requested %d, actual %d\n",
+ intel_gpu_freq(rps, rps->cur_freq),
+ intel_rps_read_actual_frequency(rps));
+ seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n",
+ intel_gpu_freq(rps, rps->min_freq),
+ intel_gpu_freq(rps, rps->min_freq_softlimit),
+ intel_gpu_freq(rps, rps->max_freq_softlimit),
+ intel_gpu_freq(rps, rps->max_freq));
+ seq_printf(m, " idle:%d, efficient:%d, boost:%d\n",
+ intel_gpu_freq(rps, rps->idle_freq),
+ intel_gpu_freq(rps, rps->efficient_freq),
+ intel_gpu_freq(rps, rps->boost_freq));
+
+ seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
+
+ if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) {
+ struct intel_uncore *uncore = gt->uncore;
+ u32 rpup, rpupei;
+ u32 rpdown, rpdownei;
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ rpup = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK;
+ rpupei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK;
+ rpdown = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK;
+ rpdownei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK;
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n",
+ rps_power_to_str(rps->power.mode));
+ seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n",
+ rpup && rpupei ? 100 * rpup / rpupei : 0,
+ rps->power.up_threshold);
+ seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n",
+ rpdown && rpdownei ? 100 * rpdown / rpdownei : 0,
+ rps->power.down_threshold);
+ } else {
+ seq_puts(m, "\nRPS Autotuning inactive\n");
+ }
+
+ return 0;
+}
+
+static bool rps_eval(const struct intel_gt *gt)
+{
+ return HAS_RPS(gt->i915);
+}
+
+DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost);
+
+void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "drpc", &drpc_fops, NULL },
+ { "frequency", &frequency_fops, NULL },
+ { "forcewake", &fw_domains_fops, NULL },
+ { "llc", &llc_fops, llc_eval },
+ { "rps_boost", &rps_boost_fops, rps_eval },
+ };
+
+ debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
new file mode 100644
index 000000000000..4cf5f5c9da7d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GT_PM_H
+#define DEBUGFS_GT_PM_H
+
+struct intel_gt;
+struct dentry;
+
+void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root);
+
+#endif /* DEBUGFS_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
new file mode 100644
index 000000000000..f4fec7eb4064
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -0,0 +1,483 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/log2.h>
+
+#include "gen6_ppgtt.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+#include "intel_gt.h"
+
+/* Write pde (index) from the page directory @pd to the page table @pt */
+static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
+ const unsigned int pde,
+ const struct i915_page_table *pt)
+{
+ /* Caller needs to make sure the write completes if necessary */
+ iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
+ ppgtt->pd_addr + pde);
+}
+
+void gen7_ppgtt_enable(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 ecochk;
+
+ intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);
+
+ ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
+ if (IS_HASWELL(i915)) {
+ ecochk |= ECOCHK_PPGTT_WB_HSW;
+ } else {
+ ecochk |= ECOCHK_PPGTT_LLC_IVB;
+ ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
+ }
+ intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
+
+ for_each_engine(engine, gt, id) {
+ /* GFX_MODE is per-ring on gen7+ */
+ ENGINE_WRITE(engine,
+ RING_MODE_GEN7,
+ _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+ }
+}
+
+void gen6_ppgtt_enable(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+
+ intel_uncore_rmw(uncore,
+ GAC_ECO_BITS,
+ 0,
+ ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);
+
+ intel_uncore_rmw(uncore,
+ GAB_CTL,
+ 0,
+ GAB_CTL_CONT_AFTER_PAGEFAULT);
+
+ intel_uncore_rmw(uncore,
+ GAM_ECOCHK,
+ 0,
+ ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
+
+ if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
+ intel_uncore_write(uncore,
+ GFX_MODE,
+ _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+}
+
+/* PPGTT support for Sandybdrige/Gen6 and later */
+static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+ const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ const gen6_pte_t scratch_pte = vm->scratch[0].encode;
+ unsigned int pde = first_entry / GEN6_PTES;
+ unsigned int pte = first_entry % GEN6_PTES;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+
+ while (num_entries) {
+ struct i915_page_table * const pt =
+ i915_pt_entry(ppgtt->base.pd, pde++);
+ const unsigned int count = min(num_entries, GEN6_PTES - pte);
+ gen6_pte_t *vaddr;
+
+ GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
+
+ num_entries -= count;
+
+ GEM_BUG_ON(count > atomic_read(&pt->used));
+ if (!atomic_sub_return(count, &pt->used))
+ ppgtt->scan_for_unused_pt = true;
+
+ /*
+ * Note that the hw doesn't support removing PDE on the fly
+ * (they are cached inside the context with no means to
+ * invalidate the cache), so we can only reset the PTE
+ * entries back to scratch.
+ */
+
+ vaddr = kmap_atomic_px(pt);
+ memset32(vaddr + pte, scratch_pte, count);
+ kunmap_atomic(vaddr);
+
+ pte = 0;
+ }
+}
+
+static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+ struct i915_page_directory * const pd = ppgtt->pd;
+ unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
+ unsigned int act_pt = first_entry / GEN6_PTES;
+ unsigned int act_pte = first_entry % GEN6_PTES;
+ const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
+ struct sgt_dma iter = sgt_dma(vma);
+ gen6_pte_t *vaddr;
+
+ GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
+
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
+ do {
+ GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
+ vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
+
+ iter.dma += I915_GTT_PAGE_SIZE;
+ if (iter.dma == iter.max) {
+ iter.sg = __sg_next(iter.sg);
+ if (!iter.sg)
+ break;
+
+ iter.dma = sg_dma_address(iter.sg);
+ iter.max = iter.dma + iter.sg->length;
+ }
+
+ if (++act_pte == GEN6_PTES) {
+ kunmap_atomic(vaddr);
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
+ act_pte = 0;
+ }
+ } while (1);
+ kunmap_atomic(vaddr);
+
+ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+}
+
+static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
+{
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ struct i915_page_table *pt;
+ unsigned int pde;
+
+ start = round_down(start, SZ_64K);
+ end = round_up(end, SZ_64K) - start;
+
+ mutex_lock(&ppgtt->flush);
+
+ gen6_for_each_pde(pt, pd, start, end, pde)
+ gen6_write_pde(ppgtt, pde, pt);
+
+ mb();
+ ioread32(ppgtt->pd_addr + pde - 1);
+ gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
+ mb();
+
+ mutex_unlock(&ppgtt->flush);
+}
+
+static int gen6_alloc_va_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ struct i915_page_table *pt, *alloc = NULL;
+ intel_wakeref_t wakeref;
+ u64 from = start;
+ unsigned int pde;
+ int ret = 0;
+
+ wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
+ spin_lock(&pd->lock);
+ gen6_for_each_pde(pt, pd, start, length, pde) {
+ const unsigned int count = gen6_pte_count(start, length);
+
+ if (px_base(pt) == px_base(&vm->scratch[1])) {
+ spin_unlock(&pd->lock);
+
+ pt = fetch_and_zero(&alloc);
+ if (!pt)
+ pt = alloc_pt(vm);
+ if (IS_ERR(pt)) {
+ ret = PTR_ERR(pt);
+ goto unwind_out;
+ }
+
+ fill32_px(pt, vm->scratch[0].encode);
+
+ spin_lock(&pd->lock);
+ if (pd->entry[pde] == &vm->scratch[1]) {
+ pd->entry[pde] = pt;
+ } else {
+ alloc = pt;
+ pt = pd->entry[pde];
+ }
+ }
+
+ atomic_add(count, &pt->used);
+ }
+ spin_unlock(&pd->lock);
+
+ if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND))
+ gen6_flush_pd(ppgtt, from, start);
+
+ goto out;
+
+unwind_out:
+ gen6_ppgtt_clear_range(vm, from, start - from);
+out:
+ if (alloc)
+ free_px(vm, alloc);
+ intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
+ return ret;
+}
+
+static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
+{
+ struct i915_address_space * const vm = &ppgtt->base.vm;
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ int ret;
+
+ ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+ if (ret)
+ return ret;
+
+ vm->scratch[0].encode =
+ vm->pte_encode(px_dma(&vm->scratch[0]),
+ I915_CACHE_NONE, PTE_READ_ONLY);
+
+ if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
+ cleanup_scratch_page(vm);
+ return -ENOMEM;
+ }
+
+ fill32_px(&vm->scratch[1], vm->scratch[0].encode);
+ memset_p(pd->entry, &vm->scratch[1], I915_PDES);
+
+ return 0;
+}
+
+static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
+{
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ struct i915_page_dma * const scratch =
+ px_base(&ppgtt->base.vm.scratch[1]);
+ struct i915_page_table *pt;
+ u32 pde;
+
+ gen6_for_all_pdes(pt, pd, pde)
+ if (px_base(pt) != scratch)
+ free_px(&ppgtt->base.vm, pt);
+}
+
+static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+
+ __i915_vma_put(ppgtt->vma);
+
+ gen6_ppgtt_free_pd(ppgtt);
+ free_scratch(vm);
+
+ mutex_destroy(&ppgtt->flush);
+ mutex_destroy(&ppgtt->pin_mutex);
+ kfree(ppgtt->base.pd);
+}
+
+static int pd_vma_set_pages(struct i915_vma *vma)
+{
+ vma->pages = ERR_PTR(-ENODEV);
+ return 0;
+}
+
+static void pd_vma_clear_pages(struct i915_vma *vma)
+{
+ GEM_BUG_ON(!vma->pages);
+
+ vma->pages = NULL;
+}
+
+static int pd_vma_bind(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
+ struct gen6_ppgtt *ppgtt = vma->private;
+ u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
+
+ px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
+ ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
+
+ gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
+ return 0;
+}
+
+static void pd_vma_unbind(struct i915_vma *vma)
+{
+ struct gen6_ppgtt *ppgtt = vma->private;
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ struct i915_page_dma * const scratch =
+ px_base(&ppgtt->base.vm.scratch[1]);
+ struct i915_page_table *pt;
+ unsigned int pde;
+
+ if (!ppgtt->scan_for_unused_pt)
+ return;
+
+ /* Free all no longer used page tables */
+ gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
+ if (px_base(pt) == scratch || atomic_read(&pt->used))
+ continue;
+
+ free_px(&ppgtt->base.vm, pt);
+ pd->entry[pde] = scratch;
+ }
+
+ ppgtt->scan_for_unused_pt = false;
+}
+
+static const struct i915_vma_ops pd_vma_ops = {
+ .set_pages = pd_vma_set_pages,
+ .clear_pages = pd_vma_clear_pages,
+ .bind_vma = pd_vma_bind,
+ .unbind_vma = pd_vma_unbind,
+};
+
+static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
+{
+ struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
+ struct i915_vma *vma;
+
+ GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+ GEM_BUG_ON(size > ggtt->vm.total);
+
+ vma = i915_vma_alloc();
+ if (!vma)
+ return ERR_PTR(-ENOMEM);
+
+ i915_active_init(&vma->active, NULL, NULL);
+
+ kref_init(&vma->ref);
+ mutex_init(&vma->pages_mutex);
+ vma->vm = i915_vm_get(&ggtt->vm);
+ vma->ops = &pd_vma_ops;
+ vma->private = ppgtt;
+
+ vma->size = size;
+ vma->fence_size = size;
+ atomic_set(&vma->flags, I915_VMA_GGTT);
+ vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
+
+ INIT_LIST_HEAD(&vma->obj_link);
+ INIT_LIST_HEAD(&vma->closed_link);
+
+ return vma;
+}
+
+int gen6_ppgtt_pin(struct i915_ppgtt *base)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+ int err;
+
+ GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
+
+ /*
+ * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
+ * which will be pinned into every active context.
+ * (When vma->pin_count becomes atomic, I expect we will naturally
+ * need a larger, unpacked, type and kill this redundancy.)
+ */
+ if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
+ return 0;
+
+ if (mutex_lock_interruptible(&ppgtt->pin_mutex))
+ return -EINTR;
+
+ /*
+ * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
+ * allocator works in address space sizes, so it's multiplied by page
+ * size. We allocate at the top of the GTT to avoid fragmentation.
+ */
+ err = 0;
+ if (!atomic_read(&ppgtt->pin_count))
+ err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
+ if (!err)
+ atomic_inc(&ppgtt->pin_count);
+ mutex_unlock(&ppgtt->pin_mutex);
+
+ return err;
+}
+
+void gen6_ppgtt_unpin(struct i915_ppgtt *base)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+ GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
+ if (atomic_dec_and_test(&ppgtt->pin_count))
+ i915_vma_unpin(ppgtt->vma);
+}
+
+void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+ if (!atomic_read(&ppgtt->pin_count))
+ return;
+
+ i915_vma_unpin(ppgtt->vma);
+ atomic_set(&ppgtt->pin_count, 0);
+}
+
+struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
+{
+ struct i915_ggtt * const ggtt = gt->ggtt;
+ struct gen6_ppgtt *ppgtt;
+ int err;
+
+ ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+ if (!ppgtt)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&ppgtt->flush);
+ mutex_init(&ppgtt->pin_mutex);
+
+ ppgtt_init(&ppgtt->base, gt);
+ ppgtt->base.vm.top = 1;
+
+ ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
+ ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
+ ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
+ ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
+ ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
+
+ ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
+
+ ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
+ if (!ppgtt->base.pd) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ err = gen6_ppgtt_init_scratch(ppgtt);
+ if (err)
+ goto err_pd;
+
+ ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
+ if (IS_ERR(ppgtt->vma)) {
+ err = PTR_ERR(ppgtt->vma);
+ goto err_scratch;
+ }
+
+ return &ppgtt->base;
+
+err_scratch:
+ free_scratch(&ppgtt->base.vm);
+err_pd:
+ kfree(ppgtt->base.pd);
+err_free:
+ mutex_destroy(&ppgtt->pin_mutex);
+ kfree(ppgtt);
+ return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
new file mode 100644
index 000000000000..72e481806c96
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __GEN6_PPGTT_H__
+#define __GEN6_PPGTT_H__
+
+#include "intel_gtt.h"
+
+struct gen6_ppgtt {
+ struct i915_ppgtt base;
+
+ struct mutex flush;
+ struct i915_vma *vma;
+ gen6_pte_t __iomem *pd_addr;
+
+ atomic_t pin_count;
+ struct mutex pin_mutex;
+
+ bool scan_for_unused_pt;
+};
+
+static inline u32 gen6_pte_index(u32 addr)
+{
+ return i915_pte_index(addr, GEN6_PDE_SHIFT);
+}
+
+static inline u32 gen6_pte_count(u32 addr, u32 length)
+{
+ return i915_pte_count(addr, length, GEN6_PDE_SHIFT);
+}
+
+static inline u32 gen6_pde_index(u32 addr)
+{
+ return i915_pde_index(addr, GEN6_PDE_SHIFT);
+}
+
+#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base)
+
+static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
+{
+ BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base));
+ return __to_gen6_ppgtt(base);
+}
+
+/*
+ * gen6_for_each_pde() iterates over every pde from start until start+length.
+ * If start and start+length are not perfectly divisible, the macro will round
+ * down and up as needed. Start=0 and length=2G effectively iterates over
+ * every PDE in the system. The macro modifies ALL its parameters except 'pd',
+ * so each of the other parameters should preferably be a simple variable, or
+ * at most an lvalue with no side-effects!
+ */
+#define gen6_for_each_pde(pt, pd, start, length, iter) \
+ for (iter = gen6_pde_index(start); \
+ length > 0 && iter < I915_PDES && \
+ (pt = i915_pt_entry(pd, iter), true); \
+ ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT); \
+ temp = min(temp - start, length); \
+ start += temp, length -= temp; }), ++iter)
+
+#define gen6_for_all_pdes(pt, pd, iter) \
+ for (iter = 0; \
+ iter < I915_PDES && \
+ (pt = i915_pt_entry(pd, iter), true); \
+ ++iter)
+
+int gen6_ppgtt_pin(struct i915_ppgtt *base);
+void gen6_ppgtt_unpin(struct i915_ppgtt *base);
+void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
+void gen6_ppgtt_enable(struct intel_gt *gt);
+void gen7_ppgtt_enable(struct intel_gt *gt);
+struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
new file mode 100644
index 000000000000..4d1de2d97d5c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -0,0 +1,724 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/log2.h>
+
+#include "gen8_ppgtt.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+#include "intel_gt.h"
+#include "intel_gtt.h"
+
+static u64 gen8_pde_encode(const dma_addr_t addr,
+ const enum i915_cache_level level)
+{
+ u64 pde = addr | _PAGE_PRESENT | _PAGE_RW;
+
+ if (level != I915_CACHE_NONE)
+ pde |= PPAT_CACHED_PDE;
+ else
+ pde |= PPAT_UNCACHED;
+
+ return pde;
+}
+
+static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
+{
+ struct drm_i915_private *i915 = ppgtt->vm.i915;
+ struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
+ enum vgt_g2v_type msg;
+ int i;
+
+ if (create)
+ atomic_inc(px_used(ppgtt->pd)); /* never remove */
+ else
+ atomic_dec(px_used(ppgtt->pd));
+
+ mutex_lock(&i915->vgpu.lock);
+
+ if (i915_vm_is_4lvl(&ppgtt->vm)) {
+ const u64 daddr = px_dma(ppgtt->pd);
+
+ intel_uncore_write(uncore,
+ vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
+ intel_uncore_write(uncore,
+ vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
+
+ msg = create ?
+ VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
+ VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
+ } else {
+ for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+ const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+ intel_uncore_write(uncore,
+ vgtif_reg(pdp[i].lo),
+ lower_32_bits(daddr));
+ intel_uncore_write(uncore,
+ vgtif_reg(pdp[i].hi),
+ upper_32_bits(daddr));
+ }
+
+ msg = create ?
+ VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
+ VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
+ }
+
+ /* g2v_notify atomically (via hv trap) consumes the message packet. */
+ intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);
+
+ mutex_unlock(&i915->vgpu.lock);
+}
+
+/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
+#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
+#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
+#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
+#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
+#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
+#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
+#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
+
+#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
+
+static inline unsigned int
+gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
+{
+ const int shift = gen8_pd_shift(lvl);
+ const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
+
+ GEM_BUG_ON(start >= end);
+ end += ~mask >> gen8_pd_shift(1);
+
+ *idx = i915_pde_index(start, shift);
+ if ((start ^ end) & mask)
+ return GEN8_PDES - *idx;
+ else
+ return i915_pde_index(end, shift) - *idx;
+}
+
+static inline bool gen8_pd_contains(u64 start, u64 end, int lvl)
+{
+ const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
+
+ GEM_BUG_ON(start >= end);
+ return (start ^ end) & mask && (start & ~mask) == 0;
+}
+
+static inline unsigned int gen8_pt_count(u64 start, u64 end)
+{
+ GEM_BUG_ON(start >= end);
+ if ((start ^ end) >> gen8_pd_shift(1))
+ return GEN8_PDES - (start & (GEN8_PDES - 1));
+ else
+ return end - start;
+}
+
+static inline unsigned int
+gen8_pd_top_count(const struct i915_address_space *vm)
+{
+ unsigned int shift = __gen8_pte_shift(vm->top);
+ return (vm->total + (1ull << shift) - 1) >> shift;
+}
+
+static inline struct i915_page_directory *
+gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
+{
+ struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
+
+ if (vm->top == 2)
+ return ppgtt->pd;
+ else
+ return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
+}
+
+static inline struct i915_page_directory *
+gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
+{
+ return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
+}
+
+static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
+ struct i915_page_directory *pd,
+ int count, int lvl)
+{
+ if (lvl) {
+ void **pde = pd->entry;
+
+ do {
+ if (!*pde)
+ continue;
+
+ __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
+ } while (pde++, --count);
+ }
+
+ free_px(vm, pd);
+}
+
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+ struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
+ if (intel_vgpu_active(vm->i915))
+ gen8_ppgtt_notify_vgt(ppgtt, false);
+
+ __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
+ free_scratch(vm);
+}
+
+static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
+ struct i915_page_directory * const pd,
+ u64 start, const u64 end, int lvl)
+{
+ const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+ unsigned int idx, len;
+
+ GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
+
+ len = gen8_pd_range(start, end, lvl--, &idx);
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, start, end,
+ idx, len, atomic_read(px_used(pd)));
+ GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
+
+ do {
+ struct i915_page_table *pt = pd->entry[idx];
+
+ if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
+ gen8_pd_contains(start, end, lvl)) {
+ DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
+ __func__, vm, lvl + 1, idx, start, end);
+ clear_pd_entry(pd, idx, scratch);
+ __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
+ start += (u64)I915_PDES << gen8_pd_shift(lvl);
+ continue;
+ }
+
+ if (lvl) {
+ start = __gen8_ppgtt_clear(vm, as_pd(pt),
+ start, end, lvl);
+ } else {
+ unsigned int count;
+ u64 *vaddr;
+
+ count = gen8_pt_count(start, end);
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
+ __func__, vm, lvl, start, end,
+ gen8_pd_index(start, 0), count,
+ atomic_read(&pt->used));
+ GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
+
+ vaddr = kmap_atomic_px(pt);
+ memset64(vaddr + gen8_pd_index(start, 0),
+ vm->scratch[0].encode,
+ count);
+ kunmap_atomic(vaddr);
+
+ atomic_sub(count, &pt->used);
+ start += count;
+ }
+
+ if (release_pd_entry(pd, idx, pt, scratch))
+ free_px(vm, pt);
+ } while (idx++, --len);
+
+ return start;
+}
+
+static void gen8_ppgtt_clear(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(range_overflows(start, length, vm->total));
+
+ start >>= GEN8_PTE_SHIFT;
+ length >>= GEN8_PTE_SHIFT;
+ GEM_BUG_ON(length == 0);
+
+ __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+ start, start + length, vm->top);
+}
+
+static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
+ struct i915_page_directory * const pd,
+ u64 * const start, const u64 end, int lvl)
+{
+ const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+ struct i915_page_table *alloc = NULL;
+ unsigned int idx, len;
+ int ret = 0;
+
+ GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
+
+ len = gen8_pd_range(*start, end, lvl--, &idx);
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, *start, end,
+ idx, len, atomic_read(px_used(pd)));
+ GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
+
+ spin_lock(&pd->lock);
+ GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
+ do {
+ struct i915_page_table *pt = pd->entry[idx];
+
+ if (!pt) {
+ spin_unlock(&pd->lock);
+
+ DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
+ __func__, vm, lvl + 1, idx);
+
+ pt = fetch_and_zero(&alloc);
+ if (lvl) {
+ if (!pt) {
+ pt = &alloc_pd(vm)->pt;
+ if (IS_ERR(pt)) {
+ ret = PTR_ERR(pt);
+ goto out;
+ }
+ }
+
+ fill_px(pt, vm->scratch[lvl].encode);
+ } else {
+ if (!pt) {
+ pt = alloc_pt(vm);
+ if (IS_ERR(pt)) {
+ ret = PTR_ERR(pt);
+ goto out;
+ }
+ }
+
+ if (intel_vgpu_active(vm->i915) ||
+ gen8_pt_count(*start, end) < I915_PDES)
+ fill_px(pt, vm->scratch[lvl].encode);
+ }
+
+ spin_lock(&pd->lock);
+ if (likely(!pd->entry[idx]))
+ set_pd_entry(pd, idx, pt);
+ else
+ alloc = pt, pt = pd->entry[idx];
+ }
+
+ if (lvl) {
+ atomic_inc(&pt->used);
+ spin_unlock(&pd->lock);
+
+ ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
+ start, end, lvl);
+ if (unlikely(ret)) {
+ if (release_pd_entry(pd, idx, pt, scratch))
+ free_px(vm, pt);
+ goto out;
+ }
+
+ spin_lock(&pd->lock);
+ atomic_dec(&pt->used);
+ GEM_BUG_ON(!atomic_read(&pt->used));
+ } else {
+ unsigned int count = gen8_pt_count(*start, end);
+
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
+ __func__, vm, lvl, *start, end,
+ gen8_pd_index(*start, 0), count,
+ atomic_read(&pt->used));
+
+ atomic_add(count, &pt->used);
+ /* All other pdes may be simultaneously removed */
+ GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
+ *start += count;
+ }
+ } while (idx++, --len);
+ spin_unlock(&pd->lock);
+out:
+ if (alloc)
+ free_px(vm, alloc);
+ return ret;
+}
+
+static int gen8_ppgtt_alloc(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ u64 from;
+ int err;
+
+ GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(range_overflows(start, length, vm->total));
+
+ start >>= GEN8_PTE_SHIFT;
+ length >>= GEN8_PTE_SHIFT;
+ GEM_BUG_ON(length == 0);
+ from = start;
+
+ err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
+ &start, start + length, vm->top);
+ if (unlikely(err && from != start))
+ __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+ from, start, vm->top);
+
+ return err;
+}
+
+static __always_inline u64
+gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
+ struct i915_page_directory *pdp,
+ struct sgt_dma *iter,
+ u64 idx,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ struct i915_page_directory *pd;
+ const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+ gen8_pte_t *vaddr;
+
+ pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+ do {
+ GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
+ vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
+
+ iter->dma += I915_GTT_PAGE_SIZE;
+ if (iter->dma >= iter->max) {
+ iter->sg = __sg_next(iter->sg);
+ if (!iter->sg) {
+ idx = 0;
+ break;
+ }
+
+ iter->dma = sg_dma_address(iter->sg);
+ iter->max = iter->dma + iter->sg->length;
+ }
+
+ if (gen8_pd_index(++idx, 0) == 0) {
+ if (gen8_pd_index(idx, 1) == 0) {
+ /* Limited by sg length for 3lvl */
+ if (gen8_pd_index(idx, 2) == 0)
+ break;
+
+ pd = pdp->entry[gen8_pd_index(idx, 2)];
+ }
+
+ kunmap_atomic(vaddr);
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+ }
+ } while (1);
+ kunmap_atomic(vaddr);
+
+ return idx;
+}
+
+static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
+ struct sgt_dma *iter,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+ u64 start = vma->node.start;
+ dma_addr_t rem = iter->sg->length;
+
+ GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
+
+ do {
+ struct i915_page_directory * const pdp =
+ gen8_pdp_for_page_address(vma->vm, start);
+ struct i915_page_directory * const pd =
+ i915_pd_entry(pdp, __gen8_pte_index(start, 2));
+ gen8_pte_t encode = pte_encode;
+ unsigned int maybe_64K = -1;
+ unsigned int page_size;
+ gen8_pte_t *vaddr;
+ u16 index;
+
+ if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
+ IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
+ rem >= I915_GTT_PAGE_SIZE_2M &&
+ !__gen8_pte_index(start, 0)) {
+ index = __gen8_pte_index(start, 1);
+ encode |= GEN8_PDE_PS_2M;
+ page_size = I915_GTT_PAGE_SIZE_2M;
+
+ vaddr = kmap_atomic_px(pd);
+ } else {
+ struct i915_page_table *pt =
+ i915_pt_entry(pd, __gen8_pte_index(start, 1));
+
+ index = __gen8_pte_index(start, 0);
+ page_size = I915_GTT_PAGE_SIZE;
+
+ if (!index &&
+ vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+ IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+ (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+ rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
+ maybe_64K = __gen8_pte_index(start, 1);
+
+ vaddr = kmap_atomic_px(pt);
+ }
+
+ do {
+ GEM_BUG_ON(iter->sg->length < page_size);
+ vaddr[index++] = encode | iter->dma;
+
+ start += page_size;
+ iter->dma += page_size;
+ rem -= page_size;
+ if (iter->dma >= iter->max) {
+ iter->sg = __sg_next(iter->sg);
+ if (!iter->sg)
+ break;
+
+ rem = iter->sg->length;
+ iter->dma = sg_dma_address(iter->sg);
+ iter->max = iter->dma + rem;
+
+ if (maybe_64K != -1 && index < I915_PDES &&
+ !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+ (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+ rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
+ maybe_64K = -1;
+
+ if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
+ break;
+ }
+ } while (rem >= page_size && index < I915_PDES);
+
+ kunmap_atomic(vaddr);
+
+ /*
+ * Is it safe to mark the 2M block as 64K? -- Either we have
+ * filled whole page-table with 64K entries, or filled part of
+ * it and have reached the end of the sg table and we have
+ * enough padding.
+ */
+ if (maybe_64K != -1 &&
+ (index == I915_PDES ||
+ (i915_vm_has_scratch_64K(vma->vm) &&
+ !iter->sg && IS_ALIGNED(vma->node.start +
+ vma->node.size,
+ I915_GTT_PAGE_SIZE_2M)))) {
+ vaddr = kmap_atomic_px(pd);
+ vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
+ kunmap_atomic(vaddr);
+ page_size = I915_GTT_PAGE_SIZE_64K;
+
+ /*
+ * We write all 4K page entries, even when using 64K
+ * pages. In order to verify that the HW isn't cheating
+ * by using the 4K PTE instead of the 64K PTE, we want
+ * to remove all the surplus entries. If the HW skipped
+ * the 64K PTE, it will read/write into the scratch page
+ * instead - which we detect as missing results during
+ * selftests.
+ */
+ if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
+ u16 i;
+
+ encode = vma->vm->scratch[0].encode;
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
+
+ for (i = 1; i < index; i += 16)
+ memset64(vaddr + i, encode, 15);
+
+ kunmap_atomic(vaddr);
+ }
+ }
+
+ vma->page_sizes.gtt |= page_size;
+ } while (iter->sg);
+}
+
+static void gen8_ppgtt_insert(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
+ struct sgt_dma iter = sgt_dma(vma);
+
+ if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
+ gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
+ } else {
+ u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
+
+ do {
+ struct i915_page_directory * const pdp =
+ gen8_pdp_for_page_index(vm, idx);
+
+ idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
+ cache_level, flags);
+ } while (idx);
+
+ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+ }
+}
+
+static int gen8_init_scratch(struct i915_address_space *vm)
+{
+ int ret;
+ int i;
+
+ /*
+ * If everybody agrees to not to write into the scratch page,
+ * we can reuse it for all vm, keeping contexts and processes separate.
+ */
+ if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
+ struct i915_address_space *clone = vm->gt->vm;
+
+ GEM_BUG_ON(!clone->has_read_only);
+
+ vm->scratch_order = clone->scratch_order;
+ memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
+ px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
+ return 0;
+ }
+
+ ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+ if (ret)
+ return ret;
+
+ vm->scratch[0].encode =
+ gen8_pte_encode(px_dma(&vm->scratch[0]),
+ I915_CACHE_LLC, vm->has_read_only);
+
+ for (i = 1; i <= vm->top; i++) {
+ if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
+ goto free_scratch;
+
+ fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
+ vm->scratch[i].encode =
+ gen8_pde_encode(px_dma(&vm->scratch[i]),
+ I915_CACHE_LLC);
+ }
+
+ return 0;
+
+free_scratch:
+ free_scratch(vm);
+ return -ENOMEM;
+}
+
+static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
+{
+ struct i915_address_space *vm = &ppgtt->vm;
+ struct i915_page_directory *pd = ppgtt->pd;
+ unsigned int idx;
+
+ GEM_BUG_ON(vm->top != 2);
+ GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);
+
+ for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
+ struct i915_page_directory *pde;
+
+ pde = alloc_pd(vm);
+ if (IS_ERR(pde))
+ return PTR_ERR(pde);
+
+ fill_px(pde, vm->scratch[1].encode);
+ set_pd_entry(pd, idx, pde);
+ atomic_inc(px_used(pde)); /* keep pinned */
+ }
+ wmb();
+
+ return 0;
+}
+
+static struct i915_page_directory *
+gen8_alloc_top_pd(struct i915_address_space *vm)
+{
+ const unsigned int count = gen8_pd_top_count(vm);
+ struct i915_page_directory *pd;
+
+ GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
+
+ pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
+ if (unlikely(!pd))
+ return ERR_PTR(-ENOMEM);
+
+ if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+ kfree(pd);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
+ atomic_inc(px_used(pd)); /* mark as pinned */
+ return pd;
+}
+
+/*
+ * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
+ * with a net effect resembling a 2-level page table in normal x86 terms. Each
+ * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
+ * space.
+ *
+ */
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
+{
+ struct i915_ppgtt *ppgtt;
+ int err;
+
+ ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+ if (!ppgtt)
+ return ERR_PTR(-ENOMEM);
+
+ ppgtt_init(ppgtt, gt);
+ ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
+
+ /*
+ * From bdw, there is hw support for read-only pages in the PPGTT.
+ *
+ * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
+ * for now.
+ *
+ * Gen12 has inherited the same read-only fault issue from gen11.
+ */
+ ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12);
+
+ /*
+ * There are only few exceptions for gen >=6. chv and bxt.
+ * And we are not sure about the latter so play safe for now.
+ */
+ if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915))
+ ppgtt->vm.pt_kmap_wc = true;
+
+ err = gen8_init_scratch(&ppgtt->vm);
+ if (err)
+ goto err_free;
+
+ ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
+ if (IS_ERR(ppgtt->pd)) {
+ err = PTR_ERR(ppgtt->pd);
+ goto err_free_scratch;
+ }
+
+ if (!i915_vm_is_4lvl(&ppgtt->vm)) {
+ err = gen8_preallocate_top_level_pdp(ppgtt);
+ if (err)
+ goto err_free_pd;
+ }
+
+ ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
+ ppgtt->vm.insert_entries = gen8_ppgtt_insert;
+ ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
+ ppgtt->vm.clear_range = gen8_ppgtt_clear;
+
+ if (intel_vgpu_active(gt->i915))
+ gen8_ppgtt_notify_vgt(ppgtt, true);
+
+ ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
+
+ return ppgtt;
+
+err_free_pd:
+ __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
+ gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
+err_free_scratch:
+ free_scratch(&ppgtt->vm);
+err_free:
+ kfree(ppgtt);
+ return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
new file mode 100644
index 000000000000..76a08b9c1f5c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __GEN8_PPGTT_H__
+#define __GEN8_PPGTT_H__
+
+struct intel_gt;
+
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 55317081d48b..0ba524a414c6 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -28,6 +28,8 @@
#include "i915_drv.h"
#include "i915_trace.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
static void irq_enable(struct intel_engine_cs *engine)
{
@@ -53,15 +55,17 @@ static void irq_disable(struct intel_engine_cs *engine)
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
+ struct intel_engine_cs *engine =
+ container_of(b, struct intel_engine_cs, breadcrumbs);
+
lockdep_assert_held(&b->irq_lock);
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
- irq_disable(container_of(b,
- struct intel_engine_cs,
- breadcrumbs));
+ irq_disable(engine);
b->irq_armed = false;
+ intel_gt_pm_put_async(engine->gt);
}
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
@@ -127,16 +131,23 @@ __dma_fence_signal__notify(struct dma_fence *fence,
}
}
-void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
+static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ struct intel_engine_cs *engine =
+ container_of(b, struct intel_engine_cs, breadcrumbs);
+
+ intel_engine_add_retire(engine, tl);
+}
+
+static void signal_irq_work(struct irq_work *work)
+{
+ struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
const ktime_t timestamp = ktime_get();
struct intel_context *ce, *cn;
struct list_head *pos, *next;
- unsigned long flags;
LIST_HEAD(signal);
- spin_lock_irqsave(&b->irq_lock, flags);
+ spin_lock(&b->irq_lock);
if (b->irq_armed && list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
@@ -177,44 +188,41 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
if (!list_is_first(pos, &ce->signals)) {
/* Advance the list to the first incomplete request */
__list_del_many(&ce->signals, pos);
- if (&ce->signals == pos) /* now empty */
+ if (&ce->signals == pos) { /* now empty */
list_del_init(&ce->signal_link);
+ add_retire(b, ce->timeline);
+ }
}
}
- spin_unlock_irqrestore(&b->irq_lock, flags);
+ spin_unlock(&b->irq_lock);
list_for_each_safe(pos, next, &signal) {
struct i915_request *rq =
list_entry(pos, typeof(*rq), signal_link);
struct list_head cb_list;
- spin_lock_irqsave(&rq->lock, flags);
+ spin_lock(&rq->lock);
list_replace(&rq->fence.cb_list, &cb_list);
__dma_fence_signal__timestamp(&rq->fence, timestamp);
__dma_fence_signal__notify(&rq->fence, &cb_list);
- spin_unlock_irqrestore(&rq->lock, flags);
+ spin_unlock(&rq->lock);
i915_request_put(rq);
}
}
-static void signal_irq_work(struct irq_work *work)
-{
- struct intel_engine_cs *engine =
- container_of(work, typeof(*engine), breadcrumbs.irq_work);
-
- intel_engine_breadcrumbs_irq(engine);
-}
-
-static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
+static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
lockdep_assert_held(&b->irq_lock);
if (b->irq_armed)
- return;
+ return true;
+
+ if (!intel_gt_pm_get_if_awake(engine->gt))
+ return false;
/*
* The breadcrumb irq will be disarmed on the interrupt after the
@@ -234,6 +242,8 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
if (!b->irq_enabled++)
irq_enable(engine);
+
+ return true;
}
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
@@ -271,19 +281,20 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
- struct intel_context *ce = rq->hw_context;
+ struct intel_context *ce = rq->context;
struct list_head *pos;
spin_lock(&b->irq_lock);
GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
- __intel_breadcrumbs_arm_irq(b);
+ if (!__intel_breadcrumbs_arm_irq(b))
+ goto unlock;
/*
* We keep the seqno in retirement order, so we can break
- * inside intel_engine_breadcrumbs_irq as soon as we've passed
- * the last completed request (or seen a request that hasn't
- * event started). We could iterate the timeline->requests list,
+ * inside intel_engine_signal_breadcrumbs as soon as we've
+ * passed the last completed request (or seen a request that
+ * hasn't event started). We could walk the timeline->requests,
* but keeping a separate signalers_list has the advantage of
* hopefully being much smaller than the full list and so
* provides faster iteration and detection when there are no
@@ -306,6 +317,7 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
GEM_BUG_ON(!check_signal_order(ce, rq));
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+unlock:
spin_unlock(&b->irq_lock);
}
@@ -326,7 +338,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
*/
spin_lock(&b->irq_lock);
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
- struct intel_context *ce = rq->hw_context;
+ struct intel_context *ce = rq->context;
list_del(&rq->signal_link);
if (list_empty(&ce->signals))
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ef7bc41ffffa..23137b2a8689 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -31,8 +31,7 @@ void intel_context_free(struct intel_context *ce)
}
struct intel_context *
-intel_context_create(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+intel_context_create(struct intel_engine_cs *engine)
{
struct intel_context *ce;
@@ -40,39 +39,82 @@ intel_context_create(struct i915_gem_context *ctx,
if (!ce)
return ERR_PTR(-ENOMEM);
- intel_context_init(ce, ctx, engine);
+ intel_context_init(ce, engine);
return ce;
}
-int __intel_context_do_pin(struct intel_context *ce)
+int intel_context_alloc_state(struct intel_context *ce)
{
- int err;
+ int err = 0;
if (mutex_lock_interruptible(&ce->pin_mutex))
return -EINTR;
- if (likely(!atomic_read(&ce->pin_count))) {
- intel_wakeref_t wakeref;
+ if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ err = ce->ops->alloc(ce);
+ if (unlikely(err))
+ goto unlock;
- if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
- err = ce->ops->alloc(ce);
- if (unlikely(err))
- goto err;
+ set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+ }
+
+unlock:
+ mutex_unlock(&ce->pin_mutex);
+ return err;
+}
+
+static int intel_context_active_acquire(struct intel_context *ce)
+{
+ int err;
+
+ err = i915_active_acquire(&ce->active);
+ if (err)
+ return err;
- __set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+ /* Preallocate tracking nodes */
+ if (!intel_context_is_barrier(ce)) {
+ err = i915_active_acquire_preallocate_barrier(&ce->active,
+ ce->engine);
+ if (err) {
+ i915_active_release(&ce->active);
+ return err;
}
+ }
+
+ return 0;
+}
- err = 0;
- with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref)
- err = ce->ops->pin(ce);
+static void intel_context_active_release(struct intel_context *ce)
+{
+ /* Nodes preallocated in intel_context_active() */
+ i915_active_acquire_barrier(&ce->active);
+ i915_active_release(&ce->active);
+}
+
+int __intel_context_do_pin(struct intel_context *ce)
+{
+ int err;
+
+ if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
+ err = intel_context_alloc_state(ce);
if (err)
+ return err;
+ }
+
+ if (mutex_lock_interruptible(&ce->pin_mutex))
+ return -EINTR;
+
+ if (likely(!atomic_read(&ce->pin_count))) {
+ err = intel_context_active_acquire(ce);
+ if (unlikely(err))
goto err;
- GEM_TRACE("%s context:%llx pin ring:{head:%04x, tail:%04x}\n",
- ce->engine->name, ce->timeline->fence_context,
- ce->ring->head, ce->ring->tail);
+ err = ce->ops->pin(ce);
+ if (unlikely(err))
+ goto err_active;
- i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
+ CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n",
+ ce->ring->head, ce->ring->tail);
smp_mb__before_atomic(); /* flush pin before it is visible */
}
@@ -83,6 +125,8 @@ int __intel_context_do_pin(struct intel_context *ce)
mutex_unlock(&ce->pin_mutex);
return 0;
+err_active:
+ intel_context_active_release(ce);
err:
mutex_unlock(&ce->pin_mutex);
return err;
@@ -90,39 +134,36 @@ err:
void intel_context_unpin(struct intel_context *ce)
{
- if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+ if (!atomic_dec_and_test(&ce->pin_count))
return;
- /* We may be called from inside intel_context_pin() to evict another */
- intel_context_get(ce);
- mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
-
- if (likely(atomic_dec_and_test(&ce->pin_count))) {
- GEM_TRACE("%s context:%llx retire\n",
- ce->engine->name, ce->timeline->fence_context);
-
- ce->ops->unpin(ce);
+ CE_TRACE(ce, "unpin\n");
+ ce->ops->unpin(ce);
- i915_gem_context_put(ce->gem_context);
- intel_context_active_release(ce);
- }
-
- mutex_unlock(&ce->pin_mutex);
+ /*
+ * Once released, we may asynchronously drop the active reference.
+ * As that may be the only reference keeping the context alive,
+ * take an extra now so that it is not freed before we finish
+ * dereferencing it.
+ */
+ intel_context_get(ce);
+ intel_context_active_release(ce);
intel_context_put(ce);
}
static int __context_pin_state(struct i915_vma *vma)
{
- u64 flags;
+ unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
int err;
- flags = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
- flags |= PIN_HIGH | PIN_GLOBAL;
-
- err = i915_vma_pin(vma, 0, 0, flags);
+ err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
if (err)
return err;
+ err = i915_active_acquire(&vma->active);
+ if (err)
+ goto err_unpin;
+
/*
* And mark it as a globally pinned object to let the shrinker know
* it cannot reclaim the object until we release it.
@@ -131,27 +172,57 @@ static int __context_pin_state(struct i915_vma *vma)
vma->obj->mm.dirty = true;
return 0;
+
+err_unpin:
+ i915_vma_unpin(vma);
+ return err;
}
static void __context_unpin_state(struct i915_vma *vma)
{
i915_vma_make_shrinkable(vma);
+ i915_active_release(&vma->active);
__i915_vma_unpin(vma);
}
+static int __ring_active(struct intel_ring *ring)
+{
+ int err;
+
+ err = i915_active_acquire(&ring->vma->active);
+ if (err)
+ return err;
+
+ err = intel_ring_pin(ring);
+ if (err)
+ goto err_active;
+
+ return 0;
+
+err_active:
+ i915_active_release(&ring->vma->active);
+ return err;
+}
+
+static void __ring_retire(struct intel_ring *ring)
+{
+ intel_ring_unpin(ring);
+ i915_active_release(&ring->vma->active);
+}
+
__i915_active_call
static void __intel_context_retire(struct i915_active *active)
{
struct intel_context *ce = container_of(active, typeof(*ce), active);
- GEM_TRACE("%s context:%llx retire\n",
- ce->engine->name, ce->timeline->fence_context);
+ CE_TRACE(ce, "retire\n");
+ set_bit(CONTEXT_VALID_BIT, &ce->flags);
if (ce->state)
__context_unpin_state(ce->state);
intel_timeline_unpin(ce->timeline);
- intel_ring_unpin(ce->ring);
+ __ring_retire(ce->ring);
intel_context_put(ce);
}
@@ -161,9 +232,11 @@ static int __intel_context_active(struct i915_active *active)
struct intel_context *ce = container_of(active, typeof(*ce), active);
int err;
+ CE_TRACE(ce, "active\n");
+
intel_context_get(ce);
- err = intel_ring_pin(ce->ring);
+ err = __ring_active(ce->ring);
if (err)
goto err_put;
@@ -183,66 +256,27 @@ static int __intel_context_active(struct i915_active *active)
err_timeline:
intel_timeline_unpin(ce->timeline);
err_ring:
- intel_ring_unpin(ce->ring);
+ __ring_retire(ce->ring);
err_put:
intel_context_put(ce);
return err;
}
-int intel_context_active_acquire(struct intel_context *ce)
-{
- int err;
-
- err = i915_active_acquire(&ce->active);
- if (err)
- return err;
-
- /* Preallocate tracking nodes */
- if (!i915_gem_context_is_kernel(ce->gem_context)) {
- err = i915_active_acquire_preallocate_barrier(&ce->active,
- ce->engine);
- if (err) {
- i915_active_release(&ce->active);
- return err;
- }
- }
-
- return 0;
-}
-
-void intel_context_active_release(struct intel_context *ce)
-{
- /* Nodes preallocated in intel_context_active() */
- i915_active_acquire_barrier(&ce->active);
- i915_active_release(&ce->active);
-}
-
void
intel_context_init(struct intel_context *ce,
- struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
- struct i915_address_space *vm;
-
GEM_BUG_ON(!engine->cops);
+ GEM_BUG_ON(!engine->gt->vm);
kref_init(&ce->ref);
- ce->gem_context = ctx;
- rcu_read_lock();
- vm = rcu_dereference(ctx->vm);
- if (vm)
- ce->vm = i915_vm_get(vm);
- else
- ce->vm = i915_vm_get(&engine->gt->ggtt->vm);
- rcu_read_unlock();
- if (ctx->timeline)
- ce->timeline = intel_timeline_get(ctx->timeline);
-
ce->engine = engine;
ce->ops = engine->cops;
ce->sseu = engine->sseu;
- ce->ring = __intel_context_ring_size(SZ_16K);
+ ce->ring = __intel_context_ring_size(SZ_4K);
+
+ ce->vm = i915_vm_get(engine->gt->vm);
INIT_LIST_HEAD(&ce->signal_link);
INIT_LIST_HEAD(&ce->signals);
@@ -307,30 +341,11 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
int err;
/* Only suitable for use in remotely modifying this context */
- GEM_BUG_ON(rq->hw_context == ce);
+ GEM_BUG_ON(rq->context == ce);
if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
- /*
- * Ideally, we just want to insert our foreign fence as
- * a barrier into the remove context, such that this operation
- * occurs after all current operations in that context, and
- * all future operations must occur after this.
- *
- * Currently, the timeline->last_request tracking is guarded
- * by its mutex and so we must obtain that to atomically
- * insert our barrier. However, since we already hold our
- * timeline->mutex, we must be careful against potential
- * inversion if we are the kernel_context as the remote context
- * will itself poke at the kernel_context when it needs to
- * unpin. Ergo, if already locked, we drop both locks and
- * try again (through the magic of userspace repeating EAGAIN).
- */
- if (!mutex_trylock(&tl->mutex))
- return -EAGAIN;
-
/* Queue this switch after current activity by this context. */
err = i915_active_fence_set(&tl->last_request, rq);
- mutex_unlock(&tl->mutex);
if (err)
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 68b3d317d959..30bd248827d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -7,7 +7,9 @@
#ifndef __INTEL_CONTEXT_H__
#define __INTEL_CONTEXT_H__
+#include <linux/bitops.h>
#include <linux/lockdep.h>
+#include <linux/types.h>
#include "i915_active.h"
#include "intel_context_types.h"
@@ -15,14 +17,21 @@
#include "intel_ring_types.h"
#include "intel_timeline_types.h"
+#define CE_TRACE(ce, fmt, ...) do { \
+ const struct intel_context *ce__ = (ce); \
+ ENGINE_TRACE(ce__->engine, "context:%llx " fmt, \
+ ce__->timeline->fence_context, \
+ ##__VA_ARGS__); \
+} while (0)
+
void intel_context_init(struct intel_context *ce,
- struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
void intel_context_fini(struct intel_context *ce);
struct intel_context *
-intel_context_create(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine);
+intel_context_create(struct intel_engine_cs *engine);
+
+int intel_context_alloc_state(struct intel_context *ce);
void intel_context_free(struct intel_context *ce);
@@ -69,9 +78,14 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
int __intel_context_do_pin(struct intel_context *ce);
+static inline bool intel_context_pin_if_active(struct intel_context *ce)
+{
+ return atomic_inc_not_zero(&ce->pin_count);
+}
+
static inline int intel_context_pin(struct intel_context *ce)
{
- if (likely(atomic_inc_not_zero(&ce->pin_count)))
+ if (likely(intel_context_pin_if_active(ce)))
return 0;
return __intel_context_do_pin(ce);
@@ -109,9 +123,6 @@ static inline void intel_context_exit(struct intel_context *ce)
ce->ops->exit(ce);
}
-int intel_context_active_acquire(struct intel_context *ce);
-void intel_context_active_release(struct intel_context *ce);
-
static inline struct intel_context *intel_context_get(struct intel_context *ce)
{
kref_get(&ce->ref);
@@ -153,4 +164,64 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz)
return u64_to_ptr(struct intel_ring, sz);
}
+static inline bool intel_context_is_barrier(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
+}
+
+static inline bool intel_context_use_semaphores(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline void intel_context_set_use_semaphores(struct intel_context *ce)
+{
+ set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline void intel_context_clear_use_semaphores(struct intel_context *ce)
+{
+ clear_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline bool intel_context_is_banned(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_BANNED, &ce->flags);
+}
+
+static inline bool intel_context_set_banned(struct intel_context *ce)
+{
+ return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
+}
+
+static inline bool
+intel_context_force_single_submission(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
+}
+
+static inline void
+intel_context_set_single_submission(struct intel_context *ce)
+{
+ __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
+}
+
+static inline bool
+intel_context_nopreempt(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
+static inline void
+intel_context_set_nopreempt(struct intel_context *ce)
+{
+ set_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
+static inline void
+intel_context_clear_nopreempt(struct intel_context *ce)
+{
+ clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
#endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 6959b05ae5f8..ca1420fb8b53 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -17,6 +17,8 @@
#include "intel_engine_types.h"
#include "intel_sseu.h"
+#define CONTEXT_REDZONE POISON_INUSE
+
struct i915_gem_context;
struct i915_vma;
struct intel_context;
@@ -44,7 +46,7 @@ struct intel_context {
#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
struct i915_address_space *vm;
- struct i915_gem_context *gem_context;
+ struct i915_gem_context __rcu *gem_context;
struct list_head signal_link;
struct list_head signals;
@@ -54,7 +56,13 @@ struct intel_context {
struct intel_timeline *timeline;
unsigned long flags;
-#define CONTEXT_ALLOC_BIT 0
+#define CONTEXT_BARRIER_BIT 0
+#define CONTEXT_ALLOC_BIT 1
+#define CONTEXT_VALID_BIT 2
+#define CONTEXT_USE_SEMAPHORES 3
+#define CONTEXT_BANNED 4
+#define CONTEXT_FORCE_SINGLE_SUBMISSION 5
+#define CONTEXT_NOPREEMPT 6
u32 *lrc_reg_state;
u64 lrc_desc;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 01765a7ec18f..5df003061e44 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -29,6 +29,13 @@ struct intel_gt;
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
+#define ENGINE_TRACE(e, fmt, ...) do { \
+ const struct intel_engine_cs *e__ __maybe_unused = (e); \
+ GEM_TRACE("%s %s: " fmt, \
+ dev_name(e__->i915->drm.dev), e__->name, \
+ ##__VA_ARGS__); \
+} while (0)
+
/*
* The register defines to be used with the following macros need to accept a
* base param, e.g:
@@ -177,15 +184,15 @@ void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);
int intel_engines_init_mmio(struct intel_gt *gt);
-int intel_engines_setup(struct intel_gt *gt);
int intel_engines_init(struct intel_gt *gt);
-void intel_engines_cleanup(struct intel_gt *gt);
+
+void intel_engines_release(struct intel_gt *gt);
+void intel_engines_free(struct intel_gt *gt);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);
int intel_ring_submission_setup(struct intel_engine_cs *engine);
-int intel_ring_submission_init(struct intel_engine_cs *engine);
int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
@@ -195,7 +202,7 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
+void intel_engine_get_instdone(const struct intel_engine_cs *engine,
struct intel_instdone *instdone);
void intel_engine_init_execlists(struct intel_engine_cs *engine);
@@ -206,13 +213,11 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
static inline void
-intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
+intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
{
irq_work_queue(&engine->breadcrumbs.irq_work);
}
-void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
-
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
@@ -270,8 +275,8 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
static inline void __intel_engine_reset(struct intel_engine_cs *engine,
bool stalled)
{
- if (engine->reset.reset)
- engine->reset.reset(engine, stalled);
+ if (engine->reset.rewind)
+ engine->reset.rewind(engine, stalled);
engine->serial++; /* contexts lost */
}
@@ -296,7 +301,7 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine);
-u32 intel_engine_context_size(struct drm_i915_private *i915, u8 class);
+u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 813bd3a610d2..f451ef376548 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -141,7 +141,7 @@ static const struct engine_info intel_engines[] = {
/**
* intel_engine_context_size() - return the size of the context for an engine
- * @dev_priv: i915 device private
+ * @gt: the gt
* @class: engine class
*
* Each engine class may require a different amount of space for a context
@@ -153,17 +153,18 @@ static const struct engine_info intel_engines[] = {
* in LRC mode, but does not include the "shared data page" used with
* GuC submission. The caller should account for this if using the GuC.
*/
-u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
+u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
{
+ struct intel_uncore *uncore = gt->uncore;
u32 cxt_size;
BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
switch (class) {
case RENDER_CLASS:
- switch (INTEL_GEN(dev_priv)) {
+ switch (INTEL_GEN(gt->i915)) {
default:
- MISSING_CASE(INTEL_GEN(dev_priv));
+ MISSING_CASE(INTEL_GEN(gt->i915));
return DEFAULT_LR_CONTEXT_RENDER_SIZE;
case 12:
case 11:
@@ -175,14 +176,14 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
case 8:
return GEN8_LR_CONTEXT_RENDER_SIZE;
case 7:
- if (IS_HASWELL(dev_priv))
+ if (IS_HASWELL(gt->i915))
return HSW_CXT_TOTAL_SIZE;
- cxt_size = I915_READ(GEN7_CXT_SIZE);
+ cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
PAGE_SIZE);
case 6:
- cxt_size = I915_READ(CXT_SIZE);
+ cxt_size = intel_uncore_read(uncore, CXT_SIZE);
return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
PAGE_SIZE);
case 5:
@@ -197,9 +198,9 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
* minimum allocation anyway so it should all come
* out in the wash.
*/
- cxt_size = I915_READ(CXT_SIZE) + 1;
+ cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
- INTEL_GEN(dev_priv),
+ INTEL_GEN(gt->i915),
cxt_size * 64,
cxt_size - 1);
return round_up(cxt_size * 64, PAGE_SIZE);
@@ -216,7 +217,7 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
case VIDEO_DECODE_CLASS:
case VIDEO_ENHANCEMENT_CLASS:
case COPY_ENGINE_CLASS:
- if (INTEL_GEN(dev_priv) < 8)
+ if (INTEL_GEN(gt->i915) < 8)
return 0;
return GEN8_LR_CONTEXT_OTHER_SIZE;
}
@@ -318,14 +319,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->props.timeslice_duration_ms =
CONFIG_DRM_I915_TIMESLICE_DURATION;
- /*
- * To be overridden by the backend on setup. However to facilitate
- * cleanup on error during setup, we always provide the destroy vfunc.
- */
- engine->destroy = (typeof(engine->destroy))kfree;
-
- engine->context_size = intel_engine_context_size(gt->i915,
- engine->class);
+ engine->context_size = intel_engine_context_size(gt, engine->class);
if (WARN_ON(engine->context_size > BIT(20)))
engine->context_size = 0;
if (engine->context_size)
@@ -334,6 +328,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
/* Nothing to do here, execute in order of dependencies */
engine->schedule = NULL;
+ ewma__engine_latency_init(&engine->latency);
seqlock_init(&engine->stats.lock);
ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
@@ -344,7 +339,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
gt->engine_class[info->class][info->instance] = engine;
gt->engine[id] = engine;
- intel_engine_add_user(engine);
gt->i915->engine[id] = engine;
return 0;
@@ -390,21 +384,39 @@ static void intel_setup_engine_capabilities(struct intel_gt *gt)
}
/**
- * intel_engines_cleanup() - free the resources allocated for Command Streamers
+ * intel_engines_release() - free the resources allocated for Command Streamers
* @gt: pointer to struct intel_gt
*/
-void intel_engines_cleanup(struct intel_gt *gt)
+void intel_engines_release(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ /* Decouple the backend; but keep the layout for late GPU resets */
for_each_engine(engine, gt, id) {
- engine->destroy(engine);
- gt->engine[id] = NULL;
+ if (!engine->release)
+ continue;
+
+ engine->release(engine);
+ engine->release = NULL;
+
+ memset(&engine->reset, 0, sizeof(engine->reset));
+
gt->i915->engine[id] = NULL;
}
}
+void intel_engines_free(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, gt, id) {
+ kfree(engine);
+ gt->engine[id] = NULL;
+ }
+}
+
/**
* intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
* @gt: pointer to struct intel_gt
@@ -455,38 +467,7 @@ int intel_engines_init_mmio(struct intel_gt *gt)
return 0;
cleanup:
- intel_engines_cleanup(gt);
- return err;
-}
-
-/**
- * intel_engines_init() - init the Engine Command Streamers
- * @gt: pointer to struct intel_gt
- *
- * Return: non-zero if the initialization failed.
- */
-int intel_engines_init(struct intel_gt *gt)
-{
- int (*init)(struct intel_engine_cs *engine);
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err;
-
- if (HAS_EXECLISTS(gt->i915))
- init = intel_execlists_submission_init;
- else
- init = intel_ring_submission_init;
-
- for_each_engine(engine, gt, id) {
- err = init(engine);
- if (err)
- goto cleanup;
- }
-
- return 0;
-
-cleanup:
- intel_engines_cleanup(gt);
+ intel_engines_free(gt);
return err;
}
@@ -601,7 +582,7 @@ err:
return ret;
}
-static int intel_engine_setup_common(struct intel_engine_cs *engine)
+static int engine_setup_common(struct intel_engine_cs *engine)
{
int err;
@@ -631,49 +612,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
return 0;
}
-/**
- * intel_engines_setup- setup engine state not requiring hw access
- * @gt: pointer to struct intel_gt
- *
- * Initializes engine structure members shared between legacy and execlists
- * submission modes which do not require hardware access.
- *
- * Typically done early in the submission mode specific engine setup stage.
- */
-int intel_engines_setup(struct intel_gt *gt)
-{
- int (*setup)(struct intel_engine_cs *engine);
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err;
-
- if (HAS_EXECLISTS(gt->i915))
- setup = intel_execlists_submission_setup;
- else
- setup = intel_ring_submission_setup;
-
- for_each_engine(engine, gt, id) {
- err = intel_engine_setup_common(engine);
- if (err)
- goto cleanup;
-
- err = setup(engine);
- if (err)
- goto cleanup;
-
- /* We expect the backend to take control over its state */
- GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);
-
- GEM_BUG_ON(!engine->cops);
- }
-
- return 0;
-
-cleanup:
- intel_engines_cleanup(gt);
- return err;
-}
-
struct measure_breadcrumb {
struct i915_request rq;
struct intel_timeline timeline;
@@ -757,13 +695,13 @@ create_kernel_context(struct intel_engine_cs *engine)
struct intel_context *ce;
int err;
- ce = intel_context_create(engine->i915->kernel_context, engine);
+ ce = intel_context_create(engine);
if (IS_ERR(ce))
return ce;
- ce->ring = __intel_context_ring_size(SZ_4K);
+ __set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
- err = intel_context_pin(ce);
+ err = intel_context_pin(ce); /* perma-pin so it is always available */
if (err) {
intel_context_put(ce);
return ERR_PTR(err);
@@ -791,13 +729,19 @@ create_kernel_context(struct intel_engine_cs *engine)
*
* Returns zero on success or an error code on failure.
*/
-int intel_engine_init_common(struct intel_engine_cs *engine)
+static int engine_init_common(struct intel_engine_cs *engine)
{
struct intel_context *ce;
int ret;
engine->set_default_submission(engine);
+ ret = measure_breadcrumb_dw(engine);
+ if (ret < 0)
+ return ret;
+
+ engine->emit_fini_breadcrumb_dw = ret;
+
/*
* We may need to do things with the shrinker which
* require us to immediately switch back to the default
@@ -812,18 +756,38 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
engine->kernel_context = ce;
- ret = measure_breadcrumb_dw(engine);
- if (ret < 0)
- goto err_unpin;
+ return 0;
+}
- engine->emit_fini_breadcrumb_dw = ret;
+int intel_engines_init(struct intel_gt *gt)
+{
+ int (*setup)(struct intel_engine_cs *engine);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err;
- return 0;
+ if (HAS_EXECLISTS(gt->i915))
+ setup = intel_execlists_submission_setup;
+ else
+ setup = intel_ring_submission_setup;
-err_unpin:
- intel_context_unpin(ce);
- intel_context_put(ce);
- return ret;
+ for_each_engine(engine, gt, id) {
+ err = engine_setup_common(engine);
+ if (err)
+ return err;
+
+ err = setup(engine);
+ if (err)
+ return err;
+
+ err = engine_init_common(engine);
+ if (err)
+ return err;
+
+ intel_engine_add_user(engine);
+ }
+
+ return 0;
}
/**
@@ -836,6 +800,7 @@ err_unpin:
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
GEM_BUG_ON(!list_empty(&engine->active.requests));
+ tasklet_kill(&engine->execlists.tasklet); /* flush the callback */
cleanup_status_page(engine);
@@ -911,7 +876,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
if (INTEL_GEN(engine->i915) < 3)
return -ENODEV;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
@@ -920,7 +885,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
mode, MODE_IDLE, MODE_IDLE,
1000, stop_timeout(engine),
NULL)) {
- GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
+ ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n");
err = -ETIMEDOUT;
}
@@ -932,7 +897,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}
@@ -949,8 +914,8 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
}
static u32
-read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
- i915_reg_t reg)
+read_subslice_reg(const struct intel_engine_cs *engine,
+ int slice, int subslice, i915_reg_t reg)
{
struct drm_i915_private *i915 = engine->i915;
struct intel_uncore *uncore = engine->uncore;
@@ -994,7 +959,7 @@ read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
}
/* NB: please notice the memset */
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
+void intel_engine_get_instdone(const struct intel_engine_cs *engine,
struct intel_instdone *instdone)
{
struct drm_i915_private *i915 = engine->i915;
@@ -1478,6 +1443,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
drm_printf(m, "*** WEDGED ***\n");
drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
+ drm_printf(m, "\tBarriers?: %s\n",
+ yesno(!llist_empty(&engine->barrier_tasks)));
+ drm_printf(m, "\tLatency: %luus\n",
+ ewma__engine_latency_read(&engine->latency));
rcu_read_lock();
rq = READ_ONCE(engine->heartbeat.systole);
@@ -1517,9 +1486,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
print_request_ring(m, rq);
- if (rq->hw_context->lrc_reg_state) {
+ if (rq->context->lrc_reg_state) {
drm_printf(m, "Logical Ring Context:\n");
- hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE);
+ hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
}
}
spin_unlock_irqrestore(&engine->active.lock, flags);
@@ -1580,7 +1549,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
for (port = execlists->pending; (rq = *port); port++) {
/* Exclude any contexts already counted in active */
- if (!intel_context_inflight_count(rq->hw_context))
+ if (!intel_context_inflight_count(rq->context))
engine->stats.active++;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 06aa14c7aa8c..6c6fd185457c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -63,15 +63,15 @@ static void heartbeat(struct work_struct *wrk)
struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
- if (!intel_engine_pm_get_if_awake(engine))
- return;
-
rq = engine->heartbeat.systole;
if (rq && i915_request_completed(rq)) {
i915_request_put(rq);
engine->heartbeat.systole = NULL;
}
+ if (!intel_engine_pm_get_if_awake(engine))
+ return;
+
if (intel_gt_is_wedged(engine->gt))
goto out;
@@ -199,7 +199,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
goto out_unlock;
}
- rq->flags |= I915_REQUEST_SENTINEL;
+ __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
idle_pulse(engine, rq);
__i915_request_commit(rq);
@@ -215,18 +215,26 @@ out_rpm:
int intel_engine_flush_barriers(struct intel_engine_cs *engine)
{
struct i915_request *rq;
+ int err = 0;
if (llist_empty(&engine->barrier_tasks))
return 0;
+ if (!intel_engine_pm_get_if_awake(engine))
+ return 0;
+
rq = i915_request_create(engine->kernel_context);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_rpm;
+ }
idle_pulse(engine, rq);
i915_request_add(rq);
- return 0;
+out_rpm:
+ intel_engine_pm_put(engine);
+ return err;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index c1dd0cd3efc7..ea90ab3e396e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -6,6 +6,7 @@
#include "i915_drv.h"
+#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
@@ -19,9 +20,10 @@ static int __engine_unpark(struct intel_wakeref *wf)
{
struct intel_engine_cs *engine =
container_of(wf, typeof(*engine), wakeref);
+ struct intel_context *ce;
void *map;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
intel_gt_pm_get(engine->gt);
@@ -33,6 +35,27 @@ static int __engine_unpark(struct intel_wakeref *wf)
if (!IS_ERR_OR_NULL(map))
engine->pinned_default_state = map;
+ /* Discard stale context state from across idling */
+ ce = engine->kernel_context;
+ if (ce) {
+ GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));
+
+ /* First poison the image to verify we never fully trust it */
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
+ struct drm_i915_gem_object *obj = ce->state->obj;
+ int type = i915_coherent_map_type(engine->i915);
+
+ map = i915_gem_object_pin_map(obj, type);
+ if (!IS_ERR(map)) {
+ memset(map, CONTEXT_REDZONE, obj->base.size);
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+ }
+ }
+
+ ce->ops->reset(ce);
+ }
+
if (engine->unpark)
engine->unpark(engine);
@@ -73,6 +96,15 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
+static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ struct i915_request *rq = to_request(fence);
+
+ ewma__engine_latency_add(&rq->engine->latency,
+ ktime_us_delta(rq->fence.timestamp,
+ rq->duration.emitted));
+}
+
static void
__queue_and_release_pm(struct i915_request *rq,
struct intel_timeline *tl,
@@ -80,7 +112,7 @@ __queue_and_release_pm(struct i915_request *rq,
{
struct intel_gt_timelines *timelines = &engine->gt->timelines;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
/*
* We have to serialise all potential retirement paths with our
@@ -113,14 +145,16 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
unsigned long flags;
bool result = true;
- /* Already inside the kernel context, safe to power down. */
- if (engine->wakeref_serial == engine->serial)
- return true;
-
/* GPU is pointing to the void, as good as in the kernel context. */
if (intel_gt_is_wedged(engine->gt))
return true;
+ GEM_BUG_ON(!intel_context_is_barrier(ce));
+
+ /* Already inside the kernel context, safe to power down. */
+ if (engine->wakeref_serial == engine->serial)
+ return true;
+
/*
* Note, we do this without taking the timeline->mutex. We cannot
* as we may be called while retiring the kernel context and so
@@ -163,7 +197,18 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
/* Install ourselves as a preemption barrier */
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
- __i915_request_commit(rq);
+ if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
+ /*
+ * Use an interrupt for precise measurement of duration,
+ * otherwise we rely on someone else retiring all the requests
+ * which may delay the signaling (i.e. we will likely wait
+ * until the background request retirement running every
+ * second or two).
+ */
+ BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
+ dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
+ rq->duration.emitted = ktime_get();
+ }
/* Expose ourselves to the world */
__queue_and_release_pm(rq, ce->timeline, engine);
@@ -183,7 +228,7 @@ static void call_idle_barriers(struct intel_engine_cs *engine)
container_of((struct list_head *)node,
typeof(*cb), node);
- cb->func(NULL, cb);
+ cb->func(ERR_PTR(-EAGAIN), cb);
}
}
@@ -204,7 +249,7 @@ static int __engine_park(struct intel_wakeref *wf)
if (!switch_to_kernel_context(engine))
return -EBUSY;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
call_idle_barriers(engine); /* cleanup after wedging */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 24e20344dc22..e52c2b0cb245 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -7,6 +7,7 @@
#ifndef INTEL_ENGINE_PM_H
#define INTEL_ENGINE_PM_H
+#include "i915_request.h"
#include "intel_engine_types.h"
#include "intel_wakeref.h"
@@ -41,6 +42,26 @@ static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
intel_wakeref_unlock_wait(&engine->wakeref);
}
+static inline struct i915_request *
+intel_engine_create_kernel_request(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ /*
+ * The engine->kernel_context is special as it is used inside
+ * the engine-pm barrier (see __engine_park()), circumventing
+ * the usual mutexes and relying on the engine-pm barrier
+ * instead. So whenever we use the engine->kernel_context
+ * outside of the barrier, we must manually handle the
+ * engine wakeref to serialise with the use inside.
+ */
+ intel_engine_pm_get(engine);
+ rq = i915_request_create(engine->kernel_context);
+ intel_engine_pm_put(engine);
+
+ return rq;
+}
+
void intel_engine_init__pm(struct intel_engine_cs *engine);
#endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 17f1f1441efc..350da59e605b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -7,6 +7,7 @@
#ifndef __INTEL_ENGINE_TYPES__
#define __INTEL_ENGINE_TYPES__
+#include <linux/average.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
@@ -119,6 +120,9 @@ enum intel_engine_id {
#define INVALID_ENGINE ((enum intel_engine_id)-1)
};
+/* A simple estimator for the round-trip latency of an engine */
+DECLARE_EWMA(_engine_latency, 6, 4)
+
struct st_preempt_hang {
struct completion completion;
unsigned int count;
@@ -274,8 +278,8 @@ struct intel_engine_cs {
u8 class;
u8 instance;
- u8 uabi_class;
- u8 uabi_instance;
+ u16 uabi_class;
+ u16 uabi_instance;
u32 uabi_capabilities;
u32 context_size;
@@ -316,6 +320,13 @@ struct intel_engine_cs {
struct intel_timeline *timeline;
} legacy;
+ /*
+ * We track the average duration of the idle pulse on parking the
+ * engine to keep an estimate of the how the fast the engine is
+ * under ideal conditions.
+ */
+ struct ewma__engine_latency latency;
+
/* Rather than have every client wait upon all user interrupts,
* with the herd waking after every interrupt and each doing the
* heavyweight seqno dance, we delegate the task (of being the
@@ -389,7 +400,10 @@ struct intel_engine_cs {
struct {
void (*prepare)(struct intel_engine_cs *engine);
- void (*reset)(struct intel_engine_cs *engine, bool stalled);
+
+ void (*rewind)(struct intel_engine_cs *engine, bool stalled);
+ void (*cancel)(struct intel_engine_cs *engine);
+
void (*finish)(struct intel_engine_cs *engine);
} reset;
@@ -439,15 +453,7 @@ struct intel_engine_cs {
void (*schedule)(struct i915_request *request,
const struct i915_sched_attr *attr);
- /*
- * Cancel all requests on the hardware, or queued for execution.
- * This should only cancel the ready requests that have been
- * submitted to the engine (via the engine->submit_request callback).
- * This is called when marking the device as wedged.
- */
- void (*cancel_requests)(struct intel_engine_cs *engine);
-
- void (*destroy)(struct intel_engine_cs *engine);
+ void (*release)(struct intel_engine_cs *engine);
struct intel_engine_execlists execlists;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 7f7150a733f4..9e7f12bef828 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -11,6 +11,7 @@
#include "i915_drv.h"
#include "intel_engine.h"
#include "intel_engine_user.h"
+#include "intel_gt.h"
struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
@@ -200,6 +201,9 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
uabi_node);
char old[sizeof(engine->name)];
+ if (intel_gt_has_init_error(engine->gt))
+ continue; /* ignore incomplete engines */
+
GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
engine->uabi_class = uabi_classes[engine->class];
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
new file mode 100644
index 000000000000..531d501be01f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -0,0 +1,1486 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/stop_machine.h>
+
+#include <asm/set_memory.h>
+#include <asm/smp.h>
+
+#include "intel_gt.h"
+#include "i915_drv.h"
+#include "i915_scatterlist.h"
+#include "i915_vgpu.h"
+
+#include "intel_gtt.h"
+
+static int
+i915_get_ggtt_vma_pages(struct i915_vma *vma);
+
+static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
+ unsigned long color,
+ u64 *start,
+ u64 *end)
+{
+ if (i915_node_color_differs(node, color))
+ *start += I915_GTT_PAGE_SIZE;
+
+ /*
+ * Also leave a space between the unallocated reserved node after the
+ * GTT and any objects within the GTT, i.e. we use the color adjustment
+ * to insert a guard page to prevent prefetches crossing over the
+ * GTT boundary.
+ */
+ node = list_next_entry(node, node_list);
+ if (node->color != color)
+ *end -= I915_GTT_PAGE_SIZE;
+}
+
+static int ggtt_init_hw(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+
+ i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
+
+ ggtt->vm.is_ggtt = true;
+
+ /* Only VLV supports read-only GGTT mappings */
+ ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
+
+ if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
+ ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
+
+ if (ggtt->mappable_end) {
+ if (!io_mapping_init_wc(&ggtt->iomap,
+ ggtt->gmadr.start,
+ ggtt->mappable_end)) {
+ ggtt->vm.cleanup(&ggtt->vm);
+ return -EIO;
+ }
+
+ ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
+ ggtt->mappable_end);
+ }
+
+ i915_ggtt_init_fences(ggtt);
+
+ return 0;
+}
+
+/**
+ * i915_ggtt_init_hw - Initialize GGTT hardware
+ * @i915: i915 device
+ */
+int i915_ggtt_init_hw(struct drm_i915_private *i915)
+{
+ int ret;
+
+ stash_init(&i915->mm.wc_stash);
+
+ /*
+ * Note that we use page colouring to enforce a guard page at the
+ * end of the address space. This is required as the CS may prefetch
+ * beyond the end of the batch buffer, across the page boundary,
+ * and beyond the end of the GTT if we do not provide a guard.
+ */
+ ret = ggtt_init_hw(&i915->ggtt);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+/*
+ * Certain Gen5 chipsets require require idling the GPU before
+ * unmapping anything from the GTT when VT-d is enabled.
+ */
+static bool needs_idle_maps(struct drm_i915_private *i915)
+{
+ /*
+ * Query intel_iommu to see if we need the workaround. Presumably that
+ * was loaded first.
+ */
+ return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
+}
+
+static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+
+ /*
+ * Don't bother messing with faults pre GEN6 as we have little
+ * documentation supporting that it's a good idea.
+ */
+ if (INTEL_GEN(i915) < 6)
+ return;
+
+ intel_gt_check_and_clear_faults(ggtt->vm.gt);
+
+ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
+
+ ggtt->invalidate(ggtt);
+}
+
+void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
+{
+ ggtt_suspend_mappings(&i915->ggtt);
+}
+
+void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+
+ spin_lock_irq(&uncore->lock);
+ intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+ intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
+ spin_unlock_irq(&uncore->lock);
+}
+
+static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+
+ /*
+ * Note that as an uncached mmio write, this will flush the
+ * WCB of the writes into the GGTT before it triggers the invalidate.
+ */
+ intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+}
+
+static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+
+ gen8_ggtt_invalidate(ggtt);
+
+ if (INTEL_GEN(i915) >= 12)
+ intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
+ GEN12_GUC_TLB_INV_CR_INVALIDATE);
+ else
+ intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+}
+
+static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+ intel_gtt_chipset_flush();
+}
+
+static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
+{
+ writeq(pte, addr);
+}
+
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level level,
+ u32 unused)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen8_pte_t __iomem *pte =
+ (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+ gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
+
+ ggtt->invalidate(ggtt);
+}
+
+static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ struct sgt_iter sgt_iter;
+ gen8_pte_t __iomem *gtt_entries;
+ const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
+ dma_addr_t addr;
+
+ /*
+ * Note that we ignore PTE_READ_ONLY here. The caller must be careful
+ * not to allow the user to override access to a read only page.
+ */
+
+ gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
+ gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
+ for_each_sgt_daddr(addr, sgt_iter, vma->pages)
+ gen8_set_pte(gtt_entries++, pte_encode | addr);
+
+ /*
+ * We want to flush the TLBs only after we're certain all the PTE
+ * updates have finished.
+ */
+ ggtt->invalidate(ggtt);
+}
+
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen6_pte_t __iomem *pte =
+ (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+ iowrite32(vm->pte_encode(addr, level, flags), pte);
+
+ ggtt->invalidate(ggtt);
+}
+
+/*
+ * Binds an object into the global gtt with the specified cache level.
+ * The object will be accessible to the GPU via commands whose operands
+ * reference offsets within the global GTT as well as accessible by the GPU
+ * through the GMADR mapped BAR (i915->mm.gtt->gtt).
+ */
+static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
+ unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
+ struct sgt_iter iter;
+ dma_addr_t addr;
+
+ for_each_sgt_daddr(addr, iter, vma->pages)
+ iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
+
+ /*
+ * We want to flush the TLBs only after we're certain all the PTE
+ * updates have finished.
+ */
+ ggtt->invalidate(ggtt);
+}
+
+static void nop_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+}
+
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+ const gen8_pte_t scratch_pte = vm->scratch[0].encode;
+ gen8_pte_t __iomem *gtt_base =
+ (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+ const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+ int i;
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ for (i = 0; i < num_entries; i++)
+ gen8_set_pte(&gtt_base[i], scratch_pte);
+}
+
+static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
+{
+ /*
+ * Make sure the internal GAM fifo has been cleared of all GTT
+ * writes before exiting stop_machine(). This guarantees that
+ * any aperture accesses waiting to start in another process
+ * cannot back up behind the GTT writes causing a hang.
+ * The register can be any arbitrary GAM register.
+ */
+ intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
+}
+
+struct insert_page {
+ struct i915_address_space *vm;
+ dma_addr_t addr;
+ u64 offset;
+ enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
+{
+ struct insert_page *arg = _arg;
+
+ gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+ bxt_vtd_ggtt_wa(arg->vm);
+
+ return 0;
+}
+
+static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level level,
+ u32 unused)
+{
+ struct insert_page arg = { vm, addr, offset, level };
+
+ stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
+}
+
+struct insert_entries {
+ struct i915_address_space *vm;
+ struct i915_vma *vma;
+ enum i915_cache_level level;
+ u32 flags;
+};
+
+static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+{
+ struct insert_entries *arg = _arg;
+
+ gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
+ bxt_vtd_ggtt_wa(arg->vm);
+
+ return 0;
+}
+
+static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct insert_entries arg = { vm, vma, level, flags };
+
+ stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+struct clear_range {
+ struct i915_address_space *vm;
+ u64 start;
+ u64 length;
+};
+
+static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
+{
+ struct clear_range *arg = _arg;
+
+ gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
+ bxt_vtd_ggtt_wa(arg->vm);
+
+ return 0;
+}
+
+static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
+ u64 start,
+ u64 length)
+{
+ struct clear_range arg = { vm, start, length };
+
+ stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
+}
+
+static void gen6_ggtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+ gen6_pte_t scratch_pte, __iomem *gtt_base =
+ (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
+ const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+ int i;
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ scratch_pte = vm->scratch[0].encode;
+ for (i = 0; i < num_entries; i++)
+ iowrite32(scratch_pte, &gtt_base[i]);
+}
+
+static void i915_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+ unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+ intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
+}
+
+static void i915_ggtt_insert_entries(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+ unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+ intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
+ flags);
+}
+
+static void i915_ggtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
+}
+
+static int ggtt_bind_vma(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+ u32 pte_flags;
+
+ /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
+ pte_flags = 0;
+ if (i915_gem_object_is_readonly(obj))
+ pte_flags |= PTE_READ_ONLY;
+
+ vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+
+ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+
+ /*
+ * Without aliasing PPGTT there's no difference between
+ * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
+ * upgrade to both bound if we bind either to avoid double-binding.
+ */
+ atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
+
+ return 0;
+}
+
+static void ggtt_unbind_vma(struct i915_vma *vma)
+{
+ vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+}
+
+static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
+{
+ u64 size;
+ int ret;
+
+ if (!USES_GUC(ggtt->vm.i915))
+ return 0;
+
+ GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
+ size = ggtt->vm.total - GUC_GGTT_TOP;
+
+ ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
+ GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
+ PIN_NOEVICT);
+ if (ret)
+ DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
+
+ return ret;
+}
+
+static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
+{
+ if (drm_mm_node_allocated(&ggtt->uc_fw))
+ drm_mm_remove_node(&ggtt->uc_fw);
+}
+
+static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
+{
+ ggtt_release_guc_top(ggtt);
+ if (drm_mm_node_allocated(&ggtt->error_capture))
+ drm_mm_remove_node(&ggtt->error_capture);
+ mutex_destroy(&ggtt->error_mutex);
+}
+
+static int init_ggtt(struct i915_ggtt *ggtt)
+{
+ /*
+ * Let GEM Manage all of the aperture.
+ *
+ * However, leave one page at the end still bound to the scratch page.
+ * There are a number of places where the hardware apparently prefetches
+ * past the end of the object, and we've seen multiple hangs with the
+ * GPU head pointer stuck in a batchbuffer bound at the last page of the
+ * aperture. One page should be enough to keep any prefetching inside
+ * of the aperture.
+ */
+ unsigned long hole_start, hole_end;
+ struct drm_mm_node *entry;
+ int ret;
+
+ /*
+ * GuC requires all resources that we're sharing with it to be placed in
+ * non-WOPCM memory. If GuC is not present or not in use we still need a
+ * small bias as ring wraparound at offset 0 sometimes hangs. No idea
+ * why.
+ */
+ ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
+ intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
+
+ ret = intel_vgt_balloon(ggtt);
+ if (ret)
+ return ret;
+
+ mutex_init(&ggtt->error_mutex);
+ if (ggtt->mappable_end) {
+ /* Reserve a mappable slot for our lockless error capture */
+ ret = drm_mm_insert_node_in_range(&ggtt->vm.mm,
+ &ggtt->error_capture,
+ PAGE_SIZE, 0,
+ I915_COLOR_UNEVICTABLE,
+ 0, ggtt->mappable_end,
+ DRM_MM_INSERT_LOW);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * The upper portion of the GuC address space has a sizeable hole
+ * (several MB) that is inaccessible by GuC. Reserve this range within
+ * GGTT as it can comfortably hold GuC/HuC firmware images.
+ */
+ ret = ggtt_reserve_guc_top(ggtt);
+ if (ret)
+ goto err;
+
+ /* Clear any non-preallocated blocks */
+ drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
+ DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
+ hole_start, hole_end);
+ ggtt->vm.clear_range(&ggtt->vm, hole_start,
+ hole_end - hole_start);
+ }
+
+ /* And finally clear the reserved guard page */
+ ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
+
+ return 0;
+
+err:
+ cleanup_init_ggtt(ggtt);
+ return ret;
+}
+
+static int aliasing_gtt_bind_vma(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ u32 pte_flags;
+ int ret;
+
+ /* Currently applicable only to VLV */
+ pte_flags = 0;
+ if (i915_gem_object_is_readonly(vma->obj))
+ pte_flags |= PTE_READ_ONLY;
+
+ if (flags & I915_VMA_LOCAL_BIND) {
+ struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
+
+ if (flags & I915_VMA_ALLOC) {
+ ret = alias->vm.allocate_va_range(&alias->vm,
+ vma->node.start,
+ vma->size);
+ if (ret)
+ return ret;
+
+ set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
+ }
+
+ GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
+ __i915_vma_flags(vma)));
+ alias->vm.insert_entries(&alias->vm, vma,
+ cache_level, pte_flags);
+ }
+
+ if (flags & I915_VMA_GLOBAL_BIND)
+ vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+
+ return 0;
+}
+
+static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
+{
+ if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
+ struct i915_address_space *vm = vma->vm;
+
+ vm->clear_range(vm, vma->node.start, vma->size);
+ }
+
+ if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
+ struct i915_address_space *vm =
+ &i915_vm_to_ggtt(vma->vm)->alias->vm;
+
+ vm->clear_range(vm, vma->node.start, vma->size);
+ }
+}
+
+static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
+{
+ struct i915_ppgtt *ppgtt;
+ int err;
+
+ ppgtt = i915_ppgtt_create(ggtt->vm.gt);
+ if (IS_ERR(ppgtt))
+ return PTR_ERR(ppgtt);
+
+ if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
+ err = -ENODEV;
+ goto err_ppgtt;
+ }
+
+ /*
+ * Note we only pre-allocate as far as the end of the global
+ * GTT. On 48b / 4-level page-tables, the difference is very,
+ * very significant! We have to preallocate as GVT/vgpu does
+ * not like the page directory disappearing.
+ */
+ err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
+ if (err)
+ goto err_ppgtt;
+
+ ggtt->alias = ppgtt;
+ ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
+
+ GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
+ ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
+
+ GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
+ ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
+
+ return 0;
+
+err_ppgtt:
+ i915_vm_put(&ppgtt->vm);
+ return err;
+}
+
+static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
+{
+ struct i915_ppgtt *ppgtt;
+
+ ppgtt = fetch_and_zero(&ggtt->alias);
+ if (!ppgtt)
+ return;
+
+ i915_vm_put(&ppgtt->vm);
+
+ ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+}
+
+int i915_init_ggtt(struct drm_i915_private *i915)
+{
+ int ret;
+
+ ret = init_ggtt(&i915->ggtt);
+ if (ret)
+ return ret;
+
+ if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
+ ret = init_aliasing_ppgtt(&i915->ggtt);
+ if (ret)
+ cleanup_init_ggtt(&i915->ggtt);
+ }
+
+ return 0;
+}
+
+static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
+{
+ struct i915_vma *vma, *vn;
+
+ atomic_set(&ggtt->vm.open, 0);
+
+ rcu_barrier(); /* flush the RCU'ed__i915_vm_release */
+ flush_workqueue(ggtt->vm.i915->wq);
+
+ mutex_lock(&ggtt->vm.mutex);
+
+ list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
+ WARN_ON(__i915_vma_unbind(vma));
+
+ if (drm_mm_node_allocated(&ggtt->error_capture))
+ drm_mm_remove_node(&ggtt->error_capture);
+ mutex_destroy(&ggtt->error_mutex);
+
+ ggtt_release_guc_top(ggtt);
+ intel_vgt_deballoon(ggtt);
+
+ ggtt->vm.cleanup(&ggtt->vm);
+
+ mutex_unlock(&ggtt->vm.mutex);
+ i915_address_space_fini(&ggtt->vm);
+
+ arch_phys_wc_del(ggtt->mtrr);
+
+ if (ggtt->iomap.size)
+ io_mapping_fini(&ggtt->iomap);
+}
+
+/**
+ * i915_ggtt_driver_release - Clean up GGTT hardware initialization
+ * @i915: i915 device
+ */
+void i915_ggtt_driver_release(struct drm_i915_private *i915)
+{
+ struct pagevec *pvec;
+
+ fini_aliasing_ppgtt(&i915->ggtt);
+
+ ggtt_cleanup_hw(&i915->ggtt);
+
+ pvec = &i915->mm.wc_stash.pvec;
+ if (pvec->nr) {
+ set_pages_array_wb(pvec->pages, pvec->nr);
+ __pagevec_release(pvec);
+ }
+}
+
+static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
+{
+ snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
+ snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
+ return snb_gmch_ctl << 20;
+}
+
+static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
+{
+ bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
+ bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
+ if (bdw_gmch_ctl)
+ bdw_gmch_ctl = 1 << bdw_gmch_ctl;
+
+#ifdef CONFIG_X86_32
+ /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
+ if (bdw_gmch_ctl > 4)
+ bdw_gmch_ctl = 4;
+#endif
+
+ return bdw_gmch_ctl << 20;
+}
+
+static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
+{
+ gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
+ gmch_ctrl &= SNB_GMCH_GGMS_MASK;
+
+ if (gmch_ctrl)
+ return 1 << (20 + gmch_ctrl);
+
+ return 0;
+}
+
+static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct pci_dev *pdev = i915->drm.pdev;
+ phys_addr_t phys_addr;
+ int ret;
+
+ /* For Modern GENs the PTEs and register space are split in the BAR */
+ phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
+
+ /*
+ * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
+ * will be dropped. For WC mappings in general we have 64 byte burst
+ * writes when the WC buffer is flushed, so we can't use it, but have to
+ * resort to an uncached mapping. The WC issue is easily caught by the
+ * readback check when writing GTT PTE entries.
+ */
+ if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
+ ggtt->gsm = ioremap(phys_addr, size);
+ else
+ ggtt->gsm = ioremap_wc(phys_addr, size);
+ if (!ggtt->gsm) {
+ DRM_ERROR("Failed to map the ggtt page table\n");
+ return -ENOMEM;
+ }
+
+ ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
+ if (ret) {
+ DRM_ERROR("Scratch setup failed\n");
+ /* iounmap will also get called at remove, but meh */
+ iounmap(ggtt->gsm);
+ return ret;
+ }
+
+ ggtt->vm.scratch[0].encode =
+ ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
+ I915_CACHE_NONE, 0);
+
+ return 0;
+}
+
+int ggtt_set_pages(struct i915_vma *vma)
+{
+ int ret;
+
+ GEM_BUG_ON(vma->pages);
+
+ ret = i915_get_ggtt_vma_pages(vma);
+ if (ret)
+ return ret;
+
+ vma->page_sizes = vma->obj->mm.page_sizes;
+
+ return 0;
+}
+
+static void gen6_gmch_remove(struct i915_address_space *vm)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+ iounmap(ggtt->gsm);
+ cleanup_scratch_page(vm);
+}
+
+static struct resource pci_resource(struct pci_dev *pdev, int bar)
+{
+ return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
+ pci_resource_len(pdev, bar));
+}
+
+static int gen8_gmch_probe(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct pci_dev *pdev = i915->drm.pdev;
+ unsigned int size;
+ u16 snb_gmch_ctl;
+ int err;
+
+ /* TODO: We're not aware of mappable constraints on gen8 yet */
+ if (!IS_DGFX(i915)) {
+ ggtt->gmadr = pci_resource(pdev, 2);
+ ggtt->mappable_end = resource_size(&ggtt->gmadr);
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
+ if (!err)
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
+ if (err)
+ DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
+
+ pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+ if (IS_CHERRYVIEW(i915))
+ size = chv_get_total_gtt_size(snb_gmch_ctl);
+ else
+ size = gen8_get_total_gtt_size(snb_gmch_ctl);
+
+ ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
+ ggtt->vm.cleanup = gen6_gmch_remove;
+ ggtt->vm.insert_page = gen8_ggtt_insert_page;
+ ggtt->vm.clear_range = nop_clear_range;
+ if (intel_scanout_needs_vtd_wa(i915))
+ ggtt->vm.clear_range = gen8_ggtt_clear_range;
+
+ ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
+
+ /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
+ if (intel_ggtt_update_needs_vtd_wa(i915) ||
+ IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
+ ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
+ ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
+ if (ggtt->vm.clear_range != nop_clear_range)
+ ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+ }
+
+ ggtt->invalidate = gen8_ggtt_invalidate;
+
+ ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+ ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
+ ggtt->vm.vma_ops.clear_pages = clear_pages;
+
+ ggtt->vm.pte_encode = gen8_pte_encode;
+
+ setup_private_pat(ggtt->vm.gt->uncore);
+
+ return ggtt_probe_common(ggtt, size);
+}
+
+static u64 snb_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ switch (level) {
+ case I915_CACHE_L3_LLC:
+ case I915_CACHE_LLC:
+ pte |= GEN6_PTE_CACHE_LLC;
+ break;
+ case I915_CACHE_NONE:
+ pte |= GEN6_PTE_UNCACHED;
+ break;
+ default:
+ MISSING_CASE(level);
+ }
+
+ return pte;
+}
+
+static u64 ivb_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ switch (level) {
+ case I915_CACHE_L3_LLC:
+ pte |= GEN7_PTE_CACHE_L3_LLC;
+ break;
+ case I915_CACHE_LLC:
+ pte |= GEN6_PTE_CACHE_LLC;
+ break;
+ case I915_CACHE_NONE:
+ pte |= GEN6_PTE_UNCACHED;
+ break;
+ default:
+ MISSING_CASE(level);
+ }
+
+ return pte;
+}
+
+static u64 byt_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ if (!(flags & PTE_READ_ONLY))
+ pte |= BYT_PTE_WRITEABLE;
+
+ if (level != I915_CACHE_NONE)
+ pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
+
+ return pte;
+}
+
+static u64 hsw_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ if (level != I915_CACHE_NONE)
+ pte |= HSW_WB_LLC_AGE3;
+
+ return pte;
+}
+
+static u64 iris_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ switch (level) {
+ case I915_CACHE_NONE:
+ break;
+ case I915_CACHE_WT:
+ pte |= HSW_WT_ELLC_LLC_AGE3;
+ break;
+ default:
+ pte |= HSW_WB_ELLC_LLC_AGE3;
+ break;
+ }
+
+ return pte;
+}
+
+static int gen6_gmch_probe(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct pci_dev *pdev = i915->drm.pdev;
+ unsigned int size;
+ u16 snb_gmch_ctl;
+ int err;
+
+ ggtt->gmadr = pci_resource(pdev, 2);
+ ggtt->mappable_end = resource_size(&ggtt->gmadr);
+
+ /*
+ * 64/512MB is the current min/max we actually know of, but this is
+ * just a coarse sanity check.
+ */
+ if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
+ DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
+ return -ENXIO;
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
+ if (!err)
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
+ if (err)
+ DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
+ pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+
+ size = gen6_get_total_gtt_size(snb_gmch_ctl);
+ ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
+
+ ggtt->vm.clear_range = nop_clear_range;
+ if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
+ ggtt->vm.clear_range = gen6_ggtt_clear_range;
+ ggtt->vm.insert_page = gen6_ggtt_insert_page;
+ ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
+ ggtt->vm.cleanup = gen6_gmch_remove;
+
+ ggtt->invalidate = gen6_ggtt_invalidate;
+
+ if (HAS_EDRAM(i915))
+ ggtt->vm.pte_encode = iris_pte_encode;
+ else if (IS_HASWELL(i915))
+ ggtt->vm.pte_encode = hsw_pte_encode;
+ else if (IS_VALLEYVIEW(i915))
+ ggtt->vm.pte_encode = byt_pte_encode;
+ else if (INTEL_GEN(i915) >= 7)
+ ggtt->vm.pte_encode = ivb_pte_encode;
+ else
+ ggtt->vm.pte_encode = snb_pte_encode;
+
+ ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+ ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
+ ggtt->vm.vma_ops.clear_pages = clear_pages;
+
+ return ggtt_probe_common(ggtt, size);
+}
+
+static void i915_gmch_remove(struct i915_address_space *vm)
+{
+ intel_gmch_remove();
+}
+
+static int i915_gmch_probe(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ phys_addr_t gmadr_base;
+ int ret;
+
+ ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
+ if (!ret) {
+ DRM_ERROR("failed to set up gmch\n");
+ return -EIO;
+ }
+
+ intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
+
+ ggtt->gmadr =
+ (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
+
+ ggtt->do_idle_maps = needs_idle_maps(i915);
+ ggtt->vm.insert_page = i915_ggtt_insert_page;
+ ggtt->vm.insert_entries = i915_ggtt_insert_entries;
+ ggtt->vm.clear_range = i915_ggtt_clear_range;
+ ggtt->vm.cleanup = i915_gmch_remove;
+
+ ggtt->invalidate = gmch_ggtt_invalidate;
+
+ ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+ ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
+ ggtt->vm.vma_ops.clear_pages = clear_pages;
+
+ if (unlikely(ggtt->do_idle_maps))
+ dev_notice(i915->drm.dev,
+ "Applying Ironlake quirks for intel_iommu\n");
+
+ return 0;
+}
+
+static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ int ret;
+
+ ggtt->vm.gt = gt;
+ ggtt->vm.i915 = i915;
+ ggtt->vm.dma = &i915->drm.pdev->dev;
+
+ if (INTEL_GEN(i915) <= 5)
+ ret = i915_gmch_probe(ggtt);
+ else if (INTEL_GEN(i915) < 8)
+ ret = gen6_gmch_probe(ggtt);
+ else
+ ret = gen8_gmch_probe(ggtt);
+ if (ret)
+ return ret;
+
+ if ((ggtt->vm.total - 1) >> 32) {
+ DRM_ERROR("We never expected a Global GTT with more than 32bits"
+ " of address space! Found %lldM!\n",
+ ggtt->vm.total >> 20);
+ ggtt->vm.total = 1ULL << 32;
+ ggtt->mappable_end =
+ min_t(u64, ggtt->mappable_end, ggtt->vm.total);
+ }
+
+ if (ggtt->mappable_end > ggtt->vm.total) {
+ DRM_ERROR("mappable aperture extends past end of GGTT,"
+ " aperture=%pa, total=%llx\n",
+ &ggtt->mappable_end, ggtt->vm.total);
+ ggtt->mappable_end = ggtt->vm.total;
+ }
+
+ /* GMADR is the PCI mmio aperture into the global GTT. */
+ DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
+ DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
+ DRM_DEBUG_DRIVER("DSM size = %lluM\n",
+ (u64)resource_size(&intel_graphics_stolen_res) >> 20);
+
+ return 0;
+}
+
+/**
+ * i915_ggtt_probe_hw - Probe GGTT hardware location
+ * @i915: i915 device
+ */
+int i915_ggtt_probe_hw(struct drm_i915_private *i915)
+{
+ int ret;
+
+ ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
+ if (ret)
+ return ret;
+
+ if (intel_vtd_active())
+ dev_info(i915->drm.dev, "VT-d active for gfx access\n");
+
+ return 0;
+}
+
+int i915_ggtt_enable_hw(struct drm_i915_private *i915)
+{
+ if (INTEL_GEN(i915) < 6 && !intel_enable_gtt())
+ return -EIO;
+
+ return 0;
+}
+
+void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
+{
+ GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);
+
+ ggtt->invalidate = guc_ggtt_invalidate;
+
+ ggtt->invalidate(ggtt);
+}
+
+void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
+{
+ /* XXX Temporary pardon for error unload */
+ if (ggtt->invalidate == gen8_ggtt_invalidate)
+ return;
+
+ /* We should only be called after i915_ggtt_enable_guc() */
+ GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
+
+ ggtt->invalidate = gen8_ggtt_invalidate;
+
+ ggtt->invalidate(ggtt);
+}
+
+static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
+{
+ struct i915_vma *vma;
+ bool flush = false;
+ int open;
+
+ intel_gt_check_and_clear_faults(ggtt->vm.gt);
+
+ mutex_lock(&ggtt->vm.mutex);
+
+ /* First fill our portion of the GTT with scratch pages */
+ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
+
+ /* Skip rewriting PTE on VMA unbind. */
+ open = atomic_xchg(&ggtt->vm.open, 0);
+
+ /* clflush objects bound into the GGTT and rebind them. */
+ list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+ continue;
+
+ clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
+ WARN_ON(i915_vma_bind(vma,
+ obj ? obj->cache_level : 0,
+ PIN_GLOBAL, NULL));
+ if (obj) { /* only used during resume => exclusive access */
+ flush |= fetch_and_zero(&obj->write_domain);
+ obj->read_domains |= I915_GEM_DOMAIN_GTT;
+ }
+ }
+
+ atomic_set(&ggtt->vm.open, open);
+ ggtt->invalidate(ggtt);
+
+ mutex_unlock(&ggtt->vm.mutex);
+
+ if (flush)
+ wbinvd_on_all_cpus();
+}
+
+void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
+{
+ struct i915_ggtt *ggtt = &i915->ggtt;
+
+ ggtt_restore_mappings(ggtt);
+
+ if (INTEL_GEN(i915) >= 8)
+ setup_private_pat(ggtt->vm.gt->uncore);
+}
+
+static struct scatterlist *
+rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+ unsigned int width, unsigned int height,
+ unsigned int stride,
+ struct sg_table *st, struct scatterlist *sg)
+{
+ unsigned int column, row;
+ unsigned int src_idx;
+
+ for (column = 0; column < width; column++) {
+ src_idx = stride * (height - 1) + column + offset;
+ for (row = 0; row < height; row++) {
+ st->nents++;
+ /*
+ * We don't need the pages, but need to initialize
+ * the entries so the sg list can be happily traversed.
+ * The only thing we need are DMA addresses.
+ */
+ sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
+ sg_dma_address(sg) =
+ i915_gem_object_get_dma_address(obj, src_idx);
+ sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
+ sg = sg_next(sg);
+ src_idx -= stride;
+ }
+ }
+
+ return sg;
+}
+
+static noinline struct sg_table *
+intel_rotate_pages(struct intel_rotation_info *rot_info,
+ struct drm_i915_gem_object *obj)
+{
+ unsigned int size = intel_rotation_info_size(rot_info);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Allocate target SG list. */
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, size, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ st->nents = 0;
+ sg = st->sgl;
+
+ for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
+ sg = rotate_pages(obj, rot_info->plane[i].offset,
+ rot_info->plane[i].width, rot_info->plane[i].height,
+ rot_info->plane[i].stride, st, sg);
+ }
+
+ return st;
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
+
+ DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+ obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
+
+ return ERR_PTR(ret);
+}
+
+static struct scatterlist *
+remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+ unsigned int width, unsigned int height,
+ unsigned int stride,
+ struct sg_table *st, struct scatterlist *sg)
+{
+ unsigned int row;
+
+ for (row = 0; row < height; row++) {
+ unsigned int left = width * I915_GTT_PAGE_SIZE;
+
+ while (left) {
+ dma_addr_t addr;
+ unsigned int length;
+
+ /*
+ * We don't need the pages, but need to initialize
+ * the entries so the sg list can be happily traversed.
+ * The only thing we need are DMA addresses.
+ */
+
+ addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
+
+ length = min(left, length);
+
+ st->nents++;
+
+ sg_set_page(sg, NULL, length, 0);
+ sg_dma_address(sg) = addr;
+ sg_dma_len(sg) = length;
+ sg = sg_next(sg);
+
+ offset += length / I915_GTT_PAGE_SIZE;
+ left -= length;
+ }
+
+ offset += stride - width;
+ }
+
+ return sg;
+}
+
+static noinline struct sg_table *
+intel_remap_pages(struct intel_remapped_info *rem_info,
+ struct drm_i915_gem_object *obj)
+{
+ unsigned int size = intel_remapped_info_size(rem_info);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Allocate target SG list. */
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, size, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ st->nents = 0;
+ sg = st->sgl;
+
+ for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
+ sg = remap_pages(obj, rem_info->plane[i].offset,
+ rem_info->plane[i].width, rem_info->plane[i].height,
+ rem_info->plane[i].stride, st, sg);
+ }
+
+ i915_sg_trim(st);
+
+ return st;
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
+
+ DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+ obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
+
+ return ERR_PTR(ret);
+}
+
+static noinline struct sg_table *
+intel_partial_pages(const struct i915_ggtt_view *view,
+ struct drm_i915_gem_object *obj)
+{
+ struct sg_table *st;
+ struct scatterlist *sg, *iter;
+ unsigned int count = view->partial.size;
+ unsigned int offset;
+ int ret = -ENOMEM;
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, count, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
+ GEM_BUG_ON(!iter);
+
+ sg = st->sgl;
+ st->nents = 0;
+ do {
+ unsigned int len;
+
+ len = min(iter->length - (offset << PAGE_SHIFT),
+ count << PAGE_SHIFT);
+ sg_set_page(sg, NULL, len, 0);
+ sg_dma_address(sg) =
+ sg_dma_address(iter) + (offset << PAGE_SHIFT);
+ sg_dma_len(sg) = len;
+
+ st->nents++;
+ count -= len >> PAGE_SHIFT;
+ if (count == 0) {
+ sg_mark_end(sg);
+ i915_sg_trim(st); /* Drop any unused tail entries. */
+
+ return st;
+ }
+
+ sg = __sg_next(sg);
+ iter = __sg_next(iter);
+ offset = 0;
+ } while (1);
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
+ return ERR_PTR(ret);
+}
+
+static int
+i915_get_ggtt_vma_pages(struct i915_vma *vma)
+{
+ int ret;
+
+ /*
+ * The vma->pages are only valid within the lifespan of the borrowed
+ * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
+ * must be the vma->pages. A simple rule is that vma->pages must only
+ * be accessed when the obj->mm.pages are pinned.
+ */
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
+
+ switch (vma->ggtt_view.type) {
+ default:
+ GEM_BUG_ON(vma->ggtt_view.type);
+ /* fall through */
+ case I915_GGTT_VIEW_NORMAL:
+ vma->pages = vma->obj->mm.pages;
+ return 0;
+
+ case I915_GGTT_VIEW_ROTATED:
+ vma->pages =
+ intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
+ break;
+
+ case I915_GGTT_VIEW_REMAPPED:
+ vma->pages =
+ intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
+ break;
+
+ case I915_GGTT_VIEW_PARTIAL:
+ vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
+ break;
+ }
+
+ ret = 0;
+ if (IS_ERR(vma->pages)) {
+ ret = PTR_ERR(vma->pages);
+ vma->pages = NULL;
+ DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
+ vma->ggtt_view.type, ret);
+ }
+ return ret;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 4294f146f13c..51b8718513bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -7,6 +7,8 @@
#ifndef _INTEL_GPU_COMMANDS_H_
#define _INTEL_GPU_COMMANDS_H_
+#include <linux/bitops.h>
+
/*
* Target address alignments required for GPU access e.g.
* MI_STORE_DWORD_IMM.
@@ -319,4 +321,31 @@
#define COLOR_BLT ((0x2<<29)|(0x40<<22))
#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22))
+/*
+ * Used to convert any address to canonical form.
+ * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
+ * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
+ * addresses to be in a canonical form:
+ * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
+ * canonical form [63:48] == [47]."
+ */
+#define GEN8_HIGH_ADDRESS_BIT 47
+static inline u64 gen8_canonical_addr(u64 address)
+{
+ return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
+}
+
+static inline u64 gen8_noncanonical_addr(u64 address)
+{
+ return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
+}
+
+static inline u32 *__gen6_emit_bb_start(u32 *cs, u32 addr, unsigned int flags)
+{
+ *cs++ = MI_BATCH_BUFFER_START | flags;
+ *cs++ = addr;
+
+ return cs;
+}
+
#endif /* _INTEL_GPU_COMMANDS_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 4c26daf7ee46..da2b6e2ae692 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -3,12 +3,15 @@
* Copyright © 2019 Intel Corporation
*/
+#include "debugfs_gt.h"
#include "i915_drv.h"
+#include "intel_context.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_mocs.h"
#include "intel_rc6.h"
+#include "intel_renderstate.h"
#include "intel_rps.h"
#include "intel_uncore.h"
#include "intel_pm.h"
@@ -25,6 +28,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
+ intel_gt_init_timelines(gt);
intel_gt_pm_init_early(gt);
intel_rps_init_early(&gt->rps);
@@ -34,8 +38,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
{
gt->ggtt = ggtt;
-
- intel_gt_sanitize(gt, false);
}
static void init_unused_ring(struct intel_gt *gt, u32 base)
@@ -73,11 +75,6 @@ int intel_gt_init_hw(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
int ret;
- BUG_ON(!i915->kernel_context);
- ret = intel_gt_terminally_wedged(gt);
- if (ret)
- return ret;
-
gt->last_init_time = ktime_get();
/* Double layer security blanket, see i915_gem_init() */
@@ -303,7 +300,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
intel_gt_chipset_flush(gt);
- with_intel_runtime_pm(uncore->rpm, wakeref) {
+ with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
unsigned long flags;
spin_lock_irqsave(&uncore->lock, flags);
@@ -323,6 +320,8 @@ void intel_gt_chipset_flush(struct intel_gt *gt)
void intel_gt_driver_register(struct intel_gt *gt)
{
intel_rps_driver_register(&gt->rps);
+
+ debugfs_gt_register(gt);
}
static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
@@ -364,22 +363,272 @@ static void intel_gt_fini_scratch(struct intel_gt *gt)
i915_vma_unpin_and_release(&gt->scratch, 0);
}
+static struct i915_address_space *kernel_vm(struct intel_gt *gt)
+{
+ if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
+ return &i915_ppgtt_create(gt)->vm;
+ else
+ return i915_vm_get(&gt->ggtt->vm);
+}
+
+static int __intel_context_flush_retire(struct intel_context *ce)
+{
+ struct intel_timeline *tl;
+
+ tl = intel_context_timeline_lock(ce);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
+
+ intel_context_timeline_unlock(tl);
+ return 0;
+}
+
+static int __engines_record_defaults(struct intel_gt *gt)
+{
+ struct i915_request *requests[I915_NUM_ENGINES] = {};
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * As we reset the gpu during very early sanitisation, the current
+ * register state on the GPU should reflect its defaults values.
+ * We load a context onto the hw (with restore-inhibit), then switch
+ * over to a second context to save that default register state. We
+ * can then prime every new context with that state so they all start
+ * from the same default HW values.
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct intel_renderstate so;
+ struct intel_context *ce;
+ struct i915_request *rq;
+
+ /* We must be able to switch to something! */
+ GEM_BUG_ON(!engine->kernel_context);
+
+ err = intel_renderstate_init(&so, engine);
+ if (err)
+ goto out;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ intel_context_put(ce);
+ goto out;
+ }
+
+ err = intel_engine_emit_ctx_wa(rq);
+ if (err)
+ goto err_rq;
+
+ err = intel_renderstate_emit(&so, rq);
+ if (err)
+ goto err_rq;
+
+err_rq:
+ requests[id] = i915_request_get(rq);
+ i915_request_add(rq);
+ intel_renderstate_fini(&so);
+ if (err)
+ goto out;
+ }
+
+ /* Flush the default context image to memory, and enable powersaving. */
+ if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+ err = -EIO;
+ goto out;
+ }
+
+ for (id = 0; id < ARRAY_SIZE(requests); id++) {
+ struct i915_request *rq;
+ struct i915_vma *state;
+ void *vaddr;
+
+ rq = requests[id];
+ if (!rq)
+ continue;
+
+ GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
+ state = rq->context->state;
+ if (!state)
+ continue;
+
+ /* Serialise with retirement on another CPU */
+ GEM_BUG_ON(!i915_request_completed(rq));
+ err = __intel_context_flush_retire(rq->context);
+ if (err)
+ goto out;
+
+ /* We want to be able to unbind the state from the GGTT */
+ GEM_BUG_ON(intel_context_is_pinned(rq->context));
+
+ /*
+ * As we will hold a reference to the logical state, it will
+ * not be torn down with the context, and importantly the
+ * object will hold onto its vma (making it possible for a
+ * stray GTT write to corrupt our defaults). Unmap the vma
+ * from the GTT to prevent such accidents and reclaim the
+ * space.
+ */
+ err = i915_vma_unbind(state);
+ if (err)
+ goto out;
+
+ i915_gem_object_lock(state->obj);
+ err = i915_gem_object_set_to_cpu_domain(state->obj, false);
+ i915_gem_object_unlock(state->obj);
+ if (err)
+ goto out;
+
+ i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC);
+
+ /* Check we can acquire the image of the context state */
+ vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto out;
+ }
+
+ rq->engine->default_state = i915_gem_object_get(state->obj);
+ i915_gem_object_unpin_map(state->obj);
+ }
+
+out:
+ /*
+ * If we have to abandon now, we expect the engines to be idle
+ * and ready to be torn-down. The quickest way we can accomplish
+ * this is by declaring ourselves wedged.
+ */
+ if (err)
+ intel_gt_set_wedged(gt);
+
+ for (id = 0; id < ARRAY_SIZE(requests); id++) {
+ struct intel_context *ce;
+ struct i915_request *rq;
+
+ rq = requests[id];
+ if (!rq)
+ continue;
+
+ ce = rq->context;
+ i915_request_put(rq);
+ intel_context_put(ce);
+ }
+ return err;
+}
+
+static int __engines_verify_workarounds(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ if (intel_engine_verify_workarounds(engine, "load"))
+ err = -EIO;
+ }
+
+ return err;
+}
+
+static void __intel_gt_disable(struct intel_gt *gt)
+{
+ intel_gt_set_wedged_on_init(gt);
+
+ intel_gt_suspend_prepare(gt);
+ intel_gt_suspend_late(gt);
+
+ GEM_BUG_ON(intel_gt_pm_is_awake(gt));
+}
+
int intel_gt_init(struct intel_gt *gt)
{
int err;
- err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
+ err = i915_inject_probe_error(gt->i915, -ENODEV);
if (err)
return err;
+ /*
+ * This is just a security blanket to placate dragons.
+ * On some systems, we very sporadically observe that the first TLBs
+ * used by the CS may be stale, despite us poking the TLB reset. If
+ * we hold the forcewake during initialisation these problems
+ * just magically go away.
+ */
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+
+ err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
+ if (err)
+ goto out_fw;
+
intel_gt_pm_init(gt);
- return 0;
+ gt->vm = kernel_vm(gt);
+ if (!gt->vm) {
+ err = -ENOMEM;
+ goto err_pm;
+ }
+
+ err = intel_engines_init(gt);
+ if (err)
+ goto err_engines;
+
+ intel_uc_init(&gt->uc);
+
+ err = intel_gt_resume(gt);
+ if (err)
+ goto err_uc_init;
+
+ err = __engines_record_defaults(gt);
+ if (err)
+ goto err_gt;
+
+ err = __engines_verify_workarounds(gt);
+ if (err)
+ goto err_gt;
+
+ err = i915_inject_probe_error(gt->i915, -EIO);
+ if (err)
+ goto err_gt;
+
+ goto out_fw;
+err_gt:
+ __intel_gt_disable(gt);
+ intel_uc_fini_hw(&gt->uc);
+err_uc_init:
+ intel_uc_fini(&gt->uc);
+err_engines:
+ intel_engines_release(gt);
+ i915_vm_put(fetch_and_zero(&gt->vm));
+err_pm:
+ intel_gt_pm_fini(gt);
+ intel_gt_fini_scratch(gt);
+out_fw:
+ if (err)
+ intel_gt_set_wedged_on_init(gt);
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+ return err;
}
void intel_gt_driver_remove(struct intel_gt *gt)
{
- GEM_BUG_ON(gt->awake);
+ __intel_gt_disable(gt);
+
+ intel_uc_fini_hw(&gt->uc);
+ intel_uc_fini(&gt->uc);
+
+ intel_engines_release(gt);
}
void intel_gt_driver_unregister(struct intel_gt *gt)
@@ -389,6 +638,12 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
void intel_gt_driver_release(struct intel_gt *gt)
{
+ struct i915_address_space *vm;
+
+ vm = fetch_and_zero(&gt->vm);
+ if (vm) /* FIXME being called twice on error paths :( */
+ i915_vm_put(vm);
+
intel_gt_pm_fini(gt);
intel_gt_fini_scratch(gt);
}
@@ -396,5 +651,8 @@ void intel_gt_driver_release(struct intel_gt *gt)
void intel_gt_driver_late_release(struct intel_gt *gt)
{
intel_uc_driver_late_release(&gt->uc);
+ intel_gt_fini_requests(gt);
intel_gt_fini_reset(gt);
+ intel_gt_fini_timelines(gt);
+ intel_engines_free(gt);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 5436f8c30708..1dac441cb8f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -12,6 +12,12 @@
struct drm_i915_private;
+#define GT_TRACE(gt, fmt, ...) do { \
+ const struct intel_gt *gt__ __maybe_unused = (gt); \
+ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \
+ ##__VA_ARGS__); \
+} while (0)
+
static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
{
return container_of(uc, struct intel_gt, uc);
@@ -52,9 +58,14 @@ static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
return i915_ggtt_offset(gt->scratch) + field;
}
-static inline bool intel_gt_is_wedged(struct intel_gt *gt)
+static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
{
return __intel_reset_failed(&gt->reset);
}
+static inline bool intel_gt_has_init_error(const struct intel_gt *gt)
+{
+ return test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
+}
+
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 973ee7eded64..f796bdf1ed30 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -28,7 +28,7 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
tasklet = true;
if (iir & GT_RENDER_USER_INTERRUPT) {
- intel_engine_breadcrumbs_irq(engine);
+ intel_engine_signal_breadcrumbs(engine);
tasklet |= intel_engine_needs_breadcrumb_tasklet(engine);
}
@@ -245,9 +245,9 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
{
if (gt_iir & GT_RENDER_USER_INTERRUPT)
- intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]);
+ intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
if (gt_iir & ILK_BSD_USER_INTERRUPT)
- intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+ intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
}
static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
@@ -271,11 +271,11 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
{
if (gt_iir & GT_RENDER_USER_INTERRUPT)
- intel_engine_breadcrumbs_irq(gt->engine_class[RENDER_CLASS][0]);
+ intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
if (gt_iir & GT_BSD_USER_INTERRUPT)
- intel_engine_breadcrumbs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+ intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
if (gt_iir & GT_BLT_USER_INTERRUPT)
- intel_engine_breadcrumbs_irq(gt->engine_class[COPY_ENGINE_CLASS][0]);
+ intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]);
if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
GT_BSD_CS_ERROR_INTERRUPT |
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index a459a42ad5c2..d1c2f034296a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -43,7 +43,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
struct drm_i915_private *i915 = gt->i915;
- GEM_TRACE("\n");
+ GT_TRACE(gt, "\n");
i915_globals_unpark();
@@ -61,9 +61,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
GEM_BUG_ON(!gt->awake);
- if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
- intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
-
+ intel_rc6_unpark(&gt->rc6);
intel_rps_unpark(&gt->rps);
i915_pmu_gt_unparked(i915);
@@ -78,24 +76,21 @@ static int __gt_park(struct intel_wakeref *wf)
intel_wakeref_t wakeref = fetch_and_zero(&gt->awake);
struct drm_i915_private *i915 = gt->i915;
- GEM_TRACE("\n");
+ GT_TRACE(gt, "\n");
intel_gt_park_requests(gt);
i915_vma_parked(gt);
i915_pmu_gt_parked(i915);
intel_rps_park(&gt->rps);
+ intel_rc6_park(&gt->rc6);
/* Everything switched off, flush any residual interrupt just in case */
intel_synchronize_irq(i915);
- if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) {
- intel_rc6_ctx_wa_check(&i915->gt.rc6);
- intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
- }
-
+ /* Defer dropping the display power well for 100ms, it's slow! */
GEM_BUG_ON(!wakeref);
- intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+ intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
i915_globals_park();
@@ -131,23 +126,13 @@ static bool reset_engines(struct intel_gt *gt)
return __intel_gt_reset(gt, ALL_ENGINES) == 0;
}
-/**
- * intel_gt_sanitize: called after the GPU has lost power
- * @gt: the i915 GT container
- * @force: ignore a failed reset and sanitize engine state anyway
- *
- * Anytime we reset the GPU, either with an explicit GPU reset or through a
- * PCI power cycle, the GPU loses state and we must reset our state tracking
- * to match. Note that calling intel_gt_sanitize() if the GPU has not
- * been reset results in much confusion!
- */
-void intel_gt_sanitize(struct intel_gt *gt, bool force)
+static void gt_sanitize(struct intel_gt *gt, bool force)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
intel_wakeref_t wakeref;
- GEM_TRACE("force:%s\n", yesno(force));
+ GT_TRACE(gt, "force:%s", yesno(force));
/* Use a raw wakeref to avoid calling intel_display_power_get early */
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
@@ -192,9 +177,13 @@ int intel_gt_resume(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- int err = 0;
+ int err;
- GEM_TRACE("\n");
+ err = intel_gt_has_init_error(gt);
+ if (err)
+ return err;
+
+ GT_TRACE(gt, "\n");
/*
* After resume, we may need to poke into the pinned kernel
@@ -206,21 +195,26 @@ int intel_gt_resume(struct intel_gt *gt)
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
intel_rc6_sanitize(&gt->rc6);
+ gt_sanitize(gt, true);
+ if (intel_gt_is_wedged(gt)) {
+ err = -EIO;
+ goto out_fw;
+ }
+
+ /* Only when the HW is re-initialised, can we replay the requests */
+ err = intel_gt_init_hw(gt);
+ if (err) {
+ dev_err(gt->i915->drm.dev,
+ "Failed to initialize GPU, declaring it wedged!\n");
+ goto err_wedged;
+ }
intel_rps_enable(&gt->rps);
intel_llc_enable(&gt->llc);
for_each_engine(engine, gt, id) {
- struct intel_context *ce;
-
intel_engine_pm_get(engine);
- ce = engine->kernel_context;
- if (ce) {
- GEM_BUG_ON(!intel_context_is_pinned(ce));
- ce->ops->reset(ce);
- }
-
engine->serial++; /* kernel context lost */
err = engine->resume(engine);
@@ -229,7 +223,7 @@ int intel_gt_resume(struct intel_gt *gt)
dev_err(gt->i915->drm.dev,
"Failed to restart %s (%d)\n",
engine->name, err);
- break;
+ goto err_wedged;
}
}
@@ -239,10 +233,14 @@ int intel_gt_resume(struct intel_gt *gt)
user_forcewake(gt, false);
+out_fw:
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
intel_gt_pm_put(gt);
-
return err;
+
+err_wedged:
+ intel_gt_set_wedged(gt);
+ goto out_fw;
}
static void wait_for_suspend(struct intel_gt *gt)
@@ -256,6 +254,7 @@ static void wait_for_suspend(struct intel_gt *gt)
* the gpu quiet.
*/
intel_gt_set_wedged(gt);
+ intel_gt_retire_requests(gt);
}
intel_gt_pm_wait_for_idle(gt);
@@ -285,6 +284,11 @@ void intel_gt_suspend_late(struct intel_gt *gt)
/* We expect to be idle already; but also want to be independent */
wait_for_suspend(gt);
+ if (is_mock_gt(gt))
+ return;
+
+ GEM_BUG_ON(gt->awake);
+
/*
* On disabling the device, we want to turn off HW access to memory
* that we no longer own.
@@ -304,22 +308,21 @@ void intel_gt_suspend_late(struct intel_gt *gt)
intel_llc_disable(&gt->llc);
}
- intel_gt_sanitize(gt, false);
+ gt_sanitize(gt, false);
- GEM_TRACE("\n");
+ GT_TRACE(gt, "\n");
}
void intel_gt_runtime_suspend(struct intel_gt *gt)
{
intel_uc_runtime_suspend(&gt->uc);
- GEM_TRACE("\n");
+ GT_TRACE(gt, "\n");
}
int intel_gt_runtime_resume(struct intel_gt *gt)
{
- GEM_TRACE("\n");
-
+ GT_TRACE(gt, "\n");
intel_gt_init_swizzling(gt);
return intel_uc_runtime_resume(&gt->uc);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index 990efc27a4e4..60f0e2fbe55c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -22,6 +22,11 @@ static inline void intel_gt_pm_get(struct intel_gt *gt)
intel_wakeref_get(&gt->wakeref);
}
+static inline void __intel_gt_pm_get(struct intel_gt *gt)
+{
+ __intel_wakeref_get(&gt->wakeref);
+}
+
static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
{
return intel_wakeref_get_if_active(&gt->wakeref);
@@ -46,8 +51,6 @@ void intel_gt_pm_init_early(struct intel_gt *gt);
void intel_gt_pm_init(struct intel_gt *gt);
void intel_gt_pm_fini(struct intel_gt *gt);
-void intel_gt_sanitize(struct intel_gt *gt, bool force);
-
void intel_gt_suspend_prepare(struct intel_gt *gt);
void intel_gt_suspend_late(struct intel_gt *gt);
int intel_gt_resume(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 3dc13ecf41bf..7ef1d37970f6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -8,27 +8,40 @@
#include "i915_drv.h" /* for_each_engine() */
#include "i915_request.h"
+#include "intel_engine_heartbeat.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_timeline.h"
-static void retire_requests(struct intel_timeline *tl)
+static bool retire_requests(struct intel_timeline *tl)
{
struct i915_request *rq, *rn;
list_for_each_entry_safe(rq, rn, &tl->requests, link)
if (!i915_request_retire(rq))
- break;
+ return false;
+
+ /* And check nothing new was submitted */
+ return !i915_active_fence_isset(&tl->last_request);
}
-static void flush_submission(struct intel_gt *gt)
+static bool flush_submission(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ bool active = false;
+
+ if (!intel_gt_pm_is_awake(gt))
+ return false;
- for_each_engine(engine, gt, id)
+ for_each_engine(engine, gt, id) {
intel_engine_flush_submission(engine);
+ active |= flush_work(&engine->retire_work);
+ active |= flush_work(&engine->wakeref.work);
+ }
+
+ return active;
}
static void engine_retire(struct work_struct *work)
@@ -62,19 +75,16 @@ static void engine_retire(struct work_struct *work)
static bool add_retire(struct intel_engine_cs *engine,
struct intel_timeline *tl)
{
+#define STUB ((struct intel_timeline *)1)
struct intel_timeline *first;
/*
* We open-code a llist here to include the additional tag [BIT(0)]
* so that we know when the timeline is already on a
* retirement queue: either this engine or another.
- *
- * However, we rely on that a timeline can only be active on a single
- * engine at any one time and that add_retire() is called before the
- * engine releases the timeline and transferred to another to retire.
*/
- if (READ_ONCE(tl->retire)) /* already queued */
+ if (cmpxchg(&tl->retire, NULL, STUB)) /* already queued */
return false;
intel_timeline_get(tl);
@@ -109,7 +119,6 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
struct intel_gt_timelines *timelines = &gt->timelines;
struct intel_timeline *tl, *tn;
unsigned long active_count = 0;
- unsigned long flags;
bool interruptible;
LIST_HEAD(free);
@@ -118,8 +127,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
timeout = -timeout, interruptible = false;
flush_submission(gt); /* kick the ksoftirqd tasklets */
-
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
if (!mutex_trylock(&tl->mutex)) {
active_count++; /* report busy to caller, try again? */
@@ -129,7 +137,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
intel_timeline_get(tl);
GEM_BUG_ON(!atomic_read(&tl->active_count));
atomic_inc(&tl->active_count); /* pin the list element */
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
if (timeout > 0) {
struct dma_fence *fence;
@@ -143,16 +151,15 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
}
}
- retire_requests(tl);
+ if (!retire_requests(tl) || flush_submission(gt))
+ active_count++;
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
/* Resume iteration after dropping lock */
list_safe_reset_next(tl, tn, link);
if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
- else
- active_count += !!rcu_access_pointer(tl->last_request.fence);
mutex_unlock(&tl->mutex);
@@ -162,7 +169,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
list_add(&tl->link, &free);
}
}
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
list_for_each_entry_safe(tl, tn, &free, link)
__intel_timeline_free(&tl->kref);
@@ -190,9 +197,9 @@ static void retire_work_handler(struct work_struct *work)
struct intel_gt *gt =
container_of(work, typeof(*gt), requests.retire_work.work);
- intel_gt_retire_requests(gt);
schedule_delayed_work(&gt->requests.retire_work,
round_jiffies_up_relative(HZ));
+ intel_gt_retire_requests(gt);
}
void intel_gt_init_requests(struct intel_gt *gt)
@@ -210,3 +217,9 @@ void intel_gt_unpark_requests(struct intel_gt *gt)
schedule_delayed_work(&gt->requests.retire_work,
round_jiffies_up_relative(HZ));
}
+
+void intel_gt_fini_requests(struct intel_gt *gt)
+{
+ /* Wait until the work is marked as finished before unloading! */
+ cancel_delayed_work_sync(&gt->requests.retire_work);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
index d626fb115386..dbac53baf1cb 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -27,5 +27,6 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
void intel_gt_init_requests(struct intel_gt *gt);
void intel_gt_park_requests(struct intel_gt *gt);
void intel_gt_unpark_requests(struct intel_gt *gt);
+void intel_gt_fini_requests(struct intel_gt *gt);
#endif /* INTEL_GT_REQUESTS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index d4e14dbd172e..96890dd12b5f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -90,6 +90,13 @@ struct intel_gt {
struct intel_engine_cs *engine[I915_NUM_ENGINES];
struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
[MAX_ENGINE_INSTANCE + 1];
+
+ /*
+ * Default address space (either GGTT or ppGTT depending on arch).
+ *
+ * Reserved for exclusive use by the kernel.
+ */
+ struct i915_address_space *vm;
};
enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
new file mode 100644
index 000000000000..16acdc5d6734
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/slab.h> /* fault-inject.h is not standalone! */
+
+#include <linux/fault-inject.h>
+
+#include "i915_trace.h"
+#include "intel_gt.h"
+#include "intel_gtt.h"
+
+void stash_init(struct pagestash *stash)
+{
+ pagevec_init(&stash->pvec);
+ spin_lock_init(&stash->lock);
+}
+
+static struct page *stash_pop_page(struct pagestash *stash)
+{
+ struct page *page = NULL;
+
+ spin_lock(&stash->lock);
+ if (likely(stash->pvec.nr))
+ page = stash->pvec.pages[--stash->pvec.nr];
+ spin_unlock(&stash->lock);
+
+ return page;
+}
+
+static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
+{
+ unsigned int nr;
+
+ spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
+
+ nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
+ memcpy(stash->pvec.pages + stash->pvec.nr,
+ pvec->pages + pvec->nr - nr,
+ sizeof(pvec->pages[0]) * nr);
+ stash->pvec.nr += nr;
+
+ spin_unlock(&stash->lock);
+
+ pvec->nr -= nr;
+}
+
+static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
+{
+ struct pagevec stack;
+ struct page *page;
+
+ if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
+ i915_gem_shrink_all(vm->i915);
+
+ page = stash_pop_page(&vm->free_pages);
+ if (page)
+ return page;
+
+ if (!vm->pt_kmap_wc)
+ return alloc_page(gfp);
+
+ /* Look in our global stash of WC pages... */
+ page = stash_pop_page(&vm->i915->mm.wc_stash);
+ if (page)
+ return page;
+
+ /*
+ * Otherwise batch allocate pages to amortize cost of set_pages_wc.
+ *
+ * We have to be careful as page allocation may trigger the shrinker
+ * (via direct reclaim) which will fill up the WC stash underneath us.
+ * So we add our WB pages into a temporary pvec on the stack and merge
+ * them into the WC stash after all the allocations are complete.
+ */
+ pagevec_init(&stack);
+ do {
+ struct page *page;
+
+ page = alloc_page(gfp);
+ if (unlikely(!page))
+ break;
+
+ stack.pages[stack.nr++] = page;
+ } while (pagevec_space(&stack));
+
+ if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
+ page = stack.pages[--stack.nr];
+
+ /* Merge spare WC pages to the global stash */
+ if (stack.nr)
+ stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
+
+ /* Push any surplus WC pages onto the local VM stash */
+ if (stack.nr)
+ stash_push_pagevec(&vm->free_pages, &stack);
+ }
+
+ /* Return unwanted leftovers */
+ if (unlikely(stack.nr)) {
+ WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
+ __pagevec_release(&stack);
+ }
+
+ return page;
+}
+
+static void vm_free_pages_release(struct i915_address_space *vm,
+ bool immediate)
+{
+ struct pagevec *pvec = &vm->free_pages.pvec;
+ struct pagevec stack;
+
+ lockdep_assert_held(&vm->free_pages.lock);
+ GEM_BUG_ON(!pagevec_count(pvec));
+
+ if (vm->pt_kmap_wc) {
+ /*
+ * When we use WC, first fill up the global stash and then
+ * only if full immediately free the overflow.
+ */
+ stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
+
+ /*
+ * As we have made some room in the VM's free_pages,
+ * we can wait for it to fill again. Unless we are
+ * inside i915_address_space_fini() and must
+ * immediately release the pages!
+ */
+ if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
+ return;
+
+ /*
+ * We have to drop the lock to allow ourselves to sleep,
+ * so take a copy of the pvec and clear the stash for
+ * others to use it as we sleep.
+ */
+ stack = *pvec;
+ pagevec_reinit(pvec);
+ spin_unlock(&vm->free_pages.lock);
+
+ pvec = &stack;
+ set_pages_array_wb(pvec->pages, pvec->nr);
+
+ spin_lock(&vm->free_pages.lock);
+ }
+
+ __pagevec_release(pvec);
+}
+
+static void vm_free_page(struct i915_address_space *vm, struct page *page)
+{
+ /*
+ * On !llc, we need to change the pages back to WB. We only do so
+ * in bulk, so we rarely need to change the page attributes here,
+ * but doing so requires a stop_machine() from deep inside arch/x86/mm.
+ * To make detection of the possible sleep more likely, use an
+ * unconditional might_sleep() for everybody.
+ */
+ might_sleep();
+ spin_lock(&vm->free_pages.lock);
+ while (!pagevec_space(&vm->free_pages.pvec))
+ vm_free_pages_release(vm, false);
+ GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
+ pagevec_add(&vm->free_pages.pvec, page);
+ spin_unlock(&vm->free_pages.lock);
+}
+
+void __i915_vm_close(struct i915_address_space *vm)
+{
+ struct i915_vma *vma, *vn;
+
+ mutex_lock(&vm->mutex);
+ list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ /* Keep the obj (and hence the vma) alive as _we_ destroy it */
+ if (!kref_get_unless_zero(&obj->base.refcount))
+ continue;
+
+ atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+ WARN_ON(__i915_vma_unbind(vma));
+ __i915_vma_put(vma);
+
+ i915_gem_object_put(obj);
+ }
+ GEM_BUG_ON(!list_empty(&vm->bound_list));
+ mutex_unlock(&vm->mutex);
+}
+
+void i915_address_space_fini(struct i915_address_space *vm)
+{
+ spin_lock(&vm->free_pages.lock);
+ if (pagevec_count(&vm->free_pages.pvec))
+ vm_free_pages_release(vm, true);
+ GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
+ spin_unlock(&vm->free_pages.lock);
+
+ drm_mm_takedown(&vm->mm);
+
+ mutex_destroy(&vm->mutex);
+}
+
+static void __i915_vm_release(struct work_struct *work)
+{
+ struct i915_address_space *vm =
+ container_of(work, struct i915_address_space, rcu.work);
+
+ vm->cleanup(vm);
+ i915_address_space_fini(vm);
+
+ kfree(vm);
+}
+
+void i915_vm_release(struct kref *kref)
+{
+ struct i915_address_space *vm =
+ container_of(kref, struct i915_address_space, ref);
+
+ GEM_BUG_ON(i915_is_ggtt(vm));
+ trace_i915_ppgtt_release(vm);
+
+ queue_rcu_work(vm->i915->wq, &vm->rcu);
+}
+
+void i915_address_space_init(struct i915_address_space *vm, int subclass)
+{
+ kref_init(&vm->ref);
+ INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
+ atomic_set(&vm->open, 1);
+
+ /*
+ * The vm->mutex must be reclaim safe (for use in the shrinker).
+ * Do a dummy acquire now under fs_reclaim so that any allocation
+ * attempt holding the lock is immediately reported by lockdep.
+ */
+ mutex_init(&vm->mutex);
+ lockdep_set_subclass(&vm->mutex, subclass);
+ i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
+
+ GEM_BUG_ON(!vm->total);
+ drm_mm_init(&vm->mm, 0, vm->total);
+ vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
+
+ stash_init(&vm->free_pages);
+
+ INIT_LIST_HEAD(&vm->bound_list);
+}
+
+void clear_pages(struct i915_vma *vma)
+{
+ GEM_BUG_ON(!vma->pages);
+
+ if (vma->pages != vma->obj->mm.pages) {
+ sg_free_table(vma->pages);
+ kfree(vma->pages);
+ }
+ vma->pages = NULL;
+
+ memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
+}
+
+static int __setup_page_dma(struct i915_address_space *vm,
+ struct i915_page_dma *p,
+ gfp_t gfp)
+{
+ p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
+ if (unlikely(!p->page))
+ return -ENOMEM;
+
+ p->daddr = dma_map_page_attrs(vm->dma,
+ p->page, 0, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_NO_WARN);
+ if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
+ vm_free_page(vm, p->page);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+{
+ return __setup_page_dma(vm, p, __GFP_HIGHMEM);
+}
+
+void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+{
+ dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ vm_free_page(vm, p->page);
+}
+
+void
+fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
+{
+ kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
+}
+
+int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
+{
+ unsigned long size;
+
+ /*
+ * In order to utilize 64K pages for an object with a size < 2M, we will
+ * need to support a 64K scratch page, given that every 16th entry for a
+ * page-table operating in 64K mode must point to a properly aligned 64K
+ * region, including any PTEs which happen to point to scratch.
+ *
+ * This is only relevant for the 48b PPGTT where we support
+ * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
+ * scratch (read-only) between all vm, we create one 64k scratch page
+ * for all.
+ */
+ size = I915_GTT_PAGE_SIZE_4K;
+ if (i915_vm_is_4lvl(vm) &&
+ HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
+ size = I915_GTT_PAGE_SIZE_64K;
+ gfp |= __GFP_NOWARN;
+ }
+ gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
+
+ do {
+ unsigned int order = get_order(size);
+ struct page *page;
+ dma_addr_t addr;
+
+ page = alloc_pages(gfp, order);
+ if (unlikely(!page))
+ goto skip;
+
+ addr = dma_map_page_attrs(vm->dma,
+ page, 0, size,
+ PCI_DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_NO_WARN);
+ if (unlikely(dma_mapping_error(vm->dma, addr)))
+ goto free_page;
+
+ if (unlikely(!IS_ALIGNED(addr, size)))
+ goto unmap_page;
+
+ vm->scratch[0].base.page = page;
+ vm->scratch[0].base.daddr = addr;
+ vm->scratch_order = order;
+ return 0;
+
+unmap_page:
+ dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
+free_page:
+ __free_pages(page, order);
+skip:
+ if (size == I915_GTT_PAGE_SIZE_4K)
+ return -ENOMEM;
+
+ size = I915_GTT_PAGE_SIZE_4K;
+ gfp &= ~__GFP_NOWARN;
+ } while (1);
+}
+
+void cleanup_scratch_page(struct i915_address_space *vm)
+{
+ struct i915_page_dma *p = px_base(&vm->scratch[0]);
+ unsigned int order = vm->scratch_order;
+
+ dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
+ PCI_DMA_BIDIRECTIONAL);
+ __free_pages(p->page, order);
+}
+
+void free_scratch(struct i915_address_space *vm)
+{
+ int i;
+
+ if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
+ return;
+
+ for (i = 1; i <= vm->top; i++) {
+ if (!px_dma(&vm->scratch[i]))
+ break;
+ cleanup_page_dma(vm, px_base(&vm->scratch[i]));
+ }
+
+ cleanup_scratch_page(vm);
+}
+
+void gtt_write_workarounds(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+
+ /*
+ * This function is for gtt related workarounds. This function is
+ * called on driver load and after a GPU reset, so you can place
+ * workarounds here even if they get overwritten by GPU reset.
+ */
+ /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
+ if (IS_BROADWELL(i915))
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
+ else if (IS_CHERRYVIEW(i915))
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
+ else if (IS_GEN9_LP(i915))
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
+ else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
+
+ /*
+ * To support 64K PTEs we need to first enable the use of the
+ * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
+ * mmio, otherwise the page-walker will simply ignore the IPS bit. This
+ * shouldn't be needed after GEN10.
+ *
+ * 64K pages were first introduced from BDW+, although technically they
+ * only *work* from gen9+. For pre-BDW we instead have the option for
+ * 32K pages, but we don't currently have any support for it in our
+ * driver.
+ */
+ if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
+ INTEL_GEN(i915) <= 10)
+ intel_uncore_rmw(uncore,
+ GEN8_GAMW_ECO_DEV_RW_IA,
+ 0,
+ GAMW_ECO_ENABLE_64K_IPS_FIELD);
+
+ if (IS_GEN_RANGE(i915, 8, 11)) {
+ bool can_use_gtt_cache = true;
+
+ /*
+ * According to the BSpec if we use 2M/1G pages then we also
+ * need to disable the GTT cache. At least on BDW we can see
+ * visual corruption when using 2M pages, and not disabling the
+ * GTT cache.
+ */
+ if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
+ can_use_gtt_cache = false;
+
+ /* WaGttCachingOffByDefault */
+ intel_uncore_write(uncore,
+ HSW_GTT_CACHE_EN,
+ can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
+ WARN_ON_ONCE(can_use_gtt_cache &&
+ intel_uncore_read(uncore,
+ HSW_GTT_CACHE_EN) == 0);
+ }
+}
+
+u64 gen8_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
+
+ if (unlikely(flags & PTE_READ_ONLY))
+ pte &= ~_PAGE_RW;
+
+ switch (level) {
+ case I915_CACHE_NONE:
+ pte |= PPAT_UNCACHED;
+ break;
+ case I915_CACHE_WT:
+ pte |= PPAT_DISPLAY_ELLC;
+ break;
+ default:
+ pte |= PPAT_CACHED;
+ break;
+ }
+
+ return pte;
+}
+
+static void tgl_setup_private_ppat(struct intel_uncore *uncore)
+{
+ /* TGL doesn't support LLC or AGE settings */
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
+}
+
+static void cnl_setup_private_ppat(struct intel_uncore *uncore)
+{
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(0),
+ GEN8_PPAT_WB | GEN8_PPAT_LLC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(1),
+ GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(2),
+ GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(3),
+ GEN8_PPAT_UC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(4),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(5),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(6),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(7),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+}
+
+/*
+ * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
+ * bits. When using advanced contexts each context stores its own PAT, but
+ * writing this data shouldn't be harmful even in those cases.
+ */
+static void bdw_setup_private_ppat(struct intel_uncore *uncore)
+{
+ u64 pat;
+
+ pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
+ GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
+ GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
+ GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
+ GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
+ GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
+ GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
+ GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
+}
+
+static void chv_setup_private_ppat(struct intel_uncore *uncore)
+{
+ u64 pat;
+
+ /*
+ * Map WB on BDW to snooped on CHV.
+ *
+ * Only the snoop bit has meaning for CHV, the rest is
+ * ignored.
+ *
+ * The hardware will never snoop for certain types of accesses:
+ * - CPU GTT (GMADR->GGTT->no snoop->memory)
+ * - PPGTT page tables
+ * - some other special cycles
+ *
+ * As with BDW, we also need to consider the following for GT accesses:
+ * "For GGTT, there is NO pat_sel[2:0] from the entry,
+ * so RTL will always use the value corresponding to
+ * pat_sel = 000".
+ * Which means we must set the snoop bit in PAT entry 0
+ * in order to keep the global status page working.
+ */
+
+ pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(1, 0) |
+ GEN8_PPAT(2, 0) |
+ GEN8_PPAT(3, 0) |
+ GEN8_PPAT(4, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(5, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(6, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(7, CHV_PPAT_SNOOP);
+
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
+}
+
+void setup_private_pat(struct intel_uncore *uncore)
+{
+ struct drm_i915_private *i915 = uncore->i915;
+
+ GEM_BUG_ON(INTEL_GEN(i915) < 8);
+
+ if (INTEL_GEN(i915) >= 12)
+ tgl_setup_private_ppat(uncore);
+ else if (INTEL_GEN(i915) >= 10)
+ cnl_setup_private_ppat(uncore);
+ else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
+ chv_setup_private_ppat(uncore);
+ else
+ bdw_setup_private_ppat(uncore);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_gtt.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
new file mode 100644
index 000000000000..7da7681c20b1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -0,0 +1,587 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Please try to maintain the following order within this file unless it makes
+ * sense to do otherwise. From top to bottom:
+ * 1. typedefs
+ * 2. #defines, and macros
+ * 3. structure definitions
+ * 4. function prototypes
+ *
+ * Within each section, please try to order by generation in ascending order,
+ * from top to bottom (ie. gen6 on the top, gen8 on the bottom).
+ */
+
+#ifndef __INTEL_GTT_H__
+#define __INTEL_GTT_H__
+
+#include <linux/io-mapping.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/pagevec.h>
+#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
+
+#include <drm/drm_mm.h>
+
+#include "gt/intel_reset.h"
+#include "i915_gem_fence_reg.h"
+#include "i915_selftest.h"
+#include "i915_vma_types.h"
+
+#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+
+#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
+#define DBG(...) trace_printk(__VA_ARGS__)
+#else
+#define DBG(...)
+#endif
+
+#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */
+
+#define I915_GTT_PAGE_SIZE_4K BIT_ULL(12)
+#define I915_GTT_PAGE_SIZE_64K BIT_ULL(16)
+#define I915_GTT_PAGE_SIZE_2M BIT_ULL(21)
+
+#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K
+#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M
+
+#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE
+
+#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE
+
+#define I915_FENCE_REG_NONE -1
+#define I915_MAX_NUM_FENCES 32
+/* 32 fences + sign bit for FENCE_REG_NONE */
+#define I915_MAX_NUM_FENCE_BITS 6
+
+typedef u32 gen6_pte_t;
+typedef u64 gen8_pte_t;
+
+#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT)
+
+#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len)))
+#define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1)
+#define I915_PDES 512
+#define I915_PDE_MASK (I915_PDES - 1)
+
+/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */
+#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
+#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
+#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
+#define GEN6_PTE_CACHE_LLC (2 << 1)
+#define GEN6_PTE_UNCACHED (1 << 1)
+#define GEN6_PTE_VALID REG_BIT(0)
+
+#define GEN6_PTES I915_PTES(sizeof(gen6_pte_t))
+#define GEN6_PD_SIZE (I915_PDES * PAGE_SIZE)
+#define GEN6_PD_ALIGN (PAGE_SIZE * 16)
+#define GEN6_PDE_SHIFT 22
+#define GEN6_PDE_VALID REG_BIT(0)
+#define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT))
+
+#define GEN7_PTE_CACHE_L3_LLC (3 << 1)
+
+#define BYT_PTE_SNOOPED_BY_CPU_CACHES REG_BIT(2)
+#define BYT_PTE_WRITEABLE REG_BIT(1)
+
+/*
+ * Cacheability Control is a 4-bit value. The low three bits are stored in bits
+ * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
+ */
+#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \
+ (((bits) & 0x8) << (11 - 3)))
+#define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2)
+#define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3)
+#define HSW_WB_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x8)
+#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
+#define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7)
+#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)
+#define HSW_PTE_UNCACHED (0)
+#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0))
+#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr)
+
+/*
+ * GEN8 32b style address is defined as a 3 level page table:
+ * 31:30 | 29:21 | 20:12 | 11:0
+ * PDPE | PDE | PTE | offset
+ * The difference as compared to normal x86 3 level page table is the PDPEs are
+ * programmed via register.
+ *
+ * GEN8 48b style address is defined as a 4 level page table:
+ * 47:39 | 38:30 | 29:21 | 20:12 | 11:0
+ * PML4E | PDPE | PDE | PTE | offset
+ */
+#define GEN8_3LVL_PDPES 4
+
+#define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD)
+#define PPAT_CACHED_PDE 0 /* WB LLC */
+#define PPAT_CACHED _PAGE_PAT /* WB LLCeLLC */
+#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT eLLC */
+
+#define CHV_PPAT_SNOOP REG_BIT(6)
+#define GEN8_PPAT_AGE(x) ((x)<<4)
+#define GEN8_PPAT_LLCeLLC (3<<2)
+#define GEN8_PPAT_LLCELLC (2<<2)
+#define GEN8_PPAT_LLC (1<<2)
+#define GEN8_PPAT_WB (3<<0)
+#define GEN8_PPAT_WT (2<<0)
+#define GEN8_PPAT_WC (1<<0)
+#define GEN8_PPAT_UC (0<<0)
+#define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
+#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8))
+
+#define GEN8_PDE_IPS_64K BIT(11)
+#define GEN8_PDE_PS_2M BIT(7)
+
+#define for_each_sgt_daddr(__dp, __iter, __sgt) \
+ __for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
+
+struct i915_page_dma {
+ struct page *page;
+ union {
+ dma_addr_t daddr;
+
+ /*
+ * For gen6/gen7 only. This is the offset in the GGTT
+ * where the page directory entries for PPGTT begin
+ */
+ u32 ggtt_offset;
+ };
+};
+
+struct i915_page_scratch {
+ struct i915_page_dma base;
+ u64 encode;
+};
+
+struct i915_page_table {
+ struct i915_page_dma base;
+ atomic_t used;
+};
+
+struct i915_page_directory {
+ struct i915_page_table pt;
+ spinlock_t lock;
+ void *entry[512];
+};
+
+#define __px_choose_expr(x, type, expr, other) \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(x), type) || \
+ __builtin_types_compatible_p(typeof(x), const type), \
+ ({ type __x = (type)(x); expr; }), \
+ other)
+
+#define px_base(px) \
+ __px_choose_expr(px, struct i915_page_dma *, __x, \
+ __px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
+ __px_choose_expr(px, struct i915_page_table *, &__x->base, \
+ __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
+ (void)0))))
+#define px_dma(px) (px_base(px)->daddr)
+
+#define px_pt(px) \
+ __px_choose_expr(px, struct i915_page_table *, __x, \
+ __px_choose_expr(px, struct i915_page_directory *, &__x->pt, \
+ (void)0))
+#define px_used(px) (&px_pt(px)->used)
+
+enum i915_cache_level;
+
+struct drm_i915_file_private;
+struct drm_i915_gem_object;
+struct i915_vma;
+struct intel_gt;
+
+struct i915_vma_ops {
+ /* Map an object into an address space with the given cache flags. */
+ int (*bind_vma)(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags);
+ /*
+ * Unmap an object from an address space. This usually consists of
+ * setting the valid PTE entries to a reserved scratch page.
+ */
+ void (*unbind_vma)(struct i915_vma *vma);
+
+ int (*set_pages)(struct i915_vma *vma);
+ void (*clear_pages)(struct i915_vma *vma);
+};
+
+struct pagestash {
+ spinlock_t lock;
+ struct pagevec pvec;
+};
+
+void stash_init(struct pagestash *stash);
+
+struct i915_address_space {
+ struct kref ref;
+ struct rcu_work rcu;
+
+ struct drm_mm mm;
+ struct intel_gt *gt;
+ struct drm_i915_private *i915;
+ struct device *dma;
+ /*
+ * Every address space belongs to a struct file - except for the global
+ * GTT that is owned by the driver (and so @file is set to NULL). In
+ * principle, no information should leak from one context to another
+ * (or between files/processes etc) unless explicitly shared by the
+ * owner. Tracking the owner is important in order to free up per-file
+ * objects along with the file, to aide resource tracking, and to
+ * assign blame.
+ */
+ struct drm_i915_file_private *file;
+ u64 total; /* size addr space maps (ex. 2GB for ggtt) */
+ u64 reserved; /* size addr space reserved */
+
+ unsigned int bind_async_flags;
+
+ /*
+ * Each active user context has its own address space (in full-ppgtt).
+ * Since the vm may be shared between multiple contexts, we count how
+ * many contexts keep us "open". Once open hits zero, we are closed
+ * and do not allow any new attachments, and proceed to shutdown our
+ * vma and page directories.
+ */
+ atomic_t open;
+
+ struct mutex mutex; /* protects vma and our lists */
+#define VM_CLASS_GGTT 0
+#define VM_CLASS_PPGTT 1
+
+ struct i915_page_scratch scratch[4];
+ unsigned int scratch_order;
+ unsigned int top;
+
+ /**
+ * List of vma currently bound.
+ */
+ struct list_head bound_list;
+
+ struct pagestash free_pages;
+
+ /* Global GTT */
+ bool is_ggtt:1;
+
+ /* Some systems require uncached updates of the page directories */
+ bool pt_kmap_wc:1;
+
+ /* Some systems support read-only mappings for GGTT and/or PPGTT */
+ bool has_read_only:1;
+
+ u64 (*pte_encode)(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags); /* Create a valid PTE */
+#define PTE_READ_ONLY BIT(0)
+
+ int (*allocate_va_range)(struct i915_address_space *vm,
+ u64 start, u64 length);
+ void (*clear_range)(struct i915_address_space *vm,
+ u64 start, u64 length);
+ void (*insert_page)(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level cache_level,
+ u32 flags);
+ void (*insert_entries)(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags);
+ void (*cleanup)(struct i915_address_space *vm);
+
+ struct i915_vma_ops vma_ops;
+
+ I915_SELFTEST_DECLARE(struct fault_attr fault_attr);
+ I915_SELFTEST_DECLARE(bool scrub_64K);
+};
+
+/*
+ * The Graphics Translation Table is the way in which GEN hardware translates a
+ * Graphics Virtual Address into a Physical Address. In addition to the normal
+ * collateral associated with any va->pa translations GEN hardware also has a
+ * portion of the GTT which can be mapped by the CPU and remain both coherent
+ * and correct (in cases like swizzling). That region is referred to as GMADR in
+ * the spec.
+ */
+struct i915_ggtt {
+ struct i915_address_space vm;
+
+ struct io_mapping iomap; /* Mapping to our CPU mappable region */
+ struct resource gmadr; /* GMADR resource */
+ resource_size_t mappable_end; /* End offset that we can CPU map */
+
+ /** "Graphics Stolen Memory" holds the global PTEs */
+ void __iomem *gsm;
+ void (*invalidate)(struct i915_ggtt *ggtt);
+
+ /** PPGTT used for aliasing the PPGTT with the GTT */
+ struct i915_ppgtt *alias;
+
+ bool do_idle_maps;
+
+ int mtrr;
+
+ /** Bit 6 swizzling required for X tiling */
+ u32 bit_6_swizzle_x;
+ /** Bit 6 swizzling required for Y tiling */
+ u32 bit_6_swizzle_y;
+
+ u32 pin_bias;
+
+ unsigned int num_fences;
+ struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
+ struct list_head fence_list;
+
+ /**
+ * List of all objects in gtt_space, currently mmaped by userspace.
+ * All objects within this list must also be on bound_list.
+ */
+ struct list_head userfault_list;
+
+ /* Manual runtime pm autosuspend delay for user GGTT mmaps */
+ struct intel_wakeref_auto userfault_wakeref;
+
+ struct mutex error_mutex;
+ struct drm_mm_node error_capture;
+ struct drm_mm_node uc_fw;
+};
+
+struct i915_ppgtt {
+ struct i915_address_space vm;
+
+ struct i915_page_directory *pd;
+};
+
+#define i915_is_ggtt(vm) ((vm)->is_ggtt)
+
+static inline bool
+i915_vm_is_4lvl(const struct i915_address_space *vm)
+{
+ return (vm->total - 1) >> 32;
+}
+
+static inline bool
+i915_vm_has_scratch_64K(struct i915_address_space *vm)
+{
+ return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
+}
+
+static inline bool
+i915_vm_has_cache_coloring(struct i915_address_space *vm)
+{
+ return i915_is_ggtt(vm) && vm->mm.color_adjust;
+}
+
+static inline struct i915_ggtt *
+i915_vm_to_ggtt(struct i915_address_space *vm)
+{
+ BUILD_BUG_ON(offsetof(struct i915_ggtt, vm));
+ GEM_BUG_ON(!i915_is_ggtt(vm));
+ return container_of(vm, struct i915_ggtt, vm);
+}
+
+static inline struct i915_ppgtt *
+i915_vm_to_ppgtt(struct i915_address_space *vm)
+{
+ BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm));
+ GEM_BUG_ON(i915_is_ggtt(vm));
+ return container_of(vm, struct i915_ppgtt, vm);
+}
+
+static inline struct i915_address_space *
+i915_vm_get(struct i915_address_space *vm)
+{
+ kref_get(&vm->ref);
+ return vm;
+}
+
+void i915_vm_release(struct kref *kref);
+
+static inline void i915_vm_put(struct i915_address_space *vm)
+{
+ kref_put(&vm->ref, i915_vm_release);
+}
+
+static inline struct i915_address_space *
+i915_vm_open(struct i915_address_space *vm)
+{
+ GEM_BUG_ON(!atomic_read(&vm->open));
+ atomic_inc(&vm->open);
+ return i915_vm_get(vm);
+}
+
+static inline bool
+i915_vm_tryopen(struct i915_address_space *vm)
+{
+ if (atomic_add_unless(&vm->open, 1, 0))
+ return i915_vm_get(vm);
+
+ return false;
+}
+
+void __i915_vm_close(struct i915_address_space *vm);
+
+static inline void
+i915_vm_close(struct i915_address_space *vm)
+{
+ GEM_BUG_ON(!atomic_read(&vm->open));
+ if (atomic_dec_and_test(&vm->open))
+ __i915_vm_close(vm);
+
+ i915_vm_put(vm);
+}
+
+void i915_address_space_init(struct i915_address_space *vm, int subclass);
+void i915_address_space_fini(struct i915_address_space *vm);
+
+static inline u32 i915_pte_index(u64 address, unsigned int pde_shift)
+{
+ const u32 mask = NUM_PTE(pde_shift) - 1;
+
+ return (address >> PAGE_SHIFT) & mask;
+}
+
+/*
+ * Helper to counts the number of PTEs within the given length. This count
+ * does not cross a page table boundary, so the max value would be
+ * GEN6_PTES for GEN6, and GEN8_PTES for GEN8.
+ */
+static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift)
+{
+ const u64 mask = ~((1ULL << pde_shift) - 1);
+ u64 end;
+
+ GEM_BUG_ON(length == 0);
+ GEM_BUG_ON(offset_in_page(addr | length));
+
+ end = addr + length;
+
+ if ((addr & mask) != (end & mask))
+ return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift);
+
+ return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
+}
+
+static inline u32 i915_pde_index(u64 addr, u32 shift)
+{
+ return (addr >> shift) & I915_PDE_MASK;
+}
+
+static inline struct i915_page_table *
+i915_pt_entry(const struct i915_page_directory * const pd,
+ const unsigned short n)
+{
+ return pd->entry[n];
+}
+
+static inline struct i915_page_directory *
+i915_pd_entry(const struct i915_page_directory * const pdp,
+ const unsigned short n)
+{
+ return pdp->entry[n];
+}
+
+static inline dma_addr_t
+i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
+{
+ struct i915_page_dma *pt = ppgtt->pd->entry[n];
+
+ return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));
+}
+
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
+
+int i915_ggtt_probe_hw(struct drm_i915_private *i915);
+int i915_ggtt_init_hw(struct drm_i915_private *i915);
+int i915_ggtt_enable_hw(struct drm_i915_private *i915);
+void i915_ggtt_enable_guc(struct i915_ggtt *ggtt);
+void i915_ggtt_disable_guc(struct i915_ggtt *ggtt);
+int i915_init_ggtt(struct drm_i915_private *i915);
+void i915_ggtt_driver_release(struct drm_i915_private *i915);
+
+static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt)
+{
+ return ggtt->mappable_end > 0;
+}
+
+int i915_ppgtt_init_hw(struct intel_gt *gt);
+
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
+
+void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915);
+void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915);
+
+u64 gen8_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags);
+
+int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
+void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
+
+#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
+
+void
+fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
+
+#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
+#define fill32_px(px, v) do { \
+ u64 v__ = lower_32_bits(v); \
+ fill_px((px), v__ << 32 | v__); \
+} while (0)
+
+int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp);
+void cleanup_scratch_page(struct i915_address_space *vm);
+void free_scratch(struct i915_address_space *vm);
+
+struct i915_page_table *alloc_pt(struct i915_address_space *vm);
+struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
+struct i915_page_directory *__alloc_pd(size_t sz);
+
+void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd);
+
+#define free_px(vm, px) free_pd(vm, px_base(px))
+
+void
+__set_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ struct i915_page_dma * const to,
+ u64 (*encode)(const dma_addr_t, const enum i915_cache_level));
+
+#define set_pd_entry(pd, idx, to) \
+ __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
+
+void
+clear_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ const struct i915_page_scratch * const scratch);
+
+bool
+release_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ struct i915_page_table * const pt,
+ const struct i915_page_scratch * const scratch);
+void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
+
+int ggtt_set_pages(struct i915_vma *vma);
+int ppgtt_set_pages(struct i915_vma *vma);
+void clear_pages(struct i915_vma *vma);
+
+void gtt_write_workarounds(struct intel_gt *gt);
+
+void setup_private_pat(struct intel_uncore *uncore);
+
+static inline struct sgt_dma {
+ struct scatterlist *sg;
+ dma_addr_t dma, max;
+} sgt_dma(struct i915_vma *vma) {
+ struct scatterlist *sg = vma->pages->sgl;
+ dma_addr_t addr = sg_dma_address(sg);
+
+ return (struct sgt_dma){ sg, addr, addr + sg->length };
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 75dd0e0367b7..0cf0f6fae675 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -133,12 +133,11 @@
*/
#include <linux/interrupt.h>
-#include "gem/i915_gem_context.h"
-
#include "i915_drv.h"
#include "i915_perf.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
+#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
@@ -489,17 +488,23 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
return desc;
}
-static u32 *set_offsets(u32 *regs,
+static inline unsigned int dword_in_page(void *addr)
+{
+ return offset_in_page(addr) / sizeof(u32);
+}
+
+static void set_offsets(u32 *regs,
const u8 *data,
- const struct intel_engine_cs *engine)
+ const struct intel_engine_cs *engine,
+ bool clear)
#define NOP(x) (BIT(7) | (x))
-#define LRI(count, flags) ((flags) << 6 | (count))
+#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
(((x) >> 2) & 0x7f)
-#define END() 0
+#define END(x) 0, (x)
{
const u32 base = engine->mmio_base;
@@ -507,7 +512,10 @@ static u32 *set_offsets(u32 *regs,
u8 count, flags;
if (*data & BIT(7)) { /* skip */
- regs += *data++ & ~BIT(7);
+ count = *data++ & ~BIT(7);
+ if (clear)
+ memset32(regs, MI_NOOP, count);
+ regs += count;
continue;
}
@@ -533,12 +541,25 @@ static u32 *set_offsets(u32 *regs,
offset |= v & ~BIT(7);
} while (v & BIT(7));
- *regs = base + (offset << 2);
+ regs[0] = base + (offset << 2);
+ if (clear)
+ regs[1] = 0;
regs += 2;
} while (--count);
}
- return regs;
+ if (clear) {
+ u8 count = *++data;
+
+ /* Clear past the tail for HW access */
+ GEM_BUG_ON(dword_in_page(regs) > count);
+ memset32(regs, MI_NOOP, count - dword_in_page(regs));
+
+ /* Close the batch; used mainly by live_lrc_layout() */
+ *regs = MI_BATCH_BUFFER_END;
+ if (INTEL_GEN(engine->i915) >= 10)
+ *regs |= BIT(0);
+ }
}
static const u8 gen8_xcs_offsets[] = {
@@ -573,7 +594,7 @@ static const u8 gen8_xcs_offsets[] = {
REG16(0x200),
REG(0x028),
- END(),
+ END(80)
};
static const u8 gen9_xcs_offsets[] = {
@@ -657,7 +678,7 @@ static const u8 gen9_xcs_offsets[] = {
REG16(0x67c),
REG(0x068),
- END(),
+ END(176)
};
static const u8 gen12_xcs_offsets[] = {
@@ -689,7 +710,7 @@ static const u8 gen12_xcs_offsets[] = {
REG16(0x274),
REG16(0x270),
- END(),
+ END(80)
};
static const u8 gen8_rcs_offsets[] = {
@@ -726,7 +747,91 @@ static const u8 gen8_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END(),
+ END(80)
+};
+
+static const u8 gen9_rcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x34),
+ REG(0x30),
+ REG(0x38),
+ REG(0x3c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, 0),
+ REG(0xc8),
+
+ NOP(13),
+ LRI(44, POSTED),
+ REG(0x28),
+ REG(0x9c),
+ REG(0xc0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x68),
+
+ END(176)
};
static const u8 gen11_rcs_offsets[] = {
@@ -767,7 +872,7 @@ static const u8 gen11_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END(),
+ END(80)
};
static const u8 gen12_rcs_offsets[] = {
@@ -808,7 +913,7 @@ static const u8 gen12_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END(),
+ END(80)
};
#undef END
@@ -833,6 +938,8 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
return gen12_rcs_offsets;
else if (INTEL_GEN(engine->i915) >= 11)
return gen11_rcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return gen9_rcs_offsets;
else
return gen8_rcs_offsets;
} else {
@@ -880,7 +987,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_move(&rq->sched.link, pl);
active = rq;
} else {
- struct intel_engine_cs *owner = rq->hw_context->engine;
+ struct intel_engine_cs *owner = rq->context->engine;
/*
* Decouple the virtual breadcrumb before moving it
@@ -983,6 +1090,58 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x60;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x54;
+ else if (engine->class == RENDER_CLASS)
+ return 0x58;
+ else
+ return -1;
+}
+
+static void
+execlists_check_context(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
+{
+ const struct intel_ring *ring = ce->ring;
+ u32 *regs = ce->lrc_reg_state;
+ bool valid = true;
+ int x;
+
+ if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
+ pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_START],
+ i915_ggtt_offset(ring->vma));
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+ valid = false;
+ }
+
+ if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
+ (RING_CTL_SIZE(ring->size) | RING_VALID)) {
+ pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_CTL],
+ (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ valid = false;
+ }
+
+ x = lrc_ring_mi_mode(engine);
+ if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
+ pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
+ engine->name, regs[x + 1]);
+ regs[x + 1] &= ~STOP_RING;
+ regs[x + 1] |= STOP_RING << 16;
+ valid = false;
+ }
+
+ WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
+}
+
static void restore_default_state(struct intel_context *ce,
struct intel_engine_cs *engine)
{
@@ -999,7 +1158,7 @@ static void restore_default_state(struct intel_context *ce,
static void reset_active(struct i915_request *rq,
struct intel_engine_cs *engine)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
u32 head;
/*
@@ -1017,8 +1176,8 @@ static void reset_active(struct i915_request *rq,
* remain correctly ordered. And we defer to __i915_request_submit()
* so that all asynchronous waits are correctly handled.
*/
- GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
- __func__, engine->name, rq->fence.context, rq->fence.seqno);
+ ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
+ rq->fence.context, rq->fence.seqno);
/* On resubmission of the active request, payload will be scrubbed */
if (i915_request_completed(rq))
@@ -1040,13 +1199,16 @@ static inline struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
struct intel_engine_cs * const engine = rq->engine;
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
intel_context_get(ce);
- if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+ if (unlikely(intel_context_is_banned(ce)))
reset_active(rq, engine);
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ execlists_check_context(ce, engine);
+
if (ce->tag) {
/* Use a fixed tag for OA and friends */
ce->lrc_desc |= (u64)ce->tag << 32;
@@ -1054,12 +1216,12 @@ __execlists_schedule_in(struct i915_request *rq)
/* We don't need a strict matching tag, just different values */
ce->lrc_desc &= ~GENMASK_ULL(47, 37);
ce->lrc_desc |=
- (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
+ (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
GEN11_SW_CTX_ID_SHIFT;
BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
}
- intel_gt_pm_get(engine->gt);
+ __intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@@ -1069,7 +1231,7 @@ __execlists_schedule_in(struct i915_request *rq)
static inline struct i915_request *
execlists_schedule_in(struct i915_request *rq, int idx)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
struct intel_engine_cs *old;
GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
@@ -1100,7 +1262,7 @@ static inline void
__execlists_schedule_out(struct i915_request *rq,
struct intel_engine_cs * const engine)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
/*
* NB process_csb() is not under the engine->active.lock and hence
@@ -1138,7 +1300,7 @@ __execlists_schedule_out(struct i915_request *rq,
static inline void
execlists_schedule_out(struct i915_request *rq)
{
- struct intel_context * const ce = rq->hw_context;
+ struct intel_context * const ce = rq->context;
struct intel_engine_cs *cur, *old;
trace_i915_request_out(rq);
@@ -1155,7 +1317,7 @@ execlists_schedule_out(struct i915_request *rq)
static u64 execlists_update_context(struct i915_request *rq)
{
- struct intel_context *ce = rq->hw_context;
+ struct intel_context *ce = rq->context;
u64 desc = ce->lrc_desc;
u32 tail;
@@ -1186,17 +1348,8 @@ static u64 execlists_update_context(struct i915_request *rq)
* may not be visible to the HW prior to the completion of the UC
* register write and that we may begin execution from the context
* before its image is complete leading to invalid PD chasing.
- *
- * Furthermore, Braswell, at least, wants a full mb to be sure that
- * the writes are coherent in memory (visible to the GPU) prior to
- * execution, and not just visible to other CPUs (as is the result of
- * wmb).
*/
- mb();
-
- /* Wa_1607138340:tgl */
- if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
- desc |= CTX_DESC_FORCE_RESTORE;
+ wmb();
ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
return desc;
@@ -1224,15 +1377,14 @@ trace_ports(const struct intel_engine_execlists *execlists,
if (!ports[0])
return;
- GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
- engine->name, msg,
- ports[0]->fence.context,
- ports[0]->fence.seqno,
- i915_request_completed(ports[0]) ? "!" :
- i915_request_started(ports[0]) ? "*" :
- "",
- ports[1] ? ports[1]->fence.context : 0,
- ports[1] ? ports[1]->fence.seqno : 0);
+ ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
+ ports[0]->fence.context,
+ ports[0]->fence.seqno,
+ i915_request_completed(ports[0]) ? "!" :
+ i915_request_started(ports[0]) ? "*" :
+ "",
+ ports[1] ? ports[1]->fence.context : 0,
+ ports[1] ? ports[1]->fence.seqno : 0);
}
static __maybe_unused bool
@@ -1256,33 +1408,56 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
}
for (port = execlists->pending; (rq = *port); port++) {
- if (ce == rq->hw_context) {
- GEM_TRACE_ERR("Duplicate context in pending[%zd]\n",
+ unsigned long flags;
+ bool ok = true;
+
+ GEM_BUG_ON(!kref_read(&rq->fence.refcount));
+ GEM_BUG_ON(!i915_request_is_active(rq));
+
+ if (ce == rq->context) {
+ GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
return false;
}
+ ce = rq->context;
- ce = rq->hw_context;
- if (i915_request_completed(rq))
+ /* Hold tightly onto the lock to prevent concurrent retires! */
+ if (!spin_trylock_irqsave(&rq->lock, flags))
continue;
- if (i915_active_is_idle(&ce->active)) {
- GEM_TRACE_ERR("Inactive context in pending[%zd]\n",
+ if (i915_request_completed(rq))
+ goto unlock;
+
+ if (i915_active_is_idle(&ce->active) &&
+ !intel_context_is_barrier(ce)) {
+ GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
if (!i915_vma_is_pinned(ce->state)) {
- GEM_TRACE_ERR("Unpinned context in pending[%zd]\n",
+ GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
if (!i915_vma_is_pinned(ce->ring->vma)) {
- GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n",
+ GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
port - execlists->pending);
- return false;
+ ok = false;
+ goto unlock;
}
+
+unlock:
+ spin_unlock_irqrestore(&rq->lock, flags);
+ if (!ok)
+ return false;
}
return ce;
@@ -1327,7 +1502,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
static bool ctx_single_port_submission(const struct intel_context *ce)
{
return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
- i915_gem_context_force_single_submission(ce->gem_context));
+ intel_context_force_single_submission(ce));
}
static bool can_merge_ctx(const struct intel_context *prev,
@@ -1359,11 +1534,11 @@ static bool can_merge_rq(const struct i915_request *prev,
if (i915_request_completed(next))
return true;
- if (unlikely((prev->flags ^ next->flags) &
- (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL)))
+ if (unlikely((prev->fence.flags ^ next->fence.flags) &
+ (I915_FENCE_FLAG_NOPREEMPT | I915_FENCE_FLAG_SENTINEL)))
return false;
- if (!can_merge_ctx(prev->hw_context, next->hw_context))
+ if (!can_merge_ctx(prev->context, next->context))
return false;
return true;
@@ -1372,7 +1547,7 @@ static bool can_merge_rq(const struct i915_request *prev,
static void virtual_update_register_offsets(u32 *regs,
struct intel_engine_cs *engine)
{
- set_offsets(regs, reg_offsets(engine), engine);
+ set_offsets(regs, reg_offsets(engine), engine, false);
}
static bool virtual_matches(const struct virtual_engine *ve,
@@ -1411,7 +1586,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
if (!list_empty(&ve->context.signal_link)) {
list_move_tail(&ve->context.signal_link,
&engine->breadcrumbs.signalers);
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
spin_unlock(&old->breadcrumbs.irq_lock);
}
@@ -1519,7 +1694,7 @@ active_timeslice(const struct intel_engine_cs *engine)
{
const struct i915_request *rq = *engine->execlists.active;
- if (i915_request_completed(rq))
+ if (!rq || i915_request_completed(rq))
return 0;
if (engine->execlists.switch_priority_hint < effective_prio(rq))
@@ -1550,7 +1725,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
return 0;
/* Force a fast reset for terminated contexts (ignoring sysfs!) */
- if (unlikely(i915_gem_context_is_banned(rq->gem_context)))
+ if (unlikely(intel_context_is_banned(rq->context)))
return 1;
return READ_ONCE(engine->props.preempt_timeout_ms);
@@ -1565,6 +1740,11 @@ static void set_preempt_timeout(struct intel_engine_cs *engine)
active_preempt_timeout(engine));
}
+static inline void clear_ports(struct i915_request **ports, int count)
+{
+ memset_p((void **)ports, NULL, count);
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1627,12 +1807,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last = last_active(execlists);
if (last) {
if (need_preempt(engine, last, rb)) {
- GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
- engine->name,
- last->fence.context,
- last->fence.seqno,
- last->sched.attr.priority,
- execlists->queue_priority_hint);
+ ENGINE_TRACE(engine,
+ "preempting last=%llx:%lld, prio=%d, hint=%d\n",
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
record_preemption(execlists);
/*
@@ -1658,16 +1838,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* tendency to ignore us rewinding the TAIL to the
* end of an earlier request.
*/
- last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
last = NULL;
} else if (need_timeslice(engine, last) &&
timer_expired(&engine->execlists.timer)) {
- GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
- engine->name,
- last->fence.context,
- last->fence.seqno,
- last->sched.attr.priority,
- execlists->queue_priority_hint);
+ ENGINE_TRACE(engine,
+ "expired last=%llx:%lld, prio=%d, hint=%d\n",
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
ring_set_paused(engine, 1);
defer_active(engine);
@@ -1730,7 +1910,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(rq != ve->request);
GEM_BUG_ON(rq->engine != &ve->base);
- GEM_BUG_ON(rq->hw_context != &ve->context);
+ GEM_BUG_ON(rq->context != &ve->context);
if (rq_prio(rq) >= queue_prio(execlists)) {
if (!virtual_matches(ve, rq, engine)) {
@@ -1744,14 +1924,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
return; /* leave this for another */
}
- GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
- engine->name,
- rq->fence.context,
- rq->fence.seqno,
- i915_request_completed(rq) ? "!" :
- i915_request_started(rq) ? "*" :
- "",
- yesno(engine != ve->siblings[0]));
+ ENGINE_TRACE(engine,
+ "virtual rq=%llx:%lld%s, new engine? %s\n",
+ rq->fence.context,
+ rq->fence.seqno,
+ i915_request_completed(rq) ? "!" :
+ i915_request_started(rq) ? "*" :
+ "",
+ yesno(engine != ve->siblings[0]));
ve->request = NULL;
ve->base.execlists.queue_priority_hint = INT_MIN;
@@ -1849,7 +2029,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* same LRCA, i.e. we must submit 2 different
* contexts if we submit 2 ELSP.
*/
- if (last->hw_context == rq->hw_context)
+ if (last->context == rq->context)
goto done;
if (i915_request_has_sentinel(last))
@@ -1862,8 +2042,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* the same context (even though a different
* request) to the second port.
*/
- if (ctx_single_port_submission(last->hw_context) ||
- ctx_single_port_submission(rq->hw_context))
+ if (ctx_single_port_submission(last->context) ||
+ ctx_single_port_submission(rq->context))
goto done;
merge = false;
@@ -1877,8 +2057,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
GEM_BUG_ON(last &&
- !can_merge_ctx(last->hw_context,
- rq->hw_context));
+ !can_merge_ctx(last->context,
+ rq->context));
submit = true;
last = rq;
@@ -1907,9 +2087,6 @@ done:
* interrupt for secondary ports).
*/
execlists->queue_priority_hint = queue_prio(execlists);
- GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
- engine->name, execlists->queue_priority_hint,
- yesno(submit));
if (submit) {
*port = execlists_schedule_in(last, port - execlists->pending);
@@ -1928,10 +2105,9 @@ done:
goto skip_submit;
}
+ clear_ports(port + 1, last_port - port);
- memset(port + 1, 0, (last_port - port) * sizeof(*port));
execlists_submit_ports(engine);
-
set_preempt_timeout(engine);
} else {
skip_submit:
@@ -1946,13 +2122,14 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
for (port = execlists->pending; *port; port++)
execlists_schedule_out(*port);
- memset(execlists->pending, 0, sizeof(execlists->pending));
+ clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
/* Mark the end of active before we overwrite *active */
for (port = xchg(&execlists->active, execlists->pending); *port; port++)
execlists_schedule_out(*port);
- WRITE_ONCE(execlists->active,
- memset(execlists->inflight, 0, sizeof(execlists->inflight)));
+ clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
+
+ WRITE_ONCE(execlists->active, execlists->inflight);
}
static inline void
@@ -2058,7 +2235,7 @@ static void process_csb(struct intel_engine_cs *engine)
*/
head = execlists->csb_head;
tail = READ_ONCE(*execlists->csb_write);
- GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+ ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
if (unlikely(head == tail))
return;
@@ -2096,9 +2273,8 @@ static void process_csb(struct intel_engine_cs *engine)
* status notifier.
*/
- GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
- engine->name, head,
- buf[2 * head + 0], buf[2 * head + 1]);
+ ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
+ head, buf[2 * head + 0], buf[2 * head + 1]);
if (INTEL_GEN(engine->i915) >= 12)
promote = gen12_csb_parse(execlists, buf + 2 * head);
@@ -2109,7 +2285,6 @@ static void process_csb(struct intel_engine_cs *engine)
/* Point active to the new ELSP; prevent overwriting */
WRITE_ONCE(execlists->active, execlists->pending);
- set_timeslice(engine);
if (!inject_preempt_hang(execlists))
ring_set_paused(engine, 0);
@@ -2150,6 +2325,7 @@ static void process_csb(struct intel_engine_cs *engine)
} while (head != tail);
execlists->csb_head = head;
+ set_timeslice(engine);
/*
* Gen11 has proven to fail wrt global observation point between
@@ -2189,10 +2365,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine)
/* Mark this tasklet as disabled to avoid waiting for it to complete */
tasklet_disable_nosync(&engine->execlists.tasklet);
- GEM_TRACE("%s: preempt timeout %lu+%ums\n",
- engine->name,
- READ_ONCE(engine->props.preempt_timeout_ms),
- jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
+ ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
+ READ_ONCE(engine->props.preempt_timeout_ms),
+ jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
intel_engine_reset(engine, "preemption time out");
tasklet_enable(&engine->execlists.tasklet);
@@ -2333,7 +2508,7 @@ set_redzone(void *vaddr, const struct intel_engine_cs *engine)
vaddr += engine->context_size;
- memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
+ memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}
static void
@@ -2344,7 +2519,7 @@ check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
vaddr += engine->context_size;
- if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
+ if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
dev_err_once(engine->i915->drm.dev,
"%s context redzone overwritten!\n",
engine->name);
@@ -2369,7 +2544,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
- regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma);
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
regs[CTX_RING_HEAD] = ring->head;
regs[CTX_RING_TAIL] = ring->tail;
@@ -2387,33 +2562,21 @@ __execlists_context_pin(struct intel_context *ce,
struct intel_engine_cs *engine)
{
void *vaddr;
- int ret;
GEM_BUG_ON(!ce->state);
-
- ret = intel_context_active_acquire(ce);
- if (ret)
- goto err;
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
vaddr = i915_gem_object_pin_map(ce->state->obj,
i915_coherent_map_type(engine->i915) |
I915_MAP_OVERRIDE);
- if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- goto unpin_active;
- }
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
- ce->lrc_desc = lrc_descriptor(ce, engine);
+ ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
__execlists_update_reg_state(ce, engine);
return 0;
-
-unpin_active:
- intel_context_active_release(ce);
-err:
- return ret;
}
static int execlists_context_pin(struct intel_context *ce)
@@ -2428,6 +2591,9 @@ static int execlists_context_alloc(struct intel_context *ce)
static void execlists_context_reset(struct intel_context *ce)
{
+ CE_TRACE(ce, "reset\n");
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+
/*
* Because we emit WA_TAIL_DWORDS there may be a disparity
* between our bookkeeping in ce->ring->head and ce->ring->tail and
@@ -2444,8 +2610,14 @@ static void execlists_context_reset(struct intel_context *ce)
* So to avoid that we reset the context images upon resume. For
* simplicity, we just zero everything out.
*/
- intel_ring_reset(ce->ring, 0);
+ intel_ring_reset(ce->ring, ce->ring->emit);
+
+ /* Scrub away the garbage */
+ execlists_init_reg_state(ce->lrc_reg_state,
+ ce, ce->engine, ce->ring, true);
__execlists_update_reg_state(ce, ce->engine);
+
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
}
static const struct intel_context_ops execlists_context_ops = {
@@ -2497,7 +2669,7 @@ static int execlists_request_alloc(struct i915_request *request)
{
int ret;
- GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+ GEM_BUG_ON(!intel_context_is_pinned(request->context));
/*
* Flush enough space to reduce the likelihood of waiting after
@@ -2664,6 +2836,14 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+ /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
+ batch = gen8_emit_pipe_control(batch,
+ PIPE_CONTROL_FLUSH_L3 |
+ PIPE_CONTROL_STORE_DATA_INDEX |
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE,
+ LRC_PPHWSP_SCRATCH_ADDR);
+
batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
/* WaMediaPoolStateCmdInWABB:bxt,glk */
@@ -2859,6 +3039,8 @@ static void enable_execlists(struct intel_engine_cs *engine)
RING_HWS_PGA,
i915_ggtt_offset(engine->status_page.vma));
ENGINE_POSTING_READ(engine, RING_HWS_PGA);
+
+ engine->context_tag = 0;
}
static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -2898,8 +3080,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
struct intel_engine_execlists * const execlists = &engine->execlists;
unsigned long flags;
- GEM_TRACE("%s: depth<-%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth<-%d\n",
+ atomic_read(&execlists->tasklet.count));
/*
* Prevent request submission to the hardware until we have
@@ -2952,26 +3134,20 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
WRITE_ONCE(*execlists->csb_write, reset_value);
wmb(); /* Make sure this is visible to HW (paranoia?) */
+ /*
+ * Sometimes Icelake forgets to reset its pointers on a GPU reset.
+ * Bludgeon them with a mmio update to be sure.
+ */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
invalidate_csb_entries(&execlists->csb_status[0],
&execlists->csb_status[reset_value]);
}
-static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
- if (INTEL_GEN(engine->i915) >= 12)
- return 0x60;
- else if (INTEL_GEN(engine->i915) >= 9)
- return 0x54;
- else if (engine->class == RENDER_CLASS)
- return 0x58;
- else
- return -1;
-}
-
-static void __execlists_reset_reg_state(const struct intel_context *ce,
- const struct intel_engine_cs *engine)
-{
- u32 *regs = ce->lrc_reg_state;
int x;
x = lrc_ring_mi_mode(engine);
@@ -2981,6 +3157,14 @@ static void __execlists_reset_reg_state(const struct intel_context *ce,
}
}
+static void __execlists_reset_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
+{
+ u32 *regs = ce->lrc_reg_state;
+
+ __reset_stop_ring(regs, engine);
+}
+
static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -3008,7 +3192,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
/* We still have requests in-flight; the engine should be active */
GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
- ce = rq->hw_context;
+ ce = rq->context;
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
if (i915_request_completed(rq)) {
@@ -3065,8 +3249,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
restore_default_state(ce, engine);
out_replay:
- GEM_TRACE("%s replay {head:%04x, tail:%04x}\n",
- engine->name, ce->ring->head, ce->ring->tail);
+ ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
+ ce->ring->head, ce->ring->tail);
intel_ring_update_space(ce->ring);
__execlists_reset_reg_state(ce, engine);
__execlists_update_reg_state(ce, engine);
@@ -3078,11 +3262,11 @@ unwind:
__unwind_incomplete_requests(engine);
}
-static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
+static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
spin_lock_irqsave(&engine->active.lock, flags);
@@ -3096,14 +3280,14 @@ static void nop_submission_tasklet(unsigned long data)
/* The driver is wedged; don't process any more events. */
}
-static void execlists_cancel_requests(struct intel_engine_cs *engine)
+static void execlists_reset_cancel(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request *rq, *rn;
struct rb_node *rb;
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
/*
* Before we call engine->cancel_requests(), we should have exclusive
@@ -3190,13 +3374,13 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
if (__tasklet_enable(&execlists->tasklet))
/* And kick in case we missed a new request submission. */
tasklet_hi_schedule(&execlists->tasklet);
- GEM_TRACE("%s: depth->%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth->%d\n",
+ atomic_read(&execlists->tasklet.count));
}
-static int gen8_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags)
+static int gen8_emit_bb_start_noarb(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
{
u32 *cs;
@@ -3230,7 +3414,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
return 0;
}
-static int gen9_emit_bb_start(struct i915_request *rq,
+static int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
@@ -3685,12 +3869,12 @@ static void execlists_park(struct intel_engine_cs *engine)
void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = execlists_submit_request;
- engine->cancel_requests = execlists_cancel_requests;
engine->schedule = i915_schedule;
engine->execlists.tasklet.func = execlists_submission_tasklet;
engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
+ engine->reset.rewind = execlists_reset_rewind;
+ engine->reset.cancel = execlists_reset_cancel;
engine->reset.finish = execlists_reset_finish;
engine->park = execlists_park;
@@ -3705,13 +3889,27 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
if (INTEL_GEN(engine->i915) >= 12)
engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
+
+ if (intel_engine_has_preemption(engine))
+ engine->emit_bb_start = gen8_emit_bb_start;
+ else
+ engine->emit_bb_start = gen8_emit_bb_start_noarb;
+}
+
+static void execlists_shutdown(struct intel_engine_cs *engine)
+{
+ /* Synchronise with residual timers and any softirq they raise */
+ del_timer_sync(&engine->execlists.timer);
+ del_timer_sync(&engine->execlists.preempt);
+ tasklet_kill(&engine->execlists.tasklet);
}
-static void execlists_destroy(struct intel_engine_cs *engine)
+static void execlists_release(struct intel_engine_cs *engine)
{
+ execlists_shutdown(engine);
+
intel_engine_cleanup_common(engine);
lrc_destroy_wa_ctx(engine);
- kfree(engine);
}
static void
@@ -3719,13 +3917,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{
/* Default vfuncs which can be overriden by each engine. */
- engine->destroy = execlists_destroy;
engine->resume = execlists_resume;
- engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
- engine->reset.finish = execlists_reset_finish;
-
engine->cops = &execlists_context_ops;
engine->request_alloc = execlists_request_alloc;
@@ -3748,10 +3941,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
* until a more refined solution exists.
*/
}
- if (IS_GEN(engine->i915, 8))
- engine->emit_bb_start = gen8_emit_bb_start;
- else
- engine->emit_bb_start = gen9_emit_bb_start;
}
static inline void
@@ -3795,6 +3984,11 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
int intel_execlists_submission_setup(struct intel_engine_cs *engine)
{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct drm_i915_private *i915 = engine->i915;
+ struct intel_uncore *uncore = engine->uncore;
+ u32 base = engine->mmio_base;
+
tasklet_init(&engine->execlists.tasklet,
execlists_submission_tasklet, (unsigned long)engine);
timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
@@ -3806,21 +4000,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
if (engine->class == RENDER_CLASS)
rcs_submission_override(engine);
- return 0;
-}
-
-int intel_execlists_submission_init(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct drm_i915_private *i915 = engine->i915;
- struct intel_uncore *uncore = engine->uncore;
- u32 base = engine->mmio_base;
- int ret;
-
- ret = intel_engine_init_common(engine);
- if (ret)
- return ret;
-
if (intel_init_workaround_bb(engine))
/*
* We continue even if we fail to initialize WA batch
@@ -3852,6 +4031,9 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
reset_csb_pointers(engine);
+ /* Finally, take ownership and responsibility for cleanup! */
+ engine->release = execlists_release;
+
return 0;
}
@@ -3891,18 +4073,21 @@ static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
static void init_common_reg_state(u32 * const regs,
const struct intel_engine_cs *engine,
- const struct intel_ring *ring)
+ const struct intel_ring *ring,
+ bool inhibit)
{
- regs[CTX_CONTEXT_CONTROL] =
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
- _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+ u32 ctl;
+
+ ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+ ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ if (inhibit)
+ ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
if (INTEL_GEN(engine->i915) < 11)
- regs[CTX_CONTEXT_CONTROL] |=
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
- CTX_CTRL_RS_CTX_ENABLE);
+ ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+ CTX_CTRL_RS_CTX_ENABLE);
+ regs[CTX_CONTEXT_CONTROL] = ctl;
- regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID;
- regs[CTX_BB_STATE] = RING_BB_PPGTT;
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
}
static void init_wa_bb_reg_state(u32 * const regs,
@@ -3958,7 +4143,7 @@ static void execlists_init_reg_state(u32 *regs,
const struct intel_context *ce,
const struct intel_engine_cs *engine,
const struct intel_ring *ring,
- bool close)
+ bool inhibit)
{
/*
* A context is actually a big batch buffer with several
@@ -3970,21 +4155,17 @@ static void execlists_init_reg_state(u32 *regs,
*
* Must keep consistent with virtual_update_register_offsets().
*/
- u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
+ set_offsets(regs, reg_offsets(engine), engine, inhibit);
- if (close) { /* Close the batch; used mainly by live_lrc_layout() */
- *bbe = MI_BATCH_BUFFER_END;
- if (INTEL_GEN(engine->i915) >= 10)
- *bbe |= BIT(0);
- }
-
- init_common_reg_state(regs, engine, ring);
+ init_common_reg_state(regs, engine, ring, inhibit);
init_ppgtt_reg_state(regs, vm_alias(ce->vm));
init_wa_bb_reg_state(regs, engine,
INTEL_GEN(engine->i915) >= 12 ?
GEN12_CTX_BB_PER_CTX_PTR :
CTX_BB_PER_CTX_PTR);
+
+ __reset_stop_ring(regs, engine);
}
static int
@@ -3995,7 +4176,6 @@ populate_lr_context(struct intel_context *ce,
{
bool inhibit = true;
void *vaddr;
- u32 *regs;
int ret;
vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
@@ -4019,16 +4199,14 @@ populate_lr_context(struct intel_context *ce,
memcpy(vaddr, defaults, engine->context_size);
i915_gem_object_unpin_map(engine->default_state);
+ __set_bit(CONTEXT_VALID_BIT, &ce->flags);
inhibit = false;
}
/* The second page of the context object contains some fields which must
* be set up prior to the first execution. */
- regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
- execlists_init_reg_state(regs, ce, engine, ring, inhibit);
- if (inhibit)
- regs[CTX_CONTEXT_CONTROL] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+ ce, engine, ring, inhibit);
ret = 0;
err_unpin_ctx:
@@ -4166,6 +4344,13 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
ve->siblings[0]);
}
+static int virtual_context_alloc(struct intel_context *ce)
+{
+ struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+ return __execlists_context_alloc(ce, ve->siblings[0]);
+}
+
static int virtual_context_pin(struct intel_context *ce)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
@@ -4203,6 +4388,8 @@ static void virtual_context_exit(struct intel_context *ce)
}
static const struct intel_context_ops virtual_context_ops = {
+ .alloc = virtual_context_alloc,
+
.pin = virtual_context_pin,
.unpin = execlists_context_unpin,
@@ -4229,10 +4416,9 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
mask = ve->siblings[0]->mask;
}
- GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
- ve->base.name,
- rq->fence.context, rq->fence.seqno,
- mask, ve->base.execlists.queue_priority_hint);
+ ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
+ rq->fence.context, rq->fence.seqno,
+ mask, ve->base.execlists.queue_priority_hint);
return mask;
}
@@ -4323,10 +4509,9 @@ static void virtual_submit_request(struct i915_request *rq)
struct i915_request *old;
unsigned long flags;
- GEM_TRACE("%s: rq=%llx:%lld\n",
- ve->base.name,
- rq->fence.context,
- rq->fence.seqno);
+ ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
+ rq->fence.context,
+ rq->fence.seqno);
GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
@@ -4394,8 +4579,7 @@ virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
}
struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs **siblings,
+intel_execlists_create_virtual(struct intel_engine_cs **siblings,
unsigned int count)
{
struct virtual_engine *ve;
@@ -4406,19 +4590,21 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
return ERR_PTR(-EINVAL);
if (count == 1)
- return intel_context_create(ctx, siblings[0]);
+ return intel_context_create(siblings[0]);
ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
if (!ve)
return ERR_PTR(-ENOMEM);
- ve->base.i915 = ctx->i915;
+ ve->base.i915 = siblings[0]->i915;
ve->base.gt = siblings[0]->gt;
ve->base.uncore = siblings[0]->uncore;
ve->base.id = -1;
+
ve->base.class = OTHER_CLASS;
ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+ ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
/*
* The decision on whether to submit a request using semaphores
@@ -4439,7 +4625,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
intel_engine_init_breadcrumbs(&ve->base);
-
intel_engine_init_execlists(&ve->base);
ve->base.cops = &virtual_context_ops;
@@ -4455,7 +4640,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
virtual_submission_tasklet,
(unsigned long)ve);
- intel_context_init(&ve->context, ctx, &ve->base);
+ intel_context_init(&ve->context, &ve->base);
for (n = 0; n < count; n++) {
struct intel_engine_cs *sibling = siblings[n];
@@ -4522,12 +4707,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
- err = __execlists_context_alloc(&ve->context, siblings[0]);
- if (err)
- goto err_put;
-
- __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
-
return &ve->context;
err_put:
@@ -4536,14 +4715,12 @@ err_put:
}
struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs *src)
+intel_execlists_clone_virtual(struct intel_engine_cs *src)
{
struct virtual_engine *se = to_virtual_engine(src);
struct intel_context *dst;
- dst = intel_execlists_create_virtual(ctx,
- se->siblings,
+ dst = intel_execlists_create_virtual(se->siblings,
se->num_siblings);
if (IS_ERR(dst))
return dst;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 04511d8ebdc1..dfbc214e14f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -83,7 +83,6 @@ enum {
void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
int intel_execlists_submission_setup(struct intel_engine_cs *engine);
-int intel_execlists_submission_init(struct intel_engine_cs *engine);
/* Logical Ring Contexts */
/* At the start of the context image is its per-process HWS page */
@@ -111,13 +110,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
unsigned int max);
struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs **siblings,
+intel_execlists_create_virtual(struct intel_engine_cs **siblings,
unsigned int count);
struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs *src);
+intel_execlists_clone_virtual(struct intel_engine_cs *src);
int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
const struct intel_engine_cs *master,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 06ab0276e10e..08a3be65f700 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -13,8 +13,8 @@
#define CTX_CONTEXT_CONTROL (0x02 + 1)
#define CTX_RING_HEAD (0x04 + 1)
#define CTX_RING_TAIL (0x06 + 1)
-#define CTX_RING_BUFFER_START (0x08 + 1)
-#define CTX_RING_BUFFER_CONTROL (0x0a + 1)
+#define CTX_RING_START (0x08 + 1)
+#define CTX_RING_CTL (0x0a + 1)
#define CTX_BB_STATE (0x10 + 1)
#define CTX_BB_PER_CTX_PTR (0x18 + 1)
#define CTX_PDP3_UDW (0x24 + 1)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 2b977991b785..eeef90b55c64 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -127,7 +127,7 @@ struct drm_i915_mocs_table {
LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
L3_3_WB)
-static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
+static const struct drm_i915_mocs_entry skl_mocs_table[] = {
GEN9_MOCS_ENTRIES,
MOCS_ENTRY(I915_MOCS_CACHED,
LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
@@ -233,7 +233,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
L3_1_UC)
-static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
+static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
/* Base - Error (Reserved for Non-Use) */
MOCS_ENTRY(0, 0x0, 0x0),
/* Base - Reserved */
@@ -267,7 +267,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
L3_3_WB),
};
-static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
+static const struct drm_i915_mocs_entry icl_mocs_table[] = {
/* Base - Uncached (Deprecated) */
MOCS_ENTRY(I915_MOCS_UNCACHED,
LE_1_UC | LE_TC_1_LLC,
@@ -283,65 +283,42 @@ static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
static bool get_mocs_settings(const struct drm_i915_private *i915,
struct drm_i915_mocs_table *table)
{
- bool result = false;
-
if (INTEL_GEN(i915) >= 12) {
- table->size = ARRAY_SIZE(tigerlake_mocs_table);
- table->table = tigerlake_mocs_table;
+ table->size = ARRAY_SIZE(tgl_mocs_table);
+ table->table = tgl_mocs_table;
table->n_entries = GEN11_NUM_MOCS_ENTRIES;
- result = true;
} else if (IS_GEN(i915, 11)) {
- table->size = ARRAY_SIZE(icelake_mocs_table);
- table->table = icelake_mocs_table;
+ table->size = ARRAY_SIZE(icl_mocs_table);
+ table->table = icl_mocs_table;
table->n_entries = GEN11_NUM_MOCS_ENTRIES;
- result = true;
} else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) {
- table->size = ARRAY_SIZE(skylake_mocs_table);
+ table->size = ARRAY_SIZE(skl_mocs_table);
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
- table->table = skylake_mocs_table;
- result = true;
+ table->table = skl_mocs_table;
} else if (IS_GEN9_LP(i915)) {
table->size = ARRAY_SIZE(broxton_mocs_table);
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->table = broxton_mocs_table;
- result = true;
} else {
WARN_ONCE(INTEL_GEN(i915) >= 9,
"Platform that should have a MOCS table does not.\n");
+ return false;
}
+ if (GEM_DEBUG_WARN_ON(table->size > table->n_entries))
+ return false;
+
/* WaDisableSkipCaching:skl,bxt,kbl,glk */
if (IS_GEN(i915, 9)) {
int i;
for (i = 0; i < table->size; i++)
- if (WARN_ON(table->table[i].l3cc_value &
- (L3_ESC(1) | L3_SCC(0x7))))
+ if (GEM_DEBUG_WARN_ON(table->table[i].l3cc_value &
+ (L3_ESC(1) | L3_SCC(0x7))))
return false;
}
- return result;
-}
-
-static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index)
-{
- switch (engine->id) {
- case RCS0:
- return GEN9_GFX_MOCS(index);
- case VCS0:
- return GEN9_MFX0_MOCS(index);
- case BCS0:
- return GEN9_BLT_MOCS(index);
- case VECS0:
- return GEN9_VEBOX_MOCS(index);
- case VCS1:
- return GEN9_MFX1_MOCS(index);
- case VCS2:
- return GEN11_MFX2_MOCS(index);
- default:
- MISSING_CASE(engine->id);
- return INVALID_MMIO_REG;
- }
+ return true;
}
/*
@@ -351,29 +328,47 @@ static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index)
static u32 get_entry_control(const struct drm_i915_mocs_table *table,
unsigned int index)
{
- if (table->table[index].used)
+ if (index < table->size && table->table[index].used)
return table->table[index].control_value;
return table->table[I915_MOCS_PTE].control_value;
}
-static void init_mocs_table(struct intel_engine_cs *engine,
- const struct drm_i915_mocs_table *table)
+#define for_each_mocs(mocs, t, i) \
+ for (i = 0; \
+ i < (t)->n_entries ? (mocs = get_entry_control((t), i)), 1 : 0;\
+ i++)
+
+static void __init_mocs_table(struct intel_uncore *uncore,
+ const struct drm_i915_mocs_table *table,
+ u32 addr)
{
- struct intel_uncore *uncore = engine->uncore;
- u32 unused_value = table->table[I915_MOCS_PTE].control_value;
unsigned int i;
+ u32 mocs;
+
+ for_each_mocs(mocs, table, i)
+ intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs);
+}
- for (i = 0; i < table->size; i++)
- intel_uncore_write_fw(uncore,
- mocs_register(engine, i),
- get_entry_control(table, i));
+static u32 mocs_offset(const struct intel_engine_cs *engine)
+{
+ static const u32 offset[] = {
+ [RCS0] = __GEN9_RCS0_MOCS0,
+ [VCS0] = __GEN9_VCS0_MOCS0,
+ [VCS1] = __GEN9_VCS1_MOCS0,
+ [VECS0] = __GEN9_VECS0_MOCS0,
+ [BCS0] = __GEN9_BCS0_MOCS0,
+ [VCS2] = __GEN11_VCS2_MOCS0,
+ };
+
+ GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset));
+ return offset[engine->id];
+}
- /* All remaining entries are unused */
- for (; i < table->n_entries; i++)
- intel_uncore_write_fw(uncore,
- mocs_register(engine, i),
- unused_value);
+static void init_mocs_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table)
+{
+ __init_mocs_table(engine->uncore, table, mocs_offset(engine));
}
/*
@@ -383,51 +378,34 @@ static void init_mocs_table(struct intel_engine_cs *engine,
static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
unsigned int index)
{
- if (table->table[index].used)
+ if (index < table->size && table->table[index].used)
return table->table[index].l3cc_value;
return table->table[I915_MOCS_PTE].l3cc_value;
}
-static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
- u16 low,
- u16 high)
+static inline u32 l3cc_combine(u16 low, u16 high)
{
return low | (u32)high << 16;
}
+#define for_each_l3cc(l3cc, t, i) \
+ for (i = 0; \
+ i < ((t)->n_entries + 1) / 2 ? \
+ (l3cc = l3cc_combine(get_entry_l3cc((t), 2 * i), \
+ get_entry_l3cc((t), 2 * i + 1))), 1 : \
+ 0; \
+ i++)
+
static void init_l3cc_table(struct intel_engine_cs *engine,
const struct drm_i915_mocs_table *table)
{
struct intel_uncore *uncore = engine->uncore;
- u16 unused_value = table->table[I915_MOCS_PTE].l3cc_value;
unsigned int i;
+ u32 l3cc;
- for (i = 0; i < table->size / 2; i++) {
- u16 low = get_entry_l3cc(table, 2 * i);
- u16 high = get_entry_l3cc(table, 2 * i + 1);
-
- intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(table, low, high));
- }
-
- /* Odd table size - 1 left over */
- if (table->size & 1) {
- u16 low = get_entry_l3cc(table, 2 * i);
-
- intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(table, low, unused_value));
- i++;
- }
-
- /* All remaining entries are also unused */
- for (; i < table->n_entries / 2; i++)
- intel_uncore_write(uncore,
- GEN9_LNCFCMOCS(i),
- l3cc_combine(table, unused_value,
- unused_value));
+ for_each_l3cc(l3cc, table, i)
+ intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
}
void intel_mocs_init_engine(struct intel_engine_cs *engine)
@@ -448,11 +426,14 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
init_l3cc_table(engine, &table);
}
-static void intel_mocs_init_global(struct intel_gt *gt)
+static u32 global_mocs_offset(void)
+{
+ return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
+}
+
+static void init_global_mocs(struct intel_gt *gt)
{
- struct intel_uncore *uncore = gt->uncore;
struct drm_i915_mocs_table table;
- unsigned int index;
/*
* LLC and eDRAM control values are not applicable to dgfx
@@ -460,32 +441,18 @@ static void intel_mocs_init_global(struct intel_gt *gt)
if (IS_DGFX(gt->i915))
return;
- GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
-
if (!get_mocs_settings(gt->i915, &table))
return;
- if (GEM_DEBUG_WARN_ON(table.size > table.n_entries))
- return;
-
- for (index = 0; index < table.size; index++)
- intel_uncore_write(uncore,
- GEN12_GLOBAL_MOCS(index),
- table.table[index].control_value);
-
- /*
- * Ok, now set the unused entries to the invalid entry (index 0). These
- * entries are officially undefined and no contract for the contents and
- * settings is given for these entries.
- */
- for (; index < table.n_entries; index++)
- intel_uncore_write(uncore,
- GEN12_GLOBAL_MOCS(index),
- table.table[0].control_value);
+ __init_mocs_table(gt->uncore, &table, global_mocs_offset());
}
void intel_mocs_init(struct intel_gt *gt)
{
if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
- intel_mocs_init_global(gt);
+ init_global_mocs(gt);
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_mocs.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
new file mode 100644
index 000000000000..f86f7e68ce5e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/slab.h>
+
+#include "i915_trace.h"
+#include "intel_gtt.h"
+#include "gen6_ppgtt.h"
+#include "gen8_ppgtt.h"
+
+struct i915_page_table *alloc_pt(struct i915_address_space *vm)
+{
+ struct i915_page_table *pt;
+
+ pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
+ if (unlikely(!pt))
+ return ERR_PTR(-ENOMEM);
+
+ if (unlikely(setup_page_dma(vm, &pt->base))) {
+ kfree(pt);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ atomic_set(&pt->used, 0);
+ return pt;
+}
+
+struct i915_page_directory *__alloc_pd(size_t sz)
+{
+ struct i915_page_directory *pd;
+
+ pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
+ if (unlikely(!pd))
+ return NULL;
+
+ spin_lock_init(&pd->lock);
+ return pd;
+}
+
+struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
+{
+ struct i915_page_directory *pd;
+
+ pd = __alloc_pd(sizeof(*pd));
+ if (unlikely(!pd))
+ return ERR_PTR(-ENOMEM);
+
+ if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+ kfree(pd);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return pd;
+}
+
+void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
+{
+ cleanup_page_dma(vm, pd);
+ kfree(pd);
+}
+
+static inline void
+write_dma_entry(struct i915_page_dma * const pdma,
+ const unsigned short idx,
+ const u64 encoded_entry)
+{
+ u64 * const vaddr = kmap_atomic(pdma->page);
+
+ vaddr[idx] = encoded_entry;
+ kunmap_atomic(vaddr);
+}
+
+void
+__set_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ struct i915_page_dma * const to,
+ u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
+{
+ /* Each thread pre-pins the pd, and we may have a thread per pde. */
+ GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry));
+
+ atomic_inc(px_used(pd));
+ pd->entry[idx] = to;
+ write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
+}
+
+void
+clear_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ const struct i915_page_scratch * const scratch)
+{
+ GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
+
+ write_dma_entry(px_base(pd), idx, scratch->encode);
+ pd->entry[idx] = NULL;
+ atomic_dec(px_used(pd));
+}
+
+bool
+release_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ struct i915_page_table * const pt,
+ const struct i915_page_scratch * const scratch)
+{
+ bool free = false;
+
+ if (atomic_add_unless(&pt->used, -1, 1))
+ return false;
+
+ spin_lock(&pd->lock);
+ if (atomic_dec_and_test(&pt->used)) {
+ clear_pd_entry(pd, idx, scratch);
+ free = true;
+ }
+ spin_unlock(&pd->lock);
+
+ return free;
+}
+
+int i915_ppgtt_init_hw(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ gtt_write_workarounds(gt);
+
+ if (IS_GEN(i915, 6))
+ gen6_ppgtt_enable(gt);
+ else if (IS_GEN(i915, 7))
+ gen7_ppgtt_enable(gt);
+
+ return 0;
+}
+
+static struct i915_ppgtt *
+__ppgtt_create(struct intel_gt *gt)
+{
+ if (INTEL_GEN(gt->i915) < 8)
+ return gen6_ppgtt_create(gt);
+ else
+ return gen8_ppgtt_create(gt);
+}
+
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
+{
+ struct i915_ppgtt *ppgtt;
+
+ ppgtt = __ppgtt_create(gt);
+ if (IS_ERR(ppgtt))
+ return ppgtt;
+
+ trace_i915_ppgtt_create(&ppgtt->vm);
+
+ return ppgtt;
+}
+
+static int ppgtt_bind_vma(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ u32 pte_flags;
+ int err;
+
+ if (flags & I915_VMA_ALLOC) {
+ err = vma->vm->allocate_va_range(vma->vm,
+ vma->node.start, vma->size);
+ if (err)
+ return err;
+
+ set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
+ }
+
+ /* Applicable to VLV, and gen8+ */
+ pte_flags = 0;
+ if (i915_gem_object_is_readonly(vma->obj))
+ pte_flags |= PTE_READ_ONLY;
+
+ GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
+ vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+ wmb();
+
+ return 0;
+}
+
+static void ppgtt_unbind_vma(struct i915_vma *vma)
+{
+ if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
+ vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+}
+
+int ppgtt_set_pages(struct i915_vma *vma)
+{
+ GEM_BUG_ON(vma->pages);
+
+ vma->pages = vma->obj->mm.pages;
+
+ vma->page_sizes = vma->obj->mm.page_sizes;
+
+ return 0;
+}
+
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ ppgtt->vm.gt = gt;
+ ppgtt->vm.i915 = i915;
+ ppgtt->vm.dma = &i915->drm.pdev->dev;
+ ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
+
+ i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
+
+ ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
+ ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
+ ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
+ ppgtt->vm.vma_ops.clear_pages = clear_pages;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 700104b90163..9e303c29d6e3 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -88,21 +88,18 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
* do not want the enable hysteresis to less than the wakeup latency.
*
* igt/gem_exec_nop/sequential provides a rough estimate for the
- * service latency, and puts it around 10us for Broadwell (and other
- * big core) and around 40us for Broxton (and other low power cores).
- * [Note that for legacy ringbuffer submission, this is less than 1us!]
- * However, the wakeup latency on Broxton is closer to 100us. To be
- * conservative, we have to factor in a context switch on top (due
- * to ksoftirqd).
+ * service latency, and puts it under 10us for Icelake, similar to
+ * Broadwell+, To be conservative, we want to factor in a context
+ * switch on top (due to ksoftirqd).
*/
- set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
- set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
+ set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
+ set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
/* 3a: Enable RC6 */
- set(uncore, GEN6_RC_CONTROL,
- GEN6_RC_CTL_HW_ENABLE |
- GEN6_RC_CTL_RC6_ENABLE |
- GEN6_RC_CTL_EI_MODE(1));
+ rc6->ctl_enable =
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN6_RC_CTL_RC6_ENABLE |
+ GEN6_RC_CTL_EI_MODE(1);
set(uncore, GEN9_PG_ENABLE,
GEN9_RENDER_PG_ENABLE |
@@ -173,10 +170,10 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
else
rc6_mode = GEN6_RC_CTL_EI_MODE(1);
- set(uncore, GEN6_RC_CONTROL,
- GEN6_RC_CTL_HW_ENABLE |
- GEN6_RC_CTL_RC6_ENABLE |
- rc6_mode);
+ rc6->ctl_enable =
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN6_RC_CTL_RC6_ENABLE |
+ rc6_mode;
/*
* WaRsDisableCoarsePowerGating:skl,cnl
@@ -203,10 +200,10 @@ static void gen8_rc6_enable(struct intel_rc6 *rc6)
set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
/* 3: Enable RC6 */
- set(uncore, GEN6_RC_CONTROL,
+ rc6->ctl_enable =
GEN6_RC_CTL_HW_ENABLE |
GEN7_RC_CTL_TO_MODE |
- GEN6_RC_CTL_RC6_ENABLE);
+ GEN6_RC_CTL_RC6_ENABLE;
}
static void gen6_rc6_enable(struct intel_rc6 *rc6)
@@ -242,10 +239,10 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
if (HAS_RC6pp(i915))
rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
- set(uncore, GEN6_RC_CONTROL,
+ rc6->ctl_enable =
rc6_mask |
GEN6_RC_CTL_EI_MODE(1) |
- GEN6_RC_CTL_HW_ENABLE);
+ GEN6_RC_CTL_HW_ENABLE;
rc6vids = 0;
ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
@@ -363,7 +360,7 @@ static void chv_rc6_enable(struct intel_rc6 *rc6)
VLV_RENDER_RC6_COUNT_EN));
/* 3: Enable RC6 */
- set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE);
+ rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
}
static void vlv_rc6_enable(struct intel_rc6 *rc6)
@@ -389,8 +386,8 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6)
VLV_MEDIA_RC6_COUNT_EN |
VLV_RENDER_RC6_COUNT_EN));
- set(uncore, GEN6_RC_CONTROL,
- GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
+ rc6->ctl_enable =
+ GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
}
static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
@@ -491,64 +488,19 @@ static void rpm_put(struct intel_rc6 *rc6)
rc6->wakeref = false;
}
-static bool intel_rc6_ctx_corrupted(struct intel_rc6 *rc6)
-{
- return !intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO);
-}
-
-static void intel_rc6_ctx_wa_init(struct intel_rc6 *rc6)
+static bool pctx_corrupted(struct intel_rc6 *rc6)
{
struct drm_i915_private *i915 = rc6_to_i915(rc6);
if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
- return;
-
- if (intel_rc6_ctx_corrupted(rc6)) {
- DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
- rc6->ctx_corrupted = true;
- }
-}
-
-/**
- * intel_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
- * @rc6: rc6 state
- *
- * Perform any steps needed to re-init the RC6 CTX WA after system resume.
- */
-void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6)
-{
- if (rc6->ctx_corrupted && !intel_rc6_ctx_corrupted(rc6)) {
- DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
- rc6->ctx_corrupted = false;
- }
-}
-
-/**
- * intel_rc6_ctx_wa_check - check for a new RC6 CTX corruption
- * @rc6: rc6 state
- *
- * Check if an RC6 CTX corruption has happened since the last check and if so
- * disable RC6 and runtime power management.
-*/
-void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6)
-{
- struct drm_i915_private *i915 = rc6_to_i915(rc6);
-
- if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
- return;
-
- if (rc6->ctx_corrupted)
- return;
-
- if (!intel_rc6_ctx_corrupted(rc6))
- return;
-
- DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
+ return false;
- intel_rc6_disable(rc6);
- rc6->ctx_corrupted = true;
+ if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
+ return false;
- return;
+ dev_notice(i915->drm.dev,
+ "RC6 context corruption, disabling runtime power management\n");
+ return true;
}
static void __intel_rc6_disable(struct intel_rc6 *rc6)
@@ -575,8 +527,6 @@ void intel_rc6_init(struct intel_rc6 *rc6)
if (!rc6_supported(rc6))
return;
- intel_rc6_ctx_wa_init(rc6);
-
if (IS_CHERRYVIEW(i915))
err = chv_rc6_init(rc6);
else if (IS_VALLEYVIEW(i915))
@@ -611,9 +561,6 @@ void intel_rc6_enable(struct intel_rc6 *rc6)
GEM_BUG_ON(rc6->enabled);
- if (rc6->ctx_corrupted)
- return;
-
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
if (IS_CHERRYVIEW(i915))
@@ -629,13 +576,51 @@ void intel_rc6_enable(struct intel_rc6 *rc6)
else if (INTEL_GEN(i915) >= 6)
gen6_rc6_enable(rc6);
+ rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE;
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ rc6->ctl_enable = 0;
+
intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+ if (unlikely(pctx_corrupted(rc6)))
+ return;
+
/* rc6 is ready, runtime-pm is go! */
rpm_put(rc6);
rc6->enabled = true;
}
+void intel_rc6_unpark(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ if (!rc6->enabled)
+ return;
+
+ /* Restore HW timers for automatic RC6 entry while busy */
+ set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
+}
+
+void intel_rc6_park(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ if (!rc6->enabled)
+ return;
+
+ if (unlikely(pctx_corrupted(rc6))) {
+ intel_rc6_disable(rc6);
+ return;
+ }
+
+ if (!rc6->manual)
+ return;
+
+ /* Turn off the HW timers and go directly to rc6 */
+ set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
+ set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT);
+}
+
void intel_rc6_disable(struct intel_rc6 *rc6)
{
if (!rc6->enabled)
@@ -785,3 +770,7 @@ u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg)
{
return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000);
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_rc6.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
index 1370f6834a4c..9f0f23fca8af 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
@@ -15,6 +15,9 @@ struct intel_rc6;
void intel_rc6_init(struct intel_rc6 *rc6);
void intel_rc6_fini(struct intel_rc6 *rc6);
+void intel_rc6_unpark(struct intel_rc6 *rc6);
+void intel_rc6_park(struct intel_rc6 *rc6);
+
void intel_rc6_sanitize(struct intel_rc6 *rc6);
void intel_rc6_enable(struct intel_rc6 *rc6);
void intel_rc6_disable(struct intel_rc6 *rc6);
@@ -22,7 +25,4 @@ void intel_rc6_disable(struct intel_rc6 *rc6);
u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg);
u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg);
-void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6);
-void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6);
-
#endif /* INTEL_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
index 89ad5697a8d4..bfbb623f7a4f 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -18,12 +18,14 @@ struct intel_rc6 {
u64 prev_hw_residency[4];
u64 cur_residency[4];
+ u32 ctl_enable;
+
struct drm_i915_gem_object *pctx;
bool supported : 1;
bool enabled : 1;
+ bool manual : 1;
bool wakeref : 1;
- bool ctx_corrupted : 1;
};
#endif /* INTEL_RC6_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index c4edc35e7d89..5954ecc3207f 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -29,16 +29,6 @@
#include "intel_renderstate.h"
#include "intel_ring.h"
-struct intel_renderstate {
- const struct intel_renderstate_rodata *rodata;
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
- u32 batch_offset;
- u32 batch_size;
- u32 aux_offset;
- u32 aux_size;
-};
-
static const struct intel_renderstate_rodata *
render_state_get_rodata(const struct intel_engine_cs *engine)
{
@@ -84,11 +74,11 @@ static int render_state_setup(struct intel_renderstate *so,
u32 *d;
int ret;
- ret = i915_gem_object_prepare_write(so->obj, &needs_clflush);
+ ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush);
if (ret)
return ret;
- d = kmap_atomic(i915_gem_object_get_dirty_page(so->obj, 0));
+ d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0));
while (i < rodata->batch_items) {
u32 s = rodata->batch[i];
@@ -166,7 +156,7 @@ static int render_state_setup(struct intel_renderstate *so,
ret = 0;
out:
- i915_gem_object_finish_access(so->obj);
+ i915_gem_object_finish_access(so->vma->obj);
return ret;
err:
@@ -177,61 +167,84 @@ err:
#undef OUT_BATCH
-int intel_renderstate_emit(struct i915_request *rq)
+int intel_renderstate_init(struct intel_renderstate *so,
+ struct intel_engine_cs *engine)
{
- struct intel_engine_cs *engine = rq->engine;
- struct intel_renderstate so = {}; /* keep the compiler happy */
+ struct drm_i915_gem_object *obj;
int err;
- so.rodata = render_state_get_rodata(engine);
- if (!so.rodata)
+ memset(so, 0, sizeof(*so));
+
+ so->rodata = render_state_get_rodata(engine);
+ if (!so->rodata)
return 0;
- if (so.rodata->batch_items * 4 > PAGE_SIZE)
+ if (so->rodata->batch_items * 4 > PAGE_SIZE)
return -EINVAL;
- so.obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
- if (IS_ERR(so.obj))
- return PTR_ERR(so.obj);
+ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- so.vma = i915_vma_instance(so.obj, &engine->gt->ggtt->vm, NULL);
- if (IS_ERR(so.vma)) {
- err = PTR_ERR(so.vma);
+ so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+ if (IS_ERR(so->vma)) {
+ err = PTR_ERR(so->vma);
goto err_obj;
}
- err = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
goto err_vma;
- err = render_state_setup(&so, rq->i915);
+ err = render_state_setup(so, engine->i915);
if (err)
goto err_unpin;
+ return 0;
+
+err_unpin:
+ i915_vma_unpin(so->vma);
+err_vma:
+ i915_vma_close(so->vma);
+err_obj:
+ i915_gem_object_put(obj);
+ so->vma = NULL;
+ return err;
+}
+
+int intel_renderstate_emit(struct intel_renderstate *so,
+ struct i915_request *rq)
+{
+ struct intel_engine_cs *engine = rq->engine;
+ int err;
+
+ if (!so->vma)
+ return 0;
+
err = engine->emit_bb_start(rq,
- so.batch_offset, so.batch_size,
+ so->batch_offset, so->batch_size,
I915_DISPATCH_SECURE);
if (err)
- goto err_unpin;
+ return err;
- if (so.aux_size > 8) {
+ if (so->aux_size > 8) {
err = engine->emit_bb_start(rq,
- so.aux_offset, so.aux_size,
+ so->aux_offset, so->aux_size,
I915_DISPATCH_SECURE);
if (err)
- goto err_unpin;
+ return err;
}
- i915_vma_lock(so.vma);
- err = i915_request_await_object(rq, so.vma->obj, false);
+ i915_vma_lock(so->vma);
+ err = i915_request_await_object(rq, so->vma->obj, false);
if (err == 0)
- err = i915_vma_move_to_active(so.vma, rq, 0);
- i915_vma_unlock(so.vma);
-err_unpin:
- i915_vma_unpin(so.vma);
-err_vma:
- i915_vma_close(so.vma);
-err_obj:
- i915_gem_object_put(so.obj);
+ err = i915_vma_move_to_active(so->vma, rq, 0);
+ i915_vma_unlock(so->vma);
+
return err;
}
+
+void intel_renderstate_fini(struct intel_renderstate *so)
+{
+ i915_vma_unpin_and_release(&so->vma, 0);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h
index 8d5079145054..5700be69a05a 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.h
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h
@@ -27,6 +27,8 @@
#include <linux/types.h>
struct i915_request;
+struct intel_engine_cs;
+struct i915_vma;
struct intel_renderstate_rodata {
const u32 *reloc;
@@ -46,6 +48,19 @@ extern const struct intel_renderstate_rodata gen7_null_state;
extern const struct intel_renderstate_rodata gen8_null_state;
extern const struct intel_renderstate_rodata gen9_null_state;
-int intel_renderstate_emit(struct i915_request *rq);
+struct intel_renderstate {
+ const struct intel_renderstate_rodata *rodata;
+ struct i915_vma *vma;
+ u32 batch_offset;
+ u32 batch_size;
+ u32 aux_offset;
+ u32 aux_size;
+};
+
+int intel_renderstate_init(struct intel_renderstate *so,
+ struct intel_engine_cs *engine);
+int intel_renderstate_emit(struct intel_renderstate *so,
+ struct i915_request *rq);
+void intel_renderstate_fini(struct intel_renderstate *so);
#endif /* _INTEL_RENDERSTATE_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index c97423a76642..beee0cf89bce 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -21,6 +21,7 @@
#include "intel_reset.h"
#include "uc/intel_guc.h"
+#include "uc/intel_guc_submission.h"
#define RESET_MAX_RETRIES 3
@@ -40,27 +41,29 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
static void engine_skip_context(struct i915_request *rq)
{
struct intel_engine_cs *engine = rq->engine;
- struct i915_gem_context *hung_ctx = rq->gem_context;
+ struct intel_context *hung_ctx = rq->context;
if (!i915_request_is_active(rq))
return;
lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
- if (rq->gem_context == hung_ctx)
+ if (rq->context == hung_ctx)
i915_request_skip(rq, -EIO);
}
-static void client_mark_guilty(struct drm_i915_file_private *file_priv,
- const struct i915_gem_context *ctx)
+static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{
- unsigned int score;
+ struct drm_i915_file_private *file_priv = ctx->file_priv;
unsigned long prev_hang;
+ unsigned int score;
+
+ if (IS_ERR_OR_NULL(file_priv))
+ return;
- if (i915_gem_context_is_banned(ctx))
+ score = 0;
+ if (banned)
score = I915_CLIENT_SCORE_CONTEXT_BAN;
- else
- score = 0;
prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
@@ -75,17 +78,38 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
}
}
-static bool context_mark_guilty(struct i915_gem_context *ctx)
+static bool mark_guilty(struct i915_request *rq)
{
+ struct i915_gem_context *ctx;
unsigned long prev_hang;
bool banned;
int i;
+ rcu_read_lock();
+ ctx = rcu_dereference(rq->context->gem_context);
+ if (ctx && !kref_get_unless_zero(&ctx->ref))
+ ctx = NULL;
+ rcu_read_unlock();
+ if (!ctx)
+ return false;
+
+ if (i915_gem_context_is_closed(ctx)) {
+ intel_context_set_banned(rq->context);
+ banned = true;
+ goto out;
+ }
+
atomic_inc(&ctx->guilty_count);
/* Cool contexts are too cool to be banned! (Used for reset testing.) */
- if (!i915_gem_context_is_bannable(ctx))
- return false;
+ if (!i915_gem_context_is_bannable(ctx)) {
+ banned = false;
+ goto out;
+ }
+
+ dev_notice(ctx->i915->drm.dev,
+ "%s context reset due to GPU hang\n",
+ ctx->name);
/* Record the timestamp for the last N hangs */
prev_hang = ctx->hang_timestamp[0];
@@ -100,38 +124,43 @@ static bool context_mark_guilty(struct i915_gem_context *ctx)
if (banned) {
DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
ctx->name, atomic_read(&ctx->guilty_count));
- i915_gem_context_set_banned(ctx);
+ intel_context_set_banned(rq->context);
}
- if (!IS_ERR_OR_NULL(ctx->file_priv))
- client_mark_guilty(ctx->file_priv, ctx);
+ client_mark_guilty(ctx, banned);
+out:
+ i915_gem_context_put(ctx);
return banned;
}
-static void context_mark_innocent(struct i915_gem_context *ctx)
+static void mark_innocent(struct i915_request *rq)
{
- atomic_inc(&ctx->active_count);
+ struct i915_gem_context *ctx;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(rq->context->gem_context);
+ if (ctx)
+ atomic_inc(&ctx->active_count);
+ rcu_read_unlock();
}
void __i915_request_reset(struct i915_request *rq, bool guilty)
{
- GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n",
- rq->engine->name,
- rq->fence.context,
- rq->fence.seqno,
- yesno(guilty));
+ RQ_TRACE(rq, "guilty? %s\n", yesno(guilty));
GEM_BUG_ON(i915_request_completed(rq));
+ rcu_read_lock(); /* protect the GEM context */
if (guilty) {
i915_request_skip(rq, -EIO);
- if (context_mark_guilty(rq->gem_context))
+ if (mark_guilty(rq))
engine_skip_context(rq);
} else {
dma_fence_set_error(&rq->fence, -EAGAIN);
- context_mark_innocent(rq->gem_context);
+ mark_innocent(rq);
}
+ rcu_read_unlock();
}
static bool i915_in_reset(struct pci_dev *pdev)
@@ -218,9 +247,8 @@ out:
return ret;
}
-static int ironlake_do_reset(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_uncore *uncore = gt->uncore;
int ret;
@@ -564,7 +592,7 @@ static reset_func intel_get_gpu_reset(const struct intel_gt *gt)
else if (INTEL_GEN(i915) >= 6)
return gen6_reset_engines;
else if (INTEL_GEN(i915) >= 5)
- return ironlake_do_reset;
+ return ilk_do_reset;
else if (IS_G4X(i915))
return g4x_do_reset;
else if (IS_G33(i915) || IS_PINEVIEW(i915))
@@ -592,7 +620,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
*/
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
- GEM_TRACE("engine_mask=%x\n", engine_mask);
+ GT_TRACE(gt, "engine_mask=%x\n", engine_mask);
preempt_disable();
ret = reset(gt, engine_mask, retry);
preempt_enable();
@@ -647,7 +675,8 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
* GPU state upon resume, i.e. fail to restart after a reset.
*/
intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
- engine->reset.prepare(engine);
+ if (engine->reset.prepare)
+ engine->reset.prepare(engine);
}
static void revoke_mmaps(struct intel_gt *gt)
@@ -667,8 +696,13 @@ static void revoke_mmaps(struct intel_gt *gt)
continue;
GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]);
- node = &vma->obj->base.vma_node;
+
+ if (!vma->mmo)
+ continue;
+
+ node = &vma->mmo->vma_node;
vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
+
unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping,
drm_vma_node_offset_addr(node) + vma_offset,
vma->size,
@@ -722,10 +756,11 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
static void reset_finish_engine(struct intel_engine_cs *engine)
{
- engine->reset.finish(engine);
+ if (engine->reset.finish)
+ engine->reset.finish(engine);
intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
- intel_engine_breadcrumbs_irq(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
@@ -745,8 +780,7 @@ static void nop_submit_request(struct i915_request *request)
struct intel_engine_cs *engine = request->engine;
unsigned long flags;
- GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
- engine->name, request->fence.context, request->fence.seqno);
+ RQ_TRACE(request, "-EIO\n");
dma_fence_set_error(&request->fence, -EIO);
spin_lock_irqsave(&engine->active.lock, flags);
@@ -754,7 +788,7 @@ static void nop_submit_request(struct i915_request *request)
i915_request_mark_complete(request);
spin_unlock_irqrestore(&engine->active.lock, flags);
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
static void __intel_gt_set_wedged(struct intel_gt *gt)
@@ -773,7 +807,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
intel_engine_dump(engine, &p, "%s\n", engine->name);
}
- GEM_TRACE("start\n");
+ GT_TRACE(gt, "start\n");
/*
* First, stop submission to hw, but do not yet complete requests by
@@ -799,11 +833,12 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
/* Mark all executing requests as skipped */
for_each_engine(engine, gt, id)
- engine->cancel_requests(engine);
+ if (engine->reset.cancel)
+ engine->reset.cancel(engine);
reset_finish(gt, awake);
- GEM_TRACE("end\n");
+ GT_TRACE(gt, "end\n");
}
void intel_gt_set_wedged(struct intel_gt *gt)
@@ -820,7 +855,6 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
{
struct intel_gt_timelines *timelines = &gt->timelines;
struct intel_timeline *tl;
- unsigned long flags;
bool ok;
if (!test_bit(I915_WEDGED, &gt->reset.flags))
@@ -830,7 +864,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
return false;
- GEM_TRACE("start\n");
+ GT_TRACE(gt, "start\n");
/*
* Before unwedging, make sure that all pending operations
@@ -842,7 +876,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
*
* No more can be submitted until we reset the wedged bit.
*/
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
list_for_each_entry(tl, &timelines->active_list, link) {
struct dma_fence *fence;
@@ -850,7 +884,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
if (!fence)
continue;
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
/*
* All internal dependencies (i915_requests) will have
@@ -863,10 +897,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
dma_fence_put(fence);
/* Restart iteration after droping lock */
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
tl = list_entry(&timelines->active_list, typeof(*tl), link);
}
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
/* We must reset pending GPU events before restoring our submission */
ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
@@ -892,7 +926,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
*/
intel_engines_reset_default_submission(gt);
- GEM_TRACE("end\n");
+ GT_TRACE(gt, "end\n");
smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
clear_bit(I915_WEDGED, &gt->reset.flags);
@@ -967,7 +1001,7 @@ void intel_gt_reset(struct intel_gt *gt,
intel_engine_mask_t awake;
int ret;
- GEM_TRACE("flags=%lx\n", gt->reset.flags);
+ GT_TRACE(gt, "flags=%lx\n", gt->reset.flags);
might_sleep();
GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
@@ -1070,9 +1104,10 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
{
struct intel_gt *gt = engine->gt;
+ bool uses_guc = intel_engine_in_guc_submission_mode(engine);
int ret;
- GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags);
+ ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));
if (!intel_engine_pm_get_if_awake(engine))
@@ -1085,14 +1120,14 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
"Resetting %s for %s\n", engine->name, msg);
atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
- if (!engine->gt->uc.guc.execbuf_client)
+ if (!uses_guc)
ret = intel_gt_reset_engine(engine);
else
ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
if (ret) {
/* If we fail here, we expect to fallback to a global reset */
DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
- engine->gt->uc.guc.execbuf_client ? "GuC " : "",
+ uses_guc ? "GuC " : "",
engine->name, ret);
goto out;
}
@@ -1195,7 +1230,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
if (flags & I915_ERROR_CAPTURE) {
- i915_capture_error_state(gt->i915, engine_mask, msg);
+ i915_capture_error_state(gt->i915);
intel_gt_clear_error_registers(gt, engine_mask);
}
@@ -1288,10 +1323,10 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
if (!intel_gt_is_wedged(gt))
return 0;
- /* Reset still in progress? Maybe we will recover? */
- if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
+ if (intel_gt_has_init_error(gt))
return -EIO;
+ /* Reset still in progress? Maybe we will recover? */
if (wait_event_interruptible(gt->reset.queue,
!test_bit(I915_RESET_BACKOFF,
&gt->reset.flags)))
@@ -1313,6 +1348,9 @@ void intel_gt_init_reset(struct intel_gt *gt)
init_waitqueue_head(&gt->reset.queue);
mutex_init(&gt->reset.mutex);
init_srcu_struct(&gt->reset.backoff_srcu);
+
+ /* no GPU until we are ready! */
+ __set_bit(I915_WEDGED, &gt->reset.flags);
}
void intel_gt_fini_reset(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index a47d5a7c32c9..bc44fe8e5ffa 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -33,6 +33,7 @@
#include "gem/i915_gem_context.h"
+#include "gen6_ppgtt.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_context.h"
@@ -362,6 +363,12 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
*/
flags |= PIPE_CONTROL_CS_STALL;
+ /*
+ * CS_STALL suggests at least a post-sync write.
+ */
+ flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
/* Just flush everything. Experiments have shown that reducing the
* number of bits based on the write domains has little performance
* impact.
@@ -380,13 +387,6 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
- /*
- * TLB invalidate requires a post-sync write.
- */
- flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-
- flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
/* Workaround: we must issue a pipe_control with CS-stall bit
* set before a pipe_control command that has the state cache
@@ -454,7 +454,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
- *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB |
+ MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->fence.seqno;
@@ -496,14 +497,13 @@ static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
{
- struct drm_i915_private *dev_priv = engine->i915;
u32 addr;
addr = lower_32_bits(phys);
- if (INTEL_GEN(dev_priv) >= 4)
+ if (INTEL_GEN(engine->i915) >= 4)
addr |= (phys >> 28) & 0xf0;
- I915_WRITE(HWS_PGA, addr);
+ intel_uncore_write(engine->uncore, HWS_PGA, addr);
}
static struct page *status_page(struct intel_engine_cs *engine)
@@ -522,14 +522,13 @@ static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
{
- struct drm_i915_private *dev_priv = engine->i915;
i915_reg_t hwsp;
/*
* The ring status page addresses are no longer next to the rest of
* the ring registers as of gen7.
*/
- if (IS_GEN(dev_priv, 7)) {
+ if (IS_GEN(engine->i915, 7)) {
switch (engine->id) {
/*
* No more rings exist on Gen7. Default case is only to shut up
@@ -551,14 +550,14 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
hwsp = VEBOX_HWS_PGA_GEN7;
break;
}
- } else if (IS_GEN(dev_priv, 6)) {
+ } else if (IS_GEN(engine->i915, 6)) {
hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
} else {
hwsp = RING_HWS_PGA(engine->mmio_base);
}
- I915_WRITE(hwsp, offset);
- POSTING_READ(hwsp);
+ intel_uncore_write(engine->uncore, hwsp, offset);
+ intel_uncore_posting_read(engine->uncore, hwsp);
}
static void flush_cs_tlb(struct intel_engine_cs *engine)
@@ -633,8 +632,8 @@ static int xcs_resume(struct intel_engine_cs *engine)
struct intel_ring *ring = engine->legacy.ring;
int ret = 0;
- GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n",
- engine->name, ring->head, ring->tail);
+ ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
+ ring->head, ring->tail);
intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
@@ -721,7 +720,7 @@ static int xcs_resume(struct intel_engine_cs *engine)
}
/* Papering over lost _interrupts_ immediately following the restart */
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
out:
intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
@@ -747,10 +746,10 @@ static void reset_prepare(struct intel_engine_cs *engine)
*
* FIXME: Wa for more modern gens needs to be validated
*/
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
if (intel_engine_stop_cs(engine))
- GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
+ ENGINE_TRACE(engine, "timed out on STOP_RING\n");
intel_uncore_write_fw(uncore,
RING_HEAD(base),
@@ -766,12 +765,11 @@ static void reset_prepare(struct intel_engine_cs *engine)
/* Check acts as a post */
if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
- GEM_TRACE("%s: ring head [%x] not parked\n",
- engine->name,
- intel_uncore_read_fw(uncore, RING_HEAD(base)));
+ ENGINE_TRACE(engine, "ring head [%x] not parked\n",
+ intel_uncore_read_fw(uncore, RING_HEAD(base)));
}
-static void reset_ring(struct intel_engine_cs *engine, bool stalled)
+static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
struct i915_request *pos, *rq;
unsigned long flags;
@@ -842,7 +840,8 @@ static void reset_finish(struct intel_engine_cs *engine)
static int rcs_resume(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
+ struct drm_i915_private *i915 = engine->i915;
+ struct intel_uncore *uncore = engine->uncore;
/*
* Disable CONSTANT_BUFFER before it is loaded from the context
@@ -854,13 +853,14 @@ static int rcs_resume(struct intel_engine_cs *engine)
* they are already accustomed to from before contexts were
* enabled.
*/
- if (IS_GEN(dev_priv, 4))
- I915_WRITE(ECOSKPD,
+ if (IS_GEN(i915, 4))
+ intel_uncore_write(uncore, ECOSKPD,
_MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE));
/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
- if (IS_GEN_RANGE(dev_priv, 4, 6))
- I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
+ if (IS_GEN_RANGE(i915, 4, 6))
+ intel_uncore_write(uncore, MI_MODE,
+ _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
/* We need to disable the AsyncFlip performance optimisations in order
* to use MI_WAIT_FOR_EVENT within the CS. It should already be
@@ -868,38 +868,40 @@ static int rcs_resume(struct intel_engine_cs *engine)
*
* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
*/
- if (IS_GEN_RANGE(dev_priv, 6, 7))
- I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
+ if (IS_GEN_RANGE(i915, 6, 7))
+ intel_uncore_write(uncore, MI_MODE,
+ _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
/* Required for the hardware to program scanline values for waiting */
/* WaEnableFlushTlbInvalidationMode:snb */
- if (IS_GEN(dev_priv, 6))
- I915_WRITE(GFX_MODE,
+ if (IS_GEN(i915, 6))
+ intel_uncore_write(uncore, GFX_MODE,
_MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
- if (IS_GEN(dev_priv, 7))
- I915_WRITE(GFX_MODE_GEN7,
+ if (IS_GEN(i915, 7))
+ intel_uncore_write(uncore, GFX_MODE_GEN7,
_MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
_MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
- if (IS_GEN(dev_priv, 6)) {
+ if (IS_GEN(i915, 6)) {
/* From the Sandybridge PRM, volume 1 part 3, page 24:
* "If this bit is set, STCunit will have LRA as replacement
* policy. [...] This bit must be reset. LRA replacement
* policy is not supported."
*/
- I915_WRITE(CACHE_MODE_0,
+ intel_uncore_write(uncore, CACHE_MODE_0,
_MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
}
- if (IS_GEN_RANGE(dev_priv, 6, 7))
- I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
+ if (IS_GEN_RANGE(i915, 6, 7))
+ intel_uncore_write(uncore, INSTPM,
+ _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
return xcs_resume(engine);
}
-static void cancel_requests(struct intel_engine_cs *engine)
+static void reset_cancel(struct intel_engine_cs *engine)
{
struct i915_request *request;
unsigned long flags;
@@ -1318,6 +1320,8 @@ static int ring_context_alloc(struct intel_context *ce)
return PTR_ERR(vma);
ce->state = vma;
+ if (engine->default_state)
+ __set_bit(CONTEXT_VALID_BIT, &ce->flags);
}
return 0;
@@ -1325,26 +1329,12 @@ static int ring_context_alloc(struct intel_context *ce)
static int ring_context_pin(struct intel_context *ce)
{
- int err;
-
- err = intel_context_active_acquire(ce);
- if (err)
- return err;
-
- err = __context_pin_ppgtt(ce);
- if (err)
- goto err_active;
-
- return 0;
-
-err_active:
- intel_context_active_release(ce);
- return err;
+ return __context_pin_ppgtt(ce);
}
static void ring_context_reset(struct intel_context *ce)
{
- intel_ring_reset(ce->ring, 0);
+ intel_ring_reset(ce->ring, ce->ring->emit);
}
static const struct intel_context_ops ring_context_ops = {
@@ -1360,46 +1350,38 @@ static const struct intel_context_ops ring_context_ops = {
.destroy = ring_context_destroy,
};
-static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt)
+static int load_pd_dir(struct i915_request *rq,
+ const struct i915_ppgtt *ppgtt,
+ u32 valid)
{
const struct intel_engine_cs * const engine = rq->engine;
u32 *cs;
- cs = intel_ring_begin(rq, 6);
+ cs = intel_ring_begin(rq, 12);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
- *cs++ = PP_DIR_DCLV_2G;
+ *cs++ = valid;
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int flush_pd_dir(struct i915_request *rq)
-{
- const struct intel_engine_cs * const engine = rq->engine;
- u32 *cs;
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Stall until the page table load is complete */
+ /* Stall until the page table load is complete? */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
- *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+ *cs++ = intel_gt_scratch_offset(engine->gt,
INTEL_GT_SCRATCH_FIELD_DEFAULT);
- *cs++ = MI_NOOP;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+ *cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
intel_ring_advance(rq, cs);
- return 0;
+
+ return rq->engine->emit_flush(rq, EMIT_FLUSH);
}
static inline int mi_set_context(struct i915_request *rq, u32 flags)
@@ -1413,14 +1395,6 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
int len;
u32 *cs;
- flags |= MI_MM_SPACE_GTT;
- if (IS_HASWELL(i915))
- /* These flags are for resource streamer on HSW+ */
- flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
- else
- /* We need to save the extended state for powersaving modes */
- flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
-
len = 4;
if (IS_GEN(i915, 7))
len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
@@ -1485,7 +1459,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
*cs++ = MI_NOOP;
*cs++ = MI_SET_CONTEXT;
- *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags;
+ *cs++ = i915_ggtt_offset(rq->context->state) | flags;
/*
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
* WaMiSetContext_Hang:snb,ivb,vlv
@@ -1555,10 +1529,10 @@ static int remap_l3_slice(struct i915_request *rq, int slice)
static int remap_l3(struct i915_request *rq)
{
- struct i915_gem_context *ctx = rq->gem_context;
+ struct i915_gem_context *ctx = i915_request_gem_context(rq);
int i, err;
- if (!ctx->remap_slice)
+ if (!ctx || !ctx->remap_slice)
return 0;
for (i = 0; i < MAX_L3_SLICES; i++) {
@@ -1574,65 +1548,59 @@ static int remap_l3(struct i915_request *rq)
return 0;
}
-static int switch_context(struct i915_request *rq)
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
{
- struct intel_context *ce = rq->hw_context;
- struct i915_address_space *vm = vm_alias(ce);
int ret;
- GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+ if (!vm)
+ return 0;
- if (vm) {
- ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm));
- if (ret)
- return ret;
- }
+ ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
+ if (ret)
+ return ret;
- if (ce->state) {
- u32 hw_flags;
+ /*
+ * Not only do we need a full barrier (post-sync write) after
+ * invalidating the TLBs, but we need to wait a little bit
+ * longer. Whether this is merely delaying us, or the
+ * subsequent flush is a key part of serialising with the
+ * post-sync op, this extra pass appears vital before a
+ * mm switch!
+ */
+ ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+ if (ret)
+ return ret;
- GEM_BUG_ON(rq->engine->id != RCS0);
+ return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+}
- /*
- * The kernel context(s) is treated as pure scratch and is not
- * expected to retain any state (as we sacrifice it during
- * suspend and on resume it may be corrupted). This is ok,
- * as nothing actually executes using the kernel context; it
- * is purely used for flushing user contexts.
- */
- hw_flags = 0;
- if (i915_gem_context_is_kernel(rq->gem_context))
- hw_flags = MI_RESTORE_INHIBIT;
+static int switch_context(struct i915_request *rq)
+{
+ struct intel_context *ce = rq->context;
+ int ret;
- ret = mi_set_context(rq, hw_flags);
- if (ret)
- return ret;
- }
+ GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
- if (vm) {
- struct intel_engine_cs *engine = rq->engine;
+ ret = switch_mm(rq, vm_alias(ce));
+ if (ret)
+ return ret;
- ret = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (ret)
- return ret;
+ if (ce->state) {
+ u32 flags;
- ret = flush_pd_dir(rq);
- if (ret)
- return ret;
+ GEM_BUG_ON(rq->engine->id != RCS0);
- /*
- * Not only do we need a full barrier (post-sync write) after
- * invalidating the TLBs, but we need to wait a little bit
- * longer. Whether this is merely delaying us, or the
- * subsequent flush is a key part of serialising with the
- * post-sync op, this extra pass appears vital before a
- * mm switch!
- */
- ret = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (ret)
- return ret;
+ /* For resource streamer on HSW+ and power context elsewhere */
+ BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
+ BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);
- ret = engine->emit_flush(rq, EMIT_FLUSH);
+ flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
+ if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
+ flags |= MI_RESTORE_EXT_STATE_EN;
+ else
+ flags |= MI_RESTORE_INHIBIT;
+
+ ret = mi_set_context(rq, flags);
if (ret)
return ret;
}
@@ -1648,7 +1616,7 @@ static int ring_request_alloc(struct i915_request *request)
{
int ret;
- GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+ GEM_BUG_ON(!intel_context_is_pinned(request->context));
GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);
/*
@@ -1804,7 +1772,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
static void i9xx_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = i9xx_submit_request;
- engine->cancel_requests = cancel_requests;
engine->park = NULL;
engine->unpark = NULL;
@@ -1816,7 +1783,7 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
engine->submit_request = gen6_bsd_submit_request;
}
-static void ring_destroy(struct intel_engine_cs *engine)
+static void ring_release(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -1830,8 +1797,6 @@ static void ring_destroy(struct intel_engine_cs *engine)
intel_timeline_unpin(engine->legacy.timeline);
intel_timeline_put(engine->legacy.timeline);
-
- kfree(engine);
}
static void setup_irq(struct intel_engine_cs *engine)
@@ -1862,11 +1827,10 @@ static void setup_common(struct intel_engine_cs *engine)
setup_irq(engine);
- engine->destroy = ring_destroy;
-
engine->resume = xcs_resume;
engine->reset.prepare = reset_prepare;
- engine->reset.reset = reset_ring;
+ engine->reset.rewind = reset_rewind;
+ engine->reset.cancel = reset_cancel;
engine->reset.finish = reset_finish;
engine->cops = &ring_context_ops;
@@ -1977,6 +1941,10 @@ static void setup_vecs(struct intel_engine_cs *engine)
int intel_ring_submission_setup(struct intel_engine_cs *engine)
{
+ struct intel_timeline *timeline;
+ struct intel_ring *ring;
+ int err;
+
setup_common(engine);
switch (engine->class) {
@@ -1997,15 +1965,6 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
return -ENODEV;
}
- return 0;
-}
-
-int intel_ring_submission_init(struct intel_engine_cs *engine)
-{
- struct intel_timeline *timeline;
- struct intel_ring *ring;
- int err;
-
timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
if (IS_ERR(timeline)) {
err = PTR_ERR(timeline);
@@ -2031,16 +1990,13 @@ int intel_ring_submission_init(struct intel_engine_cs *engine)
engine->legacy.ring = ring;
engine->legacy.timeline = timeline;
- err = intel_engine_init_common(engine);
- if (err)
- goto err_ring_unpin;
-
GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
+ /* Finally, take ownership and responsibility for cleanup! */
+ engine->release = ring_release;
+
return 0;
-err_ring_unpin:
- intel_ring_unpin(ring);
err_ring:
intel_ring_put(ring);
err_timeline_unpin:
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 20d6ee148afc..d2a3d935d186 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -37,6 +37,11 @@ static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
return mask & ~rps->pm_intrmsk_mbz;
}
+static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
+{
+ intel_uncore_write_fw(uncore, reg, val);
+}
+
static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
{
u32 mask = 0;
@@ -78,8 +83,7 @@ static void rps_enable_interrupts(struct intel_rps *rps)
gen6_gt_pm_enable_irq(gt, rps->pm_events);
spin_unlock_irq(&gt->irq_lock);
- intel_uncore_write(gt->uncore, GEN6_PMINTRMSK,
- rps_pm_mask(rps, rps->cur_freq));
+ set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq));
}
static void gen6_rps_reset_interrupts(struct intel_rps *rps)
@@ -113,8 +117,7 @@ static void rps_disable_interrupts(struct intel_rps *rps)
rps->pm_events = 0;
- intel_uncore_write(gt->uncore, GEN6_PMINTRMSK,
- rps_pm_sanitize_mask(rps, ~0u));
+ set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
spin_lock_irq(&gt->irq_lock);
gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
@@ -573,25 +576,21 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
if (IS_VALLEYVIEW(i915))
goto skip_hw_write;
- intel_uncore_write(uncore, GEN6_RP_UP_EI,
- GT_INTERVAL_FROM_US(i915, ei_up));
- intel_uncore_write(uncore, GEN6_RP_UP_THRESHOLD,
- GT_INTERVAL_FROM_US(i915,
- ei_up * threshold_up / 100));
-
- intel_uncore_write(uncore, GEN6_RP_DOWN_EI,
- GT_INTERVAL_FROM_US(i915, ei_down));
- intel_uncore_write(uncore, GEN6_RP_DOWN_THRESHOLD,
- GT_INTERVAL_FROM_US(i915,
- ei_down * threshold_down / 100));
-
- intel_uncore_write(uncore, GEN6_RP_CONTROL,
- (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
- GEN6_RP_MEDIA_HW_NORMAL_MODE |
- GEN6_RP_MEDIA_IS_GFX |
- GEN6_RP_ENABLE |
- GEN6_RP_UP_BUSY_AVG |
- GEN6_RP_DOWN_IDLE_AVG);
+ set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
+ set(uncore, GEN6_RP_UP_THRESHOLD,
+ GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));
+
+ set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
+ set(uncore, GEN6_RP_DOWN_THRESHOLD,
+ GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));
+
+ set(uncore, GEN6_RP_CONTROL,
+ (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_ENABLE |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_AVG);
skip_hw_write:
rps->power.mode = new_power;
@@ -666,7 +665,7 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val)
swreq = (GEN6_FREQUENCY(val) |
GEN6_OFFSET(0) |
GEN6_AGGRESSIVE_TURBO);
- intel_uncore_write(uncore, GEN6_RPNSWREQ, swreq);
+ set(uncore, GEN6_RPNSWREQ, swreq);
return 0;
}
@@ -683,7 +682,7 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val)
return err;
}
-static int rps_set(struct intel_rps *rps, u8 val)
+static int rps_set(struct intel_rps *rps, u8 val, bool update)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
int err;
@@ -701,7 +700,8 @@ static int rps_set(struct intel_rps *rps, u8 val)
if (err)
return err;
- gen6_rps_set_thresholds(rps, val);
+ if (update)
+ gen6_rps_set_thresholds(rps, val);
rps->last_freq = val;
return 0;
@@ -761,7 +761,7 @@ void intel_rps_park(struct intel_rps *rps)
* power than the render powerwell.
*/
intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
- rps_set(rps, rps->idle_freq);
+ rps_set(rps, rps->idle_freq, false);
intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
}
@@ -777,7 +777,7 @@ void intel_rps_boost(struct i915_request *rq)
spin_lock_irqsave(&rq->lock, flags);
if (!i915_request_has_waitboost(rq) &&
!dma_fence_is_signaled_locked(&rq->fence)) {
- rq->flags |= I915_REQUEST_WAITBOOST;
+ set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
if (!atomic_fetch_inc(&rps->num_waiters) &&
READ_ONCE(rps->cur_freq) < rps->boost_freq)
@@ -790,14 +790,16 @@ void intel_rps_boost(struct i915_request *rq)
int intel_rps_set(struct intel_rps *rps, u8 val)
{
- int err = 0;
+ int err;
lockdep_assert_held(&rps->lock);
GEM_BUG_ON(val > rps->max_freq);
GEM_BUG_ON(val < rps->min_freq);
if (rps->active) {
- err = rps_set(rps, val);
+ err = rps_set(rps, val, true);
+ if (err)
+ return err;
/*
* Make sure we continue to get interrupts
@@ -806,18 +808,15 @@ int intel_rps_set(struct intel_rps *rps, u8 val)
if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
struct intel_uncore *uncore = rps_to_uncore(rps);
- intel_uncore_write(uncore, GEN6_RP_INTERRUPT_LIMITS,
- rps_limits(rps, val));
+ set(uncore,
+ GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
- intel_uncore_write(uncore, GEN6_PMINTRMSK,
- rps_pm_mask(rps, val));
+ set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
}
}
- if (err == 0)
- rps->cur_freq = val;
-
- return err;
+ rps->cur_freq = val;
+ return 0;
}
static void gen6_rps_init(struct intel_rps *rps)
@@ -878,7 +877,7 @@ static bool rps_reset(struct intel_rps *rps)
rps->power.mode = -1;
rps->last_freq = -1;
- if (rps_set(rps, rps->min_freq)) {
+ if (rps_set(rps, rps->min_freq, true)) {
DRM_ERROR("Failed to reset RPS to initial values\n");
return false;
}
@@ -1201,7 +1200,7 @@ void intel_rps_enable(struct intel_rps *rps)
static void gen6_rps_disable(struct intel_rps *rps)
{
- intel_uncore_write(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
+ set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
}
void intel_rps_disable(struct intel_rps *rps)
@@ -1566,7 +1565,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
return;
if (pm_iir & PM_VEBOX_USER_INTERRUPT)
- intel_engine_breadcrumbs_irq(gt->engine[VECS0]);
+ intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
@@ -1663,23 +1662,53 @@ void intel_rps_init(struct intel_rps *rps)
if (INTEL_GEN(i915) <= 7)
rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
- if (INTEL_GEN(i915) >= 8)
+ if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11)
rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
}
-u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat)
+u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
- if (INTEL_GEN(i915) >= 9)
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ cagf = (rpstat >> 8) & 0xff;
+ else if (INTEL_GEN(i915) >= 9)
cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
else
cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
- return cagf;
+ return cagf;
+}
+
+static u32 read_cagf(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 freq;
+
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+ vlv_punit_get(i915);
+ freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ vlv_punit_put(i915);
+ } else {
+ freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
+ }
+
+ return intel_rps_get_cagf(rps, freq);
+}
+
+u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
+{
+ struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
+ intel_wakeref_t wakeref;
+ u32 freq = 0;
+
+ with_intel_runtime_pm_if_in_use(rpm, wakeref)
+ freq = intel_gpu_freq(rps, read_cagf(rps));
+
+ return freq;
}
/* External interface for intel_ips.ko */
@@ -1715,6 +1744,7 @@ void intel_rps_driver_register(struct intel_rps *rps)
* set up, to avoid intel-ips sneaking in and reading bogus values.
*/
if (IS_GEN(gt->i915, 5)) {
+ GEM_BUG_ON(ips_mchdev);
rcu_assign_pointer(ips_mchdev, gt->i915);
ips_ping_for_i915_load();
}
@@ -1722,7 +1752,8 @@ void intel_rps_driver_register(struct intel_rps *rps)
void intel_rps_driver_unregister(struct intel_rps *rps)
{
- rcu_assign_pointer(ips_mchdev, NULL);
+ if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
+ rcu_assign_pointer(ips_mchdev, NULL);
}
static struct drm_i915_private *mchdev_get(void)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index 9518c66c9792..dfa98194f3b2 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -29,7 +29,8 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive);
int intel_gpu_freq(struct intel_rps *rps, int val);
int intel_freq_opcode(struct intel_rps *rps, int val);
-u32 intel_get_cagf(struct intel_rps *rps, u32 rpstat1);
+u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
+u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
void gen5_rps_irq_handler(struct intel_rps *rps);
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 649798c184fb..87716529cd2f 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -15,6 +15,9 @@
#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
+#define CACHELINE_BITS 6
+#define CACHELINE_FREE CACHELINE_BITS
+
struct intel_timeline_hwsp {
struct intel_gt *gt;
struct intel_gt_timelines *gt_timelines;
@@ -23,14 +26,6 @@ struct intel_timeline_hwsp {
u64 free_bitmap;
};
-struct intel_timeline_cacheline {
- struct i915_active active;
- struct intel_timeline_hwsp *hwsp;
- void *vaddr;
-#define CACHELINE_BITS 6
-#define CACHELINE_FREE CACHELINE_BITS
-};
-
static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
@@ -133,7 +128,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
i915_active_fini(&cl->active);
- kfree(cl);
+ kfree_rcu(cl, rcu);
}
__i915_active_call
@@ -254,7 +249,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
mutex_init(&timeline->mutex);
- INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
+ INIT_ACTIVE_FENCE(&timeline->last_request);
INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync);
@@ -262,7 +257,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
return 0;
}
-static void timelines_init(struct intel_gt *gt)
+void intel_gt_init_timelines(struct intel_gt *gt)
{
struct intel_gt_timelines *timelines = &gt->timelines;
@@ -273,11 +268,6 @@ static void timelines_init(struct intel_gt *gt)
INIT_LIST_HEAD(&timelines->hwsp_free_list);
}
-void intel_timelines_init(struct drm_i915_private *i915)
-{
- timelines_init(&i915->gt);
-}
-
void intel_timeline_fini(struct intel_timeline *timeline)
{
GEM_BUG_ON(atomic_read(&timeline->pin_count));
@@ -338,7 +328,6 @@ int intel_timeline_pin(struct intel_timeline *tl)
void intel_timeline_enter(struct intel_timeline *tl)
{
struct intel_gt_timelines *timelines = &tl->gt->timelines;
- unsigned long flags;
/*
* Pretend we are serialised by the timeline->mutex.
@@ -359,21 +348,19 @@ void intel_timeline_enter(struct intel_timeline *tl)
* use atomic to manipulate tl->active_count.
*/
lockdep_assert_held(&tl->mutex);
- GEM_BUG_ON(!atomic_read(&tl->pin_count));
if (atomic_add_unless(&tl->active_count, 1, 0))
return;
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
if (!atomic_fetch_inc(&tl->active_count))
list_add_tail(&tl->link, &timelines->active_list);
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
}
void intel_timeline_exit(struct intel_timeline *tl)
{
struct intel_gt_timelines *timelines = &tl->gt->timelines;
- unsigned long flags;
/* See intel_timeline_enter() */
lockdep_assert_held(&tl->mutex);
@@ -382,10 +369,10 @@ void intel_timeline_exit(struct intel_timeline *tl)
if (atomic_add_unless(&tl->active_count, -1, 1))
return;
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
/*
* Since this timeline is idle, all bariers upon which we were waiting
@@ -521,46 +508,35 @@ int intel_timeline_read_hwsp(struct i915_request *from,
struct i915_request *to,
u32 *hwsp)
{
- struct intel_timeline *tl;
+ struct intel_timeline_cacheline *cl;
int err;
+ GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));
+
rcu_read_lock();
- tl = rcu_dereference(from->timeline);
- if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref))
- tl = NULL;
+ cl = rcu_dereference(from->hwsp_cacheline);
+ if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
+ goto unlock; /* seqno wrapped and completed! */
+ if (unlikely(i915_request_completed(from)))
+ goto release;
rcu_read_unlock();
- if (!tl) /* already completed */
- return 1;
-
- GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl);
-
- err = -EBUSY;
- if (mutex_trylock(&tl->mutex)) {
- struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
- if (i915_request_completed(from)) {
- err = 1;
- goto unlock;
- }
+ err = cacheline_ref(cl, to);
+ if (err)
+ goto out;
- err = cacheline_ref(cl, to);
- if (err)
- goto unlock;
+ *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
+ ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
- if (likely(cl == tl->hwsp_cacheline)) {
- *hwsp = tl->hwsp_offset;
- } else { /* across a seqno wrap, recover the original offset */
- *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
- ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
- CACHELINE_BYTES;
- }
+out:
+ i915_active_release(&cl->active);
+ return err;
+release:
+ i915_active_release(&cl->active);
unlock:
- mutex_unlock(&tl->mutex);
- }
- intel_timeline_put(tl);
-
- return err;
+ rcu_read_unlock();
+ return 1;
}
void intel_timeline_unpin(struct intel_timeline *tl)
@@ -583,7 +559,7 @@ void __intel_timeline_free(struct kref *kref)
kfree_rcu(timeline, rcu);
}
-static void timelines_fini(struct intel_gt *gt)
+void intel_gt_fini_timelines(struct intel_gt *gt)
{
struct intel_gt_timelines *timelines = &gt->timelines;
@@ -591,11 +567,6 @@ static void timelines_fini(struct intel_gt *gt)
GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}
-void intel_timelines_fini(struct drm_i915_private *i915)
-{
- timelines_fini(&i915->gt);
-}
-
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index f583af1ba18d..f5b7eade3809 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -88,7 +88,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
struct i915_request *until,
u32 *hwsp_offset);
-void intel_timelines_init(struct drm_i915_private *i915);
-void intel_timelines_fini(struct drm_i915_private *i915);
+void intel_gt_init_timelines(struct intel_gt *gt);
+void intel_gt_fini_timelines(struct intel_gt *gt);
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index aaf15cbe1ce1..02181c5020db 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -10,14 +10,15 @@
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/mutex.h>
+#include <linux/rcupdate.h>
#include <linux/types.h>
#include "i915_active_types.h"
-struct drm_i915_private;
struct i915_vma;
-struct intel_timeline_cacheline;
struct i915_syncmap;
+struct intel_gt;
+struct intel_timeline_hwsp;
struct intel_timeline {
u64 fence_context;
@@ -87,4 +88,13 @@ struct intel_timeline {
struct rcu_head rcu;
};
+struct intel_timeline_cacheline {
+ struct i915_active active;
+
+ struct intel_timeline_hwsp *hwsp;
+ void *vaddr;
+
+ struct rcu_head rcu;
+};
+
#endif /* __I915_TIMELINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index e4bccc14602f..4e292d4bf7b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -6,6 +6,7 @@
#include "i915_drv.h"
#include "intel_context.h"
+#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
@@ -146,21 +147,27 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
}
}
-static void
-wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
- u32 val)
+static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
+ u32 val, u32 read_mask)
{
struct i915_wa wa = {
.reg = reg,
.mask = mask,
.val = val,
- .read = mask,
+ .read = read_mask,
};
_wa_add(wal, &wa);
}
static void
+wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
+ u32 val)
+{
+ wa_add(wal, reg, mask, val, mask);
+}
+
+static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
@@ -247,7 +254,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
/* WaDisableDopClockGating:bdw
*
- * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
+ * Also see the related UCGTCL1 write in bdw_init_clock_gating()
* to disable EUTC clock gating.
*/
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
@@ -568,9 +575,24 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
+ u32 val;
+
/* Wa_1409142259:tgl */
WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+
+ /* Wa_1604555607:tgl */
+ val = intel_uncore_read(engine->uncore, FF_MODE2);
+ val &= ~FF_MODE2_TDS_TIMER_MASK;
+ val |= FF_MODE2_TDS_TIMER_128;
+ /*
+ * FIXME: FF_MODE2 register is not readable till TGL B0. We can
+ * enable verification of WA from the later steppings, which enables
+ * the read of FF_MODE2.
+ */
+ wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val,
+ IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 :
+ FF_MODE2_TDS_TIMER_MASK);
}
static void
@@ -1315,6 +1337,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN6_RC_SLEEP_PSMI_CONTROL,
GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
GEN8_RC_SEMA_IDLE_MSG_DISABLE);
+
+ /*
+ * Wa_1606679103:tgl
+ * (see also Wa_1606682166:icl)
+ */
+ wa_write_or(wal,
+ GEN7_SARCHKMD,
+ GEN7_DISABLE_SAMPLER_PREFETCH);
}
if (IS_GEN(i915, 11)) {
@@ -1574,7 +1604,9 @@ static int engine_wa_list_verify(struct intel_context *ce,
if (IS_ERR(vma))
return PTR_ERR(vma);
+ intel_engine_pm_get(ce->engine);
rq = intel_context_create_request(ce);
+ intel_engine_pm_put(ce->engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_vma;
@@ -1584,16 +1616,17 @@ static int engine_wa_list_verify(struct intel_context *ce,
if (err)
goto err_vma;
+ i915_request_get(rq);
i915_request_add(rq);
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
err = -ETIME;
- goto err_vma;
+ goto err_rq;
}
results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
if (IS_ERR(results)) {
err = PTR_ERR(results);
- goto err_vma;
+ goto err_rq;
}
err = 0;
@@ -1607,6 +1640,8 @@ static int engine_wa_list_verify(struct intel_context *ce,
i915_gem_object_unpin_map(vma->obj);
+err_rq:
+ i915_request_put(rq);
err_vma:
i915_vma_unpin(vma);
i915_vma_put(vma);
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 83f549d203a0..a560b7eee2cd 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -77,7 +77,7 @@ static void advance(struct i915_request *request)
i915_request_mark_complete(request);
GEM_BUG_ON(!i915_request_completed(request));
- intel_engine_queue_breadcrumbs(request->engine);
+ intel_engine_signal_breadcrumbs(request->engine);
}
static void hw_delay_complete(struct timer_list *t)
@@ -149,7 +149,11 @@ static int mock_context_alloc(struct intel_context *ce)
static int mock_context_pin(struct intel_context *ce)
{
- return intel_context_active_acquire(ce);
+ return 0;
+}
+
+static void mock_context_reset(struct intel_context *ce)
+{
}
static const struct intel_context_ops mock_context_ops = {
@@ -161,6 +165,7 @@ static const struct intel_context_ops mock_context_ops = {
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
+ .reset = mock_context_reset,
.destroy = mock_context_destroy,
};
@@ -207,16 +212,12 @@ static void mock_reset_prepare(struct intel_engine_cs *engine)
{
}
-static void mock_reset(struct intel_engine_cs *engine, bool stalled)
+static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
GEM_BUG_ON(stalled);
}
-static void mock_reset_finish(struct intel_engine_cs *engine)
-{
-}
-
-static void mock_cancel_requests(struct intel_engine_cs *engine)
+static void mock_reset_cancel(struct intel_engine_cs *engine)
{
struct i915_request *request;
unsigned long flags;
@@ -234,6 +235,24 @@ static void mock_cancel_requests(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&engine->active.lock, flags);
}
+static void mock_reset_finish(struct intel_engine_cs *engine)
+{
+}
+
+static void mock_engine_release(struct intel_engine_cs *engine)
+{
+ struct mock_engine *mock =
+ container_of(engine, typeof(*mock), base);
+
+ GEM_BUG_ON(timer_pending(&mock->hw_delay));
+
+ intel_context_unpin(engine->kernel_context);
+ intel_context_put(engine->kernel_context);
+
+ intel_engine_fini_retire(engine);
+ intel_engine_fini_breadcrumbs(engine);
+}
+
struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
const char *name,
int id)
@@ -265,9 +284,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.submit_request = mock_submit_request;
engine->base.reset.prepare = mock_reset_prepare;
- engine->base.reset.reset = mock_reset;
+ engine->base.reset.rewind = mock_reset_rewind;
+ engine->base.reset.cancel = mock_reset_cancel;
engine->base.reset.finish = mock_reset_finish;
- engine->base.cancel_requests = mock_cancel_requests;
+
+ engine->base.release = mock_engine_release;
i915->gt.engine[id] = &engine->base;
i915->gt.engine_class[0][id] = &engine->base;
@@ -290,6 +311,7 @@ int mock_engine_init(struct intel_engine_cs *engine)
intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
intel_engine_init__pm(engine);
+ intel_engine_init_retire(engine);
intel_engine_pool_init(&engine->pool);
ce = create_kernel_context(engine);
@@ -321,18 +343,3 @@ void mock_engine_flush(struct intel_engine_cs *engine)
void mock_engine_reset(struct intel_engine_cs *engine)
{
}
-
-void mock_engine_free(struct intel_engine_cs *engine)
-{
- struct mock_engine *mock =
- container_of(engine, typeof(*mock), base);
-
- GEM_BUG_ON(timer_pending(&mock->hw_delay));
-
- intel_context_unpin(engine->kernel_context);
- intel_context_put(engine->kernel_context);
-
- intel_engine_fini_breadcrumbs(engine);
-
- kfree(engine);
-}
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index bc720defc6b8..e874dfaa5316 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -5,6 +5,7 @@
*/
#include "i915_selftest.h"
+#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
@@ -47,35 +48,36 @@ static int context_sync(struct intel_context *ce)
mutex_lock(&tl->mutex);
do {
- struct dma_fence *fence;
+ struct i915_request *rq;
long timeout;
- fence = i915_active_fence_get(&tl->last_request);
- if (!fence)
+ if (list_empty(&tl->requests))
break;
- timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
+ rq = list_last_entry(&tl->requests, typeof(*rq), link);
+ i915_request_get(rq);
+
+ timeout = i915_request_wait(rq, 0, HZ / 10);
if (timeout < 0)
err = timeout;
else
- i915_request_retire_upto(to_request(fence));
+ i915_request_retire_upto(rq);
- dma_fence_put(fence);
+ i915_request_put(rq);
} while (!err);
mutex_unlock(&tl->mutex);
return err;
}
-static int __live_context_size(struct intel_engine_cs *engine,
- struct i915_gem_context *fixme)
+static int __live_context_size(struct intel_engine_cs *engine)
{
struct intel_context *ce;
struct i915_request *rq;
void *vaddr;
int err;
- ce = intel_context_create(fixme, engine);
+ ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
@@ -118,7 +120,7 @@ static int __live_context_size(struct intel_engine_cs *engine,
goto err_unpin;
/* Force the context switch */
- rq = i915_request_create(engine->kernel_context);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_unpin;
@@ -143,7 +145,6 @@ static int live_context_size(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *fixme;
enum intel_engine_id id;
int err = 0;
@@ -152,10 +153,6 @@ static int live_context_size(void *arg)
* HW tries to write past the end of one.
*/
- fixme = kernel_context(gt->i915);
- if (IS_ERR(fixme))
- return PTR_ERR(fixme);
-
for_each_engine(engine, gt, id) {
struct {
struct drm_i915_gem_object *state;
@@ -180,7 +177,7 @@ static int live_context_size(void *arg)
/* Overlaps with the execlists redzone */
engine->context_size += I915_GTT_PAGE_SIZE;
- err = __live_context_size(engine, fixme);
+ err = __live_context_size(engine);
engine->context_size -= I915_GTT_PAGE_SIZE;
@@ -193,13 +190,12 @@ static int live_context_size(void *arg)
break;
}
- kernel_context_close(fixme);
return err;
}
-static int __live_active_context(struct intel_engine_cs *engine,
- struct i915_gem_context *fixme)
+static int __live_active_context(struct intel_engine_cs *engine)
{
+ unsigned long saved_heartbeat;
struct intel_context *ce;
int pass;
int err;
@@ -223,40 +219,55 @@ static int __live_active_context(struct intel_engine_cs *engine,
return -EINVAL;
}
- ce = intel_context_create(fixme, engine);
+ ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
+ saved_heartbeat = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
for (pass = 0; pass <= 2; pass++) {
struct i915_request *rq;
+ intel_engine_pm_get(engine);
+
rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err;
+ goto out_engine;
}
err = request_sync(rq);
if (err)
- goto err;
+ goto out_engine;
/* Context will be kept active until after an idle-barrier. */
if (i915_active_is_idle(&ce->active)) {
pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
engine->name, pass);
err = -EINVAL;
- goto err;
+ goto out_engine;
}
if (!intel_engine_pm_is_awake(engine)) {
pr_err("%s is asleep before idle-barrier\n",
engine->name);
err = -EINVAL;
- goto err;
+ goto out_engine;
}
+
+out_engine:
+ intel_engine_pm_put(engine);
+ if (err)
+ goto err;
}
/* Now make sure our idle-barriers are flushed */
+ err = intel_engine_flush_barriers(engine);
+ if (err)
+ goto err;
+
+ /* Wait for the barrier and in the process wait for engine to park */
err = context_sync(engine->kernel_context);
if (err)
goto err;
@@ -266,12 +277,15 @@ static int __live_active_context(struct intel_engine_cs *engine,
err = -EINVAL;
}
+ intel_engine_pm_flush(engine);
+
if (intel_engine_pm_is_awake(engine)) {
struct drm_printer p = drm_debug_printer(__func__);
intel_engine_dump(engine, &p,
- "%s is still awake after idle-barriers\n",
- engine->name);
+ "%s is still awake:%d after idle-barriers\n",
+ engine->name,
+ atomic_read(&engine->wakeref.count));
GEM_TRACE_DUMP();
err = -EINVAL;
@@ -279,6 +293,7 @@ static int __live_active_context(struct intel_engine_cs *engine,
}
err:
+ engine->props.heartbeat_interval_ms = saved_heartbeat;
intel_context_put(ce);
return err;
}
@@ -287,23 +302,11 @@ static int live_active_context(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *fixme;
enum intel_engine_id id;
- struct drm_file *file;
int err = 0;
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- fixme = live_context(gt->i915, file);
- if (IS_ERR(fixme)) {
- err = PTR_ERR(fixme);
- goto out_file;
- }
-
for_each_engine(engine, gt, id) {
- err = __live_active_context(engine, fixme);
+ err = __live_active_context(engine);
if (err)
break;
@@ -312,8 +315,6 @@ static int live_active_context(void *arg)
break;
}
-out_file:
- mock_file_free(gt->i915, file);
return err;
}
@@ -345,10 +346,10 @@ unpin:
return err;
}
-static int __live_remote_context(struct intel_engine_cs *engine,
- struct i915_gem_context *fixme)
+static int __live_remote_context(struct intel_engine_cs *engine)
{
struct intel_context *local, *remote;
+ unsigned long saved_heartbeat;
int pass;
int err;
@@ -360,16 +361,26 @@ static int __live_remote_context(struct intel_engine_cs *engine,
* clobber the idle-barrier.
*/
- remote = intel_context_create(fixme, engine);
+ if (intel_engine_pm_is_awake(engine)) {
+ pr_err("%s is awake before starting %s!\n",
+ engine->name, __func__);
+ return -EINVAL;
+ }
+
+ remote = intel_context_create(engine);
if (IS_ERR(remote))
return PTR_ERR(remote);
- local = intel_context_create(fixme, engine);
+ local = intel_context_create(engine);
if (IS_ERR(local)) {
err = PTR_ERR(local);
goto err_remote;
}
+ saved_heartbeat = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+ intel_engine_pm_get(engine);
+
for (pass = 0; pass <= 2; pass++) {
err = __remote_sync(local, remote);
if (err)
@@ -387,6 +398,9 @@ static int __live_remote_context(struct intel_engine_cs *engine,
}
}
+ intel_engine_pm_put(engine);
+ engine->props.heartbeat_interval_ms = saved_heartbeat;
+
intel_context_put(local);
err_remote:
intel_context_put(remote);
@@ -397,23 +411,11 @@ static int live_remote_context(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *fixme;
enum intel_engine_id id;
- struct drm_file *file;
int err = 0;
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- fixme = live_context(gt->i915, file);
- if (IS_ERR(fixme)) {
- err = PTR_ERR(fixme);
- goto out_file;
- }
-
for_each_engine(engine, gt, id) {
- err = __live_remote_context(engine, fixme);
+ err = __live_remote_context(engine);
if (err)
break;
@@ -422,8 +424,6 @@ static int live_remote_context(void *arg)
break;
}
-out_file:
- mock_file_free(gt->i915, file);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 3880f07c29b8..f88e445a1cae 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -4,7 +4,365 @@
* Copyright © 2018 Intel Corporation
*/
-#include "../i915_selftest.h"
+#include <linux/sort.h>
+
+#include "intel_gt_pm.h"
+#include "intel_rps.h"
+
+#include "i915_selftest.h"
+#include "selftests/igt_flush_test.h"
+
+#define COUNT 5
+
+static int cmp_u32(const void *A, const void *B)
+{
+ const u32 *a = A, *b = B;
+
+ return *a - *b;
+}
+
+static void perf_begin(struct intel_gt *gt)
+{
+ intel_gt_pm_get(gt);
+
+ /* Boost gpufreq to max [waitboost] and keep it fixed */
+ atomic_inc(&gt->rps.num_waiters);
+ schedule_work(&gt->rps.work);
+ flush_work(&gt->rps.work);
+}
+
+static int perf_end(struct intel_gt *gt)
+{
+ atomic_dec(&gt->rps.num_waiters);
+ intel_gt_pm_put(gt);
+
+ return igt_flush_test(gt->i915);
+}
+
+static int write_timestamp(struct i915_request *rq, int slot)
+{
+ u32 cmd;
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
+ if (INTEL_GEN(rq->i915) >= 8)
+ cmd++;
+ *cs++ = cmd;
+ *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
+ *cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32);
+ *cs++ = 0;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static struct i915_vma *create_empty_batch(struct intel_context *ce)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_put;
+ }
+
+ cs[0] = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_unpin;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err_unpin;
+
+ i915_gem_object_unpin_map(obj);
+ return vma;
+
+err_unpin:
+ i915_gem_object_unpin_map(obj);
+err_put:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
+
+static u32 trifilter(u32 *a)
+{
+ u64 sum;
+
+ sort(a, COUNT, sizeof(*a), cmp_u32, NULL);
+
+ sum = mul_u32_u32(a[2], 2);
+ sum += a[1];
+ sum += a[3];
+
+ return sum >> 2;
+}
+
+static int perf_mi_bb_start(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+ return 0;
+
+ perf_begin(gt);
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_vma *batch;
+ u32 cycles[COUNT];
+ int i;
+
+ intel_engine_pm_get(engine);
+
+ batch = create_empty_batch(ce);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ err = i915_vma_sync(batch);
+ if (err) {
+ intel_engine_pm_put(engine);
+ i915_vma_put(batch);
+ break;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cycles); i++) {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ err = write_timestamp(rq, 2);
+ if (err)
+ goto out;
+
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start, 8,
+ 0);
+ if (err)
+ goto out;
+
+ err = write_timestamp(rq, 3);
+ if (err)
+ goto out;
+
+out:
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -EIO;
+ i915_request_put(rq);
+ if (err)
+ break;
+
+ cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
+ }
+ i915_vma_put(batch);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+
+ pr_info("%s: MI_BB_START cycles: %u\n",
+ engine->name, trifilter(cycles));
+ }
+ if (perf_end(gt))
+ err = -EIO;
+
+ return err;
+}
+
+static struct i915_vma *create_nop_batch(struct intel_context *ce)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_put;
+ }
+
+ memset(cs, 0, SZ_64K);
+ cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_unpin;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err_unpin;
+
+ i915_gem_object_unpin_map(obj);
+ return vma;
+
+err_unpin:
+ i915_gem_object_unpin_map(obj);
+err_put:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
+
+static int perf_mi_noop(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+ return 0;
+
+ perf_begin(gt);
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_vma *base, *nop;
+ u32 cycles[COUNT];
+ int i;
+
+ intel_engine_pm_get(engine);
+
+ base = create_empty_batch(ce);
+ if (IS_ERR(base)) {
+ err = PTR_ERR(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ err = i915_vma_sync(base);
+ if (err) {
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ nop = create_nop_batch(ce);
+ if (IS_ERR(nop)) {
+ err = PTR_ERR(nop);
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ err = i915_vma_sync(nop);
+ if (err) {
+ i915_vma_put(nop);
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cycles); i++) {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ err = write_timestamp(rq, 2);
+ if (err)
+ goto out;
+
+ err = rq->engine->emit_bb_start(rq,
+ base->node.start, 8,
+ 0);
+ if (err)
+ goto out;
+
+ err = write_timestamp(rq, 3);
+ if (err)
+ goto out;
+
+ err = rq->engine->emit_bb_start(rq,
+ nop->node.start,
+ nop->node.size,
+ 0);
+ if (err)
+ goto out;
+
+ err = write_timestamp(rq, 4);
+ if (err)
+ goto out;
+
+out:
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -EIO;
+ i915_request_put(rq);
+ if (err)
+ break;
+
+ cycles[i] =
+ (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
+ (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);
+ }
+ i915_vma_put(nop);
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+
+ pr_info("%s: 16K MI_NOOP cycles: %u\n",
+ engine->name, trifilter(cycles));
+ }
+ if (perf_end(gt))
+ err = -EIO;
+
+ return err;
+}
+
+int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(perf_mi_bb_start),
+ SUBTEST(perf_mi_noop),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
static int intel_mmio_bases_check(void *arg)
{
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index e864406bd2d9..43d4d589749f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -11,6 +11,28 @@
#include "intel_gt_requests.h"
#include "i915_selftest.h"
+static int timeline_sync(struct intel_timeline *tl)
+{
+ struct dma_fence *fence;
+ long timeout;
+
+ fence = i915_active_fence_get(&tl->last_request);
+ if (!fence)
+ return 0;
+
+ timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
+ dma_fence_put(fence);
+ if (timeout < 0)
+ return timeout;
+
+ return 0;
+}
+
+static int engine_sync_barrier(struct intel_engine_cs *engine)
+{
+ return timeline_sync(engine->kernel_context->timeline);
+}
+
struct pulse {
struct i915_active active;
struct kref kref;
@@ -53,9 +75,7 @@ static struct pulse *pulse_create(void)
static void pulse_unlock_wait(struct pulse *p)
{
- mutex_lock(&p->active.mutex);
- mutex_unlock(&p->active.mutex);
- flush_work(&p->active.work);
+ i915_active_unlock_wait(&p->active);
}
static int __live_idle_pulse(struct intel_engine_cs *engine,
@@ -92,7 +112,12 @@ static int __live_idle_pulse(struct intel_engine_cs *engine,
GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
- if (intel_gt_retire_requests_timeout(engine->gt, HZ / 5)) {
+ if (engine_sync_barrier(engine)) {
+ struct drm_printer m = drm_err_printer("pulse");
+
+ pr_err("%s: no heartbeat pulse?\n", engine->name);
+ intel_engine_dump(engine, &m, "%s", engine->name);
+
err = -ETIME;
goto out;
}
@@ -175,8 +200,7 @@ static int __live_heartbeat_fast(struct intel_engine_cs *engine)
int err;
int i;
- ce = intel_context_create(engine->kernel_context->gem_context,
- engine);
+ ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index d1752f15702a..09ff8e4f88af 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -6,6 +6,7 @@
*/
#include "selftest_llc.h"
+#include "selftest_rc6.h"
static int live_gt_resume(void *arg)
{
@@ -50,6 +51,7 @@ static int live_gt_resume(void *arg)
int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
+ SUBTEST(live_rc6_manual),
SUBTEST(live_gt_resume),
};
@@ -58,3 +60,20 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
return intel_gt_live_subtests(tests, &i915->gt);
}
+
+int intel_gt_pm_late_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ /*
+ * These tests may leave the system in an undesirable state.
+ * They are intended to be run last in CI and the system
+ * rebooted afterwards.
+ */
+ SUBTEST(live_rc6_ctx_wa),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 85e9ccf5c304..3e5e6c86e843 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -25,7 +25,9 @@
#include <linux/kthread.h>
#include "gem/i915_gem_context.h"
-#include "gt/intel_gt.h"
+
+#include "intel_gt.h"
+#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "i915_selftest.h"
@@ -308,6 +310,24 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq)
1000));
}
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+ unsigned long *saved)
+{
+ *saved = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
static int igt_hang_sanitycheck(void *arg)
{
struct intel_gt *gt = arg;
@@ -377,36 +397,30 @@ static int igt_reset_nop(void *arg)
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
unsigned int reset_count, count;
enum intel_engine_id id;
- struct drm_file *file;
IGT_TIMEOUT(end_time);
int err = 0;
/* Check that we can reset during non-user portions of requests */
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- ctx = live_context(gt->i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
-
- i915_gem_context_clear_bannable(ctx);
reset_count = i915_reset_count(global);
count = 0;
do {
for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
int i;
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
for (i = 0; i < 16; i++) {
struct i915_request *rq;
- rq = igt_request_alloc(ctx, engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
break;
@@ -414,6 +428,8 @@ static int igt_reset_nop(void *arg)
i915_request_add(rq);
}
+
+ intel_context_put(ce);
}
igt_global_reset_lock(gt);
@@ -437,10 +453,7 @@ static int igt_reset_nop(void *arg)
} while (time_before(jiffies, end_time));
pr_info("%s: %d resets\n", __func__, count);
- err = igt_flush_test(gt->i915);
-out:
- mock_file_free(gt->i915, file);
- if (intel_gt_is_wedged(gt))
+ if (igt_flush_test(gt->i915))
err = -EIO;
return err;
}
@@ -450,36 +463,29 @@ static int igt_reset_nop_engine(void *arg)
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
enum intel_engine_id id;
- struct drm_file *file;
- int err = 0;
/* Check that we can engine-reset during non-user portions */
if (!intel_has_reset_engine(gt))
return 0;
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- ctx = live_context(gt->i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
-
- i915_gem_context_clear_bannable(ctx);
for_each_engine(engine, gt, id) {
- unsigned int reset_count, reset_engine_count;
- unsigned int count;
+ unsigned int reset_count, reset_engine_count, count;
+ struct intel_context *ce;
+ unsigned long heartbeat;
IGT_TIMEOUT(end_time);
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
reset_count = i915_reset_count(global);
reset_engine_count = i915_reset_engine_count(global, engine);
count = 0;
+ engine_heartbeat_disable(engine, &heartbeat);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
int i;
@@ -494,7 +500,7 @@ static int igt_reset_nop_engine(void *arg)
for (i = 0; i < 16; i++) {
struct i915_request *rq;
- rq = igt_request_alloc(ctx, engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
break;
@@ -523,22 +529,18 @@ static int igt_reset_nop_engine(void *arg)
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
+ engine_heartbeat_enable(engine, heartbeat);
- if (err)
- break;
+ pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
- err = igt_flush_test(gt->i915);
+ intel_context_put(ce);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
if (err)
- break;
+ return err;
}
- err = igt_flush_test(gt->i915);
-out:
- mock_file_free(gt->i915, file);
- if (intel_gt_is_wedged(gt))
- err = -EIO;
- return err;
+ return 0;
}
static int __igt_reset_engine(struct intel_gt *gt, bool active)
@@ -562,6 +564,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
for_each_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count;
+ unsigned long heartbeat;
IGT_TIMEOUT(end_time);
if (active && !intel_engine_can_store_dword(engine))
@@ -577,7 +580,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
reset_count = i915_reset_count(global);
reset_engine_count = i915_reset_engine_count(global, engine);
- intel_engine_pm_get(engine);
+ engine_heartbeat_disable(engine, &heartbeat);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
if (active) {
@@ -629,7 +632,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- intel_engine_pm_put(engine);
+ engine_heartbeat_enable(engine, heartbeat);
if (err)
break;
@@ -699,43 +702,43 @@ static int active_engine(void *data)
struct active_engine *arg = data;
struct intel_engine_cs *engine = arg->engine;
struct i915_request *rq[8] = {};
- struct i915_gem_context *ctx[ARRAY_SIZE(rq)];
- struct drm_file *file;
- unsigned long count = 0;
+ struct intel_context *ce[ARRAY_SIZE(rq)];
+ unsigned long count;
int err = 0;
- file = mock_file(engine->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- for (count = 0; count < ARRAY_SIZE(ctx); count++) {
- ctx[count] = live_context(engine->i915, file);
- if (IS_ERR(ctx[count])) {
- err = PTR_ERR(ctx[count]);
+ for (count = 0; count < ARRAY_SIZE(ce); count++) {
+ ce[count] = intel_context_create(engine);
+ if (IS_ERR(ce[count])) {
+ err = PTR_ERR(ce[count]);
while (--count)
- i915_gem_context_put(ctx[count]);
- goto err_file;
+ intel_context_put(ce[count]);
+ return err;
}
}
+ count = 0;
while (!kthread_should_stop()) {
unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
struct i915_request *old = rq[idx];
struct i915_request *new;
- new = igt_request_alloc(ctx[idx], engine);
+ new = intel_context_create_request(ce[idx]);
if (IS_ERR(new)) {
err = PTR_ERR(new);
break;
}
- if (arg->flags & TEST_PRIORITY)
- ctx[idx]->sched.priority =
- i915_prandom_u32_max_state(512, &prng);
-
rq[idx] = i915_request_get(new);
i915_request_add(new);
+ if (engine->schedule && arg->flags & TEST_PRIORITY) {
+ struct i915_sched_attr attr = {
+ .priority =
+ i915_prandom_u32_max_state(512, &prng),
+ };
+ engine->schedule(rq[idx], &attr);
+ }
+
err = active_request_put(old);
if (err)
break;
@@ -749,10 +752,10 @@ static int active_engine(void *data)
/* Keep the first error */
if (!err)
err = err__;
+
+ intel_context_put(ce[count]);
}
-err_file:
- mock_file_free(engine->i915, file);
return err;
}
@@ -786,6 +789,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
struct active_engine threads[I915_NUM_ENGINES] = {};
unsigned long device = i915_reset_count(global);
unsigned long count = 0, reported;
+ unsigned long heartbeat;
IGT_TIMEOUT(end_time);
if (flags & TEST_ACTIVE &&
@@ -828,7 +832,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
yield(); /* start all threads before we begin */
- intel_engine_pm_get(engine);
+ engine_heartbeat_disable(engine, &heartbeat);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
struct i915_request *rq = NULL;
@@ -902,7 +906,8 @@ static int __igt_reset_engines(struct intel_gt *gt,
}
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
- intel_engine_pm_put(engine);
+ engine_heartbeat_enable(engine, heartbeat);
+
pr_info("i915_reset_engine(%s:%s): %lu resets\n",
engine->name, test_name, count);
@@ -1300,32 +1305,21 @@ static int igt_reset_evict_ggtt(void *arg)
static int igt_reset_evict_ppgtt(void *arg)
{
struct intel_gt *gt = arg;
- struct i915_gem_context *ctx;
- struct i915_address_space *vm;
- struct drm_file *file;
+ struct i915_ppgtt *ppgtt;
int err;
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
+ /* aliasing == global gtt locking, covered above */
+ if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL)
+ return 0;
- ctx = live_context(gt->i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
+ ppgtt = i915_ppgtt_create(gt);
+ if (IS_ERR(ppgtt))
+ return PTR_ERR(ppgtt);
- err = 0;
- vm = i915_gem_context_get_vm_rcu(ctx);
- if (!i915_is_ggtt(vm)) {
- /* aliasing == global gtt locking, covered above */
- err = __igt_reset_evict_vma(gt, vm,
- evict_vma, EXEC_OBJECT_WRITE);
- }
- i915_vm_put(vm);
+ err = __igt_reset_evict_vma(gt, &ppgtt->vm,
+ evict_vma, EXEC_OBJECT_WRITE);
+ i915_vm_put(&ppgtt->vm);
-out:
- mock_file_free(gt->i915, file);
return err;
}
@@ -1504,7 +1498,7 @@ static int igt_handle_error(void *arg)
struct intel_engine_cs *engine = gt->engine[RCS0];
struct hang h;
struct i915_request *rq;
- struct i915_gpu_state *error;
+ struct i915_gpu_coredump *error;
int err;
/* Check that we can issue a global GPU and engine reset */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index eb71ac2f992c..15cda024e3e4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -50,14 +50,31 @@ static struct i915_vma *create_scratch(struct intel_gt *gt)
return vma;
}
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+ unsigned long *saved)
+{
+ *saved = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
static int live_sanitycheck(void *arg)
{
struct intel_gt *gt = arg;
- struct i915_gem_engines_iter it;
- struct i915_gem_context *ctx;
- struct intel_context *ce;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
struct igt_spinner spin;
- int err = -ENOMEM;
+ int err = 0;
if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
return 0;
@@ -65,17 +82,20 @@ static int live_sanitycheck(void *arg)
if (igt_spinner_init(&spin, gt))
return -ENOMEM;
- ctx = kernel_context(gt->i915);
- if (!ctx)
- goto err_spin;
-
- for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
struct i915_request *rq;
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err_ctx;
+ goto out_ctx;
}
i915_request_add(rq);
@@ -84,21 +104,21 @@ static int live_sanitycheck(void *arg)
GEM_TRACE_DUMP();
intel_gt_set_wedged(gt);
err = -EIO;
- goto err_ctx;
+ goto out_ctx;
}
igt_spinner_end(&spin);
if (igt_flush_test(gt->i915)) {
err = -EIO;
- goto err_ctx;
+ goto out_ctx;
}
+
+out_ctx:
+ intel_context_put(ce);
+ if (err)
+ break;
}
- err = 0;
-err_ctx:
- i915_gem_context_unlock_engines(ctx);
- kernel_context_close(ctx);
-err_spin:
igt_spinner_fini(&spin);
return err;
}
@@ -106,7 +126,6 @@ err_spin:
static int live_unlite_restore(struct intel_gt *gt, int prio)
{
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
enum intel_engine_id id;
struct igt_spinner spin;
int err = -ENOMEM;
@@ -119,15 +138,12 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
if (igt_spinner_init(&spin, gt))
return err;
- ctx = kernel_context(gt->i915);
- if (!ctx)
- goto err_spin;
-
err = 0;
for_each_engine(engine, gt, id) {
struct intel_context *ce[2] = {};
struct i915_request *rq[2];
struct igt_live_test t;
+ unsigned long saved;
int n;
if (prio && !intel_engine_has_preemption(engine))
@@ -140,11 +156,12 @@ static int live_unlite_restore(struct intel_gt *gt, int prio)
err = -EIO;
break;
}
+ engine_heartbeat_disable(engine, &saved);
for (n = 0; n < ARRAY_SIZE(ce); n++) {
struct intel_context *tmp;
- tmp = intel_context_create(ctx, engine);
+ tmp = intel_context_create(engine);
if (IS_ERR(tmp)) {
err = PTR_ERR(tmp);
goto err_ce;
@@ -247,14 +264,13 @@ err_ce:
intel_context_put(ce[n]);
}
+ engine_heartbeat_enable(engine, saved);
if (igt_live_test_end(&t))
err = -EIO;
if (err)
break;
}
- kernel_context_close(ctx);
-err_spin:
igt_spinner_fini(&spin);
return err;
}
@@ -309,17 +325,17 @@ emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
static struct i915_request *
semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
{
- struct i915_gem_context *ctx;
+ struct intel_context *ce;
struct i915_request *rq;
int err;
- ctx = kernel_context(engine->i915);
- if (!ctx)
- return ERR_PTR(-ENOMEM);
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
- rq = igt_request_alloc(ctx, engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq))
- goto out_ctx;
+ goto out_ce;
err = 0;
if (rq->engine->emit_init_breadcrumb)
@@ -332,8 +348,8 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
if (err)
rq = ERR_PTR(err);
-out_ctx:
- kernel_context_close(ctx);
+out_ce:
+ intel_context_put(ce);
return rq;
}
@@ -348,7 +364,7 @@ release_queue(struct intel_engine_cs *engine,
struct i915_request *rq;
u32 *cs;
- rq = i915_request_create(engine->kernel_context);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
return PTR_ERR(rq);
@@ -468,12 +484,16 @@ static int live_timeslice_preempt(void *arg)
enum intel_engine_id id;
for_each_engine(engine, gt, id) {
+ unsigned long saved;
+
if (!intel_engine_has_preemption(engine))
continue;
memset(vaddr, 0, PAGE_SIZE);
+ engine_heartbeat_disable(engine, &saved);
err = slice_semaphore_queue(engine, vma, count);
+ engine_heartbeat_enable(engine, saved);
if (err)
goto err_pin;
@@ -497,7 +517,7 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
struct i915_request *rq;
- rq = i915_request_create(engine->kernel_context);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
return rq;
@@ -507,13 +527,19 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine)
return rq;
}
-static void wait_for_submit(struct intel_engine_cs *engine,
- struct i915_request *rq)
+static int wait_for_submit(struct intel_engine_cs *engine,
+ struct i915_request *rq,
+ unsigned long timeout)
{
+ timeout += jiffies;
do {
cond_resched();
intel_engine_flush_submission(engine);
- } while (!i915_request_is_active(rq));
+ if (i915_request_is_active(rq))
+ return 0;
+ } while (time_before(jiffies, timeout));
+
+ return -ETIME;
}
static long timeslice_threshold(const struct intel_engine_cs *engine)
@@ -566,40 +592,49 @@ static int live_timeslice_queue(void *arg)
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
};
struct i915_request *rq, *nop;
+ unsigned long saved;
if (!intel_engine_has_preemption(engine))
continue;
+ engine_heartbeat_disable(engine, &saved);
memset(vaddr, 0, PAGE_SIZE);
/* ELSP[0]: semaphore wait */
rq = semaphore_queue(engine, vma, 0);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err_pin;
+ goto err_heartbeat;
}
engine->schedule(rq, &attr);
- wait_for_submit(engine, rq);
+ err = wait_for_submit(engine, rq, HZ / 2);
+ if (err) {
+ pr_err("%s: Timed out trying to submit semaphores\n",
+ engine->name);
+ goto err_rq;
+ }
/* ELSP[1]: nop request */
nop = nop_request(engine);
if (IS_ERR(nop)) {
err = PTR_ERR(nop);
- i915_request_put(rq);
- goto err_pin;
+ goto err_rq;
}
- wait_for_submit(engine, nop);
+ err = wait_for_submit(engine, nop, HZ / 2);
i915_request_put(nop);
+ if (err) {
+ pr_err("%s: Timed out trying to submit nop\n",
+ engine->name);
+ goto err_rq;
+ }
GEM_BUG_ON(i915_request_completed(rq));
GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
/* Queue: semaphore signal, matching priority as semaphore */
err = release_queue(engine, vma, 1, effective_prio(rq));
- if (err) {
- i915_request_put(rq);
- goto err_pin;
- }
+ if (err)
+ goto err_rq;
intel_engine_flush_submission(engine);
if (!READ_ONCE(engine->execlists.timer.expires) &&
@@ -630,12 +665,14 @@ static int live_timeslice_queue(void *arg)
memset(vaddr, 0xff, PAGE_SIZE);
err = -EIO;
}
+err_rq:
i915_request_put(rq);
+err_heartbeat:
+ engine_heartbeat_enable(engine, saved);
if (err)
break;
}
-err_pin:
i915_vma_unpin(vma);
err_map:
i915_gem_object_unpin_map(obj);
@@ -748,15 +785,19 @@ static int live_busywait_preempt(void *arg)
*cs++ = 0;
intel_ring_advance(lo, cs);
+
+ i915_request_get(lo);
i915_request_add(lo);
if (wait_for(READ_ONCE(*map), 10)) {
+ i915_request_put(lo);
err = -ETIMEDOUT;
goto err_vma;
}
/* Low priority request should be busywaiting now */
if (i915_request_wait(lo, 0, 1) != -ETIME) {
+ i915_request_put(lo);
pr_err("%s: Busywaiting request did not!\n",
engine->name);
err = -EIO;
@@ -766,6 +807,7 @@ static int live_busywait_preempt(void *arg)
hi = igt_request_alloc(ctx_hi, engine);
if (IS_ERR(hi)) {
err = PTR_ERR(hi);
+ i915_request_put(lo);
goto err_vma;
}
@@ -773,6 +815,7 @@ static int live_busywait_preempt(void *arg)
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
i915_request_add(hi);
+ i915_request_put(lo);
goto err_vma;
}
@@ -793,11 +836,13 @@ static int live_busywait_preempt(void *arg)
intel_engine_dump(engine, &p, "%s\n", engine->name);
GEM_TRACE_DUMP();
+ i915_request_put(lo);
intel_gt_set_wedged(gt);
err = -EIO;
goto err_vma;
}
GEM_BUG_ON(READ_ONCE(*map));
+ i915_request_put(lo);
if (igt_live_test_end(&t)) {
err = -EIO;
@@ -1108,7 +1153,7 @@ static int live_nopreempt(void *arg)
}
/* Low priority client, but unpreemptable! */
- rq_a->flags |= I915_REQUEST_NOPREEMPT;
+ __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
i915_request_add(rq_a);
if (!igt_wait_for_spinner(&a.spin, rq_a)) {
@@ -1187,13 +1232,13 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
__func__, arg->engine->name))
return -EIO;
- clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
rq = spinner_create_request(&arg->a.spin,
arg->a.ctx, arg->engine,
MI_ARB_CHECK);
if (IS_ERR(rq))
return PTR_ERR(rq);
+ clear_bit(CONTEXT_BANNED, &rq->context->flags);
i915_request_get(rq);
i915_request_add(rq);
if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
@@ -1201,7 +1246,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
goto out;
}
- i915_gem_context_set_banned(arg->a.ctx);
+ intel_context_set_banned(rq->context);
err = intel_engine_pulse(arg->engine);
if (err)
goto out;
@@ -1236,13 +1281,13 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
__func__, arg->engine->name))
return -EIO;
- clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
rq[0] = spinner_create_request(&arg->a.spin,
arg->a.ctx, arg->engine,
MI_NOOP); /* no preemption */
if (IS_ERR(rq[0]))
return PTR_ERR(rq[0]);
+ clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
i915_request_get(rq[0]);
i915_request_add(rq[0]);
if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
@@ -1250,7 +1295,6 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
goto out;
}
- clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
rq[1] = spinner_create_request(&arg->b.spin,
arg->b.ctx, arg->engine,
MI_ARB_CHECK);
@@ -1259,13 +1303,14 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
goto out;
}
+ clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
i915_request_get(rq[1]);
err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
i915_request_add(rq[1]);
if (err)
goto out;
- i915_gem_context_set_banned(arg->b.ctx);
+ intel_context_set_banned(rq[1]->context);
err = intel_engine_pulse(arg->engine);
if (err)
goto out;
@@ -1308,13 +1353,13 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
__func__, arg->engine->name))
return -EIO;
- clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
rq[0] = spinner_create_request(&arg->a.spin,
arg->a.ctx, arg->engine,
MI_ARB_CHECK);
if (IS_ERR(rq[0]))
return PTR_ERR(rq[0]);
+ clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
i915_request_get(rq[0]);
i915_request_add(rq[0]);
if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
@@ -1322,13 +1367,13 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
goto out;
}
- clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags);
rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
if (IS_ERR(rq[1])) {
err = PTR_ERR(rq[1]);
goto out;
}
+ clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
i915_request_get(rq[1]);
err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
i915_request_add(rq[1]);
@@ -1349,7 +1394,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
if (err)
goto out;
- i915_gem_context_set_banned(arg->a.ctx);
+ intel_context_set_banned(rq[2]->context);
err = intel_engine_pulse(arg->engine);
if (err)
goto out;
@@ -1396,13 +1441,13 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
return 0;
GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
- clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags);
rq = spinner_create_request(&arg->a.spin,
arg->a.ctx, arg->engine,
MI_NOOP); /* preemption disabled */
if (IS_ERR(rq))
return PTR_ERR(rq);
+ clear_bit(CONTEXT_BANNED, &rq->context->flags);
i915_request_get(rq);
i915_request_add(rq);
if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
@@ -1410,7 +1455,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
goto out;
}
- i915_gem_context_set_banned(arg->a.ctx);
+ intel_context_set_banned(rq->context);
err = intel_engine_pulse(arg->engine); /* force reset */
if (err)
goto out;
@@ -1665,6 +1710,7 @@ static int live_suppress_wait_preempt(void *arg)
{
struct intel_gt *gt = arg;
struct preempt_client client[4];
+ struct i915_request *rq[ARRAY_SIZE(client)] = {};
struct intel_engine_cs *engine;
enum intel_engine_id id;
int err = -ENOMEM;
@@ -1698,7 +1744,6 @@ static int live_suppress_wait_preempt(void *arg)
continue;
for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
- struct i915_request *rq[ARRAY_SIZE(client)];
struct i915_request *dummy;
engine->execlists.preempt_hang.count = 0;
@@ -1708,18 +1753,22 @@ static int live_suppress_wait_preempt(void *arg)
goto err_client_3;
for (i = 0; i < ARRAY_SIZE(client); i++) {
- rq[i] = spinner_create_request(&client[i].spin,
- client[i].ctx, engine,
- MI_NOOP);
- if (IS_ERR(rq[i])) {
- err = PTR_ERR(rq[i]);
+ struct i915_request *this;
+
+ this = spinner_create_request(&client[i].spin,
+ client[i].ctx, engine,
+ MI_NOOP);
+ if (IS_ERR(this)) {
+ err = PTR_ERR(this);
goto err_wedged;
}
/* Disable NEWCLIENT promotion */
- __i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
+ __i915_active_fence_set(&i915_request_timeline(this)->last_request,
&dummy->fence);
- i915_request_add(rq[i]);
+
+ rq[i] = i915_request_get(this);
+ i915_request_add(this);
}
dummy_request_free(dummy);
@@ -1740,8 +1789,11 @@ static int live_suppress_wait_preempt(void *arg)
goto err_wedged;
}
- for (i = 0; i < ARRAY_SIZE(client); i++)
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
igt_spinner_end(&client[i].spin);
+ i915_request_put(rq[i]);
+ rq[i] = NULL;
+ }
if (igt_flush_test(gt->i915))
goto err_wedged;
@@ -1769,8 +1821,10 @@ err_client_0:
return err;
err_wedged:
- for (i = 0; i < ARRAY_SIZE(client); i++)
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
igt_spinner_end(&client[i].spin);
+ i915_request_put(rq[i]);
+ }
intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_3;
@@ -1815,6 +1869,8 @@ static int live_chain_preempt(void *arg)
MI_ARB_CHECK);
if (IS_ERR(rq))
goto err_wedged;
+
+ i915_request_get(rq);
i915_request_add(rq);
ring_size = rq->wa_tail - rq->head;
@@ -1827,8 +1883,10 @@ static int live_chain_preempt(void *arg)
igt_spinner_end(&lo.spin);
if (i915_request_wait(rq, 0, HZ / 2) < 0) {
pr_err("Timed out waiting to flush %s\n", engine->name);
+ i915_request_put(rq);
goto err_wedged;
}
+ i915_request_put(rq);
if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
@@ -1862,6 +1920,8 @@ static int live_chain_preempt(void *arg)
rq = igt_request_alloc(hi.ctx, engine);
if (IS_ERR(rq))
goto err_wedged;
+
+ i915_request_get(rq);
i915_request_add(rq);
engine->schedule(rq, &attr);
@@ -1874,14 +1934,19 @@ static int live_chain_preempt(void *arg)
count);
intel_engine_dump(engine, &p,
"%s\n", engine->name);
+ i915_request_put(rq);
goto err_wedged;
}
igt_spinner_end(&lo.spin);
+ i915_request_put(rq);
rq = igt_request_alloc(lo.ctx, engine);
if (IS_ERR(rq))
goto err_wedged;
+
+ i915_request_get(rq);
i915_request_add(rq);
+
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
@@ -1890,8 +1955,11 @@ static int live_chain_preempt(void *arg)
count);
intel_engine_dump(engine, &p,
"%s\n", engine->name);
+
+ i915_request_put(rq);
goto err_wedged;
}
+ i915_request_put(rq);
}
if (igt_live_test_end(&t)) {
@@ -1915,6 +1983,201 @@ err_wedged:
goto err_client_lo;
}
+static int create_gang(struct intel_engine_cs *engine,
+ struct i915_request **prev)
+{
+ struct drm_i915_gem_object *obj;
+ struct intel_context *ce;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ obj = i915_gem_object_create_internal(engine->i915, 4096);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err_ce;
+ }
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err_obj;
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(cs))
+ goto err_obj;
+
+ /* Semaphore target: spin until zero */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = lower_32_bits(vma->node.start);
+ *cs++ = upper_32_bits(vma->node.start);
+
+ if (*prev) {
+ u64 offset = (*prev)->batch->node.start;
+
+ /* Terminate the spinner in the next lower priority batch. */
+ *cs++ = MI_STORE_DWORD_IMM_GEN4;
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+ *cs++ = 0;
+ }
+
+ *cs++ = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ goto err_obj;
+
+ rq->batch = vma;
+ i915_request_get(rq);
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(vma);
+ i915_request_add(rq);
+ if (err)
+ goto err_rq;
+
+ i915_gem_object_put(obj);
+ intel_context_put(ce);
+
+ rq->client_link.next = &(*prev)->client_link;
+ *prev = rq;
+ return 0;
+
+err_rq:
+ i915_request_put(rq);
+err_obj:
+ i915_gem_object_put(obj);
+err_ce:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_preempt_gang(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ /*
+ * Build as long a chain of preempters as we can, with each
+ * request higher priority than the last. Once we are ready, we release
+ * the last batch which then precolates down the chain, each releasing
+ * the next oldest in turn. The intent is to simply push as hard as we
+ * can with the number of preemptions, trying to exceed narrow HW
+ * limits. At a minimum, we insist that we can sort all the user
+ * high priority levels into execution order.
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct i915_request *rq = NULL;
+ struct igt_live_test t;
+ IGT_TIMEOUT(end_time);
+ int prio = 0;
+ int err = 0;
+ u32 *cs;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
+ return -EIO;
+
+ do {
+ struct i915_sched_attr attr = {
+ .priority = I915_USER_PRIORITY(prio++),
+ };
+
+ err = create_gang(engine, &rq);
+ if (err)
+ break;
+
+ /* Submit each spinner at increasing priority */
+ engine->schedule(rq, &attr);
+
+ if (prio <= I915_PRIORITY_MAX)
+ continue;
+
+ if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
+ break;
+
+ if (__igt_timeout(end_time, NULL))
+ break;
+ } while (1);
+ pr_debug("%s: Preempt chain of %d requests\n",
+ engine->name, prio);
+
+ /*
+ * Such that the last spinner is the highest priority and
+ * should execute first. When that spinner completes,
+ * it will terminate the next lowest spinner until there
+ * are no more spinners and the gang is complete.
+ */
+ cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
+ if (!IS_ERR(cs)) {
+ *cs = 0;
+ i915_gem_object_unpin_map(rq->batch->obj);
+ } else {
+ err = PTR_ERR(cs);
+ intel_gt_set_wedged(gt);
+ }
+
+ while (rq) { /* wait for each rq from highest to lowest prio */
+ struct i915_request *n =
+ list_next_entry(rq, client_link);
+
+ if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
+ struct drm_printer p =
+ drm_info_printer(engine->i915->drm.dev);
+
+ pr_err("Failed to flush chain of %d requests, at %d\n",
+ prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ err = -ETIME;
+ }
+
+ i915_request_put(rq);
+ rq = n;
+ }
+
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int live_preempt_hang(void *arg)
{
struct intel_gt *gt = arg;
@@ -2391,28 +2654,18 @@ static int nop_virtual_engine(struct intel_gt *gt,
#define CHAIN BIT(0)
{
IGT_TIMEOUT(end_time);
- struct i915_request *request[16];
- struct i915_gem_context *ctx[16];
+ struct i915_request *request[16] = {};
struct intel_context *ve[16];
unsigned long n, prime, nc;
struct igt_live_test t;
ktime_t times[2] = {};
int err;
- GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+ GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
for (n = 0; n < nctx; n++) {
- ctx[n] = kernel_context(gt->i915);
- if (!ctx[n]) {
- err = -ENOMEM;
- nctx = n;
- goto out;
- }
-
- ve[n] = intel_execlists_create_virtual(ctx[n],
- siblings, nsibling);
+ ve[n] = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve[n])) {
- kernel_context_close(ctx[n]);
err = PTR_ERR(ve[n]);
nctx = n;
goto out;
@@ -2421,7 +2674,6 @@ static int nop_virtual_engine(struct intel_gt *gt,
err = intel_context_pin(ve[n]);
if (err) {
intel_context_put(ve[n]);
- kernel_context_close(ctx[n]);
nctx = n;
goto out;
}
@@ -2437,27 +2689,35 @@ static int nop_virtual_engine(struct intel_gt *gt,
if (flags & CHAIN) {
for (nc = 0; nc < nctx; nc++) {
for (n = 0; n < prime; n++) {
- request[nc] =
- i915_request_create(ve[nc]);
- if (IS_ERR(request[nc])) {
- err = PTR_ERR(request[nc]);
+ struct i915_request *rq;
+
+ rq = i915_request_create(ve[nc]);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
goto out;
}
- i915_request_add(request[nc]);
+ if (request[nc])
+ i915_request_put(request[nc]);
+ request[nc] = i915_request_get(rq);
+ i915_request_add(rq);
}
}
} else {
for (n = 0; n < prime; n++) {
for (nc = 0; nc < nctx; nc++) {
- request[nc] =
- i915_request_create(ve[nc]);
- if (IS_ERR(request[nc])) {
- err = PTR_ERR(request[nc]);
+ struct i915_request *rq;
+
+ rq = i915_request_create(ve[nc]);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
goto out;
}
- i915_request_add(request[nc]);
+ if (request[nc])
+ i915_request_put(request[nc]);
+ request[nc] = i915_request_get(rq);
+ i915_request_add(rq);
}
}
}
@@ -2483,6 +2743,11 @@ static int nop_virtual_engine(struct intel_gt *gt,
if (prime == 1)
times[0] = times[1];
+ for (nc = 0; nc < nctx; nc++) {
+ i915_request_put(request[nc]);
+ request[nc] = NULL;
+ }
+
if (__igt_timeout(end_time, NULL))
break;
}
@@ -2500,9 +2765,9 @@ out:
err = -EIO;
for (nc = 0; nc < nctx; nc++) {
+ i915_request_put(request[nc]);
intel_context_unpin(ve[nc]);
intel_context_put(ve[nc]);
- kernel_context_close(ctx[nc]);
}
return err;
}
@@ -2561,7 +2826,6 @@ static int mask_virtual_engine(struct intel_gt *gt,
unsigned int nsibling)
{
struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
- struct i915_gem_context *ctx;
struct intel_context *ve;
struct igt_live_test t;
unsigned int n;
@@ -2572,11 +2836,7 @@ static int mask_virtual_engine(struct intel_gt *gt,
* restrict it to our desired engine within the virtual engine.
*/
- ctx = kernel_context(gt->i915);
- if (!ctx)
- return -ENOMEM;
-
- ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+ ve = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_close;
@@ -2644,7 +2904,6 @@ out_unpin:
out_put:
intel_context_put(ve);
out_close:
- kernel_context_close(ctx);
return err;
}
@@ -2684,7 +2943,6 @@ static int preserved_virtual_engine(struct intel_gt *gt,
unsigned int nsibling)
{
struct i915_request *last = NULL;
- struct i915_gem_context *ctx;
struct intel_context *ve;
struct i915_vma *scratch;
struct igt_live_test t;
@@ -2692,17 +2950,11 @@ static int preserved_virtual_engine(struct intel_gt *gt,
int err = 0;
u32 *cs;
- ctx = kernel_context(gt->i915);
- if (!ctx)
- return -ENOMEM;
-
scratch = create_scratch(siblings[0]->gt);
- if (IS_ERR(scratch)) {
- err = PTR_ERR(scratch);
- goto out_close;
- }
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
- ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+ ve = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_scratch;
@@ -2785,8 +3037,6 @@ out_put:
intel_context_put(ve);
out_scratch:
i915_vma_unpin_and_release(&scratch, 0);
-out_close:
- kernel_context_close(ctx);
return err;
}
@@ -2838,16 +3088,54 @@ static int bond_virtual_engine(struct intel_gt *gt,
#define BOND_SCHEDULE BIT(0)
{
struct intel_engine_cs *master;
- struct i915_gem_context *ctx;
struct i915_request *rq[16];
enum intel_engine_id id;
+ struct igt_spinner spin;
unsigned long n;
int err;
+ /*
+ * A set of bonded requests is intended to be run concurrently
+ * across a number of engines. We use one request per-engine
+ * and a magic fence to schedule each of the bonded requests
+ * at the same time. A consequence of our current scheduler is that
+ * we only move requests to the HW ready queue when the request
+ * becomes ready, that is when all of its prerequisite fences have
+ * been signaled. As one of those fences is the master submit fence,
+ * there is a delay on all secondary fences as the HW may be
+ * currently busy. Equally, as all the requests are independent,
+ * they may have other fences that delay individual request
+ * submission to HW. Ergo, we do not guarantee that all requests are
+ * immediately submitted to HW at the same time, just that if the
+ * rules are abided by, they are ready at the same time as the
+ * first is submitted. Userspace can embed semaphores in its batch
+ * to ensure parallel execution of its phases as it requires.
+ * Though naturally it gets requested that perhaps the scheduler should
+ * take care of parallel execution, even across preemption events on
+ * different HW. (The proper answer is of course "lalalala".)
+ *
+ * With the submit-fence, we have identified three possible phases
+ * of synchronisation depending on the master fence: queued (not
+ * ready), executing, and signaled. The first two are quite simple
+ * and checked below. However, the signaled master fence handling is
+ * contentious. Currently we do not distinguish between a signaled
+ * fence and an expired fence, as once signaled it does not convey
+ * any information about the previous execution. It may even be freed
+ * and hence checking later it may not exist at all. Ergo we currently
+ * do not apply the bonding constraint for an already signaled fence,
+ * as our expectation is that it should not constrain the secondaries
+ * and is outside of the scope of the bonded request API (i.e. all
+ * userspace requests are meant to be running in parallel). As
+ * it imposes no constraint, and is effectively a no-op, we do not
+ * check below as normal execution flows are checked extensively above.
+ *
+ * XXX Is the degenerate handling of signaled submit fences the
+ * expected behaviour for userpace?
+ */
+
GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
- ctx = kernel_context(gt->i915);
- if (!ctx)
+ if (igt_spinner_init(&spin, gt))
return -ENOMEM;
err = 0;
@@ -2860,7 +3148,9 @@ static int bond_virtual_engine(struct intel_gt *gt,
memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
- rq[0] = igt_request_alloc(ctx, master);
+ rq[0] = igt_spinner_create_request(&spin,
+ master->kernel_context,
+ MI_NOOP);
if (IS_ERR(rq[0])) {
err = PTR_ERR(rq[0]);
goto out;
@@ -2873,16 +3163,21 @@ static int bond_virtual_engine(struct intel_gt *gt,
&fence,
GFP_KERNEL);
}
+
i915_request_add(rq[0]);
if (err < 0)
goto out;
+ if (!(flags & BOND_SCHEDULE) &&
+ !igt_wait_for_spinner(&spin, rq[0])) {
+ err = -EIO;
+ goto out;
+ }
+
for (n = 0; n < nsibling; n++) {
struct intel_context *ve;
- ve = intel_execlists_create_virtual(ctx,
- siblings,
- nsibling);
+ ve = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
onstack_fence_fini(&fence);
@@ -2924,6 +3219,8 @@ static int bond_virtual_engine(struct intel_gt *gt,
}
}
onstack_fence_fini(&fence);
+ intel_engine_flush_submission(master);
+ igt_spinner_end(&spin);
if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
pr_err("Master request did not execute (on %s)!\n",
@@ -2960,7 +3257,7 @@ out:
if (igt_flush_test(gt->i915))
err = -EIO;
- kernel_context_close(ctx);
+ igt_spinner_fini(&spin);
return err;
}
@@ -3028,6 +3325,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_suppress_self_preempt),
SUBTEST(live_suppress_wait_preempt),
SUBTEST(live_chain_preempt),
+ SUBTEST(live_preempt_gang),
SUBTEST(live_preempt_hang),
SUBTEST(live_preempt_timeout),
SUBTEST(live_preempt_smoke),
@@ -3080,7 +3378,7 @@ static int live_lrc_layout(void *arg)
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- u32 *mem;
+ u32 *lrc;
int err;
/*
@@ -3088,13 +3386,13 @@ static int live_lrc_layout(void *arg)
* match the layout saved by HW.
*/
- mem = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!mem)
+ lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!lrc)
return -ENOMEM;
err = 0;
for_each_engine(engine, gt, id) {
- u32 *hw, *lrc;
+ u32 *hw;
int dw;
if (!engine->default_state)
@@ -3108,8 +3406,7 @@ static int live_lrc_layout(void *arg)
}
hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
- lrc = memset(mem, 0, PAGE_SIZE);
- execlists_init_reg_state(lrc,
+ execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
engine->kernel_context,
engine,
engine->kernel_context->ring,
@@ -3124,6 +3421,13 @@ static int live_lrc_layout(void *arg)
continue;
}
+ if (lrc[dw] == 0) {
+ pr_debug("%s: skipped instruction %x at dword %d\n",
+ engine->name, lri, dw);
+ dw++;
+ continue;
+ }
+
if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
pr_err("%s: Expected LRI command at dword %d, found %08x\n",
engine->name, dw, lri);
@@ -3172,7 +3476,7 @@ static int live_lrc_layout(void *arg)
break;
}
- kfree(mem);
+ kfree(lrc);
return err;
}
@@ -3207,12 +3511,12 @@ static int live_lrc_fixed(void *arg)
} tbl[] = {
{
i915_mmio_reg_offset(RING_START(engine->mmio_base)),
- CTX_RING_BUFFER_START - 1,
+ CTX_RING_START - 1,
"RING_START"
},
{
i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
- CTX_RING_BUFFER_CONTROL - 1,
+ CTX_RING_CTL - 1,
"RING_CTL"
},
{
@@ -3231,7 +3535,7 @@ static int live_lrc_fixed(void *arg)
"RING_MI_MODE"
},
{
- engine->mmio_base + 0x110,
+ i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
CTX_BB_STATE - 1,
"BB_STATE"
},
@@ -3270,8 +3574,7 @@ static int live_lrc_fixed(void *arg)
return err;
}
-static int __live_lrc_state(struct i915_gem_context *fixme,
- struct intel_engine_cs *engine,
+static int __live_lrc_state(struct intel_engine_cs *engine,
struct i915_vma *scratch)
{
struct intel_context *ce;
@@ -3286,7 +3589,7 @@ static int __live_lrc_state(struct i915_gem_context *fixme,
int err;
int n;
- ce = intel_context_create(fixme, engine);
+ ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
@@ -3360,7 +3663,6 @@ static int live_lrc_state(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *fixme;
struct i915_vma *scratch;
enum intel_engine_id id;
int err = 0;
@@ -3370,18 +3672,12 @@ static int live_lrc_state(void *arg)
* intel_context.
*/
- fixme = kernel_context(gt->i915);
- if (!fixme)
- return -ENOMEM;
-
scratch = create_scratch(gt);
- if (IS_ERR(scratch)) {
- err = PTR_ERR(scratch);
- goto out_close;
- }
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
for_each_engine(engine, gt, id) {
- err = __live_lrc_state(fixme, engine, scratch);
+ err = __live_lrc_state(engine, scratch);
if (err)
break;
}
@@ -3390,8 +3686,6 @@ static int live_lrc_state(void *arg)
err = -EIO;
i915_vma_unpin_and_release(&scratch, 0);
-out_close:
- kernel_context_close(fixme);
return err;
}
@@ -3401,7 +3695,7 @@ static int gpr_make_dirty(struct intel_engine_cs *engine)
u32 *cs;
int n;
- rq = i915_request_create(engine->kernel_context);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
return PTR_ERR(rq);
@@ -3424,8 +3718,7 @@ static int gpr_make_dirty(struct intel_engine_cs *engine)
return 0;
}
-static int __live_gpr_clear(struct i915_gem_context *fixme,
- struct intel_engine_cs *engine,
+static int __live_gpr_clear(struct intel_engine_cs *engine,
struct i915_vma *scratch)
{
struct intel_context *ce;
@@ -3441,7 +3734,7 @@ static int __live_gpr_clear(struct i915_gem_context *fixme,
if (err)
return err;
- ce = intel_context_create(fixme, engine);
+ ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
@@ -3503,7 +3796,6 @@ static int live_gpr_clear(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *fixme;
struct i915_vma *scratch;
enum intel_engine_id id;
int err = 0;
@@ -3513,18 +3805,12 @@ static int live_gpr_clear(void *arg)
* to avoid leaking any information from previous contexts.
*/
- fixme = kernel_context(gt->i915);
- if (!fixme)
- return -ENOMEM;
-
scratch = create_scratch(gt);
- if (IS_ERR(scratch)) {
- err = PTR_ERR(scratch);
- goto out_close;
- }
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
for_each_engine(engine, gt, id) {
- err = __live_gpr_clear(fixme, engine, scratch);
+ err = __live_gpr_clear(engine, scratch);
if (err)
break;
}
@@ -3533,8 +3819,6 @@ static int live_gpr_clear(void *arg)
err = -EIO;
i915_vma_unpin_and_release(&scratch, 0);
-out_close:
- kernel_context_close(fixme);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
new file mode 100644
index 000000000000..de1f83100fb6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -0,0 +1,419 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gt/intel_engine_pm.h"
+#include "i915_selftest.h"
+
+#include "gem/selftests/mock_context.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+
+struct live_mocs {
+ struct drm_i915_mocs_table table;
+ struct i915_vma *scratch;
+ void *vaddr;
+};
+
+static int request_add_sync(struct i915_request *rq, int err)
+{
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ return err;
+}
+
+static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
+{
+ int err = 0;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (spin && !igt_wait_for_spinner(spin, rq))
+ err = -ETIME;
+ i915_request_put(rq);
+
+ return err;
+}
+
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
+static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt)
+{
+ int err;
+
+ if (!get_mocs_settings(gt->i915, &arg->table))
+ return -EINVAL;
+
+ arg->scratch = create_scratch(gt);
+ if (IS_ERR(arg->scratch))
+ return PTR_ERR(arg->scratch);
+
+ arg->vaddr = i915_gem_object_pin_map(arg->scratch->obj, I915_MAP_WB);
+ if (IS_ERR(arg->vaddr)) {
+ err = PTR_ERR(arg->vaddr);
+ goto err_scratch;
+ }
+
+ return 0;
+
+err_scratch:
+ i915_vma_unpin_and_release(&arg->scratch, 0);
+ return err;
+}
+
+static void live_mocs_fini(struct live_mocs *arg)
+{
+ i915_vma_unpin_and_release(&arg->scratch, I915_VMA_RELEASE_MAP);
+}
+
+static int read_regs(struct i915_request *rq,
+ u32 addr, unsigned int count,
+ uint32_t *offset)
+{
+ unsigned int i;
+ u32 *cs;
+
+ GEM_BUG_ON(!IS_ALIGNED(*offset, sizeof(u32)));
+
+ cs = intel_ring_begin(rq, 4 * count);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ for (i = 0; i < count; i++) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = addr;
+ *cs++ = *offset;
+ *cs++ = 0;
+
+ addr += sizeof(u32);
+ *offset += sizeof(u32);
+ }
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int read_mocs_table(struct i915_request *rq,
+ const struct drm_i915_mocs_table *table,
+ uint32_t *offset)
+{
+ u32 addr;
+
+ if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915))
+ addr = global_mocs_offset();
+ else
+ addr = mocs_offset(rq->engine);
+
+ return read_regs(rq, addr, table->n_entries, offset);
+}
+
+static int read_l3cc_table(struct i915_request *rq,
+ const struct drm_i915_mocs_table *table,
+ uint32_t *offset)
+{
+ u32 addr = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0));
+
+ return read_regs(rq, addr, (table->n_entries + 1) / 2, offset);
+}
+
+static int check_mocs_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table,
+ uint32_t **vaddr)
+{
+ unsigned int i;
+ u32 expect;
+
+ for_each_mocs(expect, table, i) {
+ if (**vaddr != expect) {
+ pr_err("%s: Invalid MOCS[%d] entry, found %08x, expected %08x\n",
+ engine->name, i, **vaddr, expect);
+ return -EINVAL;
+ }
+ ++*vaddr;
+ }
+
+ return 0;
+}
+
+static bool mcr_range(struct drm_i915_private *i915, u32 offset)
+{
+ /*
+ * Registers in this range are affected by the MCR selector
+ * which only controls CPU initiated MMIO. Routing does not
+ * work for CS access so we cannot verify them on this path.
+ */
+ return INTEL_GEN(i915) >= 8 && offset >= 0xb000 && offset <= 0xb4ff;
+}
+
+static int check_l3cc_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table,
+ uint32_t **vaddr)
+{
+ /* Can we read the MCR range 0xb00 directly? See intel_workarounds! */
+ u32 reg = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0));
+ unsigned int i;
+ u32 expect;
+
+ for_each_l3cc(expect, table, i) {
+ if (!mcr_range(engine->i915, reg) && **vaddr != expect) {
+ pr_err("%s: Invalid L3CC[%d] entry, found %08x, expected %08x\n",
+ engine->name, i, **vaddr, expect);
+ return -EINVAL;
+ }
+ ++*vaddr;
+ reg += 4;
+ }
+
+ return 0;
+}
+
+static int check_mocs_engine(struct live_mocs *arg,
+ struct intel_context *ce)
+{
+ struct i915_vma *vma = arg->scratch;
+ struct i915_request *rq;
+ u32 offset;
+ u32 *vaddr;
+ int err;
+
+ memset32(arg->vaddr, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, true);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ i915_vma_unlock(vma);
+
+ /* Read the mocs tables back using SRM */
+ offset = i915_ggtt_offset(vma);
+ if (!err)
+ err = read_mocs_table(rq, &arg->table, &offset);
+ if (!err && ce->engine->class == RENDER_CLASS)
+ err = read_l3cc_table(rq, &arg->table, &offset);
+ offset -= i915_ggtt_offset(vma);
+ GEM_BUG_ON(offset > PAGE_SIZE);
+
+ err = request_add_sync(rq, err);
+ if (err)
+ return err;
+
+ /* Compare the results against the expected tables */
+ vaddr = arg->vaddr;
+ if (!err)
+ err = check_mocs_table(ce->engine, &arg->table, &vaddr);
+ if (!err && ce->engine->class == RENDER_CLASS)
+ err = check_l3cc_table(ce->engine, &arg->table, &vaddr);
+ if (err)
+ return err;
+
+ GEM_BUG_ON(arg->vaddr + offset != vaddr);
+ return 0;
+}
+
+static int live_mocs_kernel(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct live_mocs mocs;
+ int err;
+
+ /* Basic check the system is configured with the expected mocs table */
+
+ err = live_mocs_init(&mocs, gt);
+ if (err)
+ return err;
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = check_mocs_engine(&mocs, engine->kernel_context);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+ }
+
+ live_mocs_fini(&mocs);
+ return err;
+}
+
+static int live_mocs_clean(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct live_mocs mocs;
+ int err;
+
+ /* Every new context should see the same mocs table */
+
+ err = live_mocs_init(&mocs, gt);
+ if (err)
+ return err;
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ err = check_mocs_engine(&mocs, ce);
+ intel_context_put(ce);
+ if (err)
+ break;
+ }
+
+ live_mocs_fini(&mocs);
+ return err;
+}
+
+static int active_engine_reset(struct intel_context *ce,
+ const char *reason)
+{
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ int err;
+
+ err = igt_spinner_init(&spin, ce->engine->gt);
+ if (err)
+ return err;
+
+ rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+ if (IS_ERR(rq)) {
+ igt_spinner_fini(&spin);
+ return PTR_ERR(rq);
+ }
+
+ err = request_add_spin(rq, &spin);
+ if (err == 0)
+ err = intel_engine_reset(ce->engine, reason);
+
+ igt_spinner_end(&spin);
+ igt_spinner_fini(&spin);
+
+ return err;
+}
+
+static int __live_mocs_reset(struct live_mocs *mocs,
+ struct intel_context *ce)
+{
+ int err;
+
+ err = intel_engine_reset(ce->engine, "mocs");
+ if (err)
+ return err;
+
+ err = check_mocs_engine(mocs, ce);
+ if (err)
+ return err;
+
+ err = active_engine_reset(ce, "mocs");
+ if (err)
+ return err;
+
+ err = check_mocs_engine(mocs, ce);
+ if (err)
+ return err;
+
+ intel_gt_reset(ce->engine->gt, ce->engine->mask, "mocs");
+
+ err = check_mocs_engine(mocs, ce);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int live_mocs_reset(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct live_mocs mocs;
+ int err = 0;
+
+ /* Check the mocs setup is retained over per-engine and global resets */
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ err = live_mocs_init(&mocs, gt);
+ if (err)
+ return err;
+
+ igt_global_reset_lock(gt);
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ intel_engine_pm_get(engine);
+ err = __live_mocs_reset(&mocs, ce);
+ intel_engine_pm_put(engine);
+
+ intel_context_put(ce);
+ if (err)
+ break;
+ }
+ igt_global_reset_unlock(gt);
+
+ live_mocs_fini(&mocs);
+ return err;
+}
+
+int intel_mocs_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_mocs_kernel),
+ SUBTEST(live_mocs_clean),
+ SUBTEST(live_mocs_reset),
+ };
+ struct drm_i915_mocs_table table;
+
+ if (!get_mocs_settings(i915, &table))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
new file mode 100644
index 000000000000..8cc55a0e9e06
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -0,0 +1,203 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_context.h"
+#include "intel_engine_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_ring.h"
+#include "selftest_rc6.h"
+
+#include "selftests/i915_random.h"
+
+int live_rc6_manual(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rc6 *rc6 = &gt->rc6;
+ intel_wakeref_t wakeref;
+ u64 res[2];
+ int err = 0;
+
+ /*
+ * Our claim is that we can "encourage" the GPU to enter rc6 at will.
+ * Let's try it!
+ */
+
+ if (!rc6->enabled)
+ return 0;
+
+ /* bsw/byt use a PCU and decouple RC6 from our manual control */
+ if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
+ return 0;
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+ /* Force RC6 off for starters */
+ __intel_rc6_disable(rc6);
+ msleep(1); /* wakeup is not immediate, takes about 100us on icl */
+
+ res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ msleep(250);
+ res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ if ((res[1] - res[0]) >> 10) {
+ pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
+ (res[1] - res[0]) >> 10);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* Manually enter RC6 */
+ intel_rc6_park(rc6);
+
+ res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ msleep(100);
+ res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+
+ if (res[1] == res[0]) {
+ pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x\n",
+ intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE),
+ intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL));
+ err = -EINVAL;
+ }
+
+ /* Restore what should have been the original state! */
+ intel_rc6_unpark(rc6);
+
+out_unlock:
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ return err;
+}
+
+static const u32 *__live_rc6_ctx(struct intel_context *ce)
+{
+ struct i915_request *rq;
+ const u32 *result;
+ u32 cmd;
+ u32 *cs;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return ERR_CAST(rq);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return cs;
+ }
+
+ cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
+ if (INTEL_GEN(rq->i915) >= 8)
+ cmd++;
+
+ *cs++ = cmd;
+ *cs++ = i915_mmio_reg_offset(GEN8_RC6_CTX_INFO);
+ *cs++ = ce->timeline->hwsp_offset + 8;
+ *cs++ = 0;
+ intel_ring_advance(rq, cs);
+
+ result = rq->hwsp_seqno + 2;
+ i915_request_add(rq);
+
+ return result;
+}
+
+static struct intel_engine_cs **
+randomised_engines(struct intel_gt *gt,
+ struct rnd_state *prng,
+ unsigned int *count)
+{
+ struct intel_engine_cs *engine, **engines;
+ enum intel_engine_id id;
+ int n;
+
+ n = 0;
+ for_each_engine(engine, gt, id)
+ n++;
+ if (!n)
+ return NULL;
+
+ engines = kmalloc_array(n, sizeof(*engines), GFP_KERNEL);
+ if (!engines)
+ return NULL;
+
+ n = 0;
+ for_each_engine(engine, gt, id)
+ engines[n++] = engine;
+
+ i915_prandom_shuffle(engines, sizeof(*engines), n, prng);
+
+ *count = n;
+ return engines;
+}
+
+int live_rc6_ctx_wa(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs **engines;
+ unsigned int n, count;
+ I915_RND_STATE(prng);
+ int err = 0;
+
+ /* A read of CTX_INFO upsets rc6. Poke the bear! */
+ if (INTEL_GEN(gt->i915) < 8)
+ return 0;
+
+ engines = randomised_engines(gt, &prng, &count);
+ if (!engines)
+ return 0;
+
+ for (n = 0; n < count; n++) {
+ struct intel_engine_cs *engine = engines[n];
+ int pass;
+
+ for (pass = 0; pass < 2; pass++) {
+ struct intel_context *ce;
+ unsigned int resets =
+ i915_reset_engine_count(&gt->i915->gpu_error,
+ engine);
+ const u32 *res;
+
+ /* Use a sacrifical context */
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ intel_engine_pm_get(engine);
+ res = __live_rc6_ctx(ce);
+ intel_engine_pm_put(engine);
+ intel_context_put(ce);
+ if (IS_ERR(res)) {
+ err = PTR_ERR(res);
+ goto out;
+ }
+
+ if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
+ intel_gt_set_wedged(gt);
+ err = -ETIME;
+ goto out;
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+ pr_debug("%s: CTX_INFO=%0x\n",
+ engine->name, READ_ONCE(*res));
+
+ if (resets !=
+ i915_reset_engine_count(&gt->i915->gpu_error,
+ engine)) {
+ pr_err("%s: GPU reset required\n",
+ engine->name);
+ add_taint_for_CI(TAINT_WARN);
+ err = -EIO;
+ goto out;
+ }
+ }
+ }
+
+out:
+ kfree(engines);
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.h b/drivers/gpu/drm/i915/gt/selftest_rc6.h
new file mode 100644
index 000000000000..762fd442d7b2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef SELFTEST_RC6_H
+#define SELFTEST_RC6_H
+
+int live_rc6_ctx_wa(void *arg);
+int live_rc6_manual(void *arg);
+
+#endif /* SELFTEST_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index f04a59fe5d2c..e2d78cc22fb4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -458,7 +458,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
goto out;
}
- rq = i915_request_create(engine->kernel_context);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
goto out_unpin;
@@ -675,9 +675,7 @@ static int live_hwsp_wrap(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- intel_engine_pm_get(engine);
- rq = i915_request_create(engine->kernel_context);
- intel_engine_pm_put(engine);
+ rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out;
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index abce6e4ec9c0..ac1921854cbf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -264,22 +264,15 @@ static int
switch_to_scratch_context(struct intel_engine_cs *engine,
struct igt_spinner *spin)
{
- struct i915_gem_context *ctx;
struct intel_context *ce;
struct i915_request *rq;
int err = 0;
- ctx = kernel_context(engine->i915);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- GEM_BUG_ON(i915_gem_context_is_bannable(ctx));
-
- ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
- GEM_BUG_ON(IS_ERR(ce));
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
rq = igt_spinner_create_request(spin, ce, MI_NOOP);
-
intel_context_put(ce);
if (IS_ERR(rq)) {
@@ -293,7 +286,6 @@ err:
if (err && spin)
igt_spinner_end(spin);
- kernel_context_close(ctx);
return err;
}
@@ -367,20 +359,17 @@ out_ctx:
return err;
}
-static struct i915_vma *create_batch(struct i915_gem_context *ctx)
+static struct i915_vma *create_batch(struct i915_address_space *vm)
{
struct drm_i915_gem_object *obj;
- struct i915_address_space *vm;
struct i915_vma *vma;
int err;
- obj = i915_gem_object_create_internal(ctx->i915, 16 * PAGE_SIZE);
+ obj = i915_gem_object_create_internal(vm->i915, 16 * PAGE_SIZE);
if (IS_ERR(obj))
return ERR_CAST(obj);
- vm = i915_gem_context_get_vm_rcu(ctx);
vma = i915_vma_instance(obj, vm, NULL);
- i915_vm_put(vm);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
@@ -452,8 +441,7 @@ static int whitelist_writable_count(struct intel_engine_cs *engine)
return count;
}
-static int check_dirty_whitelist(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+static int check_dirty_whitelist(struct intel_context *ce)
{
const u32 values[] = {
0x00000000,
@@ -481,19 +469,17 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
0xffff00ff,
0xffffffff,
};
- struct i915_address_space *vm;
+ struct intel_engine_cs *engine = ce->engine;
struct i915_vma *scratch;
struct i915_vma *batch;
int err = 0, i, v;
u32 *cs, *results;
- vm = i915_gem_context_get_vm_rcu(ctx);
- scratch = create_scratch(vm, 2 * ARRAY_SIZE(values) + 1);
- i915_vm_put(vm);
+ scratch = create_scratch(ce->vm, 2 * ARRAY_SIZE(values) + 1);
if (IS_ERR(scratch))
return PTR_ERR(scratch);
- batch = create_batch(ctx);
+ batch = create_batch(ce->vm);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_scratch;
@@ -518,7 +504,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
srm = MI_STORE_REGISTER_MEM;
lrm = MI_LOAD_REGISTER_MEM;
- if (INTEL_GEN(ctx->i915) >= 8)
+ if (INTEL_GEN(engine->i915) >= 8)
lrm++, srm++;
pr_debug("%s: Writing garbage to %x\n",
@@ -577,7 +563,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
i915_gem_object_unpin_map(batch->obj);
intel_gt_chipset_flush(engine->gt);
- rq = igt_request_alloc(ctx, engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -696,7 +682,7 @@ out_unpin:
break;
}
- if (igt_flush_test(ctx->i915))
+ if (igt_flush_test(engine->i915))
err = -EIO;
out_batch:
i915_vma_unpin_and_release(&batch, 0);
@@ -709,38 +695,31 @@ static int live_dirty_whitelist(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
enum intel_engine_id id;
- struct drm_file *file;
- int err = 0;
/* Can the user write to the whitelisted registers? */
if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */
return 0;
- file = mock_file(gt->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- ctx = live_context(gt->i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out_file;
- }
-
for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+ int err;
+
if (engine->whitelist.count == 0)
continue;
- err = check_dirty_whitelist(ctx, engine);
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = check_dirty_whitelist(ce);
+ intel_context_put(ce);
if (err)
- goto out_file;
+ return err;
}
-out_file:
- mock_file_free(gt->i915, file);
- return err;
+ return 0;
}
static int live_reset_whitelist(void *arg)
@@ -830,12 +809,15 @@ err_req:
static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
+ struct i915_address_space *vm;
struct i915_request *rq;
struct i915_vma *batch;
int i, err = 0;
u32 *cs;
- batch = create_batch(ctx);
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ batch = create_batch(vm);
+ i915_vm_put(vm);
if (IS_ERR(batch))
return PTR_ERR(batch);
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 2a77c051f36a..aeb1d1f616e8 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
mutex_init(&timeline->mutex);
- INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
+ INIT_ACTIVE_FENCE(&timeline->last_request);
INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h
index 689efc66c908..d2bcc3df6183 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h
@@ -7,6 +7,8 @@
#ifndef __MOCK_TIMELINE__
#define __MOCK_TIMELINE__
+#include <linux/types.h>
+
struct intel_timeline;
void mock_timeline_init(struct intel_timeline *timeline, u64 context);
diff --git a/drivers/gpu/drm/i915/gt/uc/Makefile b/drivers/gpu/drm/i915/gt/uc/Makefile
deleted file mode 100644
index bec94d434cb6..000000000000
--- a/drivers/gpu/drm/i915/gt/uc/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-# For building individual subdir files on the command line
-subdir-ccflags-y += -I$(srctree)/$(src)/../..
-
-# Extra header tests
-header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 3ee4a4e7689d..5d00a3b2d914 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -32,18 +32,17 @@
* just the HuC, but more are expected to land in the future).
*/
-static void gen8_guc_raise_irq(struct intel_guc *guc)
+void intel_guc_notify(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
- intel_uncore_write(gt->uncore, GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
-}
-
-static void gen11_guc_raise_irq(struct intel_guc *guc)
-{
- struct intel_gt *gt = guc_to_gt(guc);
-
- intel_uncore_write(gt->uncore, GEN11_GUC_HOST_INTERRUPT, 0);
+ /*
+ * On Gen11+, the value written to the register is passes as a payload
+ * to the FW. However, the FW currently treats all values the same way
+ * (H2G interrupt), so we can just write the value that the HW expects
+ * on older gens.
+ */
+ intel_uncore_write(gt->uncore, guc->notify_reg, GUC_SEND_TRIGGER);
}
static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i)
@@ -177,15 +176,13 @@ void intel_guc_init_early(struct intel_guc *guc)
mutex_init(&guc->send_mutex);
spin_lock_init(&guc->irq_lock);
- guc->send = intel_guc_send_nop;
- guc->handler = intel_guc_to_host_event_handler_nop;
if (INTEL_GEN(i915) >= 11) {
- guc->notify = gen11_guc_raise_irq;
+ guc->notify_reg = GEN11_GUC_HOST_INTERRUPT;
guc->interrupts.reset = gen11_reset_guc_interrupts;
guc->interrupts.enable = gen11_enable_guc_interrupts;
guc->interrupts.disable = gen11_disable_guc_interrupts;
} else {
- guc->notify = gen8_guc_raise_irq;
+ guc->notify_reg = GUC_SEND_INTERRUPT;
guc->interrupts.reset = gen9_reset_guc_interrupts;
guc->interrupts.enable = gen9_enable_guc_interrupts;
guc->interrupts.disable = gen9_disable_guc_interrupts;
@@ -401,18 +398,8 @@ void intel_guc_fini(struct intel_guc *guc)
intel_guc_log_destroy(&guc->log);
intel_uc_fw_fini(&guc->fw);
intel_uc_fw_cleanup_fetch(&guc->fw);
-}
-int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len,
- u32 *response_buf, u32 response_buf_size)
-{
- WARN(1, "Unexpected send: action=%#x\n", *action);
- return -ENODEV;
-}
-
-void intel_guc_to_host_event_handler_nop(struct intel_guc *guc)
-{
- WARN(1, "Unexpected event: no suitable handler\n");
+ intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED);
}
/*
@@ -704,3 +691,37 @@ err:
i915_gem_object_put(obj);
return vma;
}
+
+/**
+ * intel_guc_allocate_and_map_vma() - Allocate and map VMA for GuC usage
+ * @guc: the guc
+ * @size: size of area to allocate (both virtual space and memory)
+ * @out_vma: return variable for the allocated vma pointer
+ * @out_vaddr: return variable for the obj mapping
+ *
+ * This wrapper calls intel_guc_allocate_vma() and then maps the allocated
+ * object with I915_MAP_WB.
+ *
+ * Return: 0 if successful, a negative errno code otherwise.
+ */
+int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
+ struct i915_vma **out_vma, void **out_vaddr)
+{
+ struct i915_vma *vma;
+ void *vaddr;
+
+ vma = intel_guc_allocate_vma(guc, size);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ i915_vma_unpin_and_release(&vma, 0);
+ return PTR_ERR(vaddr);
+ }
+
+ *out_vma = vma;
+ *out_vaddr = vaddr;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index e6400204a2bd..910d49590068 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -20,8 +20,8 @@ struct __guc_ads_blob;
/*
* Top level structure of GuC. It handles firmware loading and manages client
- * pool and doorbells. intel_guc owns a intel_guc_client to replace the legacy
- * ExecList submission.
+ * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList
+ * submission.
*/
struct intel_guc {
struct intel_uc_fw fw;
@@ -46,13 +46,13 @@ struct intel_guc {
struct i915_vma *stage_desc_pool;
void *stage_desc_pool_vaddr;
- struct ida stage_ids;
- struct intel_guc_client *execbuf_client;
+ struct i915_vma *workqueue;
+ void *workqueue_vaddr;
+ spinlock_t wq_lock;
- DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
- /* Cyclic counter mod pagesize */
- u32 db_cacheline;
+ struct i915_vma *proc_desc;
+ void *proc_desc_vaddr;
/* Control params for fw initialization */
u32 params[GUC_CTL_MAX_DWORDS];
@@ -64,44 +64,33 @@ struct intel_guc {
enum forcewake_domains fw_domains;
} send_regs;
+ /* register used to send interrupts to the GuC FW */
+ i915_reg_t notify_reg;
+
/* Store msg (e.g. log flush) that we see while CTBs are disabled */
u32 mmio_msg;
/* To serialize the intel_guc_send actions */
struct mutex send_mutex;
-
- /* GuC's FW specific send function */
- int (*send)(struct intel_guc *guc, const u32 *data, u32 len,
- u32 *response_buf, u32 response_buf_size);
-
- /* GuC's FW specific event handler function */
- void (*handler)(struct intel_guc *guc);
-
- /* GuC's FW specific notify function */
- void (*notify)(struct intel_guc *guc);
};
static
inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
{
- return guc->send(guc, action, len, NULL, 0);
+ return intel_guc_ct_send(&guc->ct, action, len, NULL, 0);
}
static inline int
intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
u32 *response_buf, u32 response_buf_size)
{
- return guc->send(guc, action, len, response_buf, response_buf_size);
-}
-
-static inline void intel_guc_notify(struct intel_guc *guc)
-{
- guc->notify(guc);
+ return intel_guc_ct_send(&guc->ct, action, len,
+ response_buf, response_buf_size);
}
static inline void intel_guc_to_host_event_handler(struct intel_guc *guc)
{
- guc->handler(guc);
+ intel_guc_ct_event_handler(&guc->ct);
}
/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
@@ -136,12 +125,9 @@ void intel_guc_init_send_regs(struct intel_guc *guc);
void intel_guc_write_params(struct intel_guc *guc);
int intel_guc_init(struct intel_guc *guc);
void intel_guc_fini(struct intel_guc *guc);
-int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len,
- u32 *response_buf, u32 response_buf_size);
+void intel_guc_notify(struct intel_guc *guc);
int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
u32 *response_buf, u32 response_buf_size);
-void intel_guc_to_host_event_handler(struct intel_guc *guc);
-void intel_guc_to_host_event_handler_nop(struct intel_guc *guc);
int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
const u32 *payload, u32 len);
int intel_guc_sample_forcewake(struct intel_guc *guc);
@@ -149,6 +135,8 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset);
int intel_guc_suspend(struct intel_guc *guc);
int intel_guc_resume(struct intel_guc *guc);
struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
+int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
+ struct i915_vma **out_vma, void **out_vaddr);
static inline bool intel_guc_is_supported(struct intel_guc *guc)
{
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index ca6674b8e00c..101728006ae9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -93,7 +93,8 @@ static void __guc_ads_init(struct intel_guc *guc)
*/
blob->ads.golden_context_lrca[engine_class] = 0;
blob->ads.eng_state_size[engine_class] =
- intel_engine_context_size(dev_priv, engine_class) -
+ intel_engine_context_size(guc_to_gt(guc),
+ engine_class) -
skipped_size;
}
@@ -135,32 +136,19 @@ static void __guc_ads_init(struct intel_guc *guc)
int intel_guc_ads_create(struct intel_guc *guc)
{
const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob));
- struct i915_vma *vma;
- void *blob;
int ret;
GEM_BUG_ON(guc->ads_vma);
- vma = intel_guc_allocate_vma(guc, size);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
+ ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
+ (void **)&guc->ads_blob);
- blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(blob)) {
- ret = PTR_ERR(blob);
- goto err_vma;
- }
-
- guc->ads_vma = vma;
- guc->ads_blob = blob;
+ if (ret)
+ return ret;
__guc_ads_init(guc);
return 0;
-
-err_vma:
- i915_vma_unpin_and_release(&guc->ads_vma, 0);
- return ret;
}
void intel_guc_ads_destroy(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index b49115517510..c6f971a049f9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -37,13 +37,10 @@ static void ct_incoming_request_worker_func(struct work_struct *w);
*/
void intel_guc_ct_init_early(struct intel_guc_ct *ct)
{
- /* we're using static channel owners */
- ct->host_channel.owner = CTB_OWNER_HOST;
-
- spin_lock_init(&ct->lock);
- INIT_LIST_HEAD(&ct->pending_requests);
- INIT_LIST_HEAD(&ct->incoming_requests);
- INIT_WORK(&ct->worker, ct_incoming_request_worker_func);
+ spin_lock_init(&ct->requests.lock);
+ INIT_LIST_HEAD(&ct->requests.pending);
+ INIT_LIST_HEAD(&ct->requests.incoming);
+ INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func);
}
static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
@@ -64,14 +61,13 @@ static inline const char *guc_ct_buffer_type_to_str(u32 type)
}
static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
- u32 cmds_addr, u32 size, u32 owner)
+ u32 cmds_addr, u32 size)
{
- CT_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
- desc, cmds_addr, size, owner);
+ CT_DEBUG_DRIVER("CT: init addr=%#x size=%u\n", cmds_addr, size);
memset(desc, 0, sizeof(*desc));
desc->addr = cmds_addr;
desc->size = size;
- desc->owner = owner;
+ desc->owner = CTB_OWNER_HOST;
}
static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
@@ -104,12 +100,11 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc,
}
static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
- u32 owner,
u32 type)
{
u32 action[] = {
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
- owner,
+ CTB_OWNER_HOST,
type
};
int err;
@@ -117,20 +112,27 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
/* Can't use generic send(), CT deregistration must go over MMIO */
err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
if (err)
- DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n",
- guc_ct_buffer_type_to_str(type), owner, err);
+ DRM_ERROR("CT: deregister %s buffer failed; err=%d\n",
+ guc_ct_buffer_type_to_str(type), err);
return err;
}
-static int ctch_init(struct intel_guc *guc,
- struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_init - Init buffer-based communication
+ * @ct: pointer to CT struct
+ *
+ * Allocate memory required for buffer-based communication.
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+int intel_guc_ct_init(struct intel_guc_ct *ct)
{
- struct i915_vma *vma;
+ struct intel_guc *guc = ct_to_guc(ct);
void *blob;
int err;
int i;
- GEM_BUG_ON(ctch->vma);
+ GEM_BUG_ON(ct->vma);
/* We allocate 1 page to hold both descriptors and both buffers.
* ___________.....................
@@ -154,71 +156,65 @@ static int ctch_init(struct intel_guc *guc,
* other code will need updating as well.
*/
- /* allocate vma */
- vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
- if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_out;
+ err = intel_guc_allocate_and_map_vma(guc, PAGE_SIZE, &ct->vma, &blob);
+ if (err) {
+ DRM_ERROR("CT: channel allocation failed; err=%d\n", err);
+ return err;
}
- ctch->vma = vma;
- /* map first page */
- blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(blob)) {
- err = PTR_ERR(blob);
- goto err_vma;
- }
CT_DEBUG_DRIVER("CT: vma base=%#x\n",
- intel_guc_ggtt_offset(guc, ctch->vma));
+ intel_guc_ggtt_offset(guc, ct->vma));
/* store pointers to desc and cmds */
- for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
- GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
- ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i;
- ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2;
+ for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
+ GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
+ ct->ctbs[i].desc = blob + PAGE_SIZE/4 * i;
+ ct->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2;
}
return 0;
-
-err_vma:
- i915_vma_unpin_and_release(&ctch->vma, 0);
-err_out:
- CT_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n",
- ctch->owner, err);
- return err;
}
-static void ctch_fini(struct intel_guc *guc,
- struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_fini - Fini buffer-based communication
+ * @ct: pointer to CT struct
+ *
+ * Deallocate memory required for buffer-based communication.
+ */
+void intel_guc_ct_fini(struct intel_guc_ct *ct)
{
- GEM_BUG_ON(ctch->enabled);
+ GEM_BUG_ON(ct->enabled);
- i915_vma_unpin_and_release(&ctch->vma, I915_VMA_RELEASE_MAP);
+ i915_vma_unpin_and_release(&ct->vma, I915_VMA_RELEASE_MAP);
}
-static int ctch_enable(struct intel_guc *guc,
- struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_enable - Enable buffer based command transport.
+ * @ct: pointer to CT struct
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+int intel_guc_ct_enable(struct intel_guc_ct *ct)
{
+ struct intel_guc *guc = ct_to_guc(ct);
u32 base;
int err;
int i;
- GEM_BUG_ON(!ctch->vma);
-
- GEM_BUG_ON(ctch->enabled);
+ GEM_BUG_ON(ct->enabled);
/* vma should be already allocated and map'ed */
- base = intel_guc_ggtt_offset(guc, ctch->vma);
+ GEM_BUG_ON(!ct->vma);
+ base = intel_guc_ggtt_offset(guc, ct->vma);
/* (re)initialize descriptors
* cmds buffers are in the second half of the blob page
*/
- for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
+ for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
- guc_ct_buffer_desc_init(ctch->ctbs[i].desc,
+ guc_ct_buffer_desc_init(ct->ctbs[i].desc,
base + PAGE_SIZE/4 * i + PAGE_SIZE/2,
- PAGE_SIZE/4,
- ctch->owner);
+ PAGE_SIZE/4);
}
/* register buffers, starting wirh RECV buffer
@@ -236,38 +232,42 @@ static int ctch_enable(struct intel_guc *guc,
if (unlikely(err))
goto err_deregister;
- ctch->enabled = true;
+ ct->enabled = true;
return 0;
err_deregister:
guc_action_deregister_ct_buffer(guc,
- ctch->owner,
INTEL_GUC_CT_BUFFER_TYPE_RECV);
err_out:
- DRM_ERROR("CT: can't open channel %d; err=%d\n", ctch->owner, err);
+ DRM_ERROR("CT: can't open channel; err=%d\n", err);
return err;
}
-static void ctch_disable(struct intel_guc *guc,
- struct intel_guc_ct_channel *ctch)
+/**
+ * intel_guc_ct_disable - Disable buffer based command transport.
+ * @ct: pointer to CT struct
+ */
+void intel_guc_ct_disable(struct intel_guc_ct *ct)
{
- GEM_BUG_ON(!ctch->enabled);
+ struct intel_guc *guc = ct_to_guc(ct);
- ctch->enabled = false;
+ GEM_BUG_ON(!ct->enabled);
- guc_action_deregister_ct_buffer(guc,
- ctch->owner,
- INTEL_GUC_CT_BUFFER_TYPE_SEND);
- guc_action_deregister_ct_buffer(guc,
- ctch->owner,
- INTEL_GUC_CT_BUFFER_TYPE_RECV);
+ ct->enabled = false;
+
+ if (intel_guc_is_running(guc)) {
+ guc_action_deregister_ct_buffer(guc,
+ INTEL_GUC_CT_BUFFER_TYPE_SEND);
+ guc_action_deregister_ct_buffer(guc,
+ INTEL_GUC_CT_BUFFER_TYPE_RECV);
+ }
}
-static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
+static u32 ct_get_next_fence(struct intel_guc_ct *ct)
{
/* For now it's trivial */
- return ++ctch->next_fence;
+ return ++ct->requests.next_fence;
}
/**
@@ -440,35 +440,34 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
return err;
}
-static int ctch_send(struct intel_guc_ct *ct,
- struct intel_guc_ct_channel *ctch,
- const u32 *action,
- u32 len,
- u32 *response_buf,
- u32 response_buf_size,
- u32 *status)
+static int ct_send(struct intel_guc_ct *ct,
+ const u32 *action,
+ u32 len,
+ u32 *response_buf,
+ u32 response_buf_size,
+ u32 *status)
{
- struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
+ struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND];
struct guc_ct_buffer_desc *desc = ctb->desc;
struct ct_request request;
unsigned long flags;
u32 fence;
int err;
- GEM_BUG_ON(!ctch->enabled);
+ GEM_BUG_ON(!ct->enabled);
GEM_BUG_ON(!len);
GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
GEM_BUG_ON(!response_buf && response_buf_size);
- fence = ctch_get_next_fence(ctch);
+ fence = ct_get_next_fence(ct);
request.fence = fence;
request.status = 0;
request.response_len = response_buf_size;
request.response_buf = response_buf;
- spin_lock_irqsave(&ct->lock, flags);
- list_add_tail(&request.link, &ct->pending_requests);
- spin_unlock_irqrestore(&ct->lock, flags);
+ spin_lock_irqsave(&ct->requests.lock, flags);
+ list_add_tail(&request.link, &ct->requests.pending);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
err = ctb_write(ctb, action, len, fence, !!response_buf);
if (unlikely(err))
@@ -501,9 +500,9 @@ static int ctch_send(struct intel_guc_ct *ct,
}
unlink:
- spin_lock_irqsave(&ct->lock, flags);
+ spin_lock_irqsave(&ct->requests.lock, flags);
list_del(&request.link);
- spin_unlock_irqrestore(&ct->lock, flags);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
return err;
}
@@ -511,18 +510,21 @@ unlink:
/*
* Command Transport (CT) buffer based GuC send function.
*/
-int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len,
+int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
u32 *response_buf, u32 response_buf_size)
{
- struct intel_guc_ct *ct = &guc->ct;
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
+ struct intel_guc *guc = ct_to_guc(ct);
u32 status = ~0; /* undefined */
int ret;
+ if (unlikely(!ct->enabled)) {
+ WARN(1, "Unexpected send: action=%#x\n", *action);
+ return -ENODEV;
+ }
+
mutex_lock(&guc->send_mutex);
- ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size,
- &status);
+ ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
if (unlikely(ret < 0)) {
DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
action[0], ret, status);
@@ -653,8 +655,8 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status);
- spin_lock(&ct->lock);
- list_for_each_entry(req, &ct->pending_requests, link) {
+ spin_lock(&ct->requests.lock);
+ list_for_each_entry(req, &ct->requests.pending, link) {
if (unlikely(fence != req->fence)) {
CT_DEBUG_DRIVER("CT: request %u awaits response\n",
req->fence);
@@ -672,7 +674,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
found = true;
break;
}
- spin_unlock(&ct->lock);
+ spin_unlock(&ct->requests.lock);
if (!found)
DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg);
@@ -710,13 +712,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
u32 *payload;
bool done;
- spin_lock_irqsave(&ct->lock, flags);
- request = list_first_entry_or_null(&ct->incoming_requests,
+ spin_lock_irqsave(&ct->requests.lock, flags);
+ request = list_first_entry_or_null(&ct->requests.incoming,
struct ct_incoming_request, link);
if (request)
list_del(&request->link);
- done = !!list_empty(&ct->incoming_requests);
- spin_unlock_irqrestore(&ct->lock, flags);
+ done = !!list_empty(&ct->requests.incoming);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
if (!request)
return true;
@@ -734,12 +736,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
static void ct_incoming_request_worker_func(struct work_struct *w)
{
- struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, worker);
+ struct intel_guc_ct *ct =
+ container_of(w, struct intel_guc_ct, requests.worker);
bool done;
done = ct_process_incoming_requests(ct);
if (!done)
- queue_work(system_unbound_wq, &ct->worker);
+ queue_work(system_unbound_wq, &ct->requests.worker);
}
/**
@@ -777,23 +780,28 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
}
memcpy(request->msg, msg, 4 * msglen);
- spin_lock_irqsave(&ct->lock, flags);
- list_add_tail(&request->link, &ct->incoming_requests);
- spin_unlock_irqrestore(&ct->lock, flags);
+ spin_lock_irqsave(&ct->requests.lock, flags);
+ list_add_tail(&request->link, &ct->requests.incoming);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
- queue_work(system_unbound_wq, &ct->worker);
+ queue_work(system_unbound_wq, &ct->requests.worker);
return 0;
}
-static void ct_process_host_channel(struct intel_guc_ct *ct)
+/*
+ * When we're communicating with the GuC over CT, GuC uses events
+ * to notify us about new messages being posted on the RECV buffer.
+ */
+void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
{
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
- struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_RECV];
+ struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV];
u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */
int err = 0;
- if (!ctch->enabled)
+ if (unlikely(!ct->enabled)) {
+ WARN(1, "Unexpected GuC event received while CT disabled!\n");
return;
+ }
do {
err = ctb_read(ctb, msg);
@@ -812,86 +820,3 @@ static void ct_process_host_channel(struct intel_guc_ct *ct)
}
}
-/*
- * When we're communicating with the GuC over CT, GuC uses events
- * to notify us about new messages being posted on the RECV buffer.
- */
-void intel_guc_to_host_event_handler_ct(struct intel_guc *guc)
-{
- struct intel_guc_ct *ct = &guc->ct;
-
- ct_process_host_channel(ct);
-}
-
-/**
- * intel_guc_ct_init - Init CT communication
- * @ct: pointer to CT struct
- *
- * Allocate memory required for communication via
- * the CT channel.
- *
- * Return: 0 on success, a negative errno code on failure.
- */
-int intel_guc_ct_init(struct intel_guc_ct *ct)
-{
- struct intel_guc *guc = ct_to_guc(ct);
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
- int err;
-
- err = ctch_init(guc, ctch);
- if (unlikely(err)) {
- DRM_ERROR("CT: can't open channel %d; err=%d\n",
- ctch->owner, err);
- return err;
- }
-
- GEM_BUG_ON(!ctch->vma);
- return 0;
-}
-
-/**
- * intel_guc_ct_fini - Fini CT communication
- * @ct: pointer to CT struct
- *
- * Deallocate memory required for communication via
- * the CT channel.
- */
-void intel_guc_ct_fini(struct intel_guc_ct *ct)
-{
- struct intel_guc *guc = ct_to_guc(ct);
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
-
- ctch_fini(guc, ctch);
-}
-
-/**
- * intel_guc_ct_enable - Enable buffer based command transport.
- * @ct: pointer to CT struct
- *
- * Return: 0 on success, a negative errno code on failure.
- */
-int intel_guc_ct_enable(struct intel_guc_ct *ct)
-{
- struct intel_guc *guc = ct_to_guc(ct);
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
-
- if (ctch->enabled)
- return 0;
-
- return ctch_enable(guc, ctch);
-}
-
-/**
- * intel_guc_ct_disable - Disable buffer based command transport.
- * @ct: pointer to CT struct
- */
-void intel_guc_ct_disable(struct intel_guc_ct *ct)
-{
- struct intel_guc *guc = ct_to_guc(ct);
- struct intel_guc_ct_channel *ctch = &ct->host_channel;
-
- if (!ctch->enabled)
- return;
-
- ctch_disable(guc, ctch);
-}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 7c24d83f5c24..3e7fe237cfa5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -35,44 +35,28 @@ struct intel_guc_ct_buffer {
u32 *cmds;
};
-/** Represents pair of command transport buffers.
- *
- * Buffers go in pairs to allow bi-directional communication.
- * To simplify the code we place both of them in the same vma.
- * Buffers from the same pair must share unique owner id.
- *
- * @vma: pointer to the vma with pair of CT buffers
- * @ctbs: buffers for sending(0) and receiving(1) commands
- * @owner: unique identifier
- * @next_fence: fence to be used with next send command
- */
-struct intel_guc_ct_channel {
- struct i915_vma *vma;
- struct intel_guc_ct_buffer ctbs[2];
- u32 owner;
- u32 next_fence;
- bool enabled;
-};
-/** Holds all command transport channels.
+/** Top-level structure for Command Transport related data
*
- * @host_channel: main channel used by the host
+ * Includes a pair of CT buffers for bi-directional communication and tracking
+ * for the H2G and G2H requests sent and received through the buffers.
*/
struct intel_guc_ct {
- struct intel_guc_ct_channel host_channel;
- /* other channels are tbd */
+ struct i915_vma *vma;
+ bool enabled;
- /** @lock: protects pending requests list */
- spinlock_t lock;
+ /* buffers for sending(0) and receiving(1) commands */
+ struct intel_guc_ct_buffer ctbs[2];
- /** @pending_requests: list of requests waiting for response */
- struct list_head pending_requests;
+ struct {
+ u32 next_fence; /* fence to be used with next request to send */
- /** @incoming_requests: list of incoming requests */
- struct list_head incoming_requests;
+ spinlock_t lock; /* protects pending requests list */
+ struct list_head pending; /* requests waiting for response */
- /** @worker: worker for handling incoming requests */
- struct work_struct worker;
+ struct list_head incoming; /* incoming requests */
+ struct work_struct worker; /* handler for incoming requests */
+ } requests;
};
void intel_guc_ct_init_early(struct intel_guc_ct *ct);
@@ -81,13 +65,13 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct);
int intel_guc_ct_enable(struct intel_guc_ct *ct);
void intel_guc_ct_disable(struct intel_guc_ct *ct);
-static inline void intel_guc_ct_stop(struct intel_guc_ct *ct)
+static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct)
{
- ct->host_channel.enabled = false;
+ return ct->enabled;
}
-int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len,
+int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
u32 *response_buf, u32 response_buf_size);
-void intel_guc_to_host_event_handler_ct(struct intel_guc *guc);
+void intel_guc_ct_event_handler(struct intel_guc_ct *ct);
#endif /* _INTEL_GUC_CT_H_ */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 5528224448f6..3a1c47d600ea 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -149,7 +149,7 @@ int intel_guc_fw_upload(struct intel_guc *guc)
* Current uCode expects the code to be loaded at 8k; locations below
* this are used for the stack.
*/
- ret = intel_uc_fw_upload(&guc->fw, gt, 0x2000, UOS_MOVE);
+ ret = intel_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE);
if (ret)
goto out;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index a26a85d50209..a6b733c146c9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -31,7 +31,6 @@
#define GUC_DOORBELL_INVALID 256
-#define GUC_DB_SIZE (PAGE_SIZE)
#define GUC_WQ_SIZE (PAGE_SIZE * 2)
/* Work queue item header definitions */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 2498c55e0ea5..9e42324fdecd 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -18,15 +18,6 @@
#include "i915_drv.h"
#include "i915_trace.h"
-enum {
- GUC_PREEMPT_NONE = 0,
- GUC_PREEMPT_INPROGRESS,
- GUC_PREEMPT_FINISHED,
-};
-#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
-#define GUC_PREEMPT_BREADCRUMB_BYTES \
- (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
-
/**
* DOC: GuC-based command submission
*
@@ -36,25 +27,14 @@ enum {
* code) matches the old submission model and will be updated as part of the
* upgrade to the new flow.
*
- * GuC client:
- * A intel_guc_client refers to a submission path through GuC. Currently, there
- * is only one client, which is charged with all submissions to the GuC. This
- * struct is the owner of a doorbell, a process descriptor and a workqueue (all
- * of them inside a single gem object that contains all required pages for these
- * elements).
- *
* GuC stage descriptor:
* During initialization, the driver allocates a static pool of 1024 such
- * descriptors, and shares them with the GuC.
- * Currently, there exists a 1:1 mapping between a intel_guc_client and a
- * guc_stage_desc (via the client's stage_id), so effectively only one
- * gets used. This stage descriptor lets the GuC know about the doorbell,
- * workqueue and process descriptor. Theoretically, it also lets the GuC
- * know about our HW contexts (context ID, etc...), but we actually
- * employ a kind of submission where the GuC uses the LRCA sent via the work
- * item instead (the single guc_stage_desc associated to execbuf client
- * contains information about the default kernel context only, but this is
- * essentially unused). This is called a "proxy" submission.
+ * descriptors, and shares them with the GuC. Currently, we only use one
+ * descriptor. This stage descriptor lets the GuC know about the workqueue and
+ * process descriptor. Theoretically, it also lets the GuC know about our HW
+ * contexts (context ID, etc...), but we actually employ a kind of submission
+ * where the GuC uses the LRCA sent via the work item instead. This is called
+ * a "proxy" submission.
*
* The Scratch registers:
* There are 16 MMIO-based registers start from 0xC180. The kernel driver writes
@@ -63,11 +43,6 @@ enum {
* Firmware writes a success/fail code back to the action register after
* processes the request. The kernel driver polls waiting for this update and
* then proceeds.
- * See intel_guc_send()
- *
- * Doorbells:
- * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
- * mapped into process space.
*
* Work Items:
* There are several types of work items that the host may place into a
@@ -84,213 +59,45 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
return rb_entry(rb, struct i915_priolist, node);
}
-static inline bool is_high_priority(struct intel_guc_client *client)
+static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
{
- return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
- client->priority == GUC_CLIENT_PRIORITY_HIGH);
-}
-
-static int reserve_doorbell(struct intel_guc_client *client)
-{
- unsigned long offset;
- unsigned long end;
- u16 id;
-
- GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID);
+ struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;
- /*
- * The bitmap tracks which doorbell registers are currently in use.
- * It is split into two halves; the first half is used for normal
- * priority contexts, the second half for high-priority ones.
- */
- offset = 0;
- end = GUC_NUM_DOORBELLS / 2;
- if (is_high_priority(client)) {
- offset = end;
- end += offset;
- }
-
- id = find_next_zero_bit(client->guc->doorbell_bitmap, end, offset);
- if (id == end)
- return -ENOSPC;
-
- __set_bit(id, client->guc->doorbell_bitmap);
- client->doorbell_id = id;
- DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n",
- client->stage_id, yesno(is_high_priority(client)),
- id);
- return 0;
+ return &base[id];
}
-static bool has_doorbell(struct intel_guc_client *client)
+static int guc_workqueue_create(struct intel_guc *guc)
{
- if (client->doorbell_id == GUC_DOORBELL_INVALID)
- return false;
-
- return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
+ return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue,
+ &guc->workqueue_vaddr);
}
-static void unreserve_doorbell(struct intel_guc_client *client)
+static void guc_workqueue_destroy(struct intel_guc *guc)
{
- GEM_BUG_ON(!has_doorbell(client));
-
- __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap);
- client->doorbell_id = GUC_DOORBELL_INVALID;
+ i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP);
}
/*
- * Tell the GuC to allocate or deallocate a specific doorbell
- */
-
-static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id)
-{
- u32 action[] = {
- INTEL_GUC_ACTION_ALLOCATE_DOORBELL,
- stage_id
- };
-
- return intel_guc_send(guc, action, ARRAY_SIZE(action));
-}
-
-static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id)
-{
- u32 action[] = {
- INTEL_GUC_ACTION_DEALLOCATE_DOORBELL,
- stage_id
- };
-
- return intel_guc_send(guc, action, ARRAY_SIZE(action));
-}
-
-static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client)
-{
- struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr;
-
- return &base[client->stage_id];
-}
-
-/*
- * Initialise, update, or clear doorbell data shared with the GuC
- *
- * These functions modify shared data and so need access to the mapped
- * client object which contains the page being used for the doorbell
+ * Initialise the process descriptor shared with the GuC firmware.
*/
-
-static void __update_doorbell_desc(struct intel_guc_client *client, u16 new_id)
-{
- struct guc_stage_desc *desc;
-
- /* Update the GuC's idea of the doorbell ID */
- desc = __get_stage_desc(client);
- desc->db_id = new_id;
-}
-
-static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client)
-{
- return client->vaddr + client->doorbell_offset;
-}
-
-static bool __doorbell_valid(struct intel_guc *guc, u16 db_id)
-{
- struct intel_uncore *uncore = guc_to_gt(guc)->uncore;
-
- GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS);
- return intel_uncore_read(uncore, GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID;
-}
-
-static void __init_doorbell(struct intel_guc_client *client)
-{
- struct guc_doorbell_info *doorbell;
-
- doorbell = __get_doorbell(client);
- doorbell->db_status = GUC_DOORBELL_ENABLED;
- doorbell->cookie = 0;
-}
-
-static void __fini_doorbell(struct intel_guc_client *client)
-{
- struct guc_doorbell_info *doorbell;
- u16 db_id = client->doorbell_id;
-
- doorbell = __get_doorbell(client);
- doorbell->db_status = GUC_DOORBELL_DISABLED;
-
- /* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit
- * to go to zero after updating db_status before we call the GuC to
- * release the doorbell
- */
- if (wait_for_us(!__doorbell_valid(client->guc, db_id), 10))
- WARN_ONCE(true, "Doorbell never became invalid after disable\n");
-}
-
-static int create_doorbell(struct intel_guc_client *client)
+static int guc_proc_desc_create(struct intel_guc *guc)
{
- int ret;
-
- if (WARN_ON(!has_doorbell(client)))
- return -ENODEV; /* internal setup error, should never happen */
-
- __update_doorbell_desc(client, client->doorbell_id);
- __init_doorbell(client);
-
- ret = __guc_allocate_doorbell(client->guc, client->stage_id);
- if (ret) {
- __fini_doorbell(client);
- __update_doorbell_desc(client, GUC_DOORBELL_INVALID);
- DRM_DEBUG_DRIVER("Couldn't create client %u doorbell: %d\n",
- client->stage_id, ret);
- return ret;
- }
+ const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc));
- return 0;
+ return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc,
+ &guc->proc_desc_vaddr);
}
-static int destroy_doorbell(struct intel_guc_client *client)
+static void guc_proc_desc_destroy(struct intel_guc *guc)
{
- int ret;
-
- GEM_BUG_ON(!has_doorbell(client));
-
- __fini_doorbell(client);
- ret = __guc_deallocate_doorbell(client->guc, client->stage_id);
- if (ret)
- DRM_ERROR("Couldn't destroy client %u doorbell: %d\n",
- client->stage_id, ret);
-
- __update_doorbell_desc(client, GUC_DOORBELL_INVALID);
-
- return ret;
+ i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP);
}
-static unsigned long __select_cacheline(struct intel_guc *guc)
-{
- unsigned long offset;
-
- /* Doorbell uses a single cache line within a page */
- offset = offset_in_page(guc->db_cacheline);
-
- /* Moving to next cache line to reduce contention */
- guc->db_cacheline += cache_line_size();
-
- DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n",
- offset, guc->db_cacheline, cache_line_size());
- return offset;
-}
-
-static inline struct guc_process_desc *
-__get_process_desc(struct intel_guc_client *client)
-{
- return client->vaddr + client->proc_desc_offset;
-}
-
-/*
- * Initialise the process descriptor shared with the GuC firmware.
- */
-static void guc_proc_desc_init(struct intel_guc_client *client)
+static void guc_proc_desc_init(struct intel_guc *guc)
{
struct guc_process_desc *desc;
- desc = memset(__get_process_desc(client), 0, sizeof(*desc));
+ desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc));
/*
* XXX: pDoorbell and WQVBaseAddress are pointers in process address
@@ -301,47 +108,27 @@ static void guc_proc_desc_init(struct intel_guc_client *client)
desc->wq_base_addr = 0;
desc->db_base_addr = 0;
- desc->stage_id = client->stage_id;
desc->wq_size_bytes = GUC_WQ_SIZE;
desc->wq_status = WQ_STATUS_ACTIVE;
- desc->priority = client->priority;
+ desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
}
-static void guc_proc_desc_fini(struct intel_guc_client *client)
+static void guc_proc_desc_fini(struct intel_guc *guc)
{
- struct guc_process_desc *desc;
-
- desc = __get_process_desc(client);
- memset(desc, 0, sizeof(*desc));
+ memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc));
}
static int guc_stage_desc_pool_create(struct intel_guc *guc)
{
- struct i915_vma *vma;
- void *vaddr;
-
- vma = intel_guc_allocate_vma(guc,
- PAGE_ALIGN(sizeof(struct guc_stage_desc) *
- GUC_MAX_STAGE_DESCRIPTORS));
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- i915_vma_unpin_and_release(&vma, 0);
- return PTR_ERR(vaddr);
- }
-
- guc->stage_desc_pool = vma;
- guc->stage_desc_pool_vaddr = vaddr;
- ida_init(&guc->stage_ids);
+ u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) *
+ GUC_MAX_STAGE_DESCRIPTORS);
- return 0;
+ return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool,
+ &guc->stage_desc_pool_vaddr);
}
static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
{
- ida_destroy(&guc->stage_ids);
i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP);
}
@@ -349,63 +136,49 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
* Initialise/clear the stage descriptor shared with the GuC firmware.
*
* This descriptor tells the GuC where (in GGTT space) to find the important
- * data structures relating to this client (doorbell, process descriptor,
- * write queue, etc).
+ * data structures related to work submission (process descriptor, write queue,
+ * etc).
*/
-static void guc_stage_desc_init(struct intel_guc_client *client)
+static void guc_stage_desc_init(struct intel_guc *guc)
{
- struct intel_guc *guc = client->guc;
struct guc_stage_desc *desc;
- u32 gfx_addr;
- desc = __get_stage_desc(client);
+ /* we only use 1 stage desc, so hardcode it to 0 */
+ desc = __get_stage_desc(guc, 0);
memset(desc, 0, sizeof(*desc));
desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
GUC_STAGE_DESC_ATTR_KERNEL;
- if (is_high_priority(client))
- desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT;
- desc->stage_id = client->stage_id;
- desc->priority = client->priority;
- desc->db_id = client->doorbell_id;
- /*
- * The doorbell, process descriptor, and workqueue are all parts
- * of the client object, which the GuC will reference via the GGTT
- */
- gfx_addr = intel_guc_ggtt_offset(guc, client->vma);
- desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
- client->doorbell_offset;
- desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client));
- desc->db_trigger_uk = gfx_addr + client->doorbell_offset;
- desc->process_desc = gfx_addr + client->proc_desc_offset;
- desc->wq_addr = gfx_addr + GUC_DB_SIZE;
- desc->wq_size = GUC_WQ_SIZE;
+ desc->stage_id = 0;
+ desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
- desc->desc_private = ptr_to_u64(client);
+ desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc);
+ desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue);
+ desc->wq_size = GUC_WQ_SIZE;
}
-static void guc_stage_desc_fini(struct intel_guc_client *client)
+static void guc_stage_desc_fini(struct intel_guc *guc)
{
struct guc_stage_desc *desc;
- desc = __get_stage_desc(client);
+ desc = __get_stage_desc(guc, 0);
memset(desc, 0, sizeof(*desc));
}
/* Construct a Work Item and append it to the GuC's Work Queue */
-static void guc_wq_item_append(struct intel_guc_client *client,
+static void guc_wq_item_append(struct intel_guc *guc,
u32 target_engine, u32 context_desc,
u32 ring_tail, u32 fence_id)
{
/* wqi_len is in DWords, and does not include the one-word header */
const size_t wqi_size = sizeof(struct guc_wq_item);
const u32 wqi_len = wqi_size / sizeof(u32) - 1;
- struct guc_process_desc *desc = __get_process_desc(client);
+ struct guc_process_desc *desc = guc->proc_desc_vaddr;
struct guc_wq_item *wqi;
u32 wq_off;
- lockdep_assert_held(&client->wq_lock);
+ lockdep_assert_held(&guc->wq_lock);
/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
* should not have the case where structure wqi is across page, neither
@@ -425,58 +198,30 @@ static void guc_wq_item_append(struct intel_guc_client *client,
GUC_WQ_SIZE) < wqi_size);
GEM_BUG_ON(wq_off & (wqi_size - 1));
- /* WQ starts from the page after doorbell / process_desc */
- wqi = client->vaddr + wq_off + GUC_DB_SIZE;
-
- if (I915_SELFTEST_ONLY(client->use_nop_wqi)) {
- wqi->header = WQ_TYPE_NOOP | (wqi_len << WQ_LEN_SHIFT);
- } else {
- /* Now fill in the 4-word work queue item */
- wqi->header = WQ_TYPE_INORDER |
- (wqi_len << WQ_LEN_SHIFT) |
- (target_engine << WQ_TARGET_SHIFT) |
- WQ_NO_WCFLUSH_WAIT;
- wqi->context_desc = context_desc;
- wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
- GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
- wqi->fence_id = fence_id;
- }
+ wqi = guc->workqueue_vaddr + wq_off;
+
+ /* Now fill in the 4-word work queue item */
+ wqi->header = WQ_TYPE_INORDER |
+ (wqi_len << WQ_LEN_SHIFT) |
+ (target_engine << WQ_TARGET_SHIFT) |
+ WQ_NO_WCFLUSH_WAIT;
+ wqi->context_desc = context_desc;
+ wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
+ GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
+ wqi->fence_id = fence_id;
/* Make the update visible to GuC */
WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1));
}
-static void guc_ring_doorbell(struct intel_guc_client *client)
-{
- struct guc_doorbell_info *db;
- u32 cookie;
-
- lockdep_assert_held(&client->wq_lock);
-
- /* pointer of current doorbell cacheline */
- db = __get_doorbell(client);
-
- /*
- * We're not expecting the doorbell cookie to change behind our back,
- * we also need to treat 0 as a reserved value.
- */
- cookie = READ_ONCE(db->cookie);
- WARN_ON_ONCE(xchg(&db->cookie, cookie + 1 ?: cookie + 2) != cookie);
-
- /* XXX: doorbell was lost and need to acquire it again */
- GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED);
-}
-
static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
- struct intel_guc_client *client = guc->execbuf_client;
struct intel_engine_cs *engine = rq->engine;
- u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
+ u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
- guc_wq_item_append(client, engine->guc_id, ctx_desc,
+ guc_wq_item_append(guc, engine->guc_id, ctx_desc,
ring_tail, rq->fence.seqno);
- guc_ring_doorbell(client);
}
/*
@@ -488,10 +233,9 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
*/
static void flush_ggtt_writes(struct i915_vma *vma)
{
- struct drm_i915_private *i915 = vma->vm->i915;
-
if (i915_vma_is_map_and_fenceable(vma))
- intel_uncore_posting_read_fw(&i915->uncore, GUC_STATUS);
+ intel_uncore_posting_read_fw(vma->vm->gt->uncore,
+ GUC_STATUS);
}
static void guc_submit(struct intel_engine_cs *engine,
@@ -499,9 +243,8 @@ static void guc_submit(struct intel_engine_cs *engine,
struct i915_request **end)
{
struct intel_guc *guc = &engine->gt->uc.guc;
- struct intel_guc_client *client = guc->execbuf_client;
- spin_lock(&client->wq_lock);
+ spin_lock(&guc->wq_lock);
do {
struct i915_request *rq = *out++;
@@ -510,7 +253,7 @@ static void guc_submit(struct intel_engine_cs *engine,
guc_add_request(guc, rq);
} while (out != end);
- spin_unlock(&client->wq_lock);
+ spin_unlock(&guc->wq_lock);
}
static inline int rq_prio(const struct i915_request *rq)
@@ -529,7 +272,7 @@ static struct i915_request *schedule_in(struct i915_request *rq, int idx)
* required if we generalise the inflight tracking.
*/
- intel_gt_pm_get(rq->engine->gt);
+ __intel_gt_pm_get(rq->engine->gt);
return i915_request_get(rq);
}
@@ -537,7 +280,7 @@ static void schedule_out(struct i915_request *rq)
{
trace_i915_request_out(rq);
- intel_gt_pm_put(rq->engine->gt);
+ intel_gt_pm_put_async(rq->engine->gt);
i915_request_put(rq);
}
@@ -572,7 +315,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- if (last && rq->hw_context != last->hw_context) {
+ if (last && rq->context != last->context) {
if (port == last_port)
goto done;
@@ -631,7 +374,7 @@ static void guc_reset_prepare(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
/*
* Prevent request submission to the hardware until we have
@@ -658,7 +401,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists)
memset(execlists->inflight, 0, sizeof(execlists->inflight));
}
-static void guc_reset(struct intel_engine_cs *engine, bool stalled)
+static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request *rq;
@@ -677,20 +420,20 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
stalled = false;
__i915_request_reset(rq, stalled);
- intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
+ intel_lr_context_reset(engine, rq->context, rq->head, stalled);
out_unlock:
spin_unlock_irqrestore(&engine->active.lock, flags);
}
-static void guc_cancel_requests(struct intel_engine_cs *engine)
+static void guc_reset_cancel(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request *rq, *rn;
struct rb_node *rb;
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
/*
* Before we call engine->cancel_requests(), we should have exclusive
@@ -751,8 +494,8 @@ static void guc_reset_finish(struct intel_engine_cs *engine)
/* And kick in case we missed a new request submission. */
tasklet_hi_schedule(&execlists->tasklet);
- GEM_TRACE("%s: depth->%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth->%d\n",
+ atomic_read(&execlists->tasklet.count));
}
/*
@@ -761,213 +504,6 @@ static void guc_reset_finish(struct intel_engine_cs *engine)
* path of guc_submit() above.
*/
-/* Check that a doorbell register is in the expected state */
-static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
-{
- bool valid;
-
- GEM_BUG_ON(db_id >= GUC_NUM_DOORBELLS);
-
- valid = __doorbell_valid(guc, db_id);
-
- if (test_bit(db_id, guc->doorbell_bitmap) == valid)
- return true;
-
- DRM_DEBUG_DRIVER("Doorbell %u has unexpected state: valid=%s\n",
- db_id, yesno(valid));
-
- return false;
-}
-
-static bool guc_verify_doorbells(struct intel_guc *guc)
-{
- bool doorbells_ok = true;
- u16 db_id;
-
- for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id)
- if (!doorbell_ok(guc, db_id))
- doorbells_ok = false;
-
- return doorbells_ok;
-}
-
-/**
- * guc_client_alloc() - Allocate an intel_guc_client
- * @guc: the intel_guc structure
- * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW
- * The kernel client to replace ExecList submission is created with
- * NORMAL priority. Priority of a client for scheduler can be HIGH,
- * while a preemption context can use CRITICAL.
- *
- * Return: An intel_guc_client object if success, else NULL.
- */
-static struct intel_guc_client *
-guc_client_alloc(struct intel_guc *guc, u32 priority)
-{
- struct intel_guc_client *client;
- struct i915_vma *vma;
- void *vaddr;
- int ret;
-
- client = kzalloc(sizeof(*client), GFP_KERNEL);
- if (!client)
- return ERR_PTR(-ENOMEM);
-
- client->guc = guc;
- client->priority = priority;
- client->doorbell_id = GUC_DOORBELL_INVALID;
- spin_lock_init(&client->wq_lock);
-
- ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS,
- GFP_KERNEL);
- if (ret < 0)
- goto err_client;
-
- client->stage_id = ret;
-
- /* The first page is doorbell/proc_desc. Two followed pages are wq. */
- vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
- goto err_id;
- }
-
- /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
- client->vma = vma;
-
- vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- goto err_vma;
- }
- client->vaddr = vaddr;
-
- ret = reserve_doorbell(client);
- if (ret)
- goto err_vaddr;
-
- client->doorbell_offset = __select_cacheline(guc);
-
- /*
- * Since the doorbell only requires a single cacheline, we can save
- * space by putting the application process descriptor in the same
- * page. Use the half of the page that doesn't include the doorbell.
- */
- if (client->doorbell_offset >= (GUC_DB_SIZE / 2))
- client->proc_desc_offset = 0;
- else
- client->proc_desc_offset = (GUC_DB_SIZE / 2);
-
- DRM_DEBUG_DRIVER("new priority %u client %p: stage_id %u\n",
- priority, client, client->stage_id);
- DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n",
- client->doorbell_id, client->doorbell_offset);
-
- return client;
-
-err_vaddr:
- i915_gem_object_unpin_map(client->vma->obj);
-err_vma:
- i915_vma_unpin_and_release(&client->vma, 0);
-err_id:
- ida_simple_remove(&guc->stage_ids, client->stage_id);
-err_client:
- kfree(client);
- return ERR_PTR(ret);
-}
-
-static void guc_client_free(struct intel_guc_client *client)
-{
- unreserve_doorbell(client);
- i915_vma_unpin_and_release(&client->vma, I915_VMA_RELEASE_MAP);
- ida_simple_remove(&client->guc->stage_ids, client->stage_id);
- kfree(client);
-}
-
-static inline bool ctx_save_restore_disabled(struct intel_context *ce)
-{
- u32 sr = ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1];
-
-#define SR_DISABLED \
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | \
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)
-
- return (sr & SR_DISABLED) == SR_DISABLED;
-
-#undef SR_DISABLED
-}
-
-static int guc_clients_create(struct intel_guc *guc)
-{
- struct intel_guc_client *client;
-
- GEM_BUG_ON(guc->execbuf_client);
-
- client = guc_client_alloc(guc, GUC_CLIENT_PRIORITY_KMD_NORMAL);
- if (IS_ERR(client)) {
- DRM_ERROR("Failed to create GuC client for submission!\n");
- return PTR_ERR(client);
- }
- guc->execbuf_client = client;
-
- return 0;
-}
-
-static void guc_clients_destroy(struct intel_guc *guc)
-{
- struct intel_guc_client *client;
-
- client = fetch_and_zero(&guc->execbuf_client);
- if (client)
- guc_client_free(client);
-}
-
-static int __guc_client_enable(struct intel_guc_client *client)
-{
- int ret;
-
- guc_proc_desc_init(client);
- guc_stage_desc_init(client);
-
- ret = create_doorbell(client);
- if (ret)
- goto fail;
-
- return 0;
-
-fail:
- guc_stage_desc_fini(client);
- guc_proc_desc_fini(client);
- return ret;
-}
-
-static void __guc_client_disable(struct intel_guc_client *client)
-{
- /*
- * By the time we're here, GuC may have already been reset. if that is
- * the case, instead of trying (in vain) to communicate with it, let's
- * just cleanup the doorbell HW and our internal state.
- */
- if (intel_guc_is_running(client->guc))
- destroy_doorbell(client);
- else
- __fini_doorbell(client);
-
- guc_stage_desc_fini(client);
- guc_proc_desc_fini(client);
-}
-
-static int guc_clients_enable(struct intel_guc *guc)
-{
- return __guc_client_enable(guc->execbuf_client);
-}
-
-static void guc_clients_disable(struct intel_guc *guc)
-{
- if (guc->execbuf_client)
- __guc_client_disable(guc->execbuf_client);
-}
-
/*
* Set up the memory resources to be shared with the GuC (via the GGTT)
* at firmware loading time.
@@ -988,13 +524,20 @@ int intel_guc_submission_init(struct intel_guc *guc)
*/
GEM_BUG_ON(!guc->stage_desc_pool);
- WARN_ON(!guc_verify_doorbells(guc));
- ret = guc_clients_create(guc);
+ ret = guc_workqueue_create(guc);
if (ret)
goto err_pool;
+ ret = guc_proc_desc_create(guc);
+ if (ret)
+ goto err_workqueue;
+
+ spin_lock_init(&guc->wq_lock);
+
return 0;
+err_workqueue:
+ guc_workqueue_destroy(guc);
err_pool:
guc_stage_desc_pool_destroy(guc);
return ret;
@@ -1002,83 +545,37 @@ err_pool:
void intel_guc_submission_fini(struct intel_guc *guc)
{
- guc_clients_destroy(guc);
- WARN_ON(!guc_verify_doorbells(guc));
-
- if (guc->stage_desc_pool)
+ if (guc->stage_desc_pool) {
+ guc_proc_desc_destroy(guc);
+ guc_workqueue_destroy(guc);
guc_stage_desc_pool_destroy(guc);
+ }
}
static void guc_interrupts_capture(struct intel_gt *gt)
{
- struct intel_rps *rps = &gt->rps;
struct intel_uncore *uncore = gt->uncore;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int irqs;
+ u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
+ u32 dmask = irqs << 16 | irqs;
- /* tell all command streamers to forward interrupts (but not vblank)
- * to GuC
- */
- irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
- for_each_engine(engine, gt, id)
- ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
-
- /* route USER_INTERRUPT to Host, all others are sent to GuC. */
- irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
- GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
- /* These three registers have the same bit definitions */
- intel_uncore_write(uncore, GUC_BCS_RCS_IER, ~irqs);
- intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, ~irqs);
- intel_uncore_write(uncore, GUC_WD_VECS_IER, ~irqs);
+ GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
- /*
- * The REDIRECT_TO_GUC bit of the PMINTRMSK register directs all
- * (unmasked) PM interrupts to the GuC. All other bits of this
- * register *disable* generation of a specific interrupt.
- *
- * 'pm_intrmsk_mbz' indicates bits that are NOT to be set when
- * writing to the PM interrupt mask register, i.e. interrupts
- * that must not be disabled.
- *
- * If the GuC is handling these interrupts, then we must not let
- * the PM code disable ANY interrupt that the GuC is expecting.
- * So for each ENABLED (0) bit in this register, we must SET the
- * bit in pm_intrmsk_mbz so that it's left enabled for the GuC.
- * GuC needs ARAT expired interrupt unmasked hence it is set in
- * pm_intrmsk_mbz.
- *
- * Here we CLEAR REDIRECT_TO_GUC bit in pm_intrmsk_mbz, which will
- * result in the register bit being left SET!
- */
- rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
- rps->pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+ /* Don't handle the ctx switch interrupt in GuC submission mode */
+ intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0);
+ intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0);
}
static void guc_interrupts_release(struct intel_gt *gt)
{
- struct intel_rps *rps = &gt->rps;
struct intel_uncore *uncore = gt->uncore;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int irqs;
+ u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
+ u32 dmask = irqs << 16 | irqs;
- /*
- * tell all command streamers NOT to forward interrupts or vblank
- * to GuC.
- */
- irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER);
- irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING);
- for_each_engine(engine, gt, id)
- ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
-
- /* route all GT interrupts to the host */
- intel_uncore_write(uncore, GUC_BCS_RCS_IER, 0);
- intel_uncore_write(uncore, GUC_VCS2_VCS1_IER, 0);
- intel_uncore_write(uncore, GUC_WD_VECS_IER, 0);
-
- rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
- rps->pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK;
+ GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
+
+ /* Handle ctx switch interrupts again */
+ intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask);
+ intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask);
}
static void guc_set_default_submission(struct intel_engine_cs *engine)
@@ -1102,11 +599,10 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
engine->park = engine->unpark = NULL;
engine->reset.prepare = guc_reset_prepare;
- engine->reset.reset = guc_reset;
+ engine->reset.rewind = guc_reset_rewind;
+ engine->reset.cancel = guc_reset_cancel;
engine->reset.finish = guc_reset_finish;
- engine->cancel_requests = guc_cancel_requests;
-
engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
@@ -1119,16 +615,11 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
}
-int intel_guc_submission_enable(struct intel_guc *guc)
+void intel_guc_submission_enable(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
enum intel_engine_id id;
- int err;
-
- err = i915_inject_probe_error(gt->i915, -ENXIO);
- if (err)
- return err;
/*
* We're using GuC work items for submitting work through GuC. Since
@@ -1143,11 +634,8 @@ int intel_guc_submission_enable(struct intel_guc *guc)
sizeof(struct guc_wq_item) *
I915_NUM_ENGINES > GUC_WQ_SIZE);
- GEM_BUG_ON(!guc->execbuf_client);
-
- err = guc_clients_enable(guc);
- if (err)
- return err;
+ guc_proc_desc_init(guc);
+ guc_stage_desc_init(guc);
/* Take over from manual control of ELSP (execlists) */
guc_interrupts_capture(gt);
@@ -1156,8 +644,6 @@ int intel_guc_submission_enable(struct intel_guc *guc)
engine->set_default_submission = guc_set_default_submission;
engine->set_default_submission(engine);
}
-
- return 0;
}
void intel_guc_submission_disable(struct intel_guc *guc)
@@ -1166,8 +652,12 @@ void intel_guc_submission_disable(struct intel_guc *guc)
GEM_BUG_ON(gt->awake); /* GT should be parked first */
+ /* Note: By the time we're here, GuC may have already been reset */
+
guc_interrupts_release(gt);
- guc_clients_disable(guc);
+
+ guc_stage_desc_fini(guc);
+ guc_proc_desc_fini(guc);
}
static bool __guc_submission_support(struct intel_guc *guc)
@@ -1186,6 +676,7 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
guc->submission_supported = __guc_submission_support(guc);
}
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_guc.c"
-#endif
+bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
+{
+ return engine->set_default_submission == guc_set_default_submission;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index 54d716828352..e402a2932592 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -6,62 +6,18 @@
#ifndef _INTEL_GUC_SUBMISSION_H_
#define _INTEL_GUC_SUBMISSION_H_
-#include <linux/spinlock.h>
+#include <linux/types.h>
-#include "gt/intel_engine_types.h"
-
-#include "i915_gem.h"
-#include "i915_selftest.h"
-
-struct drm_i915_private;
-
-/*
- * This structure primarily describes the GEM object shared with the GuC.
- * The specs sometimes refer to this object as a "GuC context", but we use
- * the term "client" to avoid confusion with hardware contexts. This
- * GEM object is held for the entire lifetime of our interaction with
- * the GuC, being allocated before the GuC is loaded with its firmware.
- * Because there's no way to update the address used by the GuC after
- * initialisation, the shared object must stay pinned into the GGTT as
- * long as the GuC is in use. We also keep the first page (only) mapped
- * into kernel address space, as it includes shared data that must be
- * updated on every request submission.
- *
- * The single GEM object described here is actually made up of several
- * separate areas, as far as the GuC is concerned. The first page (kept
- * kmap'd) includes the "process descriptor" which holds sequence data for
- * the doorbell, and one cacheline which actually *is* the doorbell; a
- * write to this will "ring the doorbell" (i.e. send an interrupt to the
- * GuC). The subsequent pages of the client object constitute the work
- * queue (a circular array of work items), again described in the process
- * descriptor. Work queue pages are mapped momentarily as required.
- */
-struct intel_guc_client {
- struct i915_vma *vma;
- void *vaddr;
- struct intel_guc *guc;
-
- /* bitmap of (host) engine ids */
- u32 priority;
- u32 stage_id;
- u32 proc_desc_offset;
-
- u16 doorbell_id;
- unsigned long doorbell_offset;
-
- /* Protects GuC client's WQ access */
- spinlock_t wq_lock;
-
- /* For testing purposes, use nop WQ items instead of real ones */
- I915_SELFTEST_DECLARE(bool use_nop_wqi);
-};
+struct intel_guc;
+struct intel_engine_cs;
void intel_guc_submission_init_early(struct intel_guc *guc);
int intel_guc_submission_init(struct intel_guc *guc);
-int intel_guc_submission_enable(struct intel_guc *guc);
+void intel_guc_submission_enable(struct intel_guc *guc);
void intel_guc_submission_disable(struct intel_guc *guc);
void intel_guc_submission_fini(struct intel_guc *guc);
int intel_guc_preempt_work_create(struct intel_guc *guc);
void intel_guc_preempt_work_destroy(struct intel_guc *guc);
+bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index d654340d4d03..eee193bf2cc4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -39,5 +39,5 @@ void intel_huc_fw_init_early(struct intel_huc *huc)
int intel_huc_fw_upload(struct intel_huc *huc)
{
/* HW doesn't look at destination address for HuC, so set it to 0 */
- return intel_uc_fw_upload(&huc->fw, huc_to_gt(huc), 0, HUC_UKERNEL);
+ return intel_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 629b19377a29..64934a876a50 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -12,6 +12,9 @@
#include "i915_drv.h"
+static const struct intel_uc_ops uc_ops_off;
+static const struct intel_uc_ops uc_ops_on;
+
/* Reset GuC providing us with fresh state for both GuC and HuC.
*/
static int __intel_uc_reset_hw(struct intel_uc *uc)
@@ -89,6 +92,11 @@ void intel_uc_init_early(struct intel_uc *uc)
intel_huc_init_early(&uc->huc);
__confirm_options(uc);
+
+ if (intel_uc_uses_guc(uc))
+ uc->ops = &uc_ops_on;
+ else
+ uc->ops = &uc_ops_off;
}
void intel_uc_driver_late_release(struct intel_uc *uc)
@@ -123,6 +131,11 @@ static void __uc_free_load_err_log(struct intel_uc *uc)
i915_gem_object_put(log);
}
+static inline bool guc_communication_enabled(struct intel_guc *guc)
+{
+ return intel_guc_ct_enabled(&guc->ct);
+}
+
/*
* Events triggered while CT buffers are disabled are logged in the SCRATCH_15
* register using the same bits used in the CT message payload. Since our
@@ -158,7 +171,7 @@ static void guc_handle_mmio_msg(struct intel_guc *guc)
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
/* we need communication to be enabled to reply to GuC */
- GEM_BUG_ON(guc->handler == intel_guc_to_host_event_handler_nop);
+ GEM_BUG_ON(!guc_communication_enabled(guc));
if (!guc->mmio_msg)
return;
@@ -185,11 +198,6 @@ static void guc_disable_interrupts(struct intel_guc *guc)
guc->interrupts.disable(guc);
}
-static inline bool guc_communication_enabled(struct intel_guc *guc)
-{
- return guc->send != intel_guc_send_nop;
-}
-
static int guc_enable_communication(struct intel_guc *guc)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
@@ -205,9 +213,6 @@ static int guc_enable_communication(struct intel_guc *guc)
if (ret)
return ret;
- guc->send = intel_guc_send_ct;
- guc->handler = intel_guc_to_host_event_handler_ct;
-
/* check for mmio messages received before/during the CT enable */
guc_get_mmio_msg(guc);
guc_handle_mmio_msg(guc);
@@ -216,7 +221,7 @@ static int guc_enable_communication(struct intel_guc *guc)
/* check for CT messages received before we enabled interrupts */
spin_lock_irq(&i915->irq_lock);
- intel_guc_to_host_event_handler_ct(guc);
+ intel_guc_ct_event_handler(&guc->ct);
spin_unlock_irq(&i915->irq_lock);
DRM_INFO("GuC communication enabled\n");
@@ -224,7 +229,7 @@ static int guc_enable_communication(struct intel_guc *guc)
return 0;
}
-static void __guc_stop_communication(struct intel_guc *guc)
+static void guc_disable_communication(struct intel_guc *guc)
{
/*
* Events generated during or after CT disable are logged by guc in
@@ -235,23 +240,6 @@ static void __guc_stop_communication(struct intel_guc *guc)
guc_disable_interrupts(guc);
- guc->send = intel_guc_send_nop;
- guc->handler = intel_guc_to_host_event_handler_nop;
-}
-
-static void guc_stop_communication(struct intel_guc *guc)
-{
- intel_guc_ct_stop(&guc->ct);
-
- __guc_stop_communication(guc);
-
- DRM_INFO("GuC communication stopped\n");
-}
-
-static void guc_disable_communication(struct intel_guc *guc)
-{
- __guc_stop_communication(guc);
-
intel_guc_ct_disable(&guc->ct);
/*
@@ -265,41 +253,33 @@ static void guc_disable_communication(struct intel_guc *guc)
DRM_INFO("GuC communication disabled\n");
}
-void intel_uc_fetch_firmwares(struct intel_uc *uc)
+static void __uc_fetch_firmwares(struct intel_uc *uc)
{
- struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
int err;
- if (!intel_uc_uses_guc(uc))
- return;
+ GEM_BUG_ON(!intel_uc_uses_guc(uc));
- err = intel_uc_fw_fetch(&uc->guc.fw, i915);
+ err = intel_uc_fw_fetch(&uc->guc.fw);
if (err)
return;
if (intel_uc_uses_huc(uc))
- intel_uc_fw_fetch(&uc->huc.fw, i915);
+ intel_uc_fw_fetch(&uc->huc.fw);
}
-void intel_uc_cleanup_firmwares(struct intel_uc *uc)
+static void __uc_cleanup_firmwares(struct intel_uc *uc)
{
- if (!intel_uc_uses_guc(uc))
- return;
-
- if (intel_uc_uses_huc(uc))
- intel_uc_fw_cleanup_fetch(&uc->huc.fw);
-
+ intel_uc_fw_cleanup_fetch(&uc->huc.fw);
intel_uc_fw_cleanup_fetch(&uc->guc.fw);
}
-void intel_uc_init(struct intel_uc *uc)
+static void __uc_init(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret;
- if (!intel_uc_uses_guc(uc))
- return;
+ GEM_BUG_ON(!intel_uc_uses_guc(uc));
/* XXX: GuC submission is unavailable for now */
GEM_BUG_ON(intel_uc_supports_guc_submission(uc));
@@ -314,17 +294,10 @@ void intel_uc_init(struct intel_uc *uc)
intel_huc_init(huc);
}
-void intel_uc_fini(struct intel_uc *uc)
+static void __uc_fini(struct intel_uc *uc)
{
- struct intel_guc *guc = &uc->guc;
-
- if (!intel_uc_uses_guc(uc))
- return;
-
- if (intel_uc_uses_huc(uc))
- intel_huc_fini(&uc->huc);
-
- intel_guc_fini(guc);
+ intel_huc_fini(&uc->huc);
+ intel_guc_fini(&uc->guc);
__uc_free_load_err_log(uc);
}
@@ -342,14 +315,6 @@ static int __uc_sanitize(struct intel_uc *uc)
return __intel_uc_reset_hw(uc);
}
-void intel_uc_sanitize(struct intel_uc *uc)
-{
- if (!intel_uc_supports_guc(uc))
- return;
-
- __uc_sanitize(uc);
-}
-
/* Initialize and verify the uC regs related to uC positioning in WOPCM */
static int uc_init_wopcm(struct intel_uc *uc)
{
@@ -413,13 +378,8 @@ static bool uc_is_wopcm_locked(struct intel_uc *uc)
(intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID);
}
-int intel_uc_init_hw(struct intel_uc *uc)
+static int __uc_check_hw(struct intel_uc *uc)
{
- struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
- struct intel_guc *guc = &uc->guc;
- struct intel_huc *huc = &uc->huc;
- int ret, attempts;
-
if (!intel_uc_supports_guc(uc))
return 0;
@@ -428,11 +388,24 @@ int intel_uc_init_hw(struct intel_uc *uc)
* before on this system after reboot, otherwise we risk GPU hangs.
* To check if GuC was loaded before we look at WOPCM registers.
*/
- if (!intel_uc_uses_guc(uc) && !uc_is_wopcm_locked(uc))
- return 0;
+ if (uc_is_wopcm_locked(uc))
+ return -EIO;
+
+ return 0;
+}
+
+static int __uc_init_hw(struct intel_uc *uc)
+{
+ struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
+ struct intel_guc *guc = &uc->guc;
+ struct intel_huc *huc = &uc->huc;
+ int ret, attempts;
+
+ GEM_BUG_ON(!intel_uc_supports_guc(uc));
+ GEM_BUG_ON(!intel_uc_uses_guc(uc));
if (!intel_uc_fw_is_available(&guc->fw)) {
- ret = uc_is_wopcm_locked(uc) ||
+ ret = __uc_check_hw(uc) ||
intel_uc_fw_is_overridden(&guc->fw) ||
intel_uc_supports_guc_submission(uc) ?
intel_uc_fw_status_to_error(guc->fw.status) : 0;
@@ -486,11 +459,8 @@ int intel_uc_init_hw(struct intel_uc *uc)
if (ret)
goto err_communication;
- if (intel_uc_supports_guc_submission(uc)) {
- ret = intel_guc_submission_enable(guc);
- if (ret)
- goto err_communication;
- }
+ if (intel_uc_supports_guc_submission(uc))
+ intel_guc_submission_enable(guc);
dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
@@ -531,7 +501,7 @@ err_out:
return -EIO;
}
-void intel_uc_fini_hw(struct intel_uc *uc)
+static void __uc_fini_hw(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
@@ -560,7 +530,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc)
if (!intel_guc_is_running(guc))
return;
- guc_stop_communication(guc);
+ guc_disable_communication(guc);
__uc_sanitize(uc);
}
@@ -631,3 +601,20 @@ int intel_uc_runtime_resume(struct intel_uc *uc)
*/
return __uc_resume(uc, true);
}
+
+static const struct intel_uc_ops uc_ops_off = {
+ .init_hw = __uc_check_hw,
+};
+
+static const struct intel_uc_ops uc_ops_on = {
+ .sanitize = __uc_sanitize,
+
+ .init_fw = __uc_fetch_firmwares,
+ .fini_fw = __uc_cleanup_firmwares,
+
+ .init = __uc_init,
+ .fini = __uc_fini,
+
+ .init_hw = __uc_init_hw,
+ .fini_hw = __uc_fini_hw,
+};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 527995c21196..49c913524686 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -10,7 +10,20 @@
#include "intel_huc.h"
#include "i915_params.h"
+struct intel_uc;
+
+struct intel_uc_ops {
+ int (*sanitize)(struct intel_uc *uc);
+ void (*init_fw)(struct intel_uc *uc);
+ void (*fini_fw)(struct intel_uc *uc);
+ void (*init)(struct intel_uc *uc);
+ void (*fini)(struct intel_uc *uc);
+ int (*init_hw)(struct intel_uc *uc);
+ void (*fini_hw)(struct intel_uc *uc);
+};
+
struct intel_uc {
+ struct intel_uc_ops const *ops;
struct intel_guc guc;
struct intel_huc huc;
@@ -21,13 +34,6 @@ struct intel_uc {
void intel_uc_init_early(struct intel_uc *uc);
void intel_uc_driver_late_release(struct intel_uc *uc);
void intel_uc_init_mmio(struct intel_uc *uc);
-void intel_uc_fetch_firmwares(struct intel_uc *uc);
-void intel_uc_cleanup_firmwares(struct intel_uc *uc);
-void intel_uc_sanitize(struct intel_uc *uc);
-void intel_uc_init(struct intel_uc *uc);
-int intel_uc_init_hw(struct intel_uc *uc);
-void intel_uc_fini_hw(struct intel_uc *uc);
-void intel_uc_fini(struct intel_uc *uc);
void intel_uc_reset_prepare(struct intel_uc *uc);
void intel_uc_suspend(struct intel_uc *uc);
void intel_uc_runtime_suspend(struct intel_uc *uc);
@@ -64,4 +70,20 @@ static inline bool intel_uc_uses_huc(struct intel_uc *uc)
return intel_huc_is_enabled(&uc->huc);
}
+#define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \
+static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
+{ \
+ if (uc->ops->_OPS) \
+ return uc->ops->_OPS(uc); \
+ return _RET; \
+}
+intel_uc_ops_function(sanitize, sanitize, int, 0);
+intel_uc_ops_function(fetch_firmwares, init_fw, void, );
+intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
+intel_uc_ops_function(init, init, void, );
+intel_uc_ops_function(fini, fini, void, );
+intel_uc_ops_function(init_hw, init_hw, int, 0);
+intel_uc_ops_function(fini_hw, fini_hw, void, );
+#undef intel_uc_ops_function
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 66a30ab7044a..8ee0a0c7f447 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -11,7 +11,6 @@
#include "intel_uc_fw_abi.h"
#include "i915_drv.h"
-#ifdef CONFIG_DRM_I915_DEBUG_GUC
static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
{
GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED);
@@ -22,6 +21,7 @@ static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
return container_of(uc_fw, struct intel_gt, uc.huc.fw);
}
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
enum intel_uc_fw_status status)
{
@@ -219,10 +219,9 @@ void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
INTEL_UC_FIRMWARE_NOT_SUPPORTED);
}
-static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw,
- struct drm_i915_private *i915,
- int e)
+static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e)
{
+ struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915;
bool user = e == -EINVAL;
if (i915_inject_probe_error(i915, e)) {
@@ -260,14 +259,14 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw,
/**
* intel_uc_fw_fetch - fetch uC firmware
* @uc_fw: uC firmware
- * @i915: device private
*
* Fetch uC firmware into GEM obj.
*
* Return: 0 on success, a negative errno code on failure.
*/
-int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915)
+int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
{
+ struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915;
struct device *dev = i915->drm.dev;
struct drm_i915_gem_object *obj;
const struct firmware *fw = NULL;
@@ -282,8 +281,8 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915)
if (err)
return err;
- __force_fw_fetch_failures(uc_fw, i915, -EINVAL);
- __force_fw_fetch_failures(uc_fw, i915, -ESTALE);
+ __force_fw_fetch_failures(uc_fw, -EINVAL);
+ __force_fw_fetch_failures(uc_fw, -ESTALE);
err = request_firmware(&fw, uc_fw->path, dev);
if (err)
@@ -390,8 +389,9 @@ fail:
return err;
}
-static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt)
+static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw)
{
+ struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
struct drm_mm_node *node = &ggtt->uc_fw;
GEM_BUG_ON(!drm_mm_node_allocated(node));
@@ -401,13 +401,12 @@ static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt)
return lower_32_bits(node->start);
}
-static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw,
- struct intel_gt *gt)
+static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
{
struct drm_i915_gem_object *obj = uc_fw->obj;
- struct i915_ggtt *ggtt = gt->ggtt;
+ struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
struct i915_vma dummy = {
- .node.start = uc_fw_ggtt_offset(uc_fw, ggtt),
+ .node.start = uc_fw_ggtt_offset(uc_fw),
.node.size = obj->base.size,
.pages = obj->mm.pages,
.vm = &ggtt->vm,
@@ -422,19 +421,18 @@ static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw,
ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0);
}
-static void intel_uc_fw_ggtt_unbind(struct intel_uc_fw *uc_fw,
- struct intel_gt *gt)
+static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
{
struct drm_i915_gem_object *obj = uc_fw->obj;
- struct i915_ggtt *ggtt = gt->ggtt;
- u64 start = uc_fw_ggtt_offset(uc_fw, ggtt);
+ struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
+ u64 start = uc_fw_ggtt_offset(uc_fw);
ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size);
}
-static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
- u32 wopcm_offset, u32 dma_flags)
+static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
{
+ struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
struct intel_uncore *uncore = gt->uncore;
u64 offset;
int ret;
@@ -446,13 +444,13 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
/* Set the source address for the uCode */
- offset = uc_fw_ggtt_offset(uc_fw, gt->ggtt);
+ offset = uc_fw_ggtt_offset(uc_fw);
GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000);
intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset));
intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset));
/* Set the DMA destination */
- intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, wopcm_offset);
+ intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, dst_offset);
intel_uncore_write_fw(uncore, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
/*
@@ -484,17 +482,16 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
/**
* intel_uc_fw_upload - load uC firmware using custom loader
* @uc_fw: uC firmware
- * @gt: the intel_gt structure
- * @wopcm_offset: destination offset in wopcm
+ * @dst_offset: destination offset
* @dma_flags: flags for flags for dma ctrl
*
* Loads uC firmware and updates internal flags.
*
* Return: 0 on success, non-zero on failure.
*/
-int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
- u32 wopcm_offset, u32 dma_flags)
+int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
{
+ struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
int err;
/* make sure the status was cleared the last time we reset the uc */
@@ -508,9 +505,9 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
return -ENOEXEC;
/* Call custom loader */
- intel_uc_fw_ggtt_bind(uc_fw, gt);
- err = uc_fw_xfer(uc_fw, gt, wopcm_offset, dma_flags);
- intel_uc_fw_ggtt_unbind(uc_fw, gt);
+ uc_fw_bind_ggtt(uc_fw);
+ err = uc_fw_xfer(uc_fw, dst_offset, dma_flags);
+ uc_fw_unbind_ggtt(uc_fw);
if (err)
goto fail;
@@ -547,10 +544,7 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
{
- if (!intel_uc_fw_is_available(uc_fw))
- return;
-
- i915_gem_object_unpin_pages(uc_fw->obj);
+ intel_uc_fw_cleanup_fetch(uc_fw);
}
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 7a0a5989afc9..1f30543d0d2d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -229,10 +229,9 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
enum intel_uc_fw_type type, bool supported,
enum intel_platform platform, u8 rev);
-int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915);
+int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
-int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
- u32 wopcm_offset, u32 dma_flags);
+int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);
int intel_uc_fw_init(struct intel_uc_fw *uc_fw);
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len);
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
deleted file mode 100644
index d8a80388bd31..000000000000
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ /dev/null
@@ -1,299 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2017 Intel Corporation
- */
-
-#include "i915_selftest.h"
-#include "gem/i915_gem_pm.h"
-
-/* max doorbell number + negative test for each client type */
-#define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM)
-
-static struct intel_guc_client *clients[ATTEMPTS];
-
-static bool available_dbs(struct intel_guc *guc, u32 priority)
-{
- unsigned long offset;
- unsigned long end;
- u16 id;
-
- /* first half is used for normal priority, second half for high */
- offset = 0;
- end = GUC_NUM_DOORBELLS / 2;
- if (priority <= GUC_CLIENT_PRIORITY_HIGH) {
- offset = end;
- end += offset;
- }
-
- id = find_next_zero_bit(guc->doorbell_bitmap, end, offset);
- if (id < end)
- return true;
-
- return false;
-}
-
-static int check_all_doorbells(struct intel_guc *guc)
-{
- u16 db_id;
-
- pr_info_once("Max number of doorbells: %d", GUC_NUM_DOORBELLS);
- for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) {
- if (!doorbell_ok(guc, db_id)) {
- pr_err("doorbell %d, not ok\n", db_id);
- return -EIO;
- }
- }
-
- return 0;
-}
-
-static int ring_doorbell_nop(struct intel_guc_client *client)
-{
- struct guc_process_desc *desc = __get_process_desc(client);
- int err;
-
- client->use_nop_wqi = true;
-
- spin_lock_irq(&client->wq_lock);
-
- guc_wq_item_append(client, 0, 0, 0, 0);
- guc_ring_doorbell(client);
-
- spin_unlock_irq(&client->wq_lock);
-
- client->use_nop_wqi = false;
-
- /* if there are no issues GuC will update the WQ head and keep the
- * WQ in active status
- */
- err = wait_for(READ_ONCE(desc->head) == READ_ONCE(desc->tail), 10);
- if (err) {
- pr_err("doorbell %u ring failed!\n", client->doorbell_id);
- return -EIO;
- }
-
- if (desc->wq_status != WQ_STATUS_ACTIVE) {
- pr_err("doorbell %u ring put WQ in bad state (%u)!\n",
- client->doorbell_id, desc->wq_status);
- return -EIO;
- }
-
- return 0;
-}
-
-/*
- * Basic client sanity check, handy to validate create_clients.
- */
-static int validate_client(struct intel_guc_client *client, int client_priority)
-{
- if (client->priority != client_priority ||
- client->doorbell_id == GUC_DOORBELL_INVALID)
- return -EINVAL;
- else
- return 0;
-}
-
-static bool client_doorbell_in_sync(struct intel_guc_client *client)
-{
- return !client || doorbell_ok(client->guc, client->doorbell_id);
-}
-
-/*
- * Check that we're able to synchronize guc_clients with their doorbells
- *
- * We're creating clients and reserving doorbells once, at module load. During
- * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to
- * GuC being reset. In other words - GuC clients are still around, but the
- * status of their doorbells may be incorrect. This is the reason behind
- * validating that the doorbells status expected by the driver matches what the
- * GuC/HW have.
- */
-static int igt_guc_clients(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_guc *guc = &gt->uc.guc;
- intel_wakeref_t wakeref;
- int err = 0;
-
- GEM_BUG_ON(!HAS_GT_UC(gt->i915));
- wakeref = intel_runtime_pm_get(gt->uncore->rpm);
-
- err = check_all_doorbells(guc);
- if (err)
- goto unlock;
-
- /*
- * Get rid of clients created during driver load because the test will
- * recreate them.
- */
- guc_clients_disable(guc);
- guc_clients_destroy(guc);
- if (guc->execbuf_client) {
- pr_err("guc_clients_destroy lied!\n");
- err = -EINVAL;
- goto unlock;
- }
-
- err = guc_clients_create(guc);
- if (err) {
- pr_err("Failed to create clients\n");
- goto unlock;
- }
- GEM_BUG_ON(!guc->execbuf_client);
-
- err = validate_client(guc->execbuf_client,
- GUC_CLIENT_PRIORITY_KMD_NORMAL);
- if (err) {
- pr_err("execbug client validation failed\n");
- goto out;
- }
-
- /* the client should now have reserved a doorbell */
- if (!has_doorbell(guc->execbuf_client)) {
- pr_err("guc_clients_create didn't reserve doorbells\n");
- err = -EINVAL;
- goto out;
- }
-
- /* Now enable the clients */
- guc_clients_enable(guc);
-
- /* each client should now have received a doorbell */
- if (!client_doorbell_in_sync(guc->execbuf_client)) {
- pr_err("failed to initialize the doorbells\n");
- err = -EINVAL;
- goto out;
- }
-
- /*
- * Basic test - an attempt to reallocate a valid doorbell to the
- * client it is currently assigned should not cause a failure.
- */
- err = create_doorbell(guc->execbuf_client);
-
-out:
- /*
- * Leave clean state for other test, plus the driver always destroy the
- * clients during unload.
- */
- guc_clients_disable(guc);
- guc_clients_destroy(guc);
- guc_clients_create(guc);
- guc_clients_enable(guc);
-unlock:
- intel_runtime_pm_put(gt->uncore->rpm, wakeref);
- return err;
-}
-
-/*
- * Create as many clients as number of doorbells. Note that there's already
- * client(s)/doorbell(s) created during driver load, but this test creates
- * its own and do not interact with the existing ones.
- */
-static int igt_guc_doorbells(void *arg)
-{
- struct intel_gt *gt = arg;
- struct intel_guc *guc = &gt->uc.guc;
- intel_wakeref_t wakeref;
- int i, err = 0;
- u16 db_id;
-
- GEM_BUG_ON(!HAS_GT_UC(gt->i915));
- wakeref = intel_runtime_pm_get(gt->uncore->rpm);
-
- err = check_all_doorbells(guc);
- if (err)
- goto unlock;
-
- for (i = 0; i < ATTEMPTS; i++) {
- clients[i] = guc_client_alloc(guc, i % GUC_CLIENT_PRIORITY_NUM);
-
- if (!clients[i]) {
- pr_err("[%d] No guc client\n", i);
- err = -EINVAL;
- goto out;
- }
-
- if (IS_ERR(clients[i])) {
- if (PTR_ERR(clients[i]) != -ENOSPC) {
- pr_err("[%d] unexpected error\n", i);
- err = PTR_ERR(clients[i]);
- goto out;
- }
-
- if (available_dbs(guc, i % GUC_CLIENT_PRIORITY_NUM)) {
- pr_err("[%d] non-db related alloc fail\n", i);
- err = -EINVAL;
- goto out;
- }
-
- /* expected, ran out of dbs for this client type */
- continue;
- }
-
- /*
- * The check below is only valid because we keep a doorbell
- * assigned during the whole life of the client.
- */
- if (clients[i]->stage_id >= GUC_NUM_DOORBELLS) {
- pr_err("[%d] more clients than doorbells (%d >= %d)\n",
- i, clients[i]->stage_id, GUC_NUM_DOORBELLS);
- err = -EINVAL;
- goto out;
- }
-
- err = validate_client(clients[i], i % GUC_CLIENT_PRIORITY_NUM);
- if (err) {
- pr_err("[%d] client_alloc sanity check failed!\n", i);
- err = -EINVAL;
- goto out;
- }
-
- db_id = clients[i]->doorbell_id;
-
- err = __guc_client_enable(clients[i]);
- if (err) {
- pr_err("[%d] Failed to create a doorbell\n", i);
- goto out;
- }
-
- /* doorbell id shouldn't change, we are holding the mutex */
- if (db_id != clients[i]->doorbell_id) {
- pr_err("[%d] doorbell id changed (%d != %d)\n",
- i, db_id, clients[i]->doorbell_id);
- err = -EINVAL;
- goto out;
- }
-
- err = check_all_doorbells(guc);
- if (err)
- goto out;
-
- err = ring_doorbell_nop(clients[i]);
- if (err)
- goto out;
- }
-
-out:
- for (i = 0; i < ATTEMPTS; i++)
- if (!IS_ERR_OR_NULL(clients[i])) {
- __guc_client_disable(clients[i]);
- guc_client_free(clients[i]);
- }
-unlock:
- intel_runtime_pm_put(gt->uncore->rpm, wakeref);
- return err;
-}
-
-int intel_guc_live_selftest(struct drm_i915_private *i915)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(igt_guc_clients),
- SUBTEST(igt_guc_doorbells),
- };
-
- if (!USES_GUC_SUBMISSION(i915))
- return 0;
-
- return intel_gt_live_subtests(tests, &i915->gt);
-}