aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r--drivers/gpu/drm/i915/Makefile4
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic_plane.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_bw.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c22
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_overlay.c2
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.c2
-rw-r--r--drivers/gpu/drm/i915/dma_resv_utils.c17
-rw-r--r--drivers/gpu/drm/i915/dma_resv_utils.h13
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_clflush.c18
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c26
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_create.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c15
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.c22
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c198
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_internal.c44
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c13
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c92
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h48
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h48
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c32
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_phys.c6
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pm.c6
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_region.c4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c195
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.c137
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.c19
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_throttle.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.c784
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.h41
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c627
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h41
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_wait.c12
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gemfs.c5
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c134
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c2
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c71
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c4
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c26
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c44
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.c151
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.h2
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c34
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c68
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c40
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_stats.h33
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h84
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_user.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c414
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c119
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_debugfs.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c31
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c32
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.h9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c58
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c143
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c502
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c50
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_cs.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_pm.c35
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c6
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c8
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c22
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_migrate.c7
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_mocs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_reset.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_ring_submission.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_slpc.c6
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c6
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h42
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c18
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c33
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c62
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c157
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c373
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c75
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c9
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c188
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h18
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c175
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c12
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c2
-rw-r--r--drivers/gpu/drm/i915/i915_active.c28
-rw-r--r--drivers/gpu/drm/i915/i915_active.h17
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c42
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs_params.c4
-rw-r--r--drivers/gpu/drm/i915/i915_deps.c237
-rw-r--r--drivers/gpu/drm/i915/i915_deps.h45
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c49
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h111
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c51
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.c10
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c234
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.h4
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c74
-rw-r--r--drivers/gpu/drm/i915/i915_params.c3
-rw-r--r--drivers/gpu/drm/i915/i915_params.h1
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c20
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c24
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c14
-rw-r--r--drivers/gpu/drm/i915/i915_query.c2
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h171
-rw-r--r--drivers/gpu/drm/i915/i915_request.c93
-rw-r--r--drivers/gpu/drm/i915/i915_request.h25
-rw-r--r--drivers/gpu/drm/i915/i915_scatterlist.c62
-rw-r--r--drivers/gpu/drm/i915/i915_scatterlist.h76
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence.c28
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence.h23
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence_work.c2
-rw-r--r--drivers/gpu/drm/i915/i915_sysfs.c41
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c526
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h14
-rw-r--r--drivers/gpu/drm/i915/i915_vma_snapshot.c134
-rw-r--r--drivers/gpu/drm/i915/i915_vma_snapshot.h112
-rw-r--r--drivers/gpu/drm/i915/i915_vma_types.h2
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.c33
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.h13
-rw-r--r--drivers/gpu/drm/i915/intel_gvt.c2
-rw-r--r--drivers/gpu/drm/i915/intel_memory_region.c154
-rw-r--r--drivers/gpu/drm/i915/intel_memory_region.h9
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c37
-rw-r--r--drivers/gpu/drm/i915/intel_region_ttm.c50
-rw-r--r--drivers/gpu/drm/i915/intel_region_ttm.h7
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c3
-rw-r--r--drivers/gpu/drm/i915/intel_step.c77
-rw-r--r--drivers/gpu/drm/i915/intel_step.h3
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c52
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.h7
-rw-r--r--drivers/gpu/drm/i915/intel_wopcm.c2
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_pm.c37
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_pm.h19
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee.c4
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_active.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_evict.c25
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_gtt.c18
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_perf.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_request.c24
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_selftest.c4
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_sw_fence.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_vma.c17
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_flush_test.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_live_test.c4
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_reset.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_memory_region.c12
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_uncore.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/lib_sw_fence.c8
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.c34
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gtt.c12
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_region.c19
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_uncore.c2
187 files changed, 6534 insertions, 2535 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0db42a60c89f..6836b020a5be 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -61,7 +61,6 @@ i915-y += i915_driver.o \
# core library code
i915-y += \
- dma_resv_utils.o \
i915_memcpy.o \
i915_mm.o \
i915_sw_fence.o \
@@ -155,6 +154,7 @@ gem-y += \
gem/i915_gem_throttle.o \
gem/i915_gem_tiling.o \
gem/i915_gem_ttm.o \
+ gem/i915_gem_ttm_move.o \
gem/i915_gem_ttm_pm.o \
gem/i915_gem_userptr.o \
gem/i915_gem_wait.o \
@@ -164,6 +164,7 @@ i915-y += \
i915_active.o \
i915_buddy.o \
i915_cmd_parser.o \
+ i915_deps.o \
i915_gem_evict.o \
i915_gem_gtt.o \
i915_gem_ww.o \
@@ -174,6 +175,7 @@ i915-y += \
i915_trace_points.o \
i915_ttm_buddy_manager.o \
i915_vma.o \
+ i915_vma_snapshot.o \
intel_wopcm.o
# general-purpose microcontroller (GuC) support
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
index 314c64e662dc..bec02333bdeb 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
@@ -821,7 +821,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
* maximum clocks following a vblank miss (see do_rps_boost()).
*/
if (!state->rps_interactive) {
- intel_rps_mark_interactive(&dev_priv->gt.rps, true);
+ intel_rps_mark_interactive(&to_gt(dev_priv)->rps, true);
state->rps_interactive = true;
}
@@ -855,7 +855,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
return;
if (state->rps_interactive) {
- intel_rps_mark_interactive(&dev_priv->gt.rps, false);
+ intel_rps_mark_interactive(&to_gt(dev_priv)->rps, false);
state->rps_interactive = false;
}
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index 156b060236c2..c35bad21b657 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -631,7 +631,7 @@ static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
for_each_pipe(dev_priv, pipe)
data_rate += bw_state->data_rate[pipe];
- if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active())
+ if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active(dev_priv))
data_rate = data_rate * 105 / 100;
return data_rate;
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 49be51c32a62..7728795ee26d 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -835,7 +835,7 @@ __intel_display_resume(struct drm_device *dev,
static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
{
return (INTEL_INFO(dev_priv)->gpu_reset_clobbers_display &&
- intel_has_gpu_reset(&dev_priv->gt));
+ intel_has_gpu_reset(to_gt(dev_priv)));
}
void intel_display_prepare_reset(struct drm_i915_private *dev_priv)
@@ -854,14 +854,14 @@ void intel_display_prepare_reset(struct drm_i915_private *dev_priv)
return;
/* We have a modeset vs reset deadlock, defensively unbreak it. */
- set_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags);
+ set_bit(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags);
smp_mb__after_atomic();
- wake_up_bit(&dev_priv->gt.reset.flags, I915_RESET_MODESET);
+ wake_up_bit(&to_gt(dev_priv)->reset.flags, I915_RESET_MODESET);
if (atomic_read(&dev_priv->gpu_error.pending_fb_pin)) {
drm_dbg_kms(&dev_priv->drm,
"Modeset potentially stuck, unbreaking through wedging\n");
- intel_gt_set_wedged(&dev_priv->gt);
+ intel_gt_set_wedged(to_gt(dev_priv));
}
/*
@@ -912,7 +912,7 @@ void intel_display_finish_reset(struct drm_i915_private *dev_priv)
return;
/* reset doesn't touch the display */
- if (!test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags))
+ if (!test_bit(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags))
return;
state = fetch_and_zero(&dev_priv->modeset_restore_state);
@@ -950,7 +950,7 @@ unlock:
drm_modeset_acquire_fini(ctx);
mutex_unlock(&dev->mode_config.mutex);
- clear_bit_unlock(I915_RESET_MODESET, &dev_priv->gt.reset.flags);
+ clear_bit_unlock(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags);
}
static void icl_set_pipe_chicken(const struct intel_crtc_state *crtc_state)
@@ -1291,7 +1291,7 @@ static bool needs_async_flip_vtd_wa(const struct intel_crtc_state *crtc_state)
{
struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
- return crtc_state->uapi.async_flip && intel_vtd_active() &&
+ return crtc_state->uapi.async_flip && intel_vtd_active(i915) &&
(DISPLAY_VER(i915) == 9 || IS_BROADWELL(i915) || IS_HASWELL(i915));
}
@@ -8524,19 +8524,19 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat
for (;;) {
prepare_to_wait(&intel_state->commit_ready.wait,
&wait_fence, TASK_UNINTERRUPTIBLE);
- prepare_to_wait(bit_waitqueue(&dev_priv->gt.reset.flags,
+ prepare_to_wait(bit_waitqueue(&to_gt(dev_priv)->reset.flags,
I915_RESET_MODESET),
&wait_reset, TASK_UNINTERRUPTIBLE);
if (i915_sw_fence_done(&intel_state->commit_ready) ||
- test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags))
+ test_bit(I915_RESET_MODESET, &to_gt(dev_priv)->reset.flags))
break;
schedule();
}
finish_wait(&intel_state->commit_ready.wait, &wait_fence);
- finish_wait(bit_waitqueue(&dev_priv->gt.reset.flags,
+ finish_wait(bit_waitqueue(&to_gt(dev_priv)->reset.flags,
I915_RESET_MODESET),
&wait_reset);
}
@@ -8786,7 +8786,7 @@ static void intel_atomic_commit_work(struct work_struct *work)
intel_atomic_commit_tail(state);
}
-static int __i915_sw_fence_call
+static int
intel_atomic_commit_ready(struct i915_sw_fence *fence,
enum i915_sw_fence_notify notify)
{
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index 963ca7155b06..8f674745e7e0 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -264,7 +264,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm = &dpt->vm;
- vm->gt = &i915->gt;
+ vm->gt = to_gt(i915);
vm->i915 = i915;
vm->dma = i915->drm.dev;
vm->total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
@@ -279,8 +279,6 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm->vma_ops.bind_vma = dpt_bind_vma;
vm->vma_ops.unbind_vma = dpt_unbind_vma;
- vm->vma_ops.set_pages = ggtt_set_pages;
- vm->vma_ops.clear_pages = clear_pages;
vm->pte_encode = gen8_ggtt_pte_encode;
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 7fd11d735ca4..465dc4e97ea8 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1639,7 +1639,7 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *i915)
static bool need_fbc_vtd_wa(struct drm_i915_private *i915)
{
/* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
- if (intel_vtd_active() &&
+ if (intel_vtd_active(i915) &&
(IS_SKYLAKE(i915) || IS_BROXTON(i915))) {
drm_info(&i915->drm,
"Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index be0a74b0bb64..5358f03b52db 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -1386,7 +1386,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
if (!HAS_OVERLAY(dev_priv))
return;
- engine = dev_priv->gt.engine[RCS0];
+ engine = to_gt(dev_priv)->engine[RCS0];
if (!engine || !engine->kernel_context)
return;
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index 3ee3f5bf974b..1223075595ff 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -1751,7 +1751,7 @@ static bool bo_has_valid_encryption(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- return intel_pxp_key_check(&i915->gt.pxp, obj, false) == 0;
+ return intel_pxp_key_check(&to_gt(i915)->pxp, obj, false) == 0;
}
static bool pxp_is_borked(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/dma_resv_utils.c b/drivers/gpu/drm/i915/dma_resv_utils.c
deleted file mode 100644
index 7df91b7e4ca8..000000000000
--- a/drivers/gpu/drm/i915/dma_resv_utils.c
+++ /dev/null
@@ -1,17 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2020 Intel Corporation
- */
-
-#include <linux/dma-resv.h>
-
-#include "dma_resv_utils.h"
-
-void dma_resv_prune(struct dma_resv *resv)
-{
- if (dma_resv_trylock(resv)) {
- if (dma_resv_test_signaled(resv, true))
- dma_resv_add_excl_fence(resv, NULL);
- dma_resv_unlock(resv);
- }
-}
diff --git a/drivers/gpu/drm/i915/dma_resv_utils.h b/drivers/gpu/drm/i915/dma_resv_utils.h
deleted file mode 100644
index b9d8fb5f8367..000000000000
--- a/drivers/gpu/drm/i915/dma_resv_utils.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2020 Intel Corporation
- */
-
-#ifndef DMA_RESV_UTILS_H
-#define DMA_RESV_UTILS_H
-
-struct dma_resv;
-
-void dma_resv_prune(struct dma_resv *resv);
-
-#endif /* DMA_RESV_UTILS_H */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index f0435c6feb68..8a248003dfae 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -69,10 +69,16 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj)
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
unsigned int flags)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct clflush *clflush;
assert_object_held(obj);
+ if (IS_DGFX(i915)) {
+ WARN_ON_ONCE(obj->cache_dirty);
+ return false;
+ }
+
/*
* Stolen memory is always coherent with the GPU as it is explicitly
* marked as wc by the system, or the system is cache-coherent.
@@ -105,16 +111,24 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
if (clflush) {
i915_sw_fence_await_reservation(&clflush->base.chain,
obj->base.resv, NULL, true,
- i915_fence_timeout(to_i915(obj->base.dev)),
+ i915_fence_timeout(i915),
I915_FENCE_GFP);
dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
dma_fence_work_commit(&clflush->base);
+ /*
+ * We must have successfully populated the pages(since we are
+ * holding a pin on the pages as per the flush worker) to reach
+ * this point, which must mean we have already done the required
+ * flush-on-acquire, hence resetting cache_dirty here should be
+ * safe.
+ */
+ obj->cache_dirty = false;
} else if (obj->mm.pages) {
__do_clflush(obj);
+ obj->cache_dirty = false;
} else {
GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
}
- obj->cache_dirty = false;
return true;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ebd775cb1661..00327b750fbb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -237,7 +237,7 @@ static int proto_context_set_persistence(struct drm_i915_private *i915,
* colateral damage, and we should not pretend we can by
* exposing the interface.
*/
- if (!intel_has_reset_engine(&i915->gt))
+ if (!intel_has_reset_engine(to_gt(i915)))
return -ENODEV;
pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE);
@@ -254,7 +254,7 @@ static int proto_context_set_protected(struct drm_i915_private *i915,
if (!protected) {
pc->uses_protected_content = false;
- } else if (!intel_pxp_is_enabled(&i915->gt.pxp)) {
+ } else if (!intel_pxp_is_enabled(&to_gt(i915)->pxp)) {
ret = -ENODEV;
} else if ((pc->user_flags & BIT(UCONTEXT_RECOVERABLE)) ||
!(pc->user_flags & BIT(UCONTEXT_BANNABLE))) {
@@ -268,8 +268,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915,
*/
pc->pxp_wakeref = intel_runtime_pm_get(&i915->runtime_pm);
- if (!intel_pxp_is_active(&i915->gt.pxp))
- ret = intel_pxp_start(&i915->gt.pxp);
+ if (!intel_pxp_is_active(&to_gt(i915)->pxp))
+ ret = intel_pxp_start(&to_gt(i915)->pxp);
}
return ret;
@@ -564,6 +564,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
container_of_user(base, typeof(*ext), base);
const struct set_proto_ctx_engines *set = data;
struct drm_i915_private *i915 = set->i915;
+ struct i915_engine_class_instance prev_engine;
u64 flags;
int err = 0, n, i, j;
u16 slot, width, num_siblings;
@@ -571,7 +572,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
intel_engine_mask_t prev_mask;
/* FIXME: This is NIY for execlists */
- if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
+ if (!(intel_uc_uses_guc_submission(&to_gt(i915)->uc)))
return -ENODEV;
if (get_user(slot, &ext->engine_index))
@@ -629,7 +630,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
/* Create contexts / engines */
for (i = 0; i < width; ++i) {
intel_engine_mask_t current_mask = 0;
- struct i915_engine_class_instance prev_engine;
for (j = 0; j < num_siblings; ++j) {
struct i915_engine_class_instance ci;
@@ -833,7 +833,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv,
sseu = &pc->legacy_rcs_sseu;
}
- ret = i915_gem_user_to_context_sseu(&i915->gt, &user_sseu, sseu);
+ ret = i915_gem_user_to_context_sseu(to_gt(i915), &user_sseu, sseu);
if (ret)
return ret;
@@ -1001,7 +1001,7 @@ static void free_engines_rcu(struct rcu_head *rcu)
free_engines(engines);
}
-static int __i915_sw_fence_call
+static int
engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
struct i915_gem_engines *engines =
@@ -1044,7 +1044,7 @@ static struct i915_gem_engines *alloc_engines(unsigned int count)
static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx,
struct intel_sseu rcs_sseu)
{
- const struct intel_gt *gt = &ctx->i915->gt;
+ const struct intel_gt *gt = to_gt(ctx->i915);
struct intel_engine_cs *engine;
struct i915_gem_engines *e, *err;
enum intel_engine_id id;
@@ -1521,7 +1521,7 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
* colateral damage, and we should not pretend we can by
* exposing the interface.
*/
- if (!intel_has_reset_engine(&ctx->i915->gt))
+ if (!intel_has_reset_engine(to_gt(ctx->i915)))
return -ENODEV;
i915_gem_context_clear_persistence(ctx);
@@ -1559,7 +1559,7 @@ i915_gem_create_context(struct drm_i915_private *i915,
} else if (HAS_FULL_PPGTT(i915)) {
struct i915_ppgtt *ppgtt;
- ppgtt = i915_ppgtt_create(&i915->gt, 0);
+ ppgtt = i915_ppgtt_create(to_gt(i915), 0);
if (IS_ERR(ppgtt)) {
drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n",
PTR_ERR(ppgtt));
@@ -1742,7 +1742,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
if (args->flags)
return -EINVAL;
- ppgtt = i915_ppgtt_create(&i915->gt, 0);
+ ppgtt = i915_ppgtt_create(to_gt(i915), 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
@@ -2194,7 +2194,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN)
return -EINVAL;
- ret = intel_gt_terminally_wedged(&i915->gt);
+ ret = intel_gt_terminally_wedged(to_gt(i915));
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 8955d6abcef1..9402d4bf4ffc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -379,7 +379,7 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
if (ext.flags)
return -EINVAL;
- if (!intel_pxp_is_enabled(&ext_data->i915->gt.pxp))
+ if (!intel_pxp_is_enabled(&to_gt(ext_data->i915)->pxp))
return -ENODEV;
ext_data->flags |= I915_BO_PROTECTED;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index e8a58c997170..1b526039a60d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -248,8 +248,19 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
if (IS_ERR(pages))
return PTR_ERR(pages);
- /* XXX: consider doing a vmap flush or something */
- if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj))
+ /*
+ * DG1 is special here since it still snoops transactions even with
+ * CACHE_NONE. This is not the case with other HAS_SNOOP platforms. We
+ * might need to revisit this as we add new discrete platforms.
+ *
+ * XXX: Consider doing a vmap flush or something, where possible.
+ * Currently we just do a heavy handed wbinvd_on_all_cpus() here since
+ * the underlying sg_table might not even point to struct pages, so we
+ * can't just call drm_clflush_sg or similar, like we do elsewhere in
+ * the driver.
+ */
+ if (i915_gem_object_can_bypass_llc(obj) ||
+ (!HAS_LLC(i915) && !IS_DG1(i915)))
wbinvd_on_all_cpus();
sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index b684a62bf3b0..26532c07d467 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -18,10 +18,32 @@
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ if (IS_DGFX(i915))
+ return false;
+
return !(obj->cache_level == I915_CACHE_NONE ||
obj->cache_level == I915_CACHE_WT);
}
+bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ if (obj->cache_dirty)
+ return false;
+
+ if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+ return true;
+
+ if (IS_DGFX(i915))
+ return false;
+
+ /* Currently in use by HW (display engine)? Keep flushed. */
+ return i915_gem_object_is_framebuffer(obj);
+}
+
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1ff1b76d5206..355a7b68fdac 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -31,6 +31,7 @@
#include "i915_gem_ioctls.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
+#include "i915_vma_snapshot.h"
struct eb_vma {
struct i915_vma *vma;
@@ -309,11 +310,15 @@ struct i915_execbuffer {
struct eb_fence *fences;
unsigned long num_fences;
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+ struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1];
+#endif
};
static int eb_parse(struct i915_execbuffer *eb);
static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
static void eb_unpin_engine(struct i915_execbuffer *eb);
+static void eb_capture_release(struct i915_execbuffer *eb);
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
@@ -992,7 +997,7 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
}
if (!(ev->flags & EXEC_OBJECT_WRITE)) {
- err = dma_resv_reserve_shared(vma->resv, 1);
+ err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
if (err)
return err;
}
@@ -1045,6 +1050,7 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
i915_vma_put(vma);
}
+ eb_capture_release(eb);
eb_unpin_engine(eb);
}
@@ -1094,6 +1100,47 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
return &i915->ggtt;
}
+static void reloc_cache_unmap(struct reloc_cache *cache)
+{
+ void *vaddr;
+
+ if (!cache->vaddr)
+ return;
+
+ vaddr = unmask_page(cache->vaddr);
+ if (cache->vaddr & KMAP)
+ kunmap_atomic(vaddr);
+ else
+ io_mapping_unmap_atomic((void __iomem *)vaddr);
+}
+
+static void reloc_cache_remap(struct reloc_cache *cache,
+ struct drm_i915_gem_object *obj)
+{
+ void *vaddr;
+
+ if (!cache->vaddr)
+ return;
+
+ if (cache->vaddr & KMAP) {
+ struct page *page = i915_gem_object_get_page(obj, cache->page);
+
+ vaddr = kmap_atomic(page);
+ cache->vaddr = unmask_flags(cache->vaddr) |
+ (unsigned long)vaddr;
+ } else {
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+ unsigned long offset;
+
+ offset = cache->node.start;
+ if (!drm_mm_node_allocated(&cache->node))
+ offset += cache->page << PAGE_SHIFT;
+
+ cache->vaddr = (unsigned long)
+ io_mapping_map_atomic_wc(&ggtt->iomap, offset);
+ }
+}
+
static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
{
void *vaddr;
@@ -1358,10 +1405,17 @@ eb_relocate_entry(struct i915_execbuffer *eb,
* batchbuffers.
*/
if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
- GRAPHICS_VER(eb->i915) == 6) {
+ GRAPHICS_VER(eb->i915) == 6 &&
+ !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) {
+ struct i915_vma *vma = target->vma;
+
+ reloc_cache_unmap(&eb->reloc_cache);
+ mutex_lock(&vma->vm->mutex);
err = i915_vma_bind(target->vma,
target->vma->obj->cache_level,
PIN_GLOBAL, NULL);
+ mutex_unlock(&vma->vm->mutex);
+ reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
if (err)
return err;
}
@@ -1882,36 +1936,113 @@ eb_find_first_request_added(struct i915_execbuffer *eb)
return NULL;
}
-static int eb_move_to_gpu(struct i915_execbuffer *eb)
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
+/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
+static void eb_capture_stage(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
- unsigned int i = count;
- int err = 0, j;
+ unsigned int i = count, j;
+ struct i915_vma_snapshot *vsnap;
while (i--) {
struct eb_vma *ev = &eb->vma[i];
struct i915_vma *vma = ev->vma;
unsigned int flags = ev->flags;
- struct drm_i915_gem_object *obj = vma->obj;
- assert_vma_held(vma);
+ if (!(flags & EXEC_OBJECT_CAPTURE))
+ continue;
- if (flags & EXEC_OBJECT_CAPTURE) {
+ vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
+ if (!vsnap)
+ continue;
+
+ i915_vma_snapshot_init(vsnap, vma, "user");
+ for_each_batch_create_order(eb, j) {
struct i915_capture_list *capture;
- for_each_batch_create_order(eb, j) {
- if (!eb->requests[j])
- break;
+ capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+ if (!capture)
+ continue;
- capture = kmalloc(sizeof(*capture), GFP_KERNEL);
- if (capture) {
- capture->next =
- eb->requests[j]->capture_list;
- capture->vma = vma;
- eb->requests[j]->capture_list = capture;
- }
- }
+ capture->next = eb->capture_lists[j];
+ capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
+ eb->capture_lists[j] = capture;
+ }
+ i915_vma_snapshot_put(vsnap);
+ }
+}
+
+/* Commit once we're in the critical path */
+static void eb_capture_commit(struct i915_execbuffer *eb)
+{
+ unsigned int j;
+
+ for_each_batch_create_order(eb, j) {
+ struct i915_request *rq = eb->requests[j];
+
+ if (!rq)
+ break;
+
+ rq->capture_list = eb->capture_lists[j];
+ eb->capture_lists[j] = NULL;
+ }
+}
+
+/*
+ * Release anything that didn't get committed due to errors.
+ * The capture_list will otherwise be freed at request retire.
+ */
+static void eb_capture_release(struct i915_execbuffer *eb)
+{
+ unsigned int j;
+
+ for_each_batch_create_order(eb, j) {
+ if (eb->capture_lists[j]) {
+ i915_request_free_capture_list(eb->capture_lists[j]);
+ eb->capture_lists[j] = NULL;
}
+ }
+}
+
+static void eb_capture_list_clear(struct i915_execbuffer *eb)
+{
+ memset(eb->capture_lists, 0, sizeof(eb->capture_lists));
+}
+
+#else
+
+static void eb_capture_stage(struct i915_execbuffer *eb)
+{
+}
+
+static void eb_capture_commit(struct i915_execbuffer *eb)
+{
+}
+
+static void eb_capture_release(struct i915_execbuffer *eb)
+{
+}
+
+static void eb_capture_list_clear(struct i915_execbuffer *eb)
+{
+}
+
+#endif
+
+static int eb_move_to_gpu(struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i = count;
+ int err = 0, j;
+
+ while (i--) {
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+ unsigned int flags = ev->flags;
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ assert_vma_held(vma);
/*
* If the GPU is not _reading_ through the CPU cache, we need
@@ -1992,6 +2123,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
/* Unconditionally flush any chipset caches (for streaming writes). */
intel_gt_chipset_flush(eb->gt);
+ eb_capture_commit(eb);
+
return 0;
err_skip:
@@ -2166,7 +2299,7 @@ static int eb_parse(struct i915_execbuffer *eb)
goto err_trampoline;
}
- err = dma_resv_reserve_shared(shadow->resv, 1);
+ err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
if (err)
goto err_trampoline;
@@ -2278,9 +2411,9 @@ static int eb_submit(struct i915_execbuffer *eb)
return err;
}
-static int num_vcs_engines(const struct drm_i915_private *i915)
+static int num_vcs_engines(struct drm_i915_private *i915)
{
- return hweight_long(VDBOX_MASK(&i915->gt));
+ return hweight_long(VDBOX_MASK(to_gt(i915)));
}
/*
@@ -3019,7 +3152,7 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd)
fence_array = dma_fence_array_create(eb->num_batches,
fences,
eb->context->parallel.fence_context,
- eb->context->parallel.seqno,
+ eb->context->parallel.seqno++,
false);
if (!fence_array) {
kfree(fences);
@@ -3116,7 +3249,7 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
/* Allocate a request for this batch buffer nice and early. */
eb->requests[i] = i915_request_create(eb_find_context(eb, i));
if (IS_ERR(eb->requests[i])) {
- out_fence = ERR_PTR(PTR_ERR(eb->requests[i]));
+ out_fence = ERR_CAST(eb->requests[i]);
eb->requests[i] = NULL;
return out_fence;
}
@@ -3134,13 +3267,14 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
}
/*
- * Whilst this request exists, batch_obj will be on the
- * active_list, and so will hold the active reference. Only when
- * this request is retired will the batch_obj be moved onto
- * the inactive_list and lose its active reference. Hence we do
- * not need to explicitly hold another reference here.
+ * Not really on stack, but we don't want to call
+ * kfree on the batch_snapshot when we put it, so use the
+ * _onstack interface.
*/
- eb->requests[i]->batch = eb->batches[i]->vma;
+ if (eb->batches[i]->vma)
+ i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
+ eb->batches[i]->vma,
+ "batch");
if (eb->batch_pool) {
GEM_BUG_ON(intel_context_is_parallel(eb->context));
intel_gt_buffer_pool_mark_active(eb->batch_pool,
@@ -3189,6 +3323,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.fences = NULL;
eb.num_fences = 0;
+ eb_capture_list_clear(&eb);
+
memset(eb.requests, 0, sizeof(struct i915_request *) *
ARRAY_SIZE(eb.requests));
eb.composite_fence = NULL;
@@ -3275,10 +3411,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
ww_acquire_done(&eb.ww.ctx);
+ eb_capture_stage(&eb);
out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
if (IS_ERR(out_fence)) {
err = PTR_ERR(out_fence);
+ out_fence = NULL;
if (eb.requests[0])
goto err_request;
else
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index a57a6b7013c2..c5150a1ee3d2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -145,24 +145,10 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
.put_pages = i915_gem_object_put_pages_internal,
};
-/**
- * i915_gem_object_create_internal: create an object with volatile pages
- * @i915: the i915 device
- * @size: the size in bytes of backing storage to allocate for the object
- *
- * Creates a new object that wraps some internal memory for private use.
- * This object is not backed by swappable storage, and as such its contents
- * are volatile and only valid whilst pinned. If the object is reaped by the
- * shrinker, its pages and data will be discarded. Equally, it is not a full
- * GEM object and so not valid for access from userspace. This makes it useful
- * for hardware interfaces like ringbuffers (which are pinned from the time
- * the request is written to the time the hardware stops accessing it), but
- * not for contexts (which need to be preserved when not active for later
- * reuse). Note that it is not cleared upon allocation.
- */
struct drm_i915_gem_object *
-i915_gem_object_create_internal(struct drm_i915_private *i915,
- phys_addr_t size)
+__i915_gem_object_create_internal(struct drm_i915_private *i915,
+ const struct drm_i915_gem_object_ops *ops,
+ phys_addr_t size)
{
static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj;
@@ -179,7 +165,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
return ERR_PTR(-ENOMEM);
drm_gem_private_object_init(&i915->drm, &obj->base, size);
- i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0);
+ i915_gem_object_init(obj, ops, &lock_class, 0);
obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
/*
@@ -199,3 +185,25 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
return obj;
}
+
+/**
+ * i915_gem_object_create_internal: create an object with volatile pages
+ * @i915: the i915 device
+ * @size: the size in bytes of backing storage to allocate for the object
+ *
+ * Creates a new object that wraps some internal memory for private use.
+ * This object is not backed by swappable storage, and as such its contents
+ * are volatile and only valid whilst pinned. If the object is reaped by the
+ * shrinker, its pages and data will be discarded. Equally, it is not a full
+ * GEM object and so not valid for access from userspace. This makes it useful
+ * for hardware interfaces like ringbuffers (which are pinned from the time
+ * the request is written to the time the hardware stops accessing it), but
+ * not for contexts (which need to be preserved when not active for later
+ * reuse). Note that it is not cleared upon allocation.
+ */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_i915_private *i915,
+ phys_addr_t size)
+{
+ return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 39bb15eafc07..1478c02a82cb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -73,7 +73,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
if (args->flags & ~(I915_MMAP_WC))
return -EINVAL;
- if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
+ if (args->flags & I915_MMAP_WC && !pat_enabled())
return -ENODEV;
obj = i915_gem_object_lookup(file, args->handle);
@@ -538,6 +538,9 @@ void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
{
struct i915_mmap_offset *mmo, *mn;
+ if (obj->ops->unmap_virtual)
+ obj->ops->unmap_virtual(obj);
+
spin_lock(&obj->mmo.lock);
rbtree_postorder_for_each_entry_safe(mmo, mn,
&obj->mmo.offsets, offset) {
@@ -646,7 +649,7 @@ mmap_offset_attach(struct drm_i915_gem_object *obj,
goto insert;
/* Attempt to reap some mmap space from dead objects */
- err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT,
+ err = intel_gt_retire_requests_timeout(to_gt(i915), MAX_SCHEDULE_TIMEOUT,
NULL);
if (err)
goto err;
@@ -737,7 +740,7 @@ i915_gem_dumb_mmap_offset(struct drm_file *file,
if (HAS_LMEM(to_i915(dev)))
mmap_type = I915_MMAP_TYPE_FIXED;
- else if (boot_cpu_has(X86_FEATURE_PAT))
+ else if (pat_enabled())
mmap_type = I915_MMAP_TYPE_WC;
else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt))
return -ENODEV;
@@ -793,7 +796,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
break;
case I915_MMAP_OFFSET_WC:
- if (!boot_cpu_has(X86_FEATURE_PAT))
+ if (!pat_enabled())
return -ENODEV;
type = I915_MMAP_TYPE_WC;
break;
@@ -803,7 +806,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
break;
case I915_MMAP_OFFSET_UC:
- if (!boot_cpu_has(X86_FEATURE_PAT))
+ if (!pat_enabled())
return -ENODEV;
type = I915_MMAP_TYPE_UC;
break;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 1e426a42a36c..d87b508b59b1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -31,6 +31,7 @@
#include "i915_gem_context.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
+#include "i915_gem_ttm.h"
#include "i915_memcpy.h"
#include "i915_trace.h"
@@ -91,7 +92,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
}
/**
- * i915_gem_object_fini - Clean up a GEM object initialization
+ * __i915_gem_object_fini - Clean up a GEM object initialization
* @obj: The gem object to cleanup
*
* This function cleans up gem object fields that are set up by
@@ -107,25 +108,29 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj)
}
/**
- * Mark up the object's coherency levels for a given cache_level
+ * i915_gem_object_set_cache_coherency - Mark up the object's coherency levels
+ * for a given cache_level
* @obj: #drm_i915_gem_object
* @cache_level: cache level
*/
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
obj->cache_level = cache_level;
if (cache_level != I915_CACHE_NONE)
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
I915_BO_CACHE_COHERENT_FOR_WRITE);
- else if (HAS_LLC(to_i915(obj->base.dev)))
+ else if (HAS_LLC(i915))
obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
else
obj->cache_coherent = 0;
obj->cache_dirty =
- !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
+ !IS_DGFX(i915);
}
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
@@ -257,6 +262,8 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
*/
void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
{
+ assert_object_held(obj);
+
if (!list_empty(&obj->vma.list)) {
struct i915_vma *vma;
@@ -323,7 +330,16 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
obj->ops->delayed_free(obj);
continue;
}
+
+ if (!i915_gem_object_trylock(obj, NULL)) {
+ /* busy, toss it back to the pile */
+ if (llist_add(&obj->freed, &i915->mm.free_list))
+ queue_delayed_work(i915->wq, &i915->mm.free_work, msecs_to_jiffies(10));
+ continue;
+ }
+
__i915_gem_object_pages_fini(obj);
+ i915_gem_object_unlock(obj);
__i915_gem_free_object(obj);
/* But keep the pointer alive for RCU-protected lookups */
@@ -343,7 +359,7 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915)
static void __i915_gem_free_work(struct work_struct *work)
{
struct drm_i915_private *i915 =
- container_of(work, struct drm_i915_private, mm.free_work);
+ container_of(work, struct drm_i915_private, mm.free_work.work);
i915_gem_flush_free_objects(i915);
}
@@ -364,15 +380,6 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
atomic_inc(&i915->mm.free_count);
/*
- * This serializes freeing with the shrinker. Since the free
- * is delayed, first by RCU then by the workqueue, we want the
- * shrinker to be able to free pages of unreferenced objects,
- * or else we may oom whilst there are plenty of deferred
- * freed objects.
- */
- i915_gem_object_make_unshrinkable(obj);
-
- /*
* Since we require blocking on struct_mutex to unbind the freed
* object from the GPU before releasing resources back to the
* system, we can not do that directly from the RCU callback (which may
@@ -384,7 +391,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
*/
if (llist_add(&obj->freed, &i915->mm.free_list))
- queue_work(i915->wq, &i915->mm.free_work);
+ queue_delayed_work(i915->wq, &i915->mm.free_work, 0);
}
void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
@@ -456,7 +463,7 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
* from can't cross a page boundary. The caller must ensure that @obj pages
* are pinned and that @obj is synced wrt. any related writes.
*
- * Returns 0 on success or -ENODEV if the type of @obj's backing store is
+ * Return: %0 on success or -ENODEV if the type of @obj's backing store is
* unsupported.
*/
int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size)
@@ -709,7 +716,7 @@ bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
void i915_gem_init__objects(struct drm_i915_private *i915)
{
- INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+ INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work);
}
void i915_objects_module_exit(void)
@@ -732,6 +739,57 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = {
.export = i915_gem_prime_export,
};
+/**
+ * i915_gem_object_get_moving_fence - Get the object's moving fence if any
+ * @obj: The object whose moving fence to get.
+ *
+ * A non-signaled moving fence means that there is an async operation
+ * pending on the object that needs to be waited on before setting up
+ * any GPU- or CPU PTEs to the object's pages.
+ *
+ * Return: A refcounted pointer to the object's moving fence if any,
+ * NULL otherwise.
+ */
+struct dma_fence *
+i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
+{
+ return dma_fence_get(i915_gem_to_ttm(obj)->moving);
+}
+
+/**
+ * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any
+ * @obj: The object whose moving fence to wait for.
+ * @intr: Whether to wait interruptible.
+ *
+ * If the moving fence signaled without an error, it is detached from the
+ * object and put.
+ *
+ * Return: 0 if successful, -ERESTARTSYS if the wait was interrupted,
+ * negative error code if the async operation represented by the
+ * moving fence failed.
+ */
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+ bool intr)
+{
+ struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
+ int ret;
+
+ assert_object_held(obj);
+ if (!fence)
+ return 0;
+
+ ret = dma_fence_wait(fence, intr);
+ if (ret)
+ return ret;
+
+ if (fence->error)
+ return fence->error;
+
+ i915_gem_to_ttm(obj)->moving = NULL;
+ dma_fence_put(fence);
+ return 0;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 59201801cec5..f66d46882ea7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -93,7 +93,6 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915);
struct sg_table *
__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
/**
* i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
@@ -211,9 +210,13 @@ static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object
return __i915_gem_object_lock(obj, ww, true);
}
-static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww)
{
- return dma_resv_trylock(obj->base.resv);
+ if (!ww)
+ return dma_resv_trylock(obj->base.resv);
+ else
+ return ww_mutex_trylock(&obj->base.resv->lock, &ww->ctx);
}
static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
@@ -296,6 +299,12 @@ i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
}
static inline bool
+i915_gem_object_has_self_managed_shrink_list(const struct drm_i915_gem_object *obj)
+{
+ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST);
+}
+
+static inline bool
i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
{
return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY);
@@ -449,7 +458,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
}
int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+int i915_gem_object_truncate(struct drm_i915_gem_object *obj);
void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
/**
@@ -512,11 +521,18 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
i915_gem_object_unpin_pages(obj);
}
+struct dma_fence *
+i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
+
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+ bool intr);
+
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
+bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj);
int __must_check
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
@@ -533,25 +549,15 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
-static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
- if (obj->cache_dirty)
- return false;
-
- if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
- return true;
-
- /* Currently in use by HW (display engine)? Keep flushed. */
- return i915_gem_object_is_framebuffer(obj);
-}
-
static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
{
obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->write_domain = I915_GEM_DOMAIN_CPU;
- if (cpu_write_needs_clflush(obj))
+ if (i915_gem_cpu_write_needs_clflush(obj))
obj->cache_dirty = true;
}
@@ -613,6 +619,14 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
enum intel_memory_type type);
+int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
+ size_t size, struct intel_memory_region *mr,
+ struct address_space *mapping,
+ unsigned int max_segment);
+void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
+ bool dirty, bool backup);
+void __shmem_writeback(size_t size, struct address_space *mapping);
+
#ifdef CONFIG_MMU_NOTIFIER
static inline bool
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index da85169006d4..0dd107dcecc2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -34,9 +34,11 @@ struct i915_lut_handle {
struct drm_i915_gem_object_ops {
unsigned int flags;
-#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
-#define I915_GEM_OBJECT_IS_PROXY BIT(2)
-#define I915_GEM_OBJECT_NO_MMAP BIT(3)
+#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
+/* Skip the shrinker management in set_pages/unset_pages */
+#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST BIT(2)
+#define I915_GEM_OBJECT_IS_PROXY BIT(3)
+#define I915_GEM_OBJECT_NO_MMAP BIT(4)
/* Interface between the GEM object and its backing storage.
* get_pages() is called once prior to the use of the associated set
@@ -54,14 +56,18 @@ struct drm_i915_gem_object_ops {
int (*get_pages)(struct drm_i915_gem_object *obj);
void (*put_pages)(struct drm_i915_gem_object *obj,
struct sg_table *pages);
- void (*truncate)(struct drm_i915_gem_object *obj);
+ int (*truncate)(struct drm_i915_gem_object *obj);
void (*writeback)(struct drm_i915_gem_object *obj);
+ int (*shrinker_release_pages)(struct drm_i915_gem_object *obj,
+ bool no_gpu_wait,
+ bool should_writeback);
int (*pread)(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pread *arg);
int (*pwrite)(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *arg);
u64 (*mmap_offset)(struct drm_i915_gem_object *obj);
+ void (*unmap_virtual)(struct drm_i915_gem_object *obj);
int (*dmabuf_export)(struct drm_i915_gem_object *obj);
@@ -305,6 +311,7 @@ struct drm_i915_gem_object {
#define I915_BO_READONLY BIT(6)
#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(8)
+#define I915_BO_WAS_BOUND_BIT 9
/**
* @mem_flags - Mutable placement-related flags
*
@@ -486,9 +493,37 @@ struct drm_i915_gem_object {
* instead go through the pin/unpin interfaces.
*/
atomic_t pages_pin_count;
+
+ /**
+ * @shrink_pin: Prevents the pages from being made visible to
+ * the shrinker, while the shrink_pin is non-zero. Most users
+ * should pretty much never have to care about this, outside of
+ * some special use cases.
+ *
+ * By default most objects will start out as visible to the
+ * shrinker (if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the
+ * backing pages are attached to the object, like in
+ * __i915_gem_object_set_pages(). They will then be removed from
+ * the shrinker list once the pages are released.
+ *
+ * The @shrink_pin is incremented by calling
+ * i915_gem_object_make_unshrinkable(), which will also remove
+ * the object from the shrinker list, if the pin count was zero.
+ *
+ * Callers will then typically call
+ * i915_gem_object_make_shrinkable() or
+ * i915_gem_object_make_purgeable() to decrement the pin count,
+ * and make the pages visible again.
+ */
atomic_t shrink_pin;
/**
+ * @ttm_shrinkable: True when the object is using shmem pages
+ * underneath. Protected by the object lock.
+ */
+ bool ttm_shrinkable;
+
+ /**
* Priority list of potential placements for this object.
*/
struct intel_memory_region **placements;
@@ -512,6 +547,7 @@ struct drm_i915_gem_object {
*/
struct list_head region_link;
+ struct i915_refct_sgt *rsgt;
struct sg_table *pages;
void *mapping;
@@ -547,7 +583,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_dma_page;
/**
- * Element within i915->mm.unbound_list or i915->mm.bound_list,
+ * Element within i915->mm.shrink_list or i915->mm.purge_list,
* locked by i915->mm.obj_lock.
*/
struct list_head link;
@@ -565,7 +601,7 @@ struct drm_i915_gem_object {
} mm;
struct {
- struct sg_table *cached_io_st;
+ struct i915_refct_sgt *cached_io_rsgt;
struct i915_gem_object_page_iter get_io_page;
struct drm_i915_gem_object *backup;
bool created:1;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 8eb1c3a6fc9c..a50f884973bc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -10,6 +10,8 @@
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
+#include "gt/intel_gt.h"
+
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages,
unsigned int sg_page_sizes)
@@ -26,6 +28,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
/* Make the pages coherent with the GPU (flushing any swapin). */
if (obj->cache_dirty) {
+ WARN_ON_ONCE(IS_DGFX(i915));
obj->write_domain = 0;
if (i915_gem_object_has_struct_page(obj))
drm_clflush_sg(pages);
@@ -68,7 +71,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
shrinkable = false;
}
- if (shrinkable) {
+ if (shrinkable && !i915_gem_object_has_self_managed_shrink_list(obj)) {
struct list_head *list;
unsigned long flags;
@@ -158,11 +161,12 @@ retry:
}
/* Immediately discard the backing storage */
-void i915_gem_object_truncate(struct drm_i915_gem_object *obj)
+int i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
- drm_gem_free_mmap_offset(&obj->base);
if (obj->ops->truncate)
- obj->ops->truncate(obj);
+ return obj->ops->truncate(obj);
+
+ return 0;
}
/* Try to discard unwanted pages */
@@ -208,7 +212,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
if (i915_gem_object_is_volatile(obj))
obj->mm.madv = I915_MADV_WILLNEED;
- i915_gem_object_make_unshrinkable(obj);
+ if (!i915_gem_object_has_self_managed_shrink_list(obj))
+ i915_gem_object_make_unshrinkable(obj);
if (obj->mm.mapping) {
unmap_object(obj, page_mask_bits(obj->mm.mapping));
@@ -218,6 +223,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
__i915_gem_object_reset_page_iter(obj);
obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+ if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
+ intel_gt_invalidate_tlbs(to_gt(i915));
+ }
+
return pages;
}
@@ -414,8 +427,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
}
if (!ptr) {
- if (GEM_WARN_ON(type == I915_MAP_WC &&
- !static_cpu_has(X86_FEATURE_PAT)))
+ err = i915_gem_object_wait_moving_fence(obj, true);
+ if (err) {
+ ptr = ERR_PTR(err);
+ goto err_unpin;
+ }
+
+ if (GEM_WARN_ON(type == I915_MAP_WC && !pat_enabled()))
ptr = ERR_PTR(-ENODEV);
else if (i915_gem_object_has_struct_page(obj))
ptr = i915_gem_object_map_page(obj, type);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 7986612f48fa..ca6faffcc496 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -19,6 +19,7 @@
static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
struct address_space *mapping = obj->base.filp->f_mapping;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct scatterlist *sg;
struct sg_table *st;
dma_addr_t dma;
@@ -73,7 +74,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
dst += PAGE_SIZE;
}
- intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
+ intel_gt_chipset_flush(to_gt(i915));
/* We're no longer struct page backed */
obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE;
@@ -140,6 +141,7 @@ int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj,
{
void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
char __user *user_data = u64_to_user_ptr(args->data_ptr);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
int err;
err = i915_gem_object_wait(obj,
@@ -159,7 +161,7 @@ int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj,
return -EFAULT;
drm_clflush_virt_range(vaddr, args->size);
- intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
+ intel_gt_chipset_flush(to_gt(i915));
i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 726b40e1fbb0..ac56124760e1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -35,7 +35,7 @@ void i915_gem_suspend(struct drm_i915_private *i915)
* state. Fortunately, the kernel_context is disposable and we do
* not rely on its state.
*/
- intel_gt_suspend_prepare(&i915->gt);
+ intel_gt_suspend_prepare(to_gt(i915));
i915_gem_drain_freed_objects(i915);
}
@@ -153,7 +153,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
* machine in an unusable condition.
*/
- intel_gt_suspend_late(&i915->gt);
+ intel_gt_suspend_late(to_gt(i915));
spin_lock_irqsave(&i915->mm.obj_lock, flags);
for (phase = phases; *phase; phase++) {
@@ -223,7 +223,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
* guarantee that the context image is complete. So let's just reset
* it and start again.
*/
- intel_gt_resume(&i915->gt);
+ intel_gt_resume(to_gt(i915));
ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU);
GEM_WARN_ON(ret);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
index a016ccec36f3..a4350227e9ae 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -11,7 +11,7 @@
void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
struct intel_memory_region *mem)
{
- obj->mm.region = intel_memory_region_get(mem);
+ obj->mm.region = mem;
mutex_lock(&mem->objects.lock);
list_add(&obj->mm.region_link, &mem->objects.list);
@@ -25,8 +25,6 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj)
mutex_lock(&mem->objects.lock);
list_del(&obj->mm.region_link);
mutex_unlock(&mem->objects.lock);
-
- intel_memory_region_put(mem);
}
struct drm_i915_gem_object *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index d77da59fae04..cc9fe258fba7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,62 +25,67 @@ static void check_release_pagevec(struct pagevec *pvec)
cond_resched();
}
-static int shmem_get_pages(struct drm_i915_gem_object *obj)
+void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
+ bool dirty, bool backup)
{
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct intel_memory_region *mem = obj->mm.region;
- const unsigned long page_count = obj->base.size / PAGE_SIZE;
+ struct sgt_iter sgt_iter;
+ struct pagevec pvec;
+ struct page *page;
+
+ mapping_clear_unevictable(mapping);
+
+ pagevec_init(&pvec);
+ for_each_sgt_page(page, sgt_iter, st) {
+ if (dirty)
+ set_page_dirty(page);
+
+ if (backup)
+ mark_page_accessed(page);
+
+ if (!pagevec_add(&pvec, page))
+ check_release_pagevec(&pvec);
+ }
+ if (pagevec_count(&pvec))
+ check_release_pagevec(&pvec);
+
+ sg_free_table(st);
+}
+
+int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
+ size_t size, struct intel_memory_region *mr,
+ struct address_space *mapping,
+ unsigned int max_segment)
+{
+ const unsigned long page_count = size / PAGE_SIZE;
unsigned long i;
- struct address_space *mapping;
- struct sg_table *st;
struct scatterlist *sg;
- struct sgt_iter sgt_iter;
struct page *page;
unsigned long last_pfn = 0; /* suppress gcc warning */
- unsigned int max_segment = i915_sg_segment_size();
- unsigned int sg_page_sizes;
gfp_t noreclaim;
int ret;
/*
- * Assert that the object is not currently in any GPU domain. As it
- * wasn't in the GTT, there shouldn't be any way it could have been in
- * a GPU cache
- */
- GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
- GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
-
- /*
* If there's no chance of allocating enough pages for the whole
* object, bail early.
*/
- if (obj->base.size > resource_size(&mem->region))
+ if (size > resource_size(&mr->region))
return -ENOMEM;
- st = kmalloc(sizeof(*st), GFP_KERNEL);
- if (!st)
+ if (sg_alloc_table(st, page_count, GFP_KERNEL))
return -ENOMEM;
-rebuild_st:
- if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
- kfree(st);
- return -ENOMEM;
- }
-
/*
* Get the list of pages out of our struct file. They'll be pinned
* at this point until we release them.
*
* Fail silently without starting the shrinker
*/
- mapping = obj->base.filp->f_mapping;
mapping_set_unevictable(mapping);
noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
sg = st->sgl;
st->nents = 0;
- sg_page_sizes = 0;
for (i = 0; i < page_count; i++) {
const unsigned int shrink[] = {
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
@@ -135,10 +140,9 @@ rebuild_st:
if (!i ||
sg->length >= max_segment ||
page_to_pfn(page) != last_pfn + 1) {
- if (i) {
- sg_page_sizes |= sg->length;
+ if (i)
sg = sg_next(sg);
- }
+
st->nents++;
sg_set_page(sg, page, PAGE_SIZE, 0);
} else {
@@ -149,14 +153,67 @@ rebuild_st:
/* Check that the i965g/gm workaround works. */
GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL);
}
- if (sg) { /* loop terminated early; short sg table */
- sg_page_sizes |= sg->length;
+ if (sg) /* loop terminated early; short sg table */
sg_mark_end(sg);
- }
/* Trim unused sg entries to avoid wasting memory. */
i915_sg_trim(st);
+ return 0;
+err_sg:
+ sg_mark_end(sg);
+ if (sg != st->sgl) {
+ shmem_sg_free_table(st, mapping, false, false);
+ } else {
+ mapping_clear_unevictable(mapping);
+ sg_free_table(st);
+ }
+
+ /*
+ * shmemfs first checks if there is enough memory to allocate the page
+ * and reports ENOSPC should there be insufficient, along with the usual
+ * ENOMEM for a genuine allocation failure.
+ *
+ * We use ENOSPC in our driver to mean that we have run out of aperture
+ * space and so want to translate the error from shmemfs back to our
+ * usual understanding of ENOMEM.
+ */
+ if (ret == -ENOSPC)
+ ret = -ENOMEM;
+
+ return ret;
+}
+
+static int shmem_get_pages(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_memory_region *mem = obj->mm.region;
+ struct address_space *mapping = obj->base.filp->f_mapping;
+ const unsigned long page_count = obj->base.size / PAGE_SIZE;
+ unsigned int max_segment = i915_sg_segment_size();
+ struct sg_table *st;
+ struct sgt_iter sgt_iter;
+ struct page *page;
+ int ret;
+
+ /*
+ * Assert that the object is not currently in any GPU domain. As it
+ * wasn't in the GTT, there shouldn't be any way it could have been in
+ * a GPU cache
+ */
+ GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+ GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+
+rebuild_st:
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ return -ENOMEM;
+
+ ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, mapping,
+ max_segment);
+ if (ret)
+ goto err_st;
+
ret = i915_gem_gtt_prepare_pages(obj, st);
if (ret) {
/*
@@ -168,6 +225,7 @@ rebuild_st:
for_each_sgt_page(page, sgt_iter, st)
put_page(page);
sg_free_table(st);
+ kfree(st);
max_segment = PAGE_SIZE;
goto rebuild_st;
@@ -185,28 +243,12 @@ rebuild_st:
if (i915_gem_object_can_bypass_llc(obj))
obj->cache_dirty = true;
- __i915_gem_object_set_pages(obj, st, sg_page_sizes);
+ __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
return 0;
-err_sg:
- sg_mark_end(sg);
err_pages:
- mapping_clear_unevictable(mapping);
- if (sg != st->sgl) {
- struct pagevec pvec;
-
- pagevec_init(&pvec);
- for_each_sgt_page(page, sgt_iter, st) {
- if (!pagevec_add(&pvec, page))
- check_release_pagevec(&pvec);
- }
- if (pagevec_count(&pvec))
- check_release_pagevec(&pvec);
- }
- sg_free_table(st);
- kfree(st);
-
+ shmem_sg_free_table(st, mapping, false, false);
/*
* shmemfs first checks if there is enough memory to allocate the page
* and reports ENOSPC should there be insufficient, along with the usual
@@ -216,13 +258,16 @@ err_pages:
* space and so want to translate the error from shmemfs back to our
* usual understanding of ENOMEM.
*/
+err_st:
if (ret == -ENOSPC)
ret = -ENOMEM;
+ kfree(st);
+
return ret;
}
-static void
+static int
shmem_truncate(struct drm_i915_gem_object *obj)
{
/*
@@ -234,12 +279,12 @@ shmem_truncate(struct drm_i915_gem_object *obj)
shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
obj->mm.madv = __I915_MADV_PURGED;
obj->mm.pages = ERR_PTR(-EFAULT);
+
+ return 0;
}
-static void
-shmem_writeback(struct drm_i915_gem_object *obj)
+void __shmem_writeback(size_t size, struct address_space *mapping)
{
- struct address_space *mapping;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.nr_to_write = SWAP_CLUSTER_MAX,
@@ -255,10 +300,9 @@ shmem_writeback(struct drm_i915_gem_object *obj)
* instead of invoking writeback so they are aged and paged out
* as normal.
*/
- mapping = obj->base.filp->f_mapping;
/* Begin writeback on each dirty page */
- for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
+ for (i = 0; i < size >> PAGE_SHIFT; i++) {
struct page *page;
page = find_lock_page(mapping, i);
@@ -281,6 +325,12 @@ put:
}
}
+static void
+shmem_writeback(struct drm_i915_gem_object *obj)
+{
+ __shmem_writeback(obj->base.size, obj->base.filp->f_mapping);
+}
+
void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
struct sg_table *pages,
@@ -313,11 +363,6 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
- struct sgt_iter sgt_iter;
- struct pagevec pvec;
- struct page *page;
-
- GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev)));
__i915_gem_object_release_shmem(obj, pages, true);
i915_gem_gtt_finish_pages(obj, pages);
@@ -325,25 +370,10 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_save_bit_17_swizzle(obj, pages);
- mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
-
- pagevec_init(&pvec);
- for_each_sgt_page(page, sgt_iter, pages) {
- if (obj->mm.dirty)
- set_page_dirty(page);
-
- if (obj->mm.madv == I915_MADV_WILLNEED)
- mark_page_accessed(page);
-
- if (!pagevec_add(&pvec, page))
- check_release_pagevec(&pvec);
- }
- if (pagevec_count(&pvec))
- check_release_pagevec(&pvec);
- obj->mm.dirty = false;
-
- sg_free_table(pages);
+ shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
+ obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
kfree(pages);
+ obj->mm.dirty = false;
}
static void
@@ -634,9 +664,10 @@ static int init_shmem(struct intel_memory_region *mem)
return 0; /* Don't error, we can simply fallback to the kernel mnt */
}
-static void release_shmem(struct intel_memory_region *mem)
+static int release_shmem(struct intel_memory_region *mem)
{
i915_gemfs_fini(mem->i915);
+ return 0;
}
static const struct intel_memory_region_ops shmem_region_ops = {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 5ab136ffdeb2..cc927e49d21f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -15,7 +15,6 @@
#include "gt/intel_gt_requests.h"
-#include "dma_resv_utils.h"
#include "i915_trace.h"
static bool swap_available(void)
@@ -37,8 +36,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
}
-static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
- unsigned long shrink, bool trylock_vm)
+static int drop_pages(struct drm_i915_gem_object *obj,
+ unsigned long shrink, bool trylock_vm)
{
unsigned long flags;
@@ -56,19 +55,25 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
return false;
}
-static void try_to_writeback(struct drm_i915_gem_object *obj,
- unsigned int flags)
+static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
{
+ if (obj->ops->shrinker_release_pages)
+ return obj->ops->shrinker_release_pages(obj,
+ !(flags & I915_SHRINK_ACTIVE),
+ flags & I915_SHRINK_WRITEBACK);
+
switch (obj->mm.madv) {
case I915_MADV_DONTNEED:
i915_gem_object_truncate(obj);
- return;
+ return 0;
case __I915_MADV_PURGED:
- return;
+ return 0;
}
if (flags & I915_SHRINK_WRITEBACK)
i915_gem_object_writeback(obj);
+
+ return 0;
}
/**
@@ -148,7 +153,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
*/
if (shrink & I915_SHRINK_ACTIVE)
/* Retire requests to unpin all idle contexts */
- intel_gt_retire_requests(&i915->gt);
+ intel_gt_retire_requests(to_gt(i915));
/*
* As we may completely rewrite the (un)bound list whilst unbinding
@@ -209,27 +214,23 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
- err = 0;
- if (unsafe_drop_pages(obj, shrink, trylock_vm)) {
- /* May arrive from get_pages on another bo */
- if (!ww) {
- if (!i915_gem_object_trylock(obj))
- goto skip;
- } else {
- err = i915_gem_object_lock(obj, ww);
- if (err)
- goto skip;
- }
-
- if (!__i915_gem_object_put_pages(obj)) {
- try_to_writeback(obj, shrink);
- count += obj->base.size >> PAGE_SHIFT;
- }
- if (!ww)
- i915_gem_object_unlock(obj);
+ /* May arrive from get_pages on another bo */
+ if (!ww) {
+ if (!i915_gem_object_trylock(obj, NULL))
+ goto skip;
+ } else {
+ err = i915_gem_object_lock(obj, ww);
+ if (err)
+ goto skip;
}
- dma_resv_prune(obj->base.resv);
+ if (drop_pages(obj, shrink, trylock_vm) &&
+ !__i915_gem_object_put_pages(obj) &&
+ !try_to_writeback(obj, shrink))
+ count += obj->base.size >> PAGE_SHIFT;
+
+ if (!ww)
+ i915_gem_object_unlock(obj);
scanned += obj->base.size >> PAGE_SHIFT;
skip:
@@ -404,12 +405,18 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
list_for_each_entry_safe(vma, next,
&i915->ggtt.vm.bound_list, vm_link) {
unsigned long count = vma->node.size >> PAGE_SHIFT;
+ struct drm_i915_gem_object *obj = vma->obj;
if (!vma->iomap || i915_vma_is_active(vma))
continue;
+ if (!i915_gem_object_trylock(obj, NULL))
+ continue;
+
if (__i915_vma_unbind(vma) == 0)
freed_pages += count;
+
+ i915_gem_object_unlock(obj);
}
mutex_unlock(&i915->ggtt.vm.mutex);
@@ -458,6 +465,16 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+/**
+ * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
+ * default all object types that support shrinking (see IS_SHRINKABLE) will also
+ * make the object visible to the shrinker after allocating the system memory
+ * pages.
+ * @obj: The GEM object.
+ *
+ * This is typically used for special kernel internal objects that can't be
+ * easily processed by the shrinker, like if they are perma-pinned.
+ */
void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = obj_to_i915(obj);
@@ -482,13 +499,12 @@ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}
-static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
- struct list_head *head)
+static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
+ struct list_head *head)
{
struct drm_i915_private *i915 = obj_to_i915(obj);
unsigned long flags;
- GEM_BUG_ON(!i915_gem_object_has_pages(obj));
if (!i915_gem_object_is_shrinkable(obj))
return;
@@ -508,14 +524,67 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
}
+/**
+ * __i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
+{
+ ___i915_gem_object_make_shrinkable(obj,
+ &obj_to_i915(obj)->mm.shrink_list);
+}
+
+/**
+ * __i915_gem_object_make_purgeable - Move the object to the tail of the
+ * purgeable list. Objects on this list might be discarded. Used with
+ * DONTNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
+{
+ ___i915_gem_object_make_shrinkable(obj,
+ &obj_to_i915(obj)->mm.purge_list);
+}
+
+/**
+ * i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * MUST only be called on objects which have backing pages.
+ *
+ * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
+ */
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
{
- __i915_gem_object_make_shrinkable(obj,
- &obj_to_i915(obj)->mm.shrink_list);
+ GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+ __i915_gem_object_make_shrinkable(obj);
}
+/**
+ * i915_gem_object_make_purgeable - Move the object to the tail of the purgeable
+ * list. Used with DONTNEED objects. Unlike with shrinkable objects, the
+ * shrinker will attempt to discard the backing pages, instead of trying to swap
+ * them out.
+ * @obj: The GEM object.
+ *
+ * MUST only be called on objects which have backing pages.
+ *
+ * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
+ */
void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
{
- __i915_gem_object_make_shrinkable(obj,
- &obj_to_i915(obj)->mm.purge_list);
+ GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+ __i915_gem_object_make_purgeable(obj);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index ddd37ccb1362..7df50fd6cc7b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -399,7 +399,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
return 0;
}
- if (intel_vtd_active() && GRAPHICS_VER(i915) < 8) {
+ if (intel_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
drm_notice(&i915->drm,
"%s, disabling use of stolen memory\n",
"DMAR active");
@@ -488,6 +488,9 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
return 0;
}
+ /* Exclude the reserved region from driver use */
+ mem->region.end = reserved_base - 1;
+
/* It is possible for the reserved area to end before the end of stolen
* memory, so just consider the start. */
reserved_total = stolen_top - reserved_base;
@@ -653,7 +656,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem,
cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
i915_gem_object_set_cache_coherency(obj, cache_level);
- if (WARN_ON(!i915_gem_object_trylock(obj)))
+ if (WARN_ON(!i915_gem_object_trylock(obj, NULL)))
return -EBUSY;
i915_gem_object_init_memory_region(obj, mem);
@@ -720,9 +723,10 @@ static int init_stolen_smem(struct intel_memory_region *mem)
return i915_gem_init_stolen(mem);
}
-static void release_stolen_smem(struct intel_memory_region *mem)
+static int release_stolen_smem(struct intel_memory_region *mem)
{
i915_gem_cleanup_stolen(mem->i915);
+ return 0;
}
static const struct intel_memory_region_ops i915_region_stolen_smem_ops = {
@@ -759,10 +763,11 @@ err_fini:
return err;
}
-static void release_stolen_lmem(struct intel_memory_region *mem)
+static int release_stolen_lmem(struct intel_memory_region *mem)
{
io_mapping_fini(&mem->iomap);
i915_gem_cleanup_stolen(mem->i915);
+ return 0;
}
static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
@@ -778,6 +783,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
struct intel_uncore *uncore = &i915->uncore;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
struct intel_memory_region *mem;
+ resource_size_t min_page_size;
resource_size_t io_start;
resource_size_t lmem_size;
u64 lmem_base;
@@ -789,8 +795,11 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
lmem_size = pci_resource_len(pdev, 2) - lmem_base;
io_start = pci_resource_start(pdev, 2) + lmem_base;
+ min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
+ I915_GTT_PAGE_SIZE_4K;
+
mem = intel_memory_region_create(i915, lmem_base, lmem_size,
- I915_GTT_PAGE_SIZE_4K, io_start,
+ min_page_size, io_start,
type, instance,
&i915_region_stolen_lmem_ops);
if (IS_ERR(mem))
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
index 1929d6cf4150..75501db71041 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -38,12 +38,13 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
{
const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct drm_i915_private *i915 = to_i915(dev);
struct i915_gem_context *ctx;
unsigned long idx;
long ret;
/* ABI: return -EIO if already wedged */
- ret = intel_gt_terminally_wedged(&to_i915(dev)->gt);
+ ret = intel_gt_terminally_wedged(to_gt(i915));
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 74a1ffd0d7dd..de3fe79b665a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -14,13 +14,9 @@
#include "gem/i915_gem_object.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
#include "gem/i915_gem_ttm_pm.h"
-
-#include "gt/intel_engine_pm.h"
-#include "gt/intel_gt.h"
-#include "gt/intel_migrate.h"
-
#define I915_TTM_PRIO_PURGE 0
#define I915_TTM_PRIO_NO_PAGES 1
#define I915_TTM_PRIO_HAS_PAGES 2
@@ -34,7 +30,9 @@
* struct i915_ttm_tt - TTM page vector with additional private information
* @ttm: The base TTM page vector.
* @dev: The struct device used for dma mapping and unmapping.
- * @cached_st: The cached scatter-gather table.
+ * @cached_rsgt: The cached scatter-gather table.
+ * @is_shmem: Set if using shmem.
+ * @filp: The shmem file, if using shmem backend.
*
* Note that DMA may be going on right up to the point where the page-
* vector is unpopulated in delayed destroy. Hence keep the
@@ -45,7 +43,10 @@
struct i915_ttm_tt {
struct ttm_tt ttm;
struct device *dev;
- struct sg_table *cached_st;
+ struct i915_refct_sgt cached_rsgt;
+
+ bool is_shmem;
+ struct file *filp;
};
static const struct ttm_place sys_placement_flags = {
@@ -103,37 +104,15 @@ static int i915_ttm_err_to_gem(int err)
return err;
}
-static bool gpu_binds_iomem(struct ttm_resource *mem)
-{
- return mem->mem_type != TTM_PL_SYSTEM;
-}
-
-static bool cpu_maps_iomem(struct ttm_resource *mem)
-{
- /* Once / if we support GGTT, this is also false for cached ttm_tts */
- return mem->mem_type != TTM_PL_SYSTEM;
-}
-
-static enum i915_cache_level
-i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
- struct ttm_tt *ttm)
-{
- return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) &&
- ttm->caching == ttm_cached) ? I915_CACHE_LLC :
- I915_CACHE_NONE;
-}
-
-static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);
-
static enum ttm_caching
i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
{
/*
- * Objects only allowed in system get cached cpu-mappings.
- * Other objects get WC mapping for now. Even if in system.
+ * Objects only allowed in system get cached cpu-mappings, or when
+ * evicting lmem-only buffers to system for swapping. Other objects get
+ * WC mapping for now. Even if in system.
*/
- if (obj->mm.region->type == INTEL_MEMORY_SYSTEM &&
- obj->mm.n_placements <= 1)
+ if (obj->mm.n_placements <= 1)
return ttm_cached;
return ttm_write_combined;
@@ -179,15 +158,103 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
placement->busy_placement = busy;
}
+static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+ struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+ struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ const unsigned int max_segment = i915_sg_segment_size();
+ const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
+ struct file *filp = i915_tt->filp;
+ struct sgt_iter sgt_iter;
+ struct sg_table *st;
+ struct page *page;
+ unsigned long i;
+ int err;
+
+ if (!filp) {
+ struct address_space *mapping;
+ gfp_t mask;
+
+ filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE);
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+
+ mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+
+ mapping = filp->f_mapping;
+ mapping_set_gfp_mask(mapping, mask);
+ GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+ i915_tt->filp = filp;
+ }
+
+ st = &i915_tt->cached_rsgt.table;
+ err = shmem_sg_alloc_table(i915, st, size, mr, filp->f_mapping,
+ max_segment);
+ if (err)
+ return err;
+
+ err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (err)
+ goto err_free_st;
+
+ i = 0;
+ for_each_sgt_page(page, sgt_iter, st)
+ ttm->pages[i++] = page;
+
+ if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
+ ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
+
+ return 0;
+
+err_free_st:
+ shmem_sg_free_table(st, filp->f_mapping, false, false);
+
+ return err;
+}
+
+static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
+ struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+ shmem_sg_free_table(st, file_inode(i915_tt->filp)->i_mapping,
+ backup, backup);
+}
+
+static void i915_ttm_tt_release(struct kref *ref)
+{
+ struct i915_ttm_tt *i915_tt =
+ container_of(ref, typeof(*i915_tt), cached_rsgt.kref);
+ struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+ GEM_WARN_ON(st->sgl);
+
+ kfree(i915_tt);
+}
+
+static const struct i915_refct_sgt_ops tt_rsgt_ops = {
+ .release = i915_ttm_tt_release
+};
+
static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, bo->resource->mem_type);
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ enum ttm_caching caching;
struct i915_ttm_tt *i915_tt;
int ret;
+ if (!obj)
+ return NULL;
+
i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL);
if (!i915_tt)
return NULL;
@@ -196,38 +263,66 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
man->use_tt)
page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
- ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags,
- i915_ttm_select_tt_caching(obj));
- if (ret) {
- kfree(i915_tt);
- return NULL;
+ caching = i915_ttm_select_tt_caching(obj);
+ if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
+ page_flags |= TTM_TT_FLAG_EXTERNAL |
+ TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+ i915_tt->is_shmem = true;
}
+ ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching);
+ if (ret)
+ goto err_free;
+
+ __i915_refct_sgt_init(&i915_tt->cached_rsgt, bo->base.size,
+ &tt_rsgt_ops);
+
i915_tt->dev = obj->base.dev->dev;
return &i915_tt->ttm;
+
+err_free:
+ kfree(i915_tt);
+ return NULL;
+}
+
+static int i915_ttm_tt_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+
+ if (i915_tt->is_shmem)
+ return i915_ttm_tt_shmem_populate(bdev, ttm, ctx);
+
+ return ttm_pool_alloc(&bdev->pool, ttm, ctx);
}
static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
{
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ struct sg_table *st = &i915_tt->cached_rsgt.table;
- if (i915_tt->cached_st) {
- dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
- DMA_BIDIRECTIONAL, 0);
- sg_free_table(i915_tt->cached_st);
- kfree(i915_tt->cached_st);
- i915_tt->cached_st = NULL;
+ if (st->sgl)
+ dma_unmap_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
+
+ if (i915_tt->is_shmem) {
+ i915_ttm_tt_shmem_unpopulate(ttm);
+ } else {
+ sg_free_table(st);
+ ttm_pool_free(&bdev->pool, ttm);
}
- ttm_pool_free(&bdev->pool, ttm);
}
static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
{
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ if (i915_tt->filp)
+ fput(i915_tt->filp);
+
ttm_tt_fini(ttm);
- kfree(i915_tt);
+ i915_refct_sgt_put(&i915_tt->cached_rsgt);
}
static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
@@ -235,6 +330,17 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ if (!obj)
+ return false;
+
+ /*
+ * EXTERNAL objects should never be swapped out by TTM, instead we need
+ * to handle that ourselves. TTM will already skip such objects for us,
+ * but we would like to avoid grabbing locks for no good reason.
+ */
+ if (bo->ttm && bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
+ return false;
+
/* Will do for now. Our pinned objects are still on TTM's LRU lists */
return i915_gem_object_evictable(obj);
}
@@ -245,28 +351,19 @@ static void i915_ttm_evict_flags(struct ttm_buffer_object *bo,
*placement = i915_sys_placement;
}
-static int i915_ttm_move_notify(struct ttm_buffer_object *bo)
-{
- struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- int ret;
-
- ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
- if (ret)
- return ret;
-
- ret = __i915_gem_object_put_pages(obj);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
+/**
+ * i915_ttm_free_cached_io_rsgt - Free object cached LMEM information
+ * @obj: The GEM object
+ * This function frees any LMEM-related information that is cached on
+ * the object. For example the radix tree for fast page lookup and the
+ * cached refcounted sg-table
+ */
+void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj)
{
struct radix_tree_iter iter;
void __rcu **slot;
- if (!obj->ttm.cached_io_st)
+ if (!obj->ttm.cached_io_rsgt)
return;
rcu_read_lock();
@@ -274,93 +371,106 @@ static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index);
rcu_read_unlock();
- sg_free_table(obj->ttm.cached_io_st);
- kfree(obj->ttm.cached_io_st);
- obj->ttm.cached_io_st = NULL;
+ i915_refct_sgt_put(obj->ttm.cached_io_rsgt);
+ obj->ttm.cached_io_rsgt = NULL;
}
-static void
-i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
+/**
+ * i915_ttm_purge - Clear an object of its memory
+ * @obj: The object
+ *
+ * This function is called to clear an object of its memory when it is
+ * marked as not needed anymore.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int i915_ttm_purge(struct drm_i915_gem_object *obj)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct i915_ttm_tt *i915_tt =
+ container_of(bo->ttm, typeof(*i915_tt), ttm);
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ struct ttm_placement place = {};
+ int ret;
- if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
- obj->write_domain = I915_GEM_DOMAIN_WC;
- obj->read_domains = I915_GEM_DOMAIN_WC;
- } else {
- obj->write_domain = I915_GEM_DOMAIN_CPU;
- obj->read_domains = I915_GEM_DOMAIN_CPU;
- }
-}
+ if (obj->mm.madv == __I915_MADV_PURGED)
+ return 0;
-static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
-{
- struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
- unsigned int cache_level;
- unsigned int i;
+ ret = ttm_bo_validate(bo, &place, &ctx);
+ if (ret)
+ return ret;
- /*
- * If object was moved to an allowable region, update the object
- * region to consider it migrated. Note that if it's currently not
- * in an allowable region, it's evicted and we don't update the
- * object region.
- */
- if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
- for (i = 0; i < obj->mm.n_placements; ++i) {
- struct intel_memory_region *mr = obj->mm.placements[i];
-
- if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
- mr != obj->mm.region) {
- i915_gem_object_release_memory_region(obj);
- i915_gem_object_init_memory_region(obj, mr);
- break;
- }
- }
+ if (bo->ttm && i915_tt->filp) {
+ /*
+ * The below fput (which eventually calls shmem_truncate) might
+ * be delayed by a worker, so when directly called to purge the
+ * pages (like by the shrinker) we should try to be more
+ * aggressive and release the pages immediately.
+ */
+ shmem_truncate_range(file_inode(i915_tt->filp),
+ 0, (loff_t)-1);
+ fput(fetch_and_zero(&i915_tt->filp));
}
- obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
-
- obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
- I915_BO_FLAG_STRUCT_PAGE;
+ obj->write_domain = 0;
+ obj->read_domains = 0;
+ i915_ttm_adjust_gem_after_move(obj);
+ i915_ttm_free_cached_io_rsgt(obj);
+ obj->mm.madv = __I915_MADV_PURGED;
- cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
- bo->ttm);
- i915_gem_object_set_cache_coherency(obj, cache_level);
+ return 0;
}
-static void i915_ttm_purge(struct drm_i915_gem_object *obj)
+static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj,
+ bool no_wait_gpu,
+ bool should_writeback)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct i915_ttm_tt *i915_tt =
+ container_of(bo->ttm, typeof(*i915_tt), ttm);
struct ttm_operation_ctx ctx = {
.interruptible = true,
- .no_wait_gpu = false,
+ .no_wait_gpu = no_wait_gpu,
};
struct ttm_placement place = {};
int ret;
- if (obj->mm.madv == __I915_MADV_PURGED)
- return;
+ if (!bo->ttm || bo->resource->mem_type != TTM_PL_SYSTEM)
+ return 0;
+
+ GEM_BUG_ON(!i915_tt->is_shmem);
+
+ if (!i915_tt->filp)
+ return 0;
+
+ ret = ttm_bo_wait_ctx(bo, &ctx);
+ if (ret)
+ return ret;
+
+ switch (obj->mm.madv) {
+ case I915_MADV_DONTNEED:
+ return i915_ttm_purge(obj);
+ case __I915_MADV_PURGED:
+ return 0;
+ }
+
+ if (bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED)
+ return 0;
- /* TTM's purge interface. Note that we might be reentering. */
+ bo->ttm->page_flags |= TTM_TT_FLAG_SWAPPED;
ret = ttm_bo_validate(bo, &place, &ctx);
- if (!ret) {
- obj->write_domain = 0;
- obj->read_domains = 0;
- i915_ttm_adjust_gem_after_move(obj);
- i915_ttm_free_cached_io_st(obj);
- obj->mm.madv = __I915_MADV_PURGED;
+ if (ret) {
+ bo->ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
+ return ret;
}
-}
-static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
-{
- struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- int ret = i915_ttm_move_notify(bo);
+ if (should_writeback)
+ __shmem_writeback(obj->base.size, i915_tt->filp->f_mapping);
- GEM_WARN_ON(ret);
- GEM_WARN_ON(obj->ttm.cached_io_st);
- if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
- i915_ttm_purge(obj);
+ return 0;
}
static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
@@ -369,232 +479,115 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
if (likely(obj)) {
__i915_gem_object_pages_fini(obj);
- i915_ttm_free_cached_io_st(obj);
+ i915_ttm_free_cached_io_rsgt(obj);
}
}
-static struct intel_memory_region *
-i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
-{
- struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
-
- /* There's some room for optimization here... */
- GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
- ttm_mem_type < I915_PL_LMEM0);
- if (ttm_mem_type == I915_PL_SYSTEM)
- return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
- 0);
-
- return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
- ttm_mem_type - I915_PL_LMEM0);
-}
-
-static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm)
+static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm)
{
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
struct sg_table *st;
int ret;
- if (i915_tt->cached_st)
- return i915_tt->cached_st;
-
- st = kzalloc(sizeof(*st), GFP_KERNEL);
- if (!st)
- return ERR_PTR(-ENOMEM);
+ if (i915_tt->cached_rsgt.table.sgl)
+ return i915_refct_sgt_get(&i915_tt->cached_rsgt);
+ st = &i915_tt->cached_rsgt.table;
ret = sg_alloc_table_from_pages_segment(st,
ttm->pages, ttm->num_pages,
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
i915_sg_segment_size(), GFP_KERNEL);
if (ret) {
- kfree(st);
+ st->sgl = NULL;
return ERR_PTR(ret);
}
ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
if (ret) {
sg_free_table(st);
- kfree(st);
return ERR_PTR(ret);
}
- i915_tt->cached_st = st;
- return st;
+ return i915_refct_sgt_get(&i915_tt->cached_rsgt);
}
-static struct sg_table *
+/**
+ * i915_ttm_resource_get_st - Get a refcounted sg-table pointing to the
+ * resource memory
+ * @obj: The GEM object used for sg-table caching
+ * @res: The struct ttm_resource for which an sg-table is requested.
+ *
+ * This function returns a refcounted sg-table representing the memory
+ * pointed to by @res. If @res is the object's current resource it may also
+ * cache the sg_table on the object or attempt to access an already cached
+ * sg-table. The refcounted sg-table needs to be put when no longer in use.
+ *
+ * Return: A valid pointer to a struct i915_refct_sgt or error pointer on
+ * failure.
+ */
+struct i915_refct_sgt *
i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
struct ttm_resource *res)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
- if (!gpu_binds_iomem(res))
+ if (!i915_ttm_gtt_binds_lmem(res))
return i915_ttm_tt_get_st(bo->ttm);
/*
* If CPU mapping differs, we need to add the ttm_tt pages to
* the resulting st. Might make sense for GGTT.
*/
- GEM_WARN_ON(!cpu_maps_iomem(res));
- return intel_region_ttm_resource_to_st(obj->mm.region, res);
-}
-
-static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
- bool clear,
- struct ttm_resource *dst_mem,
- struct ttm_tt *dst_ttm,
- struct sg_table *dst_st)
-{
- struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
- bdev);
- struct ttm_resource_manager *src_man =
- ttm_manager_type(bo->bdev, bo->resource->mem_type);
- struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- struct sg_table *src_st;
- struct i915_request *rq;
- struct ttm_tt *src_ttm = bo->ttm;
- enum i915_cache_level src_level, dst_level;
- int ret;
+ GEM_WARN_ON(!i915_ttm_cpu_maps_iomem(res));
+ if (bo->resource == res) {
+ if (!obj->ttm.cached_io_rsgt) {
+ struct i915_refct_sgt *rsgt;
- if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
- return -EINVAL;
+ rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
+ res);
+ if (IS_ERR(rsgt))
+ return rsgt;
- dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
- if (clear) {
- if (bo->type == ttm_bo_type_kernel)
- return -EINVAL;
-
- intel_engine_pm_get(i915->gt.migrate.context->engine);
- ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL,
- dst_st->sgl, dst_level,
- gpu_binds_iomem(dst_mem),
- 0, &rq);
-
- if (!ret && rq) {
- i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(rq);
- }
- intel_engine_pm_put(i915->gt.migrate.context->engine);
- } else {
- src_st = src_man->use_tt ? i915_ttm_tt_get_st(src_ttm) :
- obj->ttm.cached_io_st;
-
- src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
- intel_engine_pm_get(i915->gt.migrate.context->engine);
- ret = intel_context_migrate_copy(i915->gt.migrate.context,
- NULL, src_st->sgl, src_level,
- gpu_binds_iomem(bo->resource),
- dst_st->sgl, dst_level,
- gpu_binds_iomem(dst_mem),
- &rq);
- if (!ret && rq) {
- i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(rq);
+ obj->ttm.cached_io_rsgt = rsgt;
}
- intel_engine_pm_put(i915->gt.migrate.context->engine);
+ return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
}
- return ret;
+ return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
}
-static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
- struct ttm_resource *dst_mem,
- struct ttm_tt *dst_ttm,
- struct sg_table *dst_st,
- bool allow_accel)
+static int i915_ttm_truncate(struct drm_i915_gem_object *obj)
{
- int ret = -EINVAL;
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ int err;
- if (allow_accel)
- ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st);
- if (ret) {
- struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- struct intel_memory_region *dst_reg, *src_reg;
- union {
- struct ttm_kmap_iter_tt tt;
- struct ttm_kmap_iter_iomap io;
- } _dst_iter, _src_iter;
- struct ttm_kmap_iter *dst_iter, *src_iter;
-
- dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
- src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
- GEM_BUG_ON(!dst_reg || !src_reg);
-
- dst_iter = !cpu_maps_iomem(dst_mem) ?
- ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
- ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
- dst_st, dst_reg->region.start);
-
- src_iter = !cpu_maps_iomem(bo->resource) ?
- ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
- ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
- obj->ttm.cached_io_st,
- src_reg->region.start);
-
- ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
- }
+ WARN_ON_ONCE(obj->mm.madv == I915_MADV_WILLNEED);
+
+ err = i915_ttm_move_notify(bo);
+ if (err)
+ return err;
+
+ return i915_ttm_purge(obj);
}
-static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
- struct ttm_operation_ctx *ctx,
- struct ttm_resource *dst_mem,
- struct ttm_place *hop)
+static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- struct ttm_resource_manager *dst_man =
- ttm_manager_type(bo->bdev, dst_mem->mem_type);
- struct ttm_tt *ttm = bo->ttm;
- struct sg_table *dst_st;
- bool clear;
int ret;
- /* Sync for now. We could do the actual copy async. */
- ret = ttm_bo_wait_ctx(bo, ctx);
- if (ret)
- return ret;
+ if (!obj)
+ return;
ret = i915_ttm_move_notify(bo);
- if (ret)
- return ret;
-
- if (obj->mm.madv != I915_MADV_WILLNEED) {
+ GEM_WARN_ON(ret);
+ GEM_WARN_ON(obj->ttm.cached_io_rsgt);
+ if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
i915_ttm_purge(obj);
- ttm_resource_free(bo, &dst_mem);
- return 0;
- }
-
- /* Populate ttm with pages if needed. Typically system memory. */
- if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) {
- ret = ttm_tt_populate(bo->bdev, ttm, ctx);
- if (ret)
- return ret;
- }
-
- dst_st = i915_ttm_resource_get_st(obj, dst_mem);
- if (IS_ERR(dst_st))
- return PTR_ERR(dst_st);
-
- clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
- if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
- __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true);
-
- ttm_bo_move_sync_cleanup(bo, dst_mem);
- i915_ttm_adjust_domains_after_move(obj);
- i915_ttm_free_cached_io_st(obj);
-
- if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) {
- obj->ttm.cached_io_st = dst_st;
- obj->ttm.get_io_page.sg_pos = dst_st->sgl;
- obj->ttm.get_io_page.sg_idx = 0;
- }
-
- i915_ttm_adjust_gem_after_move(obj);
- return 0;
}
static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem)
{
- if (!cpu_maps_iomem(mem))
+ if (!i915_ttm_cpu_maps_iomem(mem))
return 0;
mem->bus.caching = ttm_write_combined;
@@ -607,19 +600,26 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long page_offset)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- unsigned long base = obj->mm.region->iomap.base - obj->mm.region->region.start;
struct scatterlist *sg;
+ unsigned long base;
unsigned int ofs;
+ GEM_BUG_ON(!obj);
GEM_WARN_ON(bo->ttm);
+ base = obj->mm.region->iomap.base - obj->mm.region->region.start;
sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true);
return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
}
+/*
+ * All callbacks need to take care not to downcast a struct ttm_buffer_object
+ * without checking its subclass, since it might be a TTM ghost object.
+ */
static struct ttm_device_funcs i915_ttm_bo_driver = {
.ttm_tt_create = i915_ttm_tt_create,
+ .ttm_tt_populate = i915_ttm_tt_populate,
.ttm_tt_unpopulate = i915_ttm_tt_unpopulate,
.ttm_tt_destroy = i915_ttm_tt_destroy,
.eviction_valuable = i915_ttm_eviction_valuable,
@@ -649,7 +649,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
.interruptible = true,
.no_wait_gpu = false,
};
- struct sg_table *st;
int real_num_busy;
int ret;
@@ -676,7 +675,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
return i915_ttm_err_to_gem(ret);
}
- i915_ttm_adjust_lru(obj);
if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) {
ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx);
if (ret)
@@ -687,14 +685,19 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
}
if (!i915_gem_object_has_pages(obj)) {
- /* Object either has a page vector or is an iomem object */
- st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
- if (IS_ERR(st))
- return PTR_ERR(st);
+ struct i915_refct_sgt *rsgt =
+ i915_ttm_resource_get_st(obj, bo->resource);
- __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
+ if (IS_ERR(rsgt))
+ return PTR_ERR(rsgt);
+
+ GEM_BUG_ON(obj->mm.rsgt);
+ obj->mm.rsgt = rsgt;
+ __i915_gem_object_set_pages(obj, &rsgt->table,
+ i915_sg_dma_sizes(rsgt->table.sgl));
}
+ i915_ttm_adjust_lru(obj);
return ret;
}
@@ -766,12 +769,21 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
* and shrinkers will move it out if needed.
*/
- i915_ttm_adjust_lru(obj);
+ if (obj->mm.rsgt)
+ i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt));
}
-static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
+/**
+ * i915_ttm_adjust_lru - Adjust an object's position on relevant LRU lists.
+ * @obj: The object
+ */
+void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct i915_ttm_tt *i915_tt =
+ container_of(bo->ttm, typeof(*i915_tt), ttm);
+ bool shrinkable =
+ bo->ttm && i915_tt->filp && ttm_tt_is_populated(bo->ttm);
/*
* Don't manipulate the TTM LRUs while in TTM bo destruction.
@@ -781,10 +793,53 @@ static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
return;
/*
+ * We skip managing the shrinker LRU in set_pages() and just manage
+ * everything here. This does at least solve the issue with having
+ * temporary shmem mappings (like with evicted lmem) not being visible to
+ * the shrinker. Only our shmem objects are shrinkable, everything else
+ * we keep as unshrinkable.
+ *
+ * To make sure everything plays nice we keep an extra shrink pin in TTM
+ * if the underlying pages are not currently shrinkable. Once we release
+ * our pin, like when the pages are moved to shmem, the pages will then
+ * be added to the shrinker LRU, assuming the caller isn't also holding
+ * a pin.
+ *
+ * TODO: consider maybe also bumping the shrinker list here when we have
+ * already unpinned it, which should give us something more like an LRU.
+ *
+ * TODO: There is a small window of opportunity for this function to
+ * get called from eviction after we've dropped the last GEM refcount,
+ * but before the TTM deleted flag is set on the object. Avoid
+ * adjusting the shrinker list in such cases, since the object is
+ * not available to the shrinker anyway due to its zero refcount.
+ * To fix this properly we should move to a TTM shrinker LRU list for
+ * these objects.
+ */
+ if (kref_get_unless_zero(&obj->base.refcount)) {
+ if (shrinkable != obj->mm.ttm_shrinkable) {
+ if (shrinkable) {
+ if (obj->mm.madv == I915_MADV_WILLNEED)
+ __i915_gem_object_make_shrinkable(obj);
+ else
+ __i915_gem_object_make_purgeable(obj);
+ } else {
+ i915_gem_object_make_unshrinkable(obj);
+ }
+
+ obj->mm.ttm_shrinkable = shrinkable;
+ }
+ i915_gem_object_put(obj);
+ }
+
+ /*
* Put on the correct LRU list depending on the MADV status
*/
spin_lock(&bo->bdev->lru_lock);
- if (obj->mm.madv != I915_MADV_WILLNEED) {
+ if (shrinkable) {
+ /* Try to keep shmem_tt from being considered for shrinking. */
+ bo->priority = TTM_MAX_BO_PRIORITY - 1;
+ } else if (obj->mm.madv != I915_MADV_WILLNEED) {
bo->priority = I915_TTM_PRIO_PURGE;
} else if (!i915_gem_object_has_pages(obj)) {
if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
@@ -823,15 +878,44 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj)
static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
{
struct vm_area_struct *area = vmf->vma;
- struct drm_i915_gem_object *obj =
- i915_ttm_to_gem(area->vm_private_data);
+ struct ttm_buffer_object *bo = area->vm_private_data;
+ struct drm_device *dev = bo->base.dev;
+ struct drm_i915_gem_object *obj;
+ vm_fault_t ret;
+ int idx;
+
+ obj = i915_ttm_to_gem(bo);
+ if (!obj)
+ return VM_FAULT_SIGBUS;
/* Sanity check that we allow writing into this object */
if (unlikely(i915_gem_object_is_readonly(obj) &&
area->vm_flags & VM_WRITE))
return VM_FAULT_SIGBUS;
- return ttm_bo_vm_fault(vmf);
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ if (obj->mm.madv != I915_MADV_WILLNEED) {
+ dma_resv_unlock(bo->base.resv);
+ return VM_FAULT_SIGBUS;
+ }
+
+ if (drm_dev_enter(dev, &idx)) {
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
+ drm_dev_exit(idx);
+ } else {
+ ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+ }
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+ i915_ttm_adjust_lru(obj);
+
+ dma_resv_unlock(bo->base.resv);
+ return ret;
}
static int
@@ -880,16 +964,27 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
return drm_vma_node_offset_addr(&obj->base.vma_node);
}
+static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj)
+{
+ ttm_bo_unmap_virtual(i915_gem_to_ttm(obj));
+}
+
static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
.name = "i915_gem_object_ttm",
+ .flags = I915_GEM_OBJECT_IS_SHRINKABLE |
+ I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST,
.get_pages = i915_ttm_get_pages,
.put_pages = i915_ttm_put_pages,
- .truncate = i915_ttm_purge,
+ .truncate = i915_ttm_truncate,
+ .shrinker_release_pages = i915_ttm_shrinker_release_pages,
+
.adjust_lru = i915_ttm_adjust_lru,
.delayed_free = i915_ttm_delayed_free,
.migrate = i915_ttm_migrate,
+
.mmap_offset = i915_ttm_mmap_offset,
+ .unmap_virtual = i915_ttm_unmap_virtual,
.mmap_ops = &vm_ops_ttm,
};
@@ -901,6 +996,18 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
mutex_destroy(&obj->ttm.get_io_page.lock);
if (obj->ttm.created) {
+ /*
+ * We freely manage the shrinker LRU outside of the mm.pages life
+ * cycle. As a result when destroying the object we should be
+ * extra paranoid and ensure we remove it from the LRU, before
+ * we free the object.
+ *
+ * Touching the ttm_shrinkable outside of the object lock here
+ * should be safe now that the last GEM object ref was dropped.
+ */
+ if (obj->mm.ttm_shrinkable)
+ i915_gem_object_make_unshrinkable(obj);
+
i915_ttm_backup_free(obj);
/* This releases all gem object bindings to the backend. */
@@ -940,10 +1047,9 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags);
/* Don't put on a region list until we're either locked or fully initialized. */
- obj->mm.region = intel_memory_region_get(mem);
+ obj->mm.region = mem;
INIT_LIST_HEAD(&obj->mm.region_link);
- i915_gem_object_make_unshrinkable(obj);
INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN);
mutex_init(&obj->ttm.get_io_page.lock);
bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device :
@@ -955,6 +1061,14 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
GEM_BUG_ON(page_size && obj->mm.n_placements);
/*
+ * Keep an extra shrink pin to prevent the object from being made
+ * shrinkable too early. If the ttm_tt is ever allocated in shmem, we
+ * drop the pin. The TTM backend manages the shrinker LRU itself,
+ * outside of the normal mm.pages life cycle.
+ */
+ i915_gem_object_make_unshrinkable(obj);
+
+ /*
* If this function fails, it will call the destructor, but
* our caller still owns the object. So no freeing in the
* destructor until obj->ttm.created is true.
@@ -980,6 +1094,7 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
static const struct intel_memory_region_ops ttm_system_region_ops = {
.init_object = __i915_gem_ttm_object_init,
+ .release = intel_region_ttm_fini,
};
struct intel_memory_region *
@@ -999,50 +1114,3 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915,
intel_memory_region_set_name(mr, "system-ttm");
return mr;
}
-
-/**
- * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to
- * another
- * @dst: The destination object
- * @src: The source object
- * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used.
- * @intr: Whether to perform waits interruptible:
- *
- * Note: The caller is responsible for assuring that the underlying
- * TTM objects are populated if needed and locked.
- *
- * Return: Zero on success. Negative error code on error. If @intr == true,
- * then it may return -ERESTARTSYS or -EINTR.
- */
-int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
- struct drm_i915_gem_object *src,
- bool allow_accel, bool intr)
-{
- struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst);
- struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src);
- struct ttm_operation_ctx ctx = {
- .interruptible = intr,
- };
- struct sg_table *dst_st;
- int ret;
-
- assert_object_held(dst);
- assert_object_held(src);
-
- /*
- * Sync for now. This will change with async moves.
- */
- ret = ttm_bo_wait_ctx(dst_bo, &ctx);
- if (!ret)
- ret = ttm_bo_wait_ctx(src_bo, &ctx);
- if (ret)
- return ret;
-
- dst_st = gpu_binds_iomem(dst_bo->resource) ?
- dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm);
-
- __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
- dst_st, allow_accel);
-
- return 0;
-}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index 0b7291dd897c..9d698ad00853 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -5,6 +5,8 @@
#ifndef _I915_GEM_TTM_H_
#define _I915_GEM_TTM_H_
+#include <drm/ttm/ttm_placement.h>
+
#include "gem/i915_gem_object_types.h"
/**
@@ -35,7 +37,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
static inline struct drm_i915_gem_object *
i915_ttm_to_gem(struct ttm_buffer_object *bo)
{
- if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy))
+ if (bo->destroy != i915_ttm_bo_destroy)
return NULL;
return container_of(bo, struct drm_i915_gem_object, __do_not_access);
@@ -47,10 +49,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
resource_size_t page_size,
unsigned int flags);
-int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
- struct drm_i915_gem_object *src,
- bool allow_accel, bool intr);
-
/* Internal I915 TTM declarations and definitions below. */
#define I915_PL_LMEM0 TTM_PL_PRIV
@@ -60,4 +58,37 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
struct ttm_placement *i915_ttm_sys_placement(void);
+void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj);
+
+struct i915_refct_sgt *
+i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
+ struct ttm_resource *res);
+
+void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);
+
+int i915_ttm_purge(struct drm_i915_gem_object *obj);
+
+/**
+ * i915_ttm_gtt_binds_lmem - Should the memory be viewed as LMEM by the GTT?
+ * @mem: struct ttm_resource representing the memory.
+ *
+ * Return: true if memory should be viewed as LMEM for GTT binding purposes,
+ * false otherwise.
+ */
+static inline bool i915_ttm_gtt_binds_lmem(struct ttm_resource *mem)
+{
+ return mem->mem_type != I915_PL_SYSTEM;
+}
+
+/**
+ * i915_ttm_cpu_maps_iomem - Should the memory be viewed as IOMEM by the CPU?
+ * @mem: struct ttm_resource representing the memory.
+ *
+ * Return: true if memory should be viewed as IOMEM for CPU mapping purposes.
+ */
+static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem)
+{
+ /* Once / if we support GGTT, this is also false for cached ttm_tts */
+ return mem->mem_type != I915_PL_SYSTEM;
+}
#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
new file mode 100644
index 000000000000..ee9612a3ee5e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -0,0 +1,627 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/ttm/ttm_bo_driver.h>
+
+#include "i915_deps.h"
+#include "i915_drv.h"
+#include "intel_memory_region.h"
+#include "intel_region_ttm.h"
+
+#include "gem/i915_gem_object.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
+
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_migrate.h"
+
+/**
+ * DOC: Selftest failure modes for failsafe migration:
+ *
+ * For fail_gpu_migration, the gpu blit scheduled is always a clear blit
+ * rather than a copy blit, and then we force the failure paths as if
+ * the blit fence returned an error.
+ *
+ * For fail_work_allocation we fail the kmalloc of the async worker and
+ * sync the gpu blit. If it then fails, or fail_gpu_migration is set to
+ * true, then a memcpy operation is performed sync.
+ */
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+static bool fail_gpu_migration;
+static bool fail_work_allocation;
+
+void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+ bool work_allocation)
+{
+ fail_gpu_migration = gpu_migration;
+ fail_work_allocation = work_allocation;
+}
+#endif
+
+static enum i915_cache_level
+i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
+ struct ttm_tt *ttm)
+{
+ return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+ !i915_ttm_gtt_binds_lmem(res) &&
+ ttm->caching == ttm_cached) ? I915_CACHE_LLC :
+ I915_CACHE_NONE;
+}
+
+static struct intel_memory_region *
+i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
+{
+ struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+
+ /* There's some room for optimization here... */
+ GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
+ ttm_mem_type < I915_PL_LMEM0);
+ if (ttm_mem_type == I915_PL_SYSTEM)
+ return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
+ 0);
+
+ return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
+ ttm_mem_type - I915_PL_LMEM0);
+}
+
+/**
+ * i915_ttm_adjust_domains_after_move - Adjust the GEM domains after a
+ * TTM move
+ * @obj: The gem object
+ */
+void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+
+ if (i915_ttm_cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
+ obj->write_domain = I915_GEM_DOMAIN_WC;
+ obj->read_domains = I915_GEM_DOMAIN_WC;
+ } else {
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+ }
+}
+
+/**
+ * i915_ttm_adjust_gem_after_move - Adjust the GEM state after a TTM move
+ * @obj: The gem object
+ *
+ * Adjusts the GEM object's region, mem_flags and cache coherency after a
+ * TTM move.
+ */
+void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ unsigned int cache_level;
+ unsigned int i;
+
+ /*
+ * If object was moved to an allowable region, update the object
+ * region to consider it migrated. Note that if it's currently not
+ * in an allowable region, it's evicted and we don't update the
+ * object region.
+ */
+ if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
+ for (i = 0; i < obj->mm.n_placements; ++i) {
+ struct intel_memory_region *mr = obj->mm.placements[i];
+
+ if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
+ mr != obj->mm.region) {
+ i915_gem_object_release_memory_region(obj);
+ i915_gem_object_init_memory_region(obj, mr);
+ break;
+ }
+ }
+ }
+
+ obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
+
+ obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
+ I915_BO_FLAG_STRUCT_PAGE;
+
+ cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
+ bo->ttm);
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+}
+
+/**
+ * i915_ttm_move_notify - Prepare an object for move
+ * @bo: The ttm buffer object.
+ *
+ * This function prepares an object for move by removing all GPU bindings,
+ * removing all CPU mappings and finally releasing the pages sg-table.
+ *
+ * Return: 0 if successful, negative error code on error.
+ */
+int i915_ttm_move_notify(struct ttm_buffer_object *bo)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ int ret;
+
+ ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+ if (ret)
+ return ret;
+
+ ret = __i915_gem_object_put_pages(obj);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
+ bool clear,
+ struct ttm_resource *dst_mem,
+ struct ttm_tt *dst_ttm,
+ struct sg_table *dst_st,
+ const struct i915_deps *deps)
+{
+ struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
+ bdev);
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct i915_request *rq;
+ struct ttm_tt *src_ttm = bo->ttm;
+ enum i915_cache_level src_level, dst_level;
+ int ret;
+
+ if (!to_gt(i915)->migrate.context || intel_gt_is_wedged(to_gt(i915)))
+ return ERR_PTR(-EINVAL);
+
+ /* With fail_gpu_migration, we always perform a GPU clear. */
+ if (I915_SELFTEST_ONLY(fail_gpu_migration))
+ clear = true;
+
+ dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
+ if (clear) {
+ if (bo->type == ttm_bo_type_kernel &&
+ !I915_SELFTEST_ONLY(fail_gpu_migration))
+ return ERR_PTR(-EINVAL);
+
+ intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
+ ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps,
+ dst_st->sgl, dst_level,
+ i915_ttm_gtt_binds_lmem(dst_mem),
+ 0, &rq);
+ } else {
+ struct i915_refct_sgt *src_rsgt =
+ i915_ttm_resource_get_st(obj, bo->resource);
+
+ if (IS_ERR(src_rsgt))
+ return ERR_CAST(src_rsgt);
+
+ src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
+ intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
+ ret = intel_context_migrate_copy(to_gt(i915)->migrate.context,
+ deps, src_rsgt->table.sgl,
+ src_level,
+ i915_ttm_gtt_binds_lmem(bo->resource),
+ dst_st->sgl, dst_level,
+ i915_ttm_gtt_binds_lmem(dst_mem),
+ &rq);
+
+ i915_refct_sgt_put(src_rsgt);
+ }
+
+ intel_engine_pm_put(to_gt(i915)->migrate.context->engine);
+
+ if (ret && rq) {
+ i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(rq);
+ }
+
+ return ret ? ERR_PTR(ret) : &rq->fence;
+}
+
+/**
+ * struct i915_ttm_memcpy_arg - argument for the bo memcpy functionality.
+ * @_dst_iter: Storage space for the destination kmap iterator.
+ * @_src_iter: Storage space for the source kmap iterator.
+ * @dst_iter: Pointer to the destination kmap iterator.
+ * @src_iter: Pointer to the source kmap iterator.
+ * @clear: Whether to clear instead of copy.
+ * @src_rsgt: Refcounted scatter-gather list of source memory.
+ * @dst_rsgt: Refcounted scatter-gather list of destination memory.
+ */
+struct i915_ttm_memcpy_arg {
+ union {
+ struct ttm_kmap_iter_tt tt;
+ struct ttm_kmap_iter_iomap io;
+ } _dst_iter,
+ _src_iter;
+ struct ttm_kmap_iter *dst_iter;
+ struct ttm_kmap_iter *src_iter;
+ unsigned long num_pages;
+ bool clear;
+ struct i915_refct_sgt *src_rsgt;
+ struct i915_refct_sgt *dst_rsgt;
+};
+
+/**
+ * struct i915_ttm_memcpy_work - Async memcpy worker under a dma-fence.
+ * @fence: The dma-fence.
+ * @work: The work struct used for the memcpy work.
+ * @lock: The fence lock. Not used to protect anything else ATM.
+ * @irq_work: Low latency worker to signal the fence since it can't be done
+ * from the callback for lockdep reasons.
+ * @cb: Callback for the accelerated migration fence.
+ * @arg: The argument for the memcpy functionality.
+ */
+struct i915_ttm_memcpy_work {
+ struct dma_fence fence;
+ struct work_struct work;
+ /* The fence lock */
+ spinlock_t lock;
+ struct irq_work irq_work;
+ struct dma_fence_cb cb;
+ struct i915_ttm_memcpy_arg arg;
+};
+
+static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg)
+{
+ ttm_move_memcpy(arg->clear, arg->num_pages,
+ arg->dst_iter, arg->src_iter);
+}
+
+static void i915_ttm_memcpy_init(struct i915_ttm_memcpy_arg *arg,
+ struct ttm_buffer_object *bo, bool clear,
+ struct ttm_resource *dst_mem,
+ struct ttm_tt *dst_ttm,
+ struct i915_refct_sgt *dst_rsgt)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct intel_memory_region *dst_reg, *src_reg;
+
+ dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
+ src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
+ GEM_BUG_ON(!dst_reg || !src_reg);
+
+ arg->dst_iter = !i915_ttm_cpu_maps_iomem(dst_mem) ?
+ ttm_kmap_iter_tt_init(&arg->_dst_iter.tt, dst_ttm) :
+ ttm_kmap_iter_iomap_init(&arg->_dst_iter.io, &dst_reg->iomap,
+ &dst_rsgt->table, dst_reg->region.start);
+
+ arg->src_iter = !i915_ttm_cpu_maps_iomem(bo->resource) ?
+ ttm_kmap_iter_tt_init(&arg->_src_iter.tt, bo->ttm) :
+ ttm_kmap_iter_iomap_init(&arg->_src_iter.io, &src_reg->iomap,
+ &obj->ttm.cached_io_rsgt->table,
+ src_reg->region.start);
+ arg->clear = clear;
+ arg->num_pages = bo->base.size >> PAGE_SHIFT;
+
+ arg->dst_rsgt = i915_refct_sgt_get(dst_rsgt);
+ arg->src_rsgt = clear ? NULL :
+ i915_ttm_resource_get_st(obj, bo->resource);
+}
+
+static void i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg *arg)
+{
+ i915_refct_sgt_put(arg->src_rsgt);
+ i915_refct_sgt_put(arg->dst_rsgt);
+}
+
+static void __memcpy_work(struct work_struct *work)
+{
+ struct i915_ttm_memcpy_work *copy_work =
+ container_of(work, typeof(*copy_work), work);
+ struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
+ bool cookie = dma_fence_begin_signalling();
+
+ i915_ttm_move_memcpy(arg);
+ dma_fence_end_signalling(cookie);
+
+ dma_fence_signal(&copy_work->fence);
+
+ i915_ttm_memcpy_release(arg);
+ dma_fence_put(&copy_work->fence);
+}
+
+static void __memcpy_irq_work(struct irq_work *irq_work)
+{
+ struct i915_ttm_memcpy_work *copy_work =
+ container_of(irq_work, typeof(*copy_work), irq_work);
+ struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
+
+ dma_fence_signal(&copy_work->fence);
+ i915_ttm_memcpy_release(arg);
+ dma_fence_put(&copy_work->fence);
+}
+
+/*
+ * dma_fence callback installed on the dependency fence.
+ *
+ * If the dependency carries an error (or the selftest asks for a simulated
+ * GPU migration failure) the CPU copy is queued on system_unbound_wq.
+ * Otherwise an irq_work is queued that only signals and cleans up.
+ */
+static void __memcpy_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct i915_ttm_memcpy_work *mw = container_of(cb, typeof(*mw), cb);
+
+	/* Failure path: fall back to the memcpy worker. */
+	if (unlikely(fence->error || I915_SELFTEST_ONLY(fail_gpu_migration))) {
+		INIT_WORK(&mw->work, __memcpy_work);
+		queue_work(system_unbound_wq, &mw->work);
+		return;
+	}
+
+	/* Success path: nothing to copy, just signal and release. */
+	init_irq_work(&mw->irq_work, __memcpy_irq_work);
+	irq_work_queue(&mw->irq_work);
+}
+
+/* dma_fence_ops.get_driver_name hook: constant name for memcpy-work fences. */
+static const char *get_driver_name(struct dma_fence *fence)
+{
+ return "i915_ttm_memcpy_work";
+}
+
+/* dma_fence_ops.get_timeline_name hook: constant timeline name for memcpy-work fences. */
+static const char *get_timeline_name(struct dma_fence *fence)
+{
+ return "unbound";
+}
+
+/*
+ * Fence ops for the fence embedded in struct i915_ttm_memcpy_work. Only the
+ * two naming hooks are provided; every other op is left unset.
+ */
+static const struct dma_fence_ops dma_fence_memcpy_ops = {
+ .get_driver_name = get_driver_name,
+ .get_timeline_name = get_timeline_name,
+};
+
+/*
+ * Arm @work so that it runs once @dep signals.
+ *
+ * Initializes the lock and fence embedded in @work, takes one extra
+ * reference on that fence and registers __memcpy_cb() as a callback on @dep.
+ *
+ * Return: &work->fence if the callback was installed. Otherwise
+ * ERR_PTR(dep->error), or ERR_PTR(-EINVAL) when the selftest requests a
+ * simulated GPU migration failure. Note that ERR_PTR(0) is NULL, so a @dep
+ * with no error recorded yields NULL rather than an error pointer.
+ */
+static struct dma_fence *
+i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
+ struct dma_fence *dep)
+{
+ int ret;
+
+ spin_lock_init(&work->lock);
+ dma_fence_init(&work->fence, &dma_fence_memcpy_ops, &work->lock, 0, 0);
+ /* Extra reference; the work handlers end with a matching dma_fence_put(). */
+ dma_fence_get(&work->fence);
+ ret = dma_fence_add_callback(dep, &work->cb, __memcpy_cb);
+ if (ret) {
+ /*
+ * -ENOENT from dma_fence_add_callback() means @dep has already
+ * signalled. For any other failure, wait for @dep before
+ * reading its error below.
+ */
+ if (ret != -ENOENT)
+ dma_fence_wait(dep, false);
+
+ return ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? -EINVAL :
+ dep->error);
+ }
+
+ return &work->fence;
+}
+
+/*
+ * Move or clear the contents of @bo into @dst_mem, either asynchronously
+ * (via i915_ttm_accel_move(), optionally backed by a memcpy work item armed
+ * on the resulting fence) or synchronously with i915_ttm_move_memcpy().
+ *
+ * @allow_accel selects whether i915_ttm_accel_move() is attempted at all.
+ * @move_deps is handed to the accelerated move; when no accelerated move
+ * was scheduled and @move_deps is non-NULL it is synced with
+ * i915_deps_sync() before the CPU copy.
+ *
+ * Return: a fence for work still in flight, NULL when the function finished
+ * without leaving a fence to wait on, or an ERR_PTR() if i915_deps_sync()
+ * failed.
+ */
+static struct dma_fence *
+__i915_ttm_move(struct ttm_buffer_object *bo,
+ const struct ttm_operation_ctx *ctx, bool clear,
+ struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
+ struct i915_refct_sgt *dst_rsgt, bool allow_accel,
+ const struct i915_deps *move_deps)
+{
+ struct i915_ttm_memcpy_work *copy_work = NULL;
+ /* On-stack argument for the synchronous path; replaced by the work's own arg if one is allocated. */
+ struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
+ struct dma_fence *fence = ERR_PTR(-EINVAL);
+
+ if (allow_accel) {
+ fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
+ &dst_rsgt->table, move_deps);
+
+ /*
+ * We only need to intercept the error when moving to lmem.
+ * When moving to system, TTM or shmem will provide us with
+ * cleared pages.
+ */
+ if (!IS_ERR(fence) && !i915_ttm_gtt_binds_lmem(dst_mem) &&
+ !I915_SELFTEST_ONLY(fail_gpu_migration ||
+ fail_work_allocation))
+ goto out;
+ }
+
+ /* If we've scheduled a GPU migration, try to arm the error intercept. */
+ if (!IS_ERR(fence)) {
+ struct dma_fence *dep = fence;
+
+ if (!I915_SELFTEST_ONLY(fail_work_allocation))
+ copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);
+
+ if (copy_work) {
+ arg = &copy_work->arg;
+ i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
+ dst_rsgt);
+ fence = i915_ttm_memcpy_work_arm(copy_work, dep);
+ } else {
+ /*
+ * No work item available: wait for the GPU operation and
+ * turn its error status into the result. A non-zero error
+ * falls through to the CPU copy below; ERR_PTR(0) is NULL
+ * and takes the "out" path.
+ */
+ dma_fence_wait(dep, false);
+ fence = ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ?
+ -EINVAL : fence->error);
+ }
+ /* presumably drops the reference returned by i915_ttm_accel_move() */
+ dma_fence_put(dep);
+
+ if (!IS_ERR(fence))
+ goto out;
+ } else if (move_deps) {
+ int err = i915_deps_sync(move_deps, ctx);
+
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ /* Error intercept failed or no accelerated migration to start with */
+ /* If a work item was allocated, its arg was already initialised above. */
+ if (!copy_work)
+ i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
+ dst_rsgt);
+ i915_ttm_move_memcpy(arg);
+ i915_ttm_memcpy_release(arg);
+ kfree(copy_work);
+
+ return NULL;
+out:
+ /*
+ * A NULL fence with a work item allocated means arming returned
+ * ERR_PTR(0): the callback was not installed, so undo the init here.
+ */
+ if (!fence && copy_work) {
+ i915_ttm_memcpy_release(arg);
+ kfree(copy_work);
+ }
+
+ return fence;
+}
+
+/*
+ * Collect the fences a move of @bo has to wait for into @deps: first the
+ * bo's moving fence, then the fences of its reservation object.
+ *
+ * Return: 0 on success, otherwise the first error reported by the
+ * i915_deps helpers.
+ */
+static int
+prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+	  struct i915_deps *deps)
+{
+	int err = i915_deps_add_dependency(deps, bo->moving, ctx);
+
+	if (err)
+		return err;
+
+	return i915_deps_add_resv(deps, bo->base.resv, ctx);
+}
+
+/**
+ * i915_ttm_move - The TTM move callback used by i915.
+ * @bo: The buffer object.
+ * @evict: Whether this is an eviction.
+ * @ctx: The ttm operation context, passed on to ttm_tt_populate() and to the
+ * dependency-collection and move helpers.
+ * @dst_mem: The destination ttm resource.
+ * @hop: If we need multihop, what temporary memory type to move to.
+ * Not referenced by this implementation.
+ *
+ * Return: 0 if successful, negative error code otherwise.
+ */
+int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_resource *dst_mem,
+ struct ttm_place *hop)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct ttm_resource_manager *dst_man =
+ ttm_manager_type(bo->bdev, dst_mem->mem_type);
+ struct dma_fence *migration_fence = NULL;
+ struct ttm_tt *ttm = bo->ttm;
+ struct i915_refct_sgt *dst_rsgt;
+ bool clear;
+ int ret;
+
+ /* No gem object behind this bo: do a null move and report success. */
+ if (GEM_WARN_ON(!obj)) {
+ ttm_bo_move_null(bo, dst_mem);
+ return 0;
+ }
+
+ ret = i915_ttm_move_notify(bo);
+ if (ret)
+ return ret;
+
+ /* Contents are not marked WILLNEED: purge instead of moving. */
+ if (obj->mm.madv != I915_MADV_WILLNEED) {
+ i915_ttm_purge(obj);
+ ttm_resource_free(bo, &dst_mem);
+ return 0;
+ }
+
+ /* Populate ttm with pages if needed. Typically system memory. */
+ if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) {
+ ret = ttm_tt_populate(bo->bdev, ttm, ctx);
+ if (ret)
+ return ret;
+ }
+
+ dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem);
+ if (IS_ERR(dst_rsgt))
+ return PTR_ERR(dst_rsgt);
+
+ /*
+ * A clear (rather than a copy) is used when the source is not
+ * CPU-mapped iomem and there is no populated ttm to copy from.
+ */
+ clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
+ /*
+ * Skip the move entirely when it would be a clear for a ttm that did
+ * not request zeroed pages (TTM_TT_FLAG_ZERO_ALLOC unset).
+ */
+ if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
+ struct i915_deps deps;
+
+ i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+ ret = prev_deps(bo, ctx, &deps);
+ if (ret) {
+ i915_refct_sgt_put(dst_rsgt);
+ return ret;
+ }
+
+ migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm,
+ dst_rsgt, true, &deps);
+ i915_deps_fini(&deps);
+ }
+
+ /* We can possibly get an -ERESTARTSYS here */
+ if (IS_ERR(migration_fence)) {
+ i915_refct_sgt_put(dst_rsgt);
+ return PTR_ERR(migration_fence);
+ }
+
+ if (migration_fence) {
+ /*
+ * Asynchronous move: hand the fence to TTM. If that fails, wait
+ * for the fence and fall back to the synchronous cleanup.
+ */
+ ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict,
+ true, dst_mem);
+ if (ret) {
+ dma_fence_wait(migration_fence, false);
+ ttm_bo_move_sync_cleanup(bo, dst_mem);
+ }
+ dma_fence_put(migration_fence);
+ } else {
+ ttm_bo_move_sync_cleanup(bo, dst_mem);
+ }
+
+ i915_ttm_adjust_domains_after_move(obj);
+ i915_ttm_free_cached_io_rsgt(obj);
+
+ /*
+ * For lmem / CPU-mapped iomem destinations keep dst_rsgt (and the
+ * reference obtained above) cached on the object and reset the io page
+ * lookup state; otherwise drop the reference.
+ */
+ if (i915_ttm_gtt_binds_lmem(dst_mem) || i915_ttm_cpu_maps_iomem(dst_mem)) {
+ obj->ttm.cached_io_rsgt = dst_rsgt;
+ obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl;
+ obj->ttm.get_io_page.sg_idx = 0;
+ } else {
+ i915_refct_sgt_put(dst_rsgt);
+ }
+
+ i915_ttm_adjust_lru(obj);
+ i915_ttm_adjust_gem_after_move(obj);
+ return 0;
+}
+
+/**
+ * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to
+ * another
+ * @dst: The destination object
+ * @src: The source object
+ * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used.
+ * @intr: Whether to perform waits interruptibly.
+ *
+ * Note: The caller is responsible for assuring that the underlying
+ * TTM objects are populated if needed and locked.
+ *
+ * Return: Zero on success. Negative error code on error. If @intr == true,
+ * then it may return -ERESTARTSYS or -EINTR.
+ */
+int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
+			  struct drm_i915_gem_object *src,
+			  bool allow_accel, bool intr)
+{
+	struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst);
+	struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src);
+	struct ttm_operation_ctx ctx = {
+		.interruptible = intr,
+	};
+	struct i915_refct_sgt *dst_rsgt;
+	struct dma_fence *copy_fence;
+	struct i915_deps deps;
+	int ret;
+
+	assert_object_held(dst);
+	assert_object_held(src);
+	i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+
+	/* Make room for the shared fence added to @src at the end. */
+	ret = dma_resv_reserve_shared(src_bo->base.resv, 1);
+	if (ret)
+		return ret;
+
+	/* The copy must wait for everything pending on both objects. */
+	ret = i915_deps_add_resv(&deps, dst_bo->base.resv, &ctx);
+	if (ret)
+		return ret;
+
+	ret = i915_deps_add_resv(&deps, src_bo->base.resv, &ctx);
+	if (ret)
+		return ret;
+
+	/*
+	 * i915_ttm_resource_get_st() can return an ERR_PTR() (i915_ttm_move()
+	 * checks for it too). Bail out before the pointer is dereferenced
+	 * below or handed to i915_refct_sgt_put().
+	 */
+	dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
+	if (IS_ERR(dst_rsgt)) {
+		i915_deps_fini(&deps);
+		return PTR_ERR(dst_rsgt);
+	}
+
+	copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource,
+				     dst_bo->ttm, dst_rsgt, allow_accel,
+				     &deps);
+
+	i915_deps_fini(&deps);
+	i915_refct_sgt_put(dst_rsgt);
+	/* NULL: nothing left in flight; ERR_PTR: the copy failed. */
+	if (IS_ERR_OR_NULL(copy_fence))
+		return PTR_ERR_OR_ZERO(copy_fence);
+
+	/* Copy still in flight: publish its fence on both objects. */
+	dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence);
+	dma_resv_add_shared_fence(src_bo->base.resv, copy_fence);
+
+	dma_fence_put(copy_fence);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
new file mode 100644
index 000000000000..d2e7f149e05c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _I915_GEM_TTM_MOVE_H_
+#define _I915_GEM_TTM_MOVE_H_
+
+#include <linux/types.h>
+
+#include "i915_selftest.h"
+
+struct ttm_buffer_object;
+struct ttm_operation_ctx;
+struct ttm_place;
+struct ttm_resource;
+struct ttm_tt;
+
+struct drm_i915_gem_object;
+struct i915_refct_sgt;
+
+/*
+ * NOTE(review): defined elsewhere; i915_ttm_move() calls this first and
+ * aborts the move on a non-zero return.
+ */
+int i915_ttm_move_notify(struct ttm_buffer_object *bo);
+
+/*
+ * Selftest-only declaration. Presumably sets the simulated failure modes
+ * (GPU migration failure, work allocation failure) that the move code
+ * consults - confirm against the definition.
+ */
+I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+ bool work_allocation));
+
+/* Copy the contents of @src to @dst; returns 0 or a negative error code. */
+int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
+ struct drm_i915_gem_object *src,
+ bool allow_accel, bool intr);
+
+/* Internal I915 TTM declarations and definitions below. */
+
+/* The TTM move callback used by i915. */
+int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_resource *dst_mem,
+ struct ttm_place *hop);
+
+/* Called by i915_ttm_move() after the TTM move cleanup has been done. */
+void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj);
+
+/* Called by i915_ttm_move() as its last step before returning success. */
+void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
index 3b6d14b5c604..9aad84059d56 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
@@ -12,6 +12,7 @@
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
#include "gem/i915_gem_ttm_pm.h"
/**
@@ -79,6 +80,7 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply,
err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false);
GEM_WARN_ON(err);
+ ttm_bo_wait_ctx(backup_bo, &ctx);
obj->ttm.backup = backup;
return 0;
@@ -169,6 +171,7 @@ static int i915_ttm_restore(struct i915_gem_apply_to_region *apply,
err = i915_gem_obj_copy_ttm(obj, backup, pm_apply->allow_gpu,
false);
GEM_WARN_ON(err);
+ ttm_bo_wait_ctx(backup_bo, &ctx);
obj->ttm.backup = NULL;
err = 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 2cb51e3dbb62..6d1a71d6404c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -530,7 +530,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
* On almost all of the older hw, we cannot tell the GPU that
* a page is readonly.
*/
- if (!dev_priv->gt.vm->has_read_only)
+ if (!to_gt(dev_priv)->vm->has_read_only)
return -ENODEV;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index f11325484110..dab3d30c09a0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -10,7 +10,6 @@
#include "gt/intel_engine.h"
-#include "dma_resv_utils.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
@@ -52,13 +51,6 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
}
dma_resv_iter_end(&cursor);
- /*
- * Opportunistically prune the fences iff we know they have *all* been
- * signaled.
- */
- if (timeout > 0)
- dma_resv_prune(resv);
-
return ret;
}
@@ -262,6 +254,6 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
unsigned int flags)
{
might_sleep();
- /* NOP for now. */
- return 0;
+
+ return i915_gem_object_wait_moving_fence(obj, !!(flags & I915_WAIT_INTERRUPTIBLE));
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
index dbdbdc344d87..7271fbf813fa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gemfs.c
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -12,6 +12,7 @@
int i915_gemfs_init(struct drm_i915_private *i915)
{
+ char huge_opt[] = "huge=within_size"; /* r/w */
struct file_system_type *type;
struct vfsmount *gemfs;
char *opts;
@@ -31,10 +32,8 @@ int i915_gemfs_init(struct drm_i915_private *i915)
*/
opts = NULL;
- if (intel_vtd_active()) {
+ if (intel_vtd_active(i915)) {
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
- static char huge_opt[] = "huge=within_size"; /* r/w */
-
opts = huge_opt;
drm_info(&i915->drm,
"Transparent Hugepage mode '%s'\n",
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index b2003133deaf..11f0aa65f8a3 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -22,6 +22,22 @@
#include "selftests/mock_region.h"
#include "selftests/i915_random.h"
+/*
+ * Create a live context on @file and, when the context's vm pointer is
+ * non-NULL, set scrub_64K on that address space.
+ *
+ * Return: the new context, or the ERR_PTR() from live_context() unchanged.
+ */
+static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915,
+					     struct file *file)
+{
+	struct i915_gem_context *ctx;
+
+	ctx = live_context(i915, file);
+	if (!IS_ERR(ctx)) {
+		struct i915_address_space *as = ctx->vm;
+
+		if (as)
+			WRITE_ONCE(as->scrub_64K, true);
+	}
+
+	return ctx;
+}
+
static const unsigned int page_sizes[] = {
I915_GTT_PAGE_SIZE_2M,
I915_GTT_PAGE_SIZE_64K,
@@ -552,7 +568,7 @@ out_unpin:
out_put:
i915_gem_object_put(obj);
out_region:
- intel_memory_region_put(mem);
+ intel_memory_region_destroy(mem);
return err;
}
@@ -959,6 +975,8 @@ static int igt_mock_ppgtt_64K(void *arg)
__i915_gem_object_put_pages(obj);
i915_gem_object_unlock(obj);
i915_gem_object_put(obj);
+
+ i915_gem_drain_freed_objects(i915);
}
}
@@ -1080,10 +1098,6 @@ static int __igt_write_huge(struct intel_context *ce,
if (IS_ERR(vma))
return PTR_ERR(vma);
- err = i915_vma_unbind(vma);
- if (err)
- return err;
-
err = i915_vma_pin(vma, size, 0, flags | offset);
if (err) {
/*
@@ -1117,7 +1131,7 @@ out_vma_unpin:
return err;
}
-static int igt_write_huge(struct i915_gem_context *ctx,
+static int igt_write_huge(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj)
{
struct i915_gem_engines *engines;
@@ -1127,6 +1141,8 @@ static int igt_write_huge(struct i915_gem_context *ctx,
IGT_TIMEOUT(end_time);
unsigned int max_page_size;
unsigned int count;
+ struct i915_gem_context *ctx;
+ struct file *file;
u64 max;
u64 num;
u64 size;
@@ -1134,6 +1150,16 @@ static int igt_write_huge(struct i915_gem_context *ctx,
int i, n;
int err = 0;
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
size = obj->base.size;
@@ -1153,7 +1179,7 @@ static int igt_write_huge(struct i915_gem_context *ctx,
}
i915_gem_context_unlock_engines(ctx);
if (!n)
- return 0;
+ goto out;
/*
* To keep things interesting when alternating between engines in our
@@ -1215,6 +1241,8 @@ static int igt_write_huge(struct i915_gem_context *ctx,
kfree(order);
+out:
+ fput(file);
return err;
}
@@ -1277,8 +1305,7 @@ static u32 igt_random_size(struct rnd_state *prng,
static int igt_ppgtt_smoke_huge(void *arg)
{
- struct i915_gem_context *ctx = arg;
- struct drm_i915_private *i915 = ctx->i915;
+ struct drm_i915_private *i915 = arg;
struct drm_i915_gem_object *obj;
I915_RND_STATE(prng);
struct {
@@ -1302,6 +1329,7 @@ static int igt_ppgtt_smoke_huge(void *arg)
u32 min = backends[i].min;
u32 max = backends[i].max;
u32 size = max;
+
try_again:
size = igt_random_size(&prng, min, rounddown_pow_of_two(size));
@@ -1336,7 +1364,7 @@ try_again:
goto out_unpin;
}
- err = igt_write_huge(ctx, obj);
+ err = igt_write_huge(i915, obj);
if (err) {
pr_err("%s write-huge failed with size=%u, i=%d\n",
__func__, size, i);
@@ -1363,8 +1391,7 @@ out_put:
static int igt_ppgtt_sanity_check(void *arg)
{
- struct i915_gem_context *ctx = arg;
- struct drm_i915_private *i915 = ctx->i915;
+ struct drm_i915_private *i915 = arg;
unsigned int supported = INTEL_INFO(i915)->page_sizes;
struct {
igt_create_fn fn;
@@ -1431,7 +1458,7 @@ static int igt_ppgtt_sanity_check(void *arg)
if (pages)
obj->mm.page_sizes.sg = pages;
- err = igt_write_huge(ctx, obj);
+ err = igt_write_huge(i915, obj);
i915_gem_object_lock(obj, NULL);
i915_gem_object_unpin_pages(obj);
@@ -1458,15 +1485,27 @@ out:
static int igt_tmpfs_fallback(void *arg)
{
- struct i915_gem_context *ctx = arg;
- struct drm_i915_private *i915 = ctx->i915;
+ struct drm_i915_private *i915 = arg;
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
struct vfsmount *gemfs = i915->mm.gemfs;
- struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx);
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
+ struct file *file;
u32 *vaddr;
int err = 0;
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+ vm = i915_gem_context_get_eb_vm(ctx);
+
/*
* Make sure that we don't burst into a ball of flames upon falling back
* to tmpfs, which we rely on if on the off-chance we encouter a failure
@@ -1510,33 +1549,47 @@ out_restore:
i915->mm.gemfs = gemfs;
i915_vm_put(vm);
+out:
+ fput(file);
return err;
}
static int igt_shrink_thp(void *arg)
{
- struct i915_gem_context *ctx = arg;
- struct drm_i915_private *i915 = ctx->i915;
- struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx);
+ struct drm_i915_private *i915 = arg;
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
struct drm_i915_gem_object *obj;
struct i915_gem_engines_iter it;
struct intel_context *ce;
struct i915_vma *vma;
+ struct file *file;
unsigned int flags = PIN_USER;
unsigned int n;
bool should_swap;
- int err = 0;
+ int err;
+
+ if (!igt_can_allocate_thp(i915)) {
+ pr_info("missing THP support, skipping\n");
+ return 0;
+ }
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+ vm = i915_gem_context_get_eb_vm(ctx);
/*
* Sanity check shrinking huge-paged object -- make sure nothing blows
* up.
*/
- if (!igt_can_allocate_thp(i915)) {
- pr_info("missing THP support, skipping\n");
- goto out_vm;
- }
-
obj = i915_gem_object_create_shmem(i915, SZ_2M);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
@@ -1626,7 +1679,8 @@ out_put:
i915_gem_object_put(obj);
out_vm:
i915_vm_put(vm);
-
+out:
+ fput(file);
return err;
}
@@ -1651,7 +1705,7 @@ int i915_gem_huge_page_mock_selftests(void)
mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
mkwrite_device_info(dev_priv)->ppgtt_size = 48;
- ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
+ ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
if (IS_ERR(ppgtt)) {
err = PTR_ERR(ppgtt);
goto out_unlock;
@@ -1687,36 +1741,14 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_ppgtt_smoke_huge),
SUBTEST(igt_ppgtt_sanity_check),
};
- struct i915_gem_context *ctx;
- struct i915_address_space *vm;
- struct file *file;
- int err;
if (!HAS_PPGTT(i915)) {
pr_info("PPGTT not supported, skipping live-selftests\n");
return 0;
}
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
- file = mock_file(i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- ctx = live_context(i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out_file;
- }
-
- vm = ctx->vm;
- if (vm)
- WRITE_ONCE(vm->scrub_64K, true);
-
- err = i915_subtests(tests, ctx);
-
-out_file:
- fput(file);
- return err;
+ return i915_live_subtests(tests, i915);
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index 8402ed925a69..75947e9dada2 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -592,7 +592,7 @@ int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_client_tiled_blits),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index b32f7fed2d9c..3f41fe5ec9d4 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -88,9 +88,9 @@ static int live_nop_switch(void *arg)
rq = i915_request_get(this);
i915_request_add(this);
}
- if (i915_request_wait(rq, 0, HZ) < 0) {
+ if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
pr_err("Failed to populated %d contexts\n", nctx);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(to_gt(i915));
i915_request_put(rq);
err = -EIO;
goto out_file;
@@ -146,7 +146,7 @@ static int live_nop_switch(void *arg)
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
pr_err("Switching between %ld contexts timed out\n",
prime);
- intel_gt_set_wedged(&i915->gt);
+ intel_gt_set_wedged(to_gt(i915));
i915_request_put(rq);
break;
}
@@ -1223,7 +1223,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
return 0;
if (flags & TEST_RESET)
- igt_global_reset_lock(&i915->gt);
+ igt_global_reset_lock(to_gt(i915));
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj)) {
@@ -1306,7 +1306,7 @@ out_put:
out_unlock:
if (flags & TEST_RESET)
- igt_global_reset_unlock(&i915->gt);
+ igt_global_reset_unlock(to_gt(i915));
if (ret)
pr_err("%s: Failed with %d!\n", name, ret);
@@ -1481,10 +1481,10 @@ static int check_scratch(struct i915_address_space *vm, u64 offset)
static int write_to_scratch(struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
+ struct drm_i915_gem_object *obj,
u64 offset, u32 value)
{
struct drm_i915_private *i915 = ctx->i915;
- struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
struct i915_request *rq;
struct i915_vma *vma;
@@ -1497,15 +1497,9 @@ static int write_to_scratch(struct i915_gem_context *ctx,
if (err)
return err;
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
- if (IS_ERR(cmd)) {
- err = PTR_ERR(cmd);
- goto out;
- }
+ if (IS_ERR(cmd))
+ return PTR_ERR(cmd);
*cmd++ = MI_STORE_DWORD_IMM_GEN4;
if (GRAPHICS_VER(i915) >= 8) {
@@ -1569,17 +1563,19 @@ err_unpin:
i915_vma_unpin(vma);
out_vm:
i915_vm_put(vm);
-out:
- i915_gem_object_put(obj);
+
+ if (!err)
+ err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
+
return err;
}
static int read_from_scratch(struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
+ struct drm_i915_gem_object *obj,
u64 offset, u32 *value)
{
struct drm_i915_private *i915 = ctx->i915;
- struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
const u32 result = 0x100;
struct i915_request *rq;
@@ -1594,10 +1590,6 @@ static int read_from_scratch(struct i915_gem_context *ctx,
if (err)
return err;
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
if (GRAPHICS_VER(i915) >= 8) {
const u32 GPR0 = engine->mmio_base + 0x600;
@@ -1615,7 +1607,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out;
+ goto err_unpin;
}
memset(cmd, POISON_INUSE, PAGE_SIZE);
@@ -1651,7 +1643,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out;
+ goto err_unpin;
}
memset(cmd, POISON_INUSE, PAGE_SIZE);
@@ -1722,8 +1714,10 @@ err_unpin:
i915_vma_unpin(vma);
out_vm:
i915_vm_put(vm);
-out:
- i915_gem_object_put(obj);
+
+ if (!err)
+ err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
+
return err;
}
@@ -1757,6 +1751,7 @@ static int igt_vm_isolation(void *arg)
{
struct drm_i915_private *i915 = arg;
struct i915_gem_context *ctx_a, *ctx_b;
+ struct drm_i915_gem_object *obj_a, *obj_b;
unsigned long num_engines, count;
struct intel_engine_cs *engine;
struct igt_live_test t;
@@ -1810,6 +1805,18 @@ static int igt_vm_isolation(void *arg)
vm_total = ctx_a->vm->total;
GEM_BUG_ON(ctx_b->vm->total != vm_total);
+ obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj_a)) {
+ err = PTR_ERR(obj_a);
+ goto out_file;
+ }
+
+ obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj_b)) {
+ err = PTR_ERR(obj_b);
+ goto put_a;
+ }
+
count = 0;
num_engines = 0;
for_each_uabi_engine(engine, i915) {
@@ -1832,13 +1839,13 @@ static int igt_vm_isolation(void *arg)
I915_GTT_PAGE_SIZE, vm_total,
sizeof(u32), alignof_dword);
- err = write_to_scratch(ctx_a, engine,
+ err = write_to_scratch(ctx_a, engine, obj_a,
offset, 0xdeadbeef);
if (err == 0)
- err = read_from_scratch(ctx_b, engine,
+ err = read_from_scratch(ctx_b, engine, obj_b,
offset, &value);
if (err)
- goto out_file;
+ goto put_b;
if (value != expected) {
pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
@@ -1847,7 +1854,7 @@ static int igt_vm_isolation(void *arg)
lower_32_bits(offset),
this);
err = -EINVAL;
- goto out_file;
+ goto put_b;
}
this++;
@@ -1858,6 +1865,10 @@ static int igt_vm_isolation(void *arg)
pr_info("Checked %lu scratch offsets across %lu engines\n",
count, num_engines);
+put_b:
+ i915_gem_object_put(obj_b);
+put_a:
+ i915_gem_object_put(obj_a);
out_file:
if (igt_live_test_end(&t))
err = -EIO;
@@ -1877,7 +1888,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_vm_isolation),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 4a6bb64c3a35..3cc74b0fed06 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -102,7 +102,7 @@ static int igt_dmabuf_import_same_driver_lmem(void *arg)
obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1);
if (IS_ERR(obj)) {
pr_err("__i915_gem_object_create_user failed with err=%ld\n",
- PTR_ERR(dmabuf));
+ PTR_ERR(obj));
err = PTR_ERR(obj);
goto out_ret;
}
@@ -158,7 +158,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
regions, num_regions);
if (IS_ERR(obj)) {
pr_err("__i915_gem_object_create_user failed with err=%ld\n",
- PTR_ERR(dmabuf));
+ PTR_ERR(obj));
err = PTR_ERR(obj);
goto out_ret;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index 28a700f08b49..ecb691c81d1e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -4,6 +4,7 @@
*/
#include "gt/intel_migrate.h"
+#include "gem/i915_gem_ttm_move.h"
static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
bool fill)
@@ -227,17 +228,38 @@ out_put:
return err;
}
+/*
+ * Run igt_lmem_pages_migrate() once for every combination of the simulated
+ * "gpu migration" and "work allocation" failure modes, stopping at the
+ * first failure. The failure modes are always reset before returning.
+ */
+static int igt_lmem_pages_failsafe_migrate(void *arg)
+{
+	unsigned int mode;
+	int ret;
+
+	/* Bit 1 selects the gpu failure, bit 0 the allocation failure. */
+	for (mode = 0; mode < 4; ++mode) {
+		int fail_gpu = mode >> 1;
+		int fail_alloc = mode & 1;
+
+		pr_info("Simulated failure modes: gpu: %d, alloc: %d\n",
+			fail_gpu, fail_alloc);
+		i915_ttm_migrate_set_failure_modes(fail_gpu, fail_alloc);
+		ret = igt_lmem_pages_migrate(arg);
+		if (ret)
+			break;
+	}
+
+	i915_ttm_migrate_set_failure_modes(false, false);
+	return ret;
+}
+
int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_smem_create_migrate),
SUBTEST(igt_lmem_create_migrate),
SUBTEST(igt_same_create_migrate),
- SUBTEST(igt_lmem_pages_migrate),
+ SUBTEST(igt_lmem_pages_failsafe_migrate),
};
if (!HAS_LMEM(i915))
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 6d30cdfa80f3..c6291429b00c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -84,6 +84,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
struct rnd_state *prng)
{
const unsigned long npages = obj->base.size / PAGE_SIZE;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt_view view;
struct i915_vma *vma;
unsigned long page;
@@ -141,7 +142,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
if (offset >= obj->base.size)
goto out;
- intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
cpu = kmap(p) + offset_in_page(offset);
@@ -175,6 +176,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
{
const unsigned int nreal = obj->scratch / PAGE_SIZE;
const unsigned long npages = obj->base.size / PAGE_SIZE;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_vma *vma;
unsigned long page;
int err;
@@ -234,7 +236,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
if (offset >= obj->base.size)
continue;
- intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
cpu = kmap(p) + offset_in_page(offset);
@@ -616,14 +618,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
static void disable_retire_worker(struct drm_i915_private *i915)
{
i915_gem_driver_unregister__shrinker(i915);
- intel_gt_pm_get(&i915->gt);
- cancel_delayed_work_sync(&i915->gt.requests.retire_work);
+ intel_gt_pm_get(to_gt(i915));
+ cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work);
}
static void restore_retire_worker(struct drm_i915_private *i915)
{
igt_flush_test(i915);
- intel_gt_pm_put(&i915->gt);
+ intel_gt_pm_put(to_gt(i915));
i915_gem_driver_register__shrinker(i915);
}
@@ -651,8 +653,8 @@ static int igt_mmap_offset_exhaustion(void *arg)
/* Disable background reaper */
disable_retire_worker(i915);
- GEM_BUG_ON(!i915->gt.awake);
- intel_gt_retire_requests(&i915->gt);
+ GEM_BUG_ON(!to_gt(i915)->awake);
+ intel_gt_retire_requests(to_gt(i915));
i915_gem_drain_freed_objects(i915);
/* Trim the device mmap space to only a page */
@@ -728,7 +730,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
/* Now fill with busy dead objects that we expect to reap */
for (loop = 0; loop < 3; loop++) {
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
break;
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
@@ -942,7 +944,7 @@ static int __igt_mmap(struct drm_i915_private *i915,
}
if (type == I915_MMAP_TYPE_GTT)
- intel_gt_flush_ggtt_writes(&i915->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
err = wc_check(obj);
if (err == -ENXIO)
@@ -1049,7 +1051,7 @@ static int __igt_mmap_access(struct drm_i915_private *i915,
goto out_unmap;
}
- intel_gt_flush_ggtt_writes(&i915->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
err = access_process_vm(current, addr, &x, sizeof(x), 0);
if (err != sizeof(x)) {
@@ -1065,7 +1067,7 @@ static int __igt_mmap_access(struct drm_i915_private *i915,
goto out_unmap;
}
- intel_gt_flush_ggtt_writes(&i915->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
err = __get_user(y, ptr);
if (err) {
@@ -1165,7 +1167,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
}
if (type == I915_MMAP_TYPE_GTT)
- intel_gt_flush_ggtt_writes(&i915->gt);
+ intel_gt_flush_ggtt_writes(to_gt(i915));
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
@@ -1366,20 +1368,10 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915,
}
}
- if (!obj->ops->mmap_ops) {
- err = check_absent(addr, obj->base.size);
- if (err) {
- pr_err("%s: was not absent\n", obj->mm.region->name);
- goto out_unmap;
- }
- } else {
- /* ttm allows access to evicted regions by design */
-
- err = check_present(addr, obj->base.size);
- if (err) {
- pr_err("%s: was not present\n", obj->mm.region->name);
- goto out_unmap;
- }
+ err = check_absent(addr, obj->base.size);
+ if (err) {
+ pr_err("%s: was not absent\n", obj->mm.region->name);
+ goto out_unmap;
}
out_unmap:
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index bc995f41058d..56999186830b 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -186,7 +186,6 @@ static void gen6_alloc_va_range(struct i915_address_space *vm,
pt = stash->pt[0];
__i915_gem_object_pin_pages(pt->base);
- i915_gem_object_make_unshrinkable(pt->base);
fill32_px(pt, vm->scratch[0]->encode);
@@ -263,30 +262,14 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
- __i915_vma_put(ppgtt->vma);
-
gen6_ppgtt_free_pd(ppgtt);
free_scratch(vm);
mutex_destroy(&ppgtt->flush);
- mutex_destroy(&ppgtt->pin_mutex);
free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}
-static int pd_vma_set_pages(struct i915_vma *vma)
-{
- vma->pages = ERR_PTR(-ENODEV);
- return 0;
-}
-
-static void pd_vma_clear_pages(struct i915_vma *vma)
-{
- GEM_BUG_ON(!vma->pages);
-
- vma->pages = NULL;
-}
-
static void pd_vma_bind(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
@@ -326,43 +309,10 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
}
static const struct i915_vma_ops pd_vma_ops = {
- .set_pages = pd_vma_set_pages,
- .clear_pages = pd_vma_clear_pages,
.bind_vma = pd_vma_bind,
.unbind_vma = pd_vma_unbind,
};
-static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
-{
- struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
- struct i915_vma *vma;
-
- GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
- GEM_BUG_ON(size > ggtt->vm.total);
-
- vma = i915_vma_alloc();
- if (!vma)
- return ERR_PTR(-ENOMEM);
-
- i915_active_init(&vma->active, NULL, NULL, 0);
-
- kref_init(&vma->ref);
- mutex_init(&vma->pages_mutex);
- vma->vm = i915_vm_get(&ggtt->vm);
- vma->ops = &pd_vma_ops;
- vma->private = ppgtt;
-
- vma->size = size;
- vma->fence_size = size;
- atomic_set(&vma->flags, I915_VMA_GGTT);
- vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
-
- INIT_LIST_HEAD(&vma->obj_link);
- INIT_LIST_HEAD(&vma->closed_link);
-
- return vma;
-}
-
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
@@ -379,42 +329,92 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
return 0;
- if (mutex_lock_interruptible(&ppgtt->pin_mutex))
- return -EINTR;
+ /* grab the ppgtt resv to pin the object */
+ err = i915_vm_lock_objects(&ppgtt->base.vm, ww);
+ if (err)
+ return err;
/*
* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
* allocator works in address space sizes, so it's multiplied by page
* size. We allocate at the top of the GTT to avoid fragmentation.
*/
- err = 0;
- if (!atomic_read(&ppgtt->pin_count))
+ if (!atomic_read(&ppgtt->pin_count)) {
err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
+
+ GEM_BUG_ON(ppgtt->vma->fence);
+ clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma));
+ }
if (!err)
atomic_inc(&ppgtt->pin_count);
- mutex_unlock(&ppgtt->pin_mutex);
return err;
}
-void gen6_ppgtt_unpin(struct i915_ppgtt *base)
+static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj)
{
- struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+ obj->mm.pages = ZERO_SIZE_PTR;
+ return 0;
+}
- GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
- if (atomic_dec_and_test(&ppgtt->pin_count))
- i915_vma_unpin(ppgtt->vma);
+static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
}
-void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
+static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = {
+ .name = "pd_dummy_obj",
+ .get_pages = pd_dummy_obj_get_pages,
+ .put_pages = pd_dummy_obj_put_pages,
+};
+
+static struct i915_page_directory *
+gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
{
- struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+ struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt;
+ struct i915_page_directory *pd;
+ int err;
- if (!atomic_read(&ppgtt->pin_count))
- return;
+ pd = __alloc_pd(I915_PDES);
+ if (unlikely(!pd))
+ return ERR_PTR(-ENOMEM);
- i915_vma_unpin(ppgtt->vma);
- atomic_set(&ppgtt->pin_count, 0);
+ pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915,
+ &pd_dummy_obj_ops,
+ I915_PDES * SZ_4K);
+ if (IS_ERR(pd->pt.base)) {
+ err = PTR_ERR(pd->pt.base);
+ pd->pt.base = NULL;
+ goto err_pd;
+ }
+
+ pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm);
+ pd->pt.base->shares_resv_from = &ppgtt->base.vm;
+
+ ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL);
+ if (IS_ERR(ppgtt->vma)) {
+ err = PTR_ERR(ppgtt->vma);
+ ppgtt->vma = NULL;
+ goto err_pd;
+ }
+
+ /* The dummy object we create is special, override ops.. */
+ ppgtt->vma->ops = &pd_vma_ops;
+ ppgtt->vma->private = ppgtt;
+ return pd;
+
+err_pd:
+ free_pd(&ppgtt->base.vm, pd);
+ return ERR_PTR(err);
+}
+
+void gen6_ppgtt_unpin(struct i915_ppgtt *base)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+ GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
+ if (atomic_dec_and_test(&ppgtt->pin_count))
+ i915_vma_unpin(ppgtt->vma);
}
struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
@@ -428,7 +428,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
return ERR_PTR(-ENOMEM);
mutex_init(&ppgtt->flush);
- mutex_init(&ppgtt->pin_mutex);
ppgtt_init(&ppgtt->base, gt, 0);
ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
@@ -441,21 +440,16 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
+ ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma;
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
- ppgtt->base.pd = __alloc_pd(I915_PDES);
- if (!ppgtt->base.pd) {
- err = -ENOMEM;
- goto err_free;
- }
-
err = gen6_ppgtt_init_scratch(ppgtt);
if (err)
- goto err_pd;
+ goto err_free;
- ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
- if (IS_ERR(ppgtt->vma)) {
- err = PTR_ERR(ppgtt->vma);
+ ppgtt->base.pd = gen6_alloc_top_pd(ppgtt);
+ if (IS_ERR(ppgtt->base.pd)) {
+ err = PTR_ERR(ppgtt->base.pd);
goto err_scratch;
}
@@ -463,10 +457,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
err_scratch:
free_scratch(&ppgtt->base.vm);
-err_pd:
- free_pd(&ppgtt->base.vm, ppgtt->base.pd);
err_free:
- mutex_destroy(&ppgtt->pin_mutex);
kfree(ppgtt);
return ERR_PTR(err);
}
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
index 6a61a5c3a85a..5e5cf2ec3309 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
@@ -19,7 +19,6 @@ struct gen6_ppgtt {
u32 pp_dir;
atomic_t pin_count;
- struct mutex pin_mutex;
bool scan_for_unused_pt;
};
@@ -71,7 +70,6 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww);
void gen6_ppgtt_unpin(struct i915_ppgtt *base);
-void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
void gen6_ppgtt_enable(struct intel_gt *gt);
void gen7_ppgtt_enable(struct intel_gt *gt);
struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 461844dffd7e..e320610dd0b8 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -42,7 +42,7 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
vf_flush_wa = true;
/* WaForGAMHang:kbl */
- if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_C0))
+ if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0))
dc_flush_wa = true;
}
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 037a9a6e4889..b012c50f7ce7 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -18,7 +18,7 @@
static u64 gen8_pde_encode(const dma_addr_t addr,
const enum i915_cache_level level)
{
- u64 pde = addr | _PAGE_PRESENT | _PAGE_RW;
+ u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
if (level != I915_CACHE_NONE)
pde |= PPAT_CACHED_PDE;
@@ -32,10 +32,10 @@ static u64 gen8_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
u32 flags)
{
- gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
+ gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
if (unlikely(flags & PTE_READ_ONLY))
- pte &= ~_PAGE_RW;
+ pte &= ~GEN8_PAGE_RW;
if (flags & PTE_LM)
pte |= GEN12_PPGTT_PTE_LM;
@@ -301,7 +301,6 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
pt = stash->pt[!!lvl];
__i915_gem_object_pin_pages(pt->base);
- i915_gem_object_make_unshrinkable(pt->base);
fill_px(pt, vm->scratch[lvl]->encode);
@@ -652,7 +651,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
vm->scratch[0]->encode =
gen8_pte_encode(px_dma(vm->scratch[0]),
- I915_CACHE_LLC, pte_flags);
+ I915_CACHE_NONE, pte_flags);
for (i = 1; i <= vm->top; i++) {
struct drm_i915_gem_object *obj;
@@ -668,7 +667,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
}
fill_px(obj, vm->scratch[i - 1]->encode);
- obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC);
+ obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE);
vm->scratch[i] = obj;
}
@@ -777,10 +776,29 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
*/
ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
- if (HAS_LMEM(gt->i915))
+ if (HAS_LMEM(gt->i915)) {
ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
- else
+
+ /*
+ * On some platforms the hw has dropped support for 4K GTT pages
+ * when dealing with LMEM, and due to the design of 64K GTT
+ * pages in the hw, we can only mark the *entire* page-table as
+ * operating in 64K GTT mode, since the enable bit is still on
+ * the pde, and not the pte. And since we still need to allow
+ * 4K GTT pages for SMEM objects, we can't have a "normal" 4K
+ * page-table with scratch pointing to LMEM, since that's
+ * undefined from the hw pov. The simplest solution is to just
+ * move the 64K scratch page to SMEM on such platforms and call
+ * it a day, since that should work for all configurations.
+ */
+ if (HAS_64K_PAGES(gt->i915))
+ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
+ else
+ ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
+ } else {
ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
+ }
err = gen8_init_scratch(&ppgtt->vm);
if (err)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 5634d14052bc..ba083d800a08 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -219,7 +219,7 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
*/
err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
- if (!err && ce->ring->vma->obj)
+ if (!err)
err = i915_gem_object_lock(ce->ring->vma->obj, ww);
if (!err && ce->state)
err = i915_gem_object_lock(ce->state->obj, ww);
@@ -228,17 +228,17 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
if (err)
return err;
- err = i915_active_acquire(&ce->active);
+ err = ce->ops->pre_pin(ce, ww, &vaddr);
if (err)
goto err_ctx_unpin;
- err = ce->ops->pre_pin(ce, ww, &vaddr);
+ err = i915_active_acquire(&ce->active);
if (err)
- goto err_release;
+ goto err_post_unpin;
err = mutex_lock_interruptible(&ce->pin_mutex);
if (err)
- goto err_post_unpin;
+ goto err_release;
intel_engine_pm_might_get(ce->engine);
@@ -273,11 +273,11 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
err_unlock:
mutex_unlock(&ce->pin_mutex);
+err_release:
+ i915_active_release(&ce->active);
err_post_unpin:
if (!handoff)
ce->ops->post_unpin(ce);
-err_release:
- i915_active_release(&ce->active);
err_ctx_unpin:
intel_context_post_unpin(ce);
@@ -364,7 +364,7 @@ static int __intel_context_active(struct i915_active *active)
return 0;
}
-static int __i915_sw_fence_call
+static int
sw_fence_dummy_notify(struct i915_sw_fence *sf,
enum i915_sw_fence_notify state)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 246c37d72cd7..d8c74bbf9aae 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -211,7 +211,8 @@ static inline void intel_context_enter(struct intel_context *ce)
static inline void intel_context_mark_active(struct intel_context *ce)
{
- lockdep_assert_held(&ce->timeline->mutex);
+ lockdep_assert(lockdep_is_held(&ce->timeline->mutex) ||
+ test_bit(CONTEXT_IS_PARKING, &ce->flags));
++ce->active_count;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 9e0177dc5484..30cd81ad8911 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -118,6 +118,7 @@ struct intel_context {
#define CONTEXT_LRCA_DIRTY 9
#define CONTEXT_GUC_INIT 10
#define CONTEXT_PERMA_PIN 11
+#define CONTEXT_IS_PARKING 12
struct {
u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index d70fc19ec60b..30c199bb6ce5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -326,6 +326,38 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
engine->id = id;
engine->legacy_idx = INVALID_ENGINE;
engine->mask = BIT(id);
+ if (GRAPHICS_VER(gt->i915) >= 11) {
+ static const u32 engine_reset_domains[] = {
+ [RCS0] = GEN11_GRDOM_RENDER,
+ [BCS0] = GEN11_GRDOM_BLT,
+ [VCS0] = GEN11_GRDOM_MEDIA,
+ [VCS1] = GEN11_GRDOM_MEDIA2,
+ [VCS2] = GEN11_GRDOM_MEDIA3,
+ [VCS3] = GEN11_GRDOM_MEDIA4,
+ [VCS4] = GEN11_GRDOM_MEDIA5,
+ [VCS5] = GEN11_GRDOM_MEDIA6,
+ [VCS6] = GEN11_GRDOM_MEDIA7,
+ [VCS7] = GEN11_GRDOM_MEDIA8,
+ [VECS0] = GEN11_GRDOM_VECS,
+ [VECS1] = GEN11_GRDOM_VECS2,
+ [VECS2] = GEN11_GRDOM_VECS3,
+ [VECS3] = GEN11_GRDOM_VECS4,
+ };
+ GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
+ !engine_reset_domains[id]);
+ engine->reset_domain = engine_reset_domains[id];
+ } else {
+ static const u32 engine_reset_domains[] = {
+ [RCS0] = GEN6_GRDOM_RENDER,
+ [BCS0] = GEN6_GRDOM_BLT,
+ [VCS0] = GEN6_GRDOM_MEDIA,
+ [VCS1] = GEN8_GRDOM_MEDIA2,
+ [VECS0] = GEN6_GRDOM_VECS,
+ };
+ GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
+ !engine_reset_domains[id]);
+ engine->reset_domain = engine_reset_domains[id];
+ }
engine->i915 = i915;
engine->gt = gt;
engine->uncore = gt->uncore;
@@ -364,7 +396,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
DRIVER_CAPS(i915)->has_logical_contexts = true;
ewma__engine_latency_init(&engine->latency);
- seqcount_init(&engine->stats.lock);
+ seqcount_init(&engine->stats.execlists.lock);
ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
@@ -1677,14 +1709,18 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
{
+ struct i915_vma_snapshot *vsnap = &rq->batch_snapshot;
void *ring;
int size;
+ if (!i915_vma_snapshot_present(vsnap))
+ vsnap = NULL;
+
drm_printf(m,
"[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
rq->head, rq->postfix, rq->tail,
- rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
- rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
+ vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u,
+ vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u);
size = rq->tail - rq->head;
if (rq->tail < rq->head)
@@ -1916,22 +1952,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
intel_engine_print_breadcrumbs(engine, m);
}
-static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
- ktime_t *now)
-{
- ktime_t total = engine->stats.total;
-
- /*
- * If the engine is executing something at the moment
- * add it to the total.
- */
- *now = ktime_get();
- if (READ_ONCE(engine->stats.active))
- total = ktime_add(total, ktime_sub(*now, engine->stats.start));
-
- return total;
-}
-
/**
* intel_engine_get_busy_time() - Return current accumulated engine busyness
* @engine: engine to report on
@@ -1941,15 +1961,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
*/
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
{
- unsigned int seq;
- ktime_t total;
-
- do {
- seq = read_seqcount_begin(&engine->stats.lock);
- total = __intel_engine_get_busy_time(engine, now);
- } while (read_seqcount_retry(&engine->stats.lock, seq));
-
- return total;
+ return engine->busyness(engine, now);
}
struct intel_context *
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index a1334b48dde7..b0a4a2dbe3ee 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -26,7 +26,7 @@ static void dbg_poison_ce(struct intel_context *ce)
int type = i915_coherent_map_type(ce->engine->i915, obj, true);
void *map;
- if (!i915_gem_object_trylock(obj))
+ if (!i915_gem_object_trylock(obj, NULL))
return;
map = i915_gem_object_pin_map(obj, type);
@@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_wakeref *wf)
return 0;
}
-#if IS_ENABLED(CONFIG_LOCKDEP)
-
-static unsigned long __timeline_mark_lock(struct intel_context *ce)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
-
- return flags;
-}
-
-static void __timeline_mark_unlock(struct intel_context *ce,
- unsigned long flags)
-{
- mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
- local_irq_restore(flags);
-}
-
-#else
-
-static unsigned long __timeline_mark_lock(struct intel_context *ce)
-{
- return 0;
-}
-
-static void __timeline_mark_unlock(struct intel_context *ce,
- unsigned long flags)
-{
-}
-
-#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
-
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
struct i915_request *rq = to_request(fence);
@@ -159,7 +126,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
- unsigned long flags;
bool result = true;
/*
@@ -214,7 +180,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* engine->wakeref.count, we may see the request completion and retire
* it causing an underflow of the engine->wakeref.
*/
- flags = __timeline_mark_lock(ce);
+ set_bit(CONTEXT_IS_PARKING, &ce->flags);
GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
rq = __i915_request_create(ce, GFP_NOWAIT);
@@ -246,7 +212,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
result = false;
out_unlock:
- __timeline_mark_unlock(ce, flags);
+ clear_bit(CONTEXT_IS_PARKING, &ce->flags);
return result;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h
index 24fbdd94351a..8e762d683e50 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_stats.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h
@@ -15,45 +15,46 @@
static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
+ struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
unsigned long flags;
- if (engine->stats.active) {
- engine->stats.active++;
+ if (stats->active) {
+ stats->active++;
return;
}
/* The writer is serialised; but the pmu reader may be from hardirq */
local_irq_save(flags);
- write_seqcount_begin(&engine->stats.lock);
+ write_seqcount_begin(&stats->lock);
- engine->stats.start = ktime_get();
- engine->stats.active++;
+ stats->start = ktime_get();
+ stats->active++;
- write_seqcount_end(&engine->stats.lock);
+ write_seqcount_end(&stats->lock);
local_irq_restore(flags);
- GEM_BUG_ON(!engine->stats.active);
+ GEM_BUG_ON(!stats->active);
}
static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
+ struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
unsigned long flags;
- GEM_BUG_ON(!engine->stats.active);
- if (engine->stats.active > 1) {
- engine->stats.active--;
+ GEM_BUG_ON(!stats->active);
+ if (stats->active > 1) {
+ stats->active--;
return;
}
local_irq_save(flags);
- write_seqcount_begin(&engine->stats.lock);
+ write_seqcount_begin(&stats->lock);
- engine->stats.active--;
- engine->stats.total =
- ktime_add(engine->stats.total,
- ktime_sub(ktime_get(), engine->stats.start));
+ stats->active--;
+ stats->total = ktime_add(stats->total,
+ ktime_sub(ktime_get(), stats->start));
- write_seqcount_end(&engine->stats.lock);
+ write_seqcount_end(&stats->lock);
local_irq_restore(flags);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index e0f773585c29..36365bdbe1ee 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -257,6 +257,55 @@ struct intel_engine_execlists {
#define INTEL_ENGINE_CS_MAX_NAME 8
+struct intel_engine_execlists_stats {
+ /**
+ * @active: Number of contexts currently scheduled in.
+ */
+ unsigned int active;
+
+ /**
+ * @lock: Lock protecting the below fields.
+ */
+ seqcount_t lock;
+
+ /**
+ * @total: Total time this engine was busy.
+ *
+ * Accumulated time not counting the most recent block in cases where
+ * engine is currently busy (active > 0).
+ */
+ ktime_t total;
+
+ /**
+ * @start: Timestamp of the last idle to active transition.
+ *
+ * Idle is defined as active == 0, active is active > 0.
+ */
+ ktime_t start;
+};
+
+struct intel_engine_guc_stats {
+ /**
+ * @running: Active state of the engine when busyness was last sampled.
+ */
+ bool running;
+
+ /**
+ * @prev_total: Previous value of total runtime clock cycles.
+ */
+ u32 prev_total;
+
+ /**
+ * @total_gt_clks: Total gt clock cycles this engine was busy.
+ */
+ u64 total_gt_clks;
+
+ /**
+ * @start_gt_clk: GT clock time of last idle to active transition.
+ */
+ u64 start_gt_clk;
+};
+
struct intel_engine_cs {
struct drm_i915_private *i915;
struct intel_gt *gt;
@@ -269,6 +318,7 @@ struct intel_engine_cs {
unsigned int guc_id;
intel_engine_mask_t mask;
+ u32 reset_domain;
/**
* @logical_mask: logical mask of engine, reported to user space via
* query IOCTL and used to communicate with the GuC in logical space.
@@ -439,6 +489,12 @@ struct intel_engine_cs {
void (*add_active_request)(struct i915_request *rq);
void (*remove_active_request)(struct i915_request *rq);
+ /*
+ * Get engine busyness and the time at which the busyness was sampled.
+ */
+ ktime_t (*busyness)(struct intel_engine_cs *engine,
+ ktime_t *now);
+
struct intel_engine_execlists execlists;
/*
@@ -488,30 +544,10 @@ struct intel_engine_cs {
u32 (*get_cmd_length_mask)(u32 cmd_header);
struct {
- /**
- * @active: Number of contexts currently scheduled in.
- */
- unsigned int active;
-
- /**
- * @lock: Lock protecting the below fields.
- */
- seqcount_t lock;
-
- /**
- * @total: Total time this engine was busy.
- *
- * Accumulated time not counting the most recent block in cases
- * where engine is currently busy (active > 0).
- */
- ktime_t total;
-
- /**
- * @start: Timestamp of the last idle to active transition.
- *
- * Idle is defined as active == 0, active is active > 0.
- */
- ktime_t start;
+ union {
+ struct intel_engine_execlists_stats execlists;
+ struct intel_engine_guc_stats guc;
+ };
/**
* @rps: Utilisation at last RPS sampling.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 8f8bea08e734..9ce85a845105 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -116,7 +116,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
disabled |= (I915_SCHEDULER_CAP_ENABLED |
I915_SCHEDULER_CAP_PRIORITY);
- if (intel_uc_uses_guc_submission(&i915->gt.uc))
+ if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP;
for (i = 0; i < ARRAY_SIZE(map); i++) {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index ea8291361d65..0868d88e11fc 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2187,7 +2187,8 @@ struct execlists_capture {
static void execlists_capture_work(struct work_struct *work)
{
struct execlists_capture *cap = container_of(work, typeof(*cap), work);
- const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
+ const gfp_t gfp = __GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL |
+ __GFP_NOWARN;
struct intel_engine_cs *engine = cap->rq->engine;
struct intel_gt_coredump *gt = cap->error->gt;
struct intel_engine_capture_vma *vma;
@@ -3294,6 +3295,38 @@ static void execlists_release(struct intel_engine_cs *engine)
lrc_fini_wa_ctx(engine);
}
+static ktime_t __execlists_engine_busyness(struct intel_engine_cs *engine,
+ ktime_t *now)
+{
+ struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
+ ktime_t total = stats->total;
+
+ /*
+ * If the engine is executing something at the moment
+ * add it to the total.
+ */
+ *now = ktime_get();
+ if (READ_ONCE(stats->active))
+ total = ktime_add(total, ktime_sub(*now, stats->start));
+
+ return total;
+}
+
+static ktime_t execlists_engine_busyness(struct intel_engine_cs *engine,
+ ktime_t *now)
+{
+ struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
+ unsigned int seq;
+ ktime_t total;
+
+ do {
+ seq = read_seqcount_begin(&stats->lock);
+ total = __execlists_engine_busyness(engine, now);
+ } while (read_seqcount_retry(&stats->lock, seq));
+
+ return total;
+}
+
static void
logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{
@@ -3350,6 +3383,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_bb_start = gen8_emit_bb_start;
else
engine->emit_bb_start = gen8_emit_bb_start_noarb;
+
+ engine->busyness = execlists_engine_busyness;
}
static void logical_ring_default_irqs(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 555111c3bee5..5263dda7f8d5 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -22,9 +22,6 @@
#include "intel_gtt.h"
#include "gen8_ppgtt.h"
-static int
-i915_get_ggtt_vma_pages(struct i915_vma *vma);
-
static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
unsigned long color,
u64 *start,
@@ -106,7 +103,7 @@ static bool needs_idle_maps(struct drm_i915_private *i915)
* Query intel_iommu to see if we need the workaround. Presumably that
* was loaded first.
*/
- if (!intel_vtd_active())
+ if (!intel_vtd_active(i915))
return false;
if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915))
@@ -209,7 +206,7 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
u32 flags)
{
- gen8_pte_t pte = addr | _PAGE_PRESENT;
+ gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
if (flags & PTE_LM)
pte |= GEN12_GGTT_PTE_LM;
@@ -892,21 +889,6 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
return 0;
}
-int ggtt_set_pages(struct i915_vma *vma)
-{
- int ret;
-
- GEM_BUG_ON(vma->pages);
-
- ret = i915_get_ggtt_vma_pages(vma);
- if (ret)
- return ret;
-
- vma->page_sizes = vma->obj->mm.page_sizes;
-
- return 0;
-}
-
static void gen6_gmch_remove(struct i915_address_space *vm)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
@@ -941,6 +923,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
size = gen8_get_total_gtt_size(snb_gmch_ctl);
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
@@ -967,8 +950,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
- ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
- ggtt->vm.vma_ops.clear_pages = clear_pages;
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
@@ -1094,6 +1075,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
ggtt->vm.clear_range = nop_clear_range;
if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
@@ -1117,8 +1099,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
- ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
- ggtt->vm.vma_ops.clear_pages = clear_pages;
return ggtt_probe_common(ggtt, size);
}
@@ -1146,6 +1126,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
if (needs_idle_maps(i915)) {
drm_notice(&i915->drm,
@@ -1162,8 +1143,6 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
- ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
- ggtt->vm.vma_ops.clear_pages = clear_pages;
if (unlikely(ggtt->do_idle_maps))
drm_notice(&i915->drm,
@@ -1229,11 +1208,11 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
int ret;
- ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
+ ret = ggtt_probe_hw(&i915->ggtt, to_gt(i915));
if (ret)
return ret;
- if (intel_vtd_active())
+ if (intel_vtd_active(i915))
drm_info(&i915->drm, "VT-d active for gfx access\n");
return 0;
@@ -1333,382 +1312,3 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
intel_ggtt_restore_fences(ggtt);
}
-
-static struct scatterlist *
-rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
- unsigned int width, unsigned int height,
- unsigned int src_stride, unsigned int dst_stride,
- struct sg_table *st, struct scatterlist *sg)
-{
- unsigned int column, row;
- unsigned int src_idx;
-
- for (column = 0; column < width; column++) {
- unsigned int left;
-
- src_idx = src_stride * (height - 1) + column + offset;
- for (row = 0; row < height; row++) {
- st->nents++;
- /*
- * We don't need the pages, but need to initialize
- * the entries so the sg list can be happily traversed.
- * The only thing we need are DMA addresses.
- */
- sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
- sg_dma_address(sg) =
- i915_gem_object_get_dma_address(obj, src_idx);
- sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
- sg = sg_next(sg);
- src_idx -= src_stride;
- }
-
- left = (dst_stride - height) * I915_GTT_PAGE_SIZE;
-
- if (!left)
- continue;
-
- st->nents++;
-
- /*
- * The DE ignores the PTEs for the padding tiles, the sg entry
- * here is just a conenience to indicate how many padding PTEs
- * to insert at this spot.
- */
- sg_set_page(sg, NULL, left, 0);
- sg_dma_address(sg) = 0;
- sg_dma_len(sg) = left;
- sg = sg_next(sg);
- }
-
- return sg;
-}
-
-static noinline struct sg_table *
-intel_rotate_pages(struct intel_rotation_info *rot_info,
- struct drm_i915_gem_object *obj)
-{
- unsigned int size = intel_rotation_info_size(rot_info);
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct sg_table *st;
- struct scatterlist *sg;
- int ret = -ENOMEM;
- int i;
-
- /* Allocate target SG list. */
- st = kmalloc(sizeof(*st), GFP_KERNEL);
- if (!st)
- goto err_st_alloc;
-
- ret = sg_alloc_table(st, size, GFP_KERNEL);
- if (ret)
- goto err_sg_alloc;
-
- st->nents = 0;
- sg = st->sgl;
-
- for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++)
- sg = rotate_pages(obj, rot_info->plane[i].offset,
- rot_info->plane[i].width, rot_info->plane[i].height,
- rot_info->plane[i].src_stride,
- rot_info->plane[i].dst_stride,
- st, sg);
-
- return st;
-
-err_sg_alloc:
- kfree(st);
-err_st_alloc:
-
- drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
- obj->base.size, rot_info->plane[0].width,
- rot_info->plane[0].height, size);
-
- return ERR_PTR(ret);
-}
-
-static struct scatterlist *
-add_padding_pages(unsigned int count,
- struct sg_table *st, struct scatterlist *sg)
-{
- st->nents++;
-
- /*
- * The DE ignores the PTEs for the padding tiles, the sg entry
- * here is just a convenience to indicate how many padding PTEs
- * to insert at this spot.
- */
- sg_set_page(sg, NULL, count * I915_GTT_PAGE_SIZE, 0);
- sg_dma_address(sg) = 0;
- sg_dma_len(sg) = count * I915_GTT_PAGE_SIZE;
- sg = sg_next(sg);
-
- return sg;
-}
-
-static struct scatterlist *
-remap_tiled_color_plane_pages(struct drm_i915_gem_object *obj,
- unsigned int offset, unsigned int alignment_pad,
- unsigned int width, unsigned int height,
- unsigned int src_stride, unsigned int dst_stride,
- struct sg_table *st, struct scatterlist *sg,
- unsigned int *gtt_offset)
-{
- unsigned int row;
-
- if (!width || !height)
- return sg;
-
- if (alignment_pad)
- sg = add_padding_pages(alignment_pad, st, sg);
-
- for (row = 0; row < height; row++) {
- unsigned int left = width * I915_GTT_PAGE_SIZE;
-
- while (left) {
- dma_addr_t addr;
- unsigned int length;
-
- /*
- * We don't need the pages, but need to initialize
- * the entries so the sg list can be happily traversed.
- * The only thing we need are DMA addresses.
- */
-
- addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
-
- length = min(left, length);
-
- st->nents++;
-
- sg_set_page(sg, NULL, length, 0);
- sg_dma_address(sg) = addr;
- sg_dma_len(sg) = length;
- sg = sg_next(sg);
-
- offset += length / I915_GTT_PAGE_SIZE;
- left -= length;
- }
-
- offset += src_stride - width;
-
- left = (dst_stride - width) * I915_GTT_PAGE_SIZE;
-
- if (!left)
- continue;
-
- sg = add_padding_pages(left >> PAGE_SHIFT, st, sg);
- }
-
- *gtt_offset += alignment_pad + dst_stride * height;
-
- return sg;
-}
-
-static struct scatterlist *
-remap_contiguous_pages(struct drm_i915_gem_object *obj,
- unsigned int obj_offset,
- unsigned int count,
- struct sg_table *st, struct scatterlist *sg)
-{
- struct scatterlist *iter;
- unsigned int offset;
-
- iter = i915_gem_object_get_sg_dma(obj, obj_offset, &offset);
- GEM_BUG_ON(!iter);
-
- do {
- unsigned int len;
-
- len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
- count << PAGE_SHIFT);
- sg_set_page(sg, NULL, len, 0);
- sg_dma_address(sg) =
- sg_dma_address(iter) + (offset << PAGE_SHIFT);
- sg_dma_len(sg) = len;
-
- st->nents++;
- count -= len >> PAGE_SHIFT;
- if (count == 0)
- return sg;
-
- sg = __sg_next(sg);
- iter = __sg_next(iter);
- offset = 0;
- } while (1);
-}
-
-static struct scatterlist *
-remap_linear_color_plane_pages(struct drm_i915_gem_object *obj,
- unsigned int obj_offset, unsigned int alignment_pad,
- unsigned int size,
- struct sg_table *st, struct scatterlist *sg,
- unsigned int *gtt_offset)
-{
- if (!size)
- return sg;
-
- if (alignment_pad)
- sg = add_padding_pages(alignment_pad, st, sg);
-
- sg = remap_contiguous_pages(obj, obj_offset, size, st, sg);
- sg = sg_next(sg);
-
- *gtt_offset += alignment_pad + size;
-
- return sg;
-}
-
-static struct scatterlist *
-remap_color_plane_pages(const struct intel_remapped_info *rem_info,
- struct drm_i915_gem_object *obj,
- int color_plane,
- struct sg_table *st, struct scatterlist *sg,
- unsigned int *gtt_offset)
-{
- unsigned int alignment_pad = 0;
-
- if (rem_info->plane_alignment)
- alignment_pad = ALIGN(*gtt_offset, rem_info->plane_alignment) - *gtt_offset;
-
- if (rem_info->plane[color_plane].linear)
- sg = remap_linear_color_plane_pages(obj,
- rem_info->plane[color_plane].offset,
- alignment_pad,
- rem_info->plane[color_plane].size,
- st, sg,
- gtt_offset);
-
- else
- sg = remap_tiled_color_plane_pages(obj,
- rem_info->plane[color_plane].offset,
- alignment_pad,
- rem_info->plane[color_plane].width,
- rem_info->plane[color_plane].height,
- rem_info->plane[color_plane].src_stride,
- rem_info->plane[color_plane].dst_stride,
- st, sg,
- gtt_offset);
-
- return sg;
-}
-
-static noinline struct sg_table *
-intel_remap_pages(struct intel_remapped_info *rem_info,
- struct drm_i915_gem_object *obj)
-{
- unsigned int size = intel_remapped_info_size(rem_info);
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct sg_table *st;
- struct scatterlist *sg;
- unsigned int gtt_offset = 0;
- int ret = -ENOMEM;
- int i;
-
- /* Allocate target SG list. */
- st = kmalloc(sizeof(*st), GFP_KERNEL);
- if (!st)
- goto err_st_alloc;
-
- ret = sg_alloc_table(st, size, GFP_KERNEL);
- if (ret)
- goto err_sg_alloc;
-
- st->nents = 0;
- sg = st->sgl;
-
- for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++)
- sg = remap_color_plane_pages(rem_info, obj, i, st, sg, &gtt_offset);
-
- i915_sg_trim(st);
-
- return st;
-
-err_sg_alloc:
- kfree(st);
-err_st_alloc:
-
- drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
- obj->base.size, rem_info->plane[0].width,
- rem_info->plane[0].height, size);
-
- return ERR_PTR(ret);
-}
-
-static noinline struct sg_table *
-intel_partial_pages(const struct i915_ggtt_view *view,
- struct drm_i915_gem_object *obj)
-{
- struct sg_table *st;
- struct scatterlist *sg;
- unsigned int count = view->partial.size;
- int ret = -ENOMEM;
-
- st = kmalloc(sizeof(*st), GFP_KERNEL);
- if (!st)
- goto err_st_alloc;
-
- ret = sg_alloc_table(st, count, GFP_KERNEL);
- if (ret)
- goto err_sg_alloc;
-
- st->nents = 0;
-
- sg = remap_contiguous_pages(obj, view->partial.offset, count, st, st->sgl);
-
- sg_mark_end(sg);
- i915_sg_trim(st); /* Drop any unused tail entries. */
-
- return st;
-
-err_sg_alloc:
- kfree(st);
-err_st_alloc:
- return ERR_PTR(ret);
-}
-
-static int
-i915_get_ggtt_vma_pages(struct i915_vma *vma)
-{
- int ret;
-
- /*
- * The vma->pages are only valid within the lifespan of the borrowed
- * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
- * must be the vma->pages. A simple rule is that vma->pages must only
- * be accessed when the obj->mm.pages are pinned.
- */
- GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
-
- switch (vma->ggtt_view.type) {
- default:
- GEM_BUG_ON(vma->ggtt_view.type);
- fallthrough;
- case I915_GGTT_VIEW_NORMAL:
- vma->pages = vma->obj->mm.pages;
- return 0;
-
- case I915_GGTT_VIEW_ROTATED:
- vma->pages =
- intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
- break;
-
- case I915_GGTT_VIEW_REMAPPED:
- vma->pages =
- intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
- break;
-
- case I915_GGTT_VIEW_PARTIAL:
- vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
- break;
- }
-
- ret = 0;
- if (IS_ERR(vma->pages)) {
- ret = PTR_ERR(vma->pages);
- vma->pages = NULL;
- drm_err(&vma->vm->i915->drm,
- "Failed to get pages for VMA view type %u (%d)!\n",
- vma->ggtt_view.type, ret);
- }
- return ret;
-}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 4814453ab5ab..9aaf491affdd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -26,13 +26,12 @@
#include "shmem_utils.h"
#include "pxp/intel_pxp.h"
-void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
+void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
- gt->i915 = i915;
- gt->uncore = &i915->uncore;
-
spin_lock_init(&gt->irq_lock);
+ mutex_init(&gt->tlb_invalidate_lock);
+
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
@@ -49,6 +48,12 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
intel_rps_init_early(&gt->rps);
}
+void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
+{
+ gt->i915 = i915;
+ gt->uncore = &i915->uncore;
+}
+
int intel_gt_probe_lmem(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
@@ -910,3 +915,109 @@ void intel_gt_info_print(const struct intel_gt_info *info,
intel_sseu_dump(&info->sseu, p);
}
+
+struct reg_and_bit {
+ i915_reg_t reg;
+ u32 bit;
+};
+
+static struct reg_and_bit
+get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
+ const i915_reg_t *regs, const unsigned int num)
+{
+ const unsigned int class = engine->class;
+ struct reg_and_bit rb = { };
+
+ if (drm_WARN_ON_ONCE(&engine->i915->drm,
+ class >= num || !regs[class].reg))
+ return rb;
+
+ rb.reg = regs[class];
+ if (gen8 && class == VIDEO_DECODE_CLASS)
+ rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+ else
+ rb.bit = engine->instance;
+
+ rb.bit = BIT(rb.bit);
+
+ return rb;
+}
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+{
+ static const i915_reg_t gen8_regs[] = {
+ [RENDER_CLASS] = GEN8_RTCR,
+ [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
+ [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
+ [COPY_ENGINE_CLASS] = GEN8_BTCR,
+ };
+ static const i915_reg_t gen12_regs[] = {
+ [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
+ [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
+ [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
+ [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
+ };
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ const i915_reg_t *regs;
+ unsigned int num = 0;
+
+ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+ return;
+
+ if (GRAPHICS_VER(i915) == 12) {
+ regs = gen12_regs;
+ num = ARRAY_SIZE(gen12_regs);
+ } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+ regs = gen8_regs;
+ num = ARRAY_SIZE(gen8_regs);
+ } else if (GRAPHICS_VER(i915) < 8) {
+ return;
+ }
+
+ if (drm_WARN_ONCE(&i915->drm, !num,
+ "Platform does not implement TLB invalidation!"))
+ return;
+
+ GEM_TRACE("\n");
+
+ assert_rpm_wakelock_held(&i915->runtime_pm);
+
+ mutex_lock(&gt->tlb_invalidate_lock);
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ for_each_engine(engine, gt, id) {
+ /*
+ * HW architecture suggests a typical invalidation time of 40us,
+ * with pessimistic cases up to 100us and a recommendation to
+ * cap at 1ms. We go a bit higher just in case.
+ */
+ const unsigned int timeout_us = 100;
+ const unsigned int timeout_ms = 4;
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ if (__intel_wait_for_register_fw(uncore,
+ rb.reg, rb.bit, 0,
+ timeout_us, timeout_ms,
+ NULL))
+ drm_err_ratelimited(&gt->i915->drm,
+ "%s TLB invalidation did not complete in %ums!\n",
+ engine->name, timeout_ms);
+ }
+
+ /*
+ * Use delayed put since a) we mostly expect a flurry of TLB
+ * invalidations so it is good to avoid paying the forcewake cost and
+ * b) it works around a bug in Icelake which cannot cope with too rapid
+ * transitions.
+ */
+ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+ mutex_unlock(&gt->tlb_invalidate_lock);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 74e771871a9b..a913fb6ffec3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -35,6 +35,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
}
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
+void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt);
int intel_gt_probe_lmem(struct intel_gt *gt);
int intel_gt_init_mmio(struct intel_gt *gt);
@@ -90,4 +91,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
void intel_gt_watchdog_work(struct work_struct *work);
+void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index acc49c56a9f3..9db3dcbd917f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -9,11 +9,6 @@
#include "intel_engine_pm.h"
#include "intel_gt_buffer_pool.h"
-static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool)
-{
- return container_of(pool, struct intel_gt, buffer_pool);
-}
-
static struct list_head *
bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz)
{
@@ -141,7 +136,7 @@ static struct intel_gt_buffer_pool_node *
node_create(struct intel_gt_buffer_pool *pool, size_t sz,
enum i915_map_type type)
{
- struct intel_gt *gt = to_gt(pool);
+ struct intel_gt *gt = container_of(pool, struct intel_gt, buffer_pool);
struct intel_gt_buffer_pool_node *node;
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
index e307ceb99031..17e79b735cfe 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h
@@ -10,11 +10,7 @@
struct intel_gt;
-#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \
- static int __name ## _open(struct inode *inode, struct file *file) \
-{ \
- return single_open(file, __name ## _show, inode->i_private); \
-} \
+#define __GT_DEBUGFS_ATTRIBUTE_FOPS(__name) \
static const struct file_operations __name ## _fops = { \
.owner = THIS_MODULE, \
.open = __name ## _open, \
@@ -23,6 +19,21 @@ static const struct file_operations __name ## _fops = { \
.release = single_release, \
}
+#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(__name) \
+static int __name ## _open(struct inode *inode, struct file *file) \
+{ \
+ return single_open(file, __name ## _show, inode->i_private); \
+} \
+__GT_DEBUGFS_ATTRIBUTE_FOPS(__name)
+
+#define DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(__name, __size_vf) \
+static int __name ## _open(struct inode *inode, struct file *file) \
+{ \
+ return single_open_size(file, __name ## _show, inode->i_private, \
+ __size_vf(inode->i_private)); \
+} \
+__GT_DEBUGFS_ATTRIBUTE_FOPS(__name)
+
void intel_gt_debugfs_register(struct intel_gt *gt);
struct intel_gt_debugfs_file {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 524eaf678790..c0fa41e4c803 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -86,6 +86,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
intel_rc6_unpark(&gt->rc6);
intel_rps_unpark(&gt->rps);
i915_pmu_gt_unparked(i915);
+ intel_guc_busyness_unpark(gt);
intel_gt_unpark_requests(gt);
runtime_begin(gt);
@@ -104,6 +105,7 @@ static int __gt_park(struct intel_wakeref *wf)
runtime_end(gt);
intel_gt_park_requests(gt);
+ intel_guc_busyness_park(gt);
i915_vma_parked(gt);
i915_pmu_gt_parked(i915);
intel_rps_park(&gt->rps);
@@ -301,7 +303,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt)
user_forcewake(gt, true);
wait_for_suspend(gt);
- intel_pxp_suspend(&gt->pxp, false);
+ intel_pxp_suspend_prepare(&gt->pxp);
}
static suspend_state_t pm_suspend_target(void)
@@ -326,6 +328,7 @@ void intel_gt_suspend_late(struct intel_gt *gt)
GEM_BUG_ON(gt->awake);
intel_uc_suspend(&gt->uc);
+ intel_pxp_suspend(&gt->pxp);
/*
* On disabling the device, we want to turn off HW access to memory
@@ -353,7 +356,7 @@ void intel_gt_suspend_late(struct intel_gt *gt)
void intel_gt_runtime_suspend(struct intel_gt *gt)
{
- intel_pxp_suspend(&gt->pxp, true);
+ intel_pxp_runtime_suspend(&gt->pxp);
intel_uc_runtime_suspend(&gt->uc);
GT_TRACE(gt, "\n");
@@ -371,7 +374,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
if (ret)
return ret;
- intel_pxp_resume(&gt->pxp);
+ intel_pxp_runtime_resume(&gt->pxp);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 14216cc471b1..f20687796490 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -73,6 +73,8 @@ struct intel_gt {
struct intel_uc uc;
+ struct mutex tlb_invalidate_lock;
+
struct i915_wa_list wa_list;
struct intel_gt_timelines {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 67d14afa6623..a94be0306464 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -6,6 +6,9 @@
#include <linux/slab.h> /* fault-inject.h is not standalone! */
#include <linux/fault-inject.h>
+#include <linux/sched/mm.h>
+
+#include <drm/drm_cache.h>
#include "gem/i915_gem_lmem.h"
#include "i915_trace.h"
@@ -221,19 +224,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
INIT_LIST_HEAD(&vm->bound_list);
}
-void clear_pages(struct i915_vma *vma)
-{
- GEM_BUG_ON(!vma->pages);
-
- if (vma->pages != vma->obj->mm.pages) {
- sg_free_table(vma->pages);
- kfree(vma->pages);
- }
- vma->pages = NULL;
-
- memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
-}
-
void *__px_vaddr(struct drm_i915_gem_object *p)
{
enum i915_map_type type;
@@ -273,6 +263,7 @@ static void poison_scratch_page(struct drm_i915_gem_object *scratch)
val = POISON_FREE;
memset(vaddr, val, scratch->base.size);
+ drm_clflush_virt_range(vaddr, scratch->base.size);
}
int setup_scratch_page(struct i915_address_space *vm)
@@ -298,7 +289,7 @@ int setup_scratch_page(struct i915_address_space *vm)
do {
struct drm_i915_gem_object *obj;
- obj = vm->alloc_pt_dma(vm, size);
+ obj = vm->alloc_scratch_dma(vm, size);
if (IS_ERR(obj))
goto skip;
@@ -334,6 +325,18 @@ skip:
if (size == I915_GTT_PAGE_SIZE_4K)
return -ENOMEM;
+ /*
+ * If we need 64K minimum GTT pages for device local-memory,
+ * like on XEHPSDV, then we need to fail the allocation here,
+ * otherwise we can't safely support the insertion of
+ * local-memory pages for this vm, since the HW expects the
+ * correct physical alignment and size when the page-table is
+ * operating in 64K GTT mode, which includes any scratch PTEs,
+ * since userspace can still touch them.
+ */
+ if (HAS_64K_PAGES(vm->i915))
+ return -ENOMEM;
+
size = I915_GTT_PAGE_SIZE_4K;
} while (1);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index dfeaef680aac..177b42b935a1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -135,6 +135,9 @@ typedef u64 gen8_pte_t;
#define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8))
+#define GEN8_PAGE_PRESENT BIT_ULL(0)
+#define GEN8_PAGE_RW BIT_ULL(1)
+
#define GEN8_PDE_IPS_64K BIT(11)
#define GEN8_PDE_PS_2M BIT(7)
@@ -206,9 +209,6 @@ struct i915_vma_ops {
*/
void (*unbind_vma)(struct i915_address_space *vm,
struct i915_vma *vma);
-
- int (*set_pages)(struct i915_vma *vma);
- void (*clear_pages)(struct i915_vma *vma);
};
struct i915_address_space {
@@ -265,6 +265,8 @@ struct i915_address_space {
struct drm_i915_gem_object *
(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
+ struct drm_i915_gem_object *
+ (*alloc_scratch_dma)(struct i915_address_space *vm, int sz);
u64 (*pte_encode)(dma_addr_t addr,
enum i915_cache_level level,
@@ -596,10 +598,6 @@ release_pd_entry(struct i915_page_directory * const pd,
const struct drm_i915_gem_object * const scratch);
void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
-int ggtt_set_pages(struct i915_vma *vma);
-int ppgtt_set_pages(struct i915_vma *vma);
-void clear_pages(struct i915_vma *vma);
-
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 1530227c4b91..bda675983c38 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1168,6 +1168,11 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
cs = gen12_emit_cmd_buf_wa(ce, cs);
cs = gen12_emit_restore_scratch(ce, cs);
+ /* Wa_16013000631:dg2 */
+ if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
+ IS_DG2_G11(ce->engine->i915))
+ cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
+
return cs;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index afb1cce9a352..18b44af56969 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -13,7 +13,6 @@
struct insert_pte_data {
u64 offset;
- bool is_lmem;
};
#define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
@@ -40,7 +39,7 @@ static void insert_pte(struct i915_address_space *vm,
struct insert_pte_data *d = data;
vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE,
- d->is_lmem ? PTE_LM : 0);
+ i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0);
d->offset += PAGE_SIZE;
}
@@ -134,8 +133,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
goto err_vm;
/* Now allow the GPU to rewrite the PTE via its own ppGTT */
- d.is_lmem = i915_gem_object_is_lmem(vm->vm.scratch[0]);
- vm->vm.foreach(&vm->vm, base, base + sz, insert_pte, &d);
+ vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d);
}
return &vm->vm;
@@ -281,10 +279,10 @@ static int emit_pte(struct i915_request *rq,
GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8);
/* Compute the page directory offset for the target address range */
- offset += (u64)rq->engine->instance << 32;
offset >>= 12;
offset *= sizeof(u64);
offset += 2 * CHUNK_SZ;
+ offset += (u64)rq->engine->instance << 32;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
@@ -406,7 +404,7 @@ static int emit_copy(struct i915_request *rq, int size)
int
intel_context_migrate_copy(struct intel_context *ce,
- struct dma_fence *await,
+ const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
@@ -433,8 +431,8 @@ intel_context_migrate_copy(struct intel_context *ce,
goto out_ce;
}
- if (await) {
- err = i915_request_await_dma_fence(rq, await);
+ if (deps) {
+ err = i915_request_await_deps(rq, deps);
if (err)
goto out_rq;
@@ -444,7 +442,7 @@ intel_context_migrate_copy(struct intel_context *ce,
goto out_rq;
}
- await = NULL;
+ deps = NULL;
}
/* The PTE updates + copy must not be interrupted. */
@@ -527,7 +525,7 @@ static int emit_clear(struct i915_request *rq, int size, u32 value)
int
intel_context_migrate_clear(struct intel_context *ce,
- struct dma_fence *await,
+ const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
bool is_lmem,
@@ -552,8 +550,8 @@ intel_context_migrate_clear(struct intel_context *ce,
goto out_ce;
}
- if (await) {
- err = i915_request_await_dma_fence(rq, await);
+ if (deps) {
+ err = i915_request_await_deps(rq, deps);
if (err)
goto out_rq;
@@ -563,7 +561,7 @@ intel_context_migrate_clear(struct intel_context *ce,
goto out_rq;
}
- await = NULL;
+ deps = NULL;
}
/* The PTE updates + clear must not be interrupted. */
@@ -601,7 +599,7 @@ out_ce:
int intel_migrate_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
- struct dma_fence *await,
+ const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
@@ -626,7 +624,7 @@ int intel_migrate_copy(struct intel_migrate *m,
if (err)
goto out;
- err = intel_context_migrate_copy(ce, await,
+ err = intel_context_migrate_copy(ce, deps,
src, src_cache_level, src_is_lmem,
dst, dst_cache_level, dst_is_lmem,
out);
@@ -640,7 +638,7 @@ out:
int
intel_migrate_clear(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
- struct dma_fence *await,
+ const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
bool is_lmem,
@@ -663,7 +661,7 @@ intel_migrate_clear(struct intel_migrate *m,
if (err)
goto out;
- err = intel_context_migrate_clear(ce, await, sg, cache_level,
+ err = intel_context_migrate_clear(ce, deps, sg, cache_level,
is_lmem, value, out);
intel_context_unpin(ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.h b/drivers/gpu/drm/i915/gt/intel_migrate.h
index 4e18e755a00b..ccc677ec4aa3 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.h
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.h
@@ -11,6 +11,7 @@
#include "intel_migrate_types.h"
struct dma_fence;
+struct i915_deps;
struct i915_request;
struct i915_gem_ww_ctx;
struct intel_gt;
@@ -23,7 +24,7 @@ struct intel_context *intel_migrate_create_context(struct intel_migrate *m);
int intel_migrate_copy(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
- struct dma_fence *await,
+ const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
@@ -33,7 +34,7 @@ int intel_migrate_copy(struct intel_migrate *m,
struct i915_request **out);
int intel_context_migrate_copy(struct intel_context *ce,
- struct dma_fence *await,
+ const struct i915_deps *deps,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
@@ -45,7 +46,7 @@ int intel_context_migrate_copy(struct intel_context *ce,
int
intel_migrate_clear(struct intel_migrate *m,
struct i915_gem_ww_ctx *ww,
- struct dma_fence *await,
+ const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
bool is_lmem,
@@ -53,7 +54,7 @@ intel_migrate_clear(struct intel_migrate *m,
struct i915_request **out);
int
intel_context_migrate_clear(struct intel_context *ce,
- struct dma_fence *await,
+ const struct i915_deps *deps,
struct scatterlist *sg,
enum i915_cache_level cache_level,
bool is_lmem,
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 15f9ada28a7a..9c253ba593c6 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -424,7 +424,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
table->unused_entries_index = I915_MOCS_PTE;
if (IS_DG2(i915)) {
- if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) {
+ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
table->table = dg2_mocs_table_g10_ax;
} else {
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index 4396bfd630d8..083b3090c69c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -289,16 +289,6 @@ void i915_vm_free_pt_stash(struct i915_address_space *vm,
}
}
-int ppgtt_set_pages(struct i915_vma *vma)
-{
- GEM_BUG_ON(vma->pages);
-
- vma->pages = vma->obj->mm.pages;
- vma->page_sizes = vma->obj->mm.page_sizes;
-
- return 0;
-}
-
void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
unsigned long lmem_pt_obj_flags)
{
@@ -315,6 +305,4 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
- ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
- ppgtt->vm.vma_ops.clear_pages = clear_pages;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 799578ae3ed8..bb0d6e363f5d 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -118,10 +118,17 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
GEN6_RC_CTL_RC6_ENABLE |
GEN6_RC_CTL_EI_MODE(1);
- pg_enable =
- GEN9_RENDER_PG_ENABLE |
- GEN9_MEDIA_PG_ENABLE |
- GEN11_MEDIA_SAMPLER_PG_ENABLE;
+ /* Wa_16011777198 - Render powergating must remain disabled */
+ if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
+ IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
+ pg_enable =
+ GEN9_MEDIA_PG_ENABLE |
+ GEN11_MEDIA_SAMPLER_PG_ENABLE;
+ else
+ pg_enable =
+ GEN9_RENDER_PG_ENABLE |
+ GEN9_MEDIA_PG_ENABLE |
+ GEN11_MEDIA_SAMPLER_PG_ENABLE;
if (GRAPHICS_VER(gt->i915) >= 12) {
for (i = 0; i < I915_MAX_VCS; i++)
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index afb35d2e5c73..fde2dcb59809 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -66,12 +66,16 @@ static void release_fake_lmem_bar(struct intel_memory_region *mem)
DMA_ATTR_FORCE_CONTIGUOUS);
}
-static void
+static int
region_lmem_release(struct intel_memory_region *mem)
{
- intel_region_ttm_fini(mem);
+ int ret;
+
+ ret = intel_region_ttm_fini(mem);
io_mapping_fini(&mem->iomap);
release_fake_lmem_bar(mem);
+
+ return ret;
}
static int
@@ -158,7 +162,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt)
static bool get_legacy_lowmem_region(struct intel_uncore *uncore,
u64 *start, u32 *size)
{
- if (!IS_DG1_GT_STEP(uncore->i915, STEP_A0, STEP_C0))
+ if (!IS_DG1_GRAPHICS_STEP(uncore->i915, STEP_A0, STEP_C0))
return false;
*start = 0;
@@ -193,6 +197,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
struct intel_uncore *uncore = gt->uncore;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
struct intel_memory_region *mem;
+ resource_size_t min_page_size;
resource_size_t io_start;
resource_size_t lmem_size;
int err;
@@ -207,10 +212,12 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
return ERR_PTR(-ENODEV);
+ min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
+ I915_GTT_PAGE_SIZE_4K;
mem = intel_memory_region_create(i915,
0,
lmem_size,
- I915_GTT_PAGE_SIZE_4K,
+ min_page_size,
io_start,
INTEL_MEMORY_LOCAL,
0,
@@ -231,7 +238,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
return mem;
err_region_put:
- intel_memory_region_put(mem);
+ intel_memory_region_destroy(mem);
return ERR_PTR(err);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 5000608189da..6f2821cca409 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -299,13 +299,6 @@ static int gen6_reset_engines(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
- static const u32 hw_engine_mask[] = {
- [RCS0] = GEN6_GRDOM_RENDER,
- [BCS0] = GEN6_GRDOM_BLT,
- [VCS0] = GEN6_GRDOM_MEDIA,
- [VCS1] = GEN8_GRDOM_MEDIA2,
- [VECS0] = GEN6_GRDOM_VECS,
- };
struct intel_engine_cs *engine;
u32 hw_mask;
@@ -316,8 +309,7 @@ static int gen6_reset_engines(struct intel_gt *gt,
hw_mask = 0;
for_each_engine_masked(engine, gt, engine_mask, tmp) {
- GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
- hw_mask |= hw_engine_mask[engine->id];
+ hw_mask |= engine->reset_domain;
}
}
@@ -494,22 +486,6 @@ static int gen11_reset_engines(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
- static const u32 hw_engine_mask[] = {
- [RCS0] = GEN11_GRDOM_RENDER,
- [BCS0] = GEN11_GRDOM_BLT,
- [VCS0] = GEN11_GRDOM_MEDIA,
- [VCS1] = GEN11_GRDOM_MEDIA2,
- [VCS2] = GEN11_GRDOM_MEDIA3,
- [VCS3] = GEN11_GRDOM_MEDIA4,
- [VCS4] = GEN11_GRDOM_MEDIA5,
- [VCS5] = GEN11_GRDOM_MEDIA6,
- [VCS6] = GEN11_GRDOM_MEDIA7,
- [VCS7] = GEN11_GRDOM_MEDIA8,
- [VECS0] = GEN11_GRDOM_VECS,
- [VECS1] = GEN11_GRDOM_VECS2,
- [VECS2] = GEN11_GRDOM_VECS3,
- [VECS3] = GEN11_GRDOM_VECS4,
- };
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
u32 reset_mask, unlock_mask = 0;
@@ -520,8 +496,7 @@ static int gen11_reset_engines(struct intel_gt *gt,
} else {
reset_mask = 0;
for_each_engine_masked(engine, gt, engine_mask, tmp) {
- GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
- reset_mask |= hw_engine_mask[engine->id];
+ reset_mask |= engine->reset_domain;
ret = gen11_lock_sfc(engine, &reset_mask, &unlock_mask);
if (ret)
goto sfc_unlock;
@@ -1369,20 +1344,27 @@ void intel_gt_handle_error(struct intel_gt *gt,
/* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
synchronize_rcu_expedited();
- /* Prevent any other reset-engine attempt. */
- for_each_engine(engine, gt, tmp) {
- while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
- &gt->reset.flags))
- wait_on_bit(&gt->reset.flags,
- I915_RESET_ENGINE + engine->id,
- TASK_UNINTERRUPTIBLE);
+ /*
+ * Prevent any other reset-engine attempt. We don't do this for GuC
+ * submission, as the GuC owns the per-engine reset, not the i915.
+ */
+ if (!intel_uc_uses_guc_submission(&gt->uc)) {
+ for_each_engine(engine, gt, tmp) {
+ while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+ &gt->reset.flags))
+ wait_on_bit(&gt->reset.flags,
+ I915_RESET_ENGINE + engine->id,
+ TASK_UNINTERRUPTIBLE);
+ }
}
intel_gt_reset_global(gt, engine_mask, msg);
- for_each_engine(engine, gt, tmp)
- clear_bit_unlock(I915_RESET_ENGINE + engine->id,
- &gt->reset.flags);
+ if (!intel_uc_uses_guc_submission(&gt->uc)) {
+ for_each_engine(engine, gt, tmp)
+ clear_bit_unlock(I915_RESET_ENGINE + engine->id,
+ &gt->reset.flags);
+ }
clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
smp_mb__after_atomic();
wake_up_all(&gt->reset.queue);
@@ -1443,6 +1425,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
I915_WEDGED_ON_INIT);
intel_gt_set_wedged(gt);
+ i915_disable_error_state(gt->i915, -ENODEV);
set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
/* Wedged on init is non-recoverable */
@@ -1452,6 +1435,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
{
intel_gt_set_wedged(gt);
+ i915_disable_error_state(gt->i915, -ENODEV);
set_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
intel_gt_retire_requests(gt); /* cleanup any wedged requests */
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 0f1aa1c275b2..a2b7be1d4f5c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -1358,7 +1358,7 @@ retry:
err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
if (!err && gen7_wa_vma)
err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
- if (!err && engine->legacy.ring->vma->obj)
+ if (!err)
err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
if (!err)
err = intel_timeline_pin(timeline, &ww);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index d4f4eb2fc2b5..8a13bc005b45 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -937,8 +937,70 @@ void intel_rps_park(struct intel_rps *rps)
GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
}
+u32 intel_rps_get_boost_frequency(struct intel_rps *rps)
+{
+ struct intel_guc_slpc *slpc;
+
+ if (rps_uses_slpc(rps)) {
+ slpc = rps_to_slpc(rps);
+
+ return slpc->boost_freq;
+ } else {
+ return intel_gpu_freq(rps, rps->boost_freq);
+ }
+}
+
+static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
+{
+ bool boost = false;
+
+ /* Validate against (static) hardware limits */
+ val = intel_freq_opcode(rps, val);
+ if (val < rps->min_freq || val > rps->max_freq)
+ return -EINVAL;
+
+ mutex_lock(&rps->lock);
+ if (val != rps->boost_freq) {
+ rps->boost_freq = val;
+ boost = atomic_read(&rps->num_waiters);
+ }
+ mutex_unlock(&rps->lock);
+ if (boost)
+ schedule_work(&rps->work);
+
+ return 0;
+}
+
+int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq)
+{
+ struct intel_guc_slpc *slpc;
+
+ if (rps_uses_slpc(rps)) {
+ slpc = rps_to_slpc(rps);
+
+ return intel_guc_slpc_set_boost_freq(slpc, freq);
+ } else {
+ return rps_set_boost_freq(rps, freq);
+ }
+}
+
+void intel_rps_dec_waiters(struct intel_rps *rps)
+{
+ struct intel_guc_slpc *slpc;
+
+ if (rps_uses_slpc(rps)) {
+ slpc = rps_to_slpc(rps);
+
+ intel_guc_slpc_dec_waiters(slpc);
+ } else {
+ atomic_dec(&rps->num_waiters);
+ }
+}
+
void intel_rps_boost(struct i915_request *rq)
{
+ struct intel_guc_slpc *slpc;
+
if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
return;
@@ -946,6 +1008,16 @@ void intel_rps_boost(struct i915_request *rq)
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
+ if (rps_uses_slpc(rps)) {
+ slpc = rps_to_slpc(rps);
+
+ /* Only queue the boost work if the old waiter count was zero */
+ if (!atomic_fetch_inc(&slpc->num_waiters))
+ schedule_work(&slpc->boost_work);
+
+ return;
+ }
+
if (atomic_fetch_inc(&rps->num_waiters))
return;
@@ -2153,6 +2225,65 @@ u32 intel_rps_read_state_cap(struct intel_rps *rps)
return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
}
+static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE;
+
+ /* Allow punit to process software requests */
+ intel_uncore_write(uncore, GEN6_RP_CONTROL, state);
+}
+
+void intel_rps_raise_unslice(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u32 rp0_unslice_req;
+
+ mutex_lock(&rps->lock);
+
+ if (rps_uses_slpc(rps)) {
+ /* RP limits have not been initialized yet for SLPC path */
+ rp0_unslice_req = ((intel_rps_read_state_cap(rps) >> 0)
+ & 0xff) * GEN9_FREQ_SCALER;
+
+ intel_rps_set_manual(rps, true);
+ intel_uncore_write(uncore, GEN6_RPNSWREQ,
+ ((rp0_unslice_req <<
+ GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
+ GEN9_IGNORE_SLICE_RATIO));
+ intel_rps_set_manual(rps, false);
+ } else {
+ intel_rps_set(rps, rps->rp0_freq);
+ }
+
+ mutex_unlock(&rps->lock);
+}
+
+void intel_rps_lower_unslice(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u32 rpn_unslice_req;
+
+ mutex_lock(&rps->lock);
+
+ if (rps_uses_slpc(rps)) {
+ /* RP limits have not been initialized yet for SLPC path */
+ rpn_unslice_req = ((intel_rps_read_state_cap(rps) >> 16)
+ & 0xff) * GEN9_FREQ_SCALER;
+
+ intel_rps_set_manual(rps, true);
+ intel_uncore_write(uncore, GEN6_RPNSWREQ,
+ ((rpn_unslice_req <<
+ GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
+ GEN9_IGNORE_SLICE_RATIO));
+ intel_rps_set_manual(rps, false);
+ } else {
+ intel_rps_set(rps, rps->min_freq);
+ }
+
+ mutex_unlock(&rps->lock);
+}
+
/* External interface for intel_ips.ko */
static struct drm_i915_private __rcu *ips_mchdev;
@@ -2229,7 +2360,7 @@ unsigned long i915_read_mch_val(void)
return 0;
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- struct intel_ips *ips = &i915->gt.rps.ips;
+ struct intel_ips *ips = &to_gt(i915)->rps.ips;
spin_lock_irq(&mchdev_lock);
chipset_val = __ips_chipset_val(ips);
@@ -2256,7 +2387,7 @@ bool i915_gpu_raise(void)
if (!i915)
return false;
- rps = &i915->gt.rps;
+ rps = &to_gt(i915)->rps;
spin_lock_irq(&mchdev_lock);
if (rps->max_freq_softlimit < rps->max_freq)
@@ -2283,7 +2414,7 @@ bool i915_gpu_lower(void)
if (!i915)
return false;
- rps = &i915->gt.rps;
+ rps = &to_gt(i915)->rps;
spin_lock_irq(&mchdev_lock);
if (rps->max_freq_softlimit > rps->min_freq)
@@ -2309,7 +2440,7 @@ bool i915_gpu_busy(void)
if (!i915)
return false;
- ret = i915->gt.awake;
+ ret = to_gt(i915)->awake;
drm_dev_put(&i915->drm);
return ret;
@@ -2332,11 +2463,11 @@ bool i915_gpu_turbo_disable(void)
if (!i915)
return false;
- rps = &i915->gt.rps;
+ rps = &to_gt(i915)->rps;
spin_lock_irq(&mchdev_lock);
rps->max_freq_softlimit = rps->min_freq;
- ret = !__gen5_rps_set(&i915->gt.rps, rps->min_freq);
+ ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq);
spin_unlock_irq(&mchdev_lock);
drm_dev_put(&i915->drm);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index 11960d64ca82..c6d76a3d1331 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -23,6 +23,9 @@ void intel_rps_disable(struct intel_rps *rps);
void intel_rps_park(struct intel_rps *rps);
void intel_rps_unpark(struct intel_rps *rps);
void intel_rps_boost(struct i915_request *rq);
+void intel_rps_dec_waiters(struct intel_rps *rps);
+u32 intel_rps_get_boost_frequency(struct intel_rps *rps);
+int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq);
int intel_rps_set(struct intel_rps *rps, u8 val);
void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive);
@@ -42,6 +45,8 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
u32 intel_rps_read_punit_req(struct intel_rps *rps);
u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
u32 intel_rps_read_state_cap(struct intel_rps *rps);
+void intel_rps_raise_unslice(struct intel_rps *rps);
+void intel_rps_lower_unslice(struct intel_rps *rps);
void gen5_rps_irq_handler(struct intel_rps *rps);
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index a7a0a3acbacb..6a4372c3a3c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -483,7 +483,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
gen9_ctx_workarounds_init(engine, wal);
/* WaToEnableHwFixForPushConstHWBug:kbl */
- if (IS_KBL_GT_STEP(i915, STEP_C0, STEP_FOREVER))
+ if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
@@ -561,6 +561,22 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
/*
* These settings aren't actually workarounds, but general tuning settings that
+ * need to be programmed on dg2 platform.
+ */
+static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+ REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
+ wa_add(wal,
+ FF_MODE2,
+ FF_MODE2_TDS_TIMER_MASK,
+ FF_MODE2_TDS_TIMER_128,
+ 0, false);
+}
+
+/*
+ * These settings aren't actually workarounds, but general tuning settings that
* need to be programmed on several platforms.
*/
static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
@@ -622,13 +638,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
FF_MODE2_GS_TIMER_MASK,
FF_MODE2_GS_TIMER_224,
0, false);
-
- /*
- * Wa_14012131227:dg1
- * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
- */
- wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1,
- GEN9_RHWO_OPTIMIZATION_DISABLE);
}
static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -645,6 +654,42 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
}
+static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ dg2_ctx_gt_tuning_init(engine, wal);
+
+ /* Wa_16011186671:dg2_g11 */
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+ wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
+ wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+ /* Wa_14010469329:dg2_g10 */
+ wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
+ XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
+
+ /*
+ * Wa_22010465075:dg2_g10
+ * Wa_22010613112:dg2_g10
+ * Wa_14010698770:dg2_g10
+ */
+ wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
+ GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+ }
+
+ /* Wa_16013271637:dg2 */
+ wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1,
+ MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
+
+ /* Wa_22012532006:dg2 */
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
+ IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
+ wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+ DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+}
+
static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
@@ -731,7 +776,11 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
if (engine->class != RENDER_CLASS)
goto done;
- if (IS_DG1(i915))
+ if (IS_DG2(i915))
+ dg2_ctx_workarounds_init(engine, wal);
+ else if (IS_XEHPSDV(i915))
+ ; /* noop; none at this time */
+ else if (IS_DG1(i915))
dg1_ctx_workarounds_init(engine, wal);
else if (GRAPHICS_VER(i915) == 12)
gen12_ctx_workarounds_init(engine, wal);
@@ -879,10 +928,51 @@ hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
}
static void
+gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
+ unsigned int slice, subslice;
+ u32 mcr, mcr_mask;
+
+ GEM_BUG_ON(GRAPHICS_VER(i915) != 9);
+
+ /*
+ * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
+ * Before any MMIO read into slice/subslice specific registers, MCR
+ * packet control register needs to be programmed to point to any
+ * enabled s/ss pair. Otherwise, incorrect values will be returned.
+ * This means each subsequent MMIO read will be forwarded to a
+ * specific s/ss combination, but this is OK since these registers
+ * are consistent across s/ss in almost all cases. In the rare
+ * occasions, such as INSTDONE, where this value is dependent
+ * on s/ss combo, the read should be done with read_subslice_reg.
+ */
+ slice = ffs(sseu->slice_mask) - 1;
+ GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
+ subslice = ffs(intel_sseu_get_subslices(sseu, slice));
+ GEM_BUG_ON(!subslice);
+ subslice--;
+
+ /*
+ * We use GEN8_MCR..() macros to calculate the |mcr| value for
+ * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
+ */
+ mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+ mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
+
+ drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);
+
+ wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
+}
+
+static void
gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = gt->i915;
+ /* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
+ gen9_wa_init_mcr(i915, wal);
+
/* WaDisableKillLogic:bxt,skl,kbl */
if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
wa_write_or(wal,
@@ -917,7 +1007,7 @@ skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
/* WaInPlaceDecompressionHang:skl */
- if (IS_SKL_GT_STEP(gt->i915, STEP_A0, STEP_H0))
+ if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
wa_write_or(wal,
GEN9_GAMT_ECO_REG_RW_IA,
GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
@@ -929,7 +1019,7 @@ kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
gen9_gt_workarounds_init(gt, wal);
/* WaDisableDynamicCreditSharing:kbl */
- if (IS_KBL_GT_STEP(gt->i915, 0, STEP_C0))
+ if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
wa_write_or(wal,
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
@@ -1135,9 +1225,18 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
+ /* Wa_1407352427:icl,ehl */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
+ PSDUNIT_CLKGATE_DIS);
+
+ /* Wa_1406680159:icl,ehl */
+ wa_write_or(wal,
+ SUBSLICE_UNIT_LEVEL_CLKGATE,
+ GWUNIT_CLKGATE_DIS);
+
/* Wa_1607087056:icl,ehl,jsl */
if (IS_ICELAKE(i915) ||
- IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0))
+ IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
wa_write_or(wal,
SLICE_UNIT_LEVEL_CLKGATE,
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
@@ -1191,19 +1290,19 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
gen12_gt_workarounds_init(gt, wal);
/* Wa_1409420604:tgl */
- if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
+ if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
wa_write_or(wal,
SUBSLICE_UNIT_LEVEL_CLKGATE2,
CPSSUNIT_CLKGATE_DIS);
/* Wa_1607087056:tgl also know as BUG:1409180338 */
- if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
+ if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
wa_write_or(wal,
SLICE_UNIT_LEVEL_CLKGATE,
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
/* Wa_1408615072:tgl[a0] */
- if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
+ if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
VSUNIT_CLKGATE_DIS_TGL);
}
@@ -1216,7 +1315,7 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
gen12_gt_workarounds_init(gt, wal);
/* Wa_1607087056:dg1 */
- if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0))
+ if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
wa_write_or(wal,
SLICE_UNIT_LEVEL_CLKGATE,
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
@@ -1237,7 +1336,179 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
static void
xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
+ struct drm_i915_private *i915 = gt->i915;
+
+ xehp_init_mcr(gt, wal);
+
+ /* Wa_1409757795:xehpsdv */
+ wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
+
+ /* Wa_18011725039:xehpsdv */
+ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
+ wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
+ wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
+ }
+
+ /* Wa_16011155590:xehpsdv */
+ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+ TSGUNIT_CLKGATE_DIS);
+
+ /* Wa_14011780169:xehpsdv */
+ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
+ wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
+ GAMTLBVDBOX7_CLKGATE_DIS |
+ GAMTLBVDBOX6_CLKGATE_DIS |
+ GAMTLBVDBOX5_CLKGATE_DIS |
+ GAMTLBVDBOX4_CLKGATE_DIS |
+ GAMTLBVDBOX3_CLKGATE_DIS |
+ GAMTLBVDBOX2_CLKGATE_DIS |
+ GAMTLBVDBOX1_CLKGATE_DIS |
+ GAMTLBVDBOX0_CLKGATE_DIS |
+ GAMTLBKCR_CLKGATE_DIS |
+ GAMTLBGUC_CLKGATE_DIS |
+ GAMTLBBLT_CLKGATE_DIS);
+ wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
+ GAMTLBGFXA1_CLKGATE_DIS |
+ GAMTLBCOMPA0_CLKGATE_DIS |
+ GAMTLBCOMPA1_CLKGATE_DIS |
+ GAMTLBCOMPB0_CLKGATE_DIS |
+ GAMTLBCOMPB1_CLKGATE_DIS |
+ GAMTLBCOMPC0_CLKGATE_DIS |
+ GAMTLBCOMPC1_CLKGATE_DIS |
+ GAMTLBCOMPD0_CLKGATE_DIS |
+ GAMTLBCOMPD1_CLKGATE_DIS |
+ GAMTLBMERT_CLKGATE_DIS |
+ GAMTLBVEBOX3_CLKGATE_DIS |
+ GAMTLBVEBOX2_CLKGATE_DIS |
+ GAMTLBVEBOX1_CLKGATE_DIS |
+ GAMTLBVEBOX0_CLKGATE_DIS);
+ }
+
+ /* Wa_14012362059:xehpsdv */
+ wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+
+ /* Wa_16012725990:xehpsdv */
+ if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
+
+ /* Wa_14011060649:xehpsdv */
+ wa_14011060649(gt, wal);
+
+ /* Wa_14014368820:xehpsdv */
+ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
+ GLOBAL_INVALIDATION_MODE);
+}
+
+static void
+dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+ struct intel_engine_cs *engine;
+ int id;
+
xehp_init_mcr(gt, wal);
+
+ /* Wa_14011060649:dg2 */
+ wa_14011060649(gt, wal);
+
+ /*
+ * Although there are per-engine instances of these registers,
+ * they technically exist outside the engine itself and are not
+ * impacted by engine resets. Furthermore, they're part of the
+ * GuC blacklist so trying to treat them as engine workarounds
+ * will result in GuC initialization failure and a wedged GPU.
+ */
+ for_each_engine(engine, gt, id) {
+ if (engine->class != VIDEO_DECODE_CLASS)
+ continue;
+
+ /* Wa_16010515920:dg2_g10 */
+ if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
+ wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
+ ALNUNIT_CLKGATE_DIS);
+ }
+
+ if (IS_DG2_G10(gt->i915)) {
+ /* Wa_22010523718:dg2 */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+ CG3DDISCFEG_CLKGATE_DIS);
+
+ /* Wa_14011006942:dg2 */
+ wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE,
+ DSS_ROUTER_CLKGATE_DIS);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
+ /* Wa_14010680813:dg2_g10 */
+ wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
+ EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
+
+ /* Wa_14010948348:dg2_g10 */
+ wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
+
+ /* Wa_14011037102:dg2_g10 */
+ wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
+
+ /* Wa_14011371254:dg2_g10 */
+ wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
+
+ /* Wa_14011431319:dg2_g10 */
+ wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
+ GAMTLBVDBOX7_CLKGATE_DIS |
+ GAMTLBVDBOX6_CLKGATE_DIS |
+ GAMTLBVDBOX5_CLKGATE_DIS |
+ GAMTLBVDBOX4_CLKGATE_DIS |
+ GAMTLBVDBOX3_CLKGATE_DIS |
+ GAMTLBVDBOX2_CLKGATE_DIS |
+ GAMTLBVDBOX1_CLKGATE_DIS |
+ GAMTLBVDBOX0_CLKGATE_DIS |
+ GAMTLBKCR_CLKGATE_DIS |
+ GAMTLBGUC_CLKGATE_DIS |
+ GAMTLBBLT_CLKGATE_DIS);
+ wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
+ GAMTLBGFXA1_CLKGATE_DIS |
+ GAMTLBCOMPA0_CLKGATE_DIS |
+ GAMTLBCOMPA1_CLKGATE_DIS |
+ GAMTLBCOMPB0_CLKGATE_DIS |
+ GAMTLBCOMPB1_CLKGATE_DIS |
+ GAMTLBCOMPC0_CLKGATE_DIS |
+ GAMTLBCOMPC1_CLKGATE_DIS |
+ GAMTLBCOMPD0_CLKGATE_DIS |
+ GAMTLBCOMPD1_CLKGATE_DIS |
+ GAMTLBMERT_CLKGATE_DIS |
+ GAMTLBVEBOX3_CLKGATE_DIS |
+ GAMTLBVEBOX2_CLKGATE_DIS |
+ GAMTLBVEBOX1_CLKGATE_DIS |
+ GAMTLBVEBOX0_CLKGATE_DIS);
+
+ /* Wa_14010569222:dg2_g10 */
+ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+ GAMEDIA_CLKGATE_DIS);
+
+ /* Wa_14011028019:dg2_g10 */
+ wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
+ IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
+ /* Wa_14012362059:dg2 */
+ wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+ }
+
+ /* Wa_1509235366:dg2 */
+ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
+ GLOBAL_INVALIDATION_MODE);
+
+ /* Wa_14014830051:dg2 */
+ wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
+
+ /*
+ * The following are not actually "workarounds" but rather
+ * recommended tuning settings documented in the bspec's
+ * performance guide section.
+ */
+ wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+ wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
}
static void
@@ -1245,7 +1516,9 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = gt->i915;
- if (IS_XEHPSDV(i915))
+ if (IS_DG2(i915))
+ dg2_gt_workarounds_init(gt, wal);
+ else if (IS_XEHPSDV(i915))
xehpsdv_gt_workarounds_init(gt, wal);
else if (IS_DG1(i915))
dg1_gt_workarounds_init(gt, wal);
@@ -1519,7 +1792,7 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine)
RING_FORCE_TO_NONPRIV_RANGE_4);
}
-static void cml_whitelist_build(struct intel_engine_cs *engine)
+static void allow_read_ctx_timestamp(struct intel_engine_cs *engine)
{
struct i915_wa_list *w = &engine->whitelist;
@@ -1527,6 +1800,11 @@ static void cml_whitelist_build(struct intel_engine_cs *engine)
whitelist_reg_ext(w,
RING_CTX_TIMESTAMP(engine->mmio_base),
RING_FORCE_TO_NONPRIV_ACCESS_RD);
+}
+
+static void cml_whitelist_build(struct intel_engine_cs *engine)
+{
+ allow_read_ctx_timestamp(engine);
cfl_whitelist_build(engine);
}
@@ -1535,6 +1813,8 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
{
struct i915_wa_list *w = &engine->whitelist;
+ allow_read_ctx_timestamp(engine);
+
switch (engine->class) {
case RENDER_CLASS:
/* WaAllowUMDToModifyHalfSliceChicken7:icl */
@@ -1570,15 +1850,9 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
/* hucStatus2RegOffset */
whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
RING_FORCE_TO_NONPRIV_ACCESS_RD);
- whitelist_reg_ext(w,
- RING_CTX_TIMESTAMP(engine->mmio_base),
- RING_FORCE_TO_NONPRIV_ACCESS_RD);
break;
default:
- whitelist_reg_ext(w,
- RING_CTX_TIMESTAMP(engine->mmio_base),
- RING_FORCE_TO_NONPRIV_ACCESS_RD);
break;
}
}
@@ -1587,6 +1861,8 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
{
struct i915_wa_list *w = &engine->whitelist;
+ allow_read_ctx_timestamp(engine);
+
switch (engine->class) {
case RENDER_CLASS:
/*
@@ -1603,16 +1879,17 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
RING_FORCE_TO_NONPRIV_ACCESS_RD |
RING_FORCE_TO_NONPRIV_RANGE_4);
- /* Wa_1808121037:tgl */
+ /*
+ * Wa_1808121037:tgl
+ * Wa_14012131227:dg1
+ * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
+ */
whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
/* Wa_1806527549:tgl */
whitelist_reg(w, HIZ_CHICKEN);
break;
default:
- whitelist_reg_ext(w,
- RING_CTX_TIMESTAMP(engine->mmio_base),
- RING_FORCE_TO_NONPRIV_ACCESS_RD);
break;
}
}
@@ -1624,13 +1901,46 @@ static void dg1_whitelist_build(struct intel_engine_cs *engine)
tgl_whitelist_build(engine);
/* GEN:BUG:1409280441:dg1 */
- if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_B0) &&
+ if (IS_DG1_GRAPHICS_STEP(engine->i915, STEP_A0, STEP_B0) &&
(engine->class == RENDER_CLASS ||
engine->class == COPY_ENGINE_CLASS))
whitelist_reg_ext(w, RING_ID(engine->mmio_base),
RING_FORCE_TO_NONPRIV_ACCESS_RD);
}
+static void xehpsdv_whitelist_build(struct intel_engine_cs *engine)
+{
+ allow_read_ctx_timestamp(engine);
+}
+
+static void dg2_whitelist_build(struct intel_engine_cs *engine)
+{
+ struct i915_wa_list *w = &engine->whitelist;
+
+ allow_read_ctx_timestamp(engine);
+
+ switch (engine->class) {
+ case RENDER_CLASS:
+ /*
+ * Wa_1507100340:dg2_g10
+ *
+ * This covers 4 registers which are next to one another :
+ * - PS_INVOCATION_COUNT
+ * - PS_INVOCATION_COUNT_UDW
+ * - PS_DEPTH_COUNT
+ * - PS_DEPTH_COUNT_UDW
+ */
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
+ whitelist_reg_ext(w, PS_INVOCATION_COUNT,
+ RING_FORCE_TO_NONPRIV_ACCESS_RD |
+ RING_FORCE_TO_NONPRIV_RANGE_4);
+
+ break;
+ default:
+ break;
+ }
+}
+
void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
@@ -1638,7 +1948,11 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
wa_init_start(w, "whitelist", engine->name);
- if (IS_DG1(i915))
+ if (IS_DG2(i915))
+ dg2_whitelist_build(engine);
+ else if (IS_XEHPSDV(i915))
+ xehpsdv_whitelist_build(engine);
+ else if (IS_DG1(i915))
dg1_whitelist_build(engine);
else if (GRAPHICS_VER(i915) == 12)
tgl_whitelist_build(engine);
@@ -1712,13 +2026,119 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
}
}
+
+static bool needs_wa_1308578152(struct intel_engine_cs *engine)
+{
+ u64 dss_mask = intel_sseu_get_subslices(&engine->gt->info.sseu, 0);
+
+ return (dss_mask & GENMASK(GEN_DSS_PER_GSLICE - 1, 0)) == 0;
+}
+
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
- if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
- IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) {
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+ /* Wa_14013392000:dg2_g11 */
+ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
+
+ /* Wa_16011620976:dg2_g11 */
+ wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
+ IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+ /* Wa_14012419201:dg2 */
+ wa_masked_en(wal, GEN9_ROW_CHICKEN4,
+ GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) ||
+ IS_DG2_G11(engine->i915)) {
+ /*
+ * Wa_22012826095:dg2
+ * Wa_22013059131:dg2
+ */
+ wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
+ MAXREQS_PER_BANK,
+ REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
+
+ /* Wa_22013059131:dg2 */
+ wa_write_or(wal, LSC_CHICKEN_BIT_0,
+ FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
+ }
+
+ /* Wa_1308578152:dg2_g10 when first gslice is fused off */
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) &&
+ needs_wa_1308578152(engine)) {
+ wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
+ GEN12_REPLAY_MODE_GRANULARITY);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
+ IS_DG2_G11(engine->i915)) {
+ /* Wa_22013037850:dg2 */
+ wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
+ DISABLE_128B_EVICTION_COMMAND_UDW);
+
+ /* Wa_22012856258:dg2 */
+ wa_masked_en(wal, GEN7_ROW_CHICKEN2,
+ GEN12_DISABLE_READ_SUPPRESSION);
+
+ /*
+ * Wa_22010960976:dg2
+ * Wa_14013347512:dg2
+ */
+ wa_masked_dis(wal, GEN12_HDC_CHICKEN0,
+ LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+ /*
+ * Wa_1608949956:dg2_g10
+ * Wa_14010198302:dg2_g10
+ */
+ wa_masked_en(wal, GEN8_ROW_CHICKEN,
+ MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
+
+ /*
+ * Wa_14010918519:dg2_g10
+ *
+ * LSC_CHICKEN_BIT_0 always reads back as 0 in this stepping,
+ * so ignoring verification.
+ */
+ wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
+ FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
+ 0, false);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+ /* Wa_22010430635:dg2 */
+ wa_masked_en(wal,
+ GEN9_ROW_CHICKEN4,
+ GEN12_DISABLE_GRF_CLEAR);
+
+ /* Wa_14010648519:dg2 */
+ wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
+ }
+
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
+ IS_DG2_G11(engine->i915)) {
+ /* Wa_22012654132:dg2 */
+ wa_add(wal, GEN10_CACHE_MODE_SS, 0,
+ _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+ 0 /* write-only, so skip validation */,
+ true);
+ }
+
+ /* Wa_14013202645:dg2 */
+ if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) ||
+ IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
+ wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
+
+ if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
+ IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
/*
* Wa_1607138336:tgl[a0],dg1[a0]
* Wa_1607063988:tgl[a0],dg1[a0]
@@ -1728,7 +2148,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
}
- if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) {
+ if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
/*
* Wa_1606679103:tgl
* (see also Wa_1606682166:icl)
@@ -1763,7 +2183,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
}
if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
- IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
+ IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
/* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
wa_masked_en(wal, GEN7_ROW_CHICKEN2,
@@ -1776,8 +2196,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
}
-
- if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
+ if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
/*
* Wa_1607030317:tgl
@@ -1860,15 +2279,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
- /* Wa_1407352427:icl,ehl */
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
- PSDUNIT_CLKGATE_DIS);
-
- /* Wa_1406680159:icl,ehl */
- wa_write_or(wal,
- SUBSLICE_UNIT_LEVEL_CLKGATE,
- GWUNIT_CLKGATE_DIS);
-
/*
* Wa_1408767742:icl[a2..forever],ehl[all]
* Wa_1605460711:icl[a0..c0]
@@ -2139,7 +2549,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
struct drm_i915_private *i915 = engine->i915;
/* WaKBLVECSSemaphoreWaitPoll:kbl */
- if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_F0)) {
+ if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
wa_write(wal,
RING_SEMA_WAIT_POLL(engine->mmio_base),
1);
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 8b89215afe46..c0637bf799a3 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -17,7 +17,7 @@ static int mock_timeline_pin(struct intel_timeline *tl)
{
int err;
- if (WARN_ON(!i915_gem_object_trylock(tl->hwsp_ggtt->obj)))
+ if (WARN_ON(!i915_gem_object_trylock(tl->hwsp_ggtt->obj, NULL)))
return -EBUSY;
err = intel_timeline_pin_map(tl);
@@ -35,9 +35,31 @@ static void mock_timeline_unpin(struct intel_timeline *tl)
atomic_dec(&tl->pin_count);
}
+/*
+ * Create an internal GEM object of @size bytes and look up a vma for it in
+ * the GGTT address space. Returns the vma, or an ERR_PTR: either the object
+ * creation error recast, or the i915_vma_instance() error (in which case the
+ * freshly created object is put again before returning).
+ */
+static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+{
+ struct i915_address_space *vm = &ggtt->vm;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ obj = i915_gem_object_create_internal(vm->i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ /* On failure the object has no other user; give it back. */
+ if (IS_ERR(vma))
+ i915_gem_object_put(obj);
+
+ return vma;
+}
+
static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
{
- const unsigned long sz = PAGE_SIZE / 2;
+ const unsigned long sz = PAGE_SIZE;
struct intel_ring *ring;
ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
@@ -50,15 +72,11 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
ring->vaddr = (void *)(ring + 1);
atomic_set(&ring->pin_count, 1);
- ring->vma = i915_vma_alloc();
- if (!ring->vma) {
+ ring->vma = create_ring_vma(engine->gt->ggtt, PAGE_SIZE);
+ if (IS_ERR(ring->vma)) {
kfree(ring);
return NULL;
}
- i915_active_init(&ring->vma->active, NULL, NULL, 0);
- __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma));
- __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags);
- ring->vma->node.size = sz;
intel_ring_update_space(ring);
@@ -67,8 +85,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
+/* Tear down a mock ring: hand its vma to i915_vma_put(), then kfree() the ring. */
static void mock_ring_free(struct intel_ring *ring)
{
- i915_active_fini(&ring->vma->active);
- i915_vma_free(ring->vma);
+ i915_vma_put(ring->vma);
kfree(ring);
}
@@ -125,6 +142,7 @@ static void mock_context_unpin(struct intel_context *ce)
+/* Unpin the context's ring vma (the vma that mock_context_pre_pin() pins). */
static void mock_context_post_unpin(struct intel_context *ce)
{
+ i915_vma_unpin(ce->ring->vma);
}
static void mock_context_destroy(struct kref *ref)
@@ -169,7 +187,7 @@ static int mock_context_alloc(struct intel_context *ce)
+/*
+ * Pin the context's ring vma under the caller's ww context, using the
+ * PIN_GLOBAL | PIN_HIGH flags. Returns whatever i915_vma_pin_ww() returns.
+ * The third argument is unused here.
+ */
static int mock_context_pre_pin(struct intel_context *ce,
struct i915_gem_ww_ctx *ww, void **unused)
{
- return 0;
+ return i915_vma_pin_ww(ce->ring->vma, ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
}
static int mock_context_pin(struct intel_context *ce, void *unused)
@@ -327,7 +345,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
struct mock_engine *engine;
GEM_BUG_ON(id >= I915_NUM_ENGINES);
- GEM_BUG_ON(!i915->gt.uncore);
+ GEM_BUG_ON(!to_gt(i915)->uncore);
engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL);
if (!engine)
@@ -335,8 +353,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
/* minimal engine setup for requests */
engine->base.i915 = i915;
- engine->base.gt = &i915->gt;
- engine->base.uncore = i915->gt.uncore;
+ engine->base.gt = to_gt(i915);
+ engine->base.uncore = to_gt(i915)->uncore;
snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
engine->base.id = id;
engine->base.mask = BIT(id);
@@ -359,8 +377,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.release = mock_engine_release;
- i915->gt.engine[id] = &engine->base;
- i915->gt.engine_class[0][id] = &engine->base;
+ to_gt(i915)->engine[id] = &engine->base;
+ to_gt(i915)->engine_class[0][id] = &engine->base;
/* fake hw queue */
spin_lock_init(&engine->hw_lock);
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index fa7b99a671dd..76fbae358072 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -442,7 +442,7 @@ int intel_context_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_active_context),
SUBTEST(live_remote_context),
};
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
if (intel_gt_is_wedged(gt))
return 0;
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine.c b/drivers/gpu/drm/i915/gt/selftest_engine.c
index 262764f6d90a..57fea9ea1705 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine.c
@@ -12,7 +12,7 @@ int intel_engine_live_selftests(struct drm_i915_private *i915)
live_engine_pm_selftests,
NULL,
};
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
typeof(*tests) *fn;
for (fn = tests; *fn; fn++) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 64abf5feabfa..1b75f478d1b8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -361,10 +361,10 @@ int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
SUBTEST(perf_mi_noop),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
static int intel_mmio_bases_check(void *arg)
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 6e6e4d747cca..273d440a53e3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -378,13 +378,13 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
int saved_hangcheck;
int err;
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
saved_hangcheck = i915->params.enable_hangcheck;
i915->params.enable_hangcheck = INT_MAX;
- err = intel_gt_live_subtests(tests, &i915->gt);
+ err = intel_gt_live_subtests(tests, to_gt(i915));
i915->params.enable_hangcheck = saved_hangcheck;
return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 0035be4bf58b..0dcb3ed44a73 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -215,6 +215,31 @@ static int live_engine_timestamps(void *arg)
return 0;
}
+static int __spin_until_busier(struct intel_engine_cs *engine, ktime_t busyness)
+{
+ ktime_t start, unused, dt;
+
+ if (!intel_engine_uses_guc(engine))
+ return 0;
+
+ /*
+ * In GuC mode of submission, the busyness stats may get updated after
+ * the batch starts running. Poll for a change in busyness and timeout
+ * after 10 ms (the 10000000 ns limit checked below), returning -ETIME.
+ */
+ start = ktime_get();
+ while (intel_engine_get_busy_time(engine, &unused) == busyness) {
+ dt = ktime_get() - start;
+ if (dt > 10000000) {
+ pr_err("active wait timed out %lld\n", dt);
+ ENGINE_TRACE(engine, "active wait time out %lld\n", dt);
+ return -ETIME;
+ }
+ }
+
+ return 0;
+}
+
static int live_engine_busy_stats(void *arg)
{
struct intel_gt *gt = arg;
@@ -233,6 +258,7 @@ static int live_engine_busy_stats(void *arg)
GEM_BUG_ON(intel_gt_pm_is_awake(gt));
for_each_engine(engine, gt, id) {
struct i915_request *rq;
+ ktime_t busyness, dummy;
ktime_t de, dt;
ktime_t t[2];
@@ -275,16 +301,23 @@ static int live_engine_busy_stats(void *arg)
}
i915_request_add(rq);
+ busyness = intel_engine_get_busy_time(engine, &dummy);
if (!igt_wait_for_spinner(&spin, rq)) {
intel_gt_set_wedged(engine->gt);
err = -ETIME;
goto end;
}
+ err = __spin_until_busier(engine, busyness);
+ if (err) {
+ GEM_TRACE_DUMP();
+ goto end;
+ }
+
ENGINE_TRACE(engine, "measuring busy time\n");
preempt_disable();
de = intel_engine_get_busy_time(engine, &t[0]);
- udelay(100);
+ mdelay(10);
de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
preempt_enable();
dt = ktime_sub(t[1], t[0]);
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index b367ecfa42de..e10da897e07a 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -4502,11 +4502,11 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_virtual_reset),
};
- if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
+ if (to_gt(i915)->submission_method != INTEL_SUBMISSION_ELSP)
return 0;
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 3dec126fb910..be94f863bdef 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -194,10 +194,10 @@ int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_gt_resume),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
int intel_gt_pm_late_selftests(struct drm_i915_private *i915)
@@ -211,8 +211,8 @@ int intel_gt_pm_late_selftests(struct drm_i915_private *i915)
SUBTEST(live_rc6_ctx_wa),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 32f8b4f96cfa..4a20ba63446c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -472,7 +472,8 @@ static int igt_reset_nop_engine(void *arg)
count = 0;
st_engine_heartbeat_disable(engine);
- set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
+ &gt->reset.flags));
do {
int i;
@@ -529,7 +530,7 @@ static int igt_reset_nop_engine(void *arg)
break;
}
} while (time_before(jiffies, end_time));
- clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
st_engine_heartbeat_enable(engine);
pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
@@ -583,7 +584,8 @@ static int igt_reset_fail_engine(void *arg)
}
st_engine_heartbeat_disable(engine);
- set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
+ &gt->reset.flags));
force_reset_timeout(engine);
err = intel_engine_reset(engine, NULL);
@@ -680,7 +682,7 @@ static int igt_reset_fail_engine(void *arg)
out:
pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
skip:
- clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
st_engine_heartbeat_enable(engine);
intel_context_put(ce);
@@ -735,7 +737,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
reset_engine_count = i915_reset_engine_count(global, engine);
st_engine_heartbeat_disable(engine);
- set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
+ &gt->reset.flags));
count = 0;
do {
struct i915_request *rq = NULL;
@@ -825,7 +828,7 @@ restore:
if (err)
break;
} while (time_before(jiffies, end_time));
- clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
st_engine_heartbeat_enable(engine);
pr_info("%s: Completed %lu %s resets\n",
engine->name, count, active ? "active" : "idle");
@@ -1043,7 +1046,8 @@ static int __igt_reset_engines(struct intel_gt *gt,
yield(); /* start all threads before we begin */
st_engine_heartbeat_disable_no_pm(engine);
- set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
+ &gt->reset.flags));
do {
struct i915_request *rq = NULL;
struct intel_selftest_saved_policy saved;
@@ -1166,7 +1170,7 @@ restore:
if (err)
break;
} while (time_before(jiffies, end_time));
- clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
st_engine_heartbeat_enable_no_pm(engine);
pr_info("i915_reset_engine(%s:%s): %lu resets\n",
@@ -2015,7 +2019,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_reset_evict_fence),
SUBTEST(igt_handle_error),
};
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
intel_wakeref_t wakeref;
int err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index b0977a3b699b..618c905daa19 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1847,5 +1847,5 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index 12ef2837c89b..fa4293d2944f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -49,6 +49,7 @@ static int copy(struct intel_migrate *migrate,
if (IS_ERR(src))
return 0;
+ sz = src->base.size;
dst = i915_gem_object_create_internal(i915, sz);
if (IS_ERR(dst))
goto err_free_src;
@@ -441,7 +442,7 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915)
SUBTEST(thread_global_copy),
SUBTEST(thread_global_clear),
};
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
if (!gt->migrate.context)
return 0;
@@ -464,7 +465,7 @@ create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
return obj;
}
- i915_gem_object_trylock(obj);
+ i915_gem_object_trylock(obj, NULL);
err = i915_gem_object_pin_pages(obj);
if (err) {
i915_gem_object_unlock(obj);
@@ -657,7 +658,7 @@ int intel_migrate_perf_selftests(struct drm_i915_private *i915)
SUBTEST(perf_clear_blt),
SUBTEST(perf_copy_blt),
};
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
if (intel_gt_is_wedged(gt))
return 0;
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index 13d25bf2a94a..c1d861333c44 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -451,5 +451,5 @@ int intel_mocs_live_selftests(struct drm_i915_private *i915)
if (!get_mocs_settings(i915, &table))
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 7a50c9f4071b..8a873f6bda7f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -376,7 +376,7 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_atomic_reset),
SUBTEST(igt_atomic_engine_reset),
};
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
if (!intel_has_gpu_reset(gt))
return 0;
diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
index 041954408d0f..70f9ac1ec2c7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c
@@ -291,8 +291,8 @@ int intel_ring_submission_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_ctx_switch_wa),
};
- if (i915->gt.submission_method > INTEL_SUBMISSION_RING)
+ if (to_gt(i915)->submission_method > INTEL_SUBMISSION_RING)
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index 9334bad131a2..b768cea5943d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -39,7 +39,7 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
static int live_slpc_clamp_min(void *arg)
{
struct drm_i915_private *i915 = arg;
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
struct intel_rps *rps = &gt->rps;
struct intel_engine_cs *engine;
@@ -166,7 +166,7 @@ static int live_slpc_clamp_min(void *arg)
static int live_slpc_clamp_max(void *arg)
{
struct drm_i915_private *i915 = arg;
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
struct intel_guc_slpc *slpc;
struct intel_rps *rps;
struct intel_engine_cs *engine;
@@ -304,7 +304,7 @@ int intel_slpc_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_slpc_clamp_min),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 72a04a1a1678..0410c402f2a3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -160,7 +160,7 @@ static int mock_hwsp_freelist(void *arg)
INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
- state.gt = &i915->gt;
+ state.gt = to_gt(i915);
/*
* Create a bunch of timelines and check that their HWSP do not overlap.
@@ -1417,8 +1417,8 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_hwsp_rollover_user),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 962e91ba3be4..0287c2573c51 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -1387,8 +1387,8 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_engine_reset_workarounds),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index ba10bd374cee..fe5d7d261797 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -144,6 +144,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
+ INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
INTEL_GUC_ACTION_LIMIT
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 31cf9fb48c7e..f9240d4baa69 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -95,6 +95,11 @@ struct intel_guc {
*/
struct ida guc_ids;
/**
+ * @num_guc_ids: Number of guc_ids, selftest feature to be able
+ * to reduce this number while testing.
+ */
+ int num_guc_ids;
+ /**
* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
*/
unsigned long *guc_ids_bitmap;
@@ -138,6 +143,8 @@ struct intel_guc {
u32 ads_regset_size;
/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
+ /** @ads_engine_usage_size: size of engine usage in the ADS */
+ u32 ads_engine_usage_size;
/** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */
struct i915_vma *lrc_desc_pool;
@@ -172,6 +179,41 @@ struct intel_guc {
/** @send_mutex: used to serialize the intel_guc_send actions */
struct mutex send_mutex;
+
+ /**
+ * @timestamp: GT timestamp object that stores a copy of the timestamp
+ * and adjusts it for overflow using a worker.
+ */
+ struct {
+ /**
+ * @lock: Lock protecting the below fields and the engine stats.
+ */
+ spinlock_t lock;
+
+ /**
+ * @gt_stamp: 64 bit extended value of the GT timestamp.
+ */
+ u64 gt_stamp;
+
+ /**
+ * @ping_delay: Period for polling the GT timestamp for
+ * overflow.
+ */
+ unsigned long ping_delay;
+
+ /**
+ * @work: Periodic work to adjust GT timestamp, engine and
+ * context usage for overflows.
+ */
+ struct delayed_work work;
+ } timestamp;
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+ /**
+ * @number_guc_id_stolen: The number of guc_ids that have been stolen
+ */
+ int number_guc_id_stolen;
+#endif
};
static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 4d5611291e28..93a975597b4d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -27,6 +27,8 @@
* | guc_policies |
* +---------------------------------------+
* | guc_gt_system_info |
+ * +---------------------------------------+
+ * | guc_engine_usage |
* +---------------------------------------+ <== static
* | guc_mmio_reg[countA] (engine 0.0) |
* | guc_mmio_reg[countB] (engine 0.1) |
@@ -48,6 +50,7 @@ struct __guc_ads_blob {
struct guc_ads ads;
struct guc_policies policies;
struct guc_gt_system_info system_info;
+ struct guc_engine_usage engine_usage;
/* From here on, location is dynamic! Refer to above diagram. */
struct guc_mmio_reg regset[0];
} __packed;
@@ -629,3 +632,21 @@ void intel_guc_ads_reset(struct intel_guc *guc)
guc_ads_private_data_reset(guc);
}
+
+/*
+ * Return the GGTT offset of the engine_usage member of the ADS blob: the
+ * GGTT offset of the ADS vma plus the member's offset within the blob.
+ */
+u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
+{
+ struct __guc_ads_blob *blob = guc->ads_blob;
+ u32 ads_base = intel_guc_ggtt_offset(guc, guc->ads_vma);
+
+ return ads_base + ptr_offset(blob, engine_usage);
+}
+
+/*
+ * Return a pointer to @engine's usage record inside the ADS blob. Records
+ * are indexed by the engine's GuC class and by ilog2() of its logical mask.
+ */
+struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine)
+{
+ struct __guc_ads_blob *blob = engine->gt->uc.guc.ads_blob;
+ u8 guc_class = engine_class_to_guc_class(engine->class);
+ int instance = ilog2(engine->logical_mask);
+
+ return &blob->engine_usage.engines[guc_class][instance];
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
index 3d85051d57e4..e74c110facff 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
@@ -6,8 +6,11 @@
#ifndef _INTEL_GUC_ADS_H_
#define _INTEL_GUC_ADS_H_
+#include <linux/types.h>
+
struct intel_guc;
struct drm_printer;
+struct intel_engine_cs;
int intel_guc_ads_create(struct intel_guc *guc);
void intel_guc_ads_destroy(struct intel_guc *guc);
@@ -15,5 +18,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc);
void intel_guc_ads_reset(struct intel_guc *guc);
void intel_guc_ads_print_policy_info(struct intel_guc *guc,
struct drm_printer *p);
+struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine);
+u32 intel_guc_engine_usage_offset(struct intel_guc *guc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index a0cc34be7b56..aa6dd6415202 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -523,6 +523,15 @@ static inline bool ct_deadlocked(struct intel_guc_ct *ct)
CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n",
ktime_ms_delta(ktime_get(), ct->stall_time),
send->status, recv->status);
+ /*
+ * Also report the send (H2G) and receive (G2H) buffer space and the
+ * descriptor head/tail values. Every message ends in a newline so each
+ * value is logged as one complete line.
+ */
+ CT_ERROR(ct, "H2G Space: %u (Bytes)\n",
+ atomic_read(&ct->ctbs.send.space) * 4);
+ CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.send.desc->head);
+ CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.send.desc->tail);
+ CT_ERROR(ct, "G2H Space: %u (Bytes)\n",
+ atomic_read(&ct->ctbs.recv.space) * 4);
+ CT_ERROR(ct, "Head: %u (Dwords)\n", ct->ctbs.recv.desc->head);
+ CT_ERROR(ct, "Tail: %u (Dwords)\n", ct->ctbs.recv.desc->tail);
+
ct->ctbs.send.broken = true;
}
@@ -582,12 +591,19 @@ static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
static int has_room_nb(struct intel_guc_ct *ct, u32 h2g_dw, u32 g2h_dw)
{
+ bool h2g = h2g_has_room(ct, h2g_dw);
+ bool g2h = g2h_has_room(ct, g2h_dw);
+
lockdep_assert_held(&ct->ctbs.send.lock);
- if (unlikely(!h2g_has_room(ct, h2g_dw) || !g2h_has_room(ct, g2h_dw))) {
+ if (unlikely(!h2g || !g2h)) {
if (ct->stall_time == KTIME_MAX)
ct->stall_time = ktime_get();
+ /* Be paranoid and kick G2H tasklet to free credits */
+ if (!g2h)
+ tasklet_hi_schedule(&ct->receive_tasklet);
+
if (unlikely(ct_deadlocked(ct)))
return -EPIPE;
else
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 196424be0998..31420ce1ce6b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -40,9 +40,8 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
}
}
-/* Copy RSA signature from the fw image to HW for verification */
-static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
- struct intel_uncore *uncore)
+static int guc_xfer_rsa_mmio(struct intel_uc_fw *guc_fw,
+ struct intel_uncore *uncore)
{
u32 rsa[UOS_RSA_SCRATCH_COUNT];
size_t copied;
@@ -58,6 +57,27 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
return 0;
}
+/*
+ * Provide the RSA signature through memory: write the GGTT offset of the
+ * firmware's rsa_data vma into UOS_RSA_SCRATCH(0). Always returns 0.
+ */
+static int guc_xfer_rsa_vma(struct intel_uc_fw *guc_fw,
+ struct intel_uncore *uncore)
+{
+ struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw);
+ u32 rsa_ggtt_offset = intel_guc_ggtt_offset(guc, guc_fw->rsa_data);
+
+ intel_uncore_write(uncore, UOS_RSA_SCRATCH(0), rsa_ggtt_offset);
+
+ return 0;
+}
+
+/*
+ * Copy RSA signature from the fw image to HW for verification.
+ *
+ * When the firmware carries an rsa_data vma the signature is handed over via
+ * that vma, otherwise it is written through the MMIO scratch registers.
+ */
+static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
+ struct intel_uncore *uncore)
+{
+ if (!guc_fw->rsa_data)
+ return guc_xfer_rsa_mmio(guc_fw, uncore);
+
+ return guc_xfer_rsa_vma(guc_fw, uncore);
+}
+
/*
* Read the GuC status register (GUC_STATUS) and store it in the
* specified location; then return a boolean indicating whether
@@ -142,7 +162,10 @@ int intel_guc_fw_upload(struct intel_guc *guc)
/*
* Note that GuC needs the CSS header plus uKernel code to be copied
* by the DMA engine in one operation, whereas the RSA signature is
- * loaded via MMIO.
+ * loaded separately, either by copying it to the UOS_RSA_SCRATCH
+ * register (if key size <= 256) or through a ggtt-pinned vma (if key
+ * size > 256). The RSA size and therefore the way we provide it to the
+ * HW is fixed for each platform and hard-coded in the bootrom.
*/
ret = guc_xfer_rsa(&guc->fw, uncore);
if (ret)
@@ -164,6 +187,6 @@ int intel_guc_fw_upload(struct intel_guc *guc)
return 0;
out:
- intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_FAIL);
+ intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 722933e26347..7072e30e99f4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -294,6 +294,19 @@ struct guc_ads {
u32 reserved[15];
} __packed;
+/*
+ * Engine usage stats.
+ *
+ * One packed record of eight u32 words (four named fields plus reserved
+ * words). intel_guc_engine_usage() returns a pointer to the record of a
+ * given engine inside the ADS blob.
+ * NOTE(review): the exact meaning and units of each field are defined by
+ * the GuC firmware interface and are not visible here; confirm against the
+ * GuC ABI before relying on them.
+ */
+struct guc_engine_usage_record {
+ u32 current_context_index;
+ u32 last_switch_in_stamp;
+ u32 reserved0;
+ u32 total_runtime;
+ u32 reserved1[4];
+} __packed;
+
+/*
+ * Table of usage records, indexed as [GuC engine class][instance]; embedded
+ * in the ADS blob as its engine_usage member.
+ */
+struct guc_engine_usage {
+ struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+} __packed;
+
/* GuC logging structures */
enum guc_log_buffer_type {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index ac1ee1d5ce10..fe6ab7550a14 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -15,9 +15,12 @@
struct intel_guc;
-#ifdef CONFIG_DRM_I915_DEBUG_GUC
+#if defined(CONFIG_DRM_I915_DEBUG_GUC)
#define CRASH_BUFFER_SIZE SZ_2M
#define DEBUG_BUFFER_SIZE SZ_16M
+#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
+#define CRASH_BUFFER_SIZE SZ_1M
+#define DEBUG_BUFFER_SIZE SZ_2M
#else
#define CRASH_BUFFER_SIZE SZ_8K
#define DEBUG_BUFFER_SIZE SZ_64K
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
index 46026c2c1722..ddfbe334689f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c
@@ -10,28 +10,80 @@
#include "intel_guc.h"
#include "intel_guc_log.h"
#include "intel_guc_log_debugfs.h"
+#include "intel_uc.h"
+
+/*
+ * Estimate the text buffer size needed to dump @obj as hex words.
+ * Returns PAGE_SIZE when there is no object; otherwise the object size
+ * scaled by 11/4 (rounded up), plus one page of padding, page aligned.
+ */
+static u32 obj_to_guc_log_dump_size(struct drm_i915_gem_object *obj)
+{
+ u32 chars;
+
+ if (!obj)
+ return PAGE_SIZE;
+
+ /* "0x%08x 0x%08x 0x%08x 0x%08x\n" => 16 bytes -> 44 chars => x2.75 */
+ chars = ((obj->base.size * 11) + 3) / 4;
+
+ /* Add padding for final blank line, any extra header info, etc. */
+ return PAGE_ALIGN(chars + PAGE_SIZE);
+}
+
+/*
+ * Size hint for the guc_log_dump debugfs file: one page when the GuC is not
+ * supported or no log vma exists, otherwise derived from the log object.
+ */
+static u32 guc_log_dump_size(struct intel_guc_log *log)
+{
+ struct intel_guc *guc = log_to_guc(log);
+
+ if (!intel_guc_is_supported(guc) || !log->vma)
+ return PAGE_SIZE;
+
+ return obj_to_guc_log_dump_size(log->vma->obj);
+}
+/*
+ * seq_file show callback: dump the GuC log through a drm_printer wrapping
+ * @m and return the result of intel_guc_log_dump().
+ */
static int guc_log_dump_show(struct seq_file *m, void *data)
{
struct drm_printer p = drm_seq_file_printer(m);
+ int ret;
- return intel_guc_log_dump(m->private, &p, false);
+ ret = intel_guc_log_dump(m->private, &p, false);
+
+ /* On debug builds, warn once if the dump overflowed the seq_file. */
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && seq_has_overflowed(m))
+ pr_warn_once("preallocated size:%zx for %s exceeded\n",
+ m->size, __func__);
+
+ return ret;
+}
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(guc_log_dump, guc_log_dump_size);
+
+/*
+ * Size hint for the guc_load_err_log_dump debugfs file: one page when the
+ * GuC is not supported, otherwise derived from the uc's load_err_log object.
+ */
+static u32 guc_load_err_dump_size(struct intel_guc_log *log)
+{
+ struct intel_guc *guc = log_to_guc(log);
+ struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
+
+ if (intel_guc_is_supported(guc))
+ return obj_to_guc_log_dump_size(uc->load_err_log);
+
+ return PAGE_SIZE;
+}
-DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_log_dump);
+/*
+ * seq_file show callback: dump the GuC load-error log through a drm_printer
+ * wrapping @m and return the result of intel_guc_log_dump().
+ */
static int guc_load_err_log_dump_show(struct seq_file *m, void *data)
{
struct drm_printer p = drm_seq_file_printer(m);
+ int ret;
+
+ ret = intel_guc_log_dump(m->private, &p, true);
+
+ /*
+ * The overflow check has to follow the dump to be able to observe an
+ * overflow caused by it (same order as guc_log_dump_show()).
+ */
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && seq_has_overflowed(m))
+ pr_warn_once("preallocated size:%zx for %s exceeded\n",
+ m->size, __func__);
+
- return intel_guc_log_dump(m->private, &p, true);
+ return ret;
}
-DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(guc_load_err_log_dump);
+DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE_WITH_SIZE(guc_load_err_log_dump, guc_load_err_dump_size);
static int guc_log_level_get(void *data, u64 *val)
{
struct intel_guc_log *log = data;
- if (!intel_guc_is_used(log_to_guc(log)))
+ if (!log->vma)
return -ENODEV;
*val = intel_guc_log_get_level(log);
@@ -43,7 +95,7 @@ static int guc_log_level_set(void *data, u64 val)
{
struct intel_guc_log *log = data;
- if (!intel_guc_is_used(log_to_guc(log)))
+ if (!log->vma)
return -ENODEV;
return intel_guc_log_set_level(log, val);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 65a3e7fdb2b2..13b27b8ff74e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -79,29 +79,6 @@ static void slpc_mem_set_disabled(struct slpc_shared_data *data,
slpc_mem_set_param(data, enable_id, 0);
}
-int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
-{
- struct intel_guc *guc = slpc_to_guc(slpc);
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
- u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
- int err;
-
- GEM_BUG_ON(slpc->vma);
-
- err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr);
- if (unlikely(err)) {
- drm_err(&i915->drm,
- "Failed to allocate SLPC struct (err=%pe)\n",
- ERR_PTR(err));
- return err;
- }
-
- slpc->max_freq_softlimit = 0;
- slpc->min_freq_softlimit = 0;
-
- return err;
-}
-
static u32 slpc_get_state(struct intel_guc_slpc *slpc)
{
struct slpc_shared_data *data;
@@ -203,6 +180,86 @@ static int slpc_unset_param(struct intel_guc_slpc *slpc,
return guc_action_slpc_unset_param(guc, id);
}
+/*
+ * Set SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ to @freq without updating
+ * min_freq_softlimit. The caller must hold slpc->lock and is responsible for
+ * bounds checking. Returns -ENODEV if the GuC is not ready, otherwise whatever
+ * slpc_set_param() returns (a non-zero value is logged as a failure).
+ */
+static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
+{
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ struct intel_guc *guc = slpc_to_guc(slpc);
+ intel_wakeref_t wakeref;
+ int ret = 0;
+
+ lockdep_assert_held(&slpc->lock);
+
+ if (!intel_guc_is_ready(guc))
+ return -ENODEV;
+
+ /*
+ * This function is a little different as compared to
+ * intel_guc_slpc_set_min_freq(). Softlimit will not be updated
+ * here since this is used to temporarily change min freq,
+ * for example, during a waitboost. Caller is responsible for
+ * checking bounds.
+ */
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+ ret = slpc_set_param(slpc,
+ SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+ freq);
+ /* Terminate the message with a newline like the other drm_err() users. */
+ if (ret)
+ drm_err(&i915->drm, "Unable to force min freq to %u: %d\n",
+ freq, ret);
+ }
+
+ return ret;
+}
+/*
+ * Work handler for slpc->boost_work. With slpc->lock held, and only if
+ * num_waiters is non-zero, it forces the min frequency to slpc->boost_freq
+ * and increments num_boosts. The return value of slpc_force_min_freq() is
+ * not checked, so num_boosts is incremented regardless of that result.
+ */
+static void slpc_boost_work(struct work_struct *work)
+{
+ struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
+
+ /*
+ * Raise min freq to boost. It's possible that
+ * this is greater than current max. But it will
+ * certainly be limited by RP0. An error setting
+ * the min param is not fatal.
+ */
+ mutex_lock(&slpc->lock);
+ if (atomic_read(&slpc->num_waiters)) {
+ slpc_force_min_freq(slpc, slpc->boost_freq);
+ slpc->num_boosts++;
+ }
+ mutex_unlock(&slpc->lock);
+}
+/*
+ * Allocate and map the SLPC shared-data buffer and reset the software state
+ * kept in @slpc (softlimits, boost frequency, waiter/boost counters, lock and
+ * boost work item). Returns 0 on success or the error reported by
+ * intel_guc_allocate_and_map_vma().
+ */
+int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
+{
+ struct intel_guc *guc = slpc_to_guc(slpc);
+ struct drm_i915_private *i915 = slpc_to_i915(slpc);
+ u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+ int err;
+ /* slpc->vma must not be set yet when this is called. */
+ GEM_BUG_ON(slpc->vma);
+ /* On success slpc->vma and slpc->vaddr describe the new buffer. */
+ err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr);
+ if (unlikely(err)) {
+ drm_err(&i915->drm,
+ "Failed to allocate SLPC struct (err=%pe)\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ slpc->max_freq_softlimit = 0;
+ slpc->min_freq_softlimit = 0;
+ /* A boost_freq of 0 is later replaced by rp0_freq in slpc_get_rp_values(). */
+ slpc->boost_freq = 0;
+ atomic_set(&slpc->num_waiters, 0);
+ slpc->num_boosts = 0;
+
+ mutex_init(&slpc->lock);
+ INIT_WORK(&slpc->boost_work, slpc_boost_work);
+ /* err is 0 on this path. */
+ return err;
+}
+
static const char *slpc_global_state_to_string(enum slpc_global_state state)
{
switch (state) {
@@ -393,7 +450,11 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
val > slpc->max_freq_softlimit)
return -EINVAL;
+ /* Need a lock now since waitboost can be modifying min as well */
+ mutex_lock(&slpc->lock);
+
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+
ret = slpc_set_param(slpc,
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
val);
@@ -406,6 +467,8 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
if (!ret)
slpc->min_freq_softlimit = val;
+ mutex_unlock(&slpc->lock);
+
return ret;
}
@@ -522,6 +585,9 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
GT_FREQUENCY_MULTIPLIER;
slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) *
GT_FREQUENCY_MULTIPLIER;
+
+ if (!slpc->boost_freq)
+ slpc->boost_freq = slpc->rp0_freq;
}
/*
@@ -557,7 +623,7 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
if (unlikely(ret < 0))
return ret;
- intel_guc_pm_intrmsk_enable(&i915->gt);
+ intel_guc_pm_intrmsk_enable(to_gt(i915));
slpc_get_rp_values(slpc);
@@ -588,6 +654,47 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
return 0;
}
+int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val)
+{
+ int ret = 0;
+ /*
+ * Update slpc->boost_freq to @val. Values outside [min_freq, rp0_freq]
+ * are rejected with -EINVAL before the lock is taken. Returns 0 on
+ * success (including when @val already equals boost_freq) or -EIO if
+ * applying the new value failed.
+ */
+ if (val < slpc->min_freq || val > slpc->rp0_freq)
+ return -EINVAL;
+
+ mutex_lock(&slpc->lock);
+
+ if (slpc->boost_freq != val) {
+ /* Apply only if there are active waiters */
+ if (atomic_read(&slpc->num_waiters)) {
+ ret = slpc_force_min_freq(slpc, val);
+ if (ret) {
+ ret = -EIO; /* any failure is reported as -EIO; boost_freq is left unchanged */
+ goto done;
+ }
+ }
+ /* Record the new value; with no waiters it is only stored here. */
+ slpc->boost_freq = val;
+ }
+
+done:
+ mutex_unlock(&slpc->lock);
+ return ret;
+}
+/*
+ * Drop one boost waiter. slpc->lock is held around the decrement and the
+ * frequency update; the return value of slpc_force_min_freq() is ignored.
+ */
+void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc)
+{
+ /*
+ * Return min back to the softlimit.
+ * This is called during request retire,
+ * so we don't need to fail that if the
+ * set_param fails.
+ */
+ mutex_lock(&slpc->lock);
+ if (atomic_dec_and_test(&slpc->num_waiters)) /* true only when the count reaches zero */
+ slpc_force_min_freq(slpc, slpc->min_freq_softlimit);
+ mutex_unlock(&slpc->lock);
+}
+
int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
@@ -611,6 +718,8 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p
slpc_decode_max_freq(slpc));
drm_printf(p, "\tMin freq: %u MHz\n",
slpc_decode_min_freq(slpc));
+ drm_printf(p, "\twaitboosts: %u\n",
+ slpc->num_boosts);
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index e45054d5b9b4..0caa8fee3c04 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -34,9 +34,12 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc);
void intel_guc_slpc_fini(struct intel_guc_slpc *slpc);
int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val);
int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val);
+int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val);
int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val);
int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val);
int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p);
void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
+void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
+void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
index 41d13527666f..bf5b9a563c09 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
@@ -6,6 +6,9 @@
#ifndef _INTEL_GUC_SLPC_TYPES_H_
#define _INTEL_GUC_SLPC_TYPES_H_
+#include <linux/atomic.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
#include <linux/types.h>
#define SLPC_RESET_TIMEOUT_MS 5
@@ -20,10 +23,20 @@ struct intel_guc_slpc {
u32 min_freq;
u32 rp0_freq;
u32 rp1_freq;
+ u32 boost_freq;
/* frequency softlimits */
u32 min_freq_softlimit;
u32 max_freq_softlimit;
+
+ /* Protects set/reset of boost freq
+ * and value of num_waiters
+ */
+ struct mutex lock;
+
+ struct work_struct boost_work;
+ atomic_t num_waiters;
+ u32 num_boosts;
};
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 4333d139b090..e21505cb8d4c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -14,6 +14,7 @@
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_requests.h"
@@ -22,6 +23,7 @@
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"
+#include "intel_guc_ads.h"
#include "intel_guc_submission.h"
#include "i915_drv.h"
@@ -144,7 +146,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
* use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
* multi-lrc.
*/
-#define NUMBER_MULTI_LRC_GUC_ID (GUC_MAX_LRC_DESCRIPTORS / 16)
+#define NUMBER_MULTI_LRC_GUC_ID(guc) \
+ ((guc)->submission_state.num_guc_ids / 16)
/*
* Below is a set of functions which control the GuC scheduling state which
@@ -1039,8 +1042,6 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
spin_unlock(&ce->guc_state.lock);
- GEM_BUG_ON(!do_put && !destroyed);
-
if (pending_enable || destroyed || deregister) {
decr_outstanding_submission_g2h(guc);
if (deregister)
@@ -1078,6 +1079,271 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
xa_unlock_irqrestore(&guc->context_lookup, flags);
}
+/*
+ * GuC stores busyness stats for each engine at context in/out boundaries. A
+ * context 'in' logs execution start time, 'out' adds in -> out delta to total.
+ * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
+ * GuC.
+ *
+ * __i915_pmu_event_read samples engine busyness. When sampling, if context id
+ * is valid (!= ~0) and start is non-zero, the engine is considered to be
+ * active. For an active engine total busyness = total + (now - start), where
+ * 'now' is the time at which the busyness is sampled. For inactive engine,
+ * total busyness = total.
+ *
+ * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
+ *
+ * The start and total values provided by GuC are 32 bits and wrap around in a
+ * few minutes. Since perf pmu provides busyness as 64 bit monotonically
+ * increasing ns values, there is a need for this implementation to account for
+ * overflows and extend the GuC provided values to 64 bits before returning
+ * busyness to the user. In order to do that, a worker runs periodically with
+ * a period of 1/8th of the time it takes for the timestamp to wrap (i.e. about
+ * once every 27 seconds for a gt clock frequency of 19.2 MHz).
+ */
+
+#define WRAP_TIME_CLKS U32_MAX
+#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
+/*
+ * Rebuild a 64-bit start stamp in *prev_start from the 32-bit @new_start,
+ * borrowing the upper 32 bits of guc->timestamp.gt_stamp. The comparisons
+ * below operate on u32 values, so the subtractions wrap modulo 2^32.
+ */
+static void
+__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
+{
+ u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
+ u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
+ /* Low word unchanged since the previous call: keep *prev_start as is. */
+ if (new_start == lower_32_bits(*prev_start))
+ return;
+ /*
+ * new_start is numerically below the last sampled low word but at most
+ * POLL_TIME_CLKS ahead of it modulo 2^32: use the next upper word.
+ */
+ if (new_start < gt_stamp_last &&
+ (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
+ gt_stamp_hi++;
+ /*
+ * new_start is numerically above the last sampled low word but at most
+ * POLL_TIME_CLKS behind it modulo 2^32: use the previous upper word,
+ * unless the upper word is already 0.
+ */
+ if (new_start > gt_stamp_last &&
+ (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
+ gt_stamp_hi--;
+
+ *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
+}
+/*
+ * Refresh engine->stats.guc from the usage record returned by
+ * intel_guc_engine_usage(): updates the running flag, the extended start
+ * stamp and the accumulated total. Caller must hold guc->timestamp.lock.
+ */
+static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
+{
+ struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
+ struct intel_engine_guc_stats *stats = &engine->stats.guc;
+ struct intel_guc *guc = &engine->gt->uc.guc;
+ u32 last_switch = rec->last_switch_in_stamp;
+ u32 ctx_id = rec->current_context_index;
+ u32 total = rec->total_runtime;
+
+ lockdep_assert_held(&guc->timestamp.lock);
+ /* Running means: context id is not ~0 and the switch-in stamp is non-zero. */
+ stats->running = ctx_id != ~0U && last_switch;
+ if (stats->running)
+ __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
+
+ /*
+ * Instead of adjusting the total for overflow, just add the
+ * difference from previous sample stats->total_gt_clks
+ */
+ if (total && total != ~0U) { /* totals of 0 and ~0 are skipped */
+ stats->total_gt_clks += (u32)(total - stats->prev_total);
+ stats->prev_total = total;
+ }
+}
+/*
+ * Sample the engine's 32-bit timestamp register and fold it into the 64-bit
+ * guc->timestamp.gt_stamp; *now receives ktime_get() taken right after the
+ * register read. Caller must hold guc->timestamp.lock.
+ * NOTE(review): the busyness overview comment names GUCPMTIMESTAMP, while this
+ * code reads RING_TIMESTAMP(engine->mmio_base) - confirm which is intended.
+ */
+static void guc_update_pm_timestamp(struct intel_guc *guc,
+ struct intel_engine_cs *engine,
+ ktime_t *now)
+{
+ u32 gt_stamp_now, gt_stamp_hi;
+
+ lockdep_assert_held(&guc->timestamp.lock);
+
+ gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
+ gt_stamp_now = intel_uncore_read(engine->uncore,
+ RING_TIMESTAMP(engine->mmio_base));
+ *now = ktime_get();
+ /* A smaller low word than the stored one is counted as exactly one wrap. */
+ if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp))
+ gt_stamp_hi++;
+
+ guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now;
+}
+
+/*
+ * Return the accumulated busy time of @engine as a ktime_t. Unlike the
+ * execlist mode of submission, total and active times are tracked in gt
+ * clocks here and converted to ns before returning. The *now parameter is
+ * retained to return the cpu time at which the busyness was sampled.
+ */
+static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
+{
+ struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
+ struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
+ struct intel_gt *gt = engine->gt;
+ struct intel_guc *guc = &gt->uc.guc;
+ u64 total, gt_stamp_saved;
+ unsigned long flags;
+ u32 reset_count;
+ bool in_reset;
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+
+ /*
+ * If a reset happened, we risk reading partially updated engine
+ * busyness from GuC, so we just use the driver stored copy of busyness.
+ * Synchronize with gt reset using reset_count and the
+ * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
+ * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
+ * usable by checking the flag afterwards.
+ */
+ reset_count = i915_reset_count(gpu_error);
+ in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);
+ /* Default sample time; overwritten by guc_update_pm_timestamp() below when the gt is awake. */
+ *now = ktime_get();
+
+ /*
+ * The active busyness depends on start_gt_clk and gt_stamp.
+ * gt_stamp is updated by i915 only when gt is awake and the
+ * start_gt_clk is derived from GuC state. To get a consistent
+ * view of activity, we query the GuC state only if gt is awake.
+ */
+ if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
+ stats_saved = *stats; /* snapshot so the update can be rolled back */
+ gt_stamp_saved = guc->timestamp.gt_stamp;
+ guc_update_engine_gt_clks(engine);
+ guc_update_pm_timestamp(guc, engine, now);
+ intel_gt_pm_put_async(gt);
+ if (i915_reset_count(gpu_error) != reset_count) { /* reset count changed while sampling: discard the update */
+ *stats = stats_saved;
+ guc->timestamp.gt_stamp = gt_stamp_saved;
+ }
+ }
+ /* Completed runtime, plus the in-flight interval when a context is running. */
+ total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
+ if (stats->running) {
+ u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
+
+ total += intel_gt_clock_interval_to_ns(gt, clk);
+ }
+
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+
+ return ns_to_ktime(total);
+}
+/*
+ * Called from intel_guc_submission_reset_prepare(): stops the timestamp
+ * worker, takes one last sample for every engine and clears each engine's
+ * prev_total.
+ */
+static void __reset_guc_busyness_stats(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long flags;
+ ktime_t unused;
+ /* Synchronous cancel, done before guc->timestamp.lock is taken. */
+ cancel_delayed_work_sync(&guc->timestamp.work);
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+
+ for_each_engine(engine, gt, id) {
+ guc_update_pm_timestamp(guc, engine, &unused);
+ guc_update_engine_gt_clks(engine);
+ engine->stats.guc.prev_total = 0;
+ }
+
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+/*
+ * Under guc->timestamp.lock, refresh the 64-bit gt timestamp and the
+ * per-engine busyness stats for every engine of the gt. The cpu time written
+ * by guc_update_pm_timestamp() is discarded.
+ */
+static void __update_guc_busyness_stats(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long flags;
+ ktime_t unused;
+
+ spin_lock_irqsave(&guc->timestamp.lock, flags);
+ for_each_engine(engine, gt, id) {
+ guc_update_pm_timestamp(guc, engine, &unused);
+ guc_update_engine_gt_clks(engine);
+ }
+ spin_unlock_irqrestore(&guc->timestamp.lock, flags);
+}
+/*
+ * Delayed-work handler for guc->timestamp.work: refreshes the busyness stats
+ * under a runtime-pm wakeref and queues itself again after
+ * guc->timestamp.ping_delay.
+ */
+static void guc_timestamp_ping(struct work_struct *wrk)
+{
+ struct intel_guc *guc = container_of(wrk, typeof(*guc),
+ timestamp.work.work);
+ struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
+ struct intel_gt *gt = guc_to_gt(guc);
+ intel_wakeref_t wakeref;
+ int srcu, ret;
+
+ /*
+ * Synchronize with gt reset to make sure the worker does not
+ * corrupt the engine/guc stats.
+ */
+ ret = intel_gt_reset_trylock(gt, &srcu);
+ if (ret)
+ return; /* note: the work is not re-queued on this path */
+
+ with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+ __update_guc_busyness_stats(guc);
+
+ intel_gt_reset_unlock(gt, srcu);
+
+ mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
+ guc->timestamp.ping_delay);
+}
+/*
+ * Send INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF with the offset obtained from
+ * intel_guc_engine_usage_offset() followed by a 0 dword. Returns the result
+ * of intel_guc_send().
+ */
+static int guc_action_enable_usage_stats(struct intel_guc *guc)
+{
+ u32 offset = intel_guc_engine_usage_offset(guc);
+ u32 action[] = {
+ INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
+ offset,
+ 0,
+ };
+
+ return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+/*
+ * Called from intel_guc_submission_enable(): queues the timestamp worker and
+ * then, under a runtime-pm wakeref, sends the usage-stats enable action. A
+ * failure of that action is only logged; nothing is returned to the caller.
+ */
+static void guc_init_engine_stats(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ intel_wakeref_t wakeref;
+
+ mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
+ guc->timestamp.ping_delay);
+
+ with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
+ int ret = guc_action_enable_usage_stats(guc);
+
+ if (ret)
+ drm_err(&gt->i915->drm,
+ "Failed to enable usage stats: %d!\n", ret);
+ }
+}
+/*
+ * Park hook for busyness tracking: does nothing unless GuC submission is
+ * initialized; otherwise cancels the pending timestamp work (non-sync
+ * variant) and takes one more busyness sample.
+ */
+void intel_guc_busyness_park(struct intel_gt *gt)
+{
+ struct intel_guc *guc = &gt->uc.guc;
+
+ if (!guc_submission_initialized(guc))
+ return;
+
+ cancel_delayed_work(&guc->timestamp.work);
+ __update_guc_busyness_stats(guc);
+}
+/*
+ * Unpark hook for busyness tracking: does nothing unless GuC submission is
+ * initialized; otherwise (re)queues the timestamp work on system_highpri_wq
+ * with guc->timestamp.ping_delay.
+ */
+void intel_guc_busyness_unpark(struct intel_gt *gt)
+{
+ struct intel_guc *guc = &gt->uc.guc;
+
+ if (!guc_submission_initialized(guc))
+ return;
+
+ mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
+ guc->timestamp.ping_delay);
+}
+
static inline bool
submission_disabled(struct intel_guc *guc)
{
@@ -1139,6 +1405,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
intel_gt_park_heartbeats(guc_to_gt(guc));
disable_submission(guc);
guc->interrupts.disable(guc);
+ __reset_guc_busyness_stats(guc);
/* Flush IRQ handler */
spin_lock_irq(&guc_to_gt(guc)->irq_lock);
@@ -1485,6 +1752,7 @@ static void destroyed_worker_func(struct work_struct *w);
*/
int intel_guc_submission_init(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
int ret;
if (guc->lrc_desc_pool)
@@ -1509,10 +1777,14 @@ int intel_guc_submission_init(struct intel_guc *guc)
destroyed_worker_func);
guc->submission_state.guc_ids_bitmap =
- bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID, GFP_KERNEL);
+ bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
if (!guc->submission_state.guc_ids_bitmap)
return -ENOMEM;
+ spin_lock_init(&guc->timestamp.lock);
+ INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
+ guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
+
return 0;
}
@@ -1599,13 +1871,13 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
if (intel_context_is_parent(ce))
ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
- NUMBER_MULTI_LRC_GUC_ID,
+ NUMBER_MULTI_LRC_GUC_ID(guc),
order_base_2(ce->parallel.number_children
+ 1));
else
ret = ida_simple_get(&guc->submission_state.guc_ids,
- NUMBER_MULTI_LRC_GUC_ID,
- GUC_MAX_LRC_DESCRIPTORS,
+ NUMBER_MULTI_LRC_GUC_ID(guc),
+ guc->submission_state.num_guc_ids,
GFP_KERNEL | __GFP_RETRY_MAYFAIL |
__GFP_NOWARN);
if (unlikely(ret < 0))
@@ -1663,14 +1935,18 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
GEM_BUG_ON(intel_context_is_parent(cn));
list_del_init(&cn->guc_id.link);
- ce->guc_id = cn->guc_id;
+ ce->guc_id.id = cn->guc_id.id;
- spin_lock(&ce->guc_state.lock);
+ spin_lock(&cn->guc_state.lock);
clr_context_registered(cn);
- spin_unlock(&ce->guc_state.lock);
+ spin_unlock(&cn->guc_state.lock);
set_context_guc_id_invalid(cn);
+#ifdef CONFIG_DRM_I915_SELFTEST
+ guc->number_guc_id_stolen++;
+#endif
+
return 0;
} else {
return -EAGAIN;
@@ -2374,7 +2650,6 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
unsigned long flags;
bool disabled;
- lockdep_assert_held(&guc->submission_state.lock);
GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
@@ -2390,7 +2665,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
}
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
if (unlikely(disabled)) {
- __release_guc_id(guc, ce);
+ release_guc_id(guc, ce);
__guc_context_destroy(ce);
return;
}
@@ -2424,36 +2699,48 @@ static void __guc_context_destroy(struct intel_context *ce)
static void guc_flush_destroyed_contexts(struct intel_guc *guc)
{
- struct intel_context *ce, *cn;
+ struct intel_context *ce;
unsigned long flags;
GEM_BUG_ON(!submission_disabled(guc) &&
guc_submission_initialized(guc));
- spin_lock_irqsave(&guc->submission_state.lock, flags);
- list_for_each_entry_safe(ce, cn,
- &guc->submission_state.destroyed_contexts,
- destroyed_link) {
- list_del_init(&ce->destroyed_link);
- __release_guc_id(guc, ce);
+ while (!list_empty(&guc->submission_state.destroyed_contexts)) {
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
+ struct intel_context,
+ destroyed_link);
+ if (ce)
+ list_del_init(&ce->destroyed_link);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+ /* list_empty() above ran without the lock, so the list may already be empty here. */
+ if (!ce)
+ break;
+ /* The entry is off the list; its guc_id is released and it is destroyed with submission_state.lock dropped. */
+ release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
- spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
static void deregister_destroyed_contexts(struct intel_guc *guc)
{
- struct intel_context *ce, *cn;
+ struct intel_context *ce;
unsigned long flags;
- spin_lock_irqsave(&guc->submission_state.lock, flags);
- list_for_each_entry_safe(ce, cn,
- &guc->submission_state.destroyed_contexts,
- destroyed_link) {
- list_del_init(&ce->destroyed_link);
+ while (!list_empty(&guc->submission_state.destroyed_contexts)) {
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
+ struct intel_context,
+ destroyed_link);
+ if (ce)
+ list_del_init(&ce->destroyed_link);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+ /* list_empty() above ran without the lock, so the list may already be empty here. */
+ if (!ce)
+ break;
+ /* One entry is popped per iteration; guc_lrc_desc_unpin() runs with submission_state.lock dropped. */
guc_lrc_desc_unpin(ce);
}
- spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
static void destroyed_worker_func(struct work_struct *w)
@@ -3370,7 +3657,9 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_flush = gen12_emit_flush_xcs;
}
engine->set_default_submission = guc_set_default_submission;
+ engine->busyness = guc_engine_busyness;
+ engine->flags |= I915_ENGINE_SUPPORTS_STATS;
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
engine->flags |= I915_ENGINE_HAS_TIMESLICES;
@@ -3469,6 +3758,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
void intel_guc_submission_enable(struct intel_guc *guc)
{
guc_init_lrc_mapping(guc);
+ guc_init_engine_stats(guc); /* queues the timestamp worker and sends the usage-stats enable action */
}
void intel_guc_submission_disable(struct intel_guc *guc)
@@ -3495,6 +3785,7 @@ static bool __guc_submission_selected(struct intel_guc *guc)
void intel_guc_submission_init_early(struct intel_guc *guc)
{
+ guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS; /* read by NUMBER_MULTI_LRC_GUC_ID() and as the upper bound in new_guc_id() */
guc->submission_supported = __guc_submission_supported(guc);
guc->submission_selected = __guc_submission_selected(guc);
}
@@ -3696,6 +3987,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
struct intel_context *ce;
+ unsigned long flags;
int desc_idx;
if (unlikely(len != 1)) {
@@ -3704,11 +3996,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
}
desc_idx = msg[0];
+
+ /*
+ * The context lookup uses the xarray but lookups only require an RCU lock
+ * not the full spinlock. So take the lock explicitly and keep it until the
+ * context has been reference count locked to ensure it can't be destroyed
+ * asynchronously until the reset is done.
+ */
+ xa_lock_irqsave(&guc->context_lookup, flags);
ce = g2h_context_lookup(guc, desc_idx);
+ if (ce)
+ intel_context_get(ce);
+ xa_unlock_irqrestore(&guc->context_lookup, flags);
+
if (unlikely(!ce))
return -EPROTO;
guc_handle_context_reset(guc, ce);
+ intel_context_put(ce);
return 0;
}
@@ -3729,11 +4034,12 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
struct intel_engine_cs *engine;
+ struct intel_gt *gt = guc_to_gt(guc);
u8 guc_class, instance;
u32 reason;
if (unlikely(len != 3)) {
- drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+ drm_err(&gt->i915->drm, "Invalid length %u", len);
return -EPROTO;
}
@@ -3743,12 +4049,19 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
engine = guc_lookup_engine(guc, guc_class, instance);
if (unlikely(!engine)) {
- drm_err(&guc_to_gt(guc)->i915->drm,
+ drm_err(&gt->i915->drm,
"Invalid engine %d:%d", guc_class, instance);
return -EPROTO;
}
- intel_gt_handle_error(guc_to_gt(guc), engine->mask,
+ /*
+ * This is an unexpected failure of a hardware feature. So, log a real
+ * error message not just the informational that comes with the reset.
+ */
+ drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
+ guc_class, instance, engine->name, reason);
+
+ intel_gt_handle_error(gt, engine->mask,
I915_ERROR_CAPTURE,
"GuC failed to reset %s (reason=0x%08x)\n",
engine->name, reason);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index c7ef44fa0c36..5a95a9f0a8e3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -28,6 +28,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
struct i915_request *hung_rq,
struct drm_printer *m);
+void intel_guc_busyness_park(struct intel_gt *gt);
+void intel_guc_busyness_unpark(struct intel_gt *gt);
bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index ff4b6869b80b..d10b227ac4aa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -54,65 +54,6 @@ void intel_huc_init_early(struct intel_huc *huc)
}
}
-static int intel_huc_rsa_data_create(struct intel_huc *huc)
-{
- struct intel_gt *gt = huc_to_gt(huc);
- struct intel_guc *guc = &gt->uc.guc;
- struct i915_vma *vma;
- size_t copied;
- void *vaddr;
- int err;
-
- err = i915_inject_probe_error(gt->i915, -ENXIO);
- if (err)
- return err;
-
- /*
- * HuC firmware will sit above GUC_GGTT_TOP and will not map
- * through GTT. Unfortunately, this means GuC cannot perform
- * the HuC auth. as the rsa offset now falls within the GuC
- * inaccessible range. We resort to perma-pinning an additional
- * vma within the accessible range that only contains the rsa
- * signature. The GuC can use this extra pinning to perform
- * the authentication since its GGTT offset will be GuC
- * accessible.
- */
- GEM_BUG_ON(huc->fw.rsa_size > PAGE_SIZE);
- vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
- i915_coherent_map_type(gt->i915,
- vma->obj, true));
- if (IS_ERR(vaddr)) {
- i915_vma_unpin_and_release(&vma, 0);
- err = PTR_ERR(vaddr);
- goto unpin_out;
- }
-
- copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size);
- i915_gem_object_unpin_map(vma->obj);
-
- if (copied < huc->fw.rsa_size) {
- err = -ENOMEM;
- goto unpin_out;
- }
-
- huc->rsa_data = vma;
-
- return 0;
-
-unpin_out:
- i915_vma_unpin_and_release(&vma, 0);
- return err;
-}
-
-static void intel_huc_rsa_data_destroy(struct intel_huc *huc)
-{
- i915_vma_unpin_and_release(&huc->rsa_data, 0);
-}
-
int intel_huc_init(struct intel_huc *huc)
{
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
@@ -122,21 +63,10 @@ int intel_huc_init(struct intel_huc *huc)
if (err)
goto out;
- /*
- * HuC firmware image is outside GuC accessible range.
- * Copy the RSA signature out of the image into
- * a perma-pinned region set aside for it
- */
- err = intel_huc_rsa_data_create(huc);
- if (err)
- goto out_fini;
-
intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOADABLE);
return 0;
-out_fini:
- intel_uc_fw_fini(&huc->fw);
out:
i915_probe_error(i915, "failed with %d\n", err);
return err;
@@ -147,7 +77,6 @@ void intel_huc_fini(struct intel_huc *huc)
if (!intel_uc_fw_is_loadable(&huc->fw))
return;
- intel_huc_rsa_data_destroy(huc);
intel_uc_fw_fini(&huc->fw);
}
@@ -177,7 +106,7 @@ int intel_huc_auth(struct intel_huc *huc)
goto fail;
ret = intel_guc_auth_huc(guc,
- intel_guc_ggtt_offset(guc, huc->rsa_data));
+ intel_guc_ggtt_offset(guc, huc->fw.rsa_data));
if (ret) {
DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret);
goto fail;
@@ -199,7 +128,7 @@ int intel_huc_auth(struct intel_huc *huc)
fail:
i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
- intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_FAIL);
+ intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index daee43b661d4..ae8c8a6c8cc8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -15,8 +15,6 @@ struct intel_huc {
struct intel_uc_fw fw;
/* HuC-specific additions */
- struct i915_vma *rsa_data;
-
struct {
i915_reg_t reg;
u32 mask;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 8f17005ce85f..09ed29df67bc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -8,6 +8,7 @@
#include "intel_guc.h"
#include "intel_guc_ads.h"
#include "intel_guc_submission.h"
+#include "gt/intel_rps.h"
#include "intel_uc.h"
#include "i915_drv.h"
@@ -462,6 +463,8 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
+ intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
+
while (attempts--) {
/*
* Always reset the GuC just before (re)loading, so
@@ -499,6 +502,9 @@ static int __uc_init_hw(struct intel_uc *uc)
ret = intel_guc_slpc_enable(&guc->slpc);
if (ret)
goto err_submission;
+ } else {
+ /* Restore GT back to RPn for non-SLPC path */
+ intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
}
drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
@@ -529,6 +535,9 @@ err_submission:
err_log_capture:
__uc_capture_load_err_log(uc);
err_out:
+ /* Return GT back to RPn */
+ intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
+
__uc_sanitize(uc);
if (!ret) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 3aa87be4f2e4..a5af05bde6f2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -48,22 +48,39 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* Note that RKL and ADL-S have the same GuC/HuC device ID's and use the same
* firmware as TGL.
*/
-#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
- fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \
- fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
- fw_def(DG1, 0, guc_def(dg1, 62, 0, 0), huc_def(dg1, 7, 9, 3)) \
- fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
- fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
- fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \
- fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \
- fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0), huc_def(icl, 9, 0, 0)) \
- fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0), huc_def(cml, 4, 0, 0)) \
- fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \
- fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \
- fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0), huc_def(glk, 4, 0, 0)) \
- fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \
- fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0), huc_def(bxt, 2, 0, 0)) \
- fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0), huc_def(skl, 2, 0, 0))
+#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
+ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3)) \
+ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0)) \
+ fw_def(DG1, 0, guc_def(dg1, 62, 0, 0)) \
+ fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0)) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0)) \
+ fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0)) \
+ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0)) \
+ fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0)) \
+ fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0)) \
+ fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0)) \
+ fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0)) \
+ fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0)) \
+ fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0)) \
+ fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0)) \
+ fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0))
+
+#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
+ fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \
+ fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \
+ fw_def(DG1, 0, huc_def(dg1, 7, 9, 3)) \
+ fw_def(ROCKETLAKE, 0, huc_def(tgl, 7, 9, 3)) \
+ fw_def(TIGERLAKE, 0, huc_def(tgl, 7, 9, 3)) \
+ fw_def(JASPERLAKE, 0, huc_def(ehl, 9, 0, 0)) \
+ fw_def(ELKHARTLAKE, 0, huc_def(ehl, 9, 0, 0)) \
+ fw_def(ICELAKE, 0, huc_def(icl, 9, 0, 0)) \
+ fw_def(COMETLAKE, 5, huc_def(cml, 4, 0, 0)) \
+ fw_def(COMETLAKE, 0, huc_def(kbl, 4, 0, 0)) \
+ fw_def(COFFEELAKE, 0, huc_def(kbl, 4, 0, 0)) \
+ fw_def(GEMINILAKE, 0, huc_def(glk, 4, 0, 0)) \
+ fw_def(KABYLAKE, 0, huc_def(kbl, 4, 0, 0)) \
+ fw_def(BROXTON, 0, huc_def(bxt, 2, 0, 0)) \
+ fw_def(SKYLAKE, 0, huc_def(skl, 2, 0, 0))
#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
"i915/" \
@@ -79,11 +96,11 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
__MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
/* All blobs need to be declared via MODULE_FIRMWARE() */
-#define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \
- MODULE_FIRMWARE(guc_); \
- MODULE_FIRMWARE(huc_);
+#define INTEL_UC_MODULE_FW(platform_, revid_, uc_) \
+ MODULE_FIRMWARE(uc_);
-INTEL_UC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH, MAKE_HUC_FW_PATH)
+INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
+INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH)
/* The below structs and macros are used to iterate across the list of blobs */
struct __packed uc_fw_blob {
@@ -106,31 +123,47 @@ struct __packed uc_fw_blob {
struct __packed uc_fw_platform_requirement {
enum intel_platform p;
u8 rev; /* first platform rev using this FW */
- const struct uc_fw_blob blobs[INTEL_UC_FW_NUM_TYPES];
+ const struct uc_fw_blob blob;
};
-#define MAKE_FW_LIST(platform_, revid_, guc_, huc_) \
+#define MAKE_FW_LIST(platform_, revid_, uc_) \
{ \
.p = INTEL_##platform_, \
.rev = revid_, \
- .blobs[INTEL_UC_FW_TYPE_GUC] = guc_, \
- .blobs[INTEL_UC_FW_TYPE_HUC] = huc_, \
+ .blob = uc_, \
},
+/*
+ * One platform-requirement table per firmware type: @blobs points at the
+ * first entry of the table and @count is the number of entries in it.
+ */
+struct fw_blobs_by_type {
+ const struct uc_fw_platform_requirement *blobs;
+ u32 count;
+};
+
static void
__uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
{
- static const struct uc_fw_platform_requirement fw_blobs[] = {
- INTEL_UC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, HUC_FW_BLOB)
+ static const struct uc_fw_platform_requirement blobs_guc[] = {
+ INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
+ };
+ static const struct uc_fw_platform_requirement blobs_huc[] = {
+ INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
};
+ static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
+ [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
+ [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
+ };
+ static const struct uc_fw_platform_requirement *fw_blobs;
enum intel_platform p = INTEL_INFO(i915)->platform;
+ u32 fw_count;
u8 rev = INTEL_REVID(i915);
int i;
- for (i = 0; i < ARRAY_SIZE(fw_blobs) && p <= fw_blobs[i].p; i++) {
+ GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
+ fw_blobs = blobs_all[uc_fw->type].blobs;
+ fw_count = blobs_all[uc_fw->type].count;
+
+ for (i = 0; i < fw_count && p <= fw_blobs[i].p; i++) {
if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
- const struct uc_fw_blob *blob =
- &fw_blobs[i].blobs[uc_fw->type];
+ const struct uc_fw_blob *blob = &fw_blobs[i].blob;
uc_fw->path = blob->path;
uc_fw->major_ver_wanted = blob->major;
uc_fw->minor_ver_wanted = blob->minor;
@@ -140,7 +173,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
/* make sure the list is ordered as expected */
if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) {
- for (i = 1; i < ARRAY_SIZE(fw_blobs); i++) {
+ for (i = 1; i < fw_count; i++) {
if (fw_blobs[i].p < fw_blobs[i - 1].p)
continue;
@@ -322,13 +355,6 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
/* now RSA */
- if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) {
- drm_warn(&i915->drm, "%s firmware %s: unexpected key size: %u != %u\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
- css->key_size_dw, UOS_RSA_SCRATCH_COUNT);
- err = -EPROTO;
- goto fail;
- }
uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
/* At least, it should have header, uCode and RSA. Size of all three. */
@@ -540,10 +566,79 @@ fail:
i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n",
intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
err);
- intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL);
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return err;
}
+/* Tell whether the HW fetches the RSA signature of @uc_fw from memory. */
+static inline bool uc_fw_need_rsa_in_memory(struct intel_uc_fw *uc_fw)
+{
+	/* The HuC RSA is always read from memory. */
+	if (uc_fw->type == INTEL_UC_FW_TYPE_HUC)
+		return true;
+
+	/*
+	 * The HW reads the GuC RSA from memory if the key size is > 256 bytes,
+	 * while it reads it from the 64 RSA registers if it is smaller.
+	 */
+	return uc_fw->rsa_size > 256;
+}
+
+/*
+ * Prepare the in-memory copy of the RSA signature, if the HW needs one.
+ *
+ * Nothing is allocated when uc_fw_need_rsa_in_memory() says the signature is
+ * not read from memory. Otherwise a one-page vma is allocated through
+ * intel_guc_allocate_vma(), the signature is copied into it and the vma is
+ * stored in uc_fw->rsa_data.
+ *
+ * Returns 0 on success or a negative error code. On failure the vma is
+ * released and uc_fw->rsa_data is left untouched.
+ */
+static int uc_fw_rsa_data_create(struct intel_uc_fw *uc_fw)
+{
+	struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
+	struct i915_vma *vma;
+	size_t copied;
+	void *vaddr;
+	int err;
+
+	err = i915_inject_probe_error(gt->i915, -ENXIO);
+	if (err)
+		return err;
+
+	if (!uc_fw_need_rsa_in_memory(uc_fw))
+		return 0;
+
+	/*
+	 * uC firmwares will sit above GUC_GGTT_TOP and will not map through
+	 * GGTT. Unfortunately, this means that the GuC HW cannot perform the uC
+	 * authentication from memory, as the RSA offset now falls within the
+	 * GuC inaccessible range. We resort to perma-pinning an additional vma
+	 * within the accessible range that only contains the RSA signature.
+	 * The GuC HW can use this extra pinning to perform the authentication
+	 * since its GGTT offset will be GuC accessible.
+	 */
+	GEM_BUG_ON(uc_fw->rsa_size > PAGE_SIZE);
+	vma = intel_guc_allocate_vma(&gt->uc.guc, PAGE_SIZE);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	vaddr = i915_gem_object_pin_map_unlocked(vma->obj,
+						 i915_coherent_map_type(gt->i915, vma->obj, true));
+	if (IS_ERR(vaddr)) {
+		/* The vma is released exactly once, at unpin_out. */
+		err = PTR_ERR(vaddr);
+		goto unpin_out;
+	}
+
+	copied = intel_uc_fw_copy_rsa(uc_fw, vaddr, vma->size);
+	i915_gem_object_unpin_map(vma->obj);
+
+	/* A short copy means the whole signature did not make it into the vma. */
+	if (copied < uc_fw->rsa_size) {
+		err = -ENOMEM;
+		goto unpin_out;
+	}
+
+	uc_fw->rsa_data = vma;
+
+	return 0;
+
+unpin_out:
+	i915_vma_unpin_and_release(&vma, 0);
+	return err;
+}
+
+/*
+ * Release the RSA vma stored by uc_fw_rsa_data_create().
+ * NOTE(review): rsa_data is never assigned when no in-memory copy is needed,
+ * so this presumably relies on i915_vma_unpin_and_release() accepting a NULL
+ * vma - confirm against its definition.
+ */
+static void uc_fw_rsa_data_destroy(struct intel_uc_fw *uc_fw)
+{
+ i915_vma_unpin_and_release(&uc_fw->rsa_data, 0);
+}
+
int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
{
int err;
@@ -558,14 +653,29 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
if (err) {
DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n",
intel_uc_fw_type_repr(uc_fw->type), err);
- intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL);
+ goto out;
}
+ err = uc_fw_rsa_data_create(uc_fw);
+ if (err) {
+ DRM_DEBUG_DRIVER("%s fw rsa data creation failed, err=%d\n",
+ intel_uc_fw_type_repr(uc_fw->type), err);
+ goto out_unpin;
+ }
+
+ return 0;
+
+out_unpin:
+ i915_gem_object_unpin_pages(uc_fw->obj);
+out:
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL);
return err;
}
void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
{
+ uc_fw_rsa_data_destroy(uc_fw);
+
if (i915_gem_object_has_pinned_pages(uc_fw->obj))
i915_gem_object_unpin_pages(uc_fw->obj);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 1e00bf65639e..d9d1dc0b4cbb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -32,11 +32,12 @@ struct intel_gt;
* | | MISSING <--/ | \--> ERROR |
* | fetch | V |
* | | AVAILABLE |
- * +------------+- | -+
+ * +------------+- | \ -+
+ * | | | \--> INIT FAIL |
* | init | V |
* | | /------> LOADABLE <----<-----------\ |
* +------------+- \ / \ \ \ -+
- * | | FAIL <--< \--> TRANSFERRED \ |
+ * | | LOAD FAIL <--< \--> TRANSFERRED \ |
* | upload | \ / \ / |
* | | \---------/ \--> RUNNING |
* +------------+---------------------------------------------------+
@@ -50,8 +51,9 @@ enum intel_uc_fw_status {
INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */
INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */
INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+ INTEL_UC_FIRMWARE_INIT_FAIL, /* failed to prepare fw objects for load */
INTEL_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
- INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */
+ INTEL_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
INTEL_UC_FIRMWARE_RUNNING /* init/auth done */
};
@@ -84,6 +86,7 @@ struct intel_uc_fw {
* or during a GT reset (mutex guarantees single threaded).
*/
struct i915_vma dummy;
+ struct i915_vma *rsa_data;
/*
* The firmware build process will generate a version header file with major and
@@ -130,10 +133,12 @@ const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
return "ERROR";
case INTEL_UC_FIRMWARE_AVAILABLE:
return "AVAILABLE";
+ case INTEL_UC_FIRMWARE_INIT_FAIL:
+ return "INIT FAIL";
case INTEL_UC_FIRMWARE_LOADABLE:
return "LOADABLE";
- case INTEL_UC_FIRMWARE_FAIL:
- return "FAIL";
+ case INTEL_UC_FIRMWARE_LOAD_FAIL:
+ return "LOAD FAIL";
case INTEL_UC_FIRMWARE_TRANSFERRED:
return "TRANSFERRED";
case INTEL_UC_FIRMWARE_RUNNING:
@@ -155,7 +160,8 @@ static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status)
return -ENOENT;
case INTEL_UC_FIRMWARE_ERROR:
return -ENOEXEC;
- case INTEL_UC_FIRMWARE_FAIL:
+ case INTEL_UC_FIRMWARE_INIT_FAIL:
+ case INTEL_UC_FIRMWARE_LOAD_FAIL:
return -EIO;
case INTEL_UC_FIRMWARE_SELECTED:
return -ESTALE;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index fb0e4a7bd8ca..d3327b802b76 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -3,8 +3,21 @@
* Copyright © 2021 Intel Corporation
*/
+#include "selftests/igt_spinner.h"
#include "selftests/intel_scheduler_helpers.h"
+/*
+ * Take a reference on @rq and submit it. When @spin is provided, also wait for
+ * the spinner to start and report -ETIMEDOUT if it does not.
+ */
+static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
+{
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	/* Nothing to wait for without a spinner. */
+	if (!spin)
+		return 0;
+
+	return igt_wait_for_spinner(spin, rq) ? 0 : -ETIMEDOUT;
+}
+
static struct i915_request *nop_user_request(struct intel_context *ce,
struct i915_request *from)
{
@@ -110,12 +123,172 @@ err:
return ret;
}
+/*
+ * intel_guc_steal_guc_ids - Test to exhaust all guc_ids and then steal one
+ *
+ * This test creates a spinner which is used to block all subsequent submissions
+ * until it completes. Next, a loop creates a context and a NOP request each
+ * iteration until the guc_ids are exhausted (request creation returns -EAGAIN).
+ * The spinner is ended, unblocking all requests created in the loop. At this
+ * point all guc_ids are exhausted but are available to steal. Try to create
+ * another request which should successfully steal a guc_id. Wait on last
+ * request to complete, idle GPU, verify a guc_id was stolen via a counter, and
+ * exit the test. Test also artificially reduces the number of guc_ids so the
+ * test runs in a timely manner.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+static int intel_guc_steal_guc_ids(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_guc *guc = &gt->uc.guc;
+	int ret, sv, context_index = 0;
+	intel_wakeref_t wakeref;
+	struct intel_engine_cs *engine;
+	struct intel_context **ce;
+	struct igt_spinner spin;
+	struct i915_request *spin_rq = NULL, *rq, *last = NULL;
+	int number_guc_id_stolen = guc->number_guc_id_stolen;
+
+	/* kcalloc() zeroes the array and checks the size multiplication. */
+	ce = kcalloc(GUC_MAX_LRC_DESCRIPTORS, sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		pr_err("Context array allocation failed\n");
+		return -ENOMEM;
+	}
+
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+	engine = intel_selftest_find_any_engine(gt);
+	/* Shrink the guc_id space for the test; restored on exit. */
+	sv = guc->submission_state.num_guc_ids;
+	guc->submission_state.num_guc_ids = 4096;
+
+	/* Create spinner to block requests in below loop */
+	ce[context_index] = intel_context_create(engine);
+	if (IS_ERR(ce[context_index])) {
+		ret = PTR_ERR(ce[context_index]);
+		ce[context_index] = NULL;
+		pr_err("Failed to create context: %d\n", ret);
+		goto err_wakeref;
+	}
+	ret = igt_spinner_init(&spin, engine->gt);
+	if (ret) {
+		pr_err("Failed to create spinner: %d\n", ret);
+		goto err_contexts;
+	}
+	spin_rq = igt_spinner_create_request(&spin, ce[context_index],
+					     MI_ARB_CHECK);
+	if (IS_ERR(spin_rq)) {
+		ret = PTR_ERR(spin_rq);
+		pr_err("Failed to create spinner request: %d\n", ret);
+		/* The spinner was initialised above; do not leak it. */
+		igt_spinner_fini(&spin);
+		goto err_contexts;
+	}
+	ret = request_add_spin(spin_rq, &spin);
+	if (ret) {
+		pr_err("Failed to add Spinner request: %d\n", ret);
+		goto err_spin_rq;
+	}
+
+	/* Use all guc_ids */
+	while (ret != -EAGAIN) {
+		ce[++context_index] = intel_context_create(engine);
+		if (IS_ERR(ce[context_index])) {
+			ret = PTR_ERR(ce[context_index]);
+			/*
+			 * Clear the failed slot itself, then step back to the
+			 * last valid context so the cleanup loop below still
+			 * releases every context created so far.
+			 */
+			ce[context_index--] = NULL;
+			pr_err("Failed to create context: %d\n", ret);
+			goto err_spin_rq;
+		}
+
+		rq = nop_user_request(ce[context_index], spin_rq);
+		if (IS_ERR(rq)) {
+			ret = PTR_ERR(rq);
+			rq = NULL;
+			if (ret != -EAGAIN) {
+				pr_err("Failed to create request, %d: %d\n",
+				       context_index, ret);
+				goto err_spin_rq;
+			}
+		} else {
+			/* Only the most recent request is kept referenced. */
+			if (last)
+				i915_request_put(last);
+			last = rq;
+		}
+	}
+
+	/* Release blocked requests */
+	igt_spinner_end(&spin);
+	ret = intel_selftest_wait_for_rq(spin_rq);
+	if (ret) {
+		pr_err("Spin request failed to complete: %d\n", ret);
+		i915_request_put(last);
+		goto err_spin_rq;
+	}
+	i915_request_put(spin_rq);
+	igt_spinner_fini(&spin);
+	/* From here on the error path must not touch the spinner again. */
+	spin_rq = NULL;
+
+	/* Wait for last request */
+	ret = i915_request_wait(last, 0, HZ * 30);
+	i915_request_put(last);
+	if (ret < 0) {
+		pr_err("Last request failed to complete: %d\n", ret);
+		goto err_spin_rq;
+	}
+
+	/* Try to steal guc_id */
+	rq = nop_user_request(ce[context_index], NULL);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		pr_err("Failed to steal guc_id, %d: %d\n", context_index, ret);
+		goto err_spin_rq;
+	}
+
+	/* Wait for request with stolen guc_id */
+	ret = i915_request_wait(rq, 0, HZ);
+	i915_request_put(rq);
+	if (ret < 0) {
+		pr_err("Request with stolen guc_id failed to complete: %d\n",
+		       ret);
+		goto err_spin_rq;
+	}
+
+	/* Wait for idle */
+	ret = intel_gt_wait_for_idle(gt, HZ * 30);
+	if (ret < 0) {
+		pr_err("GT failed to idle: %d\n", ret);
+		goto err_spin_rq;
+	}
+
+	/* Verify a guc_id was stolen */
+	if (guc->number_guc_id_stolen == number_guc_id_stolen) {
+		pr_err("No guc_id was stolen\n");
+		ret = -EINVAL;
+	} else {
+		ret = 0;
+	}
+
+err_spin_rq:
+	if (spin_rq) {
+		igt_spinner_end(&spin);
+		intel_selftest_wait_for_rq(spin_rq);
+		i915_request_put(spin_rq);
+		igt_spinner_fini(&spin);
+		intel_gt_wait_for_idle(gt, HZ * 30);
+	}
+err_contexts:
+	for (; context_index >= 0 && ce[context_index]; --context_index)
+		intel_context_put(ce[context_index]);
+err_wakeref:
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+	kfree(ce);
+	guc->submission_state.num_guc_ids = sv;
+
+	return ret;
+}
+
int intel_guc_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(intel_guc_scrub_ctbs),
+ SUBTEST(intel_guc_steal_guc_ids),
};
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
if (intel_gt_is_wedged(gt))
return 0;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
index 50953c8e8b53..1297ddbf7f88 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
@@ -167,7 +167,7 @@ int intel_guc_multi_lrc_live_selftests(struct drm_i915_private *i915)
static const struct i915_subtest tests[] = {
SUBTEST(intel_guc_multi_lrc_basic),
};
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
if (intel_gt_is_wedged(gt))
return 0;
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 614156856f16..2aeaf8fd68f0 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -446,17 +446,17 @@ static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
return (e->val64 != 0);
else
- return (e->val64 & _PAGE_PRESENT);
+ return (e->val64 & GEN8_PAGE_PRESENT);
}
static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
{
- e->val64 &= ~_PAGE_PRESENT;
+ e->val64 &= ~GEN8_PAGE_PRESENT;
}
static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
{
- e->val64 |= _PAGE_PRESENT;
+ e->val64 |= GEN8_PAGE_PRESENT;
}
static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
@@ -2439,7 +2439,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu,
/* The entry parameters like present/writeable/cache type
* set to the same as i915's scratch page tree.
*/
- se.val64 |= _PAGE_PRESENT | _PAGE_RW;
+ se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
if (type == GTT_TYPE_PPGTT_PDE_PT)
se.val64 |= PPAT_CACHED;
@@ -2896,7 +2896,7 @@ void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
for (idx = 0; idx < num_low; idx++) {
pte = mm->ggtt_mm.host_ggtt_aperture[idx];
- if (pte & _PAGE_PRESENT)
+ if (pte & GEN8_PAGE_PRESENT)
write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
}
@@ -2904,7 +2904,7 @@ void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
for (idx = 0; idx < num_hi; idx++) {
pte = mm->ggtt_mm.host_ggtt_hidden[idx];
- if (pte & _PAGE_PRESENT)
+ if (pte & GEN8_PAGE_PRESENT)
write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
}
}
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index cbac409f6c8a..f0b69e4dcb52 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -205,7 +205,7 @@ int intel_gvt_init_device(struct drm_i915_private *i915)
spin_lock_init(&gvt->scheduler.mmio_context_lock);
mutex_init(&gvt->lock);
mutex_init(&gvt->sched_lock);
- gvt->gt = &i915->gt;
+ gvt->gt = to_gt(i915);
i915->gvt = gvt;
init_device_info(gvt);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 6c804102528b..42a0c9ae0a73 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1386,7 +1386,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
enum intel_engine_id i;
int ret;
- ppgtt = i915_ppgtt_create(&i915->gt, I915_BO_ALLOC_PM_EARLY);
+ ppgtt = i915_ppgtt_create(to_gt(i915), I915_BO_ALLOC_PM_EARLY);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 3103c1e1fd14..ee2b3a375362 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -426,8 +426,9 @@ replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
return true;
}
-int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
+int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
{
+ struct dma_fence *fence = &rq->fence;
struct i915_active_fence *active;
int err;
@@ -436,7 +437,7 @@ int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
if (err)
return err;
- active = active_instance(ref, idx);
+ active = active_instance(ref, i915_request_timeline(rq)->fence_context);
if (!active) {
err = -ENOMEM;
goto out;
@@ -477,29 +478,6 @@ __i915_active_set_fence(struct i915_active *ref,
return prev;
}
-static struct i915_active_fence *
-__active_fence(struct i915_active *ref, u64 idx)
-{
- struct active_node *it;
-
- it = __active_lookup(ref, idx);
- if (unlikely(!it)) { /* Contention with parallel tree builders! */
- spin_lock_irq(&ref->tree_lock);
- it = __active_lookup(ref, idx);
- spin_unlock_irq(&ref->tree_lock);
- }
- GEM_BUG_ON(!it); /* slot must be preallocated */
-
- return &it->base;
-}
-
-struct dma_fence *
-__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
-{
- /* Only valid while active, see i915_active_acquire_for_context() */
- return __i915_active_set_fence(ref, __active_fence(ref, idx), fence);
-}
-
struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 5fcdb0e2bc9e..7eb44132183a 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -164,26 +164,11 @@ void __i915_active_init(struct i915_active *ref,
__i915_active_init(ref, active, retire, flags, &__mkey, &__wkey); \
} while (0)
-struct dma_fence *
-__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
-int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
-
-static inline int
-i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
-{
- return i915_active_ref(ref,
- i915_request_timeline(rq)->fence_context,
- &rq->fence);
-}
+int i915_active_add_request(struct i915_active *ref, struct i915_request *rq);
struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
-static inline bool i915_active_has_exclusive(struct i915_active *ref)
-{
- return rcu_access_pointer(ref->excl.fence);
-}
-
int __i915_active_wait(struct i915_active *ref, int state);
static inline int i915_active_wait(struct i915_active *ref)
{
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 682011c07ad7..e0e052cdf8b8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -64,7 +64,8 @@ static int i915_capabilities(struct seq_file *m, void *data)
intel_device_info_print_static(INTEL_INFO(i915), &p);
intel_device_info_print_runtime(RUNTIME_INFO(i915), &p);
- intel_gt_info_print(&i915->gt.info, &p);
+ i915_print_iommu_status(i915, &p);
+ intel_gt_info_print(&to_gt(i915)->info, &p);
intel_driver_caps_print(&i915->caps, &p);
kernel_param_lock(THIS_MODULE);
@@ -292,7 +293,7 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
gpu = NULL;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
- gpu = i915_gpu_coredump(&i915->gt, ALL_ENGINES);
+ gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES);
if (IS_ERR(gpu))
return PTR_ERR(gpu);
@@ -350,7 +351,7 @@ static const struct file_operations i915_error_state_fops = {
static int i915_frequency_info(struct seq_file *m, void *unused)
{
struct drm_i915_private *i915 = node_to_i915(m->private);
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
struct drm_printer p = drm_seq_file_printer(m);
intel_gt_pm_frequency_dump(gt, &p);
@@ -438,11 +439,11 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
static int i915_rps_boost_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
- struct intel_rps *rps = &dev_priv->gt.rps;
+ struct intel_rps *rps = &to_gt(dev_priv)->rps;
seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps)));
seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps)));
- seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake));
+ seq_printf(m, "GPU busy? %s\n", yesno(to_gt(dev_priv)->awake));
seq_printf(m, "Boosts outstanding? %d\n",
atomic_read(&rps->num_waiters));
seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
@@ -475,7 +476,7 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
seq_printf(m, "Runtime power status: %s\n",
enableddisabled(!dev_priv->power_domains.init_wakeref));
- seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
+ seq_printf(m, "GPU idle: %s\n", yesno(!to_gt(dev_priv)->awake));
seq_printf(m, "IRQs disabled: %s\n",
yesno(!intel_irqs_enabled(dev_priv)));
#ifdef CONFIG_PM
@@ -507,18 +508,18 @@ static int i915_engine_info(struct seq_file *m, void *unused)
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
seq_printf(m, "GT awake? %s [%d], %llums\n",
- yesno(i915->gt.awake),
- atomic_read(&i915->gt.wakeref.count),
- ktime_to_ms(intel_gt_get_awake_time(&i915->gt)));
+ yesno(to_gt(i915)->awake),
+ atomic_read(&to_gt(i915)->wakeref.count),
+ ktime_to_ms(intel_gt_get_awake_time(to_gt(i915))));
seq_printf(m, "CS timestamp frequency: %u Hz, %d ns\n",
- i915->gt.clock_frequency,
- i915->gt.clock_period_ns);
+ to_gt(i915)->clock_frequency,
+ to_gt(i915)->clock_period_ns);
p = drm_seq_file_printer(m);
for_each_uabi_engine(engine, i915)
intel_engine_dump(engine, &p, "%s\n", engine->name);
- intel_gt_show_timelines(&i915->gt, &p, i915_request_show_with_schedule);
+ intel_gt_show_timelines(to_gt(i915), &p, i915_request_show_with_schedule);
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
@@ -557,14 +558,14 @@ static int i915_wedged_get(void *data, u64 *val)
{
struct drm_i915_private *i915 = data;
- return intel_gt_debugfs_reset_show(&i915->gt, val);
+ return intel_gt_debugfs_reset_show(to_gt(i915), val);
}
static int i915_wedged_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
- return intel_gt_debugfs_reset_store(&i915->gt, val);
+ return intel_gt_debugfs_reset_store(to_gt(i915), val);
}
DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
@@ -580,7 +581,7 @@ i915_perf_noa_delay_set(void *data, u64 val)
* This would lead to infinite waits as we're doing timestamp
* difference on the CS with only 32bits.
*/
- if (intel_gt_ns_to_clock_interval(&i915->gt, val) > U32_MAX)
+ if (intel_gt_ns_to_clock_interval(to_gt(i915), val) > U32_MAX)
return -EINVAL;
atomic64_set(&i915->perf.noa_programming_delay, val);
@@ -665,16 +666,18 @@ static int
i915_drop_caches_set(void *data, u64 val)
{
struct drm_i915_private *i915 = data;
+ unsigned int flags;
int ret;
DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
val, val & DROP_ALL);
- ret = gt_drop_caches(&i915->gt, val);
+ ret = gt_drop_caches(to_gt(i915), val);
if (ret)
return ret;
fs_reclaim_acquire(GFP_KERNEL);
+ flags = memalloc_noreclaim_save();
if (val & DROP_BOUND)
i915_gem_shrink(NULL, i915, LONG_MAX, NULL, I915_SHRINK_BOUND);
@@ -683,6 +686,7 @@ i915_drop_caches_set(void *data, u64 val)
if (val & DROP_SHRINK_ALL)
i915_gem_shrink_all(i915);
+ memalloc_noreclaim_restore(flags);
fs_reclaim_release(GFP_KERNEL);
if (val & DROP_RCU)
@@ -701,7 +705,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
static int i915_sseu_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *i915 = node_to_i915(m->private);
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
return intel_sseu_status(m, gt);
}
@@ -710,14 +714,14 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
- return intel_gt_pm_debugfs_forcewake_user_open(&i915->gt);
+ return intel_gt_pm_debugfs_forcewake_user_open(to_gt(i915));
}
static int i915_forcewake_release(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
- return intel_gt_pm_debugfs_forcewake_user_release(&i915->gt);
+ return intel_gt_pm_debugfs_forcewake_user_release(to_gt(i915));
}
static const struct file_operations i915_forcewake_fops = {
diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c
index 20424275d41e..783c8676eee2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs_params.c
+++ b/drivers/gpu/drm/i915/i915_debugfs_params.c
@@ -40,8 +40,8 @@ static int notify_guc(struct drm_i915_private *i915)
{
int ret = 0;
- if (intel_uc_uses_guc_submission(&i915->gt.uc))
- ret = intel_guc_global_policies_update(&i915->gt.uc.guc);
+ if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+ ret = intel_guc_global_policies_update(&to_gt(i915)->uc.guc);
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_deps.c b/drivers/gpu/drm/i915/i915_deps.c
new file mode 100644
index 000000000000..999210b37325
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_deps.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/slab.h>
+
+#include <drm/ttm/ttm_bo_api.h>
+
+#include "i915_deps.h"
+
+/**
+ * DOC: Set of utilities to dynamically collect dependencies into a
+ * structure which is fed into the GT migration code.
+ *
+ * Once we can do async unbinding, this is also needed to coalesce
+ * the migration fence with the unbind fences if these are coalesced
+ * post-migration.
+ *
+ * While collecting the individual dependencies, we store the refcounted
+ * struct dma_fence pointers in a realloc-managed pointer array, since
+ * that can be easily fed into a dma_fence_array. Other options are
+ * available, like for example an xarray for similarity with drm/sched.
+ * Can be changed easily if needed.
+ *
+ * A struct i915_deps needs to be initialized using i915_deps_init().
+ * If i915_deps_add_dependency() or i915_deps_add_resv() return an
+ * error code they will internally call i915_deps_fini(), which frees
+ * all internal references and allocations.
+ */
+
+/* Min number of fence pointers in the array when an allocation occurs. */
+#define I915_DEPS_MIN_ALLOC_CHUNK 8U
+
+/*
+ * Return @deps to its empty state: free a separately allocated fence array, if
+ * any, and point the collection back at the embedded single-fence slot.
+ * Fence references are not dropped here.
+ */
+static void i915_deps_reset_fences(struct i915_deps *deps)
+{
+	struct dma_fence **stale = deps->fences;
+
+	deps->fences = &deps->single;
+	deps->fences_size = 1;
+	deps->num_deps = 0;
+
+	/* The embedded slot is part of @deps itself and must not be freed. */
+	if (stale != &deps->single)
+		kfree(stale);
+}
+
+/**
+ * i915_deps_init - Initialize an i915_deps structure
+ * @deps: Pointer to the i915_deps structure to initialize.
+ * @gfp: The allocation mode for subsequent allocations.
+ *
+ * After this call the collection is empty and uses the embedded single-fence
+ * slot as its storage.
+ */
+void i915_deps_init(struct i915_deps *deps, gfp_t gfp)
+{
+ /* Start from NULL so the reset below has no earlier array to free. */
+ deps->fences = NULL;
+ deps->gfp = gfp;
+ i915_deps_reset_fences(deps);
+}
+
+/**
+ * i915_deps_fini - Finalize an i915_deps structure
+ * @deps: Pointer to the i915_deps structure to finalize.
+ *
+ * This function drops all fence references taken, conditionally frees and
+ * then resets the fences array. Because the collection is left empty, calling
+ * this function again (for example after i915_deps_add_dependency() already
+ * finalized the collection on error) is harmless.
+ */
+void i915_deps_fini(struct i915_deps *deps)
+{
+	unsigned int i;
+
+	for (i = 0; i < deps->num_deps; ++i)
+		dma_fence_put(deps->fences[i]);
+
+	/*
+	 * Free a separately allocated array and clear num_deps/fences so no
+	 * stale pointer or count survives for a later put or free.
+	 */
+	i915_deps_reset_fences(deps);
+}
+
+/*
+ * Append @fence to @deps, enlarging the fence array first when it is full.
+ *
+ * If the larger array cannot be allocated, @fence is not stored; it is waited
+ * for instead, as directed by @ctx. Returns 0 on success. On error the
+ * collection is finalized with i915_deps_fini() and a negative error code is
+ * returned.
+ */
+static int i915_deps_grow(struct i915_deps *deps, struct dma_fence *fence,
+			  const struct ttm_operation_ctx *ctx)
+{
+	struct dma_fence **grown;
+	unsigned int capacity;
+	int err;
+
+	if (deps->num_deps < deps->fences_size)
+		goto store;
+
+	/* Double the capacity, but never allocate less than the minimum chunk. */
+	capacity = 2 * deps->fences_size;
+	capacity = max(capacity, I915_DEPS_MIN_ALLOC_CHUNK);
+	grown = kmalloc_array(capacity, sizeof(*grown), deps->gfp);
+	if (!grown) {
+		/* No room to track the fence: wait for it right away instead. */
+		if (ctx->no_wait_gpu && !dma_fence_is_signaled(fence)) {
+			err = -EBUSY;
+		} else {
+			err = dma_fence_wait(fence, ctx->interruptible);
+			if (!err)
+				err = fence->error;
+		}
+
+		if (err)
+			i915_deps_fini(deps);
+		return err;
+	}
+
+	memcpy(grown, deps->fences, deps->fences_size * sizeof(*grown));
+	/* The embedded single-fence slot is not a separate allocation. */
+	if (deps->fences != &deps->single)
+		kfree(deps->fences);
+	deps->fences = grown;
+	deps->fences_size = capacity;
+
+store:
+	deps->fences[deps->num_deps++] = dma_fence_get(fence);
+	return 0;
+}
+
+/**
+ * i915_deps_sync - Wait for all the fences in the dependency collection
+ * @deps: Pointer to the i915_deps structure the fences of which to wait for.
+ * @ctx: Pointer to a struct ttm_operation_ctx indicating how the waits
+ * should be performed.
+ *
+ * The fences are waited for in the order they are stored. The first wait
+ * error or fence error ends the walk and is returned; the remaining fences
+ * are then not waited for. With @ctx->no_wait_gpu set, an unsignaled fence
+ * yields -EBUSY instead of a wait.
+ *
+ * Return: Zero if successful, Negative error code on error.
+ */
+int i915_deps_sync(const struct i915_deps *deps, const struct ttm_operation_ctx *ctx)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < deps->num_deps; ++idx) {
+		struct dma_fence *fence = deps->fences[idx];
+		int err;
+
+		if (ctx->no_wait_gpu && !dma_fence_is_signaled(fence))
+			return -EBUSY;
+
+		err = dma_fence_wait(fence, ctx->interruptible);
+		if (err)
+			return err;
+
+		err = fence->error;
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * i915_deps_add_dependency - Add a fence to the dependency collection
+ * @deps: Pointer to the i915_deps structure a fence is to be added to.
+ * @fence: The fence to add. May be NULL, in which case nothing is done.
+ * @ctx: Pointer to a struct ttm_operation_ctx indicating how waits are to
+ * be performed if waiting.
+ *
+ * Adds a fence to the dependency collection, and takes a reference on it.
+ * An already signaled fence is not added; only its error, if any, is
+ * reported. If the fence context is not zero and a fence from the same
+ * context is already in the collection, the later of the two is kept: a
+ * later @fence replaces the stored one, whose reference is dropped, while an
+ * earlier @fence is not added.
+ * If there is a failure to allocate memory to accommodate the new fence to
+ * be added, the new fence will instead be waited for and an error may
+ * be returned, depending on the value of @ctx, or if there was a fence
+ * error. If an error was returned, the dependency collection will be
+ * finalized and all fence references dropped.
+ *
+ * Return: 0 if success. Negative error code on error.
+ */
+int i915_deps_add_dependency(struct i915_deps *deps,
+			     struct dma_fence *fence,
+			     const struct ttm_operation_ctx *ctx)
+{
+	unsigned int idx;
+
+	if (!fence)
+		return 0;
+
+	if (dma_fence_is_signaled(fence)) {
+		int err = fence->error;
+
+		if (err)
+			i915_deps_fini(deps);
+		return err;
+	}
+
+	for (idx = 0; idx < deps->num_deps; ++idx) {
+		struct dma_fence *cur = deps->fences[idx];
+
+		if (cur->context && cur->context == fence->context) {
+			/* Same context: keep only the later of the two fences. */
+			if (dma_fence_is_later(fence, cur)) {
+				dma_fence_put(cur);
+				deps->fences[idx] = dma_fence_get(fence);
+			}
+
+			return 0;
+		}
+	}
+
+	return i915_deps_grow(deps, fence, ctx);
+}
+
+/**
+ * i915_deps_add_resv - Add the fences of a reservation object to a dependency
+ * collection.
+ * @deps: Pointer to the i915_deps structure a fence is to be added to.
+ * @resv: The reservation object, the fences of which to add. Its lock must
+ * be held by the caller.
+ * @ctx: Pointer to a struct ttm_operation_ctx indicating how waits are to
+ * be performed if waiting.
+ *
+ * Calls i915_deps_add_dependency() on the indicated fences of @resv and stops
+ * at the first error.
+ *
+ * Return: Zero on success. Negative error code on error.
+ */
+int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv,
+		       const struct ttm_operation_ctx *ctx)
+{
+	struct dma_resv_iter iter;
+	struct dma_fence *fence;
+	int ret = 0;
+
+	dma_resv_assert_held(resv);
+	dma_resv_for_each_fence(&iter, resv, true, fence) {
+		ret = i915_deps_add_dependency(deps, fence, ctx);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/i915_deps.h b/drivers/gpu/drm/i915/i915_deps.h
new file mode 100644
index 000000000000..d76c0106c910
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_deps.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _I915_DEPS_H_
+#define _I915_DEPS_H_
+
+#include <linux/types.h>
+
+struct ttm_operation_ctx;
+struct dma_fence;
+struct dma_resv;
+
+/**
+ * struct i915_deps - Collect dependencies into a single dma-fence
+ * @single: Storage for pointer if the collection is a single fence.
+ * @fences: Allocated array of fence pointers if more than a single fence;
+ * otherwise points to the address of @single.
+ * @num_deps: Current number of dependency fences.
+ * @fences_size: Size of the @fences array in number of pointers.
+ * @gfp: Allocation mode.
+ */
+struct i915_deps {
+ struct dma_fence *single;
+ struct dma_fence **fences;
+ unsigned int num_deps;
+ unsigned int fences_size;
+ gfp_t gfp;
+};
+
+void i915_deps_init(struct i915_deps *deps, gfp_t gfp);
+
+void i915_deps_fini(struct i915_deps *deps);
+
+int i915_deps_add_dependency(struct i915_deps *deps,
+ struct dma_fence *fence,
+ const struct ttm_operation_ctx *ctx);
+
+int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv,
+ const struct ttm_operation_ctx *ctx);
+
+int i915_deps_sync(const struct i915_deps *deps,
+ const struct ttm_operation_ctx *ctx);
+#endif
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 9898002d8260..4b56d4d83e75 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -294,7 +294,7 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv)
static void sanitize_gpu(struct drm_i915_private *i915)
{
if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
- __intel_gt_reset(&i915->gt, ALL_ENGINES);
+ __intel_gt_reset(to_gt(i915), ALL_ENGINES);
}
/**
@@ -317,8 +317,9 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
intel_device_info_subplatform_init(dev_priv);
intel_step_init(dev_priv);
+ intel_gt_init_early(to_gt(dev_priv), dev_priv);
intel_uncore_mmio_debug_init_early(&dev_priv->mmio_debug);
- intel_uncore_init_early(&dev_priv->uncore, dev_priv);
+ intel_uncore_init_early(&dev_priv->uncore, to_gt(dev_priv));
spin_lock_init(&dev_priv->irq_lock);
spin_lock_init(&dev_priv->gpu_error.lock);
@@ -349,7 +350,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
intel_wopcm_init_early(&dev_priv->wopcm);
- intel_gt_init_early(&dev_priv->gt, dev_priv);
+ __intel_gt_init_early(to_gt(dev_priv), dev_priv);
i915_gem_init_early(dev_priv);
@@ -370,7 +371,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
err_gem:
i915_gem_cleanup_early(dev_priv);
- intel_gt_driver_late_release(&dev_priv->gt);
+ intel_gt_driver_late_release(to_gt(dev_priv));
intel_region_ttm_device_fini(dev_priv);
err_ttm:
vlv_suspend_cleanup(dev_priv);
@@ -389,7 +390,7 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv)
intel_irq_fini(dev_priv);
intel_power_domains_cleanup(dev_priv);
i915_gem_cleanup_early(dev_priv);
- intel_gt_driver_late_release(&dev_priv->gt);
+ intel_gt_driver_late_release(to_gt(dev_priv));
intel_region_ttm_device_fini(dev_priv);
vlv_suspend_cleanup(dev_priv);
i915_workqueues_cleanup(dev_priv);
@@ -420,15 +421,19 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
if (ret < 0)
return ret;
- ret = intel_uncore_init_mmio(&dev_priv->uncore);
+ ret = intel_uncore_setup_mmio(&dev_priv->uncore);
if (ret < 0)
goto err_bridge;
+ ret = intel_uncore_init_mmio(&dev_priv->uncore);
+ if (ret)
+ goto err_mmio;
+
/* Try to make sure MCHBAR is enabled before poking at it */
intel_setup_mchbar(dev_priv);
intel_device_info_runtime_init(dev_priv);
- ret = intel_gt_init_mmio(&dev_priv->gt);
+ ret = intel_gt_init_mmio(to_gt(dev_priv));
if (ret)
goto err_uncore;
@@ -440,6 +445,8 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
err_uncore:
intel_teardown_mchbar(dev_priv);
intel_uncore_fini_mmio(&dev_priv->uncore);
+err_mmio:
+ intel_uncore_cleanup_mmio(&dev_priv->uncore);
err_bridge:
pci_dev_put(dev_priv->bridge_dev);
@@ -454,6 +461,7 @@ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv)
{
intel_teardown_mchbar(dev_priv);
intel_uncore_fini_mmio(&dev_priv->uncore);
+ intel_uncore_cleanup_mmio(&dev_priv->uncore);
pci_dev_put(dev_priv->bridge_dev);
}
@@ -582,9 +590,9 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
if (ret)
goto err_ggtt;
- intel_gt_init_hw_early(&dev_priv->gt, &dev_priv->ggtt);
+ intel_gt_init_hw_early(to_gt(dev_priv), &dev_priv->ggtt);
- ret = intel_gt_probe_lmem(&dev_priv->gt);
+ ret = intel_gt_probe_lmem(to_gt(dev_priv));
if (ret)
goto err_mem_regions;
@@ -697,7 +705,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
/* Depends on sysfs having been initialized */
i915_perf_register(dev_priv);
- intel_gt_driver_register(&dev_priv->gt);
+ intel_gt_driver_register(to_gt(dev_priv));
intel_display_driver_register(dev_priv);
@@ -725,7 +733,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
intel_display_driver_unregister(dev_priv);
- intel_gt_driver_unregister(&dev_priv->gt);
+ intel_gt_driver_unregister(to_gt(dev_priv));
i915_perf_unregister(dev_priv);
i915_pmu_unregister(dev_priv);
@@ -736,6 +744,12 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
i915_gem_driver_unregister(dev_priv);
}
+void
+i915_print_iommu_status(struct drm_i915_private *i915, struct drm_printer *p)
+{
+ drm_printf(p, "iommu: %s\n", enableddisabled(intel_vtd_active(i915)));
+}
+
static void i915_welcome_messages(struct drm_i915_private *dev_priv)
{
if (drm_debug_enabled(DRM_UT_DRIVER)) {
@@ -751,7 +765,8 @@ static void i915_welcome_messages(struct drm_i915_private *dev_priv)
intel_device_info_print_static(INTEL_INFO(dev_priv), &p);
intel_device_info_print_runtime(RUNTIME_INFO(dev_priv), &p);
- intel_gt_info_print(&dev_priv->gt.info, &p);
+ i915_print_iommu_status(dev_priv, &p);
+ intel_gt_info_print(&to_gt(dev_priv)->info, &p);
}
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
@@ -812,7 +827,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return PTR_ERR(i915);
/* Disable nuclear pageflip by default on pre-ILK */
- if (!i915->params.nuclear_pageflip && match_info->graphics_ver < 5)
+ if (!i915->params.nuclear_pageflip && match_info->graphics.ver < 5)
i915->drm.driver_features &= ~DRIVER_ATOMIC;
/*
@@ -1373,7 +1388,7 @@ static int i915_drm_resume_early(struct drm_device *dev)
intel_uncore_resume_early(&dev_priv->uncore);
- intel_gt_check_and_clear_faults(&dev_priv->gt);
+ intel_gt_check_and_clear_faults(to_gt(dev_priv));
intel_display_power_resume_early(dev_priv);
@@ -1556,7 +1571,7 @@ static int intel_runtime_suspend(struct device *kdev)
*/
i915_gem_runtime_suspend(dev_priv);
- intel_gt_runtime_suspend(&dev_priv->gt);
+ intel_gt_runtime_suspend(to_gt(dev_priv));
intel_runtime_pm_disable_interrupts(dev_priv);
@@ -1572,7 +1587,7 @@ static int intel_runtime_suspend(struct device *kdev)
intel_runtime_pm_enable_interrupts(dev_priv);
- intel_gt_runtime_resume(&dev_priv->gt);
+ intel_gt_runtime_resume(to_gt(dev_priv));
enable_rpm_wakeref_asserts(rpm);
@@ -1660,7 +1675,7 @@ static int intel_runtime_resume(struct device *kdev)
* No point of rolling back things in case of an error, as the best
* we can do is to hope that things will still work (and disable RPM).
*/
- intel_gt_runtime_resume(&dev_priv->gt);
+ intel_gt_runtime_resume(to_gt(dev_priv));
/*
* On VLV/CHV display interrupts are part of the display
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cffba01eed20..f954e3926603 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -168,8 +168,6 @@ struct i915_hotplug {
I915_GEM_DOMAIN_VERTEX)
struct drm_i915_private;
-struct i915_mm_struct;
-struct i915_mmu_object;
struct drm_i915_file_private {
struct drm_i915_private *dev_priv;
@@ -466,7 +464,7 @@ struct i915_gem_mm {
* List of objects which are pending destruction.
*/
struct llist_head free_list;
- struct work_struct free_work;
+ struct delayed_work free_work;
/**
* Count of objects pending destructions. Used to skip needlessly
* waiting on an RCU barrier if no objects are waiting to be freed.
@@ -985,7 +983,7 @@ struct drm_i915_private {
struct i915_perf perf;
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
- struct intel_gt gt;
+ struct intel_gt gt0;
struct {
struct i915_gem_contexts {
@@ -1057,6 +1055,11 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev)
return pci_get_drvdata(pdev);
}
+static inline struct intel_gt *to_gt(struct drm_i915_private *i915)
+{
+ return &i915->gt0;
+}
+
/* Simple iterator over all initialised engines */
#define for_each_engine(engine__, dev_priv__, id__) \
for ((id__) = 0; \
@@ -1114,15 +1117,15 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev)
#define IP_VER(ver, rel) ((ver) << 8 | (rel))
-#define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics_ver)
-#define GRAPHICS_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->graphics_ver, \
- INTEL_INFO(i915)->graphics_rel)
+#define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics.ver)
+#define GRAPHICS_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->graphics.ver, \
+ INTEL_INFO(i915)->graphics.rel)
#define IS_GRAPHICS_VER(i915, from, until) \
(GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until))
-#define MEDIA_VER(i915) (INTEL_INFO(i915)->media_ver)
-#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media_ver, \
- INTEL_INFO(i915)->media_rel)
+#define MEDIA_VER(i915) (INTEL_INFO(i915)->media.ver)
+#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.ver, \
+ INTEL_INFO(i915)->media.rel) /* packed (ver << 8 | rel), mirrors GRAPHICS_VER_FULL */
#define IS_MEDIA_VER(i915, from, until) \
(MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until))
@@ -1135,15 +1138,20 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev)
#define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb)
#define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step)
-#define INTEL_GT_STEP(__i915) (RUNTIME_INFO(__i915)->step.gt_step)
+#define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step)
+#define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step)
#define IS_DISPLAY_STEP(__i915, since, until) \
(drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \
INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until))
-#define IS_GT_STEP(__i915, since, until) \
- (drm_WARN_ON(&(__i915)->drm, INTEL_GT_STEP(__i915) == STEP_NONE), \
- INTEL_GT_STEP(__i915) >= (since) && INTEL_GT_STEP(__i915) < (until))
+#define IS_GRAPHICS_STEP(__i915, since, until) \
+ (drm_WARN_ON(&(__i915)->drm, INTEL_GRAPHICS_STEP(__i915) == STEP_NONE), \
+ INTEL_GRAPHICS_STEP(__i915) >= (since) && INTEL_GRAPHICS_STEP(__i915) < (until))
+
+#define IS_MEDIA_STEP(__i915, since, until) \
+ (drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \
+ INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < (until))
static __always_inline unsigned int
__platform_mask_index(const struct intel_runtime_info *info,
@@ -1320,15 +1328,15 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define IS_TGL_Y(dev_priv) \
IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX)
-#define IS_SKL_GT_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GT_STEP(p, since, until))
+#define IS_SKL_GRAPHICS_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GRAPHICS_STEP(p, since, until))
-#define IS_KBL_GT_STEP(dev_priv, since, until) \
- (IS_KABYLAKE(dev_priv) && IS_GT_STEP(dev_priv, since, until))
+#define IS_KBL_GRAPHICS_STEP(dev_priv, since, until) \
+ (IS_KABYLAKE(dev_priv) && IS_GRAPHICS_STEP(dev_priv, since, until))
#define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \
(IS_KABYLAKE(dev_priv) && IS_DISPLAY_STEP(dev_priv, since, until))
-#define IS_JSL_EHL_GT_STEP(p, since, until) \
- (IS_JSL_EHL(p) && IS_GT_STEP(p, since, until))
+#define IS_JSL_EHL_GRAPHICS_STEP(p, since, until) \
+ (IS_JSL_EHL(p) && IS_GRAPHICS_STEP(p, since, until))
#define IS_JSL_EHL_DISPLAY_STEP(p, since, until) \
(IS_JSL_EHL(p) && IS_DISPLAY_STEP(p, since, until))
@@ -1336,19 +1344,19 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
(IS_TIGERLAKE(__i915) && \
IS_DISPLAY_STEP(__i915, since, until))
-#define IS_TGL_UY_GT_STEP(__i915, since, until) \
+#define IS_TGL_UY_GRAPHICS_STEP(__i915, since, until) \
((IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \
- IS_GT_STEP(__i915, since, until))
+ IS_GRAPHICS_STEP(__i915, since, until))
-#define IS_TGL_GT_STEP(__i915, since, until) \
+#define IS_TGL_GRAPHICS_STEP(__i915, since, until) \
(IS_TIGERLAKE(__i915) && !(IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \
- IS_GT_STEP(__i915, since, until))
+ IS_GRAPHICS_STEP(__i915, since, until))
#define IS_RKL_DISPLAY_STEP(p, since, until) \
(IS_ROCKETLAKE(p) && IS_DISPLAY_STEP(p, since, until))
-#define IS_DG1_GT_STEP(p, since, until) \
- (IS_DG1(p) && IS_GT_STEP(p, since, until))
+#define IS_DG1_GRAPHICS_STEP(p, since, until) \
+ (IS_DG1(p) && IS_GRAPHICS_STEP(p, since, until))
#define IS_DG1_DISPLAY_STEP(p, since, until) \
(IS_DG1(p) && IS_DISPLAY_STEP(p, since, until))
@@ -1356,20 +1364,20 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
(IS_ALDERLAKE_S(__i915) && \
IS_DISPLAY_STEP(__i915, since, until))
-#define IS_ADLS_GT_STEP(__i915, since, until) \
+#define IS_ADLS_GRAPHICS_STEP(__i915, since, until) \
(IS_ALDERLAKE_S(__i915) && \
- IS_GT_STEP(__i915, since, until))
+ IS_GRAPHICS_STEP(__i915, since, until))
#define IS_ADLP_DISPLAY_STEP(__i915, since, until) \
(IS_ALDERLAKE_P(__i915) && \
IS_DISPLAY_STEP(__i915, since, until))
-#define IS_ADLP_GT_STEP(__i915, since, until) \
+#define IS_ADLP_GRAPHICS_STEP(__i915, since, until) \
(IS_ALDERLAKE_P(__i915) && \
- IS_GT_STEP(__i915, since, until))
+ IS_GRAPHICS_STEP(__i915, since, until))
-#define IS_XEHPSDV_GT_STEP(__i915, since, until) \
- (IS_XEHPSDV(__i915) && IS_GT_STEP(__i915, since, until))
+#define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \
+ (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until))
/*
* DG2 hardware steppings are a bit unusual. The hardware design was forked
@@ -1385,9 +1393,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
* and stepping-specific logic will be applied with a general DG2-wide stepping
* number.
*/
-#define IS_DG2_GT_STEP(__i915, variant, since, until) \
+#define IS_DG2_GRAPHICS_STEP(__i915, variant, since, until) \
(IS_SUBPLATFORM(__i915, INTEL_DG2, INTEL_SUBPLATFORM_##variant) && \
- IS_GT_STEP(__i915, since, until))
+ IS_GRAPHICS_STEP(__i915, since, until))
#define IS_DG2_DISPLAY_STEP(__i915, since, until) \
(IS_DG2(__i915) && \
@@ -1504,6 +1512,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_MSLICES(dev_priv) \
(INTEL_INFO(dev_priv)->has_mslices)
+/*
+ * Set this flag when the platform requires 64K or larger GTT page sizes for
+ * device local memory access. This flag also implies that we require, or
+ * at least support, the compact PT layout for the ppGTT when using 64K
+ * GTT pages.
+ */
+#define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages)
+
#define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc)
#define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i))
@@ -1517,7 +1533,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_PXP(dev_priv) ((IS_ENABLED(CONFIG_DRM_I915_PXP) && \
INTEL_INFO(dev_priv)->has_pxp) && \
- VDBOX_MASK(&dev_priv->gt))
+ VDBOX_MASK(to_gt(dev_priv)))
#define HAS_GMCH(dev_priv) (INTEL_INFO(dev_priv)->display.has_gmch)
@@ -1551,26 +1567,27 @@ static inline bool run_as_guest(void)
#define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \
IS_ALDERLAKE_S(dev_priv))
-static inline bool intel_vtd_active(void)
+static inline bool intel_vtd_active(struct drm_i915_private *i915)
{
-#ifdef CONFIG_INTEL_IOMMU
- if (intel_iommu_gfx_mapped)
+ if (device_iommu_mapped(i915->drm.dev))
return true;
-#endif
/* Running as a guest, we assume the host is enforcing VT'd */
return run_as_guest();
}
+void
+i915_print_iommu_status(struct drm_i915_private *i915, struct drm_printer *p);
+
static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
{
- return GRAPHICS_VER(dev_priv) >= 6 && intel_vtd_active();
+ return GRAPHICS_VER(dev_priv) >= 6 && intel_vtd_active(dev_priv);
}
static inline bool
intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
- return IS_BROXTON(i915) && intel_vtd_active();
+ return IS_BROXTON(i915) && intel_vtd_active(i915);
}
static inline bool
@@ -1593,7 +1610,8 @@ static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
* armed the work again.
*/
while (atomic_read(&i915->mm.free_count)) {
- flush_work(&i915->mm.free_work);
+ flush_delayed_work(&i915->mm.free_work);
+ flush_delayed_work(&i915->bdev.wq);
rcu_barrier();
}
}
@@ -1626,13 +1644,10 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
u64 size, u64 alignment, u64 flags);
-static inline struct i915_vma * __must_check
+struct i915_vma * __must_check
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
- u64 size, u64 alignment, u64 flags)
-{
- return i915_gem_object_ggtt_pin_ww(obj, NULL, view, size, alignment, flags);
-}
+ u64 size, u64 alignment, u64 flags);
int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags);
@@ -1697,6 +1712,10 @@ i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id)
struct drm_i915_gem_object *
i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
phys_addr_t size);
+struct drm_i915_gem_object *
+__i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
+ const struct drm_i915_gem_object_ops *ops,
+ phys_addr_t size);
/* i915_gem_tiling.c */
static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5e8ed2419c83..5ef959a9f594 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -765,7 +765,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
* perspective, requiring manual detiling by the client.
*/
if (!i915_gem_object_has_struct_page(obj) ||
- cpu_write_needs_clflush(obj))
+ i915_gem_cpu_write_needs_clflush(obj))
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
* textures). Fallback to the shmem path in that case.
@@ -878,6 +878,8 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
struct i915_vma *vma;
int ret;
+ GEM_WARN_ON(!ww);
+
if (flags & PIN_MAPPABLE &&
(!view || view->type == I915_GGTT_VIEW_NORMAL)) {
/*
@@ -937,10 +939,7 @@ new_vma:
return ERR_PTR(ret);
}
- if (ww)
- ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
- else
- ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+ ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
if (ret)
return ERR_PTR(ret);
@@ -960,6 +959,29 @@ new_vma:
return vma;
}
+struct i915_vma * __must_check
+i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
+ const struct i915_ggtt_view *view,
+ u64 size, u64 alignment, u64 flags)
+{
+ struct i915_gem_ww_ctx ww;
+ struct i915_vma *ret;
+ int err;
+
+ for_i915_gem_ww(&ww, err, true) {
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ continue;
+
+ ret = i915_gem_object_ggtt_pin_ww(obj, &ww, view, size,
+ alignment, flags);
+ if (IS_ERR(ret))
+ err = PTR_ERR(ret);
+ }
+
+ return err ? ERR_PTR(err) : ret;
+}
+
int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
@@ -1006,7 +1028,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
obj->ops->adjust_lru(obj);
}
- if (i915_gem_object_has_pages(obj)) {
+ if (i915_gem_object_has_pages(obj) ||
+ i915_gem_object_has_self_managed_shrink_list(obj)) {
unsigned long flags;
spin_lock_irqsave(&i915->mm.obj_lock, flags);
@@ -1049,7 +1072,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
if (ret)
return ret;
- intel_uc_fetch_firmwares(&dev_priv->gt.uc);
+ intel_uc_fetch_firmwares(&to_gt(dev_priv)->uc);
intel_wopcm_init(&dev_priv->wopcm);
ret = i915_init_ggtt(dev_priv);
@@ -1069,7 +1092,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
*/
intel_init_clock_gating(dev_priv);
- ret = intel_gt_init(&dev_priv->gt);
+ ret = intel_gt_init(to_gt(dev_priv));
if (ret)
goto err_unlock;
@@ -1085,7 +1108,7 @@ err_unlock:
i915_gem_drain_workqueue(dev_priv);
if (ret != -EIO)
- intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
+ intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
if (ret == -EIO) {
/*
@@ -1093,10 +1116,10 @@ err_unlock:
* as wedged. But we only want to do this when the GPU is angry,
* for all other failure, such as an allocation failure, bail.
*/
- if (!intel_gt_is_wedged(&dev_priv->gt)) {
+ if (!intel_gt_is_wedged(to_gt(dev_priv))) {
i915_probe_error(dev_priv,
"Failed to initialize GPU, declaring it wedged!\n");
- intel_gt_set_wedged(&dev_priv->gt);
+ intel_gt_set_wedged(to_gt(dev_priv));
}
/* Minimal basic recovery for KMS */
@@ -1127,7 +1150,7 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);
i915_gem_suspend_late(dev_priv);
- intel_gt_driver_remove(&dev_priv->gt);
+ intel_gt_driver_remove(to_gt(dev_priv));
dev_priv->uabi_engines = RB_ROOT;
/* Flush any outstanding unpin_work. */
@@ -1138,9 +1161,9 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
- intel_gt_driver_release(&dev_priv->gt);
+ intel_gt_driver_release(to_gt(dev_priv));
- intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
+ intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
i915_gem_drain_freed_objects(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index da6c041c17ad..dbe49fd87283 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -15,7 +15,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_private *i915 = to_i915(dev);
struct pci_dev *pdev = to_pci_dev(dev->dev);
- const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
+ const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
drm_i915_getparam_t *param = data;
int value = 0;
@@ -84,8 +84,8 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
break;
case I915_PARAM_HAS_GPU_RESET:
value = i915->params.enable_hangcheck &&
- intel_has_gpu_reset(&i915->gt);
- if (value && intel_has_reset_engine(&i915->gt))
+ intel_has_gpu_reset(to_gt(i915));
+ if (value && intel_has_reset_engine(to_gt(i915)))
value = 2;
break;
case I915_PARAM_HAS_RESOURCE_STREAMER:
@@ -98,7 +98,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = sseu->min_eu_in_pool;
break;
case I915_PARAM_HUC_STATUS:
- value = intel_huc_check_status(&i915->gt.uc.huc);
+ value = intel_huc_check_status(&to_gt(i915)->uc.huc);
if (value < 0)
return value;
break;
@@ -160,7 +160,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
return -ENODEV;
break;
case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
- value = i915->gt.clock_frequency;
+ value = to_gt(i915)->clock_frequency;
break;
case I915_PARAM_MMAP_GTT_COHERENT:
value = INTEL_INFO(i915)->has_coherent_ggtt;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b3fc8917598a..edcc2ae6d66c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -49,8 +49,9 @@
#include "i915_gpu_error.h"
#include "i915_memcpy.h"
#include "i915_scatterlist.h"
+#include "i915_vma_snapshot.h"
-#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+#define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
static void __sg_set_buf(struct scatterlist *sg,
@@ -276,16 +277,16 @@ static bool compress_start(struct i915_vma_compress *c)
static void *compress_next_page(struct i915_vma_compress *c,
struct i915_vma_coredump *dst)
{
- void *page;
+ void *page_addr;
+ struct page *page;
- if (dst->page_count >= dst->num_pages)
- return ERR_PTR(-ENOSPC);
-
- page = pool_alloc(&c->pool, ALLOW_FAIL);
- if (!page)
+ page_addr = pool_alloc(&c->pool, ALLOW_FAIL);
+ if (!page_addr)
return ERR_PTR(-ENOMEM);
- return dst->pages[dst->page_count++] = page;
+ page = virt_to_page(page_addr);
+ list_add_tail(&page->lru, &dst->page_list);
+ return page_addr;
}
static int compress_page(struct i915_vma_compress *c,
@@ -398,7 +399,7 @@ static int compress_page(struct i915_vma_compress *c,
if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
memcpy(ptr, src, PAGE_SIZE);
- dst->pages[dst->page_count++] = ptr;
+ list_add_tail(&virt_to_page(ptr)->lru, &dst->page_list);
cond_resched();
return 0;
@@ -505,7 +506,7 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
const struct i915_gem_context_coredump *ctx)
{
- const u32 period = m->i915->gt.clock_period_ns;
+ const u32 period = to_gt(m->i915)->clock_period_ns;
err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n",
header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
@@ -615,7 +616,7 @@ static void print_error_vma(struct drm_i915_error_state_buf *m,
const struct i915_vma_coredump *vma)
{
char out[ASCII85_BUFSZ];
- int page;
+ struct page *page;
if (!vma)
return;
@@ -629,16 +630,17 @@ static void print_error_vma(struct drm_i915_error_state_buf *m,
err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes);
err_compression_marker(m);
- for (page = 0; page < vma->page_count; page++) {
+ list_for_each_entry(page, &vma->page_list, lru) {
int i, len;
+ const u32 *addr = page_address(page);
len = PAGE_SIZE;
- if (page == vma->page_count - 1)
+ if (page == list_last_entry(&vma->page_list, typeof(*page), lru))
len -= vma->unused;
len = ascii85_encode_len(len);
for (i = 0; i < len; i++)
- err_puts(m, ascii85_encode(vma->pages[page][i], out));
+ err_puts(m, ascii85_encode(addr[i], out));
}
err_puts(m, "\n");
}
@@ -947,10 +949,12 @@ static void i915_vma_coredump_free(struct i915_vma_coredump *vma)
{
while (vma) {
struct i915_vma_coredump *next = vma->next;
- int page;
+ struct page *page, *n;
- for (page = 0; page < vma->page_count; page++)
- free_page((unsigned long)vma->pages[page]);
+ list_for_each_entry_safe(page, n, &vma->page_list, lru) {
+ list_del_init(&page->lru);
+ __free_page(page);
+ }
kfree(vma);
vma = next;
@@ -1010,25 +1014,21 @@ void __i915_gpu_coredump_free(struct kref *error_ref)
static struct i915_vma_coredump *
i915_vma_coredump_create(const struct intel_gt *gt,
- const struct i915_vma *vma,
- const char *name,
+ const struct i915_vma_snapshot *vsnap,
struct i915_vma_compress *compress)
{
struct i915_ggtt *ggtt = gt->ggtt;
const u64 slot = ggtt->error_capture.start;
struct i915_vma_coredump *dst;
- unsigned long num_pages;
struct sgt_iter iter;
int ret;
might_sleep();
- if (!vma || !vma->pages || !compress)
+ if (!vsnap || !vsnap->pages || !compress)
return NULL;
- num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
- num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
- dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ALLOW_FAIL);
+ dst = kmalloc(sizeof(*dst), ALLOW_FAIL);
if (!dst)
return NULL;
@@ -1037,14 +1037,13 @@ i915_vma_coredump_create(const struct intel_gt *gt,
return NULL;
}
- strcpy(dst->name, name);
+ INIT_LIST_HEAD(&dst->page_list);
+ strcpy(dst->name, vsnap->name);
dst->next = NULL;
- dst->gtt_offset = vma->node.start;
- dst->gtt_size = vma->node.size;
- dst->gtt_page_sizes = vma->page_sizes.gtt;
- dst->num_pages = num_pages;
- dst->page_count = 0;
+ dst->gtt_offset = vsnap->gtt_offset;
+ dst->gtt_size = vsnap->gtt_size;
+ dst->gtt_page_sizes = vsnap->page_sizes;
dst->unused = 0;
ret = -EINVAL;
@@ -1052,7 +1051,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
void __iomem *s;
dma_addr_t dma;
- for_each_sgt_daddr(dma, iter, vma->pages) {
+ for_each_sgt_daddr(dma, iter, vsnap->pages) {
mutex_lock(&ggtt->error_mutex);
ggtt->vm.insert_page(&ggtt->vm, dma, slot,
I915_CACHE_NONE, 0);
@@ -1070,11 +1069,11 @@ i915_vma_coredump_create(const struct intel_gt *gt,
if (ret)
break;
}
- } else if (__i915_gem_object_is_lmem(vma->obj)) {
- struct intel_memory_region *mem = vma->obj->mm.region;
+ } else if (vsnap->mr && vsnap->mr->type != INTEL_MEMORY_SYSTEM) {
+ struct intel_memory_region *mem = vsnap->mr;
dma_addr_t dma;
- for_each_sgt_daddr(dma, iter, vma->pages) {
+ for_each_sgt_daddr(dma, iter, vsnap->pages) {
void __iomem *s;
s = io_mapping_map_wc(&mem->iomap,
@@ -1090,7 +1089,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
} else {
struct page *page;
- for_each_sgt_page(page, iter, vma->pages) {
+ for_each_sgt_page(page, iter, vsnap->pages) {
void *s;
drm_clflush_pages(&page, 1);
@@ -1107,8 +1106,13 @@ i915_vma_coredump_create(const struct intel_gt *gt,
}
if (ret || compress_flush(compress, dst)) {
- while (dst->page_count--)
- pool_free(&compress->pool, dst->pages[dst->page_count]);
+ struct page *page, *n;
+
+ list_for_each_entry_safe_reverse(page, n, &dst->page_list, lru) {
+ list_del_init(&page->lru);
+ pool_free(&compress->pool, page_address(page));
+ }
+
kfree(dst);
dst = NULL;
}
@@ -1321,38 +1325,72 @@ static bool record_context(struct i915_gem_context_coredump *e,
struct intel_engine_capture_vma {
struct intel_engine_capture_vma *next;
- struct i915_vma *vma;
+ struct i915_vma_snapshot *vsnap;
char name[16];
+ bool lockdep_cookie;
};
static struct intel_engine_capture_vma *
-capture_vma(struct intel_engine_capture_vma *next,
- struct i915_vma *vma,
- const char *name,
- gfp_t gfp)
+capture_vma_snapshot(struct intel_engine_capture_vma *next,
+ struct i915_vma_snapshot *vsnap,
+ gfp_t gfp)
{
struct intel_engine_capture_vma *c;
- if (!vma)
+ if (!i915_vma_snapshot_present(vsnap))
return next;
c = kmalloc(sizeof(*c), gfp);
if (!c)
return next;
- if (!i915_active_acquire_if_busy(&vma->active)) {
+ if (!i915_vma_snapshot_resource_pin(vsnap, &c->lockdep_cookie)) {
kfree(c);
return next;
}
- strcpy(c->name, name);
- c->vma = vma; /* reference held while active */
+ strcpy(c->name, vsnap->name);
+ c->vsnap = vsnap;
+ i915_vma_snapshot_get(vsnap);
c->next = next;
return c;
}
static struct intel_engine_capture_vma *
+capture_vma(struct intel_engine_capture_vma *next,
+ struct i915_vma *vma,
+ const char *name,
+ gfp_t gfp)
+{
+ struct i915_vma_snapshot *vsnap;
+
+ if (!vma)
+ return next;
+
+ /*
+ * If the vma isn't pinned, then the vma should be snapshotted
+ * to a struct i915_vma_snapshot at command submission time.
+ * Not here.
+ */
+ GEM_WARN_ON(!i915_vma_is_pinned(vma));
+ if (!i915_vma_is_pinned(vma))
+ return next;
+
+ vsnap = i915_vma_snapshot_alloc(gfp);
+ if (!vsnap)
+ return next;
+
+ i915_vma_snapshot_init(vsnap, vma, name);
+ next = capture_vma_snapshot(next, vsnap, gfp);
+
+ /* FIXME: Replace on async unbind. */
+ i915_vma_snapshot_put(vsnap);
+
+ return next;
+}
+
+static struct intel_engine_capture_vma *
capture_user(struct intel_engine_capture_vma *capture,
const struct i915_request *rq,
gfp_t gfp)
@@ -1360,7 +1398,7 @@ capture_user(struct intel_engine_capture_vma *capture,
struct i915_capture_list *c;
for (c = rq->capture_list; c; c = c->next)
- capture = capture_vma(capture, c->vma, "user", gfp);
+ capture = capture_vma_snapshot(capture, c->vma_snapshot, gfp);
return capture;
}
@@ -1374,6 +1412,33 @@ static void add_vma(struct intel_engine_coredump *ee,
}
}
+static struct i915_vma_coredump *
+create_vma_coredump(const struct intel_gt *gt, struct i915_vma *vma,
+ const char *name, struct i915_vma_compress *compress)
+{
+ struct i915_vma_coredump *ret;
+ struct i915_vma_snapshot tmp;
+
+ if (!vma)
+ return NULL;
+
+ GEM_WARN_ON(!i915_vma_is_pinned(vma));
+ i915_vma_snapshot_init_onstack(&tmp, vma, name);
+ ret = i915_vma_coredump_create(gt, &tmp, compress);
+ i915_vma_snapshot_put_onstack(&tmp);
+
+ return ret;
+}
+
+static void add_vma_coredump(struct intel_engine_coredump *ee,
+ const struct intel_gt *gt,
+ struct i915_vma *vma,
+ const char *name,
+ struct i915_vma_compress *compress)
+{
+ add_vma(ee, create_vma_coredump(gt, vma, name, compress));
+}
+
struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
{
@@ -1407,7 +1472,7 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
* as the simplest method to avoid being overwritten
* by userspace.
*/
- vma = capture_vma(vma, rq->batch, "batch", gfp);
+ vma = capture_vma_snapshot(vma, &rq->batch_snapshot, gfp);
vma = capture_user(vma, rq, gfp);
vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
vma = capture_vma(vma, rq->context->state, "HW context", gfp);
@@ -1428,30 +1493,24 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
while (capture) {
struct intel_engine_capture_vma *this = capture;
- struct i915_vma *vma = this->vma;
+ struct i915_vma_snapshot *vsnap = this->vsnap;
add_vma(ee,
i915_vma_coredump_create(engine->gt,
- vma, this->name,
- compress));
+ vsnap, compress));
- i915_active_release(&vma->active);
+ i915_vma_snapshot_resource_unpin(vsnap, this->lockdep_cookie);
+ i915_vma_snapshot_put(vsnap);
capture = this->next;
kfree(this);
}
- add_vma(ee,
- i915_vma_coredump_create(engine->gt,
- engine->status_page.vma,
- "HW Status",
- compress));
+ add_vma_coredump(ee, engine->gt, engine->status_page.vma,
+ "HW Status", compress);
- add_vma(ee,
- i915_vma_coredump_create(engine->gt,
- engine->wa_ctx.vma,
- "WA context",
- compress));
+ add_vma_coredump(ee, engine->gt, engine->wa_ctx.vma,
+ "WA context", compress);
}
static struct intel_engine_coredump *
@@ -1487,17 +1546,25 @@ capture_engine(struct intel_engine_cs *engine,
}
}
if (rq)
- capture = intel_engine_coredump_add_request(ee, rq,
- ATOMIC_MAYFAIL);
+ rq = i915_request_get_rcu(rq);
+
+ if (!rq)
+ goto no_request_capture;
+
+ capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
if (!capture) {
-no_request_capture:
- kfree(ee);
- return NULL;
+ i915_request_put(rq);
+ goto no_request_capture;
}
intel_engine_coredump_add_vma(ee, capture, compress);
+ i915_request_put(rq);
return ee;
+
+no_request_capture:
+ kfree(ee);
+ return NULL;
}
static void
@@ -1551,10 +1618,8 @@ gt_record_uc(struct intel_gt_coredump *gt,
*/
error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
- error_uc->guc_log =
- i915_vma_coredump_create(gt->_gt,
- uc->guc.log.vma, "GuC log buffer",
- compress);
+ error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
+ "GuC log buffer", compress);
return error_uc;
}
@@ -1751,10 +1816,7 @@ static void capture_gen(struct i915_gpu_coredump *error)
error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count);
error->suspended = i915->runtime_pm.suspended;
- error->iommu = -1;
-#ifdef CONFIG_INTEL_IOMMU
- error->iommu = intel_iommu_gfx_mapped;
-#endif
+ error->iommu = intel_vtd_active(i915);
error->reset_count = i915_reset_count(&i915->gpu_error);
error->suspend_count = i915->suspend_count;
@@ -1785,7 +1847,7 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
error->time = ktime_get_real();
error->boottime = ktime_get_boottime();
- error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
+ error->uptime = ktime_sub(ktime_get(), to_gt(i915)->last_init_time);
error->capture = jiffies;
capture_gen(error);
@@ -1840,8 +1902,8 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt,
kfree(compress);
}
-struct i915_gpu_coredump *
-i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+static struct i915_gpu_coredump *
+__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
struct drm_i915_private *i915 = gt->i915;
struct i915_gpu_coredump *error;
@@ -1882,6 +1944,22 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
return error;
}
+struct i915_gpu_coredump *
+i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+{
+ static DEFINE_MUTEX(capture_mutex);
+ int ret = mutex_lock_interruptible(&capture_mutex);
+ struct i915_gpu_coredump *dump;
+
+ if (ret)
+ return ERR_PTR(ret);
+
+ dump = __i915_gpu_coredump(gt, engine_mask);
+ mutex_unlock(&capture_mutex);
+
+ return dump;
+}
+
void i915_error_state_store(struct i915_gpu_coredump *error)
{
struct drm_i915_private *i915;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index b98d8cdbe4f2..5aedf5129814 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -39,10 +39,8 @@ struct i915_vma_coredump {
u64 gtt_size;
u32 gtt_page_sizes;
- int num_pages;
- int page_count;
int unused;
- u32 *pages[];
+ struct list_head page_list;
};
struct i915_request_coredump {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 14ae4f9b3fa6..5d0bffa472bc 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1037,7 +1037,7 @@ static void ivb_parity_work(struct work_struct *work)
{
struct drm_i915_private *dev_priv =
container_of(work, typeof(*dev_priv), l3_parity.error_work);
- struct intel_gt *gt = &dev_priv->gt;
+ struct intel_gt *gt = to_gt(dev_priv);
u32 error_status, row, bank, subbank;
char *parity_event[6];
u32 misccpctl;
@@ -1715,9 +1715,9 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
intel_uncore_write(&dev_priv->uncore, VLV_MASTER_IER, MASTER_INTERRUPT_ENABLE);
if (gt_iir)
- gen6_gt_irq_handler(&dev_priv->gt, gt_iir);
+ gen6_gt_irq_handler(to_gt(dev_priv), gt_iir);
if (pm_iir)
- gen6_rps_irq_handler(&dev_priv->gt.rps, pm_iir);
+ gen6_rps_irq_handler(&to_gt(dev_priv)->rps, pm_iir);
if (hotplug_status)
i9xx_hpd_irq_handler(dev_priv, hotplug_status);
@@ -1774,7 +1774,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
ier = intel_uncore_read(&dev_priv->uncore, VLV_IER);
intel_uncore_write(&dev_priv->uncore, VLV_IER, 0);
- gen8_gt_irq_handler(&dev_priv->gt, master_ctl);
+ gen8_gt_irq_handler(to_gt(dev_priv), master_ctl);
if (iir & I915_DISPLAY_PORT_INTERRUPT)
hotplug_status = i9xx_hpd_irq_ack(dev_priv);
@@ -2105,7 +2105,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv,
}
if (DISPLAY_VER(dev_priv) == 5 && de_iir & DE_PCU_EVENT)
- gen5_rps_irq_handler(&dev_priv->gt.rps);
+ gen5_rps_irq_handler(&to_gt(dev_priv)->rps);
}
static void ivb_display_irq_handler(struct drm_i915_private *dev_priv,
@@ -2186,9 +2186,9 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg)
if (gt_iir) {
raw_reg_write(regs, GTIIR, gt_iir);
if (GRAPHICS_VER(i915) >= 6)
- gen6_gt_irq_handler(&i915->gt, gt_iir);
+ gen6_gt_irq_handler(to_gt(i915), gt_iir);
else
- gen5_gt_irq_handler(&i915->gt, gt_iir);
+ gen5_gt_irq_handler(to_gt(i915), gt_iir);
ret = IRQ_HANDLED;
}
@@ -2206,7 +2206,7 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg)
u32 pm_iir = raw_reg_read(regs, GEN6_PMIIR);
if (pm_iir) {
raw_reg_write(regs, GEN6_PMIIR, pm_iir);
- gen6_rps_irq_handler(&i915->gt.rps, pm_iir);
+ gen6_rps_irq_handler(&to_gt(i915)->rps, pm_iir);
ret = IRQ_HANDLED;
}
}
@@ -2632,7 +2632,7 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
}
/* Find, queue (onto bottom-halves), then clear each source */
- gen8_gt_irq_handler(&dev_priv->gt, master_ctl);
+ gen8_gt_irq_handler(to_gt(dev_priv), master_ctl);
/* IRQs are synced during runtime_suspend, we don't require a wakeref */
if (master_ctl & ~GEN8_GT_IRQS) {
@@ -2712,7 +2712,7 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
{
struct drm_i915_private *i915 = arg;
void __iomem * const regs = i915->uncore.regs;
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
u32 master_ctl;
u32 gu_misc_iir;
@@ -2768,8 +2768,8 @@ static inline void dg1_master_intr_enable(void __iomem * const regs)
static irqreturn_t dg1_irq_handler(int irq, void *arg)
{
struct drm_i915_private * const i915 = arg;
- struct intel_gt *gt = &i915->gt;
- void __iomem * const regs = i915->uncore.regs;
+ struct intel_gt *gt = to_gt(i915);
+ void __iomem * const regs = gt->uncore->regs;
u32 master_tile_ctl, master_ctl;
u32 gu_misc_iir;
@@ -3072,7 +3072,7 @@ static void ilk_irq_reset(struct drm_i915_private *dev_priv)
intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff);
}
- gen5_gt_irq_reset(&dev_priv->gt);
+ gen5_gt_irq_reset(to_gt(dev_priv));
ibx_irq_reset(dev_priv);
}
@@ -3082,7 +3082,7 @@ static void valleyview_irq_reset(struct drm_i915_private *dev_priv)
intel_uncore_write(&dev_priv->uncore, VLV_MASTER_IER, 0);
intel_uncore_posting_read(&dev_priv->uncore, VLV_MASTER_IER);
- gen5_gt_irq_reset(&dev_priv->gt);
+ gen5_gt_irq_reset(to_gt(dev_priv));
spin_lock_irq(&dev_priv->irq_lock);
if (dev_priv->display_irqs_enabled)
@@ -3116,7 +3116,7 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv)
gen8_master_intr_disable(dev_priv->uncore.regs);
- gen8_gt_irq_reset(&dev_priv->gt);
+ gen8_gt_irq_reset(to_gt(dev_priv));
gen8_display_irq_reset(dev_priv);
GEN3_IRQ_RESET(uncore, GEN8_PCU_);
@@ -3170,11 +3170,12 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv)
static void gen11_irq_reset(struct drm_i915_private *dev_priv)
{
- struct intel_uncore *uncore = &dev_priv->uncore;
+ struct intel_gt *gt = to_gt(dev_priv);
+ struct intel_uncore *uncore = gt->uncore;
gen11_master_intr_disable(dev_priv->uncore.regs);
- gen11_gt_irq_reset(&dev_priv->gt);
+ gen11_gt_irq_reset(gt);
gen11_display_irq_reset(dev_priv);
GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_);
@@ -3183,11 +3184,12 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv)
static void dg1_irq_reset(struct drm_i915_private *dev_priv)
{
- struct intel_uncore *uncore = &dev_priv->uncore;
+ struct intel_gt *gt = to_gt(dev_priv);
+ struct intel_uncore *uncore = gt->uncore;
dg1_master_intr_disable(dev_priv->uncore.regs);
- gen11_gt_irq_reset(&dev_priv->gt);
+ gen11_gt_irq_reset(gt);
gen11_display_irq_reset(dev_priv);
GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_);
@@ -3247,7 +3249,7 @@ static void cherryview_irq_reset(struct drm_i915_private *dev_priv)
intel_uncore_write(&dev_priv->uncore, GEN8_MASTER_IRQ, 0);
intel_uncore_posting_read(&dev_priv->uncore, GEN8_MASTER_IRQ);
- gen8_gt_irq_reset(&dev_priv->gt);
+ gen8_gt_irq_reset(to_gt(dev_priv));
GEN3_IRQ_RESET(uncore, GEN8_PCU_);
@@ -3704,7 +3706,7 @@ static void ilk_irq_postinstall(struct drm_i915_private *dev_priv)
ibx_irq_postinstall(dev_priv);
- gen5_gt_irq_postinstall(&dev_priv->gt);
+ gen5_gt_irq_postinstall(to_gt(dev_priv));
GEN3_IRQ_INIT(uncore, DE, dev_priv->irq_mask,
display_mask | extra_mask);
@@ -3741,7 +3743,7 @@ void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv)
static void valleyview_irq_postinstall(struct drm_i915_private *dev_priv)
{
- gen5_gt_irq_postinstall(&dev_priv->gt);
+ gen5_gt_irq_postinstall(to_gt(dev_priv));
spin_lock_irq(&dev_priv->irq_lock);
if (dev_priv->display_irqs_enabled)
@@ -3847,7 +3849,7 @@ static void gen8_irq_postinstall(struct drm_i915_private *dev_priv)
else if (HAS_PCH_SPLIT(dev_priv))
ibx_irq_postinstall(dev_priv);
- gen8_gt_irq_postinstall(&dev_priv->gt);
+ gen8_gt_irq_postinstall(to_gt(dev_priv));
gen8_de_irq_postinstall(dev_priv);
gen8_master_intr_enable(dev_priv->uncore.regs);
@@ -3866,13 +3868,14 @@ static void gen11_de_irq_postinstall(struct drm_i915_private *dev_priv)
static void gen11_irq_postinstall(struct drm_i915_private *dev_priv)
{
- struct intel_uncore *uncore = &dev_priv->uncore;
+ struct intel_gt *gt = to_gt(dev_priv);
+ struct intel_uncore *uncore = gt->uncore;
u32 gu_misc_masked = GEN11_GU_MISC_GSE;
if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
icp_irq_postinstall(dev_priv);
- gen11_gt_irq_postinstall(&dev_priv->gt);
+ gen11_gt_irq_postinstall(gt);
gen11_de_irq_postinstall(dev_priv);
GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked);
@@ -3883,10 +3886,11 @@ static void gen11_irq_postinstall(struct drm_i915_private *dev_priv)
static void dg1_irq_postinstall(struct drm_i915_private *dev_priv)
{
- struct intel_uncore *uncore = &dev_priv->uncore;
+ struct intel_gt *gt = to_gt(dev_priv);
+ struct intel_uncore *uncore = gt->uncore;
u32 gu_misc_masked = GEN11_GU_MISC_GSE;
- gen11_gt_irq_postinstall(&dev_priv->gt);
+ gen11_gt_irq_postinstall(gt);
GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked);
@@ -3897,13 +3901,13 @@ static void dg1_irq_postinstall(struct drm_i915_private *dev_priv)
GEN11_DISPLAY_IRQ_ENABLE);
}
- dg1_master_intr_enable(dev_priv->uncore.regs);
- intel_uncore_posting_read(&dev_priv->uncore, DG1_MSTR_TILE_INTR);
+ dg1_master_intr_enable(uncore->regs);
+ intel_uncore_posting_read(uncore, DG1_MSTR_TILE_INTR);
}
static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv)
{
- gen8_gt_irq_postinstall(&dev_priv->gt);
+ gen8_gt_irq_postinstall(to_gt(dev_priv));
spin_lock_irq(&dev_priv->irq_lock);
if (dev_priv->display_irqs_enabled)
@@ -4066,7 +4070,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir);
if (iir & I915_USER_INTERRUPT)
- intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
+ intel_engine_cs_irq(to_gt(dev_priv)->engine[RCS0], iir);
if (iir & I915_MASTER_ERROR_INTERRUPT)
i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4174,7 +4178,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
if (iir & I915_USER_INTERRUPT)
- intel_engine_cs_irq(dev_priv->gt.engine[RCS0], iir);
+ intel_engine_cs_irq(to_gt(dev_priv)->engine[RCS0], iir);
if (iir & I915_MASTER_ERROR_INTERRUPT)
i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4319,11 +4323,11 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir);
if (iir & I915_USER_INTERRUPT)
- intel_engine_cs_irq(dev_priv->gt.engine[RCS0],
+ intel_engine_cs_irq(to_gt(dev_priv)->engine[RCS0],
iir);
if (iir & I915_BSD_USER_INTERRUPT)
- intel_engine_cs_irq(dev_priv->gt.engine[VCS0],
+ intel_engine_cs_irq(to_gt(dev_priv)->engine[VCS0],
iir >> 25);
if (iir & I915_MASTER_ERROR_INTERRUPT)
@@ -4374,7 +4378,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
/* pre-gen11 the guc irqs bits are in the upper 16 bits of the pm reg */
if (HAS_GT_UC(dev_priv) && GRAPHICS_VER(dev_priv) < 11)
- dev_priv->gt.pm_guc_events = GUC_INTR_GUC2HOST << 16;
+ to_gt(dev_priv)->pm_guc_events = GUC_INTR_GUC2HOST << 16;
if (!HAS_DISPLAY(dev_priv))
return;
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index e07f4cfea63a..525ae832aa9a 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -140,6 +140,9 @@ i915_param_named_unsafe(invert_brightness, int, 0400,
i915_param_named(disable_display, bool, 0400,
"Disable display (default: false)");
+i915_param_named(memtest, bool, 0400,
+ "Perform a read/write test of all device memory on module load (default: off)");
+
i915_param_named(mmio_debug, int, 0400,
"Enable the MMIO debug code for the first N failures (default: off). "
"This may negatively affect performance.");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 8d725b64592d..c9d53ff910a0 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -64,6 +64,7 @@ struct drm_printer;
param(char *, guc_firmware_path, NULL, 0400) \
param(char *, huc_firmware_path, NULL, 0400) \
param(char *, dmc_firmware_path, NULL, 0400) \
+ param(bool, memtest, false, 0400) \
param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \
param(int, edp_vswing, 0, 0400) \
param(unsigned int, reset, 3, 0600) \
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 960c358990bc..8261b6455747 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -31,8 +31,8 @@
#define PLATFORM(x) .platform = (x)
#define GEN(x) \
- .graphics_ver = (x), \
- .media_ver = (x), \
+ .graphics.ver = (x), \
+ .media.ver = (x), \
.display.ver = (x)
#define I845_PIPE_OFFSETS \
@@ -904,7 +904,7 @@ static const struct intel_device_info rkl_info = {
static const struct intel_device_info dg1_info = {
GEN12_FEATURES,
DGFX_FEATURES,
- .graphics_rel = 10,
+ .graphics.rel = 10,
PLATFORM(INTEL_DG1),
.display.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D),
.require_force_probe = 1,
@@ -998,8 +998,8 @@ static const struct intel_device_info adl_p_info = {
I915_GTT_PAGE_SIZE_2M
#define XE_HP_FEATURES \
- .graphics_ver = 12, \
- .graphics_rel = 50, \
+ .graphics.ver = 12, \
+ .graphics.rel = 50, \
XE_HP_PAGE_SIZES, \
.dma_mask_size = 46, \
.has_64bit_reloc = 1, \
@@ -1017,8 +1017,8 @@ static const struct intel_device_info adl_p_info = {
.ppgtt_type = INTEL_PPGTT_FULL
#define XE_HPM_FEATURES \
- .media_ver = 12, \
- .media_rel = 50
+ .media.ver = 12, \
+ .media.rel = 50
__maybe_unused
static const struct intel_device_info xehpsdv_info = {
@@ -1027,6 +1027,7 @@ static const struct intel_device_info xehpsdv_info = {
DGFX_FEATURES,
PLATFORM(INTEL_XEHPSDV),
.display = { },
+ .has_64k_pages = 1,
.platform_engine_mask =
BIT(RCS0) | BIT(BCS0) |
BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) |
@@ -1041,9 +1042,10 @@ static const struct intel_device_info dg2_info = {
XE_HPM_FEATURES,
XE_LPD_FEATURES,
DGFX_FEATURES,
- .graphics_rel = 55,
- .media_rel = 55,
+ .graphics.rel = 55,
+ .media.rel = 55,
PLATFORM(INTEL_DG2),
+ .has_64k_pages = 1,
.platform_engine_mask =
BIT(RCS0) | BIT(BCS0) |
BIT(VECS0) | BIT(VECS1) |
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index aa21e9fe3c78..8c2f1e91d0af 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -4274,26 +4274,6 @@ static struct ctl_table oa_table[] = {
{}
};
-static struct ctl_table i915_root[] = {
- {
- .procname = "i915",
- .maxlen = 0,
- .mode = 0555,
- .child = oa_table,
- },
- {}
-};
-
-static struct ctl_table dev_root[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = i915_root,
- },
- {}
-};
-
static void oa_init_supported_formats(struct i915_perf *perf)
{
struct drm_i915_private *i915 = perf->i915;
@@ -4444,7 +4424,7 @@ void i915_perf_init(struct drm_i915_private *i915)
mutex_init(&perf->lock);
/* Choose a representative limit */
- oa_sample_rate_hard_limit = i915->gt.clock_frequency / 2;
+ oa_sample_rate_hard_limit = to_gt(i915)->clock_frequency / 2;
mutex_init(&perf->metrics_lock);
idr_init_base(&perf->metrics_idr, 1);
@@ -4489,7 +4469,7 @@ static int destroy_config(int id, void *p, void *data)
int i915_perf_sysctl_register(void)
{
- sysctl_header = register_sysctl_table(dev_root);
+ sysctl_header = register_sysctl("dev/i915", oa_table);
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 290505b432bc..bf93f9720e0a 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -211,8 +211,8 @@ static void init_rc6(struct i915_pmu *pmu)
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
intel_wakeref_t wakeref;
- with_intel_runtime_pm(i915->gt.uncore->rpm, wakeref) {
- pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
+ with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref) {
+ pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur =
pmu->sample[__I915_SAMPLE_RC6].cur;
pmu->sleep_last = ktime_get_raw();
@@ -223,7 +223,7 @@ static void park_rc6(struct drm_i915_private *i915)
{
struct i915_pmu *pmu = &i915->pmu;
- pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
+ pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(to_gt(i915));
pmu->sleep_last = ktime_get_raw();
}
@@ -420,7 +420,7 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
struct drm_i915_private *i915 =
container_of(hrtimer, struct drm_i915_private, pmu.timer);
struct i915_pmu *pmu = &i915->pmu;
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
unsigned int period_ns;
ktime_t now;
@@ -477,7 +477,7 @@ engine_event_status(struct intel_engine_cs *engine,
static int
config_status(struct drm_i915_private *i915, u64 config)
{
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
switch (config) {
case I915_PMU_ACTUAL_FREQUENCY:
@@ -602,10 +602,10 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
val = READ_ONCE(pmu->irq_count);
break;
case I915_PMU_RC6_RESIDENCY:
- val = get_rc6(&i915->gt);
+ val = get_rc6(to_gt(i915));
break;
case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
- val = ktime_to_ns(intel_gt_get_awake_time(&i915->gt));
+ val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
break;
}
}
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 51b368be0fc4..2dfbc22857a3 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -31,7 +31,7 @@ static int copy_query_item(void *query_hdr, size_t query_sz,
static int query_topology_info(struct drm_i915_private *dev_priv,
struct drm_i915_query_item *query_item)
{
- const struct sseu_dev_info *sseu = &dev_priv->gt.info.sseu;
+ const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
struct drm_i915_query_topology_info topo;
u32 slice_length, subslice_length, eu_length, total_length;
int ret;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ef8ae4076d9c..acd0904f52a9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -329,6 +329,18 @@
#define ECOBITS_PPGTT_CACHE64B (3 << 8)
#define ECOBITS_PPGTT_CACHE4B (0 << 8)
+#define GEN12_GAMCNTRL_CTRL _MMIO(0xcf54)
+#define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12)
+#define GLOBAL_INVALIDATION_MODE REG_BIT(2)
+
+#define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c)
+#define CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12)
+#define EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11)
+#define TAG_BLOCK_CLKGATE_DIS REG_BIT(7)
+
+#define GEN12_MERT_MOD_CTRL _MMIO(0xcf28)
+#define FORCE_MISS_FTLB REG_BIT(3)
+
#define GAB_CTL _MMIO(0x24000)
#define GAB_CTL_CONT_AFTER_PAGEFAULT (1 << 8)
@@ -542,6 +554,9 @@
#define GEN12_OA_TLB_INV_CR _MMIO(0xceec)
+#define GEN12_SQCM _MMIO(0x8724)
+#define EN_32B_ACCESS REG_BIT(30)
+
/* Gen12 OAR unit */
#define GEN12_OAR_OACONTROL _MMIO(0x2960)
#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
@@ -593,6 +608,9 @@
#define EU_PERF_CNTL5 _MMIO(0xe55c)
#define EU_PERF_CNTL6 _MMIO(0xe65c)
+#define RT_CTRL _MMIO(0xe530)
+#define DIS_NULL_QUERY REG_BIT(10)
+
/*
* OA Boolean state
*/
@@ -1826,6 +1844,8 @@
#define BLT_HWS_PGA_GEN7 _MMIO(0x04280)
#define VEBOX_HWS_PGA_GEN7 _MMIO(0x04380)
+#define GUCPMTIMESTAMP _MMIO(0xC3E8)
+
#define GEN7_TLB_RD_ADDR _MMIO(0x4700)
#define GEN9_GAMT_ECO_REG_RW_IA _MMIO(0x4ab0)
@@ -1840,6 +1860,12 @@
#define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28)
#define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24)
+#define GEN8_RTCR _MMIO(0x4260)
+#define GEN8_M1TCR _MMIO(0x4264)
+#define GEN8_M2TCR _MMIO(0x4268)
+#define GEN8_BTCR _MMIO(0x426c)
+#define GEN8_VTCR _MMIO(0x4270)
+
#if 0
#define PRB0_TAIL _MMIO(0x2030)
#define PRB0_HEAD _MMIO(0x2034)
@@ -1886,6 +1912,9 @@
#define NOPID _MMIO(0x2094)
#define HWSTAM _MMIO(0x2098)
+#define VDBOX_CGCTL3F18(base) _MMIO((base) + 0x3f18)
+#define ALNUNIT_CLKGATE_DIS REG_BIT(13)
+
#define ERROR_GEN6 _MMIO(0x40a0)
#define GEN7_ERR_INT _MMIO(0x44040)
#define ERR_INT_POISON (1 << 31)
@@ -1906,6 +1935,11 @@
#define FAULT_VA_HIGH_BITS (0xf << 0)
#define FAULT_GTT_SEL (1 << 4)
+#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
+#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
+#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
+#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
+
#define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
#define FPGA_DBG _MMIO(0x42300)
@@ -1984,6 +2018,15 @@
#define GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2)
#define GEN11_ENABLE_32_PLANE_MODE (1 << 7)
+#define SCCGCTL94DC _MMIO(0x94dc)
+#define CG3DDISURB REG_BIT(14)
+
+#define MLTICTXCTL _MMIO(0xb170)
+#define TDONRENDER REG_BIT(2)
+
+#define L3SQCREG1_CCS0 _MMIO(0xb200)
+#define FLUSHALLNONCOH REG_BIT(5)
+
/* WaClearTdlStateAckDirtyBits */
#define GEN8_STATE_ACK _MMIO(0x20F0)
#define GEN9_STATE_ACK_SLICE1 _MMIO(0x20F8)
@@ -2188,7 +2231,8 @@
#define GEN9_RCS_FE_FSM2 _MMIO(0x22a4)
#define GEN10_CACHE_MODE_SS _MMIO(0xe420)
-#define FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
+#define ENABLE_PREFETCH_INTO_IC REG_BIT(3)
+#define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4)
/* Fuse readout registers for GT */
#define HSW_PAVP_FUSE1 _MMIO(0x911C)
@@ -3357,21 +3401,62 @@ enum {
/*
* GEN10 clock gating regs
*/
+
+#define UNSLCGCTL9440 _MMIO(0x9440)
+#define GAMTLBOACS_CLKGATE_DIS REG_BIT(28)
+#define GAMTLBVDBOX5_CLKGATE_DIS REG_BIT(27)
+#define GAMTLBVDBOX6_CLKGATE_DIS REG_BIT(26)
+#define GAMTLBVDBOX3_CLKGATE_DIS REG_BIT(24)
+#define GAMTLBVDBOX4_CLKGATE_DIS REG_BIT(23)
+#define GAMTLBVDBOX7_CLKGATE_DIS REG_BIT(22)
+#define GAMTLBVDBOX2_CLKGATE_DIS REG_BIT(21)
+#define GAMTLBVDBOX0_CLKGATE_DIS REG_BIT(17)
+#define GAMTLBKCR_CLKGATE_DIS REG_BIT(16)
+#define GAMTLBGUC_CLKGATE_DIS REG_BIT(15)
+#define GAMTLBBLT_CLKGATE_DIS REG_BIT(14)
+#define GAMTLBVDBOX1_CLKGATE_DIS REG_BIT(6)
+
+#define UNSLCGCTL9444 _MMIO(0x9444)
+#define GAMTLBGFXA0_CLKGATE_DIS REG_BIT(30)
+#define GAMTLBGFXA1_CLKGATE_DIS REG_BIT(29)
+#define GAMTLBCOMPA0_CLKGATE_DIS REG_BIT(28)
+#define GAMTLBCOMPA1_CLKGATE_DIS REG_BIT(27)
+#define GAMTLBCOMPB0_CLKGATE_DIS REG_BIT(26)
+#define GAMTLBCOMPB1_CLKGATE_DIS REG_BIT(25)
+#define GAMTLBCOMPC0_CLKGATE_DIS REG_BIT(24)
+#define GAMTLBCOMPC1_CLKGATE_DIS REG_BIT(23)
+#define GAMTLBCOMPD0_CLKGATE_DIS REG_BIT(22)
+#define GAMTLBCOMPD1_CLKGATE_DIS REG_BIT(21)
+#define GAMTLBMERT_CLKGATE_DIS REG_BIT(20)
+#define GAMTLBVEBOX3_CLKGATE_DIS REG_BIT(19)
+#define GAMTLBVEBOX2_CLKGATE_DIS REG_BIT(18)
+#define GAMTLBVEBOX1_CLKGATE_DIS REG_BIT(17)
+#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16)
+#define LTCDD_CLKGATE_DIS REG_BIT(10)
+
#define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
#define SARBUNIT_CLKGATE_DIS (1 << 5)
#define RCCUNIT_CLKGATE_DIS (1 << 7)
#define MSCUNIT_CLKGATE_DIS (1 << 10)
+#define NODEDSS_CLKGATE_DIS REG_BIT(12)
#define L3_CLKGATE_DIS REG_BIT(16)
#define L3_CR2X_CLKGATE_DIS REG_BIT(17)
#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524)
-#define GWUNIT_CLKGATE_DIS (1 << 16)
+#define DSS_ROUTER_CLKGATE_DIS REG_BIT(28)
+#define GWUNIT_CLKGATE_DIS REG_BIT(16)
#define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528)
#define CPSSUNIT_CLKGATE_DIS REG_BIT(9)
+#define SSMCGCTL9530 _MMIO(0x9530)
+#define RTFUNIT_CLKGATE_DIS REG_BIT(18)
+
#define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434)
#define VFUNIT_CLKGATE_DIS REG_BIT(20)
+#define TSGUNIT_CLKGATE_DIS REG_BIT(17) /* XEHPSDV */
+#define CG3DDISCFEG_CLKGATE_DIS REG_BIT(17) /* DG2 */
+#define GAMEDIA_CLKGATE_DIS REG_BIT(11)
#define HSUNIT_CLKGATE_DIS REG_BIT(8)
#define VSUNIT_CLKGATE_DIS REG_BIT(3)
@@ -7529,6 +7614,9 @@ enum {
#define GEN9_CTX_PREEMPT_REG _MMIO(0x2248)
#define GEN12_DISABLE_POSH_BUSY_FF_DOP_CG REG_BIT(11)
+#define GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON _MMIO(0x20EC)
+#define GEN12_REPLAY_MODE_GRANULARITY REG_BIT(0)
+
#define GEN8_CS_CHICKEN1 _MMIO(0x2580)
#define GEN9_PREEMPT_3D_OBJECT_LEVEL (1 << 0)
#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1))
@@ -7552,9 +7640,10 @@ enum {
#define GEN8_ERRDETBCTRL (1 << 9)
#define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304)
- #define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12)
- #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11)
- #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9)
+#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12)
+#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12)
+#define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11)
+#define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9)
#define HIZ_CHICKEN _MMIO(0x7018)
# define CHV_HZ_8X8_MODE_IN_1X REG_BIT(15)
@@ -7608,6 +7697,12 @@ enum {
#define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21)
#define GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22)
+#define GEN11_L3SQCREG5 _MMIO(0xb158)
+#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0)
+
+#define XEHP_L3SCQREG7 _MMIO(0xb188)
+#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3)
+
/* GEN8 chicken */
#define HDC_CHICKEN0 _MMIO(0x7300)
#define ICL_HDC_MODE _MMIO(0xE5F4)
@@ -7618,6 +7713,12 @@ enum {
#define HDC_FORCE_NON_COHERENT (1 << 4)
#define HDC_BARRIER_PERFORMANCE_DISABLE (1 << 10)
+#define GEN12_HDC_CHICKEN0 _MMIO(0xE5F0)
+#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11)
+
+#define SARB_CHICKEN1 _MMIO(0xe90c)
+#define COMP_CKN_IN REG_GENMASK(30, 29)
+
#define GEN8_HDC_CHICKEN1 _MMIO(0x7304)
/* GEN9 chicken */
@@ -7649,6 +7750,10 @@ enum {
#define DG2_RENDER_CCSTAG_4_3_EN REG_BIT(12)
#define PER_PIXEL_ALPHA_BYPASS_EN REG_BIT(7)
+#define VFLSKPD _MMIO(0x62a8)
+#define DIS_OVER_FETCH_CACHE REG_BIT(1)
+#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0)
+
#define FF_MODE2 _MMIO(0x6604)
#define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24)
#define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
@@ -8460,6 +8565,9 @@ enum {
#define GEN8_SDEUNIT_CLOCK_GATE_DISABLE (1 << 14)
#define GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1 << 28)
+#define UNSLCGCTL9430 _MMIO(0x9430)
+#define MSQDUNIT_CLKGATE_DIS REG_BIT(3)
+
#define GEN6_GFXPAUSE _MMIO(0xA000)
#define GEN6_RPNSWREQ _MMIO(0xA008)
#define GEN6_TURBO_DISABLE (1 << 31)
@@ -8469,6 +8577,7 @@ enum {
#define GEN6_OFFSET(x) ((x) << 19)
#define GEN6_AGGRESSIVE_TURBO (0 << 15)
#define GEN9_SW_REQ_UNSLICE_RATIO_SHIFT 23
+#define GEN9_IGNORE_SLICE_RATIO (0 << 0)
#define GEN6_RC_VIDEO_FREQ _MMIO(0xA00C)
#define GEN6_RC_CONTROL _MMIO(0xA090)
@@ -8504,6 +8613,9 @@ enum {
#define GEN6_RP_UP_BUSY_CONT (0x4 << 3)
#define GEN6_RP_DOWN_IDLE_AVG (0x2 << 0)
#define GEN6_RP_DOWN_IDLE_CONT (0x1 << 0)
+#define GEN6_RPSWCTL_SHIFT 9
+#define GEN9_RPSWCTL_ENABLE (0x2 << GEN6_RPSWCTL_SHIFT)
+#define GEN9_RPSWCTL_DISABLE (0x0 << GEN6_RPSWCTL_SHIFT)
#define GEN6_RP_UP_THRESHOLD _MMIO(0xA02C)
#define GEN6_RP_DOWN_THRESHOLD _MMIO(0xA030)
#define GEN6_RP_CUR_UP_EI _MMIO(0xA050)
@@ -8775,24 +8887,39 @@ enum {
#define GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3)
#define GEN8_ROW_CHICKEN _MMIO(0xe4f0)
-#define FLOW_CONTROL_ENABLE (1 << 15)
-#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1 << 8)
-#define STALL_DOP_GATING_DISABLE (1 << 5)
-#define THROTTLE_12_5 (7 << 2)
-#define DISABLE_EARLY_EOT (1 << 1)
+#define FLOW_CONTROL_ENABLE REG_BIT(15)
+#define UGM_BACKUP_MODE REG_BIT(13)
+#define MDQ_ARBITRATION_MODE REG_BIT(12)
+#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE REG_BIT(8)
+#define STALL_DOP_GATING_DISABLE REG_BIT(5)
+#define THROTTLE_12_5 REG_GENMASK(4, 2)
+#define DISABLE_EARLY_EOT REG_BIT(1)
#define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4)
+#define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15)
#define GEN12_DISABLE_EARLY_READ REG_BIT(14)
+#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12)
#define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8)
+#define LSC_CHICKEN_BIT_0 _MMIO(0xe7c8)
+#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
+#define LSC_CHICKEN_BIT_0_UDW _MMIO(0xe7c8 + 4)
+#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
+#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
+#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
+#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32)
+#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
+
#define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4)
#define DOP_CLOCK_GATING_DISABLE (1 << 0)
#define PUSH_CONSTANT_DEREF_DISABLE (1 << 8)
#define GEN11_TDL_CLOCK_GATING_FIX_DISABLE (1 << 1)
-#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c)
-#define GEN12_DISABLE_TDL_PUSH REG_BIT(9)
-#define GEN11_DIS_PICK_2ND_EU REG_BIT(7)
+#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c)
+#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13)
+#define GEN12_DISABLE_TDL_PUSH REG_BIT(9)
+#define GEN11_DIS_PICK_2ND_EU REG_BIT(7)
+#define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4)
#define HSW_ROW_CHICKEN3 _MMIO(0xe49c)
#define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6)
@@ -8807,9 +8934,10 @@ enum {
#define GEN8_SAMPLER_POWER_BYPASS_DIS (1 << 1)
#define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194)
-#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR (1 << 8)
-#define GEN9_ENABLE_YV12_BUGFIX (1 << 4)
-#define GEN9_ENABLE_GPGPU_PREEMPTION (1 << 2)
+#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15)
+#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR REG_BIT(8)
+#define GEN9_ENABLE_YV12_BUGFIX REG_BIT(4)
+#define GEN9_ENABLE_GPGPU_PREEMPTION REG_BIT(2)
/* Audio */
#define G4X_AUD_VID_DID _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x62020)
@@ -11504,11 +11632,19 @@ enum skl_power_gate {
#define PMFLUSH_GAPL3UNBLOCK (1 << 21)
#define PMFLUSHDONE_LNEBLK (1 << 22)
+#define XEHP_L3NODEARBCFG _MMIO(0xb0b4)
+#define XEHP_LNESPARE REG_BIT(19)
+
#define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
#define GEN12_GSMBASE _MMIO(0x108100)
#define GEN12_DSMBASE _MMIO(0x1080C0)
+#define XEHP_CLOCK_GATE_DIS _MMIO(0x101014)
+#define SGSI_SIDECLK_DIS REG_BIT(17)
+#define SGGI_DIS REG_BIT(15)
+#define SGR_DIS REG_BIT(13)
+
/* gamt regs */
#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
#define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */
@@ -11893,4 +12029,7 @@ enum skl_power_gate {
#define CLKGATE_DIS_MISC _MMIO(0x46534)
#define CLKGATE_DIS_MISC_DMASC_GATING_DIS REG_BIT(21)
+#define SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731C)
+#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14)
+
#endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 55934129a6be..5d94f86940f7 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,6 +29,7 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include "gem/i915_gem_context.h"
#include "gt/intel_breadcrumbs.h"
@@ -42,6 +43,7 @@
#include "gt/intel_rps.h"
#include "i915_active.h"
+#include "i915_deps.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_pm.h"
@@ -114,6 +116,10 @@ static void i915_fence_release(struct dma_fence *fence)
GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT &&
rq->guc_prio != GUC_PRIO_FINI);
+ i915_request_free_capture_list(fetch_and_zero(&rq->capture_list));
+ if (i915_vma_snapshot_present(&rq->batch_snapshot))
+ i915_vma_snapshot_put_onstack(&rq->batch_snapshot);
+
/*
* The request is put onto a RCU freelist (i.e. the address
* is immediately reused), mark the fences as being freed now.
@@ -187,19 +193,6 @@ void i915_request_notify_execute_cb_imm(struct i915_request *rq)
__notify_execute_cb(rq, irq_work_imm);
}
-static void free_capture_list(struct i915_request *request)
-{
- struct i915_capture_list *capture;
-
- capture = fetch_and_zero(&request->capture_list);
- while (capture) {
- struct i915_capture_list *next = capture->next;
-
- kfree(capture);
- capture = next;
- }
-}
-
static void __i915_request_fill(struct i915_request *rq, u8 val)
{
void *vaddr = rq->ring->vaddr;
@@ -304,6 +297,38 @@ static void __rq_cancel_watchdog(struct i915_request *rq)
i915_request_put(rq);
}
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
+/**
+ * i915_request_free_capture_list - Free a capture list
+ * @capture: Pointer to the first list item or NULL
+ *
+ */
+void i915_request_free_capture_list(struct i915_capture_list *capture)
+{
+ while (capture) {
+ struct i915_capture_list *next = capture->next;
+
+ i915_vma_snapshot_put(capture->vma_snapshot);
+ kfree(capture);
+ capture = next;
+ }
+}
+
+#define assert_capture_list_is_null(_rq) GEM_BUG_ON((_rq)->capture_list)
+
+#define clear_capture_list(_rq) ((_rq)->capture_list = NULL)
+
+#else
+
+#define i915_request_free_capture_list(_a) do {} while (0)
+
+#define assert_capture_list_is_null(_a) do {} while (0)
+
+#define clear_capture_list(_rq) do {} while (0)
+
+#endif
+
bool i915_request_retire(struct i915_request *rq)
{
if (!__i915_request_is_complete(rq))
@@ -340,7 +365,7 @@ bool i915_request_retire(struct i915_request *rq)
}
if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags))
- atomic_dec(&rq->engine->gt->rps.num_waiters);
+ intel_rps_dec_waiters(&rq->engine->gt->rps);
/*
* We only loosely track inflight requests across preemption,
@@ -360,7 +385,6 @@ bool i915_request_retire(struct i915_request *rq)
intel_context_exit(rq->context);
intel_context_unpin(rq->context);
- free_capture_list(rq);
i915_sched_node_fini(&rq->sched);
i915_request_put(rq);
@@ -720,7 +744,7 @@ void i915_request_cancel(struct i915_request *rq, int error)
intel_context_cancel_request(rq->context, rq);
}
-static int __i915_sw_fence_call
+static int
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
struct i915_request *request =
@@ -756,7 +780,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
return NOTIFY_DONE;
}
-static int __i915_sw_fence_call
+static int
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);
@@ -830,11 +854,18 @@ static void __i915_request_ctor(void *arg)
i915_sw_fence_init(&rq->submit, submit_notify);
i915_sw_fence_init(&rq->semaphore, semaphore_notify);
- rq->capture_list = NULL;
+ clear_capture_list(rq);
+ rq->batch_snapshot.present = false;
init_llist_head(&rq->execute_cb);
}
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#define clear_batch_ptr(_rq) ((_rq)->batch = NULL)
+#else
+#define clear_batch_ptr(_a) do {} while (0)
+#endif
+
struct i915_request *
__i915_request_create(struct intel_context *ce, gfp_t gfp)
{
@@ -926,10 +957,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
i915_sched_node_reinit(&rq->sched);
/* No zalloc, everything must be cleared after use */
- rq->batch = NULL;
+ clear_batch_ptr(rq);
__rq_init_watchdog(rq);
- GEM_BUG_ON(rq->capture_list);
+ assert_capture_list_is_null(rq);
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
+ GEM_BUG_ON(i915_vma_snapshot_present(&rq->batch_snapshot));
/*
* Reserve space in the ring buffer for all the commands required to
@@ -1514,6 +1546,27 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
}
/**
+ * i915_request_await_deps - set this request to (async) wait upon a struct
+ * i915_deps dma_fence collection
+ * @rq: request we are wishing to use
+ * @deps: The struct i915_deps containing the dependencies.
+ *
+ * Returns 0 if successful, negative error code on error.
+ */
+int i915_request_await_deps(struct i915_request *rq, const struct i915_deps *deps)
+{
+ int i, err;
+
+ for (i = 0; i < deps->num_deps; ++i) {
+ err = i915_request_await_dma_fence(rq, deps->fences[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/**
* i915_request_await_object - set this request to (async) wait upon a bo
* @to: request we are wishing to use
* @obj: object which may be in use on another ring.
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 3c6e8acd1457..170ee78c2858 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -40,19 +40,27 @@
#include "i915_scheduler.h"
#include "i915_selftest.h"
#include "i915_sw_fence.h"
+#include "i915_vma_snapshot.h"
#include <uapi/drm/i915_drm.h>
struct drm_file;
struct drm_i915_gem_object;
struct drm_printer;
+struct i915_deps;
struct i915_request;
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
struct i915_capture_list {
+ struct i915_vma_snapshot *vma_snapshot;
struct i915_capture_list *next;
- struct i915_vma *vma;
};
+void i915_request_free_capture_list(struct i915_capture_list *capture);
+#else
+#define i915_request_free_capture_list(_a) do {} while (0)
+#endif
+
#define RQ_TRACE(rq, fmt, ...) do { \
const struct i915_request *rq__ = (rq); \
ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt, \
@@ -289,10 +297,12 @@ struct i915_request {
/** Preallocate space in the ring for the emitting the request */
u32 reserved_space;
- /** Batch buffer related to this request if any (used for
- * error state dump only).
- */
- struct i915_vma *batch;
+ /** Batch buffer pointer for selftest internal use. */
+ I915_SELFTEST_DECLARE(struct i915_vma *batch);
+
+ struct i915_vma_snapshot batch_snapshot;
+
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
/**
* Additional buffers requested by userspace to be captured upon
* a GPU hang. The vma/obj on this list are protected by their
@@ -300,6 +310,7 @@ struct i915_request {
* on the active_list (of their final request).
*/
struct i915_capture_list *capture_list;
+#endif
/** Time at which this request was emitted, in jiffies. */
unsigned long emitted_jiffies;
@@ -401,6 +412,7 @@ int i915_request_await_object(struct i915_request *to,
bool write);
int i915_request_await_dma_fence(struct i915_request *rq,
struct dma_fence *fence);
+int i915_request_await_deps(struct i915_request *rq, const struct i915_deps *deps);
int i915_request_await_execution(struct i915_request *rq,
struct dma_fence *fence);
@@ -647,7 +659,8 @@ i915_request_timeline(const struct i915_request *rq)
{
/* Valid only while the request is being constructed (or retired). */
return rcu_dereference_protected(rq->timeline,
- lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
+ lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) ||
+ test_bit(CONTEXT_IS_PARKING, &rq->context->flags));
}
static inline struct i915_gem_context *
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
index 4a6712dca838..41f2adb6a583 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.c
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -41,8 +41,32 @@ bool i915_sg_trim(struct sg_table *orig_st)
return true;
}
+static void i915_refct_sgt_release(struct kref *ref)
+{
+ struct i915_refct_sgt *rsgt =
+ container_of(ref, typeof(*rsgt), kref);
+
+ sg_free_table(&rsgt->table);
+ kfree(rsgt);
+}
+
+static const struct i915_refct_sgt_ops rsgt_ops = {
+ .release = i915_refct_sgt_release
+};
+
+/**
+ * i915_refct_sgt_init - Initialize a struct i915_refct_sgt with default ops
+ * @rsgt: The struct i915_refct_sgt to initialize.
+ * @size: The size of the underlying memory buffer.
+ */
+void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
+{
+ __i915_refct_sgt_init(rsgt, size, &rsgt_ops);
+}
+
/**
- * i915_sg_from_mm_node - Create an sg_table from a struct drm_mm_node
+ * i915_rsgt_from_mm_node - Create a refcounted sg_table from a struct
+ * drm_mm_node
* @node: The drm_mm_node.
* @region_start: An offset to add to the dma addresses of the sg list.
*
@@ -50,25 +74,28 @@ bool i915_sg_trim(struct sg_table *orig_st)
* taking a maximum segment length into account, splitting into segments
* if necessary.
*
- * Return: A pointer to a kmalloced struct sg_table on success, negative
+ * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
* error code cast to an error pointer on failure.
*/
-struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
- u64 region_start)
+struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
+ u64 region_start)
{
const u64 max_segment = SZ_1G; /* Do we have a limit on this? */
u64 segment_pages = max_segment >> PAGE_SHIFT;
u64 block_size, offset, prev_end;
+ struct i915_refct_sgt *rsgt;
struct sg_table *st;
struct scatterlist *sg;
- st = kmalloc(sizeof(*st), GFP_KERNEL);
- if (!st)
+ rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
+ if (!rsgt)
return ERR_PTR(-ENOMEM);
+ i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT);
+ st = &rsgt->table;
if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages),
GFP_KERNEL)) {
- kfree(st);
+ i915_refct_sgt_put(rsgt);
return ERR_PTR(-ENOMEM);
}
@@ -104,11 +131,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
sg_mark_end(sg);
i915_sg_trim(st);
- return st;
+ return rsgt;
}
/**
- * i915_sg_from_buddy_resource - Create an sg_table from a struct
+ * i915_rsgt_from_buddy_resource - Create a refcounted sg_table from a struct
* i915_buddy_block list
* @res: The struct i915_ttm_buddy_resource.
* @region_start: An offset to add to the dma addresses of the sg list.
@@ -117,11 +144,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
* taking a maximum segment length into account, splitting into segments
* if necessary.
*
- * Return: A pointer to a kmalloced struct sg_table on success, negative
+ * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
* error code cast to an error pointer on failure.
*/
-struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
- u64 region_start)
+struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
+ u64 region_start)
{
struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
const u64 size = res->num_pages << PAGE_SHIFT;
@@ -129,18 +156,21 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
struct i915_buddy_mm *mm = bman_res->mm;
struct list_head *blocks = &bman_res->blocks;
struct i915_buddy_block *block;
+ struct i915_refct_sgt *rsgt;
struct scatterlist *sg;
struct sg_table *st;
resource_size_t prev_end;
GEM_BUG_ON(list_empty(blocks));
- st = kmalloc(sizeof(*st), GFP_KERNEL);
- if (!st)
+ rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
+ if (!rsgt)
return ERR_PTR(-ENOMEM);
+ i915_refct_sgt_init(rsgt, size);
+ st = &rsgt->table;
if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) {
- kfree(st);
+ i915_refct_sgt_put(rsgt);
return ERR_PTR(-ENOMEM);
}
@@ -181,7 +211,7 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
sg_mark_end(sg);
i915_sg_trim(st);
- return st;
+ return rsgt;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
index b8bd5925b03f..12c6a1684081 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.h
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -144,10 +144,78 @@ static inline unsigned int i915_sg_segment_size(void)
bool i915_sg_trim(struct sg_table *orig_st);
-struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
- u64 region_start);
+/**
+ * struct i915_refct_sgt_ops - Operations structure for struct i915_refct_sgt
+ */
+struct i915_refct_sgt_ops {
+ /**
+ * release() - Free the memory of the struct i915_refct_sgt
+ * @ref: struct kref that is embedded in the struct i915_refct_sgt
+ */
+ void (*release)(struct kref *ref);
+};
+
+/**
+ * struct i915_refct_sgt - A refcounted scatter-gather table
+ * @kref: struct kref for refcounting
+ * @table: struct sg_table holding the scatter-gather table itself. Note that
+ * @table->sgl = NULL can be used to determine whether a scatter-gather table
+ * is present or not.
+ * @size: The size in bytes of the underlying memory buffer
+ * @ops: The operations structure.
+ */
+struct i915_refct_sgt {
+ struct kref kref;
+ struct sg_table table;
+ size_t size;
+ const struct i915_refct_sgt_ops *ops;
+};
+
+/**
+ * i915_refct_sgt_put - Put a refcounted sg-table
+ * @rsgt: The struct i915_refct_sgt to put.
+ */
+static inline void i915_refct_sgt_put(struct i915_refct_sgt *rsgt)
+{
+ if (rsgt)
+ kref_put(&rsgt->kref, rsgt->ops->release);
+}
+
+/**
+ * i915_refct_sgt_get - Get a refcounted sg-table
+ * @rsgt: The struct i915_refct_sgt to get.
+ */
+static inline struct i915_refct_sgt *
+i915_refct_sgt_get(struct i915_refct_sgt *rsgt)
+{
+ kref_get(&rsgt->kref);
+ return rsgt;
+}
+
+/**
+ * __i915_refct_sgt_init - Initialize a refcounted sg-list with a custom
+ * operations structure
+ * @rsgt: The struct i915_refct_sgt to initialize.
+ * @size: Size in bytes of the underlying memory buffer.
+ * @ops: A customized operations structure in case the refcounted sg-list
+ * is embedded into another structure.
+ */
+static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt,
+ size_t size,
+ const struct i915_refct_sgt_ops *ops)
+{
+ kref_init(&rsgt->kref);
+ rsgt->table.sgl = NULL;
+ rsgt->size = size;
+ rsgt->ops = ops;
+}
+
+void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size);
+
+struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
+ u64 region_start);
-struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
- u64 region_start);
+struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
+ u64 region_start);
#endif
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 7ea0dbf81530..2a74a9a1cafe 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -18,7 +18,9 @@
#define I915_SW_FENCE_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
#endif
+#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG
static DEFINE_SPINLOCK(i915_sw_fence_lock);
+#endif
#define WQ_FLAG_BITS \
BITS_PER_TYPE(typeof_member(struct wait_queue_entry, flags))
@@ -34,7 +36,7 @@ enum {
static void *i915_sw_fence_debug_hint(void *addr)
{
- return (void *)(((struct i915_sw_fence *)addr)->flags & I915_SW_FENCE_MASK);
+ return (void *)(((struct i915_sw_fence *)addr)->fn);
}
#ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
@@ -126,10 +128,7 @@ static inline void debug_fence_assert(struct i915_sw_fence *fence)
static int __i915_sw_fence_notify(struct i915_sw_fence *fence,
enum i915_sw_fence_notify state)
{
- i915_sw_fence_notify_t fn;
-
- fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK);
- return fn(fence, state);
+ return fence->fn(fence, state);
}
#ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
@@ -242,10 +241,13 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
const char *name,
struct lock_class_key *key)
{
- BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK);
+ BUG_ON(!fn);
__init_waitqueue_head(&fence->wait, name, key);
- fence->flags = (unsigned long)fn;
+ fence->fn = fn;
+#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG
+ fence->flags = 0;
+#endif
i915_sw_fence_reinit(fence);
}
@@ -257,7 +259,6 @@ void i915_sw_fence_reinit(struct i915_sw_fence *fence)
atomic_set(&fence->pending, 1);
fence->error = 0;
- I915_SW_FENCE_BUG_ON(!fence->flags);
I915_SW_FENCE_BUG_ON(!list_empty(&fence->wait.head));
}
@@ -279,6 +280,7 @@ static int i915_sw_fence_wake(wait_queue_entry_t *wq, unsigned mode, int flags,
return 0;
}
+#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG
static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
const struct i915_sw_fence * const signaler)
{
@@ -322,9 +324,6 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
unsigned long flags;
bool err;
- if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
- return false;
-
spin_lock_irqsave(&i915_sw_fence_lock, flags);
err = __i915_sw_fence_check_if_after(fence, signaler);
__i915_sw_fence_clear_checked_bit(fence);
@@ -332,6 +331,13 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
return err;
}
+#else
+static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
+ const struct i915_sw_fence * const signaler)
+{
+ return false;
+}
+#endif
static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
struct i915_sw_fence *signaler,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index 30a863353ee6..a7c603bc1b01 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -17,26 +17,27 @@
struct completion;
struct dma_resv;
+struct i915_sw_fence;
+
+enum i915_sw_fence_notify {
+ FENCE_COMPLETE,
+ FENCE_FREE
+};
+
+typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *,
+ enum i915_sw_fence_notify state);
struct i915_sw_fence {
wait_queue_head_t wait;
+ i915_sw_fence_notify_t fn;
+#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG
unsigned long flags;
+#endif
atomic_t pending;
int error;
};
#define I915_SW_FENCE_CHECKED_BIT 0 /* used internally for DAG checking */
-#define I915_SW_FENCE_PRIVATE_BIT 1 /* available for use by owner */
-#define I915_SW_FENCE_MASK (~3)
-
-enum i915_sw_fence_notify {
- FENCE_COMPLETE,
- FENCE_FREE
-};
-
-typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *,
- enum i915_sw_fence_notify state);
-#define __i915_sw_fence_call __aligned(4)
void __i915_sw_fence_init(struct i915_sw_fence *fence,
i915_sw_fence_notify_t fn,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c
index 5b33ef23d54c..d2e56b387993 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c
@@ -23,7 +23,7 @@ static void fence_work(struct work_struct *work)
dma_fence_put(&f->dma);
}
-static int __i915_sw_fence_call
+static int
fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
struct dma_fence_work *f = container_of(fence, typeof(*f), chain);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 1804f4142740..fae4d1f4f275 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -52,7 +52,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
u64 res = 0;
with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
- res = intel_rc6_residency_us(&dev_priv->gt.rc6, reg);
+ res = intel_rc6_residency_us(&to_gt(dev_priv)->rc6, reg);
return DIV_ROUND_CLOSEST_ULL(res, 1000);
}
@@ -260,7 +260,7 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
struct device_attribute *attr, char *buf)
{
struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &i915->gt.rps;
+ struct intel_rps *rps = &to_gt(i915)->rps;
return sysfs_emit(buf, "%d\n", intel_rps_read_actual_frequency(rps));
}
@@ -269,7 +269,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
struct device_attribute *attr, char *buf)
{
struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &i915->gt.rps;
+ struct intel_rps *rps = &to_gt(i915)->rps;
return sysfs_emit(buf, "%d\n", intel_rps_get_requested_frequency(rps));
}
@@ -277,9 +277,9 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
{
struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &i915->gt.rps;
+ struct intel_rps *rps = &to_gt(i915)->rps;
- return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->boost_freq));
+ return sysfs_emit(buf, "%d\n", intel_rps_get_boost_frequency(rps));
}
static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
@@ -287,8 +287,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
const char *buf, size_t count)
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &dev_priv->gt.rps;
- bool boost = false;
+ struct intel_rps *rps = &to_gt(dev_priv)->rps;
ssize_t ret;
u32 val;
@@ -296,28 +295,16 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
if (ret)
return ret;
- /* Validate against (static) hardware limits */
- val = intel_freq_opcode(rps, val);
- if (val < rps->min_freq || val > rps->max_freq)
- return -EINVAL;
-
- mutex_lock(&rps->lock);
- if (val != rps->boost_freq) {
- rps->boost_freq = val;
- boost = atomic_read(&rps->num_waiters);
- }
- mutex_unlock(&rps->lock);
- if (boost)
- schedule_work(&rps->work);
+ ret = intel_rps_set_boost_frequency(rps, val);
- return count;
+ return ret ?: count;
}
static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev,
struct device_attribute *attr, char *buf)
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &dev_priv->gt.rps;
+ struct intel_rps *rps = &to_gt(dev_priv)->rps;
return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->efficient_freq));
}
@@ -325,7 +312,7 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev,
static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- struct intel_gt *gt = &dev_priv->gt;
+ struct intel_gt *gt = to_gt(dev_priv);
struct intel_rps *rps = &gt->rps;
return sysfs_emit(buf, "%d\n", intel_rps_get_max_frequency(rps));
@@ -336,7 +323,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
const char *buf, size_t count)
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- struct intel_gt *gt = &dev_priv->gt;
+ struct intel_gt *gt = to_gt(dev_priv);
struct intel_rps *rps = &gt->rps;
ssize_t ret;
u32 val;
@@ -353,7 +340,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
{
struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
struct intel_rps *rps = &gt->rps;
return sysfs_emit(buf, "%d\n", intel_rps_get_min_frequency(rps));
@@ -364,7 +351,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
const char *buf, size_t count)
{
struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &i915->gt.rps;
+ struct intel_rps *rps = &to_gt(i915)->rps;
ssize_t ret;
u32 val;
@@ -394,7 +381,7 @@ static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL);
static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
- struct intel_rps *rps = &dev_priv->gt.rps;
+ struct intel_rps *rps = &to_gt(dev_priv)->rps;
u32 val;
if (attr == &dev_attr_gt_RP0_freq_mhz)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index c837888dd542..68cf1d392250 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -41,12 +41,12 @@
static struct kmem_cache *slab_vmas;
-struct i915_vma *i915_vma_alloc(void)
+static struct i915_vma *i915_vma_alloc(void)
{
return kmem_cache_zalloc(slab_vmas, GFP_KERNEL);
}
-void i915_vma_free(struct i915_vma *vma)
+static void i915_vma_free(struct i915_vma *vma)
{
return kmem_cache_free(slab_vmas, vma);
}
@@ -110,11 +110,9 @@ vma_create(struct drm_i915_gem_object *obj,
return ERR_PTR(-ENOMEM);
kref_init(&vma->ref);
- mutex_init(&vma->pages_mutex);
vma->vm = i915_vm_get(vm);
vma->ops = &vm->vma_ops;
vma->obj = obj;
- vma->resv = obj->base.resv;
vma->size = obj->base.size;
vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
@@ -347,7 +345,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
rcu_read_unlock();
if (fence) {
- err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
+ err = dma_fence_wait(fence, true);
dma_fence_put(fence);
}
}
@@ -355,6 +353,28 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
return err;
}
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static int i915_vma_verify_bind_complete(struct i915_vma *vma)
+{
+ struct dma_fence *fence = i915_active_fence_get(&vma->active.excl);
+ int err;
+
+ if (!fence)
+ return 0;
+
+ if (dma_fence_is_signaled(fence))
+ err = fence->error;
+ else
+ err = -EBUSY;
+
+ dma_fence_put(fence);
+
+ return err;
+}
+#else
+#define i915_vma_verify_bind_complete(_vma) 0
+#endif
+
/**
* i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
* @vma: VMA to map
@@ -374,6 +394,7 @@ int i915_vma_bind(struct i915_vma *vma,
u32 bind_flags;
u32 vma_flags;
+ lockdep_assert_held(&vma->vm->mutex);
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(vma->size > vma->node.size);
@@ -395,7 +416,7 @@ int i915_vma_bind(struct i915_vma *vma,
if (bind_flags == 0)
return 0;
- GEM_BUG_ON(!vma->pages);
+ GEM_BUG_ON(!atomic_read(&vma->pages_count));
trace_i915_vma_bind(vma, bind_flags);
if (work && bind_flags & vma->vm->bind_async_flags) {
@@ -424,14 +445,22 @@ int i915_vma_bind(struct i915_vma *vma,
work->base.dma.error = 0; /* enable the queue_work() */
+ __i915_gem_object_pin_pages(vma->obj);
+ work->pinned = i915_gem_object_get(vma->obj);
+ } else {
if (vma->obj) {
- __i915_gem_object_pin_pages(vma->obj);
- work->pinned = i915_gem_object_get(vma->obj);
+ int ret;
+
+ ret = i915_gem_object_wait_moving_fence(vma->obj, true);
+ if (ret)
+ return ret;
}
- } else {
vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
}
+ if (vma->obj)
+ set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
atomic_or(bind_flags, &vma->flags);
return 0;
}
@@ -450,6 +479,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
+ GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
ptr = READ_ONCE(vma->iomap);
if (ptr == NULL) {
@@ -668,7 +698,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
}
color = 0;
- if (vma->obj && i915_vm_has_cache_coloring(vma->vm))
+ if (i915_vm_has_cache_coloring(vma->vm))
color = vma->obj->cache_level;
if (flags & PIN_OFFSET_FIXED) {
@@ -790,40 +820,356 @@ unpinned:
return pinned;
}
-static int vma_get_pages(struct i915_vma *vma)
+static struct scatterlist *
+rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+ unsigned int width, unsigned int height,
+ unsigned int src_stride, unsigned int dst_stride,
+ struct sg_table *st, struct scatterlist *sg)
{
- int err = 0;
- bool pinned_pages = false;
+ unsigned int column, row;
+ unsigned int src_idx;
- if (atomic_add_unless(&vma->pages_count, 1, 0))
- return 0;
+ for (column = 0; column < width; column++) {
+ unsigned int left;
- if (vma->obj) {
- err = i915_gem_object_pin_pages(vma->obj);
- if (err)
- return err;
- pinned_pages = true;
+ src_idx = src_stride * (height - 1) + column + offset;
+ for (row = 0; row < height; row++) {
+ st->nents++;
+ /*
+ * We don't need the pages, but need to initialize
+ * the entries so the sg list can be happily traversed.
+ * The only thing we need are DMA addresses.
+ */
+ sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
+ sg_dma_address(sg) =
+ i915_gem_object_get_dma_address(obj, src_idx);
+ sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
+ sg = sg_next(sg);
+ src_idx -= src_stride;
+ }
+
+ left = (dst_stride - height) * I915_GTT_PAGE_SIZE;
+
+ if (!left)
+ continue;
+
+ st->nents++;
+
+ /*
+ * The DE ignores the PTEs for the padding tiles, the sg entry
+ * here is just a convenience to indicate how many padding PTEs
+ * to insert at this spot.
+ */
+ sg_set_page(sg, NULL, left, 0);
+ sg_dma_address(sg) = 0;
+ sg_dma_len(sg) = left;
+ sg = sg_next(sg);
}
- /* Allocations ahoy! */
- if (mutex_lock_interruptible(&vma->pages_mutex)) {
- err = -EINTR;
- goto unpin;
+ return sg;
+}
+
+static noinline struct sg_table *
+intel_rotate_pages(struct intel_rotation_info *rot_info,
+ struct drm_i915_gem_object *obj)
+{
+ unsigned int size = intel_rotation_info_size(rot_info);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Allocate target SG list. */
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, size, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ st->nents = 0;
+ sg = st->sgl;
+
+ for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++)
+ sg = rotate_pages(obj, rot_info->plane[i].offset,
+ rot_info->plane[i].width, rot_info->plane[i].height,
+ rot_info->plane[i].src_stride,
+ rot_info->plane[i].dst_stride,
+ st, sg);
+
+ return st;
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
+
+ drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+ obj->base.size, rot_info->plane[0].width,
+ rot_info->plane[0].height, size);
+
+ return ERR_PTR(ret);
+}
+
+static struct scatterlist *
+remap_pages(struct drm_i915_gem_object *obj,
+ unsigned int offset, unsigned int alignment_pad,
+ unsigned int width, unsigned int height,
+ unsigned int src_stride, unsigned int dst_stride,
+ struct sg_table *st, struct scatterlist *sg)
+{
+ unsigned int row;
+
+ if (!width || !height)
+ return sg;
+
+ if (alignment_pad) {
+ st->nents++;
+
+ /*
+ * The DE ignores the PTEs for the padding tiles, the sg entry
+ * here is just a convenience to indicate how many padding PTEs
+ * to insert at this spot.
+ */
+ sg_set_page(sg, NULL, alignment_pad * 4096, 0);
+ sg_dma_address(sg) = 0;
+ sg_dma_len(sg) = alignment_pad * 4096;
+ sg = sg_next(sg);
}
- if (!atomic_read(&vma->pages_count)) {
- err = vma->ops->set_pages(vma);
- if (err)
- goto unlock;
- pinned_pages = false;
+ for (row = 0; row < height; row++) {
+ unsigned int left = width * I915_GTT_PAGE_SIZE;
+
+ while (left) {
+ dma_addr_t addr;
+ unsigned int length;
+
+ /*
+ * We don't need the pages, but need to initialize
+ * the entries so the sg list can be happily traversed.
+ * The only thing we need are DMA addresses.
+ */
+
+ addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
+
+ length = min(left, length);
+
+ st->nents++;
+
+ sg_set_page(sg, NULL, length, 0);
+ sg_dma_address(sg) = addr;
+ sg_dma_len(sg) = length;
+ sg = sg_next(sg);
+
+ offset += length / I915_GTT_PAGE_SIZE;
+ left -= length;
+ }
+
+ offset += src_stride - width;
+
+ left = (dst_stride - width) * I915_GTT_PAGE_SIZE;
+
+ if (!left)
+ continue;
+
+ st->nents++;
+
+ /*
+ * The DE ignores the PTEs for the padding tiles, the sg entry
+ * here is just a convenience to indicate how many padding PTEs
+ * to insert at this spot.
+ */
+ sg_set_page(sg, NULL, left, 0);
+ sg_dma_address(sg) = 0;
+ sg_dma_len(sg) = left;
+ sg = sg_next(sg);
}
+
+ return sg;
+}
+
+static noinline struct sg_table *
+intel_remap_pages(struct intel_remapped_info *rem_info,
+ struct drm_i915_gem_object *obj)
+{
+ unsigned int size = intel_remapped_info_size(rem_info);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ unsigned int gtt_offset = 0;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Allocate target SG list. */
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, size, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ st->nents = 0;
+ sg = st->sgl;
+
+ for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
+ unsigned int alignment_pad = 0;
+
+ if (rem_info->plane_alignment)
+ alignment_pad = ALIGN(gtt_offset, rem_info->plane_alignment) - gtt_offset;
+
+ sg = remap_pages(obj,
+ rem_info->plane[i].offset, alignment_pad,
+ rem_info->plane[i].width, rem_info->plane[i].height,
+ rem_info->plane[i].src_stride, rem_info->plane[i].dst_stride,
+ st, sg);
+
+ gtt_offset += alignment_pad +
+ rem_info->plane[i].dst_stride * rem_info->plane[i].height;
+ }
+
+ i915_sg_trim(st);
+
+ return st;
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
+
+ drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+ obj->base.size, rem_info->plane[0].width,
+ rem_info->plane[0].height, size);
+
+ return ERR_PTR(ret);
+}
+
+static noinline struct sg_table *
+intel_partial_pages(const struct i915_ggtt_view *view,
+ struct drm_i915_gem_object *obj)
+{
+ struct sg_table *st;
+ struct scatterlist *sg, *iter;
+ unsigned int count = view->partial.size;
+ unsigned int offset;
+ int ret = -ENOMEM;
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, count, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
+ GEM_BUG_ON(!iter);
+
+ sg = st->sgl;
+ st->nents = 0;
+ do {
+ unsigned int len;
+
+ len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
+ count << PAGE_SHIFT);
+ sg_set_page(sg, NULL, len, 0);
+ sg_dma_address(sg) =
+ sg_dma_address(iter) + (offset << PAGE_SHIFT);
+ sg_dma_len(sg) = len;
+
+ st->nents++;
+ count -= len >> PAGE_SHIFT;
+ if (count == 0) {
+ sg_mark_end(sg);
+ i915_sg_trim(st); /* Drop any unused tail entries. */
+
+ return st;
+ }
+
+ sg = __sg_next(sg);
+ iter = __sg_next(iter);
+ offset = 0;
+ } while (1);
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
+ return ERR_PTR(ret);
+}
+
+static int
+__i915_vma_get_pages(struct i915_vma *vma)
+{
+ struct sg_table *pages;
+ int ret;
+
+ /*
+ * The vma->pages are only valid within the lifespan of the borrowed
+ * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
+ * must be the vma->pages. A simple rule is that vma->pages must only
+ * be accessed when the obj->mm.pages are pinned.
+ */
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
+
+ switch (vma->ggtt_view.type) {
+ default:
+ GEM_BUG_ON(vma->ggtt_view.type);
+ fallthrough;
+ case I915_GGTT_VIEW_NORMAL:
+ pages = vma->obj->mm.pages;
+ break;
+
+ case I915_GGTT_VIEW_ROTATED:
+ pages =
+ intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
+ break;
+
+ case I915_GGTT_VIEW_REMAPPED:
+ pages =
+ intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
+ break;
+
+ case I915_GGTT_VIEW_PARTIAL:
+ pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
+ break;
+ }
+
+ ret = 0;
+ if (IS_ERR(pages)) {
+ ret = PTR_ERR(pages);
+ pages = NULL;
+ drm_err(&vma->vm->i915->drm,
+ "Failed to get pages for VMA view type %u (%d)!\n",
+ vma->ggtt_view.type, ret);
+ }
+
+ vma->pages = pages;
+
+ return ret;
+}
+
+I915_SELFTEST_EXPORT int i915_vma_get_pages(struct i915_vma *vma)
+{
+ int err;
+
+ if (atomic_add_unless(&vma->pages_count, 1, 0))
+ return 0;
+
+ err = i915_gem_object_pin_pages(vma->obj);
+ if (err)
+ return err;
+
+ err = __i915_vma_get_pages(vma);
+ if (err)
+ goto err_unpin;
+
+ vma->page_sizes = vma->obj->mm.page_sizes;
atomic_inc(&vma->pages_count);
-unlock:
- mutex_unlock(&vma->pages_mutex);
-unpin:
- if (pinned_pages)
- __i915_gem_object_unpin_pages(vma->obj);
+ return 0;
+
+err_unpin:
+ __i915_gem_object_unpin_pages(vma->obj);
return err;
}
@@ -831,18 +1177,31 @@ unpin:
static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
{
/* We allocate under vma_get_pages, so beware the shrinker */
- mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING);
+ struct sg_table *pages = READ_ONCE(vma->pages);
+
GEM_BUG_ON(atomic_read(&vma->pages_count) < count);
+
if (atomic_sub_return(count, &vma->pages_count) == 0) {
- vma->ops->clear_pages(vma);
- GEM_BUG_ON(vma->pages);
- if (vma->obj)
- i915_gem_object_unpin_pages(vma->obj);
+ /*
+ * The atomic_sub_return is a read barrier for the READ_ONCE of
+ * vma->pages above.
+ *
+ * READ_ONCE is safe because this is either called from the same
+ * function (i915_vma_pin_ww), or guarded by vma->vm->mutex.
+ *
+ * TODO: We're leaving vma->pages dangling, until vma->obj->resv
+ * lock is required.
+ */
+ if (pages != vma->obj->mm.pages) {
+ sg_free_table(pages);
+ kfree(pages);
+ }
+
+ i915_gem_object_unpin_pages(vma->obj);
}
- mutex_unlock(&vma->pages_mutex);
}
-static void vma_put_pages(struct i915_vma *vma)
+I915_SELFTEST_EXPORT void i915_vma_put_pages(struct i915_vma *vma)
{
if (atomic_add_unless(&vma->pages_count, -1, 1))
return;
@@ -868,14 +1227,13 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
u64 size, u64 alignment, u64 flags)
{
struct i915_vma_work *work = NULL;
+ struct dma_fence *moving = NULL;
intel_wakeref_t wakeref = 0;
unsigned int bound;
int err;
-#ifdef CONFIG_PROVE_LOCKING
- if (debug_locks && !WARN_ON(!ww) && vma->resv)
- assert_vma_held(vma);
-#endif
+ assert_vma_held(vma);
+ GEM_BUG_ON(!ww);
BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
@@ -886,14 +1244,15 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
return 0;
- err = vma_get_pages(vma);
+ err = i915_vma_get_pages(vma);
if (err)
return err;
if (flags & PIN_GLOBAL)
wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
- if (flags & vma->vm->bind_async_flags) {
+ moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL;
+ if (flags & vma->vm->bind_async_flags || moving) {
/* lock VM */
err = i915_vm_lock_objects(vma->vm, ww);
if (err)
@@ -907,6 +1266,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
work->vm = i915_vm_get(vma->vm);
+ dma_fence_work_chain(&work->base, moving);
+
/* Allocate enough page directories to used PTE */
if (vma->vm->allocate_va_range) {
err = i915_vm_alloc_pt_stash(vma->vm,
@@ -981,7 +1342,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
GEM_BUG_ON(!vma->pages);
err = i915_vma_bind(vma,
- vma->obj ? vma->obj->cache_level : 0,
+ vma->obj->cache_level,
flags, work);
if (err)
goto err_remove;
@@ -1011,7 +1372,11 @@ err_fence:
err_rpm:
if (wakeref)
intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
- vma_put_pages(vma);
+
+ if (moving)
+ dma_fence_put(moving);
+
+ i915_vma_put_pages(vma);
return err;
}
@@ -1026,23 +1391,15 @@ static void flush_idle_contexts(struct intel_gt *gt)
intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
}
-int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
- u32 align, unsigned int flags)
+static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+ u32 align, unsigned int flags)
{
struct i915_address_space *vm = vma->vm;
int err;
- GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-
-#ifdef CONFIG_LOCKDEP
- WARN_ON(!ww && vma->resv && dma_resv_held(vma->resv));
-#endif
-
do {
- if (ww)
- err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
- else
- err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
+ err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
+
if (err != -ENOSPC) {
if (!err) {
err = i915_vma_wait_for_bind(vma);
@@ -1061,6 +1418,30 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
} while (1);
}
+int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+ u32 align, unsigned int flags)
+{
+ struct i915_gem_ww_ctx _ww;
+ int err;
+
+ GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+
+ if (ww)
+ return __i915_ggtt_pin(vma, ww, align, flags);
+
+#ifdef CONFIG_LOCKDEP
+ WARN_ON(dma_resv_held(vma->obj->base.resv));
+#endif
+
+ for_i915_gem_ww(&_ww, err, true) {
+ err = i915_gem_object_lock(vma->obj, &_ww);
+ if (!err)
+ err = __i915_ggtt_pin(vma, &_ww, align, flags);
+ }
+
+ return err;
+}
+
static void __vma_close(struct i915_vma *vma, struct intel_gt *gt)
{
/*
@@ -1114,6 +1495,7 @@ void i915_vma_reopen(struct i915_vma *vma)
void i915_vma_release(struct kref *ref)
{
struct i915_vma *vma = container_of(ref, typeof(*vma), ref);
+ struct drm_i915_gem_object *obj = vma->obj;
if (drm_mm_node_allocated(&vma->node)) {
mutex_lock(&vma->vm->mutex);
@@ -1124,15 +1506,11 @@ void i915_vma_release(struct kref *ref)
}
GEM_BUG_ON(i915_vma_is_active(vma));
- if (vma->obj) {
- struct drm_i915_gem_object *obj = vma->obj;
-
- spin_lock(&obj->vma.lock);
- list_del(&vma->obj_link);
- if (!RB_EMPTY_NODE(&vma->obj_node))
- rb_erase(&vma->obj_node, &obj->vma.tree);
- spin_unlock(&obj->vma.lock);
- }
+ spin_lock(&obj->vma.lock);
+ list_del(&vma->obj_link);
+ if (!RB_EMPTY_NODE(&vma->obj_node))
+ rb_erase(&vma->obj_node, &obj->vma.tree);
+ spin_unlock(&obj->vma.lock);
__i915_vma_remove_closed(vma);
i915_vm_put(vma->vm);
@@ -1218,7 +1596,7 @@ __i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma)
return __i915_request_await_exclusive(rq, &vma->active);
}
-int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
+static int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
{
int err;
@@ -1257,19 +1635,19 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
}
if (fence) {
- dma_resv_add_excl_fence(vma->resv, fence);
+ dma_resv_add_excl_fence(vma->obj->base.resv, fence);
obj->write_domain = I915_GEM_DOMAIN_RENDER;
obj->read_domains = 0;
}
} else {
if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
- err = dma_resv_reserve_shared(vma->resv, 1);
+ err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
if (unlikely(err))
return err;
}
if (fence) {
- dma_resv_add_shared_fence(vma->resv, fence);
+ dma_resv_add_shared_fence(vma->obj->base.resv, fence);
obj->write_domain = 0;
}
}
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 648dbe744c96..32719431b3df 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -55,8 +55,6 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma)
/* do not reserve memory to prevent deadlocks */
#define __EXEC_OBJECT_NO_RESERVE BIT(31)
-int __must_check __i915_vma_move_to_active(struct i915_vma *vma,
- struct i915_request *rq);
int __must_check _i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq,
struct dma_fence *fence,
@@ -234,16 +232,16 @@ static inline void __i915_vma_put(struct i915_vma *vma)
kref_put(&vma->ref, i915_vma_release);
}
-#define assert_vma_held(vma) dma_resv_assert_held((vma)->resv)
+#define assert_vma_held(vma) dma_resv_assert_held((vma)->obj->base.resv)
static inline void i915_vma_lock(struct i915_vma *vma)
{
- dma_resv_lock(vma->resv, NULL);
+ dma_resv_lock(vma->obj->base.resv, NULL);
}
static inline void i915_vma_unlock(struct i915_vma *vma)
{
- dma_resv_unlock(vma->resv);
+ dma_resv_unlock(vma->obj->base.resv);
}
int __must_check
@@ -418,9 +416,6 @@ static inline void i915_vma_clear_scanout(struct i915_vma *vma)
list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \
for_each_until(!i915_vma_is_ggtt(V))
-struct i915_vma *i915_vma_alloc(void);
-void i915_vma_free(struct i915_vma *vma);
-
struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
void i915_vma_make_shrinkable(struct i915_vma *vma);
void i915_vma_make_purgeable(struct i915_vma *vma);
@@ -436,4 +431,7 @@ static inline int i915_vma_sync(struct i915_vma *vma)
void i915_vma_module_exit(void);
int i915_vma_module_init(void);
+I915_SELFTEST_DECLARE(int i915_vma_get_pages(struct i915_vma *vma));
+I915_SELFTEST_DECLARE(void i915_vma_put_pages(struct i915_vma *vma));
+
#endif
diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.c b/drivers/gpu/drm/i915/i915_vma_snapshot.c
new file mode 100644
index 000000000000..2949ceea9884
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_vma_snapshot.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include "i915_vma_snapshot.h"
+#include "i915_vma_types.h"
+#include "i915_vma.h"
+
+/**
+ * i915_vma_snapshot_init - Initialize a struct i915_vma_snapshot from
+ * a struct i915_vma.
+ * @vsnap: The i915_vma_snapshot to init.
+ * @vma: A struct i915_vma used to initialize @vsnap.
+ * @name: Name associated with the snapshot. The character pointer needs to
+ * stay alive over the lifetime of the snapshot.
+ */
+void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap,
+ struct i915_vma *vma,
+ const char *name)
+{
+ if (!i915_vma_is_pinned(vma))
+ assert_object_held(vma->obj);
+
+ vsnap->name = name;
+ vsnap->size = vma->size;
+ vsnap->obj_size = vma->obj->base.size;
+ vsnap->gtt_offset = vma->node.start;
+ vsnap->gtt_size = vma->node.size;
+ vsnap->page_sizes = vma->page_sizes.gtt;
+ vsnap->pages = vma->pages;
+ vsnap->pages_rsgt = NULL;
+ vsnap->mr = NULL;
+ if (vma->obj->mm.rsgt)
+ vsnap->pages_rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt);
+ vsnap->mr = vma->obj->mm.region;
+ kref_init(&vsnap->kref);
+ vsnap->vma_resource = &vma->active;
+ vsnap->onstack = false;
+ vsnap->present = true;
+}
+
+/**
+ * i915_vma_snapshot_init_onstack - Initialize a struct i915_vma_snapshot from
+ * a struct i915_vma, but avoid kfreeing it on last put.
+ * @vsnap: The i915_vma_snapshot to init.
+ * @vma: A struct i915_vma used to initialize @vsnap.
+ * @name: Name associated with the snapshot. The character pointer needs to
+ * stay alive over the lifetime of the snapshot.
+ */
+void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap,
+ struct i915_vma *vma,
+ const char *name)
+{
+ i915_vma_snapshot_init(vsnap, vma, name);
+ vsnap->onstack = true;
+}
+
+static void vma_snapshot_release(struct kref *ref)
+{
+ struct i915_vma_snapshot *vsnap =
+ container_of(ref, typeof(*vsnap), kref);
+
+ vsnap->present = false;
+ if (vsnap->pages_rsgt)
+ i915_refct_sgt_put(vsnap->pages_rsgt);
+ if (!vsnap->onstack)
+ kfree(vsnap);
+}
+
+/**
+ * i915_vma_snapshot_put - Put an i915_vma_snapshot pointer reference
+ * @vsnap: The pointer reference
+ */
+void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap)
+{
+ kref_put(&vsnap->kref, vma_snapshot_release);
+}
+
+/**
+ * i915_vma_snapshot_put_onstack - Put an onstack i915_vma_snapshot pointer
+ * reference and verify that the structure is released
+ * @vsnap: The pointer reference
+ *
+ * This function is intended to be paired with i915_vma_snapshot_init_onstack()
+ * and should be called before exiting the scope that declared or
+ * freeing the structure that embedded @vsnap to verify that all references
+ * have been released.
+ */
+void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap)
+{
+ if (!kref_put(&vsnap->kref, vma_snapshot_release))
+ GEM_BUG_ON(1);
+}
+
+/**
+ * i915_vma_snapshot_resource_pin - Temporarily block the memory the
+ * vma snapshot is pointing to from being released.
+ * @vsnap: The vma snapshot.
+ * @lockdep_cookie: Pointer to bool needed for lockdep support. This needs
+ * to be passed to the paired i915_vma_snapshot_resource_unpin.
+ *
+ * This function will temporarily try to hold up a fence or similar structure
+ * and will therefore enter a fence signaling critical section.
+ *
+ * Return: true if we succeeded in blocking the memory from being released,
+ * false otherwise.
+ */
+bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap,
+ bool *lockdep_cookie)
+{
+ bool pinned = i915_active_acquire_if_busy(vsnap->vma_resource);
+
+ if (pinned)
+ *lockdep_cookie = dma_fence_begin_signalling();
+
+ return pinned;
+}
+
+/**
+ * i915_vma_snapshot_resource_unpin - Unblock vma snapshot memory from
+ * being released.
+ * @vsnap: The vma snapshot.
+ * @lockdep_cookie: Cookie stored by the matching i915_vma_snapshot_resource_pin().
+ *
+ * Might leave a fence signalling critical section and signal a fence.
+ */
+void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap,
+ bool lockdep_cookie)
+{
+ dma_fence_end_signalling(lockdep_cookie);
+
+ i915_active_release(vsnap->vma_resource);
+}
diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.h b/drivers/gpu/drm/i915/i915_vma_snapshot.h
new file mode 100644
index 000000000000..940581df4622
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_vma_snapshot.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _I915_VMA_SNAPSHOT_H_
+#define _I915_VMA_SNAPSHOT_H_
+
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+struct i915_active;
+struct i915_refct_sgt;
+struct i915_vma;
+struct intel_memory_region;
+struct sg_table;
+
+/**
+ * DOC: Simple utilities for snapshotting GPU vma metadata, later used for
+ * error capture. We use a separate header for this to avoid issues due to
+ * recursive header includes.
+ */
+
+/**
+ * struct i915_vma_snapshot - Snapshot of vma metadata.
+ * @size: The vma size in bytes.
+ * @obj_size: The size of the underlying object in bytes.
+ * @gtt_offset: The gtt offset the vma is bound to.
+ * @gtt_size: The size in bytes allocated for the vma in the GTT.
+ * @pages: The struct sg_table pointing to the pages bound.
+ * @pages_rsgt: The refcounted sg_table holding the reference for @pages if any.
+ * @mr: The memory region pointed for the pages bound.
+ * @kref: Reference for this structure.
+ * @vma_resource: FIXME: A means to keep the unbind fence from signaling.
+ * Temporarily while we have only sync unbinds, and still use the vma
+ * active, we use that. With async unbinding we need a signaling refcount
+ * for the unbind fence.
+ * @page_sizes: The vma GTT page sizes information.
+ * @onstack: Whether the structure shouldn't be freed on final put.
+ * @present: Whether the structure is present and initialized.
+ */
+struct i915_vma_snapshot {
+ const char *name;
+ size_t size;
+ size_t obj_size;
+ size_t gtt_offset;
+ size_t gtt_size;
+ struct sg_table *pages;
+ struct i915_refct_sgt *pages_rsgt;
+ struct intel_memory_region *mr;
+ struct kref kref;
+ struct i915_active *vma_resource;
+ u32 page_sizes;
+ bool onstack:1;
+ bool present:1;
+};
+
+void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap,
+ struct i915_vma *vma,
+ const char *name);
+
+void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap,
+ struct i915_vma *vma,
+ const char *name);
+
+void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap);
+
+void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap);
+
+bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap,
+ bool *lockdep_cookie);
+
+void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap,
+ bool lockdep_cookie);
+
+/**
+ * i915_vma_snapshot_alloc - Allocate a struct i915_vma_snapshot
+ * @gfp: Allocation mode.
+ *
+ * Return: A pointer to a struct i915_vma_snapshot if successful.
+ * NULL otherwise.
+ */
+static inline struct i915_vma_snapshot *i915_vma_snapshot_alloc(gfp_t gfp)
+{
+ return kmalloc(sizeof(struct i915_vma_snapshot), gfp);
+}
+
+/**
+ * i915_vma_snapshot_get - Take a reference on a struct i915_vma_snapshot
+ *
+ * Return: A pointer to a struct i915_vma_snapshot.
+ */
+static inline struct i915_vma_snapshot *
+i915_vma_snapshot_get(struct i915_vma_snapshot *vsnap)
+{
+ kref_get(&vsnap->kref);
+ return vsnap;
+}
+
+/**
+ * i915_vma_snapshot_present - Whether a struct i915_vma_snapshot is
+ * present and initialized.
+ *
+ * Return: true if present and initialized; false otherwise.
+ */
+static inline bool
+i915_vma_snapshot_present(const struct i915_vma_snapshot *vsnap)
+{
+ return vsnap && vsnap->present;
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index 4ee6e54799f4..ca575e129ced 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -187,7 +187,6 @@ struct i915_vma {
const struct i915_vma_ops *ops;
struct drm_i915_gem_object *obj;
- struct dma_resv *resv; /** Alias of obj->resv */
struct sg_table *pages;
void __iomem *iomap;
@@ -271,7 +270,6 @@ struct i915_vma {
#define I915_VMA_PAGES_BIAS 24
#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1)
atomic_t pages_count; /* number of active binds to the pages */
- struct mutex pages_mutex; /* protect acquire/release of backing pages */
/**
* Support different GGTT views into the same object.
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 24e05f1ef486..93b251b25aba 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -83,33 +83,26 @@ const char *intel_platform_name(enum intel_platform platform)
return platform_names[platform];
}
-static const char *iommu_name(void)
-{
- const char *msg = "n/a";
-
-#ifdef CONFIG_INTEL_IOMMU
- msg = enableddisabled(intel_iommu_gfx_mapped);
-#endif
-
- return msg;
-}
-
void intel_device_info_print_static(const struct intel_device_info *info,
struct drm_printer *p)
{
- if (info->graphics_rel)
- drm_printf(p, "graphics version: %u.%02u\n", info->graphics_ver, info->graphics_rel);
+ if (info->graphics.rel)
+ drm_printf(p, "graphics version: %u.%02u\n", info->graphics.ver,
+ info->graphics.rel);
+ else
+ drm_printf(p, "graphics version: %u\n", info->graphics.ver);
+
+ if (info->media.rel)
+ drm_printf(p, "media version: %u.%02u\n", info->media.ver, info->media.rel);
else
- drm_printf(p, "graphics version: %u\n", info->graphics_ver);
+ drm_printf(p, "media version: %u\n", info->media.ver);
- if (info->media_rel)
- drm_printf(p, "media version: %u.%02u\n", info->media_ver, info->media_rel);
+ if (info->display.rel)
+ drm_printf(p, "display version: %u.%02u\n", info->display.ver, info->display.rel);
else
- drm_printf(p, "media version: %u\n", info->media_ver);
+ drm_printf(p, "display version: %u\n", info->display.ver);
- drm_printf(p, "display version: %u\n", info->display.ver);
drm_printf(p, "gt: %d\n", info->gt);
- drm_printf(p, "iommu: %s\n", iommu_name());
drm_printf(p, "memory-regions: %x\n", info->memory_regions);
drm_printf(p, "page-sizes: %x\n", info->page_sizes);
drm_printf(p, "platform: %s\n", intel_platform_name(info->platform));
@@ -385,7 +378,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
info->display.has_dsc = 0;
}
- if (GRAPHICS_VER(dev_priv) == 6 && intel_vtd_active()) {
+ if (GRAPHICS_VER(dev_priv) == 6 && intel_vtd_active(dev_priv)) {
drm_info(&dev_priv->drm,
"Disabling ppGTT for VT-d support\n");
info->ppgtt_type = INTEL_PPGTT_NONE;
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 2a4e32b4ebfd..3699b1c539ea 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -129,6 +129,7 @@ enum intel_ppgtt_type {
func(is_dgfx); \
/* Keep has_* in alphabetical order */ \
func(has_64bit_reloc); \
+ func(has_64k_pages); \
func(gpu_reset_clobbers_display); \
func(has_reset_engine); \
func(has_global_mocs); \
@@ -171,11 +172,14 @@ enum intel_ppgtt_type {
func(overlay_needs_physical); \
func(supports_tv);
+struct ip_version {
+ u8 ver;
+ u8 rel;
+};
+
struct intel_device_info {
- u8 graphics_ver;
- u8 graphics_rel;
- u8 media_ver;
- u8 media_rel;
+ struct ip_version graphics;
+ struct ip_version media;
intel_engine_mask_t platform_engine_mask; /* Engines supported by the HW */
@@ -200,6 +204,7 @@ struct intel_device_info {
struct {
u8 ver;
+ u8 rel;
u8 pipe_mask;
u8 cpu_transcoder_mask;
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index 4e70c1a9ef2e..cf6e98962d82 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -109,7 +109,7 @@ int intel_gvt_init(struct drm_i915_private *dev_priv)
return 0;
}
- if (intel_uc_wants_guc_submission(&dev_priv->gt.uc)) {
+ if (intel_uc_wants_guc_submission(&to_gt(dev_priv)->uc)) {
drm_err(&dev_priv->drm,
"i915 GVT-g loading failed due to Graphics virtualization is not yet supported with GuC submission\n");
return -EIO;
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index e7f7e6627750..c70d7e286a51 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -3,6 +3,8 @@
* Copyright © 2019 Intel Corporation
*/
+#include <linux/prandom.h>
+
#include "intel_memory_region.h"
#include "i915_drv.h"
#include "i915_ttm_buddy_manager.h"
@@ -29,6 +31,110 @@ static const struct {
},
};
+static int __iopagetest(struct intel_memory_region *mem,
+ u8 __iomem *va, int pagesize,
+ u8 value, resource_size_t offset,
+ const void *caller)
+{
+ int byte = prandom_u32_max(pagesize);
+ u8 result[3];
+
+ memset_io(va, value, pagesize); /* or GPF! */
+ wmb();
+
+ result[0] = ioread8(va);
+ result[1] = ioread8(va + byte);
+ result[2] = ioread8(va + pagesize - 1);
+ if (memchr_inv(result, value, sizeof(result))) {
+ dev_err(mem->i915->drm.dev,
+ "Failed to read back from memory region:%pR at [%pa + %pa] for %ps; wrote %x, read (%x, %x, %x)\n",
+ &mem->region, &mem->io_start, &offset, caller,
+ value, result[0], result[1], result[2]);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int iopagetest(struct intel_memory_region *mem,
+ resource_size_t offset,
+ const void *caller)
+{
+ const u8 val[] = { 0x0, 0xa5, 0xc3, 0xf0 };
+ void __iomem *va;
+ int err;
+ int i;
+
+ va = ioremap_wc(mem->io_start + offset, PAGE_SIZE);
+ if (!va) {
+ dev_err(mem->i915->drm.dev,
+ "Failed to ioremap memory region [%pa + %pa] for %ps\n",
+ &mem->io_start, &offset, caller);
+ return -EFAULT;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(val); i++) {
+ err = __iopagetest(mem, va, PAGE_SIZE, val[i], offset, caller);
+ if (err)
+ break;
+
+ err = __iopagetest(mem, va, PAGE_SIZE, ~val[i], offset, caller);
+ if (err)
+ break;
+ }
+
+ iounmap(va);
+ return err;
+}
+
+static resource_size_t random_page(resource_size_t last)
+{
+ /* Widen before shifting; limited to low 44b (16TiB), which suffices for a spot check */
+ return (resource_size_t)prandom_u32_max(last >> PAGE_SHIFT) << PAGE_SHIFT;
+}
+
+static int iomemtest(struct intel_memory_region *mem,
+ bool test_all,
+ const void *caller)
+{
+ resource_size_t last = resource_size(&mem->region) - PAGE_SIZE;
+ resource_size_t page;
+ int err;
+
+ /*
+ * Quick test to check read/write access to the iomap (backing store).
+ *
+ * Write a byte, read it back. If the iomapping fails, we expect
+ * a GPF preventing further execution. If the backing store does not
+ * exist, the read back will return garbage. We check a couple of pages,
+ * the first and last of the specified region to confirm the backing
+ * store + iomap does cover the entire memory region; and we check
+ * a random offset within as a quick spot check for bad memory.
+ */
+
+ if (test_all) {
+ for (page = 0; page <= last; page += PAGE_SIZE) {
+ err = iopagetest(mem, page, caller);
+ if (err)
+ return err;
+ }
+ } else {
+ err = iopagetest(mem, 0, caller);
+ if (err)
+ return err;
+
+ err = iopagetest(mem, last, caller);
+ if (err)
+ return err;
+
+ err = iopagetest(mem, random_page(last), caller);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
struct intel_memory_region *
intel_memory_region_lookup(struct drm_i915_private *i915,
u16 class, u16 instance)
@@ -90,6 +196,21 @@ void intel_memory_region_debug(struct intel_memory_region *mr,
&mr->total, &mr->avail);
}
+/*
+ * Optionally run the iomap memory test on a freshly initialised region.
+ *
+ * Regions without an io_start are skipped. The test runs when
+ * CONFIG_DRM_I915_DEBUG_GEM is enabled or the memtest module parameter is
+ * set; the parameter also selects the exhaustive (test_all) variant of
+ * iomemtest(). @caller is passed through for error reporting.
+ *
+ * Returns 0 if the test was skipped or passed, otherwise the error from
+ * iomemtest().
+ */
+static int intel_memory_region_memtest(struct intel_memory_region *mem,
+				       void *caller)
+{
+	struct drm_i915_private *i915 = mem->i915;
+
+	if (!mem->io_start)
+		return 0;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && !i915->params.memtest)
+		return 0;
+
+	return iomemtest(mem, i915->params.memtest, caller);
+}
+
struct intel_memory_region *
intel_memory_region_create(struct drm_i915_private *i915,
resource_size_t start,
@@ -126,9 +247,15 @@ intel_memory_region_create(struct drm_i915_private *i915,
goto err_free;
}
- kref_init(&mem->kref);
+ err = intel_memory_region_memtest(mem, (void *)_RET_IP_);
+ if (err)
+ goto err_release;
+
return mem;
+err_release:
+ if (mem->ops->release)
+ mem->ops->release(mem);
err_free:
kfree(mem);
return ERR_PTR(err);
@@ -144,28 +271,17 @@ void intel_memory_region_set_name(struct intel_memory_region *mem,
va_end(ap);
}
-static void __intel_memory_region_destroy(struct kref *kref)
+void intel_memory_region_destroy(struct intel_memory_region *mem)
{
- struct intel_memory_region *mem =
- container_of(kref, typeof(*mem), kref);
+ int ret = 0;
if (mem->ops->release)
- mem->ops->release(mem);
+ ret = mem->ops->release(mem);
+ GEM_WARN_ON(!list_empty_careful(&mem->objects.list));
mutex_destroy(&mem->objects.lock);
- kfree(mem);
-}
-
-struct intel_memory_region *
-intel_memory_region_get(struct intel_memory_region *mem)
-{
- kref_get(&mem->kref);
- return mem;
-}
-
-void intel_memory_region_put(struct intel_memory_region *mem)
-{
- kref_put(&mem->kref, __intel_memory_region_destroy);
+ if (!ret)
+ kfree(mem);
}
/* Global memory region registration -- only slight layer inversions! */
@@ -234,7 +350,7 @@ void intel_memory_regions_driver_release(struct drm_i915_private *i915)
fetch_and_zero(&i915->mm.regions[i]);
if (region)
- intel_memory_region_put(region);
+ intel_memory_region_destroy(region);
}
}
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index 3feae3353d33..5625c9c38993 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -6,7 +6,6 @@
#ifndef __INTEL_MEMORY_REGION_H__
#define __INTEL_MEMORY_REGION_H__
-#include <linux/kref.h>
#include <linux/ioport.h>
#include <linux/mutex.h>
#include <linux/io-mapping.h>
@@ -51,7 +50,7 @@ struct intel_memory_region_ops {
unsigned int flags;
int (*init)(struct intel_memory_region *mem);
- void (*release)(struct intel_memory_region *mem);
+ int (*release)(struct intel_memory_region *mem);
int (*init_object)(struct intel_memory_region *mem,
struct drm_i915_gem_object *obj,
@@ -71,8 +70,6 @@ struct intel_memory_region {
/* For fake LMEM */
struct drm_mm_node fake_mappable;
- struct kref kref;
-
resource_size_t io_start;
resource_size_t min_page_size;
resource_size_t total;
@@ -110,9 +107,7 @@ intel_memory_region_create(struct drm_i915_private *i915,
u16 instance,
const struct intel_memory_region_ops *ops);
-struct intel_memory_region *
-intel_memory_region_get(struct intel_memory_region *mem);
-void intel_memory_region_put(struct intel_memory_region *mem);
+void intel_memory_region_destroy(struct intel_memory_region *mem);
int intel_memory_regions_hw_probe(struct drm_i915_private *i915);
void intel_memory_regions_driver_release(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 9e4c4240c448..46b21680e601 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7502,11 +7502,34 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv)
gen12lp_init_clock_gating(dev_priv);
/* Wa_1409836686:dg1[a0] */
- if (IS_DG1_GT_STEP(dev_priv, STEP_A0, STEP_B0))
+ if (IS_DG1_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0))
intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) |
DPT_GATING_DIS);
}
+/*
+ * Clock gating setup for XeHP SDV: sets SGR_DIS in XEHP_CLOCK_GATE_DIS on
+ * graphics steppings in the STEP_A0..STEP_B0 range; does nothing otherwise.
+ */
+static void xehpsdv_init_clock_gating(struct drm_i915_private *dev_priv)
+{
+	struct intel_uncore *uncore = &dev_priv->uncore;
+
+	/* Wa_22010146351:xehpsdv */
+	if (!IS_XEHPSDV_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0))
+		return;
+
+	intel_uncore_rmw(uncore, XEHP_CLOCK_GATE_DIS, 0, SGR_DIS);
+}
+
+/*
+ * Clock gating setup for DG2. Both workarounds set additional bits in
+ * XEHP_CLOCK_GATE_DIS via read-modify-write (nothing is cleared).
+ */
+static void dg2_init_clock_gating(struct drm_i915_private *i915)
+{
+	struct intel_uncore *uncore = &i915->uncore;
+
+	/* Wa_22010954014:dg2_g10 */
+	if (IS_DG2_G10(i915))
+		intel_uncore_rmw(uncore, XEHP_CLOCK_GATE_DIS,
+				 0, SGSI_SIDECLK_DIS);
+
+	/*
+	 * Wa_14010733611:dg2_g10
+	 * Wa_22010146351:dg2_g10
+	 */
+	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0))
+		intel_uncore_rmw(uncore, XEHP_CLOCK_GATE_DIS,
+				 0, SGR_DIS | SGGI_DIS);
+}
+
static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
{
if (!HAS_PCH_CNP(dev_priv))
@@ -7551,12 +7574,12 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
FBC_LLC_FULLY_OPEN);
/* WaDisableSDEUnitClockGating:kbl */
- if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0))
+ if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0))
intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
/* WaDisableGamClockGating:kbl */
- if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0))
+ if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0))
intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) |
GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
@@ -7923,6 +7946,8 @@ static const struct drm_i915_clock_gating_funcs platform##_clock_gating_funcs =
.init_clock_gating = platform##_init_clock_gating, \
}
+CG_FUNCS(dg2);
+CG_FUNCS(xehpsdv);
CG_FUNCS(adlp);
CG_FUNCS(dg1);
CG_FUNCS(gen12lp);
@@ -7959,7 +7984,11 @@ CG_FUNCS(nop);
*/
void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
{
- if (IS_ALDERLAKE_P(dev_priv))
+ if (IS_DG2(dev_priv))
+ dev_priv->clock_gating_funcs = &dg2_clock_gating_funcs;
+ else if (IS_XEHPSDV(dev_priv))
+ dev_priv->clock_gating_funcs = &xehpsdv_clock_gating_funcs;
+ else if (IS_ALDERLAKE_P(dev_priv))
dev_priv->clock_gating_funcs = &adlp_clock_gating_funcs;
else if (IS_DG1(dev_priv))
dev_priv->clock_gating_funcs = &dg1_clock_gating_funcs;
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
index 98c7339bf8ba..f2b888c16958 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -104,19 +104,50 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
* memory region, and if it was registered with the TTM device,
* removes that registration.
*/
-void intel_region_ttm_fini(struct intel_memory_region *mem)
+int intel_region_ttm_fini(struct intel_memory_region *mem)
{
- int ret;
+ struct ttm_resource_manager *man = mem->region_private;
+ int ret = -EBUSY;
+ int count;
+
+ /*
+ * Put the region's move fences. This releases requests that
+ * may hold on to contexts and vms that may hold on to buffer
+ * objects placed in this region.
+ */
+ if (man)
+ ttm_resource_manager_cleanup(man);
+
+ /* Flush objects from region. */
+ for (count = 0; count < 10; ++count) {
+ i915_gem_flush_free_objects(mem->i915);
+
+ mutex_lock(&mem->objects.lock);
+ if (list_empty(&mem->objects.list))
+ ret = 0;
+ mutex_unlock(&mem->objects.lock);
+ if (!ret)
+ break;
+
+ msleep(20);
+ flush_delayed_work(&mem->i915->bdev.wq);
+ }
+
+ /* If we leaked objects, don't free the region, as that would cause a use-after-free */
+ if (ret || !man)
+ return ret;
ret = i915_ttm_buddy_man_fini(&mem->i915->bdev,
intel_region_to_ttm_type(mem));
GEM_WARN_ON(ret);
mem->region_private = NULL;
+
+ return ret;
}
/**
- * intel_region_ttm_resource_to_st - Convert an opaque TTM resource manager resource
- * to an sg_table.
+ * intel_region_ttm_resource_to_rsgt -
+ * Convert an opaque TTM resource manager resource to a refcounted sg_table.
* @mem: The memory region.
* @res: The resource manager resource obtained from the TTM resource manager.
*
@@ -126,17 +157,18 @@ void intel_region_ttm_fini(struct intel_memory_region *mem)
*
* Return: A malloced sg_table on success, an error pointer on failure.
*/
-struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem,
- struct ttm_resource *res)
+struct i915_refct_sgt *
+intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
+ struct ttm_resource *res)
{
if (mem->is_range_manager) {
struct ttm_range_mgr_node *range_node =
to_ttm_range_mgr_node(res);
- return i915_sg_from_mm_node(&range_node->mm_nodes[0],
- mem->region.start);
+ return i915_rsgt_from_mm_node(&range_node->mm_nodes[0],
+ mem->region.start);
} else {
- return i915_sg_from_buddy_resource(res, mem->region.start);
+ return i915_rsgt_from_buddy_resource(res, mem->region.start);
}
}
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h
index 6f44075920f2..fdee5e7bd46c 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.h
+++ b/drivers/gpu/drm/i915/intel_region_ttm.h
@@ -20,10 +20,11 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv);
int intel_region_ttm_init(struct intel_memory_region *mem);
-void intel_region_ttm_fini(struct intel_memory_region *mem);
+int intel_region_ttm_fini(struct intel_memory_region *mem);
-struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem,
- struct ttm_resource *res);
+struct i915_refct_sgt *
+intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
+ struct ttm_resource *res);
void intel_region_ttm_resource_free(struct intel_memory_region *mem,
struct ttm_resource *res);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 22dab36afcb6..53f1ccb78849 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -68,6 +68,9 @@ static noinline depot_stack_handle_t __save_depot_stack(void)
static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
{
spin_lock_init(&rpm->debug.lock);
+
+ if (rpm->available)
+ stack_depot_init();
}
static noinline depot_stack_handle_t
diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c
index 6cf967631395..a4b16b9e2e55 100644
--- a/drivers/gpu/drm/i915/intel_step.c
+++ b/drivers/gpu/drm/i915/intel_step.c
@@ -23,7 +23,8 @@
* use a macro to define these to make it easier to identify the platforms
* where the two steppings can deviate.
*/
-#define COMMON_STEP(x) .gt_step = STEP_##x, .display_step = STEP_##x
+#define COMMON_STEP(x) .graphics_step = STEP_##x, .display_step = STEP_##x, .media_step = STEP_##x
+#define COMMON_GT_MEDIA_STEP(x) .graphics_step = STEP_##x, .media_step = STEP_##x
static const struct intel_step_info skl_revids[] = {
[0x6] = { COMMON_STEP(G0) },
@@ -33,13 +34,13 @@ static const struct intel_step_info skl_revids[] = {
};
static const struct intel_step_info kbl_revids[] = {
- [1] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
- [2] = { .gt_step = STEP_C0, .display_step = STEP_B0 },
- [3] = { .gt_step = STEP_D0, .display_step = STEP_B0 },
- [4] = { .gt_step = STEP_F0, .display_step = STEP_C0 },
- [5] = { .gt_step = STEP_C0, .display_step = STEP_B1 },
- [6] = { .gt_step = STEP_D1, .display_step = STEP_B1 },
- [7] = { .gt_step = STEP_G0, .display_step = STEP_C0 },
+ [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
+ [2] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 },
+ [3] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_B0 },
+ [4] = { COMMON_GT_MEDIA_STEP(F0), .display_step = STEP_C0 },
+ [5] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B1 },
+ [6] = { COMMON_GT_MEDIA_STEP(D1), .display_step = STEP_B1 },
+ [7] = { COMMON_GT_MEDIA_STEP(G0), .display_step = STEP_C0 },
};
static const struct intel_step_info bxt_revids[] = {
@@ -63,16 +64,16 @@ static const struct intel_step_info jsl_ehl_revids[] = {
};
static const struct intel_step_info tgl_uy_revids[] = {
- [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
- [1] = { .gt_step = STEP_B0, .display_step = STEP_C0 },
- [2] = { .gt_step = STEP_B1, .display_step = STEP_C0 },
- [3] = { .gt_step = STEP_C0, .display_step = STEP_D0 },
+ [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
+ [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 },
+ [2] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 },
+ [3] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 },
};
/* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */
static const struct intel_step_info tgl_revids[] = {
- [0] = { .gt_step = STEP_A0, .display_step = STEP_B0 },
- [1] = { .gt_step = STEP_B0, .display_step = STEP_D0 },
+ [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 },
+ [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_D0 },
};
static const struct intel_step_info rkl_revids[] = {
@@ -87,38 +88,38 @@ static const struct intel_step_info dg1_revids[] = {
};
static const struct intel_step_info adls_revids[] = {
- [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
- [0x1] = { .gt_step = STEP_A0, .display_step = STEP_A2 },
- [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
- [0x8] = { .gt_step = STEP_C0, .display_step = STEP_B0 },
- [0xC] = { .gt_step = STEP_D0, .display_step = STEP_C0 },
+ [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
+ [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A2 },
+ [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
+ [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 },
+ [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 },
};
static const struct intel_step_info adlp_revids[] = {
- [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
- [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
- [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 },
- [0xC] = { .gt_step = STEP_C0, .display_step = STEP_D0 },
+ [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
+ [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
+ [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 },
+ [0xC] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 },
};
static const struct intel_step_info xehpsdv_revids[] = {
- [0x0] = { .gt_step = STEP_A0 },
- [0x1] = { .gt_step = STEP_A1 },
- [0x4] = { .gt_step = STEP_B0 },
- [0x8] = { .gt_step = STEP_C0 },
+ [0x0] = { COMMON_GT_MEDIA_STEP(A0) },
+ [0x1] = { COMMON_GT_MEDIA_STEP(A1) },
+ [0x4] = { COMMON_GT_MEDIA_STEP(B0) },
+ [0x8] = { COMMON_GT_MEDIA_STEP(C0) },
};
static const struct intel_step_info dg2_g10_revid_step_tbl[] = {
- [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
- [0x1] = { .gt_step = STEP_A1, .display_step = STEP_A0 },
- [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
- [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 },
+ [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
+ [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display_step = STEP_A0 },
+ [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
+ [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 },
};
static const struct intel_step_info dg2_g11_revid_step_tbl[] = {
- [0x0] = { .gt_step = STEP_A0, .display_step = STEP_B0 },
- [0x4] = { .gt_step = STEP_B0, .display_step = STEP_C0 },
- [0x5] = { .gt_step = STEP_B1, .display_step = STEP_C0 },
+ [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 },
+ [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 },
+ [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 },
};
void intel_step_init(struct drm_i915_private *i915)
@@ -179,7 +180,7 @@ void intel_step_init(struct drm_i915_private *i915)
if (!revids)
return;
- if (revid < size && revids[revid].gt_step != STEP_NONE) {
+ if (revid < size && revids[revid].graphics_step != STEP_NONE) {
step = revids[revid];
} else {
drm_warn(&i915->drm, "Unknown revid 0x%02x\n", revid);
@@ -192,7 +193,7 @@ void intel_step_init(struct drm_i915_private *i915)
* steppings in the array are not monotonically increasing, but
* it's better than defaulting to 0.
*/
- while (revid < size && revids[revid].gt_step == STEP_NONE)
+ while (revid < size && revids[revid].graphics_step == STEP_NONE)
revid++;
if (revid < size) {
@@ -201,12 +202,12 @@ void intel_step_init(struct drm_i915_private *i915)
step = revids[revid];
} else {
drm_dbg(&i915->drm, "Using future steppings\n");
- step.gt_step = STEP_FUTURE;
+ step.graphics_step = STEP_FUTURE;
step.display_step = STEP_FUTURE;
}
}
- if (drm_WARN_ON(&i915->drm, step.gt_step == STEP_NONE))
+ if (drm_WARN_ON(&i915->drm, step.graphics_step == STEP_NONE))
return;
RUNTIME_INFO(i915)->step = step;
diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h
index f6641e2a3c77..d71a99bd5179 100644
--- a/drivers/gpu/drm/i915/intel_step.h
+++ b/drivers/gpu/drm/i915/intel_step.h
@@ -11,8 +11,9 @@
struct drm_i915_private;
struct intel_step_info {
- u8 gt_step;
+ u8 graphics_step;
u8 display_step;
+ u8 media_step;
};
#define STEP_ENUM_VAL(name) STEP_##name,
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 703061e8be51..721912539b15 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -724,7 +724,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore,
}
static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
- enum forcewake_domains fw_domains)
+ enum forcewake_domains fw_domains,
+ bool delayed)
{
struct intel_uncore_forcewake_domain *domain;
unsigned int tmp;
@@ -739,7 +740,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
continue;
}
- fw_domains_put(uncore, domain->mask);
+ if (delayed &&
+ !(domain->uncore->fw_domains_timer & domain->mask))
+ fw_domain_arm_timer(domain);
+ else
+ fw_domains_put(uncore, domain->mask);
}
}
@@ -760,7 +765,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
return;
spin_lock_irqsave(&uncore->lock, irqflags);
- __intel_uncore_forcewake_put(uncore, fw_domains);
+ __intel_uncore_forcewake_put(uncore, fw_domains, false);
+ spin_unlock_irqrestore(&uncore->lock, irqflags);
+}
+
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+ enum forcewake_domains fw_domains)
+{
+ unsigned long irqflags;
+
+ if (!uncore->fw_get_funcs)
+ return;
+
+ spin_lock_irqsave(&uncore->lock, irqflags);
+ __intel_uncore_forcewake_put(uncore, fw_domains, true);
spin_unlock_irqrestore(&uncore->lock, irqflags);
}
@@ -802,7 +820,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore,
if (!uncore->fw_get_funcs)
return;
- __intel_uncore_forcewake_put(uncore, fw_domains);
+ __intel_uncore_forcewake_put(uncore, fw_domains, false);
}
void assert_forcewakes_inactive(struct intel_uncore *uncore)
@@ -2020,7 +2038,7 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
-static int uncore_mmio_setup(struct intel_uncore *uncore)
+int intel_uncore_setup_mmio(struct intel_uncore *uncore)
{
struct drm_i915_private *i915 = uncore->i915;
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
@@ -2053,7 +2071,7 @@ static int uncore_mmio_setup(struct intel_uncore *uncore)
return 0;
}
-static void uncore_mmio_cleanup(struct intel_uncore *uncore)
+void intel_uncore_cleanup_mmio(struct intel_uncore *uncore)
{
struct pci_dev *pdev = to_pci_dev(uncore->i915->drm.dev);
@@ -2061,12 +2079,13 @@ static void uncore_mmio_cleanup(struct intel_uncore *uncore)
}
void intel_uncore_init_early(struct intel_uncore *uncore,
- struct drm_i915_private *i915)
+ struct intel_gt *gt)
{
spin_lock_init(&uncore->lock);
- uncore->i915 = i915;
- uncore->rpm = &i915->runtime_pm;
- uncore->debug = &i915->mmio_debug;
+ uncore->i915 = gt->i915;
+ uncore->gt = gt;
+ uncore->rpm = &gt->i915->runtime_pm;
+ uncore->debug = &gt->i915->mmio_debug;
}
static void uncore_raw_init(struct intel_uncore *uncore)
@@ -2146,10 +2165,6 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore)
struct drm_i915_private *i915 = uncore->i915;
int ret;
- ret = uncore_mmio_setup(uncore);
- if (ret)
- return ret;
-
/*
* The boot firmware initializes local memory and assesses its health.
* If memory training fails, the punit will have been instructed to
@@ -2170,7 +2185,7 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore)
} else {
ret = uncore_forcewake_init(uncore);
if (ret)
- goto out_mmio_cleanup;
+ return ret;
}
/* make sure fw funcs are set if and only if we have fw*/
@@ -2192,11 +2207,6 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore)
drm_dbg(&i915->drm, "unclaimed mmio detected on uncore init, clearing\n");
return 0;
-
-out_mmio_cleanup:
- uncore_mmio_cleanup(uncore);
-
- return ret;
}
/*
@@ -2261,8 +2271,6 @@ void intel_uncore_fini_mmio(struct intel_uncore *uncore)
intel_uncore_fw_domains_fini(uncore);
iosf_mbi_punit_release();
}
-
- uncore_mmio_cleanup(uncore);
}
/**
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index 3248e4e2c540..2a15b2b2e2fc 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -129,6 +129,7 @@ struct intel_uncore {
void __iomem *regs;
struct drm_i915_private *i915;
+ struct intel_gt *gt;
struct intel_runtime_pm *rpm;
spinlock_t lock; /** lock is also taken in irq contexts. */
@@ -217,12 +218,14 @@ u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
void
intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug);
void intel_uncore_init_early(struct intel_uncore *uncore,
- struct drm_i915_private *i915);
+ struct intel_gt *gt);
+int intel_uncore_setup_mmio(struct intel_uncore *uncore);
int intel_uncore_init_mmio(struct intel_uncore *uncore);
void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore,
struct intel_gt *gt);
bool intel_uncore_unclaimed_mmio(struct intel_uncore *uncore);
bool intel_uncore_arm_unclaimed_mmio_detection(struct intel_uncore *uncore);
+void intel_uncore_cleanup_mmio(struct intel_uncore *uncore);
void intel_uncore_fini_mmio(struct intel_uncore *uncore);
void intel_uncore_suspend(struct intel_uncore *uncore);
void intel_uncore_resume_early(struct intel_uncore *uncore);
@@ -243,6 +246,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore,
enum forcewake_domains domains);
void intel_uncore_forcewake_put(struct intel_uncore *uncore,
enum forcewake_domains domains);
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+ enum forcewake_domains domains);
void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
enum forcewake_domains fw_domains);
diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c
index 5e511bb891f9..f06d21005106 100644
--- a/drivers/gpu/drm/i915/intel_wopcm.c
+++ b/drivers/gpu/drm/i915/intel_wopcm.c
@@ -220,7 +220,7 @@ static bool __wopcm_regs_locked(struct intel_uncore *uncore,
void intel_wopcm_init(struct intel_wopcm *wopcm)
{
struct drm_i915_private *i915 = wopcm_to_i915(wopcm);
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
u32 guc_fw_size = intel_uc_fw_get_upload_size(&gt->uc.guc.fw);
u32 huc_fw_size = intel_uc_fw_get_upload_size(&gt->uc.huc.fw);
u32 ctx_rsvd = context_reserved_size(i915);
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
index 23fd86de5a24..6a7d4e2ee138 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
@@ -7,26 +7,29 @@
#include "intel_pxp_irq.h"
#include "intel_pxp_pm.h"
#include "intel_pxp_session.h"
+#include "i915_drv.h"
-void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime)
+void intel_pxp_suspend_prepare(struct intel_pxp *pxp)
{
if (!intel_pxp_is_enabled(pxp))
return;
pxp->arb_is_valid = false;
- /*
- * Contexts using protected objects keep a runtime PM reference, so we
- * can only runtime suspend when all of them have been either closed
- * or banned. Therefore, there is no need to invalidate in that
- * scenario.
- */
- if (!runtime)
- intel_pxp_invalidate(pxp);
+ intel_pxp_invalidate(pxp);
+}
- intel_pxp_fini_hw(pxp);
+void intel_pxp_suspend(struct intel_pxp *pxp)
+{
+ intel_wakeref_t wakeref;
- pxp->hw_state_invalidated = false;
+ if (!intel_pxp_is_enabled(pxp))
+ return;
+
+ with_intel_runtime_pm(&pxp_to_gt(pxp)->i915->runtime_pm, wakeref) {
+ intel_pxp_fini_hw(pxp);
+ pxp->hw_state_invalidated = false;
+ }
}
void intel_pxp_resume(struct intel_pxp *pxp)
@@ -44,3 +47,15 @@ void intel_pxp_resume(struct intel_pxp *pxp)
intel_pxp_init_hw(pxp);
}
+
+/*
+ * Runtime-suspend hook for PXP. When PXP is enabled this marks the arb
+ * session as no longer valid, tears down the PXP hardware state via
+ * intel_pxp_fini_hw() and clears hw_state_invalidated. No-op otherwise.
+ */
+void intel_pxp_runtime_suspend(struct intel_pxp *pxp)
+{
+	if (intel_pxp_is_enabled(pxp)) {
+		pxp->arb_is_valid = false;
+		intel_pxp_fini_hw(pxp);
+		pxp->hw_state_invalidated = false;
+	}
+}
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h
index c89e97a0c3d0..16990a3f2f85 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h
@@ -9,16 +9,29 @@
#include "intel_pxp_types.h"
#ifdef CONFIG_DRM_I915_PXP
-void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime);
+void intel_pxp_suspend_prepare(struct intel_pxp *pxp);
+void intel_pxp_suspend(struct intel_pxp *pxp);
void intel_pxp_resume(struct intel_pxp *pxp);
+void intel_pxp_runtime_suspend(struct intel_pxp *pxp);
#else
-static inline void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime)
+static inline void intel_pxp_suspend_prepare(struct intel_pxp *pxp)
+{
+}
+
+static inline void intel_pxp_suspend(struct intel_pxp *pxp)
{
}
static inline void intel_pxp_resume(struct intel_pxp *pxp)
{
}
-#endif
+static inline void intel_pxp_runtime_suspend(struct intel_pxp *pxp)
+{
+}
+#endif
+static inline void intel_pxp_runtime_resume(struct intel_pxp *pxp)
+{
+ intel_pxp_resume(pxp);
+}
#endif /* __INTEL_PXP_PM_H__ */
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
index f2fc50d7dfd3..4b6f5655fab5 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
@@ -16,7 +16,9 @@
static inline struct intel_pxp *i915_dev_to_pxp(struct device *i915_kdev)
{
- return &kdev_to_i915(i915_kdev)->gt.pxp;
+ struct drm_i915_private *i915 = kdev_to_i915(i915_kdev);
+
+ return &to_gt(i915)->pxp;
}
static int intel_pxp_tee_io_message(struct intel_pxp *pxp,
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 61bf4560d8af..2dac9be1de58 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -254,7 +254,7 @@ int i915_active_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_active_barrier),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 152d9ab135b1..b5576888cd78 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -248,7 +248,7 @@ int i915_gem_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_gem_ww_ctx),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index f99bb0113726..75b709c26dd3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -442,6 +442,7 @@ static int igt_evict_contexts(void *arg)
/* Overfill the GGTT with context objects and so try to evict one. */
for_each_engine(engine, gt, id) {
struct i915_sw_fence fence;
+ struct i915_request *last = NULL;
count = 0;
onstack_fence_init(&fence);
@@ -479,6 +480,9 @@ static int igt_evict_contexts(void *arg)
i915_request_add(rq);
count++;
+ if (last)
+ i915_request_put(last);
+ last = i915_request_get(rq);
err = 0;
} while(1);
onstack_fence_fini(&fence);
@@ -486,6 +490,21 @@ static int igt_evict_contexts(void *arg)
count, engine->name);
if (err)
break;
+ if (last) {
+ if (i915_request_wait(last, 0, HZ) < 0) {
+ err = -EIO;
+ i915_request_put(last);
+ pr_err("Failed waiting for last request (on %s)",
+ engine->name);
+ break;
+ }
+ i915_request_put(last);
+ }
+ err = intel_gt_wait_for_idle(engine->gt, HZ * 3);
+ if (err) {
+ pr_err("Failed to idle GT (on %s)", engine->name);
+ break;
+ }
}
mutex_lock(&ggtt->vm.mutex);
@@ -526,7 +545,7 @@ int i915_gem_evict_mock_selftests(void)
return -ENOMEM;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
- err = i915_subtests(tests, &i915->gt);
+ err = i915_subtests(tests, to_gt(i915));
mock_destroy_device(i915);
return err;
@@ -538,8 +557,8 @@ int i915_gem_evict_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_evict_contexts),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 46f4236039a9..575705c3bce9 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -155,7 +155,7 @@ static int igt_ppgtt_alloc(void *arg)
if (!HAS_PPGTT(dev_priv))
return 0;
- ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
+ ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
@@ -1053,7 +1053,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
if (IS_ERR(file))
return PTR_ERR(file);
- ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
+ ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
if (IS_ERR(ppgtt)) {
err = PTR_ERR(ppgtt);
goto out_free;
@@ -1275,7 +1275,7 @@ static void track_vma_bind(struct i915_vma *vma)
__i915_gem_object_pin_pages(obj);
- GEM_BUG_ON(vma->pages);
+ GEM_BUG_ON(atomic_read(&vma->pages_count));
atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
__i915_gem_object_pin_pages(obj);
vma->pages = obj->mm.pages;
@@ -1953,7 +1953,9 @@ static int igt_cs_tlb(void *arg)
goto end;
}
- err = vma->ops->set_pages(vma);
+ i915_gem_object_lock(bbe, NULL);
+ err = i915_vma_get_pages(vma);
+ i915_gem_object_unlock(bbe);
if (err)
goto end;
@@ -1994,7 +1996,7 @@ end_ww:
i915_request_put(rq);
}
- vma->ops->clear_pages(vma);
+ i915_vma_put_pages(vma);
err = context_sync(ce);
if (err) {
@@ -2009,7 +2011,9 @@ end_ww:
goto end;
}
- err = vma->ops->set_pages(vma);
+ i915_gem_object_lock(act, NULL);
+ err = i915_vma_get_pages(vma);
+ i915_gem_object_unlock(act);
if (err)
goto end;
@@ -2047,7 +2051,7 @@ end_ww:
}
end_spin(batch, count - 1);
- vma->ops->clear_pages(vma);
+ i915_vma_put_pages(vma);
err = context_sync(ce);
if (err) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c
index 9e9a6cb1d9e5..88db2e3d81d0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_perf.c
@@ -424,7 +424,7 @@ int i915_perf_live_selftests(struct drm_i915_private *i915)
if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
return 0;
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
err = alloc_empty_config(&i915->perf);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index d67710d10615..92a859b34190 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -209,6 +209,10 @@ static int igt_request_rewind(void *arg)
int err = -EINVAL;
ctx[0] = mock_context(i915, "A");
+ if (!ctx[0]) {
+ err = -ENOMEM;
+ goto err_ctx_0;
+ }
ce = i915_gem_context_get_engine(ctx[0], RCS0);
GEM_BUG_ON(IS_ERR(ce));
@@ -223,6 +227,10 @@ static int igt_request_rewind(void *arg)
i915_request_add(request);
ctx[1] = mock_context(i915, "B");
+ if (!ctx[1]) {
+ err = -ENOMEM;
+ goto err_ctx_1;
+ }
ce = i915_gem_context_get_engine(ctx[1], RCS0);
GEM_BUG_ON(IS_ERR(ce));
@@ -261,9 +269,11 @@ err:
i915_request_put(vip);
err_context_1:
mock_context_close(ctx[1]);
+err_ctx_1:
i915_request_put(request);
err_context_0:
mock_context_close(ctx[0]);
+err_ctx_0:
mock_device_flush(i915);
return err;
}
@@ -831,7 +841,7 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915)
__i915_gem_object_flush_map(obj, 0, 64);
i915_gem_object_unpin_map(obj);
- intel_gt_chipset_flush(&i915->gt);
+ intel_gt_chipset_flush(to_gt(i915));
vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
if (IS_ERR(vma)) {
@@ -972,7 +982,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
if (IS_ERR(obj))
return ERR_CAST(obj);
- vma = i915_vma_instance(obj, i915->gt.vm, NULL);
+ vma = i915_vma_instance(obj, to_gt(i915)->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err;
@@ -1004,7 +1014,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
__i915_gem_object_flush_map(obj, 0, 64);
i915_gem_object_unpin_map(obj);
- intel_gt_chipset_flush(&i915->gt);
+ intel_gt_chipset_flush(to_gt(i915));
return vma;
@@ -1690,7 +1700,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_breadcrumbs_smoketest),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_subtests(tests, i915);
@@ -2805,7 +2815,7 @@ static int p_sync0(void *arg)
i915_request_add(rq);
err = 0;
- if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ if (i915_request_wait(rq, 0, HZ) < 0)
err = -ETIME;
i915_request_put(rq);
if (err)
@@ -2876,7 +2886,7 @@ static int p_sync1(void *arg)
i915_request_add(rq);
err = 0;
- if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+ if (prev && i915_request_wait(prev, 0, HZ) < 0)
err = -ETIME;
i915_request_put(prev);
prev = rq;
@@ -3081,7 +3091,7 @@ int i915_request_perf_selftests(struct drm_i915_private *i915)
SUBTEST(perf_parallel_engines),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index 484759c9409c..2d6d7bd13c3c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
@@ -298,10 +298,10 @@ int __i915_live_setup(void *data)
struct drm_i915_private *i915 = data;
/* The selftests expect an idle system */
- if (intel_gt_pm_wait_for_idle(&i915->gt))
+ if (intel_gt_pm_wait_for_idle(to_gt(i915)))
return -EIO;
- return intel_gt_terminally_wedged(&i915->gt);
+ return intel_gt_terminally_wedged(to_gt(i915));
}
int __i915_live_teardown(int err, void *data)
diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
index cbf45d85cbff..daa985e5a19b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
@@ -28,7 +28,7 @@
#include "../i915_selftest.h"
-static int __i915_sw_fence_call
+static int
fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
switch (state) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 1f10fe36619b..5c5809dfe9b2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -691,7 +691,11 @@ static int igt_vma_rotate_remap(void *arg)
}
i915_vma_unpin(vma);
-
+ err = i915_vma_unbind(vma);
+ if (err) {
+ pr_err("Unbinding returned %i\n", err);
+ goto out_object;
+ }
cond_resched();
}
}
@@ -848,6 +852,11 @@ static int igt_vma_partial(void *arg)
i915_vma_unpin(vma);
nvma++;
+ err = i915_vma_unbind(vma);
+ if (err) {
+ pr_err("Unbinding returned %i\n", err);
+ goto out_object;
+ }
cond_resched();
}
@@ -882,6 +891,12 @@ static int igt_vma_partial(void *arg)
i915_vma_unpin(vma);
+ err = i915_vma_unbind(vma);
+ if (err) {
+ pr_err("Unbinding returned %i\n", err);
+ goto out_object;
+ }
+
count = 0;
list_for_each_entry(vma, &obj->vma.list, obj_link)
count++;
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index a6c71fca61aa..b84594601d30 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -14,7 +14,7 @@
int igt_flush_test(struct drm_i915_private *i915)
{
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
int ret = intel_gt_is_wedged(gt) ? -EIO : 0;
cond_resched();
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 1c721542e277..72b58b66692a 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -16,7 +16,7 @@ int igt_live_test_begin(struct igt_live_test *t,
const char *func,
const char *name)
{
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
struct intel_engine_cs *engine;
enum intel_engine_id id;
int err;
@@ -57,7 +57,7 @@ int igt_live_test_end(struct igt_live_test *t)
return -EIO;
}
- for_each_engine(engine, &i915->gt, id) {
+ for_each_engine(engine, to_gt(i915), id) {
if (t->reset_engine[id] ==
i915_reset_engine_count(&i915->gpu_error, engine))
continue;
diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c
index 9f8590b868a9..a2838c65f8a5 100644
--- a/drivers/gpu/drm/i915/selftests/igt_reset.c
+++ b/drivers/gpu/drm/i915/selftests/igt_reset.c
@@ -36,7 +36,7 @@ void igt_global_reset_unlock(struct intel_gt *gt)
enum intel_engine_id id;
for_each_engine(engine, gt, id)
- clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
clear_bit(I915_RESET_BACKOFF, &gt->reset.flags);
wake_up_all(&gt->reset.queue);
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 418caae84759..8255561ff853 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -225,7 +225,7 @@ static int igt_mock_reserve(void *arg)
out_close:
close_objects(mem, &objects);
- intel_memory_region_put(mem);
+ intel_memory_region_destroy(mem);
out_free_order:
kfree(order);
return err;
@@ -439,7 +439,7 @@ static int igt_mock_splintered_region(void *arg)
out_close:
close_objects(mem, &objects);
out_put:
- intel_memory_region_put(mem);
+ intel_memory_region_destroy(mem);
return err;
}
@@ -507,7 +507,7 @@ static int igt_mock_max_segment(void *arg)
out_close:
close_objects(mem, &objects);
out_put:
- intel_memory_region_put(mem);
+ intel_memory_region_destroy(mem);
return err;
}
@@ -1196,7 +1196,7 @@ int intel_memory_region_mock_selftests(void)
err = i915_subtests(tests, mem);
- intel_memory_region_put(mem);
+ intel_memory_region_destroy(mem);
out_unref:
mock_destroy_device(i915);
return err;
@@ -1217,7 +1217,7 @@ int intel_memory_region_live_selftests(struct drm_i915_private *i915)
return 0;
}
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);
@@ -1229,7 +1229,7 @@ int intel_memory_region_perf_selftests(struct drm_i915_private *i915)
SUBTEST(perf_memcpy),
};
- if (intel_gt_is_wedged(&i915->gt))
+ if (intel_gt_is_wedged(to_gt(i915)))
return 0;
return i915_live_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c
index bc8128170a99..cdd196783535 100644
--- a/drivers/gpu/drm/i915/selftests/intel_uncore.c
+++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c
@@ -344,5 +344,5 @@ int intel_uncore_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_forcewake_domains),
};
- return intel_gt_live_subtests(tests, &i915->gt);
+ return intel_gt_live_subtests(tests, to_gt(i915));
}
diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
index 080b90b63d16..bf2752cc1e0b 100644
--- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
+++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
@@ -26,7 +26,7 @@
/* Small library of different fence types useful for writing tests */
-static int __i915_sw_fence_call
+static int
nop_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
return NOTIFY_DONE;
@@ -41,12 +41,12 @@ void __onstack_fence_init(struct i915_sw_fence *fence,
__init_waitqueue_head(&fence->wait, name, key);
atomic_set(&fence->pending, 1);
fence->error = 0;
- fence->flags = (unsigned long)nop_fence_notify;
+ fence->fn = nop_fence_notify;
}
void onstack_fence_fini(struct i915_sw_fence *fence)
{
- if (!fence->flags)
+ if (!fence->fn)
return;
i915_sw_fence_commit(fence);
@@ -89,7 +89,7 @@ struct heap_fence {
};
};
-static int __i915_sw_fence_call
+static int
heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
struct heap_fence *h = container_of(fence, typeof(*h), fence);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 4f8180146888..8aa7b1d33865 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -45,7 +45,7 @@
void mock_device_flush(struct drm_i915_private *i915)
{
- struct intel_gt *gt = &i915->gt;
+ struct intel_gt *gt = to_gt(i915);
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -64,7 +64,7 @@ static void mock_device_release(struct drm_device *dev)
goto out;
mock_device_flush(i915);
- intel_gt_driver_remove(&i915->gt);
+ intel_gt_driver_remove(to_gt(i915));
i915_gem_drain_workqueue(i915);
i915_gem_drain_freed_objects(i915);
@@ -73,7 +73,7 @@ static void mock_device_release(struct drm_device *dev)
destroy_workqueue(i915->wq);
intel_region_ttm_device_fini(i915);
- intel_gt_driver_late_release(&i915->gt);
+ intel_gt_driver_late_release(to_gt(i915));
intel_memory_regions_driver_release(i915);
drm_mode_config_cleanup(&i915->drm);
@@ -165,7 +165,7 @@ struct drm_i915_private *mock_gem_device(void)
/* Using the global GTT may ask questions about KMS users, so prepare */
drm_mode_config_init(&i915->drm);
- mkwrite_device_info(i915)->graphics_ver = -1;
+ mkwrite_device_info(i915)->graphics.ver = -1;
mkwrite_device_info(i915)->page_sizes =
I915_GTT_PAGE_SIZE_4K |
@@ -175,12 +175,14 @@ struct drm_i915_private *mock_gem_device(void)
mkwrite_device_info(i915)->memory_regions = REGION_SMEM;
intel_memory_regions_hw_probe(i915);
- mock_uncore_init(&i915->uncore, i915);
+ spin_lock_init(&i915->gpu_error.lock);
i915_gem_init__mm(i915);
- intel_gt_init_early(&i915->gt, i915);
- atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */
- i915->gt.awake = -ENODEV;
+ intel_gt_init_early(to_gt(i915), i915);
+ __intel_gt_init_early(to_gt(i915), i915);
+ mock_uncore_init(&i915->uncore, i915);
+ atomic_inc(&to_gt(i915)->wakeref.count); /* disable; no hw support */
+ to_gt(i915)->awake = -ENODEV;
ret = intel_region_ttm_device_init(i915);
if (ret)
@@ -193,19 +195,19 @@ struct drm_i915_private *mock_gem_device(void)
mock_init_contexts(i915);
mock_init_ggtt(i915, &i915->ggtt);
- i915->gt.vm = i915_vm_get(&i915->ggtt.vm);
+ to_gt(i915)->vm = i915_vm_get(&i915->ggtt.vm);
mkwrite_device_info(i915)->platform_engine_mask = BIT(0);
- i915->gt.info.engine_mask = BIT(0);
+ to_gt(i915)->info.engine_mask = BIT(0);
- i915->gt.engine[RCS0] = mock_engine(i915, "mock", RCS0);
- if (!i915->gt.engine[RCS0])
+ to_gt(i915)->engine[RCS0] = mock_engine(i915, "mock", RCS0);
+ if (!to_gt(i915)->engine[RCS0])
goto err_unlock;
- if (mock_engine_init(i915->gt.engine[RCS0]))
+ if (mock_engine_init(to_gt(i915)->engine[RCS0]))
goto err_context;
- __clear_bit(I915_WEDGED, &i915->gt.reset.flags);
+ __clear_bit(I915_WEDGED, &to_gt(i915)->reset.flags);
intel_engines_driver_register(i915);
i915->do_release = true;
@@ -214,13 +216,13 @@ struct drm_i915_private *mock_gem_device(void)
return i915;
err_context:
- intel_gt_driver_remove(&i915->gt);
+ intel_gt_driver_remove(to_gt(i915));
err_unlock:
destroy_workqueue(i915->wq);
err_drv:
intel_region_ttm_device_fini(i915);
err_ttm:
- intel_gt_driver_late_release(&i915->gt);
+ intel_gt_driver_late_release(to_gt(i915));
intel_memory_regions_driver_release(i915);
drm_mode_config_cleanup(&i915->drm);
mock_destroy_device(i915);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index cc047ec594f9..1802baf80a17 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -70,7 +70,7 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
if (!ppgtt)
return NULL;
- ppgtt->vm.gt = &i915->gt;
+ ppgtt->vm.gt = to_gt(i915);
ppgtt->vm.i915 = i915;
ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
ppgtt->vm.dma = i915->drm.dev;
@@ -78,6 +78,7 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
ppgtt->vm.clear_range = mock_clear_range;
ppgtt->vm.insert_page = mock_insert_page;
@@ -86,8 +87,6 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
ppgtt->vm.vma_ops.bind_vma = mock_bind_ppgtt;
ppgtt->vm.vma_ops.unbind_vma = mock_unbind_ppgtt;
- ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
- ppgtt->vm.vma_ops.clear_pages = clear_pages;
return ppgtt;
}
@@ -109,7 +108,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
{
memset(ggtt, 0, sizeof(*ggtt));
- ggtt->vm.gt = &i915->gt;
+ ggtt->vm.gt = to_gt(i915);
ggtt->vm.i915 = i915;
ggtt->vm.is_ggtt = true;
@@ -118,6 +117,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
ggtt->vm.total = 4096 * PAGE_SIZE;
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+ ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
ggtt->vm.clear_range = mock_clear_range;
ggtt->vm.insert_page = mock_insert_page;
@@ -126,11 +126,9 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.bind_vma = mock_bind_ggtt;
ggtt->vm.vma_ops.unbind_vma = mock_unbind_ggtt;
- ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
- ggtt->vm.vma_ops.clear_pages = clear_pages;
i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
- i915->gt.ggtt = ggtt;
+ to_gt(i915)->ggtt = ggtt;
}
void mock_fini_ggtt(struct i915_ggtt *ggtt)
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c
index 75793008c4ef..19bff8afcaaa 100644
--- a/drivers/gpu/drm/i915/selftests/mock_region.c
+++ b/drivers/gpu/drm/i915/selftests/mock_region.c
@@ -15,9 +15,9 @@
static void mock_region_put_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages)
{
+ i915_refct_sgt_put(obj->mm.rsgt);
+ obj->mm.rsgt = NULL;
intel_region_ttm_resource_free(obj->mm.region, obj->mm.res);
- sg_free_table(pages);
- kfree(pages);
}
static int mock_region_get_pages(struct drm_i915_gem_object *obj)
@@ -36,12 +36,14 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj)
if (IS_ERR(obj->mm.res))
return PTR_ERR(obj->mm.res);
- pages = intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res);
- if (IS_ERR(pages)) {
- err = PTR_ERR(pages);
+ obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
+ obj->mm.res);
+ if (IS_ERR(obj->mm.rsgt)) {
+ err = PTR_ERR(obj->mm.rsgt);
goto err_free_resource;
}
+ pages = &obj->mm.rsgt->table;
__i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl));
return 0;
@@ -82,13 +84,16 @@ static int mock_object_init(struct intel_memory_region *mem,
return 0;
}
-static void mock_region_fini(struct intel_memory_region *mem)
+static int mock_region_fini(struct intel_memory_region *mem)
{
struct drm_i915_private *i915 = mem->i915;
int instance = mem->instance;
+ int ret;
- intel_region_ttm_fini(mem);
+ ret = intel_region_ttm_fini(mem);
ida_free(&i915->selftest.mock_region_instances, instance);
+
+ return ret;
}
static const struct intel_memory_region_ops mock_region_ops = {
diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c
index ca57e4008701..f2d6be5e1230 100644
--- a/drivers/gpu/drm/i915/selftests/mock_uncore.c
+++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c
@@ -42,7 +42,7 @@ __nop_read(64)
void mock_uncore_init(struct intel_uncore *uncore,
struct drm_i915_private *i915)
{
- intel_uncore_init_early(uncore, i915);
+ intel_uncore_init_early(uncore, to_gt(i915));
ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, nop);
ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, nop);