aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gvt
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-02 15:04:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-02 15:04:15 -0700
commitfaa392181a0bd42c5478175cef601adeecdc91b6 (patch)
treee020e1142e34786676d0cd40f539bccdbb66099e /drivers/gpu/drm/i915/gvt
parentMerge tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma (diff)
parentMerge tag 'amd-drm-next-5.8-2020-05-27' of git://people.freedesktop.org/~agd5f/linux into drm-next (diff)
downloadlinux-dev-faa392181a0bd42c5478175cef601adeecdc91b6.tar.xz
linux-dev-faa392181a0bd42c5478175cef601adeecdc91b6.zip
Merge tag 'drm-next-2020-06-02' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Highlights: - Core DRM had a lot of refactoring around managed drm resources to make drivers simpler. - Intel Tigerlake support is on by default - amdgpu now support p2p PCI buffer sharing and encrypted GPU memory Details: core: - uapi: error out EBUSY when existing master - uapi: rework SET/DROP MASTER permission handling - remove drm_pci.h - drm_pci* are now legacy - introduced managed DRM resources - subclassing support for drm_framebuffer - simple encoder helper - edid improvements - vblank + writeback documentation improved - drm/mm - optimise tree searches - port drivers to use devm_drm_dev_alloc dma-buf: - add flag for p2p buffer support mst: - ACT timeout improvements - remove drm_dp_mst_has_audio - don't use 2nd TX slot - spec recommends against it bridge: - dw-hdmi various improvements - chrontel ch7033 support - fix stack issues with old gcc hdmi: - add unpack function for drm infoframe fbdev: - misc fbdev driver fixes i915: - uapi: global sseu pinning - uapi: OA buffer polling - uapi: remove generated perf code - uapi: per-engine default property values in sysfs - Tigerlake GEN12 enabled. 
- Lots of gem refactoring - Tigerlake enablement patches - move to drm_device logging - Icelake gamma HW readout - push MST link retrain to hotplug work - bandwidth atomic helpers - ICL fixes - RPS/GT refactoring - Cherryview full-ppgtt support - i915 locking guidelines documented - require linear fb stride to be 512 multiple on gen9 - Tigerlake SAGV support amdgpu: - uapi: encrypted GPU memory handling - uapi: add MEM_SYNC IB flag - p2p dma-buf support - export VRAM dma-bufs - FRU chip access support - RAS/SR-IOV updates - Powerplay locking fixes - VCN DPG (powergating) enablement - GFX10 clockgating fixes - DC fixes - GPU reset fixes - navi SDMA fix - expose FP16 for modesetting - DP 1.4 compliance fixes - gfx10 soft recovery - Improved Critical Thermal Faults handling - resizable BAR on gmc10 amdkfd: - uapi: GWS resource management - track GPU memory per process - report PCI domain in topology radeon: - safe reg list generator fixes nouveau: - HD audio fixes on recent systems - vGPU detection (fail probe if we're on one, for now) - Interlaced mode fixes (mostly avoidance on Turing, which doesn't support it) - SVM improvements/fixes - NVIDIA format modifier support - Misc other fixes. adv7511: - HDMI SPDIF support ast: - allocate crtc state size - fix double assignment - fix suspend bochs: - drop connector register cirrus: - move to tiny drivers. 
exynos: - fix imported dma-buf mapping - enable runtime PM - fixes and cleanups mediatek: - DPI pin mode swap - config mipi_tx current/impedance lima: - devfreq + cooling device support - task handling improvements - runtime PM support pl111: - vexpress init improvements - fix module auto-load rcar-du: - DT bindings conversion to YAML - Planes zpos sanity check and fix - MAINTAINERS entry for LVDS panel driver mcde: - fix return value mgag200: - use managed config init stm: - read endpoints from DT vboxvideo: - use PCI managed functions - drop WC mtrr vkms: - enable cursor by default rockchip: - afbc support virtio: - various cleanups qxl: - fix cursor notify port hisilicon: - 128-byte stride alignment fix sun4i: - improved format handling" * tag 'drm-next-2020-06-02' of git://anongit.freedesktop.org/drm/drm: (1401 commits) drm/amd/display: Fix potential integer wraparound resulting in a hang drm/amd/display: drop cursor position check in atomic test drm/amdgpu: fix device attribute node create failed with multi gpu drm/nouveau: use correct conflicting framebuffer API drm/vblank: Fix -Wformat compile warnings on some arches drm/amdgpu: Sync with VM root BO when switching VM to CPU update mode drm/amd/display: Handle GPU reset for DC block drm/amdgpu: add apu flags (v2) drm/amd/powerpay: Disable gfxoff when setting manual mode on picasso and raven drm/amdgpu: fix pm sysfs node handling (v2) drm/amdgpu: move gpu_info parsing after common early init drm/amdgpu: move discovery gfx config fetching drm/nouveau/dispnv50: fix runtime pm imbalance on error drm/nouveau: fix runtime pm imbalance on error drm/nouveau: fix runtime pm imbalance on error drm/nouveau/debugfs: fix runtime pm imbalance on error drm/nouveau/nouveau/hmm: fix migrate zero page to GPU drm/nouveau/nouveau/hmm: fix nouveau_dmem_chunk allocations drm/nouveau/kms/nv50-: Share DP SST mode_valid() handling with MST drm/nouveau/kms/nv50-: Move 8BPC limit for MST into nv50_mstc_get_modes() ...
Diffstat (limited to 'drivers/gpu/drm/i915/gvt')
-rw-r--r--drivers/gpu/drm/i915/gvt/aperture_gm.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c45
-rw-r--r--drivers/gpu/drm/i915/gvt/execlist.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c16
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.h1
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.c1
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.h5
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/hypercall.h2
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c247
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.h1
11 files changed, 272 insertions, 52 deletions
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 8b13f091cee2..0d6d59871308 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -35,7 +35,7 @@
*/
#include "i915_drv.h"
-#include "i915_gem_fence_reg.h"
+#include "gt/intel_ggtt_fencing.h"
#include "gvt.h"
static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index a3cc080a46c6..8b87f130f7f1 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -882,6 +882,47 @@ static int mocs_cmd_reg_handler(struct parser_exec_state *s,
return 0;
}
+static int is_cmd_update_pdps(unsigned int offset,
+ struct parser_exec_state *s)
+{
+ u32 base = s->workload->engine->mmio_base;
+ return i915_mmio_reg_equal(_MMIO(offset), GEN8_RING_PDP_UDW(base, 0));
+}
+
+static int cmd_pdp_mmio_update_handler(struct parser_exec_state *s,
+ unsigned int offset, unsigned int index)
+{
+ struct intel_vgpu *vgpu = s->vgpu;
+ struct intel_vgpu_mm *shadow_mm = s->workload->shadow_mm;
+ struct intel_vgpu_mm *mm;
+ u64 pdps[GEN8_3LVL_PDPES];
+
+ if (shadow_mm->ppgtt_mm.root_entry_type ==
+ GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
+ pdps[0] = (u64)cmd_val(s, 2) << 32;
+ pdps[0] |= cmd_val(s, 4);
+
+ mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
+ if (!mm) {
+ gvt_vgpu_err("failed to get the 4-level shadow vm\n");
+ return -EINVAL;
+ }
+ intel_vgpu_mm_get(mm);
+ list_add_tail(&mm->ppgtt_mm.link,
+ &s->workload->lri_shadow_mm);
+ *cmd_ptr(s, 2) = upper_32_bits(mm->ppgtt_mm.shadow_pdps[0]);
+ *cmd_ptr(s, 4) = lower_32_bits(mm->ppgtt_mm.shadow_pdps[0]);
+ } else {
+ /* Currently all guests use a PML4 table, and there is no
+ * guest that has a 3-level table but uses LRI for the
+ * PPGTT update. So this path is simply untestable. */
+ GEM_BUG_ON(1);
+ gvt_vgpu_err("invalid shared shadow vm type\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int cmd_reg_handler(struct parser_exec_state *s,
unsigned int offset, unsigned int index, char *cmd)
{
@@ -920,6 +961,10 @@ static int cmd_reg_handler(struct parser_exec_state *s,
patch_value(s, cmd_ptr(s, index), VGT_PVINFO_PAGE);
}
+ if (is_cmd_update_pdps(offset, s) &&
+ cmd_pdp_mmio_update_handler(s, offset, index))
+ return -EINVAL;
+
/* TODO
* In order to let workload with inhibit context to generate
* correct image data into memory, vregs values will be loaded to
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index dd25c3024370..158873f269b1 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -424,8 +424,6 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
out:
- intel_vgpu_unpin_mm(workload->shadow_mm);
- intel_vgpu_destroy_workload(workload);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 2a4b23f8aa74..210016192ce7 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1900,6 +1900,7 @@ struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
INIT_LIST_HEAD(&mm->ppgtt_mm.list);
INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
+ INIT_LIST_HEAD(&mm->ppgtt_mm.link);
if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
mm->ppgtt_mm.guest_pdps[0] = pdps[0];
@@ -2341,12 +2342,27 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
{
const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
int ret;
+ struct intel_vgpu_submission *s = &vgpu->submission;
+ struct intel_engine_cs *engine;
+ int i;
if (bytes != 4 && bytes != 8)
return -EINVAL;
off -= info->gtt_start_offset;
ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
+
+ /* If the ggtt of the last submitted context is written,
+ * that context has probably been unpinned.
+ * Set the last shadowed ctx to invalid.
+ */
+ for_each_engine(engine, vgpu->gvt->gt, i) {
+ if (!s->last_ctx[i].valid)
+ continue;
+
+ if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
+ s->last_ctx[i].valid = false;
+ }
return ret;
}
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index 88789316807d..320b8d6ad92f 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -160,6 +160,7 @@ struct intel_vgpu_mm {
struct list_head list;
struct list_head lru_list;
+ struct list_head link; /* possible LRI shadow mm list */
} ppgtt_mm;
struct {
void *virtual_ggtt;
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index 9e1787867894..c7c561237883 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -31,7 +31,6 @@
*/
#include <linux/types.h>
-#include <xen/xen.h>
#include <linux/kthread.h>
#include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 58c2c7932e3f..a4a6db6b7f90 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -163,6 +163,11 @@ struct intel_vgpu_submission {
const struct intel_vgpu_submission_ops *ops;
int virtual_submission_interface;
bool active;
+ struct {
+ u32 lrca;
+ bool valid;
+ u64 ring_context_gpa;
+ } last_ctx[I915_NUM_ENGINES];
};
struct intel_vgpu {
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 2faf50e1b051..3e88e3b5c43a 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -2812,7 +2812,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt)
MMIO_D(GAMTARBMODE, D_BDW_PLUS);
#define RING_REG(base) _MMIO((base) + 0x270)
- MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL);
+ MMIO_RING_F(RING_REG, 32, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL);
#undef RING_REG
MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, hws_pga_write);
diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
index b17c4a1599cd..b79da5124f83 100644
--- a/drivers/gpu/drm/i915/gvt/hypercall.h
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -79,6 +79,4 @@ struct intel_gvt_mpt {
bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn);
};
-extern struct intel_gvt_mpt xengt_mpt;
-
#endif /* _GVT_HYPERCALL_H_ */
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index e92ed96c9b23..0fb1df71c637 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -58,10 +58,8 @@ static void set_context_pdp_root_pointer(
static void update_shadow_pdps(struct intel_vgpu_workload *workload)
{
- struct drm_i915_gem_object *ctx_obj =
- workload->req->context->state->obj;
struct execlist_ring_context *shadow_ring_context;
- struct page *page;
+ struct intel_context *ctx = workload->req->context;
if (WARN_ON(!workload->shadow_mm))
return;
@@ -69,11 +67,9 @@ static void update_shadow_pdps(struct intel_vgpu_workload *workload)
if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount)))
return;
- page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
- shadow_ring_context = kmap(page);
+ shadow_ring_context = (struct execlist_ring_context *)ctx->lrc_reg_state;
set_context_pdp_root_pointer(shadow_ring_context,
(void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
- kunmap(page);
}
/*
@@ -128,16 +124,24 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_gvt *gvt = vgpu->gvt;
- struct drm_i915_gem_object *ctx_obj =
- workload->req->context->state->obj;
+ struct intel_context *ctx = workload->req->context;
struct execlist_ring_context *shadow_ring_context;
- struct page *page;
void *dst;
+ void *context_base;
unsigned long context_gpa, context_page_num;
+ unsigned long gpa_base; /* first gpa of consecutive GPAs */
+ unsigned long gpa_size; /* size of consecutive GPAs */
+ struct intel_vgpu_submission *s = &vgpu->submission;
int i;
+ bool skip = false;
+ int ring_id = workload->engine->id;
+
+ GEM_BUG_ON(!intel_context_is_pinned(ctx));
+
+ context_base = (void *) ctx->lrc_reg_state -
+ (LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
- page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
- shadow_ring_context = kmap(page);
+ shadow_ring_context = (void *) ctx->lrc_reg_state;
sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
#define COPY_REG(name) \
@@ -169,23 +173,43 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
- kunmap(page);
- if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val))
- return 0;
+ gvt_dbg_sched("ring %s workload lrca %x, ctx_id %x, ctx gpa %llx",
+ workload->engine->name, workload->ctx_desc.lrca,
+ workload->ctx_desc.context_id,
+ workload->ring_context_gpa);
- gvt_dbg_sched("ring %s workload lrca %x",
- workload->engine->name,
- workload->ctx_desc.lrca);
+ /* only need to ensure this context is not pinned/unpinned during the
+ * period from last submission to this submission.
+ * Upon reaching this function, the currently submitted context is not
+ * supposed to get unpinned. If a misbehaving guest driver ever does
+ * this, it would corrupt itself.
+ */
+ if (s->last_ctx[ring_id].valid &&
+ (s->last_ctx[ring_id].lrca ==
+ workload->ctx_desc.lrca) &&
+ (s->last_ctx[ring_id].ring_context_gpa ==
+ workload->ring_context_gpa))
+ skip = true;
+
+ s->last_ctx[ring_id].lrca = workload->ctx_desc.lrca;
+ s->last_ctx[ring_id].ring_context_gpa = workload->ring_context_gpa;
+ if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val) || skip)
+ return 0;
+
+ s->last_ctx[ring_id].valid = false;
context_page_num = workload->engine->context_size;
context_page_num = context_page_num >> PAGE_SHIFT;
if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0)
context_page_num = 19;
- i = 2;
- while (i < context_page_num) {
+ /* Find consecutive GPAs from gma until the first non-consecutive GPA,
+ * then read from the consecutive GPAs into the dst virtual address.
+ */
+ gpa_size = 0;
+ for (i = 2; i < context_page_num; i++) {
context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
(u32)((workload->ctx_desc.lrca + i) <<
I915_GTT_PAGE_SHIFT));
@@ -194,13 +218,26 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
return -EFAULT;
}
- page = i915_gem_object_get_page(ctx_obj, i);
- dst = kmap(page);
- intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
- I915_GTT_PAGE_SIZE);
- kunmap(page);
- i++;
+ if (gpa_size == 0) {
+ gpa_base = context_gpa;
+ dst = context_base + (i << I915_GTT_PAGE_SHIFT);
+ } else if (context_gpa != gpa_base + gpa_size)
+ goto read;
+
+ gpa_size += I915_GTT_PAGE_SIZE;
+
+ if (i == context_page_num - 1)
+ goto read;
+
+ continue;
+
+read:
+ intel_gvt_hypervisor_read_gpa(vgpu, gpa_base, dst, gpa_size);
+ gpa_base = context_gpa;
+ gpa_size = I915_GTT_PAGE_SIZE;
+ dst = context_base + (i << I915_GTT_PAGE_SHIFT);
}
+ s->last_ctx[ring_id].valid = true;
return 0;
}
@@ -599,10 +636,9 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
if (bb->va && !IS_ERR(bb->va))
i915_gem_object_unpin_map(bb->obj);
- if (bb->vma && !IS_ERR(bb->vma)) {
+ if (bb->vma && !IS_ERR(bb->vma))
i915_vma_unpin(bb->vma);
- i915_vma_close(bb->vma);
- }
+
i915_gem_object_put(bb->obj);
}
list_del(&bb->list);
@@ -610,10 +646,11 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
}
}
-static int prepare_workload(struct intel_vgpu_workload *workload)
+static int
+intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
- struct intel_vgpu_submission *s = &vgpu->submission;
+ struct intel_vgpu_mm *m;
int ret = 0;
ret = intel_vgpu_pin_mm(workload->shadow_mm);
@@ -628,6 +665,52 @@ static int prepare_workload(struct intel_vgpu_workload *workload)
return -EINVAL;
}
+ if (!list_empty(&workload->lri_shadow_mm)) {
+ list_for_each_entry(m, &workload->lri_shadow_mm,
+ ppgtt_mm.link) {
+ ret = intel_vgpu_pin_mm(m);
+ if (ret) {
+ list_for_each_entry_from_reverse(m,
+ &workload->lri_shadow_mm,
+ ppgtt_mm.link)
+ intel_vgpu_unpin_mm(m);
+ gvt_vgpu_err("LRI shadow ppgtt fail to pin\n");
+ break;
+ }
+ }
+ }
+
+ if (ret)
+ intel_vgpu_unpin_mm(workload->shadow_mm);
+
+ return ret;
+}
+
+static void
+intel_vgpu_shadow_mm_unpin(struct intel_vgpu_workload *workload)
+{
+ struct intel_vgpu_mm *m;
+
+ if (!list_empty(&workload->lri_shadow_mm)) {
+ list_for_each_entry(m, &workload->lri_shadow_mm,
+ ppgtt_mm.link)
+ intel_vgpu_unpin_mm(m);
+ }
+ intel_vgpu_unpin_mm(workload->shadow_mm);
+}
+
+static int prepare_workload(struct intel_vgpu_workload *workload)
+{
+ struct intel_vgpu *vgpu = workload->vgpu;
+ struct intel_vgpu_submission *s = &vgpu->submission;
+ int ret = 0;
+
+ ret = intel_vgpu_shadow_mm_pin(workload);
+ if (ret) {
+ gvt_vgpu_err("fail to pin shadow mm\n");
+ return ret;
+ }
+
update_shadow_pdps(workload);
set_context_ppgtt_from_shadow(workload, s->shadow[workload->engine->id]);
@@ -674,7 +757,7 @@ err_shadow_wa_ctx:
err_shadow_batch:
release_shadow_batch_buffer(workload);
err_unpin_mm:
- intel_vgpu_unpin_mm(workload->shadow_mm);
+ intel_vgpu_shadow_mm_unpin(workload);
return ret;
}
@@ -784,15 +867,48 @@ out:
return workload;
}
+static void update_guest_pdps(struct intel_vgpu *vgpu,
+ u64 ring_context_gpa, u32 pdp[8])
+{
+ u64 gpa;
+ int i;
+
+ gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val);
+
+ for (i = 0; i < 8; i++)
+ intel_gvt_hypervisor_write_gpa(vgpu,
+ gpa + i * 8, &pdp[7 - i], 4);
+}
+
+static __maybe_unused bool
+check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m)
+{
+ if (m->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
+ u64 shadow_pdp = c->pdps[7].val | (u64) c->pdps[6].val << 32;
+
+ if (shadow_pdp != m->ppgtt_mm.shadow_pdps[0]) {
+ gvt_dbg_mm("4-level context ppgtt not match LRI command\n");
+ return false;
+ }
+ return true;
+ } else {
+ /* see comment in LRI handler in cmd_parser.c */
+ gvt_dbg_mm("invalid shadow mm type\n");
+ return false;
+ }
+}
+
static void update_guest_context(struct intel_vgpu_workload *workload)
{
struct i915_request *rq = workload->req;
struct intel_vgpu *vgpu = workload->vgpu;
- struct drm_i915_gem_object *ctx_obj = rq->context->state->obj;
struct execlist_ring_context *shadow_ring_context;
- struct page *page;
+ struct intel_context *ctx = workload->req->context;
+ void *context_base;
void *src;
unsigned long context_gpa, context_page_num;
+ unsigned long gpa_base; /* first gpa of consecutive GPAs */
+ unsigned long gpa_size; /* size of consecutive GPAs*/
int i;
u32 ring_base;
u32 head, tail;
@@ -801,6 +917,8 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id,
workload->ctx_desc.lrca);
+ GEM_BUG_ON(!intel_context_is_pinned(ctx));
+
head = workload->rb_head;
tail = workload->rb_tail;
wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF;
@@ -824,9 +942,14 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
if (IS_BROADWELL(rq->i915) && rq->engine->id == RCS0)
context_page_num = 19;
- i = 2;
+ context_base = (void *) ctx->lrc_reg_state -
+ (LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
- while (i < context_page_num) {
+ /* Find consecutive GPAs from gma until the first non-consecutive GPA,
+ * then write to the consecutive GPAs from the src virtual address.
+ */
+ gpa_size = 0;
+ for (i = 2; i < context_page_num; i++) {
context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
(u32)((workload->ctx_desc.lrca + i) <<
I915_GTT_PAGE_SHIFT));
@@ -835,19 +958,39 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
return;
}
- page = i915_gem_object_get_page(ctx_obj, i);
- src = kmap(page);
- intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
- I915_GTT_PAGE_SIZE);
- kunmap(page);
- i++;
+ if (gpa_size == 0) {
+ gpa_base = context_gpa;
+ src = context_base + (i << I915_GTT_PAGE_SHIFT);
+ } else if (context_gpa != gpa_base + gpa_size)
+ goto write;
+
+ gpa_size += I915_GTT_PAGE_SIZE;
+
+ if (i == context_page_num - 1)
+ goto write;
+
+ continue;
+
+write:
+ intel_gvt_hypervisor_write_gpa(vgpu, gpa_base, src, gpa_size);
+ gpa_base = context_gpa;
+ gpa_size = I915_GTT_PAGE_SIZE;
+ src = context_base + (i << I915_GTT_PAGE_SHIFT);
}
intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa +
RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);
- page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
- shadow_ring_context = kmap(page);
+ shadow_ring_context = (void *) ctx->lrc_reg_state;
+
+ if (!list_empty(&workload->lri_shadow_mm)) {
+ struct intel_vgpu_mm *m = list_last_entry(&workload->lri_shadow_mm,
+ struct intel_vgpu_mm,
+ ppgtt_mm.link);
+ GEM_BUG_ON(!check_shadow_context_ppgtt(shadow_ring_context, m));
+ update_guest_pdps(vgpu, workload->ring_context_gpa,
+ (void *)m->ppgtt_mm.guest_pdps);
+ }
#define COPY_REG(name) \
intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
@@ -864,8 +1007,6 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
(void *)shadow_ring_context +
sizeof(*shadow_ring_context),
I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
-
- kunmap(page);
}
void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
@@ -959,6 +1100,9 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
workload->complete(workload);
+ intel_vgpu_shadow_mm_unpin(workload);
+ intel_vgpu_destroy_workload(workload);
+
atomic_dec(&s->running_workload_num);
wake_up(&scheduler->workload_complete_wq);
@@ -1264,6 +1408,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
atomic_set(&s->running_workload_num, 0);
bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
+ memset(s->last_ctx, 0, sizeof(s->last_ctx));
+
i915_vm_put(&ppgtt->vm);
return 0;
@@ -1350,6 +1496,16 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
release_shadow_batch_buffer(workload);
release_shadow_wa_ctx(&workload->wa_ctx);
+ if (!list_empty(&workload->lri_shadow_mm)) {
+ struct intel_vgpu_mm *m, *mm;
+ list_for_each_entry_safe(m, mm, &workload->lri_shadow_mm,
+ ppgtt_mm.link) {
+ list_del(&m->ppgtt_mm.link);
+ intel_vgpu_mm_put(m);
+ }
+ }
+
+ GEM_BUG_ON(!list_empty(&workload->lri_shadow_mm));
if (workload->shadow_mm)
intel_vgpu_mm_put(workload->shadow_mm);
@@ -1368,6 +1524,7 @@ alloc_workload(struct intel_vgpu *vgpu)
INIT_LIST_HEAD(&workload->list);
INIT_LIST_HEAD(&workload->shadow_bb);
+ INIT_LIST_HEAD(&workload->lri_shadow_mm);
init_waitqueue_head(&workload->shadow_ctx_status_wq);
atomic_set(&workload->shadow_ctx_active, 0);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index bf7fc0ca4cb1..15d317f2a4a4 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -87,6 +87,7 @@ struct intel_vgpu_workload {
int status;
struct intel_vgpu_mm *shadow_mm;
+ struct list_head lri_shadow_mm; /* For PPGTT load cmd */
/* different submission model may need different handler */
int (*prepare)(struct intel_vgpu_workload *);