aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gem
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-02 15:04:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-02 15:04:15 -0700
commitfaa392181a0bd42c5478175cef601adeecdc91b6 (patch)
treee020e1142e34786676d0cd40f539bccdbb66099e /drivers/gpu/drm/i915/gem
parentMerge tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma (diff)
parentMerge tag 'amd-drm-next-5.8-2020-05-27' of git://people.freedesktop.org/~agd5f/linux into drm-next (diff)
downloadlinux-dev-faa392181a0bd42c5478175cef601adeecdc91b6.tar.xz
linux-dev-faa392181a0bd42c5478175cef601adeecdc91b6.zip
Merge tag 'drm-next-2020-06-02' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Highlights: - Core DRM had a lot of refactoring around managed drm resources to make drivers simpler. - Intel Tigerlake support is on by default - amdgpu now support p2p PCI buffer sharing and encrypted GPU memory Details: core: - uapi: error out EBUSY when existing master - uapi: rework SET/DROP MASTER permission handling - remove drm_pci.h - drm_pci* are now legacy - introduced managed DRM resources - subclassing support for drm_framebuffer - simple encoder helper - edid improvements - vblank + writeback documentation improved - drm/mm - optimise tree searches - port drivers to use devm_drm_dev_alloc dma-buf: - add flag for p2p buffer support mst: - ACT timeout improvements - remove drm_dp_mst_has_audio - don't use 2nd TX slot - spec recommends against it bridge: - dw-hdmi various improvements - chrontel ch7033 support - fix stack issues with old gcc hdmi: - add unpack function for drm infoframe fbdev: - misc fbdev driver fixes i915: - uapi: global sseu pinning - uapi: OA buffer polling - uapi: remove generated perf code - uapi: per-engine default property values in sysfs - Tigerlake GEN12 enabled. - Lots of gem refactoring - Tigerlake enablement patches - move to drm_device logging - Icelake gamma HW readout - push MST link retrain to hotplug work - bandwidth atomic helpers - ICL fixes - RPS/GT refactoring - Cherryview full-ppgtt support - i915 locking guidelines documented - require linear fb stride to be 512 multiple on gen9 - Tigerlake SAGV support amdgpu: - uapi: encrypted GPU memory handling - uapi: add MEM_SYNC IB flag - p2p dma-buf support - export VRAM dma-bufs - FRU chip access support - RAS/SR-IOV updates - Powerplay locking fixes - VCN DPG (powergating) enablement - GFX10 clockgating fixes - DC fixes - GPU reset fixes - navi SDMA fix - expose FP16 for modesetting - DP 1.4 compliance fixes - gfx10 soft recovery - Improved Critical Thermal Faults handling - resizable BAR on gmc10 amdkfd: - uapi: GWS resource management - track GPU memory per process - report PCI domain in topology radeon: - safe reg list generator fixes nouveau: - HD audio fixes on recent systems - vGPU detection (fail probe if we're on one, for now) - Interlaced mode fixes (mostly avoidance on Turing, which doesn't support it) - SVM improvements/fixes - NVIDIA format modifier support - Misc other fixes. adv7511: - HDMI SPDIF support ast: - allocate crtc state size - fix double assignment - fix suspend bochs: - drop connector register cirrus: - move to tiny drivers. exynos: - fix imported dma-buf mapping - enable runtime PM - fixes and cleanups mediatek: - DPI pin mode swap - config mipi_tx current/impedance lima: - devfreq + cooling device support - task handling improvements - runtime PM support pl111: - vexpress init improvements - fix module auto-load rcar-du: - DT bindings conversion to YAML - Planes zpos sanity check and fix - MAINTAINERS entry for LVDS panel driver mcde: - fix return value mgag200: - use managed config init stm: - read endpoints from DT vboxvideo: - use PCI managed functions - drop WC mtrr vkms: - enable cursor by default rockchip: - afbc support virtio: - various cleanups qxl: - fix cursor notify port hisilicon: - 128-byte stride alignment fix sun4i: - improved format handling" * tag 'drm-next-2020-06-02' of git://anongit.freedesktop.org/drm/drm: (1401 commits) drm/amd/display: Fix potential integer wraparound resulting in a hang drm/amd/display: drop cursor position check in atomic test drm/amdgpu: fix device attribute node create failed with multi gpu drm/nouveau: use correct conflicting framebuffer API drm/vblank: Fix -Wformat compile warnings on some arches drm/amdgpu: Sync with VM root BO when switching VM to CPU update mode drm/amd/display: Handle GPU reset for DC block drm/amdgpu: add apu flags (v2) drm/amd/powerpay: Disable gfxoff when setting manual mode on picasso and raven drm/amdgpu: fix pm sysfs node handling (v2) drm/amdgpu: move gpu_info parsing after common early init drm/amdgpu: move discovery gfx config fetching drm/nouveau/dispnv50: fix runtime pm imbalance on error drm/nouveau: fix runtime pm imbalance on error drm/nouveau: fix runtime pm imbalance on error drm/nouveau/debugfs: fix runtime pm imbalance on error drm/nouveau/nouveau/hmm: fix migrate zero page to GPU drm/nouveau/nouveau/hmm: fix nouveau_dmem_chunk allocations drm/nouveau/kms/nv50-: Share DP SST mode_valid() handling with MST drm/nouveau/kms/nv50-: Move 8BPC limit for MST into nv50_mstc_get_modes() ...
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_clflush.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_client_blt.c4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c89
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.h4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c843
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_fence.c4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c34
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c8
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_blt.c26
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_blt.h1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_phys.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.c18
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.c4
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c3
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c83
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c566
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c2
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c52
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c171
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c128
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c2
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c26
25 files changed, 1570 insertions, 510 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 34be4c0ee7c5..bc0223716906 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -108,7 +108,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
if (clflush) {
i915_sw_fence_await_reservation(&clflush->base.chain,
obj->base.resv, NULL, true,
- I915_FENCE_TIMEOUT,
+ i915_fence_timeout(to_i915(obj->base.dev)),
I915_FENCE_GFP);
dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
dma_fence_work_commit(&clflush->base);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 0598e5382a1d..d3a86a4d5c04 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -6,7 +6,6 @@
#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
-#include "gt/intel_engine_pool.h"
#include "i915_gem_client_blt.h"
#include "i915_gem_object_blt.h"
@@ -289,8 +288,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
i915_gem_object_lock(obj);
err = i915_sw_fence_await_reservation(&work->wait,
- obj->base.resv, NULL,
- true, I915_FENCE_TIMEOUT,
+ obj->base.resv, NULL, true, 0,
I915_FENCE_GFP);
if (err < 0) {
dma_fence_set_error(&work->dma, err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 68326ad3b2e0..900ea8b7fc8f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -130,9 +130,7 @@ static void lut_close(struct i915_gem_context *ctx)
if (&lut->obj_link != &obj->lut_list) {
i915_lut_handle_free(lut);
radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
- if (atomic_dec_and_test(&vma->open_count) &&
- !i915_vma_is_ggtt(vma))
- i915_vma_close(vma);
+ i915_vma_close(vma);
i915_gem_object_put(obj);
}
@@ -570,23 +568,19 @@ static void engines_idle_release(struct i915_gem_context *ctx,
engines->ctx = i915_gem_context_get(ctx);
for_each_gem_engine(ce, engines, it) {
- struct dma_fence *fence;
- int err = 0;
+ int err;
/* serialises with execbuf */
set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
if (!intel_context_pin_if_active(ce))
continue;
- fence = i915_active_fence_get(&ce->timeline->last_request);
- if (fence) {
- err = i915_sw_fence_await_dma_fence(&engines->fence,
- fence, 0,
- GFP_KERNEL);
- dma_fence_put(fence);
- }
+ /* Wait until context is finally scheduled out and retired */
+ err = i915_sw_fence_await_active(&engines->fence,
+ &ce->active,
+ I915_ACTIVE_AWAIT_BARRIER);
intel_context_unpin(ce);
- if (err < 0)
+ if (err)
goto kill;
}
@@ -757,21 +751,46 @@ err_free:
return ERR_PTR(err);
}
+static inline struct i915_gem_engines *
+__context_engines_await(const struct i915_gem_context *ctx)
+{
+ struct i915_gem_engines *engines;
+
+ rcu_read_lock();
+ do {
+ engines = rcu_dereference(ctx->engines);
+ GEM_BUG_ON(!engines);
+
+ if (unlikely(!i915_sw_fence_await(&engines->fence)))
+ continue;
+
+ if (likely(engines == rcu_access_pointer(ctx->engines)))
+ break;
+
+ i915_sw_fence_complete(&engines->fence);
+ } while (1);
+ rcu_read_unlock();
+
+ return engines;
+}
+
static int
context_apply_all(struct i915_gem_context *ctx,
int (*fn)(struct intel_context *ce, void *data),
void *data)
{
struct i915_gem_engines_iter it;
+ struct i915_gem_engines *e;
struct intel_context *ce;
int err = 0;
- for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ e = __context_engines_await(ctx);
+ for_each_gem_engine(ce, e, it) {
err = fn(ce, data);
if (err)
break;
}
- i915_gem_context_unlock_engines(ctx);
+ i915_sw_fence_complete(&e->fence);
return err;
}
@@ -786,11 +805,13 @@ static int __apply_ppgtt(struct intel_context *ce, void *vm)
static struct i915_address_space *
__set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
{
- struct i915_address_space *old = i915_gem_context_vm(ctx);
+ struct i915_address_space *old;
+ old = rcu_replace_pointer(ctx->vm,
+ i915_vm_open(vm),
+ lockdep_is_held(&ctx->mutex));
GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
- rcu_assign_pointer(ctx->vm, i915_vm_open(vm));
context_apply_all(ctx, __apply_ppgtt, vm);
return old;
@@ -1069,30 +1090,6 @@ static void cb_retire(struct i915_active *base)
kfree(cb);
}
-static inline struct i915_gem_engines *
-__context_engines_await(const struct i915_gem_context *ctx)
-{
- struct i915_gem_engines *engines;
-
- rcu_read_lock();
- do {
- engines = rcu_dereference(ctx->engines);
- if (unlikely(!engines))
- break;
-
- if (unlikely(!i915_sw_fence_await(&engines->fence)))
- continue;
-
- if (likely(engines == rcu_access_pointer(ctx->engines)))
- break;
-
- i915_sw_fence_complete(&engines->fence);
- } while (1);
- rcu_read_unlock();
-
- return engines;
-}
-
I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
static int context_barrier_task(struct i915_gem_context *ctx,
intel_engine_mask_t engines,
@@ -1401,10 +1398,10 @@ static int get_ringsize(struct i915_gem_context *ctx,
return 0;
}
-static int
-user_to_context_sseu(struct drm_i915_private *i915,
- const struct drm_i915_gem_context_param_sseu *user,
- struct intel_sseu *context)
+int
+i915_gem_user_to_context_sseu(struct drm_i915_private *i915,
+ const struct drm_i915_gem_context_param_sseu *user,
+ struct intel_sseu *context)
{
const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu;
@@ -1539,7 +1536,7 @@ static int set_sseu(struct i915_gem_context *ctx,
goto out_ce;
}
- ret = user_to_context_sseu(i915, &user_sseu, &sseu);
+ ret = i915_gem_user_to_context_sseu(i915, &user_sseu, &sseu);
if (ret)
goto out_ce;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index f1d884d304bd..3702b2fb27ab 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -225,4 +225,8 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it);
struct i915_lut_handle *i915_lut_handle_alloc(void);
void i915_lut_handle_free(struct i915_lut_handle *lut);
+int i915_gem_user_to_context_sseu(struct drm_i915_private *i915,
+ const struct drm_i915_gem_context_param_sseu *user,
+ struct intel_sseu *context);
+
#endif /* !__I915_GEM_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 4f96c8788a2e..7f76fc68f498 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -368,7 +368,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_vma *vma;
- if (!atomic_read(&obj->bind_count))
+ if (list_empty(&obj->vma.list))
return;
mutex_lock(&i915->ggtt.vm.mutex);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 8a4e9c1cbf6c..3ce185670ca4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -15,8 +15,8 @@
#include "gem/i915_gem_ioctls.h"
#include "gt/intel_context.h"
-#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"
@@ -40,6 +40,11 @@ struct eb_vma {
u32 handle;
};
+struct eb_vma_array {
+ struct kref kref;
+ struct eb_vma vma[];
+};
+
enum {
FORCE_CPU_RELOC = 1,
FORCE_GTT_RELOC,
@@ -52,7 +57,6 @@ enum {
#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
-#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31)
#define __EXEC_INTERNAL_FLAGS (~0u << 31)
@@ -264,7 +268,9 @@ struct i915_execbuffer {
bool has_fence : 1;
bool needs_unfenced : 1;
+ struct i915_vma *target;
struct i915_request *rq;
+ struct i915_vma *rq_vma;
u32 *rq_cmd;
unsigned int rq_size;
} reloc_cache;
@@ -283,6 +289,7 @@ struct i915_execbuffer {
*/
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
+ struct eb_vma_array *array;
};
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
@@ -292,8 +299,62 @@ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
eb->args->batch_len);
}
+static struct eb_vma_array *eb_vma_array_create(unsigned int count)
+{
+ struct eb_vma_array *arr;
+
+ arr = kvmalloc(struct_size(arr, vma, count), GFP_KERNEL | __GFP_NOWARN);
+ if (!arr)
+ return NULL;
+
+ kref_init(&arr->kref);
+ arr->vma[0].vma = NULL;
+
+ return arr;
+}
+
+static inline void eb_unreserve_vma(struct eb_vma *ev)
+{
+ struct i915_vma *vma = ev->vma;
+
+ if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+ __i915_vma_unpin_fence(vma);
+
+ if (ev->flags & __EXEC_OBJECT_HAS_PIN)
+ __i915_vma_unpin(vma);
+
+ ev->flags &= ~(__EXEC_OBJECT_HAS_PIN |
+ __EXEC_OBJECT_HAS_FENCE);
+}
+
+static void eb_vma_array_destroy(struct kref *kref)
+{
+ struct eb_vma_array *arr = container_of(kref, typeof(*arr), kref);
+ struct eb_vma *ev = arr->vma;
+
+ while (ev->vma) {
+ eb_unreserve_vma(ev);
+ i915_vma_put(ev->vma);
+ ev++;
+ }
+
+ kvfree(arr);
+}
+
+static void eb_vma_array_put(struct eb_vma_array *arr)
+{
+ kref_put(&arr->kref, eb_vma_array_destroy);
+}
+
static int eb_create(struct i915_execbuffer *eb)
{
+ /* Allocate an extra slot for use by the command parser + sentinel */
+ eb->array = eb_vma_array_create(eb->buffer_count + 2);
+ if (!eb->array)
+ return -ENOMEM;
+
+ eb->vma = eb->array->vma;
+
if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
unsigned int size = 1 + ilog2(eb->buffer_count);
@@ -327,8 +388,10 @@ static int eb_create(struct i915_execbuffer *eb)
break;
} while (--size);
- if (unlikely(!size))
+ if (unlikely(!size)) {
+ eb_vma_array_put(eb->array);
return -ENOMEM;
+ }
eb->lut_size = size;
} else {
@@ -368,6 +431,32 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
return false;
}
+static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
+ unsigned int exec_flags)
+{
+ u64 pin_flags = 0;
+
+ if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+ pin_flags |= PIN_GLOBAL;
+
+ /*
+ * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
+ * limit address to the first 4GBs for unflagged objects.
+ */
+ if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ pin_flags |= PIN_ZONE_4G;
+
+ if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+ pin_flags |= PIN_MAPPABLE;
+
+ if (exec_flags & EXEC_OBJECT_PINNED)
+ pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+ else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
+ pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+
+ return pin_flags;
+}
+
static inline bool
eb_pin_vma(struct i915_execbuffer *eb,
const struct drm_i915_gem_exec_object2 *entry,
@@ -385,8 +474,19 @@ eb_pin_vma(struct i915_execbuffer *eb,
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
pin_flags |= PIN_GLOBAL;
- if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
- return false;
+ /* Attempt to reuse the current location if available */
+ if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) {
+ if (entry->flags & EXEC_OBJECT_PINNED)
+ return false;
+
+ /* Failing that pick any _free_ space if suitable */
+ if (unlikely(i915_vma_pin(vma,
+ entry->pad_to_size,
+ entry->alignment,
+ eb_pin_flags(entry, ev->flags) |
+ PIN_USER | PIN_NOEVICT)))
+ return false;
+ }
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
if (unlikely(i915_vma_pin_fence(vma))) {
@@ -402,26 +502,6 @@ eb_pin_vma(struct i915_execbuffer *eb,
return !eb_vma_misplaced(entry, vma, ev->flags);
}
-static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
-{
- GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
-
- if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
- __i915_vma_unpin_fence(vma);
-
- __i915_vma_unpin(vma);
-}
-
-static inline void
-eb_unreserve_vma(struct eb_vma *ev)
-{
- if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
- return;
-
- __eb_unreserve_vma(ev->vma, ev->flags);
- ev->flags &= ~__EXEC_OBJECT_RESERVED;
-}
-
static int
eb_validate_vma(struct i915_execbuffer *eb,
struct drm_i915_gem_exec_object2 *entry,
@@ -481,7 +561,7 @@ eb_add_vma(struct i915_execbuffer *eb,
GEM_BUG_ON(i915_vma_is_closed(vma));
- ev->vma = i915_vma_get(vma);
+ ev->vma = vma;
ev->exec = entry;
ev->flags = entry->flags;
@@ -547,28 +627,9 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
u64 pin_flags)
{
struct drm_i915_gem_exec_object2 *entry = ev->exec;
- unsigned int exec_flags = ev->flags;
struct i915_vma *vma = ev->vma;
int err;
- if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
- pin_flags |= PIN_GLOBAL;
-
- /*
- * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
- * limit address to the first 4GBs for unflagged objects.
- */
- if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
- pin_flags |= PIN_ZONE_4G;
-
- if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
- pin_flags |= PIN_MAPPABLE;
-
- if (exec_flags & EXEC_OBJECT_PINNED)
- pin_flags |= entry->offset | PIN_OFFSET_FIXED;
- else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
- pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
-
if (drm_mm_node_allocated(&vma->node) &&
eb_vma_misplaced(entry, vma, ev->flags)) {
err = i915_vma_unbind(vma);
@@ -578,7 +639,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
err = i915_vma_pin(vma,
entry->pad_to_size, entry->alignment,
- pin_flags);
+ eb_pin_flags(entry, ev->flags) | pin_flags);
if (err)
return err;
@@ -587,7 +648,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
eb->args->flags |= __EXEC_HAS_RELOC;
}
- if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
+ if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_pin_fence(vma);
if (unlikely(err)) {
i915_vma_unpin(vma);
@@ -595,10 +656,10 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
}
if (vma->fence)
- exec_flags |= __EXEC_OBJECT_HAS_FENCE;
+ ev->flags |= __EXEC_OBJECT_HAS_FENCE;
}
- ev->flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+ ev->flags |= __EXEC_OBJECT_HAS_PIN;
GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
return 0;
@@ -728,77 +789,117 @@ static int eb_select_context(struct i915_execbuffer *eb)
return 0;
}
-static int eb_lookup_vmas(struct i915_execbuffer *eb)
+static int __eb_add_lut(struct i915_execbuffer *eb,
+ u32 handle, struct i915_vma *vma)
{
- struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
- struct drm_i915_gem_object *obj;
- unsigned int i, batch;
+ struct i915_gem_context *ctx = eb->gem_context;
+ struct i915_lut_handle *lut;
int err;
- if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
- return -ENOENT;
+ lut = i915_lut_handle_alloc();
+ if (unlikely(!lut))
+ return -ENOMEM;
- INIT_LIST_HEAD(&eb->relocs);
- INIT_LIST_HEAD(&eb->unbound);
+ i915_vma_get(vma);
+ if (!atomic_fetch_inc(&vma->open_count))
+ i915_vma_reopen(vma);
+ lut->handle = handle;
+ lut->ctx = ctx;
+
+ /* Check that the context hasn't been closed in the meantime */
+ err = -EINTR;
+ if (!mutex_lock_interruptible(&ctx->mutex)) {
+ err = -ENOENT;
+ if (likely(!i915_gem_context_is_closed(ctx)))
+ err = radix_tree_insert(&ctx->handles_vma, handle, vma);
+ if (err == 0) { /* And nor has this handle */
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ i915_gem_object_lock(obj);
+ if (idr_find(&eb->file->object_idr, handle) == obj) {
+ list_add(&lut->obj_link, &obj->lut_list);
+ } else {
+ radix_tree_delete(&ctx->handles_vma, handle);
+ err = -ENOENT;
+ }
+ i915_gem_object_unlock(obj);
+ }
+ mutex_unlock(&ctx->mutex);
+ }
+ if (unlikely(err))
+ goto err;
- batch = eb_batch_index(eb);
+ return 0;
- for (i = 0; i < eb->buffer_count; i++) {
- u32 handle = eb->exec[i].handle;
- struct i915_lut_handle *lut;
+err:
+ i915_vma_close(vma);
+ i915_vma_put(vma);
+ i915_lut_handle_free(lut);
+ return err;
+}
+
+static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
+{
+ do {
+ struct drm_i915_gem_object *obj;
struct i915_vma *vma;
+ int err;
- vma = radix_tree_lookup(handles_vma, handle);
+ rcu_read_lock();
+ vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
if (likely(vma))
- goto add_vma;
+ vma = i915_vma_tryget(vma);
+ rcu_read_unlock();
+ if (likely(vma))
+ return vma;
obj = i915_gem_object_lookup(eb->file, handle);
- if (unlikely(!obj)) {
- err = -ENOENT;
- goto err_vma;
- }
+ if (unlikely(!obj))
+ return ERR_PTR(-ENOENT);
vma = i915_vma_instance(obj, eb->context->vm, NULL);
if (IS_ERR(vma)) {
- err = PTR_ERR(vma);
- goto err_obj;
+ i915_gem_object_put(obj);
+ return vma;
}
- lut = i915_lut_handle_alloc();
- if (unlikely(!lut)) {
- err = -ENOMEM;
- goto err_obj;
- }
+ err = __eb_add_lut(eb, handle, vma);
+ if (likely(!err))
+ return vma;
- err = radix_tree_insert(handles_vma, handle, vma);
- if (unlikely(err)) {
- i915_lut_handle_free(lut);
- goto err_obj;
- }
+ i915_gem_object_put(obj);
+ if (err != -EEXIST)
+ return ERR_PTR(err);
+ } while (1);
+}
- /* transfer ref to lut */
- if (!atomic_fetch_inc(&vma->open_count))
- i915_vma_reopen(vma);
- lut->handle = handle;
- lut->ctx = eb->gem_context;
+static int eb_lookup_vmas(struct i915_execbuffer *eb)
+{
+ unsigned int batch = eb_batch_index(eb);
+ unsigned int i;
+ int err = 0;
- i915_gem_object_lock(obj);
- list_add(&lut->obj_link, &obj->lut_list);
- i915_gem_object_unlock(obj);
+ INIT_LIST_HEAD(&eb->relocs);
+ INIT_LIST_HEAD(&eb->unbound);
+
+ for (i = 0; i < eb->buffer_count; i++) {
+ struct i915_vma *vma;
+
+ vma = eb_lookup_vma(eb, eb->exec[i].handle);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ break;
+ }
-add_vma:
err = eb_validate_vma(eb, &eb->exec[i], vma);
- if (unlikely(err))
- goto err_vma;
+ if (unlikely(err)) {
+ i915_vma_put(vma);
+ break;
+ }
eb_add_vma(eb, i, batch, vma);
}
- return 0;
-
-err_obj:
- i915_gem_object_put(obj);
-err_vma:
eb->vma[i].vma = NULL;
return err;
}
@@ -823,31 +924,13 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
}
}
-static void eb_release_vmas(const struct i915_execbuffer *eb)
-{
- const unsigned int count = eb->buffer_count;
- unsigned int i;
-
- for (i = 0; i < count; i++) {
- struct eb_vma *ev = &eb->vma[i];
- struct i915_vma *vma = ev->vma;
-
- if (!vma)
- break;
-
- eb->vma[i].vma = NULL;
-
- if (ev->flags & __EXEC_OBJECT_HAS_PIN)
- __eb_unreserve_vma(vma, ev->flags);
-
- i915_vma_put(vma);
- }
-}
-
static void eb_destroy(const struct i915_execbuffer *eb)
{
GEM_BUG_ON(eb->reloc_cache.rq);
+ if (eb->array)
+ eb_vma_array_put(eb->array);
+
if (eb->lut_size > 0)
kfree(eb->buckets);
}
@@ -872,7 +955,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.flags = 0;
cache->rq = NULL;
- cache->rq_size = 0;
+ cache->target = NULL;
}
static inline void *unmask_page(unsigned long p)
@@ -894,29 +977,122 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
return &i915->ggtt;
}
-static void reloc_gpu_flush(struct reloc_cache *cache)
+#define RELOC_TAIL 4
+
+static int reloc_gpu_chain(struct reloc_cache *cache)
+{
+ struct intel_gt_buffer_pool_node *pool;
+ struct i915_request *rq = cache->rq;
+ struct i915_vma *batch;
+ u32 *cmd;
+ int err;
+
+ pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+
+ batch = i915_vma_instance(pool->obj, rq->context->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto out_pool;
+ }
+
+ err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+ if (err)
+ goto out_pool;
+
+ GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32));
+ cmd = cache->rq_cmd + cache->rq_size;
+ *cmd++ = MI_ARB_CHECK;
+ if (cache->gen >= 8)
+ *cmd++ = MI_BATCH_BUFFER_START_GEN8;
+ else if (cache->gen >= 6)
+ *cmd++ = MI_BATCH_BUFFER_START;
+ else
+ *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
+ *cmd++ = lower_32_bits(batch->node.start);
+ *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */
+ i915_gem_object_flush_map(cache->rq_vma->obj);
+ i915_gem_object_unpin_map(cache->rq_vma->obj);
+ cache->rq_vma = NULL;
+
+ err = intel_gt_buffer_pool_mark_active(pool, rq);
+ if (err == 0) {
+ i915_vma_lock(batch);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ i915_vma_unlock(batch);
+ }
+ i915_vma_unpin(batch);
+ if (err)
+ goto out_pool;
+
+ cmd = i915_gem_object_pin_map(batch->obj,
+ cache->has_llc ?
+ I915_MAP_FORCE_WB :
+ I915_MAP_FORCE_WC);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto out_pool;
+ }
+
+ /* Return with batch mapping (cmd) still pinned */
+ cache->rq_cmd = cmd;
+ cache->rq_size = 0;
+ cache->rq_vma = batch;
+
+out_pool:
+ intel_gt_buffer_pool_put(pool);
+ return err;
+}
+
+static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
{
- struct drm_i915_gem_object *obj = cache->rq->batch->obj;
+ return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
+}
+
+static int reloc_gpu_flush(struct reloc_cache *cache)
+{
+ struct i915_request *rq;
+ int err;
- GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
- cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
+ rq = fetch_and_zero(&cache->rq);
+ if (!rq)
+ return 0;
- __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
- i915_gem_object_unpin_map(obj);
+ if (cache->rq_vma) {
+ struct drm_i915_gem_object *obj = cache->rq_vma->obj;
- intel_gt_chipset_flush(cache->rq->engine->gt);
+ GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
+ cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END;
- i915_request_add(cache->rq);
- cache->rq = NULL;
+ __i915_gem_object_flush_map(obj,
+ 0, sizeof(u32) * cache->rq_size);
+ i915_gem_object_unpin_map(obj);
+ }
+
+ err = 0;
+ if (rq->engine->emit_init_breadcrumb)
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ rq->batch->node.start,
+ PAGE_SIZE,
+ reloc_bb_flags(cache));
+ if (err)
+ i915_request_set_error_once(rq, err);
+
+ intel_gt_chipset_flush(rq->engine->gt);
+ i915_request_add(rq);
+
+ return err;
}
static void reloc_cache_reset(struct reloc_cache *cache)
{
void *vaddr;
- if (cache->rq)
- reloc_gpu_flush(cache);
-
if (!cache->vaddr)
return;
@@ -1109,17 +1285,17 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
}
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
- struct i915_vma *vma,
+ struct intel_engine_cs *engine,
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
- struct intel_engine_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool;
struct i915_request *rq;
struct i915_vma *batch;
u32 *cmd;
int err;
- pool = intel_engine_get_pool(eb->engine, PAGE_SIZE);
+ pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
if (IS_ERR(pool))
return PTR_ERR(pool);
@@ -1132,7 +1308,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto out_pool;
}
- batch = i915_vma_instance(pool->obj, vma->vm, NULL);
+ batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto err_unmap;
@@ -1142,26 +1318,32 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_unmap;
- rq = i915_request_create(eb->context);
+ if (engine == eb->context->engine) {
+ rq = i915_request_create(eb->context);
+ } else {
+ struct intel_context *ce;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto err_unpin;
+ }
+
+ i915_vm_put(ce->vm);
+ ce->vm = i915_vm_get(eb->context->vm);
+
+ rq = intel_context_create_request(ce);
+ intel_context_put(ce);
+ }
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_unpin;
}
- err = intel_engine_pool_mark_active(pool, rq);
+ err = intel_gt_buffer_pool_mark_active(pool, rq);
if (err)
goto err_request;
- err = reloc_move_to_gpu(rq, vma);
- if (err)
- goto err_request;
-
- err = eb->engine->emit_bb_start(rq,
- batch->node.start, PAGE_SIZE,
- cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
- if (err)
- goto skip_request;
-
i915_vma_lock(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
@@ -1176,6 +1358,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
cache->rq = rq;
cache->rq_cmd = cmd;
cache->rq_size = 0;
+ cache->rq_vma = batch;
/* Return with batch mapping (cmd) still pinned */
goto out_pool;
@@ -1189,124 +1372,206 @@ err_unpin:
err_unmap:
i915_gem_object_unpin_map(pool->obj);
out_pool:
- intel_engine_pool_put(pool);
+ intel_gt_buffer_pool_put(pool);
return err;
}
+static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
+{
+ return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6);
+}
+
static u32 *reloc_gpu(struct i915_execbuffer *eb,
struct i915_vma *vma,
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
u32 *cmd;
-
- if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
- reloc_gpu_flush(cache);
+ int err;
if (unlikely(!cache->rq)) {
- int err;
+ struct intel_engine_cs *engine = eb->engine;
- if (!intel_engine_can_store_dword(eb->engine))
- return ERR_PTR(-ENODEV);
+ if (!reloc_can_use_engine(engine)) {
+ engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
+ if (!engine)
+ return ERR_PTR(-ENODEV);
+ }
- err = __reloc_gpu_alloc(eb, vma, len);
+ err = __reloc_gpu_alloc(eb, engine, len);
if (unlikely(err))
return ERR_PTR(err);
}
+ if (vma != cache->target) {
+ err = reloc_move_to_gpu(cache->rq, vma);
+ if (unlikely(err)) {
+ i915_request_set_error_once(cache->rq, err);
+ return ERR_PTR(err);
+ }
+
+ cache->target = vma;
+ }
+
+ if (unlikely(cache->rq_size + len >
+ PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
+ err = reloc_gpu_chain(cache);
+ if (unlikely(err)) {
+ i915_request_set_error_once(cache->rq, err);
+ return ERR_PTR(err);
+ }
+ }
+
+ GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32));
cmd = cache->rq_cmd + cache->rq_size;
cache->rq_size += len;
return cmd;
}
-static u64
-relocate_entry(struct i915_vma *vma,
- const struct drm_i915_gem_relocation_entry *reloc,
- struct i915_execbuffer *eb,
- const struct i915_vma *target)
+static inline bool use_reloc_gpu(struct i915_vma *vma)
{
- u64 offset = reloc->offset;
- u64 target_offset = relocation_target(reloc, target);
- bool wide = eb->reloc_cache.use_64bit_reloc;
- void *vaddr;
+ if (DBG_FORCE_RELOC == FORCE_GPU_RELOC)
+ return true;
- if (!eb->reloc_cache.vaddr &&
- (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
- !dma_resv_test_signaled_rcu(vma->resv, true))) {
- const unsigned int gen = eb->reloc_cache.gen;
- unsigned int len;
- u32 *batch;
- u64 addr;
-
- if (wide)
- len = offset & 7 ? 8 : 5;
- else if (gen >= 4)
- len = 4;
- else
- len = 3;
+ if (DBG_FORCE_RELOC)
+ return false;
- batch = reloc_gpu(eb, vma, len);
- if (IS_ERR(batch))
- goto repeat;
+ return !dma_resv_test_signaled_rcu(vma->resv, true);
+}
- addr = gen8_canonical_addr(vma->node.start + offset);
- if (wide) {
- if (offset & 7) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = lower_32_bits(target_offset);
-
- addr = gen8_canonical_addr(addr + 4);
-
- *batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = upper_32_bits(target_offset);
- } else {
- *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
- *batch++ = lower_32_bits(addr);
- *batch++ = upper_32_bits(addr);
- *batch++ = lower_32_bits(target_offset);
- *batch++ = upper_32_bits(target_offset);
- }
- } else if (gen >= 6) {
+static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
+{
+ struct page *page;
+ unsigned long addr;
+
+ GEM_BUG_ON(vma->pages != vma->obj->mm.pages);
+
+ page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT);
+ addr = PFN_PHYS(page_to_pfn(page));
+ GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */
+
+ return addr + offset_in_page(offset);
+}
+
+static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ u64 offset,
+ u64 target_addr)
+{
+ const unsigned int gen = eb->reloc_cache.gen;
+ unsigned int len;
+ u32 *batch;
+ u64 addr;
+
+ if (gen >= 8)
+ len = offset & 7 ? 8 : 5;
+ else if (gen >= 4)
+ len = 4;
+ else
+ len = 3;
+
+ batch = reloc_gpu(eb, vma, len);
+ if (IS_ERR(batch))
+ return false;
+
+ addr = gen8_canonical_addr(vma->node.start + offset);
+ if (gen >= 8) {
+ if (offset & 7) {
*batch++ = MI_STORE_DWORD_IMM_GEN4;
- *batch++ = 0;
- *batch++ = addr;
- *batch++ = target_offset;
- } else if (gen >= 4) {
- *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *batch++ = 0;
- *batch++ = addr;
- *batch++ = target_offset;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = lower_32_bits(target_addr);
+
+ addr = gen8_canonical_addr(addr + 4);
+
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = upper_32_bits(target_addr);
} else {
- *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
- *batch++ = addr;
- *batch++ = target_offset;
+ *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = lower_32_bits(target_addr);
+ *batch++ = upper_32_bits(target_addr);
}
-
- goto out;
+ } else if (gen >= 6) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = 0;
+ *batch++ = addr;
+ *batch++ = target_addr;
+ } else if (IS_I965G(eb->i915)) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = 0;
+ *batch++ = vma_phys_addr(vma, offset);
+ *batch++ = target_addr;
+ } else if (gen >= 4) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *batch++ = 0;
+ *batch++ = addr;
+ *batch++ = target_addr;
+ } else if (gen >= 3 &&
+ !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) {
+ *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *batch++ = addr;
+ *batch++ = target_addr;
+ } else {
+ *batch++ = MI_STORE_DWORD_IMM;
+ *batch++ = vma_phys_addr(vma, offset);
+ *batch++ = target_addr;
}
+ return true;
+}
+
+static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ u64 offset,
+ u64 target_addr)
+{
+ if (eb->reloc_cache.vaddr)
+ return false;
+
+ if (!use_reloc_gpu(vma))
+ return false;
+
+ return __reloc_entry_gpu(eb, vma, offset, target_addr);
+}
+
+static u64
+relocate_entry(struct i915_vma *vma,
+ const struct drm_i915_gem_relocation_entry *reloc,
+ struct i915_execbuffer *eb,
+ const struct i915_vma *target)
+{
+ u64 target_addr = relocation_target(reloc, target);
+ u64 offset = reloc->offset;
+
+ if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+ bool wide = eb->reloc_cache.use_64bit_reloc;
+ void *vaddr;
+
repeat:
- vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
- if (IS_ERR(vaddr))
- return PTR_ERR(vaddr);
+ vaddr = reloc_vaddr(vma->obj,
+ &eb->reloc_cache,
+ offset >> PAGE_SHIFT);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
- clflush_write32(vaddr + offset_in_page(offset),
- lower_32_bits(target_offset),
- eb->reloc_cache.vaddr);
+ GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+ clflush_write32(vaddr + offset_in_page(offset),
+ lower_32_bits(target_addr),
+ eb->reloc_cache.vaddr);
- if (wide) {
- offset += sizeof(u32);
- target_offset >>= 32;
- wide = false;
- goto repeat;
+ if (wide) {
+ offset += sizeof(u32);
+ target_addr >>= 32;
+ wide = false;
+ goto repeat;
+ }
}
-out:
return target->node.start | UPDATE;
}
@@ -1411,12 +1676,11 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
- struct drm_i915_gem_relocation_entry __user *urelocs;
const struct drm_i915_gem_exec_object2 *entry = ev->exec;
- unsigned int remain;
+ struct drm_i915_gem_relocation_entry __user *urelocs =
+ u64_to_user_ptr(entry->relocs_ptr);
+ unsigned long remain = entry->relocation_count;
- urelocs = u64_to_user_ptr(entry->relocs_ptr);
- remain = entry->relocation_count;
if (unlikely(remain > N_RELOC(ULONG_MAX)))
return -EINVAL;
@@ -1425,13 +1689,13 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
* to read. However, if the array is not writable the user loses
* the updated relocation values.
*/
- if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs))))
+ if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs))))
return -EFAULT;
do {
struct drm_i915_gem_relocation_entry *r = stack;
unsigned int count =
- min_t(unsigned int, remain, ARRAY_SIZE(stack));
+ min_t(unsigned long, remain, ARRAY_SIZE(stack));
unsigned int copied;
/*
@@ -1494,9 +1758,7 @@ static int eb_relocate(struct i915_execbuffer *eb)
{
int err;
- mutex_lock(&eb->gem_context->mutex);
err = eb_lookup_vmas(eb);
- mutex_unlock(&eb->gem_context->mutex);
if (err)
return err;
@@ -1509,15 +1771,20 @@ static int eb_relocate(struct i915_execbuffer *eb)
/* The objects are in their final locations, apply the relocations. */
if (eb->args->flags & __EXEC_HAS_RELOC) {
struct eb_vma *ev;
+ int flush;
list_for_each_entry(ev, &eb->relocs, reloc_link) {
err = eb_relocate_vma(eb, ev);
if (err)
- return err;
+ break;
}
+
+ flush = reloc_gpu_flush(&eb->reloc_cache);
+ if (!err)
+ err = flush;
}
- return 0;
+ return err;
}
static int eb_move_to_gpu(struct i915_execbuffer *eb)
@@ -1597,19 +1864,15 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
err = i915_vma_move_to_active(vma, eb->request, flags);
i915_vma_unlock(vma);
-
- __eb_unreserve_vma(vma, flags);
- i915_vma_put(vma);
-
- ev->vma = NULL;
+ eb_unreserve_vma(ev);
}
ww_acquire_fini(&acquire);
+ eb_vma_array_put(fetch_and_zero(&eb->array));
+
if (unlikely(err))
goto err_skip;
- eb->exec = NULL;
-
/* Unconditionally flush any chipset caches (for streaming writes). */
intel_gt_chipset_flush(eb->engine->gt);
return 0;
@@ -1784,7 +2047,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
dma_resv_unlock(shadow->resv);
- dma_fence_work_commit(&pw->base);
+ dma_fence_work_commit_imm(&pw->base);
return 0;
err_batch_unlock:
@@ -1804,7 +2067,7 @@ err_free:
static int eb_parse(struct i915_execbuffer *eb)
{
struct drm_i915_private *i915 = eb->i915;
- struct intel_engine_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool;
struct i915_vma *shadow, *trampoline;
unsigned int len;
int err;
@@ -1827,7 +2090,7 @@ static int eb_parse(struct i915_execbuffer *eb)
len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
}
- pool = intel_engine_get_pool(eb->engine, len);
+ pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
if (IS_ERR(pool))
return PTR_ERR(pool);
@@ -1861,6 +2124,7 @@ static int eb_parse(struct i915_execbuffer *eb)
eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
eb->batch = &eb->vma[eb->buffer_count++];
+ eb->vma[eb->buffer_count].vma = NULL;
eb->trampoline = trampoline;
eb->batch_start_offset = 0;
@@ -1874,7 +2138,7 @@ err_trampoline:
err_shadow:
i915_vma_unpin(shadow);
err:
- intel_engine_pool_put(pool);
+ intel_gt_buffer_pool_put(pool);
return err;
}
@@ -2318,39 +2582,13 @@ static void eb_request_add(struct i915_execbuffer *eb)
/* Check that the context wasn't destroyed before submission */
if (likely(!intel_context_is_closed(eb->context))) {
attr = eb->gem_context->sched;
-
- /*
- * Boost actual workloads past semaphores!
- *
- * With semaphores we spin on one engine waiting for another,
- * simply to reduce the latency of starting our work when
- * the signaler completes. However, if there is any other
- * work that we could be doing on this engine instead, that
- * is better utilisation and will reduce the overall duration
- * of the current work. To avoid PI boosting a semaphore
- * far in the distance past over useful work, we keep a history
- * of any semaphore use along our dependency chain.
- */
- if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
- attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
- /*
- * Boost priorities to new clients (new request flows).
- *
- * Allow interactive/synchronous clients to jump ahead of
- * the bulk clients. (FQ_CODEL)
- */
- if (list_empty(&rq->sched.signalers_list))
- attr.priority |= I915_PRIORITY_WAIT;
} else {
/* Serialise with context_close via the add_to_timeline */
i915_request_set_error_once(rq, -ENOENT);
__i915_request_skip(rq);
}
- local_bh_disable();
__i915_request_queue(rq, &attr);
- local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
/* Try to clean up the client's timeline after submitting the request */
if (prev)
@@ -2369,7 +2607,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_i915_private *i915 = to_i915(dev);
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
- struct dma_fence *exec_fence = NULL;
struct sync_file *out_fence = NULL;
struct i915_vma *batch;
int out_fence_fd = -1;
@@ -2386,8 +2623,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
args->flags |= __EXEC_HAS_RELOC;
eb.exec = exec;
- eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
- eb.vma[0].vma = NULL;
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -2414,30 +2649,22 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (args->flags & I915_EXEC_IS_PINNED)
eb.batch_flags |= I915_DISPATCH_PINNED;
- if (args->flags & I915_EXEC_FENCE_IN) {
+#define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT)
+ if (args->flags & IN_FENCES) {
+ if ((args->flags & IN_FENCES) == IN_FENCES)
+ return -EINVAL;
+
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
return -EINVAL;
}
-
- if (args->flags & I915_EXEC_FENCE_SUBMIT) {
- if (in_fence) {
- err = -EINVAL;
- goto err_in_fence;
- }
-
- exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
- if (!exec_fence) {
- err = -EINVAL;
- goto err_in_fence;
- }
- }
+#undef IN_FENCES
if (args->flags & I915_EXEC_FENCE_OUT) {
out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
if (out_fence_fd < 0) {
err = out_fence_fd;
- goto err_exec_fence;
+ goto err_in_fence;
}
}
@@ -2528,14 +2755,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
if (in_fence) {
- err = i915_request_await_dma_fence(eb.request, in_fence);
- if (err < 0)
- goto err_request;
- }
-
- if (exec_fence) {
- err = i915_request_await_execution(eb.request, exec_fence,
- eb.engine->bond_execute);
+ if (args->flags & I915_EXEC_FENCE_SUBMIT)
+ err = i915_request_await_execution(eb.request,
+ in_fence,
+ eb.engine->bond_execute);
+ else
+ err = i915_request_await_dma_fence(eb.request,
+ in_fence);
if (err < 0)
goto err_request;
}
@@ -2563,7 +2789,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
*/
eb.request->batch = batch;
if (batch->private)
- intel_engine_pool_mark_active(batch->private, eb.request);
+ intel_gt_buffer_pool_mark_active(batch->private, eb.request);
trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb, batch);
@@ -2592,10 +2818,8 @@ err_batch_unpin:
i915_vma_unpin(batch);
err_parse:
if (batch->private)
- intel_engine_pool_put(batch->private);
+ intel_gt_buffer_pool_put(batch->private);
err_vma:
- if (eb.exec)
- eb_release_vmas(&eb);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
eb_unpin_engine(&eb);
@@ -2606,8 +2830,6 @@ err_destroy:
err_out_fence:
if (out_fence_fd != -1)
put_unused_fd(out_fence_fd);
-err_exec_fence:
- dma_fence_put(exec_fence);
err_in_fence:
dma_fence_put(in_fence);
return err;
@@ -2615,7 +2837,7 @@ err_in_fence:
static size_t eb_element_size(void)
{
- return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
+ return sizeof(struct drm_i915_gem_exec_object2);
}
static bool check_buffer_count(size_t count)
@@ -2671,7 +2893,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
/* Copy in the exec list from userland */
exec_list = kvmalloc_array(count, sizeof(*exec_list),
__GFP_NOWARN | GFP_KERNEL);
- exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+ exec2_list = kvmalloc_array(count, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec_list == NULL || exec2_list == NULL) {
drm_dbg(&i915->drm,
@@ -2749,8 +2971,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
if (err)
return err;
- /* Allocate an extra slot for use by the command parser */
- exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+ exec2_list = kvmalloc_array(count, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec2_list == NULL) {
drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
@@ -2818,3 +3039,7 @@ end:;
kvfree(exec2_list);
return err;
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_execbuffer.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
index 2f6100ec2608..8ab842c80f99 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
@@ -72,8 +72,8 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj)
0, 0);
if (i915_sw_fence_await_reservation(&stub->chain,
- obj->base.resv, NULL,
- true, I915_FENCE_TIMEOUT,
+ obj->base.resv, NULL, true,
+ i915_fence_timeout(to_i915(obj->base.dev)),
I915_FENCE_GFP) < 0)
goto err;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index b39c24dae64e..70f5f82da288 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -396,6 +396,38 @@ err:
return i915_error_to_vmf_fault(ret);
}
+static int
+vm_access(struct vm_area_struct *area, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct i915_mmap_offset *mmo = area->vm_private_data;
+ struct drm_i915_gem_object *obj = mmo->obj;
+ void *vaddr;
+
+ if (i915_gem_object_is_readonly(obj) && write)
+ return -EACCES;
+
+ addr -= area->vm_start;
+ if (addr >= obj->base.size)
+ return -EINVAL;
+
+ /* As this is primarily for debugging, let's focus on simplicity */
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WC);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ if (write) {
+ memcpy(vaddr + addr, buf, len);
+ __i915_gem_object_flush_map(obj, addr, len);
+ } else {
+ memcpy(buf, vaddr + addr, len);
+ }
+
+ i915_gem_object_unpin_map(obj);
+
+ return len;
+}
+
void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
{
struct i915_vma *vma;
@@ -745,12 +777,14 @@ static void vm_close(struct vm_area_struct *vma)
static const struct vm_operations_struct vm_ops_gtt = {
.fault = vm_fault_gtt,
+ .access = vm_access,
.open = vm_open,
.close = vm_close,
};
static const struct vm_operations_struct vm_ops_cpu = {
.fault = vm_fault_cpu,
+ .access = vm_access,
.open = vm_open,
.close = vm_close,
};
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 5da9f9e534b9..99356c00c19e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -135,9 +135,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
if (vma) {
GEM_BUG_ON(vma->obj != obj);
GEM_BUG_ON(!atomic_read(&vma->open_count));
- if (atomic_dec_and_test(&vma->open_count) &&
- !i915_vma_is_ggtt(vma))
- i915_vma_close(vma);
+ i915_vma_close(vma);
}
mutex_unlock(&ctx->mutex);
@@ -164,9 +162,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
struct llist_node *freed)
{
struct drm_i915_gem_object *obj, *on;
- intel_wakeref_t wakeref;
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
llist_for_each_entry_safe(obj, on, freed, freed) {
struct i915_mmap_offset *mmo, *mn;
@@ -206,7 +202,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
}
obj->mmo.offsets = RB_ROOT;
- GEM_BUG_ON(atomic_read(&obj->bind_count));
GEM_BUG_ON(obj->userfault_count);
GEM_BUG_ON(!list_empty(&obj->lut_list));
@@ -227,7 +222,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
cond_resched();
}
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}
void i915_gem_flush_free_objects(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index e00792158f13..f457d7130491 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -6,8 +6,8 @@
#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
-#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"
@@ -18,7 +18,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
{
struct drm_i915_private *i915 = ce->vm->i915;
const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
- struct intel_engine_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool;
struct i915_vma *batch;
u64 offset;
u64 count;
@@ -33,7 +33,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
count = div_u64(round_up(vma->size, block_size), block_size);
size = (1 + 8 * count) * sizeof(u32);
size = round_up(size, PAGE_SIZE);
- pool = intel_engine_get_pool(ce->engine, size);
+ pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
if (IS_ERR(pool)) {
err = PTR_ERR(pool);
goto out_pm;
@@ -78,10 +78,12 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
} while (rem);
*cmd = MI_BATCH_BUFFER_END;
- intel_gt_chipset_flush(ce->vm->gt);
+ i915_gem_object_flush_map(pool->obj);
i915_gem_object_unpin_map(pool->obj);
+ intel_gt_chipset_flush(ce->vm->gt);
+
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
@@ -96,7 +98,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
return batch;
out_put:
- intel_engine_pool_put(pool);
+ intel_gt_buffer_pool_put(pool);
out_pm:
intel_engine_pm_put(ce->engine);
return ERR_PTR(err);
@@ -114,13 +116,13 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
if (unlikely(err))
return err;
- return intel_engine_pool_mark_active(vma->private, rq);
+ return intel_gt_buffer_pool_mark_active(vma->private, rq);
}
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
i915_vma_unpin(vma);
- intel_engine_pool_put(vma->private);
+ intel_gt_buffer_pool_put(vma->private);
intel_engine_pm_put(ce->engine);
}
@@ -213,7 +215,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
{
struct drm_i915_private *i915 = ce->vm->i915;
const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
- struct intel_engine_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool;
struct i915_vma *batch;
u64 src_offset, dst_offset;
u64 count, rem;
@@ -228,7 +230,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
count = div_u64(round_up(dst->size, block_size), block_size);
size = (1 + 11 * count) * sizeof(u32);
size = round_up(size, PAGE_SIZE);
- pool = intel_engine_get_pool(ce->engine, size);
+ pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
if (IS_ERR(pool)) {
err = PTR_ERR(pool);
goto out_pm;
@@ -289,10 +291,12 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
} while (rem);
*cmd = MI_BATCH_BUFFER_END;
- intel_gt_chipset_flush(ce->vm->gt);
+ i915_gem_object_flush_map(pool->obj);
i915_gem_object_unpin_map(pool->obj);
+ intel_gt_chipset_flush(ce->vm->gt);
+
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
@@ -307,7 +311,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
return batch;
out_put:
- intel_engine_pool_put(pool);
+ intel_gt_buffer_pool_put(pool);
out_pm:
intel_engine_pm_put(ce->engine);
return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
index 243a43a87824..8bcd336a90dc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
@@ -10,7 +10,6 @@
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
-#include "gt/intel_engine_pool.h"
#include "i915_vma.h"
struct drm_i915_gem_object;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index a0b10bcd8d8a..54ee658bb168 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -179,9 +179,6 @@ struct drm_i915_gem_object {
#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
#define STRIDE_MASK (~TILING_MASK)
- /** Count of VMA actually bound by this object */
- atomic_t bind_count;
-
struct {
/*
* Protects the pages and their use. Do not use directly, but
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 24f4cadea114..af9e48ee4a33 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -199,8 +199,6 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
if (i915_gem_object_has_pinned_pages(obj))
return -EBUSY;
- GEM_BUG_ON(atomic_read(&obj->bind_count));
-
/* May be called by shrinker from within get_pages() (on another bo) */
mutex_lock(&obj->mm.lock);
if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
@@ -393,6 +391,7 @@ void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
offset, size, obj->base.size));
+ wmb(); /* let all previous writes be visible to coherent partners */
obj->mm.dirty = true;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 698e22420dc5..7fe9831aa9ba 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -10,8 +10,6 @@
#include <drm/drm.h> /* for drm_legacy.h! */
#include <drm/drm_cache.h>
-#include <drm/drm_legacy.h> /* for drm_pci.h! */
-#include <drm/drm_pci.h>
#include "gt/intel_gt.h"
#include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 03e5eb4c99d1..5b65ce738b16 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -27,18 +27,6 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
return false;
/*
- * Only report true if by unbinding the object and putting its pages
- * we can actually make forward progress towards freeing physical
- * pages.
- *
- * If the pages are pinned for any other reason than being bound
- * to the GPU, simply unbinding from the GPU is not going to succeed
- * in releasing our pin count on the pages themselves.
- */
- if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count))
- return false;
-
- /*
* We can only return physical pages to the system if we can either
* discard the contents (because the user has marked them as being
* purgeable) or if we can move their contents out to swap.
@@ -54,6 +42,8 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
flags = 0;
if (shrink & I915_SHRINK_ACTIVE)
flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
+ if (!(shrink & I915_SHRINK_BOUND))
+ flags = I915_GEM_OBJECT_UNBIND_TEST;
if (i915_gem_object_unbind(obj, flags) == 0)
__i915_gem_object_put_pages(obj);
@@ -194,10 +184,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
i915_gem_object_is_framebuffer(obj))
continue;
- if (!(shrink & I915_SHRINK_BOUND) &&
- atomic_read(&obj->bind_count))
- continue;
-
if (!can_release_pages(obj))
continue;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 5557dfa83a7b..dc250278bd2c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -381,14 +381,14 @@ static int i915_gem_init_stolen(struct drm_i915_private *i915)
mutex_init(&i915->mm.stolen_lock);
if (intel_vgpu_active(i915)) {
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"%s, disabling use of stolen memory\n",
"iGVT-g active");
return 0;
}
if (intel_vtd_active() && INTEL_GEN(i915) < 8) {
- dev_notice(i915->drm.dev,
+ drm_notice(&i915->drm,
"%s, disabling use of stolen memory\n",
"DMAR active");
return 0;
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index fa16f2c3f3ac..2b46c6530da9 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -88,8 +88,7 @@ static void huge_put_pages(struct drm_i915_gem_object *obj,
}
static const struct drm_i915_gem_object_ops huge_ops = {
- .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
- I915_GEM_OBJECT_IS_SHRINKABLE,
+ .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
.get_pages = huge_get_pages,
.put_pages = huge_put_pages,
};
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index d4f94ca9ae0d..c9988b6d5c88 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -421,7 +421,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg)
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
- goto out_close;
+ goto out_put;
err = igt_check_page_sizes(vma);
@@ -432,8 +432,6 @@ static int igt_mock_exhaust_device_supported_pages(void *arg)
}
i915_vma_unpin(vma);
- i915_vma_close(vma);
-
i915_gem_object_put(obj);
if (err)
@@ -443,8 +441,6 @@ static int igt_mock_exhaust_device_supported_pages(void *arg)
goto out_device;
-out_close:
- i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
out_device:
@@ -492,7 +488,7 @@ static int igt_mock_memory_region_huge_pages(void *arg)
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
- goto out_close;
+ goto out_put;
err = igt_check_page_sizes(vma);
if (err)
@@ -515,8 +511,6 @@ static int igt_mock_memory_region_huge_pages(void *arg)
}
i915_vma_unpin(vma);
- i915_vma_close(vma);
-
__i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
}
@@ -526,8 +520,6 @@ static int igt_mock_memory_region_huge_pages(void *arg)
out_unpin:
i915_vma_unpin(vma);
-out_close:
- i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
out_region:
@@ -587,10 +579,8 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
}
err = i915_vma_pin(vma, 0, 0, flags);
- if (err) {
- i915_vma_close(vma);
+ if (err)
goto out_unpin;
- }
err = igt_check_page_sizes(vma);
@@ -603,10 +593,8 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
i915_vma_unpin(vma);
- if (err) {
- i915_vma_close(vma);
+ if (err)
goto out_unpin;
- }
/*
* Try all the other valid offsets until the next
@@ -615,16 +603,12 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
*/
for (offset = 4096; offset < page_size; offset += 4096) {
err = i915_vma_unbind(vma);
- if (err) {
- i915_vma_close(vma);
+ if (err)
goto out_unpin;
- }
err = i915_vma_pin(vma, 0, 0, flags | offset);
- if (err) {
- i915_vma_close(vma);
+ if (err)
goto out_unpin;
- }
err = igt_check_page_sizes(vma);
@@ -636,10 +620,8 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
i915_vma_unpin(vma);
- if (err) {
- i915_vma_close(vma);
+ if (err)
goto out_unpin;
- }
if (igt_timeout(end_time,
"%s timed out at offset %x with page-size %x\n",
@@ -647,8 +629,6 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
break;
}
- i915_vma_close(vma);
-
i915_gem_object_unpin_pages(obj);
__i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
@@ -670,12 +650,6 @@ static void close_object_list(struct list_head *objects,
struct drm_i915_gem_object *obj, *on;
list_for_each_entry_safe(obj, on, objects, st_link) {
- struct i915_vma *vma;
-
- vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
- if (!IS_ERR(vma))
- i915_vma_close(vma);
-
list_del(&obj->st_link);
i915_gem_object_unpin_pages(obj);
__i915_gem_object_put_pages(obj);
@@ -912,7 +886,7 @@ static int igt_mock_ppgtt_64K(void *arg)
err = i915_vma_pin(vma, 0, 0, flags);
if (err)
- goto out_vma_close;
+ goto out_object_unpin;
err = igt_check_page_sizes(vma);
if (err)
@@ -945,8 +919,6 @@ static int igt_mock_ppgtt_64K(void *arg)
}
i915_vma_unpin(vma);
- i915_vma_close(vma);
-
i915_gem_object_unpin_pages(obj);
__i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
@@ -957,8 +929,6 @@ static int igt_mock_ppgtt_64K(void *arg)
out_vma_unpin:
i915_vma_unpin(vma);
-out_vma_close:
- i915_vma_close(vma);
out_object_unpin:
i915_gem_object_unpin_pages(obj);
out_object_put:
@@ -1070,7 +1040,7 @@ static int __igt_write_huge(struct intel_context *ce,
err = i915_vma_unbind(vma);
if (err)
- goto out_vma_close;
+ return err;
err = i915_vma_pin(vma, size, 0, flags | offset);
if (err) {
@@ -1081,7 +1051,7 @@ static int __igt_write_huge(struct intel_context *ce,
if (err == -ENOSPC && i915_is_ggtt(ce->vm))
err = 0;
- goto out_vma_close;
+ return err;
}
err = igt_check_page_sizes(vma);
@@ -1102,8 +1072,6 @@ static int __igt_write_huge(struct intel_context *ce,
out_vma_unpin:
i915_vma_unpin(vma);
-out_vma_close:
- __i915_vma_put(vma);
return err;
}
@@ -1490,7 +1458,7 @@ static int igt_ppgtt_pin_update(void *arg)
err = i915_vma_pin(vma, SZ_2M, 0, flags);
if (err)
- goto out_close;
+ goto out_put;
if (vma->page_sizes.sg < page_size) {
pr_info("Unable to allocate page-size %x, finishing test early\n",
@@ -1527,8 +1495,6 @@ static int igt_ppgtt_pin_update(void *arg)
goto out_unpin;
i915_vma_unpin(vma);
- i915_vma_close(vma);
-
i915_gem_object_put(obj);
}
@@ -1546,7 +1512,7 @@ static int igt_ppgtt_pin_update(void *arg)
err = i915_vma_pin(vma, 0, 0, flags);
if (err)
- goto out_close;
+ goto out_put;
/*
* Make sure we don't end up with something like where the pde is still
@@ -1576,8 +1542,6 @@ static int igt_ppgtt_pin_update(void *arg)
out_unpin:
i915_vma_unpin(vma);
-out_close:
- i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
out_vm:
@@ -1629,13 +1593,11 @@ static int igt_tmpfs_fallback(void *arg)
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
- goto out_close;
+ goto out_put;
err = igt_check_page_sizes(vma);
i915_vma_unpin(vma);
-out_close:
- i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
out_restore:
@@ -1682,7 +1644,7 @@ static int igt_shrink_thp(void *arg)
err = i915_vma_pin(vma, 0, 0, flags);
if (err)
- goto out_close;
+ goto out_put;
if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
pr_info("failed to allocate THP, finishing test early\n");
@@ -1706,7 +1668,7 @@ static int igt_shrink_thp(void *arg)
i915_gem_context_unlock_engines(ctx);
i915_vma_unpin(vma);
if (err)
- goto out_close;
+ goto out_put;
/*
* Now that the pages are *unpinned* shrink-all should invoke
@@ -1716,18 +1678,18 @@ static int igt_shrink_thp(void *arg)
if (i915_gem_object_has_pages(obj)) {
pr_err("shrink-all didn't truncate the pages\n");
err = -EINVAL;
- goto out_close;
+ goto out_put;
}
if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) {
pr_err("residual page-size bits left\n");
err = -EINVAL;
- goto out_close;
+ goto out_put;
}
err = i915_vma_pin(vma, 0, 0, flags);
if (err)
- goto out_close;
+ goto out_put;
while (n--) {
err = cpu_check(obj, n, 0xdeadbeaf);
@@ -1737,8 +1699,6 @@ static int igt_shrink_thp(void *arg)
out_unpin:
i915_vma_unpin(vma);
-out_close:
- i915_vma_close(vma);
out_put:
i915_gem_object_put(obj);
out_vm:
@@ -1777,21 +1737,20 @@ int i915_gem_huge_page_mock_selftests(void)
if (!i915_vm_is_4lvl(&ppgtt->vm)) {
pr_err("failed to create 48b PPGTT\n");
err = -EINVAL;
- goto out_close;
+ goto out_put;
}
/* If we were ever hit this then it's time to mock the 64K scratch */
if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
pr_err("PPGTT missing 64K scratch page\n");
err = -EINVAL;
- goto out_close;
+ goto out_put;
}
err = i915_subtests(tests, ppgtt);
-out_close:
+out_put:
i915_vm_put(&ppgtt->vm);
-
out_unlock:
drm_dev_put(&dev_priv->drm);
return err;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index b972be165e85..8fe3ad2ee34e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -7,9 +7,12 @@
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gpu_commands.h"
+#include "gem/i915_gem_lmem.h"
#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"
+#include "selftests/i915_random.h"
#include "huge_gem_object.h"
#include "mock_context.h"
@@ -127,10 +130,573 @@ static int igt_client_fill(void *arg)
} while (1);
}
+#define WIDTH 512
+#define HEIGHT 32
+
+struct blit_buffer {
+ struct i915_vma *vma;
+ u32 start_val;
+ u32 tiling;
+};
+
+struct tiled_blits {
+ struct intel_context *ce;
+ struct blit_buffer buffers[3];
+ struct blit_buffer scratch;
+ struct i915_vma *batch;
+ u64 hole;
+ u32 width;
+ u32 height;
+};
+
+static int prepare_blit(const struct tiled_blits *t,
+ struct blit_buffer *dst,
+ struct blit_buffer *src,
+ struct drm_i915_gem_object *batch)
+{
+ const int gen = INTEL_GEN(to_i915(batch->base.dev));
+ bool use_64b_reloc = gen >= 8;
+ u32 src_pitch, dst_pitch;
+ u32 cmd, *cs;
+
+ cs = i915_gem_object_pin_map(batch, I915_MAP_WC);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
+ cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
+ if (src->tiling == I915_TILING_Y)
+ cmd |= BCS_SRC_Y;
+ if (dst->tiling == I915_TILING_Y)
+ cmd |= BCS_DST_Y;
+ *cs++ = cmd;
+
+ cmd = MI_FLUSH_DW;
+ if (gen >= 8)
+ cmd++;
+ *cs++ = cmd;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+
+ cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
+ if (gen >= 8)
+ cmd += 2;
+
+ src_pitch = t->width * 4;
+ if (src->tiling) {
+ cmd |= XY_SRC_COPY_BLT_SRC_TILED;
+ src_pitch /= 4;
+ }
+
+ dst_pitch = t->width * 4;
+ if (dst->tiling) {
+ cmd |= XY_SRC_COPY_BLT_DST_TILED;
+ dst_pitch /= 4;
+ }
+
+ *cs++ = cmd;
+ *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
+ *cs++ = 0;
+ *cs++ = t->height << 16 | t->width;
+ *cs++ = lower_32_bits(dst->vma->node.start);
+ if (use_64b_reloc)
+ *cs++ = upper_32_bits(dst->vma->node.start);
+ *cs++ = 0;
+ *cs++ = src_pitch;
+ *cs++ = lower_32_bits(src->vma->node.start);
+ if (use_64b_reloc)
+ *cs++ = upper_32_bits(src->vma->node.start);
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(batch);
+ i915_gem_object_unpin_map(batch);
+
+ return 0;
+}
+
+static void tiled_blits_destroy_buffers(struct tiled_blits *t)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(t->buffers); i++)
+ i915_vma_put(t->buffers[i].vma);
+
+ i915_vma_put(t->scratch.vma);
+ i915_vma_put(t->batch);
+}
+
+static struct i915_vma *
+__create_vma(struct tiled_blits *t, size_t size, bool lmem)
+{
+ struct drm_i915_private *i915 = t->ce->vm->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ if (lmem)
+ obj = i915_gem_object_create_lmem(i915, size, 0);
+ else
+ obj = i915_gem_object_create_shmem(i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, t->ce->vm, NULL);
+ if (IS_ERR(vma))
+ i915_gem_object_put(obj);
+
+ return vma;
+}
+
+static struct i915_vma *create_vma(struct tiled_blits *t, bool lmem)
+{
+ return __create_vma(t, PAGE_ALIGN(t->width * t->height * 4), lmem);
+}
+
+static int tiled_blits_create_buffers(struct tiled_blits *t,
+ int width, int height,
+ struct rnd_state *prng)
+{
+ struct drm_i915_private *i915 = t->ce->engine->i915;
+ int i;
+
+ t->width = width;
+ t->height = height;
+
+ t->batch = __create_vma(t, PAGE_SIZE, false);
+ if (IS_ERR(t->batch))
+ return PTR_ERR(t->batch);
+
+ t->scratch.vma = create_vma(t, false);
+ if (IS_ERR(t->scratch.vma)) {
+ i915_vma_put(t->batch);
+ return PTR_ERR(t->scratch.vma);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
+ struct i915_vma *vma;
+
+ vma = create_vma(t, HAS_LMEM(i915) && i % 2);
+ if (IS_ERR(vma)) {
+ tiled_blits_destroy_buffers(t);
+ return PTR_ERR(vma);
+ }
+
+ t->buffers[i].vma = vma;
+ t->buffers[i].tiling =
+ i915_prandom_u32_max_state(I915_TILING_Y + 1, prng);
+ }
+
+ return 0;
+}
+
+static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val)
+{
+ int i;
+
+ t->scratch.start_val = val;
+ for (i = 0; i < t->width * t->height; i++)
+ vaddr[i] = val++;
+
+ i915_gem_object_flush_map(t->scratch.vma->obj);
+}
+
+static u64 swizzle_bit(unsigned int bit, u64 offset)
+{
+ return (offset & BIT_ULL(bit)) >> (bit - 6);
+}
+
+static u64 tiled_offset(const struct intel_gt *gt,
+ u64 v,
+ unsigned int stride,
+ unsigned int tiling)
+{
+ unsigned int swizzle;
+ u64 x, y;
+
+ if (tiling == I915_TILING_NONE)
+ return v;
+
+ y = div64_u64_rem(v, stride, &x);
+
+ if (tiling == I915_TILING_X) {
+ v = div64_u64_rem(y, 8, &y) * stride * 8;
+ v += y * 512;
+ v += div64_u64_rem(x, 512, &x) << 12;
+ v += x;
+
+ swizzle = gt->ggtt->bit_6_swizzle_x;
+ } else {
+ const unsigned int ytile_span = 16;
+ const unsigned int ytile_height = 512;
+
+ v = div64_u64_rem(y, 32, &y) * stride * 32;
+ v += y * ytile_span;
+ v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+ v += x;
+
+ swizzle = gt->ggtt->bit_6_swizzle_y;
+ }
+
+ switch (swizzle) {
+ case I915_BIT_6_SWIZZLE_9:
+ v ^= swizzle_bit(9, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10_11:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
+ break;
+ }
+
+ return v;
+}
+
+static const char *repr_tiling(int tiling)
+{
+ switch (tiling) {
+ case I915_TILING_NONE: return "linear";
+ case I915_TILING_X: return "X";
+ case I915_TILING_Y: return "Y";
+ default: return "unknown";
+ }
+}
+
+static int verify_buffer(const struct tiled_blits *t,
+ struct blit_buffer *buf,
+ struct rnd_state *prng)
+{
+ const u32 *vaddr;
+ int ret = 0;
+ int x, y, p;
+
+ x = i915_prandom_u32_max_state(t->width, prng);
+ y = i915_prandom_u32_max_state(t->height, prng);
+ p = y * t->width + x;
+
+ vaddr = i915_gem_object_pin_map(buf->vma->obj, I915_MAP_WC);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ if (vaddr[0] != buf->start_val) {
+ ret = -EINVAL;
+ } else {
+ u64 v = tiled_offset(buf->vma->vm->gt,
+ p * 4, t->width * 4,
+ buf->tiling);
+
+ if (vaddr[v / sizeof(*vaddr)] != buf->start_val + p)
+ ret = -EINVAL;
+ }
+ if (ret) {
+ pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n",
+ repr_tiling(buf->tiling),
+ x, y, buf->start_val);
+ igt_hexdump(vaddr, 4096);
+ }
+
+ i915_gem_object_unpin_map(buf->vma->obj);
+ return ret;
+}
+
+static int move_to_active(struct i915_vma *vma,
+ struct i915_request *rq,
+ unsigned int flags)
+{
+ int err;
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, flags);
+ i915_vma_unlock(vma);
+
+ return err;
+}
+
+static int pin_buffer(struct i915_vma *vma, u64 addr)
+{
+ int err;
+
+ if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) {
+ err = i915_vma_unbind(vma);
+ if (err)
+ return err;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED | addr);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int
+tiled_blit(struct tiled_blits *t,
+ struct blit_buffer *dst, u64 dst_addr,
+ struct blit_buffer *src, u64 src_addr)
+{
+ struct i915_request *rq;
+ int err;
+
+ err = pin_buffer(src->vma, src_addr);
+ if (err) {
+ pr_err("Cannot pin src @ %llx\n", src_addr);
+ return err;
+ }
+
+ err = pin_buffer(dst->vma, dst_addr);
+ if (err) {
+ pr_err("Cannot pin dst @ %llx\n", dst_addr);
+ goto err_src;
+ }
+
+ err = i915_vma_pin(t->batch, 0, 0, PIN_USER | PIN_HIGH);
+ if (err) {
+ pr_err("cannot pin batch\n");
+ goto err_dst;
+ }
+
+ err = prepare_blit(t, dst, src, t->batch->obj);
+ if (err)
+ goto err_bb;
+
+ rq = intel_context_create_request(t->ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_bb;
+ }
+
+ err = move_to_active(t->batch, rq, 0);
+ if (!err)
+ err = move_to_active(src->vma, rq, 0);
+ if (!err)
+ err = move_to_active(dst->vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ t->batch->node.start,
+ t->batch->node.size,
+ 0);
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, HZ / 2) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ dst->start_val = src->start_val;
+err_bb:
+ i915_vma_unpin(t->batch);
+err_dst:
+ i915_vma_unpin(dst->vma);
+err_src:
+ i915_vma_unpin(src->vma);
+ return err;
+}
+
+static struct tiled_blits *
+tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
+{
+ struct drm_mm_node hole;
+ struct tiled_blits *t;
+ u64 hole_size;
+ int err;
+
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return ERR_PTR(-ENOMEM);
+
+ t->ce = intel_context_create(engine);
+ if (IS_ERR(t->ce)) {
+ err = PTR_ERR(t->ce);
+ goto err_free;
+ }
+
+ hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
+ hole_size *= 2; /* room to maneuver */
+ hole_size += 2 * I915_GTT_MIN_ALIGNMENT;
+
+ mutex_lock(&t->ce->vm->mutex);
+ memset(&hole, 0, sizeof(hole));
+ err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
+ hole_size, 0, I915_COLOR_UNEVICTABLE,
+ 0, U64_MAX,
+ DRM_MM_INSERT_BEST);
+ if (!err)
+ drm_mm_remove_node(&hole);
+ mutex_unlock(&t->ce->vm->mutex);
+ if (err) {
+ err = -ENODEV;
+ goto err_put;
+ }
+
+ t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
+ pr_info("Using hole at %llx\n", t->hole);
+
+ err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
+ if (err)
+ goto err_put;
+
+ return t;
+
+err_put:
+ intel_context_put(t->ce);
+err_free:
+ kfree(t);
+ return ERR_PTR(err);
+}
+
+static void tiled_blits_destroy(struct tiled_blits *t)
+{
+ tiled_blits_destroy_buffers(t);
+
+ intel_context_put(t->ce);
+ kfree(t);
+}
+
+static int tiled_blits_prepare(struct tiled_blits *t,
+ struct rnd_state *prng)
+{
+ u64 offset = PAGE_ALIGN(t->width * t->height * 4);
+ u32 *map;
+ int err;
+ int i;
+
+ map = i915_gem_object_pin_map(t->scratch.vma->obj, I915_MAP_WC);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ /* Use scratch to fill objects */
+ for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
+ fill_scratch(t, map, prandom_u32_state(prng));
+ GEM_BUG_ON(verify_buffer(t, &t->scratch, prng));
+
+ err = tiled_blit(t,
+ &t->buffers[i], t->hole + offset,
+ &t->scratch, t->hole);
+ if (err == 0)
+ err = verify_buffer(t, &t->buffers[i], prng);
+ if (err) {
+ pr_err("Failed to create buffer %d\n", i);
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(t->scratch.vma->obj);
+ return err;
+}
+
+static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
+{
+ u64 offset =
+ round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
+ int err;
+
+ /* We want to check position invariant tiling across GTT eviction */
+
+ err = tiled_blit(t,
+ &t->buffers[1], t->hole + offset / 2,
+ &t->buffers[0], t->hole + 2 * offset);
+ if (err)
+ return err;
+
+ /* Reposition so that we overlap the old addresses, and slightly off */
+ err = tiled_blit(t,
+ &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
+ &t->buffers[1], t->hole + 3 * offset / 2);
+ if (err)
+ return err;
+
+ err = verify_buffer(t, &t->buffers[2], prng);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int __igt_client_tiled_blits(struct intel_engine_cs *engine,
+ struct rnd_state *prng)
+{
+ struct tiled_blits *t;
+ int err;
+
+ t = tiled_blits_create(engine, prng);
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+
+ err = tiled_blits_prepare(t, prng);
+ if (err)
+ goto out;
+
+ err = tiled_blits_bounce(t, prng);
+ if (err)
+ goto out;
+
+out:
+ tiled_blits_destroy(t);
+ return err;
+}
+
+static bool has_bit17_swizzle(int sw)
+{
+ return (sw == I915_BIT_6_SWIZZLE_9_10_17 ||
+ sw == I915_BIT_6_SWIZZLE_9_17);
+}
+
+static bool bad_swizzling(struct drm_i915_private *i915)
+{
+ struct i915_ggtt *ggtt = &i915->ggtt;
+
+ if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+ return true;
+
+ if (has_bit17_swizzle(ggtt->bit_6_swizzle_x) ||
+ has_bit17_swizzle(ggtt->bit_6_swizzle_y))
+ return true;
+
+ return false;
+}
+
+static int igt_client_tiled_blits(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ I915_RND_STATE(prng);
+ int inst = 0;
+
+ /* Test requires explicit BLT tiling controls */
+ if (INTEL_GEN(i915) < 4)
+ return 0;
+
+ if (bad_swizzling(i915)) /* Requires sane (sub-page) swizzling */
+ return 0;
+
+ do {
+ struct intel_engine_cs *engine;
+ int err;
+
+ engine = intel_engine_lookup_user(i915,
+ I915_ENGINE_CLASS_COPY,
+ inst++);
+ if (!engine)
+ return 0;
+
+ err = __igt_client_tiled_blits(engine, &prng);
+ if (err == -ENODEV)
+ err = 0;
+ if (err)
+ return err;
+ } while (1);
+}
+
int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_client_fill),
+ SUBTEST(igt_client_tiled_blits),
};
if (intel_gt_is_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 3f6079e1dfb6..87d7d8aa080f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -158,6 +158,8 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
return PTR_ERR(map);
map[offset / sizeof(*map)] = v;
+
+ __i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map));
i915_gem_object_unpin_map(ctx->obj);
return 0;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 54b86cf7f5d2..b81978890641 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -972,12 +972,6 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
goto err_batch;
}
- err = rq->engine->emit_bb_start(rq,
- batch->node.start, batch->node.size,
- 0);
- if (err)
- goto err_request;
-
i915_vma_lock(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
@@ -994,6 +988,18 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
if (err)
goto skip_request;
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto skip_request;
+ }
+
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start, batch->node.size,
+ 0);
+ if (err)
+ goto skip_request;
+
i915_vma_unpin_and_release(&batch, 0);
i915_vma_unpin(vma);
@@ -1005,7 +1011,6 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
skip_request:
i915_request_set_error_once(rq, err);
-err_request:
i915_request_add(rq);
err_batch:
i915_vma_unpin_and_release(&batch, 0);
@@ -1541,10 +1546,6 @@ static int write_to_scratch(struct i915_gem_context *ctx,
goto err_unpin;
}
- err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
- if (err)
- goto err_request;
-
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (err == 0)
@@ -1553,6 +1554,16 @@ static int write_to_scratch(struct i915_gem_context *ctx,
if (err)
goto skip_request;
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto skip_request;
+ }
+
+ err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
+ if (err)
+ goto skip_request;
+
i915_vma_unpin(vma);
i915_request_add(rq);
@@ -1560,7 +1571,6 @@ static int write_to_scratch(struct i915_gem_context *ctx,
goto out_vm;
skip_request:
i915_request_set_error_once(rq, err);
-err_request:
i915_request_add(rq);
err_unpin:
i915_vma_unpin(vma);
@@ -1674,10 +1684,6 @@ static int read_from_scratch(struct i915_gem_context *ctx,
goto err_unpin;
}
- err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
- if (err)
- goto err_request;
-
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
@@ -1686,8 +1692,17 @@ static int read_from_scratch(struct i915_gem_context *ctx,
if (err)
goto skip_request;
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto skip_request;
+ }
+
+ err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
+ if (err)
+ goto skip_request;
+
i915_vma_unpin(vma);
- i915_vma_close(vma);
i915_request_add(rq);
@@ -1709,7 +1724,6 @@ static int read_from_scratch(struct i915_gem_context *ctx,
goto out_vm;
skip_request:
i915_request_set_error_once(rq, err);
-err_request:
i915_request_add(rq);
err_unpin:
i915_vma_unpin(vma);
@@ -1925,7 +1939,7 @@ static int mock_context_barrier(void *arg)
goto out;
}
- rq = igt_request_alloc(ctx, i915->engine[RCS0]);
+ rq = igt_request_alloc(ctx, i915->gt.engine[RCS0]);
if (IS_ERR(rq)) {
pr_err("Request allocation failed!\n");
goto out;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
new file mode 100644
index 000000000000..a49016f8ee0d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "gt/intel_engine_pm.h"
+#include "selftests/igt_flush_test.h"
+
+static u64 read_reloc(const u32 *map, int x, const u64 mask)
+{
+ u64 reloc;
+
+ memcpy(&reloc, &map[x], sizeof(reloc));
+ return reloc & mask;
+}
+
+static int __igt_gpu_reloc(struct i915_execbuffer *eb,
+ struct drm_i915_gem_object *obj)
+{
+ const unsigned int offsets[] = { 8, 3, 0 };
+ const u64 mask =
+ GENMASK_ULL(eb->reloc_cache.use_64bit_reloc ? 63 : 31, 0);
+ const u32 *map = page_mask_bits(obj->mm.mapping);
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ int err;
+ int i;
+
+ vma = i915_vma_instance(obj, eb->context->vm, NULL);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ if (err)
+ return err;
+
+ /* 8-Byte aligned */
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[0] * sizeof(u32),
+ 0)) {
+ err = -EIO;
+ goto unpin_vma;
+ }
+
+ /* !8-Byte aligned */
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[1] * sizeof(u32),
+ 1)) {
+ err = -EIO;
+ goto unpin_vma;
+ }
+
+ /* Skip to the end of the cmd page */
+ i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
+ i -= eb->reloc_cache.rq_size;
+ memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size,
+ MI_NOOP, i);
+ eb->reloc_cache.rq_size += i;
+
+ /* Force batch chaining */
+ if (!__reloc_entry_gpu(eb, vma,
+ offsets[2] * sizeof(u32),
+ 2)) {
+ err = -EIO;
+ goto unpin_vma;
+ }
+
+ GEM_BUG_ON(!eb->reloc_cache.rq);
+ rq = i915_request_get(eb->reloc_cache.rq);
+ err = reloc_gpu_flush(&eb->reloc_cache);
+ if (err)
+ goto put_rq;
+ GEM_BUG_ON(eb->reloc_cache.rq);
+
+ err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
+ if (err) {
+ intel_gt_set_wedged(eb->engine->gt);
+ goto put_rq;
+ }
+
+ if (!i915_request_completed(rq)) {
+ pr_err("%s: did not wait for relocations!\n", eb->engine->name);
+ err = -EINVAL;
+ goto put_rq;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(offsets); i++) {
+ u64 reloc = read_reloc(map, offsets[i], mask);
+
+ if (reloc != i) {
+ pr_err("%s[%d]: map[%d] %llx != %x\n",
+ eb->engine->name, i, offsets[i], reloc, i);
+ err = -EINVAL;
+ }
+ }
+ if (err)
+ igt_hexdump(map, 4096);
+
+put_rq:
+ i915_request_put(rq);
+unpin_vma:
+ i915_vma_unpin(vma);
+ return err;
+}
+
+static int igt_gpu_reloc(void *arg)
+{
+ struct i915_execbuffer eb;
+ struct drm_i915_gem_object *scratch;
+ int err = 0;
+ u32 *map;
+
+ eb.i915 = arg;
+
+ scratch = i915_gem_object_create_internal(eb.i915, 4096);
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
+
+ map = i915_gem_object_pin_map(scratch, I915_MAP_WC);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto err_scratch;
+ }
+
+ for_each_uabi_engine(eb.engine, eb.i915) {
+ reloc_cache_init(&eb.reloc_cache, eb.i915);
+ memset(map, POISON_INUSE, 4096);
+
+ intel_engine_pm_get(eb.engine);
+ eb.context = intel_context_create(eb.engine);
+ if (IS_ERR(eb.context)) {
+ err = PTR_ERR(eb.context);
+ goto err_pm;
+ }
+
+ err = intel_context_pin(eb.context);
+ if (err)
+ goto err_put;
+
+ err = __igt_gpu_reloc(&eb, scratch);
+
+ intel_context_unpin(eb.context);
+err_put:
+ intel_context_put(eb.context);
+err_pm:
+ intel_engine_pm_put(eb.engine);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(eb.i915))
+ err = -EIO;
+
+err_scratch:
+ i915_gem_object_put(scratch);
+ return err;
+}
+
+int i915_gem_execbuffer_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_gpu_reloc),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return i915_live_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 43912e9b683d..9c7402ce5bf9 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -952,6 +952,129 @@ static int igt_mmap(void *arg)
return 0;
}
+static const char *repr_mmap_type(enum i915_mmap_type type)
+{
+ switch (type) {
+ case I915_MMAP_TYPE_GTT: return "gtt";
+ case I915_MMAP_TYPE_WB: return "wb";
+ case I915_MMAP_TYPE_WC: return "wc";
+ case I915_MMAP_TYPE_UC: return "uc";
+ default: return "unknown";
+ }
+}
+
+static bool can_access(const struct drm_i915_gem_object *obj)
+{
+ unsigned int flags =
+ I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM;
+
+ return i915_gem_object_type_has(obj, flags);
+}
+
+static int __igt_mmap_access(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ enum i915_mmap_type type)
+{
+ struct i915_mmap_offset *mmo;
+ unsigned long __user *ptr;
+ unsigned long A, B;
+ unsigned long x, y;
+ unsigned long addr;
+ int err;
+
+ memset(&A, 0xAA, sizeof(A));
+ memset(&B, 0xBB, sizeof(B));
+
+ if (!can_mmap(obj, type) || !can_access(obj))
+ return 0;
+
+ mmo = mmap_offset_attach(obj, type, NULL);
+ if (IS_ERR(mmo))
+ return PTR_ERR(mmo);
+
+ addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+ ptr = (unsigned long __user *)addr;
+
+ err = __put_user(A, ptr);
+ if (err) {
+ pr_err("%s(%s): failed to write into user mmap\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ intel_gt_flush_ggtt_writes(&i915->gt);
+
+ err = access_process_vm(current, addr, &x, sizeof(x), 0);
+ if (err != sizeof(x)) {
+ pr_err("%s(%s): access_process_vm() read failed\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ err = access_process_vm(current, addr, &B, sizeof(B), FOLL_WRITE);
+ if (err != sizeof(B)) {
+ pr_err("%s(%s): access_process_vm() write failed\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ intel_gt_flush_ggtt_writes(&i915->gt);
+
+ err = __get_user(y, ptr);
+ if (err) {
+ pr_err("%s(%s): failed to read from user mmap\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ if (x != A || y != B) {
+ pr_err("%s(%s): failed to read/write values, found (%lx, %lx)\n",
+ obj->mm.region->name, repr_mmap_type(type),
+ x, y);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+out_unmap:
+ vm_munmap(addr, obj->base.size);
+ return err;
+}
+
+static int igt_mmap_access(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *mr;
+ enum intel_region_id id;
+
+ for_each_memory_region(mr, i915, id) {
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0);
+ if (obj == ERR_PTR(-ENODEV))
+ continue;
+
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT);
+ if (err == 0)
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB);
+ if (err == 0)
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WC);
+ if (err == 0)
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_UC);
+
+ i915_gem_object_put(obj);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int __igt_mmap_gpu(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
enum i915_mmap_type type)
@@ -1156,9 +1279,6 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915,
if (err)
goto out_unmap;
- GEM_BUG_ON(mmo->mmap_type == I915_MMAP_TYPE_GTT &&
- !atomic_read(&obj->bind_count));
-
err = check_present(addr, obj->base.size);
if (err) {
pr_err("%s: was not present\n", obj->mm.region->name);
@@ -1175,7 +1295,6 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915,
pr_err("Failed to unbind object!\n");
goto out_unmap;
}
- GEM_BUG_ON(atomic_read(&obj->bind_count));
if (type != I915_MMAP_TYPE_GTT) {
__i915_gem_object_put_pages(obj);
@@ -1233,6 +1352,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_smoke_tiling),
SUBTEST(igt_mmap_offset_exhaustion),
SUBTEST(igt_mmap),
+ SUBTEST(igt_mmap_access),
SUBTEST(igt_mmap_revoke),
SUBTEST(igt_mmap_gpu),
};
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
index 2b6db6f799de..faa5b6d91795 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
@@ -14,7 +14,7 @@ static int igt_gem_object(void *arg)
{
struct drm_i915_private *i915 = arg;
struct drm_i915_gem_object *obj;
- int err = -ENOMEM;
+ int err;
/* Basic test to ensure we can create an object */
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index 772d8cba7da9..e21b5023ca7d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -83,6 +83,8 @@ igt_emit_store_dw(struct i915_vma *vma,
offset += PAGE_SIZE;
}
*cmd = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
intel_gt_chipset_flush(vma->vm->gt);
@@ -126,16 +128,6 @@ int igt_gpu_fill_dw(struct intel_context *ce,
goto err_batch;
}
- flags = 0;
- if (INTEL_GEN(ce->vm->i915) <= 5)
- flags |= I915_DISPATCH_SECURE;
-
- err = rq->engine->emit_bb_start(rq,
- batch->node.start, batch->node.size,
- flags);
- if (err)
- goto err_request;
-
i915_vma_lock(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
@@ -152,15 +144,17 @@ int igt_gpu_fill_dw(struct intel_context *ce,
if (err)
goto skip_request;
- i915_request_add(rq);
-
- i915_vma_unpin_and_release(&batch, 0);
+ flags = 0;
+ if (INTEL_GEN(ce->vm->i915) <= 5)
+ flags |= I915_DISPATCH_SECURE;
- return 0;
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start, batch->node.size,
+ flags);
skip_request:
- i915_request_set_error_once(rq, err);
-err_request:
+ if (err)
+ i915_request_set_error_once(rq, err);
i915_request_add(rq);
err_batch:
i915_vma_unpin_and_release(&batch, 0);