Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 264
1 file changed, 160 insertions(+), 104 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d6099d084748..b06f561a268f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -75,37 +75,42 @@ struct i915_execbuffer {
unsigned int page;
bool use_64bit_reloc : 1;
} reloc_cache;
- int and;
- union {
- struct i915_vma **lut;
- struct hlist_head *buckets;
- };
+ int lut_mask;
+ struct hlist_head *buckets;
};
+/*
+ * As an alternative to creating a hashtable of handle-to-vma for a batch,
+ * we use the last available reserved field in the execobject[] to stash
+ * a link from the execobj to its vma.
+ */
+#define __exec_to_vma(ee) (ee)->rsvd2
+#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
+
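The stash/recover round trip above is a cast in each direction. Below is a minimal stand-alone sketch of the idea, assuming u64_to_ptr() reduces to a checked pointer cast (as the i915_utils.h helper does); all fake_* names are hypothetical stand-ins, not driver code:

#include <assert.h>
#include <stdint.h>

/* hypothetical stand-ins; the real types are i915_vma and
 * drm_i915_gem_exec_object2, whose rsvd2 is the u64 being borrowed */
struct fake_vma { int dummy; };
struct fake_exec_entry { uint64_t rsvd2; };

#define fake_u64_to_ptr(T, x) ((T *)(uintptr_t)(x))

int main(void)
{
	struct fake_vma vma;
	struct fake_exec_entry ee;

	/* stash: widen the pointer into the otherwise unused 64-bit field */
	ee.rsvd2 = (uintptr_t)&vma;

	/* recover: narrow back to a typed pointer */
	assert(fake_u64_to_ptr(struct fake_vma, ee.rsvd2) == &vma);
	return 0;
}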
static int eb_create(struct i915_execbuffer *eb)
{
- eb->lut = NULL;
- if (eb->args->flags & I915_EXEC_HANDLE_LUT) {
- unsigned int size = eb->args->buffer_count;
- size *= sizeof(struct i915_vma *);
- eb->lut = kmalloc(size,
- GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
- }
-
- if (!eb->lut) {
- unsigned int size = eb->args->buffer_count;
- unsigned int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
- BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
- while (count > 2*size)
- count >>= 1;
- eb->lut = kzalloc(count * sizeof(struct hlist_head),
- GFP_TEMPORARY);
- if (!eb->lut)
- return -ENOMEM;
-
- eb->and = count - 1;
+ if ((eb->args->flags & I915_EXEC_HANDLE_LUT) == 0) {
+ unsigned int size = 1 + ilog2(eb->args->buffer_count);
+
+ do {
+ eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
+ GFP_TEMPORARY |
+ __GFP_NORETRY |
+ __GFP_NOWARN);
+ if (eb->buckets)
+ break;
+ } while (--size);
+
+ if (unlikely(!eb->buckets)) {
+ eb->buckets = kzalloc(sizeof(struct hlist_head),
+ GFP_TEMPORARY);
+ if (unlikely(!eb->buckets))
+ return -ENOMEM;
+ }
+
+ eb->lut_mask = size;
} else {
- eb->and = -eb->args->buffer_count;
+ eb->lut_mask = -eb->args->buffer_count;
}
return 0;
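eb_create() now sizes the table to 2^(1 + ilog2(buffer_count)) buckets, the smallest power of two strictly greater than the object count, and rather than failing under memory pressure it halves the table until an allocation succeeds, bottoming out at a single bucket. A userspace sketch of that back-off, assuming calloc in place of kzalloc (alloc_buckets() is a hypothetical name):

#include <stdlib.h>

/* hypothetical stand-in for the kzalloc back-off loop in eb_create() */
static void *alloc_buckets(unsigned int count, unsigned int *bits_out)
{
	/* smallest power of two strictly greater than count: 1 + ilog2(count) bits */
	unsigned int bits = 1;

	while ((1u << bits) <= count)
		bits++;

	do {
		void *table = calloc(1u << bits, sizeof(void *));
		if (table) {
			*bits_out = bits;	/* doubles as the hash width */
			return table;
		}
	} while (--bits);	/* halve the table and retry under pressure */

	/* last resort: one bucket, i.e. a plain linked list */
	*bits_out = 0;
	return calloc(1, sizeof(void *));
}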
@@ -142,73 +147,112 @@ eb_reset(struct i915_execbuffer *eb)
vma->exec_entry = NULL;
}
- if (eb->and >= 0)
- memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
+ if (eb->lut_mask >= 0)
+ memset(eb->buckets, 0,
+ sizeof(struct hlist_head) << eb->lut_mask);
}
-static struct i915_vma *
-eb_get_batch(struct i915_execbuffer *eb)
+static bool
+eb_add_vma(struct i915_execbuffer *eb, struct i915_vma *vma, int i)
{
- struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_link);
+ if (unlikely(vma->exec_entry)) {
+ DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
+ eb->exec[i].handle, i);
+ return false;
+ }
+ list_add_tail(&vma->exec_link, &eb->vmas);
- /*
- * SNA is doing fancy tricks with compressing batch buffers, which leads
- * to negative relocation deltas. Usually that works out ok since the
- * relocate address is still positive, except when the batch is placed
- * very low in the GTT. Ensure this doesn't happen.
- *
- * Note that actual hangs have only been observed on gen7, but for
- * paranoia do it everywhere.
- */
- if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
- vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+ vma->exec_entry = &eb->exec[i];
+ if (eb->lut_mask >= 0) {
+ vma->exec_handle = eb->exec[i].handle;
+ hlist_add_head(&vma->exec_node,
+ &eb->buckets[hash_32(vma->exec_handle,
+ eb->lut_mask)]);
+ }
- return vma;
+ i915_vma_get(vma);
+ __exec_to_vma(&eb->exec[i]) = (uintptr_t)vma;
+ return true;
+}
+
+static inline struct hlist_head *
+ht_head(const struct i915_gem_context *ctx, u32 handle)
+{
+ return &ctx->vma_lut.ht[hash_32(handle, ctx->vma_lut.ht_bits)];
+}
+
+static inline bool
+ht_needs_resize(const struct i915_gem_context *ctx)
+{
+ return (4*ctx->vma_lut.ht_count > 3*ctx->vma_lut.ht_size ||
+ 4*ctx->vma_lut.ht_count + 1 < ctx->vma_lut.ht_size);
}
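ht_needs_resize() keeps the per-context table's load factor between roughly 1/4 and 3/4 using only integer arithmetic; the actual rehash runs asynchronously on system_highpri_wq, and eb_lookup_vmas() flushes any resize still marked I915_CTX_RESIZE_IN_PROGRESS before trusting the table. A stand-alone check of the thresholds (needs_resize() is an illustrative copy of the predicate, not driver code):

#include <assert.h>
#include <stdbool.h>

/* stand-alone copy of the ht_needs_resize() arithmetic, for illustration */
static bool needs_resize(unsigned int count, unsigned int size)
{
	return 4 * count > 3 * size ||	/* above 3/4 full: grow */
	       4 * count + 1 < size;	/* below ~1/4 full: shrink */
}

int main(void)
{
	/* with 16 buckets: grow at 13 entries, shrink at 3 or fewer */
	assert(!needs_resize(12, 16));	/* 48 > 48 is false: leave alone */
	assert(needs_resize(13, 16));	/* 52 > 48: grow */
	assert(needs_resize(3, 16));	/* 13 < 16: shrink */
	assert(!needs_resize(4, 16));	/* 17 < 16 is false: leave alone */
	return 0;
}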
static int
eb_lookup_vmas(struct i915_execbuffer *eb)
{
- struct drm_i915_gem_object *obj;
- struct list_head objects;
- int i, ret;
+#define INTERMEDIATE BIT(0)
+ const int count = eb->args->buffer_count;
+ struct i915_vma *vma;
+ int slow_pass = -1;
+ int i;
INIT_LIST_HEAD(&eb->vmas);
- INIT_LIST_HEAD(&objects);
+ if (unlikely(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS))
+ flush_work(&eb->ctx->vma_lut.resize);
+ GEM_BUG_ON(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS);
+
+ for (i = 0; i < count; i++) {
+ __exec_to_vma(&eb->exec[i]) = 0;
+
+ hlist_for_each_entry(vma,
+ ht_head(eb->ctx, eb->exec[i].handle),
+ ctx_node) {
+ if (vma->ctx_handle != eb->exec[i].handle)
+ continue;
+
+ if (!eb_add_vma(eb, vma, i))
+ return -EINVAL;
+
+ goto next_vma;
+ }
+
+ if (slow_pass < 0)
+ slow_pass = i;
+next_vma: ;
+ }
+
+ if (slow_pass < 0)
+ return 0;
+
spin_lock(&eb->file->table_lock);
/* Grab a reference to the object and release the lock so we can lookup
* or create the VMA without using GFP_ATOMIC */
- for (i = 0; i < eb->args->buffer_count; i++) {
- obj = to_intel_bo(idr_find(&eb->file->object_idr, eb->exec[i].handle));
- if (obj == NULL) {
- spin_unlock(&eb->file->table_lock);
- DRM_DEBUG("Invalid object handle %d at index %d\n",
- eb->exec[i].handle, i);
- ret = -ENOENT;
- goto err;
- }
+ for (i = slow_pass; i < count; i++) {
+ struct drm_i915_gem_object *obj;
- if (!list_empty(&obj->obj_exec_link)) {
+ if (__exec_to_vma(&eb->exec[i]))
+ continue;
+
+ obj = to_intel_bo(idr_find(&eb->file->object_idr,
+ eb->exec[i].handle));
+ if (unlikely(!obj)) {
spin_unlock(&eb->file->table_lock);
- DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
- obj, eb->exec[i].handle, i);
- ret = -EINVAL;
- goto err;
+ DRM_DEBUG("Invalid object handle %d at index %d\n",
+ eb->exec[i].handle, i);
+ return -ENOENT;
}
- i915_gem_object_get(obj);
- list_add_tail(&obj->obj_exec_link, &objects);
+ __exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
}
spin_unlock(&eb->file->table_lock);
- i = 0;
- while (!list_empty(&objects)) {
- struct i915_vma *vma;
+ for (i = slow_pass; i < count; i++) {
+ struct drm_i915_gem_object *obj;
- obj = list_first_entry(&objects,
- struct drm_i915_gem_object,
- obj_exec_link);
+ if ((__exec_to_vma(&eb->exec[i]) & INTERMEDIATE) == 0)
+ continue;
/*
* NOTE: We can leak any vmas created here when something fails
@@ -218,61 +262,73 @@ eb_lookup_vmas(struct i915_execbuffer *eb)
* from the (obj, vm) we don't run the risk of creating
* duplicated vmas for the same vm.
*/
+ obj = u64_to_ptr(struct drm_i915_gem_object,
+ __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
vma = i915_vma_instance(obj, eb->vm, NULL);
if (unlikely(IS_ERR(vma))) {
DRM_DEBUG("Failed to lookup VMA\n");
- ret = PTR_ERR(vma);
- goto err;
+ return PTR_ERR(vma);
}
- /* Transfer ownership from the objects list to the vmas list. */
- list_add_tail(&vma->exec_link, &eb->vmas);
- list_del_init(&obj->obj_exec_link);
-
- vma->exec_entry = &eb->exec[i];
- if (eb->and < 0) {
- eb->lut[i] = vma;
- } else {
- u32 handle =
- eb->args->flags & I915_EXEC_HANDLE_LUT ?
- i : eb->exec[i].handle;
- vma->exec_handle = handle;
- hlist_add_head(&vma->exec_node,
- &eb->buckets[handle & eb->and]);
+ /* First come, first served */
+ if (!vma->ctx) {
+ vma->ctx = eb->ctx;
+ vma->ctx_handle = eb->exec[i].handle;
+ hlist_add_head(&vma->ctx_node,
+ ht_head(eb->ctx, eb->exec[i].handle));
+ eb->ctx->vma_lut.ht_count++;
+ if (i915_vma_is_ggtt(vma)) {
+ GEM_BUG_ON(obj->vma_hashed);
+ obj->vma_hashed = vma;
+ }
}
- ++i;
+
+ if (!eb_add_vma(eb, vma, i))
+ return -EINVAL;
+ }
+
+ if (ht_needs_resize(eb->ctx)) {
+ eb->ctx->vma_lut.ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
+ queue_work(system_highpri_wq, &eb->ctx->vma_lut.resize);
}
return 0;
+#undef INTERMEDIATE
+}
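The slow pass above parks each freshly looked-up object pointer in the same rsvd2 slot that will later hold the vma, tagged with INTERMEDIATE (BIT(0)) so the second loop can tell the two apart; kmalloc'd objects are at least pointer-aligned, so bit 0 of the address is guaranteed clear. A minimal sketch of that low-bit tagging, with hypothetical fake_obj and TAG names:

#include <assert.h>
#include <stdint.h>

#define TAG	0x1ull	/* mirrors the local INTERMEDIATE == BIT(0) */

struct fake_obj { long payload; };	/* aligned, so bit 0 is free */

int main(void)
{
	struct fake_obj obj;
	uint64_t slot;

	/* pass 1: park the object pointer with the tag bit set */
	slot = TAG | (uintptr_t)&obj;

	/* pass 2: the tag singles out entries still lacking a vma... */
	assert(slot & TAG);

	/* ...and masking it off recovers the pointer intact */
	assert((struct fake_obj *)(uintptr_t)(slot & ~TAG) == &obj);
	return 0;
}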
+static struct i915_vma *
+eb_get_batch(struct i915_execbuffer *eb)
+{
+ struct i915_vma *vma =
+ exec_to_vma(&eb->exec[eb->args->buffer_count - 1]);
-err:
- while (!list_empty(&objects)) {
- obj = list_first_entry(&objects,
- struct drm_i915_gem_object,
- obj_exec_link);
- list_del_init(&obj->obj_exec_link);
- i915_gem_object_put(obj);
- }
/*
- * Objects already transfered to the vmas list will be unreferenced by
- * eb_destroy.
+ * SNA is doing fancy tricks with compressing batch buffers, which leads
+ * to negative relocation deltas. Usually that works out ok since the
+ * relocate address is still positive, except when the batch is placed
+ * very low in the GTT. Ensure this doesn't happen.
+ *
+ * Note that actual hangs have only been observed on gen7, but for
+ * paranoia do it everywhere.
*/
+ if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+ vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
- return ret;
+ return vma;
}
-static struct i915_vma *eb_get_vma(struct i915_execbuffer *eb, unsigned long handle)
+static struct i915_vma *
+eb_get_vma(struct i915_execbuffer *eb, unsigned long handle)
{
- if (eb->and < 0) {
- if (handle >= -eb->and)
+ if (eb->lut_mask < 0) {
+ if (handle >= -eb->lut_mask)
return NULL;
- return eb->lut[handle];
+ return exec_to_vma(&eb->exec[handle]);
} else {
struct hlist_head *head;
struct i915_vma *vma;
- head = &eb->buckets[handle & eb->and];
+ head = &eb->buckets[hash_32(handle, eb->lut_mask)];
hlist_for_each_entry(vma, head, exec_node) {
if (vma->exec_handle == handle)
return vma;
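With the old union gone, the sign of eb->lut_mask alone selects the lookup strategy: negative means I915_EXEC_HANDLE_LUT, where the handle is a direct index into exec[] and -lut_mask is its bound; zero or positive is the bit width handed to hash_32() for the bucket array. A sketch of that encoding (lut_mode() is a hypothetical illustration, not driver code):

#include <stdio.h>

/* hypothetical: decode the two lookup modes packed into one signed int */
static const char *lut_mode(int lut_mask, unsigned long handle)
{
	if (lut_mask < 0)	/* HANDLE_LUT: handle indexes exec[] directly */
		return handle < (unsigned long)-lut_mask ? "direct index"
							 : "out of range";
	return "hash_32(handle, lut_mask) bucket walk";
}

int main(void)
{
	printf("%s\n", lut_mode(-64, 10));	/* 64-entry LUT: direct index */
	printf("%s\n", lut_mode(-64, 80));	/* beyond buffer_count: NULL in the driver */
	printf("%s\n", lut_mode(7, 80));	/* 2^7-bucket hash table */
	return 0;
}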
@@ -296,7 +352,7 @@ static void eb_destroy(struct i915_execbuffer *eb)
i915_gem_context_put(eb->ctx);
- if (eb->buckets)
+ if (eb->lut_mask >= 0)
kfree(eb->buckets);
}
@@ -916,7 +972,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
need_fence =
(entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
needs_unfenced_map) &&
- i915_gem_object_is_tiled(obj);
+ i915_gem_object_is_tiled(vma->obj);
need_mappable = need_fence || need_reloc_mappable(vma);
if (entry->flags & EXEC_OBJECT_PINNED)