diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gem/selftests')
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 579 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c | 30 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c | 214 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 704 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 306 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c | 354 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c | 33 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h | 13 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/gem/selftests/mock_context.c | 17 |
11 files changed, 1645 insertions, 610 deletions
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 3c5d17b2b670..892d12db6c49 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -96,6 +96,7 @@ huge_gem_object(struct drm_i915_private *i915, phys_addr_t phys_size, dma_addr_t dma_size) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; unsigned int cache_level; @@ -111,7 +112,7 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops); + i915_gem_object_init(obj, &huge_ops, &lock_class); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 8de83c6d81f5..688c49a24f32 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -8,6 +8,8 @@ #include "i915_selftest.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" @@ -17,6 +19,7 @@ #include "selftests/mock_drm.h" #include "selftests/mock_gem_device.h" +#include "selftests/mock_region.h" #include "selftests/i915_random.h" static const unsigned int page_sizes[] = { @@ -113,8 +116,6 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, st)) goto err; - obj->mm.madv = I915_MADV_DONTNEED; - GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); __i915_gem_object_set_pages(obj, st, sg_page_sizes); @@ -135,7 +136,6 @@ static void put_huge_pages(struct drm_i915_gem_object *obj, huge_pages_free_pages(pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops huge_page_ops = { @@ -150,6 +150,7 @@ huge_pages_object(struct drm_i915_private *i915, u64 size, unsigned int page_mask) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -166,7 +167,9 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops); + i915_gem_object_init(obj, &huge_page_ops, &lock_class); + + i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -227,8 +230,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) i915_sg_trim(st); - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -261,8 +262,6 @@ static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; sg_dma_address(sg) = page_size; - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg->length); return 0; @@ -281,7 +280,6 @@ static void fake_put_huge_pages(struct drm_i915_gem_object *obj, { fake_free_huge_pages(obj, pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops fake_ops = { @@ -299,6 +297,7 @@ static const struct drm_i915_gem_object_ops fake_ops_single = { static struct drm_i915_gem_object * fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) { + static struct lock_class_key lock_class; struct drm_i915_gem_object *obj; GEM_BUG_ON(!size); @@ -317,9 +316,11 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) drm_gem_private_object_init(&i915->drm, &obj->base, size); if (single) - i915_gem_object_init(obj, &fake_ops_single); + i915_gem_object_init(obj, &fake_ops_single, &lock_class); else - i915_gem_object_init(obj, &fake_ops); + i915_gem_object_init(obj, &fake_ops, &lock_class); + + i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -333,7 +334,12 @@ static int igt_check_page_sizes(struct i915_vma *vma) struct drm_i915_private *i915 = vma->vm->i915; unsigned int supported = INTEL_INFO(i915)->page_sizes; struct drm_i915_gem_object *obj = vma->obj; - int err = 0; + int err; + + /* We have to wait for the async bind to complete before our asserts */ + err = i915_vma_sync(vma); + if (err) + return err; if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { pr_err("unsupported page_sizes.sg=%u, supported=%u\n", @@ -447,6 +453,88 @@ out_device: return err; } +static int igt_mock_memory_region_huge_pages(void *arg) +{ + const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS }; + struct i915_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct intel_memory_region *mem; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int bit; + int err = 0; + + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("%s failed to create memory region\n", __func__); + return PTR_ERR(mem); + } + + for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + unsigned int page_size = BIT(bit); + resource_size_t phys; + int i; + + for (i = 0; i < ARRAY_SIZE(flags); ++i) { + obj = i915_gem_object_create_region(mem, page_size, + flags[i]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_region; + } + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + phys = i915_gem_object_get_dma_address(obj, 0); + if (!IS_ALIGNED(phys, page_size)) { + pr_err("%s addr misaligned(%pa) page_size=%u\n", + __func__, &phys, page_size); + err = -EINVAL; + goto out_unpin; + } + + if (vma->page_sizes.gtt != page_size) { + pr_err("%s page_sizes.gtt=%u, expected=%u\n", + __func__, vma->page_sizes.gtt, + page_size); + err = -EINVAL; + goto out_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + } + + goto out_region; + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_region: + intel_memory_region_put(mem); + return err; +} + static int igt_mock_ppgtt_misaligned_dma(void *arg) { struct i915_ppgtt *ppgtt = arg; @@ -879,9 +967,8 @@ out_object_put: return err; } -static int gpu_write(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int gpu_write(struct intel_context *ce, + struct i915_vma *vma, u32 dw, u32 val) { @@ -893,11 +980,12 @@ static int gpu_write(struct i915_vma *vma, if (err) return err; - return igt_gpu_fill_dw(vma, ctx, engine, dw * sizeof(u32), + return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), vma->size >> PAGE_SHIFT, val); } -static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +static int +__cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) { unsigned int needs_flush; unsigned long n; @@ -929,18 +1017,61 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) return err; } -static int __igt_write_huge(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int __cpu_check_lmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned long n; + int err; + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_wc_domain(obj, false); + i915_gem_object_unlock(obj); + if (err) + return err; + + err = i915_gem_object_pin_pages(obj); + if (err) + return err; + + for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { + u32 __iomem *base; + u32 read_val; + + base = i915_gem_object_lmem_io_map_page_atomic(obj, n); + + read_val = ioread32(base + dword); + io_mapping_unmap_atomic(base); + if (read_val != val) { + pr_err("n=%lu base[%u]=%u, val=%u\n", + n, dword, read_val, val); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_pages(obj); + return err; +} + +static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + if (i915_gem_object_has_struct_page(obj)) + return __cpu_check_shmem(obj, dword, val); + else if (i915_gem_object_is_lmem(obj)) + return __cpu_check_lmem(obj, dword, val); + + return -ENODEV; +} + +static int __igt_write_huge(struct intel_context *ce, struct drm_i915_gem_object *obj, u64 size, u64 offset, u32 dword, u32 val) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; struct i915_vma *vma; int err; - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -954,7 +1085,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, * The ggtt may have some pages reserved so * refrain from erroring out. */ - if (err == -ENOSPC && i915_is_ggtt(vm)) + if (err == -ENOSPC && i915_is_ggtt(ce->vm)) err = 0; goto out_vma_close; @@ -964,7 +1095,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, if (err) goto out_vma_unpin; - err = gpu_write(vma, ctx, engine, dword, val); + err = gpu_write(ce, vma, dword, val); if (err) { pr_err("gpu-write failed at offset=%llx\n", offset); goto out_vma_unpin; @@ -987,14 +1118,13 @@ out_vma_close: static int igt_write_huge(struct i915_gem_context *ctx, struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; - static struct intel_engine_cs *engines[I915_NUM_ENGINES]; - struct intel_engine_cs *engine; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + struct intel_context *ce; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); unsigned int max_page_size; - unsigned int id; + unsigned int count; u64 max; u64 num; u64 size; @@ -1008,19 +1138,18 @@ static int igt_write_huge(struct i915_gem_context *ctx, if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) size = round_up(size, I915_GTT_PAGE_SIZE_2M); - max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); - max = div_u64((vm->total - size), max_page_size); - n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) { - pr_info("store-dword-imm not supported on engine=%u\n", - id); + count = 0; + max = U64_MAX; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) continue; - } - engines[n++] = engine; - } + max = min(max, ce->vm->total); + n++; + } + i915_gem_context_unlock_engines(ctx); if (!n) return 0; @@ -1029,23 +1158,30 @@ static int igt_write_huge(struct i915_gem_context *ctx, * randomized order, lets also make feeding to the same engine a few * times in succession a possibility by enlarging the permutation array. */ - order = i915_random_order(n * I915_NUM_ENGINES, &prng); + order = i915_random_order(count * count, &prng); if (!order) return -ENOMEM; + max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); + max = div_u64(max - size, max_page_size); + /* * Try various offsets in an ascending/descending fashion until we * timeout -- we want to avoid issues hidden by effectively always using * offset = 0. */ i = 0; + engines = i915_gem_context_lock_engines(ctx); for_each_prime_number_from(num, 0, max) { u64 offset_low = num * max_page_size; u64 offset_high = (max - num) * max_page_size; u32 dword = offset_in_page(num) / 4; + struct intel_context *ce; - engine = engines[order[i] % n]; - i = (i + 1) % (n * I915_NUM_ENGINES); + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; /* * In order to utilize 64K pages we need to both pad the vma @@ -1057,22 +1193,23 @@ static int igt_write_huge(struct i915_gem_context *ctx, offset_low = round_down(offset_low, I915_GTT_PAGE_SIZE_2M); - err = __igt_write_huge(ctx, engine, obj, size, offset_low, + err = __igt_write_huge(ce, obj, size, offset_low, dword, num + 1); if (err) break; - err = __igt_write_huge(ctx, engine, obj, size, offset_high, + err = __igt_write_huge(ce, obj, size, offset_high, dword, num + 1); if (err) break; if (igt_timeout(end_time, - "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n", - __func__, engine->id, offset_low, offset_high, + "%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n", + __func__, ce->engine->name, offset_low, offset_high, max_page_size)) break; } + i915_gem_context_unlock_engines(ctx); kfree(order); @@ -1180,131 +1317,235 @@ out_device: return err; } -static int igt_ppgtt_internal_huge(void *arg) +typedef struct drm_i915_gem_object * +(*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags); + +static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) +{ + return i915->mm.gemfs && has_transparent_hugepage(); +} + +static struct drm_i915_gem_object * +igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags) +{ + if (!igt_can_allocate_thp(i915)) { + pr_info("%s missing THP support, skipping\n", __func__); + return ERR_PTR(-ENODEV); + } + + return i915_gem_object_create_shmem(i915, size); +} + +static struct drm_i915_gem_object * +igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return i915_gem_object_create_internal(i915, size); +} + +static struct drm_i915_gem_object * +igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return huge_pages_object(i915, size, size); +} + +static struct drm_i915_gem_object * +igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags) +{ + return i915_gem_object_create_lmem(i915, size, flags); +} + +static u32 igt_random_size(struct rnd_state *prng, + u32 min_page_size, + u32 max_page_size) +{ + u64 mask; + u32 size; + + GEM_BUG_ON(!is_power_of_2(min_page_size)); + GEM_BUG_ON(!is_power_of_2(max_page_size)); + GEM_BUG_ON(min_page_size < PAGE_SIZE); + GEM_BUG_ON(min_page_size > max_page_size); + + mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK; + size = prandom_u32_state(prng) & mask; + if (size < min_page_size) + size |= min_page_size; + + return size; +} + +static int igt_ppgtt_smoke_huge(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_64K, - SZ_128K, - SZ_256K, - SZ_512K, - SZ_1M, - SZ_2M, + I915_RND_STATE(prng); + struct { + igt_create_fn fn; + u32 min; + u32 max; + } backends[] = { + { igt_create_internal, SZ_64K, SZ_2M, }, + { igt_create_shmem, SZ_64K, SZ_32M, }, + { igt_create_local, SZ_64K, SZ_1G, }, }; - int i; int err; + int i; /* - * Sanity check that the HW uses huge pages correctly through internal - * -- ensure that our writes land in the right place. + * Sanity check that the HW uses huge pages correctly through our + * various backends -- ensure that our writes land in the right place. */ - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; + for (i = 0; i < ARRAY_SIZE(backends); ++i) { + u32 min = backends[i].min; + u32 max = backends[i].max; + u32 size = max; +try_again: + size = igt_random_size(&prng, min, rounddown_pow_of_two(size)); - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); + obj = backends[i].fn(i915, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err == -E2BIG) { + size >>= 1; + goto try_again; + } else if (err == -ENODEV) { + err = 0; + continue; + } + + return err; + } err = i915_gem_object_pin_pages(obj); - if (err) + if (err) { + if (err == -ENXIO) { + i915_gem_object_put(obj); + size >>= 1; + goto try_again; + } goto out_put; + } - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { - pr_info("internal unable to allocate huge-page(s) with size=%u\n", - size); + if (obj->mm.page_sizes.phys < min) { + pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n", + __func__, size, i); + err = -ENOMEM; goto out_unpin; } err = igt_write_huge(ctx, obj); if (err) { - pr_err("internal write-huge failed with size=%u\n", - size); - goto out_unpin; + pr_err("%s write-huge failed with size=%u, i=%d\n", + __func__, size, i); } - +out_unpin: i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj, I915_MM_NORMAL); +out_put: i915_gem_object_put(obj); - } - return 0; + if (err == -ENOMEM || err == -ENXIO) + err = 0; -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); + if (err) + break; - return err; -} + cond_resched(); + } -static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) -{ - return i915->mm.gemfs && has_transparent_hugepage(); + return err; } -static int igt_ppgtt_gemfs_huge(void *arg) +static int igt_ppgtt_sanity_check(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_2M, - SZ_4M, - SZ_8M, - SZ_16M, - SZ_32M, + unsigned int supported = INTEL_INFO(i915)->page_sizes; + struct { + igt_create_fn fn; + unsigned int flags; + } backends[] = { + { igt_create_system, 0, }, + { igt_create_local, I915_BO_ALLOC_CONTIGUOUS, }, }; - int i; + struct { + u32 size; + u32 pages; + } combos[] = { + { SZ_64K, SZ_64K }, + { SZ_2M, SZ_2M }, + { SZ_2M, SZ_64K }, + { SZ_2M - SZ_64K, SZ_64K }, + { SZ_2M - SZ_4K, SZ_64K | SZ_4K }, + { SZ_2M + SZ_4K, SZ_64K | SZ_4K }, + { SZ_2M + SZ_4K, SZ_2M | SZ_4K }, + { SZ_2M + SZ_64K, SZ_2M | SZ_64K }, + }; + int i, j; int err; + if (supported == I915_GTT_PAGE_SIZE_4K) + return 0; + /* - * Sanity check that the HW uses huge pages correctly through gemfs -- - * ensure that our writes land in the right place. + * Sanity check that the HW behaves with a limited set of combinations. + * We already have a bunch of randomised testing, which should give us + * a decent amount of variation between runs, however we should keep + * this to limit the chances of introducing a temporary regression, by + * testing the most obvious cases that might make something blow up. */ - if (!igt_can_allocate_thp(i915)) { - pr_info("missing THP support, skipping\n"); - return 0; - } + for (i = 0; i < ARRAY_SIZE(backends); ++i) { + for (j = 0; j < ARRAY_SIZE(combos); ++j) { + struct drm_i915_gem_object *obj; + u32 size = combos[j].size; + u32 pages = combos[j].pages; - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; + obj = backends[i].fn(i915, size, backends[i].flags); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err == -ENODEV) { + pr_info("Device lacks local memory, skipping\n"); + err = 0; + break; + } - obj = i915_gem_object_create_shmem(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); + return err; + } - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { - pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", - size); - goto out_unpin; - } + GEM_BUG_ON(pages > obj->base.size); + pages = pages & supported; - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("gemfs write-huge failed with size=%u\n", - size); - goto out_unpin; + if (pages) + obj->mm.page_sizes.sg = pages; + + err = igt_write_huge(ctx, obj); + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + + if (err) { + pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n", + __func__, size, pages, i, j); + goto out; + } } - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); + cond_resched(); } - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); +out: + if (err == -ENOMEM) + err = 0; return err; } @@ -1314,15 +1555,15 @@ static int igt_ppgtt_pin_update(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *dev_priv = ctx->i915; unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; - struct i915_address_space *vm = ctx->vm; struct drm_i915_gem_object *obj; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; - struct intel_engine_cs *engine; - enum intel_engine_id id; unsigned int n; int first, last; - int err; + int err = 0; /* * Make sure there's no funny business when doing a PIN_UPDATE -- in the @@ -1332,9 +1573,10 @@ static int igt_ppgtt_pin_update(void *arg) * huge-gtt-pages. */ - if (!vm || !i915_vm_is_4lvl(vm)) { + vm = i915_gem_context_get_vm_rcu(ctx); + if (!i915_vm_is_4lvl(vm)) { pr_info("48b PPGTT not supported, skipping\n"); - return 0; + goto out_vm; } first = ilog2(I915_GTT_PAGE_SIZE_64K); @@ -1387,7 +1629,7 @@ static int igt_ppgtt_pin_update(void *arg) goto out_unpin; } - err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL); if (err) goto out_unpin; @@ -1419,14 +1661,18 @@ static int igt_ppgtt_pin_update(void *arg) */ n = 0; - for_each_engine(engine, dev_priv, id) { - if (!intel_engine_can_store_dword(engine)) + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; - err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + err = gpu_write(ce, vma, n++, 0xdeadbeaf); if (err) - goto out_unpin; + break; } + i915_gem_context_unlock_engines(ctx); + if (err) + goto out_unpin; + while (n--) { err = cpu_check(obj, n, 0xdeadbeaf); if (err) @@ -1439,6 +1685,8 @@ out_close: i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1448,7 +1696,7 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1498,6 +1746,7 @@ out_put: out_restore: i915->mm.gemfs = gemfs; + i915_vm_put(vm); return err; } @@ -1505,14 +1754,14 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct i915_gem_engines_iter it; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER; unsigned int n; - int err; + int err = 0; /* * Sanity check shrinking huge-paged object -- make sure nothing blows @@ -1521,12 +1770,14 @@ static int igt_shrink_thp(void *arg) if (!igt_can_allocate_thp(i915)) { pr_info("missing THP support, skipping\n"); - return 0; + goto out_vm; } obj = i915_gem_object_create_shmem(i915, SZ_2M); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { @@ -1548,16 +1799,19 @@ static int igt_shrink_thp(void *arg) goto out_unpin; n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; - err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + err = gpu_write(ce, vma, n++, 0xdeadbeaf); if (err) - goto out_unpin; + break; } - + i915_gem_context_unlock_engines(ctx); i915_vma_unpin(vma); + if (err) + goto out_close; /* * Now that the pages are *unpinned* shrink-all should invoke @@ -1583,16 +1837,17 @@ static int igt_shrink_thp(void *arg) while (n--) { err = cpu_check(obj, n, 0xdeadbeaf); if (err) - goto out_unpin; + break; } - out_unpin: i915_vma_unpin(vma); out_close: i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1601,6 +1856,7 @@ int i915_gem_huge_page_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_mock_exhaust_device_supported_pages), + SUBTEST(igt_mock_memory_region_huge_pages), SUBTEST(igt_mock_ppgtt_misaligned_dma), SUBTEST(igt_mock_ppgtt_huge_fill), SUBTEST(igt_mock_ppgtt_64K), @@ -1617,7 +1873,6 @@ int i915_gem_huge_page_mock_selftests(void) mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; mkwrite_device_info(dev_priv)->ppgtt_size = 48; - mutex_lock(&dev_priv->drm.struct_mutex); ppgtt = i915_ppgtt_create(dev_priv); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); @@ -1643,9 +1898,7 @@ out_close: i915_vm_put(&ppgtt->vm); out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); drm_dev_put(&dev_priv->drm); - return err; } @@ -1656,12 +1909,12 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_pin_update), SUBTEST(igt_tmpfs_fallback), SUBTEST(igt_ppgtt_exhaust_huge), - SUBTEST(igt_ppgtt_gemfs_huge), - SUBTEST(igt_ppgtt_internal_huge), + SUBTEST(igt_ppgtt_smoke_huge), + SUBTEST(igt_ppgtt_sanity_check), }; struct drm_file *file; struct i915_gem_context *ctx; - intel_wakeref_t wakeref; + struct i915_address_space *vm; int err; if (!HAS_PPGTT(i915)) { @@ -1676,25 +1929,21 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - if (ctx->vm) - ctx->vm->scrub_64K = true; + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->scrub_64K, true); + mutex_unlock(&ctx->mutex); err = i915_subtests(tests, ctx); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - +out_file: mock_file_free(i915, file); - return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index d8804a847945..da8edee4fe0a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,6 +5,7 @@ #include "i915_selftest.h" +#include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "selftests/igt_flush_test.h" @@ -12,10 +13,9 @@ #include "huge_gem_object.h" #include "mock_context.h" -static int igt_client_fill(void *arg) +static int __igt_client_fill(struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = arg; - struct intel_context *ce = i915->engine[BCS0]->kernel_context; + struct intel_context *ce = engine->kernel_context; struct drm_i915_gem_object *obj; struct rnd_state prng; IGT_TIMEOUT(end); @@ -37,7 +37,7 @@ static int igt_client_fill(void *arg) pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, phys_sz, sz, val); - obj = huge_gem_object(i915, phys_sz, sz); + obj = huge_gem_object(engine->i915, phys_sz, sz); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_flush; @@ -103,6 +103,28 @@ err_flush: return err; } +static int igt_client_fill(void *arg) +{ + int inst = 0; + + do { + struct intel_engine_cs *engine; + int err; + + engine = intel_engine_lookup_user(arg, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + err = __igt_client_fill(engine); + if (err == -ENOMEM) + err = 0; + if (err) + return err; + } while (1); +} + int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 0ff7a89aadca..2b29f6b4e1dd 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -7,13 +7,18 @@ #include <linux/prime_numbers.h> #include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" +#include "gt/intel_ring.h" #include "i915_selftest.h" #include "selftests/i915_random.h" -static int cpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +struct context { + struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; +}; + +static int cpu_set(struct context *ctx, unsigned long offset, u32 v) { unsigned int needs_clflush; struct page *page; @@ -21,11 +26,11 @@ static int cpu_set(struct drm_i915_gem_object *obj, u32 *cpu; int err; - err = i915_gem_object_prepare_write(obj, &needs_clflush); + err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush); if (err) return err; - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); cpu = map + offset_in_page(offset); @@ -38,14 +43,12 @@ static int cpu_set(struct drm_i915_gem_object *obj, drm_clflush_virt_range(cpu, sizeof(*cpu)); kunmap_atomic(map); - i915_gem_object_finish_access(obj); + i915_gem_object_finish_access(ctx->obj); return 0; } -static int cpu_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) { unsigned int needs_clflush; struct page *page; @@ -53,11 +56,11 @@ static int cpu_get(struct drm_i915_gem_object *obj, u32 *cpu; int err; - err = i915_gem_object_prepare_read(obj, &needs_clflush); + err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush); if (err) return err; - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); map = kmap_atomic(page); cpu = map + offset_in_page(offset); @@ -67,136 +70,137 @@ static int cpu_get(struct drm_i915_gem_object *obj, *v = *cpu; kunmap_atomic(map); - i915_gem_object_finish_access(obj); + i915_gem_object_finish_access(ctx->obj); return 0; } -static int gtt_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int gtt_set(struct context *ctx, unsigned long offset, u32 v) { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } iowrite32(v, &map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } -static int gtt_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, false); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, false); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } *v = ioread32(&map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } -static int wc_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int wc_set(struct context *ctx, unsigned long offset, u32 v) { u32 *map; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_wc_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - map = i915_gem_object_pin_map(obj, I915_MAP_WC); + map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); map[offset / sizeof(*map)] = v; - i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_map(ctx->obj); return 0; } -static int wc_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) +static int wc_get(struct context *ctx, unsigned long offset, u32 *v) { u32 *map; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_wc_domain(obj, false); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_wc_domain(ctx->obj, false); + i915_gem_object_unlock(ctx->obj); if (err) return err; - map = i915_gem_object_pin_map(obj, I915_MAP_WC); + map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); *v = map[offset / sizeof(*map)]; - i915_gem_object_unpin_map(obj); + i915_gem_object_unpin_map(ctx->obj); return 0; } -static int gpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) +static int gpu_set(struct context *ctx, unsigned long offset, u32 v) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_request *rq; struct i915_vma *vma; u32 *cs; int err; - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); + i915_gem_object_lock(ctx->obj); + err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); + i915_gem_object_unlock(ctx->obj); if (err) return err; - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); if (IS_ERR(vma)) return PTR_ERR(vma); - rq = i915_request_create(i915->engine[RCS0]->kernel_context); + rq = i915_request_create(ctx->engine->kernel_context); if (IS_ERR(rq)) { i915_vma_unpin(vma); return PTR_ERR(rq); @@ -209,12 +213,12 @@ static int gpu_set(struct drm_i915_gem_object *obj, return PTR_ERR(cs); } - if (INTEL_GEN(i915) >= 8) { + if (INTEL_GEN(ctx->engine->i915) >= 8) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); *cs++ = v; - } else if (INTEL_GEN(i915) >= 4) { + } else if (INTEL_GEN(ctx->engine->i915) >= 4) { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = 0; *cs++ = i915_ggtt_offset(vma) + offset; @@ -239,32 +243,34 @@ static int gpu_set(struct drm_i915_gem_object *obj, return err; } -static bool always_valid(struct drm_i915_private *i915) +static bool always_valid(struct context *ctx) { return true; } -static bool needs_fence_registers(struct drm_i915_private *i915) +static bool needs_fence_registers(struct context *ctx) { - return !intel_gt_is_wedged(&i915->gt); -} + struct intel_gt *gt = ctx->engine->gt; -static bool needs_mi_store_dword(struct drm_i915_private *i915) -{ - if (intel_gt_is_wedged(&i915->gt)) + if (intel_gt_is_wedged(gt)) return false; - if (!HAS_ENGINE(i915, RCS0)) + return gt->ggtt->num_fences; +} + +static bool needs_mi_store_dword(struct context *ctx) +{ + if (intel_gt_is_wedged(ctx->engine->gt)) return false; - return intel_engine_can_store_dword(i915->engine[RCS0]); + return intel_engine_can_store_dword(ctx->engine); } static const struct igt_coherency_mode { const char *name; - int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); - int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); - bool (*valid)(struct drm_i915_private *i915); + int (*set)(struct context *ctx, unsigned long offset, u32 v); + int (*get)(struct context *ctx, unsigned long offset, u32 *v); + bool (*valid)(struct context *ctx); } igt_coherency_mode[] = { { "cpu", cpu_set, cpu_get, always_valid }, { "gtt", gtt_set, gtt_get, needs_fence_registers }, @@ -273,19 +279,37 @@ static const struct igt_coherency_mode { { }, }; +static struct intel_engine_cs * +random_engine(struct drm_i915_private *i915, struct rnd_state *prng) +{ + struct intel_engine_cs *engine; + unsigned int count; + + count = 0; + for_each_uabi_engine(engine, i915) + count++; + + count = i915_prandom_u32_max_state(count, prng); + for_each_uabi_engine(engine, i915) + if (count-- == 0) + return engine; + + return NULL; +} + static int igt_gem_coherency(void *arg) { const unsigned int ncachelines = PAGE_SIZE/64; - I915_RND_STATE(prng); struct drm_i915_private *i915 = arg; const struct igt_coherency_mode *read, *write, *over; - struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; unsigned long count, n; u32 *offsets, *values; + I915_RND_STATE(prng); + struct context ctx; int err = 0; - /* We repeatedly write, overwrite and read from a sequence of + /* + * We repeatedly write, overwrite and read from a sequence of * cachelines in order to try and detect incoherency (unflushed writes * from either the CPU or GPU). Each setter/getter uses our cache * domain API which should prevent incoherency. @@ -299,34 +323,36 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + ctx.engine = random_engine(i915, &prng); + GEM_BUG_ON(!ctx.engine); + pr_info("%s: using %s\n", __func__, ctx.engine->name); + for (over = igt_coherency_mode; over->name; over++) { if (!over->set) continue; - if (!over->valid(i915)) + if (!over->valid(&ctx)) continue; for (write = igt_coherency_mode; write->name; write++) { if (!write->set) continue; - if (!write->valid(i915)) + if (!write->valid(&ctx)) continue; for (read = igt_coherency_mode; read->name; read++) { if (!read->get) continue; - if (!read->valid(i915)) + if (!read->valid(&ctx)) continue; for_each_prime_number_from(count, 1, ncachelines) { - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto unlock; + ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(ctx.obj)) { + err = PTR_ERR(ctx.obj); + goto free; } i915_random_reorder(offsets, ncachelines, &prng); @@ -334,7 +360,7 @@ static int igt_gem_coherency(void *arg) values[n] = prandom_u32_state(&prng); for (n = 0; n < count; n++) { - err = over->set(obj, offsets[n], ~values[n]); + err = over->set(&ctx, offsets[n], ~values[n]); if (err) { pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", n, count, over->name, err); @@ -343,7 +369,7 @@ static int igt_gem_coherency(void *arg) } for (n = 0; n < count; n++) { - err = write->set(obj, offsets[n], values[n]); + err = write->set(&ctx, offsets[n], values[n]); if (err) { pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", n, count, write->name, err); @@ -354,7 +380,7 @@ static int igt_gem_coherency(void *arg) for (n = 0; n < count; n++) { u32 found; - err = read->get(obj, offsets[n], &found); + err = read->get(&ctx, offsets[n], &found); if (err) { pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", n, count, read->name, err); @@ -372,20 +398,18 @@ static int igt_gem_coherency(void *arg) } } - i915_gem_object_put(obj); + i915_gem_object_put(ctx.obj); } } } } -unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); +free: kfree(offsets); return err; put_object: - i915_gem_object_put(obj); - goto unlock; + i915_gem_object_put(ctx.obj); + goto free; } int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 3e6f4a65d356..62fabc023a83 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -8,6 +8,7 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -31,7 +32,6 @@ static int live_nop_switch(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; struct i915_gem_context **ctx; - enum intel_engine_id id; struct igt_live_test t; struct drm_file *file; unsigned long n; @@ -52,23 +52,21 @@ static int live_nop_switch(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); if (!ctx) { err = -ENOMEM; - goto out_unlock; + goto out_file; } for (n = 0; n < nctx; n++) { ctx[n] = live_context(i915, file); if (IS_ERR(ctx[n])) { err = PTR_ERR(ctx[n]); - goto out_unlock; + goto out_file; } } - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { struct i915_request *rq; unsigned long end_time, prime; ktime_t times[2] = {}; @@ -78,7 +76,7 @@ static int live_nop_switch(void *arg) rq = igt_request_alloc(ctx[n], engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unlock; + goto out_file; } i915_request_add(rq); } @@ -86,7 +84,7 @@ static int live_nop_switch(void *arg) pr_err("Failed to populated %d contexts\n", nctx); intel_gt_set_wedged(&i915->gt); err = -EIO; - goto out_unlock; + goto out_file; } times[1] = ktime_get_raw(); @@ -96,7 +94,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; end_time = jiffies + i915_selftest.timeout_jiffies; for_each_prime_number_from(prime, 2, 8192) { @@ -106,7 +104,7 @@ static int live_nop_switch(void *arg) rq = igt_request_alloc(ctx[n % nctx], engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unlock; + goto out_file; } /* @@ -142,7 +140,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_end(&t); if (err) - goto out_unlock; + goto out_file; pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -150,8 +148,235 @@ static int live_nop_switch(void *arg) prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); +out_file: + mock_file_free(i915, file); + return err; +} + +struct parallel_switch { + struct task_struct *tsk; + struct intel_context *ce[2]; +}; + +static int __live_parallel_switch1(void *data) +{ + struct parallel_switch *arg = data; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + struct i915_request *rq = NULL; + int err, n; + + err = 0; + for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) { + struct i915_request *prev = rq; + + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) { + i915_request_put(prev); + return PTR_ERR(rq); + } + + i915_request_get(rq); + if (prev) { + err = i915_request_await_dma_fence(rq, &prev->fence); + i915_request_put(prev); + } + + i915_request_add(rq); + } + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + return err; + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int __live_parallel_switchN(void *data) +{ + struct parallel_switch *arg = data; + struct i915_request *rq = NULL; + IGT_TIMEOUT(end_time); + unsigned long count; + int n; + + count = 0; + do { + for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { + struct i915_request *prev = rq; + int err = 0; + + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) { + i915_request_put(prev); + return PTR_ERR(rq); + } + + i915_request_get(rq); + if (prev) { + err = i915_request_await_dma_fence(rq, &prev->fence); + i915_request_put(prev); + } + + i915_request_add(rq); + if (err) { + i915_request_put(rq); + return err; + } + } + + count++; + } while (!__igt_timeout(end_time, NULL)); + i915_request_put(rq); + + pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int live_parallel_switch(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + __live_parallel_switch1, + __live_parallel_switchN, + NULL, + }; + struct parallel_switch *data = NULL; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + int (* const *fn)(void *arg); + struct i915_gem_context *ctx; + struct intel_context *ce; + struct drm_file *file; + int n, m, count; + int err = 0; + + /* + * Check we can process switches on all engines simultaneously. + */ + + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } + + engines = i915_gem_context_lock_engines(ctx); + count = engines->num_engines; + + data = kcalloc(count, sizeof(*data), GFP_KERNEL); + if (!data) { + i915_gem_context_unlock_engines(ctx); + err = -ENOMEM; + goto out_file; + } + + m = 0; /* Use the first context as our template for the engines */ + for_each_gem_engine(ce, engines, it) { + err = intel_context_pin(ce); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out; + } + data[m++].ce[0] = intel_context_get(ce); + } + i915_gem_context_unlock_engines(ctx); + + /* Clone the same set of engines into the other contexts */ + for (n = 1; n < ARRAY_SIZE(data->ce); n++) { + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + + for (m = 0; m < count; m++) { + if (!data[m].ce[0]) + continue; + + ce = intel_context_create(ctx, data[m].ce[0]->engine); + if (IS_ERR(ce)) + goto out; + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + goto out; + } + + data[m].ce[n] = ce; + } + } + + for (fn = func; !err && *fn; fn++) { + struct igt_live_test t; + int n; + + err = igt_live_test_begin(&t, i915, __func__, ""); + if (err) + break; + + for (n = 0; n < count; n++) { + if (!data[n].ce[0]) + continue; + + data[n].tsk = kthread_run(*fn, &data[n], + "igt/parallel:%s", + data[n].ce[0]->engine->name); + if (IS_ERR(data[n].tsk)) { + err = PTR_ERR(data[n].tsk); + break; + } + get_task_struct(data[n].tsk); + } + + yield(); /* start all threads before we kthread_stop() */ + + for (n = 0; n < count; n++) { + int status; + + if (IS_ERR_OR_NULL(data[n].tsk)) + continue; + + status = kthread_stop(data[n].tsk); + if (status && !err) + err = status; + + put_task_struct(data[n].tsk); + data[n].tsk = NULL; + } + + if (igt_live_test_end(&t)) + err = -EIO; + } + +out: + for (n = 0; n < count; n++) { + for (m = 0; m < ARRAY_SIZE(data->ce); m++) { + if (!data[n].ce[m]) + continue; + + intel_context_unpin(data[n].ce[m]); + intel_context_put(data[n].ce[m]); + } + } + kfree(data); +out_file: mock_file_free(i915, file); return err; } @@ -166,28 +391,20 @@ static unsigned long fake_page_count(struct drm_i915_gem_object *obj) return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; } -static int gpu_fill(struct drm_i915_gem_object *obj, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int gpu_fill(struct intel_context *ce, + struct drm_i915_gem_object *obj, unsigned int dw) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; struct i915_vma *vma; int err; - GEM_BUG_ON(obj->base.size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + GEM_BUG_ON(obj->base.size > ce->vm->total); + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); - if (err) - return err; - err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); if (err) return err; @@ -200,9 +417,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, * whilst checking that each context provides a unique view * into the object. */ - err = igt_gpu_fill_dw(vma, - ctx, - engine, + err = igt_gpu_fill_dw(ce, vma, (dw * real_page_count(obj)) << PAGE_SHIFT | (dw * sizeof(u32)), real_page_count(obj), @@ -305,22 +520,21 @@ static int file_add_object(struct drm_file *file, } static struct drm_i915_gem_object * -create_test_object(struct i915_gem_context *ctx, +create_test_object(struct i915_address_space *vm, struct drm_file *file, struct list_head *objects) { struct drm_i915_gem_object *obj; - struct i915_address_space *vm = ctx->vm ?: &ctx->i915->ggtt.vm; u64 size; int err; /* Keep in GEM's good graces */ - i915_retire_requests(ctx->i915); + intel_gt_retire_requests(vm->gt); size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); size = round_down(size, DW_PER_PAGE * PAGE_SIZE); - obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); if (IS_ERR(obj)) return obj; @@ -348,11 +562,49 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj) return npages / DW_PER_PAGE; } +static void throttle_release(struct i915_request **q, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (IS_ERR_OR_NULL(q[i])) + continue; + + i915_request_put(fetch_and_zero(&q[i])); + } +} + +static int throttle(struct intel_context *ce, + struct i915_request **q, int count) +{ + int i; + + if (!IS_ERR_OR_NULL(q[0])) { + if (i915_request_wait(q[0], + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) + return -EINTR; + + i915_request_put(q[0]); + } + + for (i = 0; i < count - 1; i++) + q[i] = q[i + 1]; + + q[i] = intel_context_create_request(ce); + if (IS_ERR(q[i])) + return PTR_ERR(q[i]); + + i915_request_get(q[i]); + i915_request_add(q[i]); + + return 0; +} + static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - enum intel_engine_id id; int err = -ENODEV; /* @@ -364,9 +616,10 @@ static int igt_ctx_exec(void *arg) if (!DRIVER_CAPS(i915)->has_logical_contexts) return 0; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { struct drm_i915_gem_object *obj = NULL; unsigned long ncontexts, ndwords, dw; + struct i915_request *tq[5] = {}; struct igt_live_test t; struct drm_file *file; IGT_TIMEOUT(end_time); @@ -382,39 +635,53 @@ static int igt_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; ncontexts = 0; ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; + struct intel_context *ce; - ctx = live_context(i915, file); + ctx = kernel_context(i915); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); + if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto out_unlock; + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; } } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); - goto out_unlock; + engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; } if (++dw == max_dwords(obj)) { @@ -424,6 +691,9 @@ static int igt_ctx_exec(void *arg) ndwords++; ncontexts++; + + intel_context_put(ce); + kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", @@ -441,10 +711,10 @@ static int igt_ctx_exec(void *arg) dw += rem; } -out_unlock: +out_file: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); if (err) @@ -459,9 +729,9 @@ out_unlock: static int igt_shared_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; + struct i915_request *tq[5] = {}; struct i915_gem_context *parent; struct intel_engine_cs *engine; - enum intel_engine_id id; struct igt_live_test t; struct drm_file *file; int err = 0; @@ -478,24 +748,22 @@ static int igt_shared_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - parent = live_context(i915, file); if (IS_ERR(parent)) { err = PTR_ERR(parent); - goto out_unlock; + goto out_file; } if (!parent->vm) { /* not full-ppgtt; nothing to share */ err = 0; - goto out_unlock; + goto out_file; } err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { unsigned long ncontexts, ndwords, dw; struct drm_i915_gem_object *obj = NULL; IGT_TIMEOUT(end_time); @@ -509,6 +777,7 @@ static int igt_shared_ctx_exec(void *arg) ncontexts = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; + struct intel_context *ce; ctx = kernel_context(i915); if (IS_ERR(ctx)) { @@ -516,23 +785,38 @@ static int igt_shared_ctx_exec(void *arg) goto out_test; } + mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, parent->vm); + mutex_unlock(&ctx->mutex); + + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); if (!obj) { - obj = create_test_object(parent, file, &objects); + obj = create_test_object(parent->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); + intel_context_put(ce); kernel_context_close(ctx); goto out_test; } } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); + engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + intel_context_put(ce); + kernel_context_close(ctx); + goto out_test; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); kernel_context_close(ctx); goto out_test; } @@ -545,6 +829,7 @@ static int igt_shared_ctx_exec(void *arg) ndwords++; ncontexts++; + intel_context_put(ce); kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", @@ -562,16 +847,13 @@ static int igt_shared_ctx_exec(void *arg) dw += rem; } - mutex_unlock(&i915->drm.struct_mutex); i915_gem_drain_freed_objects(i915); - mutex_lock(&i915->drm.struct_mutex); } out_test: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - +out_file: mock_file_free(i915, file); return err; } @@ -604,6 +886,8 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(vma->vm->gt); + vma = i915_vma_instance(obj, vma->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -681,10 +965,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto skip_request; - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); - + i915_vma_unpin_and_release(&batch, 0); i915_vma_unpin(vma); *rq_out = i915_request_get(rq); @@ -698,8 +979,7 @@ skip_request: err_request: i915_request_add(rq); err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); err_vma: i915_vma_unpin(vma); @@ -860,8 +1140,8 @@ out: igt_spinner_end(spin); if ((flags & TEST_IDLE) && ret == 0) { - ret = i915_gem_wait_for_idle(ce->engine->i915, - 0, MAX_SCHEDULE_TIMEOUT); + ret = intel_gt_wait_for_idle(ce->engine->gt, + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; @@ -887,7 +1167,7 @@ __sseu_test(const char *name, if (ret) return ret; - ret = __intel_context_reconfigure_sseu(ce, sseu); + ret = intel_context_reconfigure_sseu(ce, sseu); if (ret) goto out_spin; @@ -908,106 +1188,97 @@ __igt_ctx_sseu(struct drm_i915_private *i915, const char *name, unsigned int flags) { - struct intel_engine_cs *engine = i915->engine[RCS0]; struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct intel_context *ce; - struct intel_sseu pg_sseu; - struct drm_file *file; - int ret; - - if (INTEL_GEN(i915) < 9 || !engine) - return 0; - - if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) - return 0; + int inst = 0; + int ret = 0; - if (hweight32(engine->sseu.slice_mask) < 2) + if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg) return 0; - /* - * Gen11 VME friendly power-gated configuration with half enabled - * sub-slices. - */ - pg_sseu = engine->sseu; - pg_sseu.slice_mask = 1; - pg_sseu.subslice_mask = - ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); - - pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", - name, flags, hweight32(engine->sseu.slice_mask), - hweight32(pg_sseu.slice_mask)); - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - if (flags & TEST_RESET) igt_global_reset_lock(&i915->gt); - mutex_lock(&i915->drm.struct_mutex); - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto out_unlock; - } - i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */ - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto out_unlock; } - ce = i915_gem_context_get_engine(ctx, RCS0); - if (IS_ERR(ce)) { - ret = PTR_ERR(ce); - goto out_put; - } + do { + struct intel_engine_cs *engine; + struct intel_context *ce; + struct intel_sseu pg_sseu; - ret = intel_context_pin(ce); - if (ret) - goto out_context; + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_RENDER, + inst++); + if (!engine) + break; - /* First set the default mask. */ - ret = __sseu_test(name, flags, ce, obj, engine->sseu); - if (ret) - goto out_fail; + if (hweight32(engine->sseu.slice_mask) < 2) + continue; - /* Then set a power-gated configuration. */ - ret = __sseu_test(name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; + /* + * Gen11 VME friendly power-gated configuration with + * half enabled sub-slices. + */ + pg_sseu = engine->sseu; + pg_sseu.slice_mask = 1; + pg_sseu.subslice_mask = + ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); + + pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", + engine->name, name, flags, + hweight32(engine->sseu.slice_mask), + hweight32(pg_sseu.slice_mask)); + + ce = intel_context_create(engine->kernel_context->gem_context, + engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + goto out_put; + } - /* Back to defaults. */ - ret = __sseu_test(name, flags, ce, obj, engine->sseu); - if (ret) - goto out_fail; + ret = intel_context_pin(ce); + if (ret) + goto out_ce; - /* One last power-gated configuration for the road. */ - ret = __sseu_test(name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; + /* First set the default mask. */ + ret = __sseu_test(name, flags, ce, obj, engine->sseu); + if (ret) + goto out_unpin; + + /* Then set a power-gated configuration. */ + ret = __sseu_test(name, flags, ce, obj, pg_sseu); + if (ret) + goto out_unpin; + + /* Back to defaults. */ + ret = __sseu_test(name, flags, ce, obj, engine->sseu); + if (ret) + goto out_unpin; -out_fail: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + /* One last power-gated configuration for the road. */ + ret = __sseu_test(name, flags, ce, obj, pg_sseu); + if (ret) + goto out_unpin; + +out_unpin: + intel_context_unpin(ce); +out_ce: + intel_context_put(ce); + } while (!ret); + + if (igt_flush_test(i915)) ret = -EIO; - intel_context_unpin(ce); -out_context: - intel_context_put(ce); out_put: i915_gem_object_put(obj); out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - if (flags & TEST_RESET) igt_global_reset_unlock(&i915->gt); - mock_file_free(i915, file); - if (ret) pr_err("%s: Failed with %d!\n", name, ret); @@ -1041,6 +1312,7 @@ static int igt_ctx_readonly(void *arg) { struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj = NULL; + struct i915_request *tq[5] = {}; struct i915_address_space *vm; struct i915_gem_context *ctx; unsigned long idx, ndwords, dw; @@ -1061,52 +1333,63 @@ static int igt_ctx_readonly(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - vm = ctx->vm ?: &i915->ggtt.alias->vm; + rcu_read_lock(); + vm = rcu_dereference(ctx->vm) ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { + rcu_read_unlock(); err = 0; - goto out_unlock; + goto out_file; } + rcu_read_unlock(); ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { - struct intel_engine_cs *engine; - unsigned int id; + struct i915_gem_engines_iter it; + struct intel_context *ce; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto out_unlock; + i915_gem_context_unlock_engines(ctx); + goto out_file; } if (prandom_u32_state(&prng) & 1) i915_gem_object_set_readonly(obj); } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); - goto out_unlock; + ce->engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + i915_gem_context_unlock_engines(ctx); + goto out_file; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out_file; } if (++dw == max_dwords(obj)) { @@ -1115,6 +1398,7 @@ static int igt_ctx_readonly(void *arg) } ndwords++; } + i915_gem_context_unlock_engines(ctx); } pr_info("Submitted %lu dwords (across %u engines)\n", ndwords, RUNTIME_INFO(i915)->num_engines); @@ -1137,19 +1421,19 @@ static int igt_ctx_readonly(void *arg) dw += rem; } -out_unlock: +out_file: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; } -static int check_scratch(struct i915_gem_context *ctx, u64 offset) +static int check_scratch(struct i915_address_space *vm, u64 offset) { struct drm_mm_node *node = - __drm_mm_interval_first(&ctx->vm->mm, + __drm_mm_interval_first(&vm->mm, offset, offset + sizeof(u32) - 1); if (!node || node->start > offset) return 0; @@ -1167,6 +1451,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *vma; u32 *cmd; @@ -1197,17 +1482,20 @@ static int write_to_scratch(struct i915_gem_context *ctx, __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + intel_gt_chipset_flush(engine->gt); + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto err_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto err_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1229,12 +1517,11 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) goto skip_request; - i915_vma_unpin(vma); - i915_vma_close(vma); - i915_vma_put(vma); + i915_vma_unpin_and_release(&vma, 0); i915_request_add(rq); + i915_vm_put(vm); return 0; skip_request: @@ -1243,6 +1530,8 @@ err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); +err_vm: + i915_vm_put(vm); err: i915_gem_object_put(obj); return err; @@ -1254,6 +1543,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */ const u32 result = 0x100; struct i915_request *rq; @@ -1296,17 +1586,20 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + intel_gt_chipset_flush(engine->gt); + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto err_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto err_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1337,12 +1630,12 @@ static int read_from_scratch(struct i915_gem_context *ctx, err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) - goto err; + goto err_vm; cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto err_vm; } *value = cmd[result / sizeof(*cmd)]; @@ -1357,6 +1650,8 @@ err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); +err_vm: + i915_vm_put(vm); err: i915_gem_object_put(obj); return err; @@ -1371,7 +1666,6 @@ static int igt_vm_isolation(void *arg) struct drm_file *file; I915_RND_STATE(prng); unsigned long count; - unsigned int id; u64 vm_total; int err; @@ -1387,34 +1681,32 @@ static int igt_vm_isolation(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx_a = live_context(i915, file); if (IS_ERR(ctx_a)) { err = PTR_ERR(ctx_a); - goto out_unlock; + goto out_file; } ctx_b = live_context(i915, file); if (IS_ERR(ctx_b)) { err = PTR_ERR(ctx_b); - goto out_unlock; + goto out_file; } /* We can only test vm isolation, if the vm are distinct */ if (ctx_a->vm == ctx_b->vm) - goto out_unlock; + goto out_file; vm_total = ctx_a->vm->total; GEM_BUG_ON(ctx_b->vm->total != vm_total); vm_total -= I915_GTT_PAGE_SIZE; count = 0; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { IGT_TIMEOUT(end_time); unsigned long this = 0; @@ -1436,7 +1728,7 @@ static int igt_vm_isolation(void *arg) err = read_from_scratch(ctx_b, engine, offset, &value); if (err) - goto out_unlock; + goto out_file; if (value) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", @@ -1445,7 +1737,7 @@ static int igt_vm_isolation(void *arg) lower_32_bits(offset), this); err = -EINVAL; - goto out_unlock; + goto out_file; } this++; @@ -1455,30 +1747,13 @@ static int igt_vm_isolation(void *arg) pr_info("Checked %lu scratch offsets across %d engines\n", count, RUNTIME_INFO(i915)->num_engines); -out_unlock: +out_file: if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); return err; } -static __maybe_unused const char * -__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (engines == ALL_ENGINES) - return "all"; - - for_each_engine_masked(engine, i915, engines, tmp) - return engine->name; - - return "none"; -} - static bool skip_unused_engines(struct intel_context *ce, void *data) { return !ce->state; @@ -1506,13 +1781,9 @@ static int mock_context_barrier(void *arg) * a request; useful for retiring old state after loading new. */ - mutex_lock(&i915->drm.struct_mutex); - ctx = mock_context(i915, "mock"); - if (!ctx) { - err = -ENOMEM; - goto unlock; - } + if (!ctx) + return -ENOMEM; counter = 0; err = context_barrier_task(ctx, 0, @@ -1585,8 +1856,6 @@ static int mock_context_barrier(void *arg) out: mock_context_close(ctx); -unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; #undef pr_fmt #define pr_fmt(x) x @@ -1614,6 +1883,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_switch), + SUBTEST(live_parallel_switch), SUBTEST(igt_ctx_exec), SUBTEST(igt_ctx_readonly), SUBTEST(igt_ctx_sseu), diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 1d27babff0ce..29b2077b73d2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -10,6 +10,7 @@ #include "gt/intel_gt_pm.h" #include "huge_gem_object.h" #include "i915_selftest.h" +#include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" struct tile { @@ -76,18 +77,103 @@ static u64 tiled_offset(const struct tile *tile, u64 v) static int check_partial_mapping(struct drm_i915_gem_object *obj, const struct tile *tile, - unsigned long end_time) + struct rnd_state *prng) { - const unsigned int nreal = obj->scratch / PAGE_SIZE; const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_ggtt_view view; struct i915_vma *vma; unsigned long page; + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; int err; - if (igt_timeout(end_time, - "%s: timed out before tiling=%d stride=%d\n", - __func__, tile->tiling, tile->stride)) - return -EINTR; + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) { + pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n", + tile->tiling, tile->stride, err); + return err; + } + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); + if (err) { + pr_err("Failed to flush to GTT write domain; err=%d\n", err); + return err; + } + + page = i915_prandom_u32_max_state(npages, prng); + view = compute_partial_view(obj, page, MIN_CHUNK_PAGES); + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(vma)); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(io)); + err = PTR_ERR(io); + goto out; + } + + iowrite32(page, io + n * PAGE_SIZE / sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + goto out; + + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile->tiling ? tile_row_pages(obj) : 0, + vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + +out: + i915_vma_destroy(vma); + return err; +} + +static int check_partial_mappings(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); if (err) { @@ -170,11 +256,42 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, return err; i915_vma_destroy(vma); + + if (igt_timeout(end_time, + "%s: timed out after tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; } return 0; } +static unsigned int +setup_tile_size(struct tile *tile, struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) <= 2) { + tile->height = 16; + tile->width = 128; + tile->size = 11; + } else if (tile->tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile->height = 32; + tile->width = 128; + tile->size = 12; + } else { + tile->height = 8; + tile->width = 512; + tile->size = 12; + } + + if (INTEL_GEN(i915) < 4) + return 8192 / tile->width; + else if (INTEL_GEN(i915) < 7) + return 128 * I965_FENCE_MAX_PITCH_VAL / tile->width; + else + return 128 * GEN7_FENCE_MAX_PITCH_VAL / tile->width; +} + static int igt_partial_tiling(void *arg) { const unsigned int nreal = 1 << 12; /* largest tile row x2 */ @@ -184,6 +301,9 @@ static int igt_partial_tiling(void *arg) int tiling; int err; + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return 0; + /* We want to check the page mapping and fencing of a large object * mmapped through the GTT. The object we create is larger than can * possibly be mmaped as a whole, and so we must use partial GGTT vma. @@ -205,7 +325,6 @@ static int igt_partial_tiling(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); if (1) { @@ -219,7 +338,7 @@ static int igt_partial_tiling(void *arg) tile.swizzle = I915_BIT_6_SWIZZLE_NONE; tile.tiling = I915_TILING_NONE; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err && err != -EINTR) goto out_unlock; } @@ -241,10 +360,10 @@ static int igt_partial_tiling(void *arg) tile.tiling = tiling; switch (tiling) { case I915_TILING_X: - tile.swizzle = i915->mm.bit_6_swizzle_x; + tile.swizzle = i915->ggtt.bit_6_swizzle_x; break; case I915_TILING_Y: - tile.swizzle = i915->mm.bit_6_swizzle_y; + tile.swizzle = i915->ggtt.bit_6_swizzle_y; break; } @@ -253,31 +372,11 @@ static int igt_partial_tiling(void *arg) tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) continue; - if (INTEL_GEN(i915) <= 2) { - tile.height = 16; - tile.width = 128; - tile.size = 11; - } else if (tile.tiling == I915_TILING_Y && - HAS_128_BYTE_Y_TILING(i915)) { - tile.height = 32; - tile.width = 128; - tile.size = 12; - } else { - tile.height = 8; - tile.width = 512; - tile.size = 12; - } - - if (INTEL_GEN(i915) < 4) - max_pitch = 8192 / tile.width; - else if (INTEL_GEN(i915) < 7) - max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; - else - max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + max_pitch = setup_tile_size(&tile, i915); for (pitch = max_pitch; pitch; pitch >>= 1) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -285,7 +384,7 @@ static int igt_partial_tiling(void *arg) if (pitch > 2 && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch - 1); - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -294,7 +393,7 @@ static int igt_partial_tiling(void *arg) if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch + 1); - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -305,7 +404,7 @@ static int igt_partial_tiling(void *arg) if (INTEL_GEN(i915) >= 4) { for_each_prime_number(pitch, max_pitch) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -318,7 +417,100 @@ next_tiling: ; out_unlock: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + +static int igt_smoke_tiling(void *arg) +{ + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; + I915_RND_STATE(prng); + unsigned long count; + IGT_TIMEOUT(end); + int err; + + if (!i915_ggtt_has_aperture(&i915->ggtt)) + return 0; + + /* + * igt_partial_tiling() does an exhastive check of partial tiling + * chunking, but will undoubtably run out of time. Here, we do a + * randomised search and hope over many runs of 1s with different + * seeds we will do a thorough check. + * + * Remember to look at the st_seed if we see a flip-flop in BAT! + */ + + if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) + return 0; + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + do { + struct tile tile; + + tile.tiling = + i915_prandom_u32_max_state(I915_TILING_Y + 1, &prng); + switch (tile.tiling) { + case I915_TILING_NONE: + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + break; + + case I915_TILING_X: + tile.swizzle = i915->ggtt.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->ggtt.bit_6_swizzle_y; + break; + } + + if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (tile.tiling != I915_TILING_NONE) { + unsigned int max_pitch = setup_tile_size(&tile, i915); + + tile.stride = + i915_prandom_u32_max_state(max_pitch, &prng); + tile.stride = (1 + tile.stride) * tile.width; + if (INTEL_GEN(i915) < 4) + tile.stride = rounddown_pow_of_two(tile.stride); + } + + err = check_partial_mapping(obj, &tile, &prng); + if (err) + break; + + count++; + } while (!__igt_timeout(end, NULL)); + + pr_info("%s: Completed %lu trials\n", __func__, count); + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); @@ -329,20 +521,19 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct intel_engine_cs *engine; - enum intel_engine_id id; - struct i915_vma *vma; - int err; - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); + for_each_uabi_engine(engine, i915) { + struct i915_request *rq; + struct i915_vma *vma; + int err; - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); - for_each_engine(engine, i915, id) { - struct i915_request *rq; + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; rq = i915_request_create(engine->kernel_context); if (IS_ERR(rq)) { @@ -358,12 +549,13 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) i915_vma_unlock(vma); i915_request_add(rq); + i915_vma_unpin(vma); + if (err) + return err; } - i915_vma_unpin(vma); i915_gem_object_put(obj); /* leave it only alive via its active ref */ - - return err; + return 0; } static bool assert_mmap_offset(struct drm_i915_private *i915, @@ -386,21 +578,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_driver_unregister__shrinker(i915); - intel_gt_pm_get(&i915->gt); - - cancel_delayed_work_sync(&i915->gem.retire_work); - flush_work(&i915->gem.idle_work); + cancel_delayed_work_sync(&i915->gt.requests.retire_work); } static void restore_retire_worker(struct drm_i915_private *i915) { + igt_flush_test(i915); intel_gt_pm_put(&i915->gt); - - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_driver_register__shrinker(i915); } @@ -490,9 +675,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = make_obj_busy(obj); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); goto err_obj; @@ -515,6 +698,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_partial_tiling), + SUBTEST(igt_smoke_tiling), SUBTEST(igt_mmap_offset_exhaustion), }; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index c21d747e7d05..e8132aca0bb6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -3,40 +3,241 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/sort.h> + #include "gt/intel_gt.h" +#include "gt/intel_engine_user.h" #include "i915_selftest.h" +#include "gem/i915_gem_context.h" #include "selftests/igt_flush_test.h" +#include "selftests/i915_random.h" #include "selftests/mock_drm.h" #include "huge_gem_object.h" #include "mock_context.h" -static int igt_fill_blt(void *arg) +static int wrap_ktime_compare(const void *A, const void *B) +{ + const ktime_t *a = A, *b = B; + + return ktime_compare(*a, *b); +} + +static int __perf_fill_blt(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + int inst = 0; + + do { + struct intel_engine_cs *engine; + ktime_t t[5]; + int pass; + int err; + + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct intel_context *ce = engine->kernel_context; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = i915_gem_object_fill_blt(obj, ce, 0); + if (err) + return err; + + err = i915_gem_object_wait(obj, + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: blt %zd KiB fill: %lld MiB/s\n", + engine->name, + obj->base.size >> 10, + div64_u64(mul_u32_u32(4 * obj->base.size, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + } while (1); +} + +static int perf_fill_blt(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_context *ce = i915->engine[BCS0]->kernel_context; - struct drm_i915_gem_object *obj; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __perf_fill_blt(obj); + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + +static int __perf_copy_blt(struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst) +{ + struct drm_i915_private *i915 = to_i915(src->base.dev); + int inst = 0; + + do { + struct intel_engine_cs *engine; + ktime_t t[5]; + int pass; + + engine = intel_engine_lookup_user(i915, + I915_ENGINE_CLASS_COPY, + inst++); + if (!engine) + return 0; + + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct intel_context *ce = engine->kernel_context; + ktime_t t0, t1; + int err; + + t0 = ktime_get(); + + err = i915_gem_object_copy_blt(src, dst, ce); + if (err) + return err; + + err = i915_gem_object_wait(dst, + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: blt %zd KiB copy: %lld MiB/s\n", + engine->name, + src->base.size >> 10, + div64_u64(mul_u32_u32(4 * src->base.size, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + } while (1); +} + +static int perf_copy_blt(void *arg) +{ + struct drm_i915_private *i915 = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *src, *dst; + int err; + + src = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(src)) + return PTR_ERR(src); + + dst = i915_gem_object_create_internal(i915, sizes[i]); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err_src; + } + + err = __perf_copy_blt(src, dst); + + i915_gem_object_put(dst); +err_src: + i915_gem_object_put(src); + if (err) + return err; + } + + return 0; +} + +struct igt_thread_arg { + struct drm_i915_private *i915; struct rnd_state prng; + unsigned int n_cpus; +}; + +static int igt_fill_blt_thread(void *arg) +{ + struct igt_thread_arg *thread = arg; + struct drm_i915_private *i915 = thread->i915; + struct rnd_state *prng = &thread->prng; + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct drm_file *file; + unsigned int prio; IGT_TIMEOUT(end); - u32 *vaddr; - int err = 0; + int err; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } - prandom_seed_state(&prng, i915_selftest.random_seed); + prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); + ctx->sched.priority = I915_USER_PRIORITY(prio); - /* - * XXX: needs some threads to scale all these tests, also maybe throw - * in submission from higher priority context to see if we are - * preempted for very large objects... - */ + ce = i915_gem_context_get_engine(ctx, BCS0); + GEM_BUG_ON(IS_ERR(ce)); do { const u32 max_block_size = S16_MAX * PAGE_SIZE; - u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); - u32 phys_sz = sz % (max_block_size + 1); - u32 val = prandom_u32_state(&prng); + u32 val = prandom_u32_state(prng); + u64 total = ce->vm->total; + u32 phys_sz; + u32 sz; + u32 *vaddr; u32 i; + /* + * If we have a tiny shared address space, like for the GGTT + * then we can't be too greedy. + */ + if (i915_is_ggtt(ce->vm)) + total = div64_u64(total, thread->n_cpus); + + sz = min_t(u64, total >> 4, prandom_u32_state(prng)); + phys_sz = sz % (max_block_size + 1); + sz = round_up(sz, PAGE_SIZE); phys_sz = round_up(phys_sz, PAGE_SIZE); @@ -65,9 +266,7 @@ static int igt_fill_blt(void *arg) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_fill_blt(obj, ce, val); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; @@ -100,28 +299,56 @@ err_flush: if (err == -ENOMEM) err = 0; + intel_context_put(ce); +out_file: + mock_file_free(i915, file); return err; } -static int igt_copy_blt(void *arg) +static int igt_copy_blt_thread(void *arg) { - struct drm_i915_private *i915 = arg; - struct intel_context *ce = i915->engine[BCS0]->kernel_context; + struct igt_thread_arg *thread = arg; + struct drm_i915_private *i915 = thread->i915; + struct rnd_state *prng = &thread->prng; struct drm_i915_gem_object *src, *dst; - struct rnd_state prng; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct drm_file *file; + unsigned int prio; IGT_TIMEOUT(end); - u32 *vaddr; - int err = 0; + int err; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); - prandom_seed_state(&prng, i915_selftest.random_seed); + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } + + prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); + ctx->sched.priority = I915_USER_PRIORITY(prio); + + ce = i915_gem_context_get_engine(ctx, BCS0); + GEM_BUG_ON(IS_ERR(ce)); do { const u32 max_block_size = S16_MAX * PAGE_SIZE; - u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); - u32 phys_sz = sz % (max_block_size + 1); - u32 val = prandom_u32_state(&prng); + u32 val = prandom_u32_state(prng); + u64 total = ce->vm->total; + u32 phys_sz; + u32 sz; + u32 *vaddr; u32 i; + if (i915_is_ggtt(ce->vm)) + total = div64_u64(total, thread->n_cpus); + + sz = min_t(u64, total >> 4, prandom_u32_state(prng)); + phys_sz = sz % (max_block_size + 1); + sz = round_up(sz, PAGE_SIZE); phys_sz = round_up(phys_sz, PAGE_SIZE); @@ -166,9 +393,7 @@ static int igt_copy_blt(void *arg) if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) dst->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_copy_blt(src, dst, ce); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; @@ -205,12 +430,85 @@ err_flush: if (err == -ENOMEM) err = 0; + intel_context_put(ce); +out_file: + mock_file_free(i915, file); + return err; +} + +static int igt_threaded_blt(struct drm_i915_private *i915, + int (*blt_fn)(void *arg)) +{ + struct igt_thread_arg *thread; + struct task_struct **tsk; + I915_RND_STATE(prng); + unsigned int n_cpus; + unsigned int i; + int err = 0; + + n_cpus = num_online_cpus() + 1; + + tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL); + if (!tsk) + return 0; + + thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL); + if (!thread) { + kfree(tsk); + return 0; + } + + for (i = 0; i < n_cpus; ++i) { + thread[i].i915 = i915; + thread[i].n_cpus = n_cpus; + thread[i].prng = + I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); + + tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i); + if (IS_ERR(tsk[i])) { + err = PTR_ERR(tsk[i]); + break; + } + + get_task_struct(tsk[i]); + } + + yield(); /* start all threads before we kthread_stop() */ + + for (i = 0; i < n_cpus; ++i) { + int status; + + if (IS_ERR_OR_NULL(tsk[i])) + continue; + + status = kthread_stop(tsk[i]); + if (status && !err) + err = status; + + put_task_struct(tsk[i]); + } + + kfree(tsk); + kfree(thread); + return err; } +static int igt_fill_blt(void *arg) +{ + return igt_threaded_blt(arg, igt_fill_blt_thread); +} + +static int igt_copy_blt(void *arg) +{ + return igt_threaded_blt(arg, igt_copy_blt_thread); +} + int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(perf_fill_blt), + SUBTEST(perf_copy_blt), SUBTEST(igt_fill_blt), SUBTEST(igt_copy_blt), }; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 94a15e3f6db8..34932871b3a5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,9 +25,7 @@ static int mock_phys_object(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); goto out_obj; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 57ece53c1075..6718da20f35d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -9,6 +9,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_gt.h" #include "i915_vma.h" #include "i915_drv.h" @@ -84,6 +85,8 @@ igt_emit_store_dw(struct i915_vma *vma, *cmd = MI_BATCH_BUFFER_END; i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(vma->vm->gt); + vma = i915_vma_instance(obj, vma->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -101,40 +104,35 @@ err: return ERR_PTR(err); } -int igt_gpu_fill_dw(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, - unsigned long count, - u32 val) +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; struct i915_request *rq; struct i915_vma *batch; unsigned int flags; int err; - GEM_BUG_ON(vma->size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); GEM_BUG_ON(!i915_vma_is_pinned(vma)); batch = igt_emit_store_dw(vma, offset, count, val); if (IS_ERR(batch)) return PTR_ERR(batch); - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_batch; } flags = 0; - if (INTEL_GEN(vm->i915) <= 5) + if (INTEL_GEN(ce->vm->i915) <= 5) flags |= I915_DISPATCH_SECURE; - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); if (err) goto err_request; @@ -156,9 +154,7 @@ int igt_gpu_fill_dw(struct i915_vma *vma, i915_request_add(rq); - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); return 0; @@ -167,7 +163,6 @@ skip_request: err_request: i915_request_add(rq); err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h index 361a7ef866b0..4221cf84d175 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h @@ -11,9 +11,11 @@ struct i915_request; struct i915_gem_context; -struct intel_engine_cs; struct i915_vma; +struct intel_context; +struct intel_engine_cs; + struct i915_request * igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -23,11 +25,8 @@ igt_emit_store_dw(struct i915_vma *vma, unsigned long count, u32 val); -int igt_gpu_fill_dw(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, - unsigned long count, - u32 val); +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val); #endif /* __IGT_GEM_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index be8974ccff24..29b8984f0e47 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; - int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -23,6 +22,8 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + i915_gem_context_set_persistence(ctx); + mutex_init(&ctx->engines_mutex); e = default_engines(ctx); if (IS_ERR(e)) @@ -30,13 +31,8 @@ mock_context(struct drm_i915_private *i915, RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); mutex_init(&ctx->mutex); - ret = i915_gem_context_pin_hw_id(ctx); - if (ret < 0) - goto err_engines; - if (name) { struct i915_ppgtt *ppgtt; @@ -48,14 +44,15 @@ mock_context(struct drm_i915_private *i915, if (!ppgtt) goto err_put; + mutex_lock(&ctx->mutex); __set_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } return ctx; -err_engines: - free_engines(rcu_access_pointer(ctx->engines)); err_free: kfree(ctx); return NULL; @@ -73,7 +70,7 @@ void mock_context_close(struct i915_gem_context *ctx) void mock_init_contexts(struct drm_i915_private *i915) { - init_contexts(i915); + init_contexts(&i915->gem.contexts); } struct i915_gem_context * @@ -82,8 +79,6 @@ live_context(struct drm_i915_private *i915, struct drm_file *file) struct i915_gem_context *ctx; int err; - lockdep_assert_held(&i915->drm.struct_mutex); - ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; |