diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2020-07-29 17:42:17 +0100 |
---|---|---|
committer | Joonas Lahtinen <joonas.lahtinen@linux.intel.com> | 2020-09-07 14:24:05 +0300 |
commit | cd0452aa2a0d146ff2c695877a4f5d3e4d09b435 (patch) | |
tree | 084636daa2d992d9521681cd120ab4cf78c9abaa /drivers/gpu/drm/i915/gt/gen6_ppgtt.c | |
parent | drm/i915/gt: Distinguish the virtual breadcrumbs from the irq breadcrumbs (diff) | |
download | linux-dev-cd0452aa2a0d146ff2c695877a4f5d3e4d09b435.tar.xz linux-dev-cd0452aa2a0d146ff2c695877a4f5d3e4d09b435.zip |
drm/i915: Preallocate stashes for vma page-directories
We need to make the DMA allocations used for page directories to be
performed up front so that we can include those allocations in our
memory reservation pass. The downside is that we have to assume the
worst case, even before we know the final layout, and always allocate
enough page directories for this object, even when there will be overlap.
This unfortunately can be quite expensive, especially as we have to
clear/reset the page directories and DMA pages, but it should only be
required during early phases of a workload when new objects are being
discovered, or after memory/eviction pressure when we need to rebind.
Once we reach steady state, the objects should not be moved and we no
longer need to preallocating the pages tables.
It should be noted that the lifetime for the page directories DMA is
more or less decoupled from individual fences as they will be shared
across objects across timelines.
v2: Only allocate enough PD space for the PTE we may use, we do not need
to allocate PD that will be left as scratch.
v3: Store the shift unto the first PD level to encapsulate the different
PTE counts for gen6/gen8.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200729164219.5737-1-chris@chris-wilson.co.uk
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/gt/gen6_ppgtt.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 40 |
1 files changed, 14 insertions, 26 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index cdc0b9c54305..4e6c1f7f48ef 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -177,16 +177,16 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end) mutex_unlock(&ppgtt->flush); } -static int gen6_alloc_va_range(struct i915_address_space *vm, - u64 start, u64 length) +static void gen6_alloc_va_range(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + u64 start, u64 length) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_table *pt, *alloc = NULL; + struct i915_page_table *pt; bool flush = false; u64 from = start; unsigned int pde; - int ret = 0; spin_lock(&pd->lock); gen6_for_each_pde(pt, pd, start, length, pde) { @@ -195,21 +195,17 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, if (px_base(pt) == px_base(&vm->scratch[1])) { spin_unlock(&pd->lock); - pt = fetch_and_zero(&alloc); - if (!pt) - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind_out; - } + pt = stash->pt[0]; + GEM_BUG_ON(!pt); fill32_px(pt, vm->scratch[0].encode); spin_lock(&pd->lock); if (pd->entry[pde] == &vm->scratch[1]) { + stash->pt[0] = pt->stash; + atomic_set(&pt->used, 0); pd->entry[pde] = pt; } else { - alloc = pt; pt = pd->entry[pde]; } @@ -226,15 +222,6 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref) gen6_flush_pd(ppgtt, from, start); } - - goto out; - -unwind_out: - gen6_ppgtt_clear_range(vm, from, start - from); -out: - if (alloc) - free_px(vm, alloc); - return ret; } static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) @@ -302,10 +289,11 @@ static void pd_vma_clear_pages(struct i915_vma *vma) vma->pages = NULL; } -static int pd_vma_bind(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 unused) +static void pd_vma_bind(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct gen6_ppgtt *ppgtt = vma->private; @@ -315,7 +303,6 @@ static int pd_vma_bind(struct i915_address_space *vm, ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total); - return 0; } static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) @@ -448,6 +435,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) mutex_init(&ppgtt->pin_mutex); ppgtt_init(&ppgtt->base, gt); + ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); ppgtt->base.vm.top = 1; ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; |