aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem_gtt.c
diff options
context:
space:
mode:
authorMichel Thierry <michel.thierry@intel.com>2015-07-30 11:05:29 +0100
committerDaniel Vetter <daniel.vetter@ffwll.ch>2015-08-14 18:16:21 +0200
commit762d99363dc9bc436f39f8bdc3f8670ea272a5a9 (patch)
tree10a965fb6701120112b52aac36db04ff8cbf96c5 /drivers/gpu/drm/i915/i915_gem_gtt.c
parentdrm/i915/gen8: Add PML4 structure (diff)
downloadlinux-dev-762d99363dc9bc436f39f8bdc3f8670ea272a5a9.tar.xz
linux-dev-762d99363dc9bc436f39f8bdc3f8670ea272a5a9.zip
drm/i915/gen8: implement alloc/free for 4lvl
PML4 has no special attributes, and there will always be a PML4. So simply initialize it at creation, and destroy it at the end. The code for 4lvl is able to call into the existing 3lvl page table code to handle all of the lower levels. v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the compiler happy. And define ret only in one place. Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl. v3: Use i915_dma_unmap_single instead of pci API. Fix a couple of incorrect checks when unmapping pdp and pd pages (Akash). v4: Call __pdp_fini also for 32b PPGTT. Clean up alloc_pdp param list. v5: Prevent (harmless) out of range access in gen8_for_each_pml4e. v6: Simplify alloc_vma_range_4lvl and gen8_ppgtt_init_common error paths. (Akash) v7: Rebase, s/gen8_ppgtt_free_*/gen8_ppgtt_cleanup_*/. v8: Change location of pml4_init/fini. It will make next patches cleaner. v9: Rebase after Mika's ppgtt cleanup / scratch merge patch series, while trying to reuse as much as possible for pdp alloc. pml4_init/fini replaced by setup/cleanup_px macros. v10: Rebase after Mika's merged ppgtt cleanup patch series. v11: Rebase after final merged version of Mika's ppgtt/scratch patches. v12: Fix pdpe start value in trace (Akash) v13: Define all 4lvl functions in this patch directly, instead of previous patches, add i915_page_directory_pointer_entry_alloc here, use test_bit to detect when pdp is already allocated (Akash). v14: Move pdp allocation into a new gen8_ppgtt_alloc_page_dirpointers funtion, as we do for pds and pts; move pd and pdp setup functions to this patch (Akash). v15: Added kfree(pdp) from previous patch to this (Akash). Cc: Akash Goel <akash.goel@intel.com> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+) Reviewed-by: Akash Goel <akash.goel@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c239
1 files changed, 225 insertions, 14 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index da7863b5c22b..229a31760260 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -204,6 +204,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
return pde;
}
+#define gen8_pdpe_encode gen8_pde_encode
+#define gen8_pml4e_encode gen8_pde_encode
+
static gen6_pte_t snb_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
bool valid, u32 unused)
@@ -553,10 +556,73 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp)
pdp->page_directory = NULL;
}
+static struct
+i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
+{
+ struct i915_page_directory_pointer *pdp;
+ int ret = -ENOMEM;
+
+ WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
+
+ pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
+ if (!pdp)
+ return ERR_PTR(-ENOMEM);
+
+ ret = __pdp_init(dev, pdp);
+ if (ret)
+ goto fail_bitmap;
+
+ ret = setup_px(dev, pdp);
+ if (ret)
+ goto fail_page_m;
+
+ return pdp;
+
+fail_page_m:
+ __pdp_fini(pdp);
+fail_bitmap:
+ kfree(pdp);
+
+ return ERR_PTR(ret);
+}
+
static void free_pdp(struct drm_device *dev,
struct i915_page_directory_pointer *pdp)
{
__pdp_fini(pdp);
+ if (USES_FULL_48BIT_PPGTT(dev)) {
+ cleanup_px(dev, pdp);
+ kfree(pdp);
+ }
+}
+
+static void
+gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
+ struct i915_page_directory_pointer *pdp,
+ struct i915_page_directory *pd,
+ int index)
+{
+ gen8_ppgtt_pdpe_t *page_directorypo;
+
+ if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+ return;
+
+ page_directorypo = kmap_px(pdp);
+ page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
+ kunmap_px(ppgtt, page_directorypo);
+}
+
+static void
+gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
+ struct i915_pml4 *pml4,
+ struct i915_page_directory_pointer *pdp,
+ int index)
+{
+ gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
+
+ WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
+ pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
+ kunmap_px(ppgtt, pagemap);
}
/* Broadwell Page Directory Pointer Descriptors */
@@ -782,12 +848,9 @@ static void gen8_free_scratch(struct i915_address_space *vm)
free_scratch_page(dev, vm->scratch_page);
}
-static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
+ struct i915_page_directory_pointer *pdp)
{
- struct i915_hw_ppgtt *ppgtt =
- container_of(vm, struct i915_hw_ppgtt, base);
- struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
- struct drm_device *dev = ppgtt->base.dev;
int i;
for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
@@ -799,6 +862,31 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
}
free_pdp(dev, pdp);
+}
+
+static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
+{
+ int i;
+
+ for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
+ if (WARN_ON(!ppgtt->pml4.pdps[i]))
+ continue;
+
+ gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
+ }
+
+ cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
+}
+
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+
+ if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+ gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
+ else
+ gen8_ppgtt_cleanup_4lvl(ppgtt);
gen8_free_scratch(vm);
}
@@ -920,6 +1008,60 @@ unwind_out:
return -ENOMEM;
}
+/**
+ * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
+ * @vm: Master vm structure.
+ * @pml4: Page map level 4 for this address range.
+ * @start: Starting virtual address to begin allocations.
+ * @length: Size of the allocations.
+ * @new_pdps: Bitmap set by function with new allocations. Likely used by the
+ * caller to free on error.
+ *
+ * Allocate the required number of page directory pointers. Extremely similar to
+ * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
+ * The main difference is here we are limited by the pml4 boundary (instead of
+ * the page directory pointer).
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int
+gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
+ struct i915_pml4 *pml4,
+ uint64_t start,
+ uint64_t length,
+ unsigned long *new_pdps)
+{
+ struct drm_device *dev = vm->dev;
+ struct i915_page_directory_pointer *pdp;
+ uint64_t temp;
+ uint32_t pml4e;
+
+ WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
+
+ gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+ if (!test_bit(pml4e, pml4->used_pml4es)) {
+ pdp = alloc_pdp(dev);
+ if (IS_ERR(pdp))
+ goto unwind_out;
+
+ pml4->pdps[pml4e] = pdp;
+ __set_bit(pml4e, new_pdps);
+ trace_i915_page_directory_pointer_entry_alloc(vm,
+ pml4e,
+ start,
+ GEN8_PML4E_SHIFT);
+ }
+ }
+
+ return 0;
+
+unwind_out:
+ for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
+ free_pdp(dev, pml4->pdps[pml4e]);
+
+ return -ENOMEM;
+}
+
static void
free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts,
uint32_t pdpes)
@@ -981,14 +1123,15 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
}
-static int gen8_alloc_va_range(struct i915_address_space *vm,
- uint64_t start, uint64_t length)
+static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
+ struct i915_page_directory_pointer *pdp,
+ uint64_t start,
+ uint64_t length)
{
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
unsigned long *new_page_dirs, **new_page_tables;
struct drm_device *dev = vm->dev;
- struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
struct i915_page_directory *pd;
const uint64_t orig_start = start;
const uint64_t orig_length = length;
@@ -1069,6 +1212,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
kunmap_px(ppgtt, page_directory);
__set_bit(pdpe, pdp->used_pdpes);
+ gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
}
free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
@@ -1089,6 +1233,68 @@ err_out:
return ret;
}
+static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
+ struct i915_pml4 *pml4,
+ uint64_t start,
+ uint64_t length)
+{
+ DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+ struct i915_page_directory_pointer *pdp;
+ uint64_t temp, pml4e;
+ int ret = 0;
+
+ /* Do the pml4 allocations first, so we don't need to track the newly
+ * allocated tables below the pdp */
+ bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
+
+ /* The pagedirectory and pagetable allocations are done in the shared 3
+ * and 4 level code. Just allocate the pdps.
+ */
+ ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
+ new_pdps);
+ if (ret)
+ return ret;
+
+ WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
+ "The allocation has spanned more than 512GB. "
+ "It is highly likely this is incorrect.");
+
+ gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+ WARN_ON(!pdp);
+
+ ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
+ if (ret)
+ goto err_out;
+
+ gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
+ }
+
+ bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
+ GEN8_PML4ES_PER_PML4);
+
+ return 0;
+
+err_out:
+ for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
+ gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
+
+ return ret;
+}
+
+static int gen8_alloc_va_range(struct i915_address_space *vm,
+ uint64_t start, uint64_t length)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+
+ if (USES_FULL_48BIT_PPGTT(vm->dev))
+ return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
+ else
+ return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
+}
+
/*
* GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
* with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1114,9 +1320,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->switch_mm = gen8_mm_switch;
- if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
- ret = __pdp_init(false, &ppgtt->pdp);
+ if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+ ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
+ if (ret)
+ goto free_scratch;
+ ppgtt->base.total = 1ULL << 48;
+ } else {
+ ret = __pdp_init(false, &ppgtt->pdp);
if (ret)
goto free_scratch;
@@ -1128,10 +1339,10 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
* 2GiB).
*/
ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
- } else {
- ppgtt->base.total = 1ULL << 48;
- ret = -EPERM; /* Not yet implemented */
- goto free_scratch;
+
+ trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
+ 0, 0,
+ GEN8_PML4E_SHIFT);
}
return 0;