aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gem/i915_gem_mman.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gem/i915_gem_mman.c')
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c508
1 files changed, 508 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
new file mode 100644
index 000000000000..39a661927d8e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -0,0 +1,508 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/mman.h>
+#include <linux/sizes.h>
+
+#include "i915_drv.h"
+#include "i915_gem_gtt.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+#include "i915_vma.h"
+#include "intel_drv.h"
+
+static inline bool
+__vma_matches(struct vm_area_struct *vma, struct file *filp,
+ unsigned long addr, unsigned long size)
+{
+ if (vma->vm_file != filp)
+ return false;
+
+ return vma->vm_start == addr &&
+ (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
+}
+
+/**
+ * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
+ * it is mapped to.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ *
+ * While the mapping holds a reference on the contents of the object, it doesn't
+ * imply a ref on the object itself.
+ *
+ * IMPORTANT:
+ *
+ * DRM driver writers who look a this function as an example for how to do GEM
+ * mmap support, please don't implement mmap support like here. The modern way
+ * to implement DRM mmap support is with an mmap offset ioctl (like
+ * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
+ * That way debug tooling like valgrind will understand what's going on, hiding
+ * the mmap call in a driver private ioctl will break that. The i915 driver only
+ * does cpu mmaps this way because we didn't know better.
+ */
+int
+i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_mmap *args = data;
+ struct drm_i915_gem_object *obj;
+ unsigned long addr;
+
+ if (args->flags & ~(I915_MMAP_WC))
+ return -EINVAL;
+
+ if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
+ return -ENODEV;
+
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ /* prime objects have no backing filp to GEM mmap
+ * pages from.
+ */
+ if (!obj->base.filp) {
+ addr = -ENXIO;
+ goto err;
+ }
+
+ if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
+ addr = -EINVAL;
+ goto err;
+ }
+
+ addr = vm_mmap(obj->base.filp, 0, args->size,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ args->offset);
+ if (IS_ERR_VALUE(addr))
+ goto err;
+
+ if (args->flags & I915_MMAP_WC) {
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+
+ if (down_write_killable(&mm->mmap_sem)) {
+ addr = -EINTR;
+ goto err;
+ }
+ vma = find_vma(mm, addr);
+ if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+ else
+ addr = -ENOMEM;
+ up_write(&mm->mmap_sem);
+ if (IS_ERR_VALUE(addr))
+ goto err;
+
+ /* This may race, but that's ok, it only gets set */
+ WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
+ }
+ i915_gem_object_put(obj);
+
+ args->addr_ptr = (u64)addr;
+ return 0;
+
+err:
+ i915_gem_object_put(obj);
+ return addr;
+}
+
+static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
+{
+ return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
+}
+
+/**
+ * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
+ *
+ * A history of the GTT mmap interface:
+ *
+ * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
+ * aligned and suitable for fencing, and still fit into the available
+ * mappable space left by the pinned display objects. A classic problem
+ * we called the page-fault-of-doom where we would ping-pong between
+ * two objects that could not fit inside the GTT and so the memcpy
+ * would page one object in at the expense of the other between every
+ * single byte.
+ *
+ * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
+ * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
+ * object is too large for the available space (or simply too large
+ * for the mappable aperture!), a view is created instead and faulted
+ * into userspace. (This view is aligned and sized appropriately for
+ * fenced access.)
+ *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ * delayed writes via GTT before performing direct access via WC.
+ *
+ * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
+ * pagefault; swapin remains transparent.
+ *
+ * Restrictions:
+ *
+ * * snoopable objects cannot be accessed via the GTT. It can cause machine
+ * hangs on some architectures, corruption on others. An attempt to service
+ * a GTT page fault from a snoopable object will generate a SIGBUS.
+ *
+ * * the object must be able to fit into RAM (physical memory, though no
+ * limited to the mappable aperture).
+ *
+ *
+ * Caveats:
+ *
+ * * a new GTT page fault will synchronize rendering from the GPU and flush
+ * all data to system memory. Subsequent access will not be synchronized.
+ *
+ * * all mappings are revoked on runtime device suspend.
+ *
+ * * there are only 8, 16 or 32 fence registers to share between all users
+ * (older machines require fence register for display and blitter access
+ * as well). Contention of the fence registers will cause the previous users
+ * to be unmapped and any new access will generate new page faults.
+ *
+ * * running out of memory while servicing a fault may generate a SIGBUS,
+ * rather than the expected SIGSEGV.
+ */
+int i915_gem_mmap_gtt_version(void)
+{
+ return 3;
+}
+
+static inline struct i915_ggtt_view
+compute_partial_view(const struct drm_i915_gem_object *obj,
+ pgoff_t page_offset,
+ unsigned int chunk)
+{
+ struct i915_ggtt_view view;
+
+ if (i915_gem_object_is_tiled(obj))
+ chunk = roundup(chunk, tile_row_pages(obj));
+
+ view.type = I915_GGTT_VIEW_PARTIAL;
+ view.partial.offset = rounddown(page_offset, chunk);
+ view.partial.size =
+ min_t(unsigned int, chunk,
+ (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
+
+ /* If the partial covers the entire object, just create a normal VMA. */
+ if (chunk >= obj->base.size >> PAGE_SHIFT)
+ view.type = I915_GGTT_VIEW_NORMAL;
+
+ return view;
+}
+
+/**
+ * i915_gem_fault - fault a page into the GTT
+ * @vmf: fault info
+ *
+ * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
+ * from userspace. The fault handler takes care of binding the object to
+ * the GTT (if needed), allocating and programming a fence register (again,
+ * only if needed based on whether the old reg is still valid or the object
+ * is tiled) and inserting a new PTE into the faulting process.
+ *
+ * Note that the faulting process may involve evicting existing objects
+ * from the GTT and/or fence registers to make room. So performance may
+ * suffer if the GTT working set is large or there are few fence registers
+ * left.
+ *
+ * The current feature set supported by i915_gem_fault() and thus GTT mmaps
+ * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
+ */
+vm_fault_t i915_gem_fault(struct vm_fault *vmf)
+{
+#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
+ struct vm_area_struct *area = vmf->vma;
+ struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
+ struct drm_device *dev = obj->base.dev;
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct intel_runtime_pm *rpm = &i915->runtime_pm;
+ struct i915_ggtt *ggtt = &i915->ggtt;
+ bool write = area->vm_flags & VM_WRITE;
+ intel_wakeref_t wakeref;
+ struct i915_vma *vma;
+ pgoff_t page_offset;
+ int srcu;
+ int ret;
+
+ /* Sanity check that we allow writing into this object */
+ if (i915_gem_object_is_readonly(obj) && write)
+ return VM_FAULT_SIGBUS;
+
+ /* We don't use vmf->pgoff since that has the fake offset */
+ page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+
+ trace_i915_gem_object_fault(obj, page_offset, true, write);
+
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ goto err;
+
+ wakeref = intel_runtime_pm_get(rpm);
+
+ srcu = i915_reset_trylock(i915);
+ if (srcu < 0) {
+ ret = srcu;
+ goto err_rpm;
+ }
+
+ ret = i915_mutex_lock_interruptible(dev);
+ if (ret)
+ goto err_reset;
+
+ /* Access to snoopable pages through the GTT is incoherent. */
+ if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+ ret = -EFAULT;
+ goto err_unlock;
+ }
+
+ /* Now pin it into the GTT as needed */
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK |
+ PIN_NONFAULT);
+ if (IS_ERR(vma)) {
+ /* Use a partial view if it is bigger than available space */
+ struct i915_ggtt_view view =
+ compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
+ unsigned int flags;
+
+ flags = PIN_MAPPABLE;
+ if (view.type == I915_GGTT_VIEW_NORMAL)
+ flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
+
+ /*
+ * Userspace is now writing through an untracked VMA, abandon
+ * all hope that the hardware is able to track future writes.
+ */
+ obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
+
+ vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+ if (IS_ERR(vma) && !view.type) {
+ flags = PIN_MAPPABLE;
+ view.type = I915_GGTT_VIEW_PARTIAL;
+ vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+ }
+ }
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err_unlock;
+ }
+
+ ret = i915_vma_pin_fence(vma);
+ if (ret)
+ goto err_unpin;
+
+ /* Finally, remap it using the new GTT offset */
+ ret = remap_io_mapping(area,
+ area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
+ (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
+ min_t(u64, vma->size, area->vm_end - area->vm_start),
+ &ggtt->iomap);
+ if (ret)
+ goto err_fence;
+
+ /* Mark as being mmapped into userspace for later revocation */
+ assert_rpm_wakelock_held(rpm);
+ if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
+ list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
+ if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
+ intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
+ msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
+ GEM_BUG_ON(!obj->userfault_count);
+
+ i915_vma_set_ggtt_write(vma);
+
+err_fence:
+ i915_vma_unpin_fence(vma);
+err_unpin:
+ __i915_vma_unpin(vma);
+err_unlock:
+ mutex_unlock(&dev->struct_mutex);
+err_reset:
+ i915_reset_unlock(i915, srcu);
+err_rpm:
+ intel_runtime_pm_put(rpm, wakeref);
+ i915_gem_object_unpin_pages(obj);
+err:
+ switch (ret) {
+ case -EIO:
+ /*
+ * We eat errors when the gpu is terminally wedged to avoid
+ * userspace unduly crashing (gl has no provisions for mmaps to
+ * fail). But any other -EIO isn't ours (e.g. swap in failure)
+ * and so needs to be reported.
+ */
+ if (!i915_terminally_wedged(i915))
+ return VM_FAULT_SIGBUS;
+ /* else, fall through */
+ case -EAGAIN:
+ /*
+ * EAGAIN means the gpu is hung and we'll wait for the error
+ * handler to reset everything when re-faulting in
+ * i915_mutex_lock_interruptible.
+ */
+ case 0:
+ case -ERESTARTSYS:
+ case -EINTR:
+ case -EBUSY:
+ /*
+ * EBUSY is ok: this just means that another thread
+ * already did the job.
+ */
+ return VM_FAULT_NOPAGE;
+ case -ENOMEM:
+ return VM_FAULT_OOM;
+ case -ENOSPC:
+ case -EFAULT:
+ return VM_FAULT_SIGBUS;
+ default:
+ WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
+ return VM_FAULT_SIGBUS;
+ }
+}
+
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+ struct i915_vma *vma;
+
+ GEM_BUG_ON(!obj->userfault_count);
+
+ obj->userfault_count = 0;
+ list_del(&obj->userfault_link);
+ drm_vma_node_unmap(&obj->base.vma_node,
+ obj->base.dev->anon_inode->i_mapping);
+
+ for_each_ggtt_vma(vma, obj)
+ i915_vma_unset_userfault(vma);
+}
+
+/**
+ * i915_gem_object_release_mmap - remove physical page mappings
+ * @obj: obj in question
+ *
+ * Preserve the reservation of the mmapping with the DRM core code, but
+ * relinquish ownership of the pages back to the system.
+ *
+ * It is vital that we remove the page mapping if we have mapped a tiled
+ * object through the GTT and then lose the fence register due to
+ * resource pressure. Similarly if the object has been moved out of the
+ * aperture, than pages mapped into userspace must be revoked. Removing the
+ * mapping will then trigger a page fault on the next user access, allowing
+ * fixup by i915_gem_fault().
+ */
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ intel_wakeref_t wakeref;
+
+ /* Serialisation between user GTT access and our code depends upon
+ * revoking the CPU's PTE whilst the mutex is held. The next user
+ * pagefault then has to wait until we release the mutex.
+ *
+ * Note that RPM complicates somewhat by adding an additional
+ * requirement that operations to the GGTT be made holding the RPM
+ * wakeref.
+ */
+ lockdep_assert_held(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ if (!obj->userfault_count)
+ goto out;
+
+ __i915_gem_object_release_mmap(obj);
+
+ /* Ensure that the CPU's PTE are revoked and there are not outstanding
+ * memory transactions from userspace before we return. The TLB
+ * flushing implied above by changing the PTE above *should* be
+ * sufficient, an extra barrier here just provides us with a bit
+ * of paranoid documentation about our requirement to serialise
+ * memory writes before touching registers / GSM.
+ */
+ wmb();
+
+out:
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+}
+
+static int create_mmap_offset(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ int err;
+
+ err = drm_gem_create_mmap_offset(&obj->base);
+ if (likely(!err))
+ return 0;
+
+ /* Attempt to reap some mmap space from dead objects */
+ do {
+ err = i915_gem_wait_for_idle(i915,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ break;
+
+ i915_gem_drain_freed_objects(i915);
+ err = drm_gem_create_mmap_offset(&obj->base);
+ if (!err)
+ break;
+
+ } while (flush_delayed_work(&i915->gem.retire_work));
+
+ return err;
+}
+
+int
+i915_gem_mmap_gtt(struct drm_file *file,
+ struct drm_device *dev,
+ u32 handle,
+ u64 *offset)
+{
+ struct drm_i915_gem_object *obj;
+ int ret;
+
+ obj = i915_gem_object_lookup(file, handle);
+ if (!obj)
+ return -ENOENT;
+
+ ret = create_mmap_offset(obj);
+ if (ret == 0)
+ *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+
+ i915_gem_object_put(obj);
+ return ret;
+}
+
+/**
+ * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_mmap_gtt *args = data;
+
+ return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_mman.c"
+#endif