aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c973
1 files changed, 238 insertions, 735 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 216f52b744a6..6728ea5c71d4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -25,18 +25,9 @@
*
*/
-#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
+#include <drm/drm_pci.h>
#include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_vgpu.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
-#include "intel_mocs.h"
-#include "intel_workarounds.h"
-#include "i915_gemfs.h"
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/reservation.h>
@@ -46,6 +37,19 @@
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
+#include <linux/mman.h>
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_gemfs.h"
+#include "i915_reset.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+
+#include "intel_drv.h"
+#include "intel_frontbuffer.h"
+#include "intel_mocs.h"
+#include "intel_workarounds.h"
static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
@@ -139,6 +143,8 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
static u32 __i915_gem_park(struct drm_i915_private *i915)
{
+ intel_wakeref_t wakeref;
+
GEM_TRACE("\n");
lockdep_assert_held(&i915->drm.struct_mutex);
@@ -169,14 +175,13 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
i915_pmu_gt_parked(i915);
i915_vma_parked(i915);
- i915->gt.awake = false;
+ wakeref = fetch_and_zero(&i915->gt.awake);
+ GEM_BUG_ON(!wakeref);
if (INTEL_GEN(i915) >= 6)
gen6_rps_idle(i915);
- intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
-
- intel_runtime_pm_put(i915);
+ intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
return i915->gt.epoch;
}
@@ -201,12 +206,11 @@ void i915_gem_unpark(struct drm_i915_private *i915)
lockdep_assert_held(&i915->drm.struct_mutex);
GEM_BUG_ON(!i915->gt.active_requests);
+ assert_rpm_wakelock_held(i915);
if (i915->gt.awake)
return;
- intel_runtime_pm_get_noresume(i915);
-
/*
* It seems that the DMC likes to transition between the DC states a lot
* when there are no connected displays (no active power domains) during
@@ -218,9 +222,9 @@ void i915_gem_unpark(struct drm_i915_private *i915)
* Work around it by grabbing a GT IRQ power domain whilst there is any
* GT activity, preventing any DC state transitions.
*/
- intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+ i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+ GEM_BUG_ON(!i915->gt.awake);
- i915->gt.awake = true;
if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
i915->gt.epoch = 1;
@@ -243,21 +247,19 @@ int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
+ struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
struct drm_i915_gem_get_aperture *args = data;
struct i915_vma *vma;
u64 pinned;
+ mutex_lock(&ggtt->vm.mutex);
+
pinned = ggtt->vm.reserved;
- mutex_lock(&dev->struct_mutex);
- list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
- if (i915_vma_is_pinned(vma))
- pinned += vma->node.size;
- list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
+ list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
if (i915_vma_is_pinned(vma))
pinned += vma->node.size;
- mutex_unlock(&dev->struct_mutex);
+
+ mutex_unlock(&ggtt->vm.mutex);
args->aper_size = ggtt->vm.total;
args->aper_available_size = args->aper_size - pinned;
@@ -437,15 +439,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
if (ret)
return ret;
- while ((vma = list_first_entry_or_null(&obj->vma_list,
- struct i915_vma,
- obj_link))) {
+ spin_lock(&obj->vma.lock);
+ while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
+ struct i915_vma,
+ obj_link))) {
list_move_tail(&vma->obj_link, &still_in_list);
+ spin_unlock(&obj->vma.lock);
+
ret = i915_vma_unbind(vma);
- if (ret)
- break;
+
+ spin_lock(&obj->vma.lock);
}
- list_splice(&still_in_list, &obj->vma_list);
+ list_splice(&still_in_list, &obj->vma.list);
+ spin_unlock(&obj->vma.lock);
return ret;
}
@@ -655,11 +661,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
struct intel_rps_client *rps_client)
{
might_sleep();
-#if IS_ENABLED(CONFIG_LOCKDEP)
- GEM_BUG_ON(debug_locks &&
- !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
- !!(flags & I915_WAIT_LOCKED));
-#endif
GEM_BUG_ON(timeout < 0);
timeout = i915_gem_object_wait_reservation(obj->resv,
@@ -711,8 +712,8 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
static int
i915_gem_create(struct drm_file *file,
struct drm_i915_private *dev_priv,
- uint64_t size,
- uint32_t *handle_p)
+ u64 size,
+ u32 *handle_p)
{
struct drm_i915_gem_object *obj;
int ret;
@@ -783,6 +784,8 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
{
+ intel_wakeref_t wakeref;
+
/*
* No actual flushing is required for the GTT write domain for reads
* from the GTT domain. Writes to it "immediately" go to main memory
@@ -809,13 +812,13 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
i915_gem_chipset_flush(dev_priv);
- intel_runtime_pm_get(dev_priv);
- spin_lock_irq(&dev_priv->uncore.lock);
+ with_intel_runtime_pm(dev_priv, wakeref) {
+ spin_lock_irq(&dev_priv->uncore.lock);
- POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
+ POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
- spin_unlock_irq(&dev_priv->uncore.lock);
- intel_runtime_pm_put(dev_priv);
+ spin_unlock_irq(&dev_priv->uncore.lock);
+ }
}
static void
@@ -859,58 +862,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
obj->write_domain = 0;
}
-static inline int
-__copy_to_user_swizzled(char __user *cpu_vaddr,
- const char *gpu_vaddr, int gpu_offset,
- int length)
-{
- int ret, cpu_offset = 0;
-
- while (length > 0) {
- int cacheline_end = ALIGN(gpu_offset + 1, 64);
- int this_length = min(cacheline_end - gpu_offset, length);
- int swizzled_gpu_offset = gpu_offset ^ 64;
-
- ret = __copy_to_user(cpu_vaddr + cpu_offset,
- gpu_vaddr + swizzled_gpu_offset,
- this_length);
- if (ret)
- return ret + length;
-
- cpu_offset += this_length;
- gpu_offset += this_length;
- length -= this_length;
- }
-
- return 0;
-}
-
-static inline int
-__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
- const char __user *cpu_vaddr,
- int length)
-{
- int ret, cpu_offset = 0;
-
- while (length > 0) {
- int cacheline_end = ALIGN(gpu_offset + 1, 64);
- int this_length = min(cacheline_end - gpu_offset, length);
- int swizzled_gpu_offset = gpu_offset ^ 64;
-
- ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
- cpu_vaddr + cpu_offset,
- this_length);
- if (ret)
- return ret + length;
-
- cpu_offset += this_length;
- gpu_offset += this_length;
- length -= this_length;
- }
-
- return 0;
-}
-
/*
* Pins the specified object's pages and synchronizes the object with
* GPU accesses. Sets needs_clflush to non-zero if the caller should
@@ -1030,72 +981,23 @@ err_unpin:
return ret;
}
-static void
-shmem_clflush_swizzled_range(char *addr, unsigned long length,
- bool swizzled)
-{
- if (unlikely(swizzled)) {
- unsigned long start = (unsigned long) addr;
- unsigned long end = (unsigned long) addr + length;
-
- /* For swizzling simply ensure that we always flush both
- * channels. Lame, but simple and it works. Swizzled
- * pwrite/pread is far from a hotpath - current userspace
- * doesn't use it at all. */
- start = round_down(start, 128);
- end = round_up(end, 128);
-
- drm_clflush_virt_range((void *)start, end - start);
- } else {
- drm_clflush_virt_range(addr, length);
- }
-
-}
-
-/* Only difference to the fast-path function is that this can handle bit17
- * and uses non-atomic copy and kmap functions. */
static int
-shmem_pread_slow(struct page *page, int offset, int length,
- char __user *user_data,
- bool page_do_bit17_swizzling, bool needs_clflush)
+shmem_pread(struct page *page, int offset, int len, char __user *user_data,
+ bool needs_clflush)
{
char *vaddr;
int ret;
vaddr = kmap(page);
- if (needs_clflush)
- shmem_clflush_swizzled_range(vaddr + offset, length,
- page_do_bit17_swizzling);
-
- if (page_do_bit17_swizzling)
- ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
- else
- ret = __copy_to_user(user_data, vaddr + offset, length);
- kunmap(page);
- return ret ? - EFAULT : 0;
-}
-
-static int
-shmem_pread(struct page *page, int offset, int length, char __user *user_data,
- bool page_do_bit17_swizzling, bool needs_clflush)
-{
- int ret;
+ if (needs_clflush)
+ drm_clflush_virt_range(vaddr + offset, len);
- ret = -ENODEV;
- if (!page_do_bit17_swizzling) {
- char *vaddr = kmap_atomic(page);
+ ret = __copy_to_user(user_data, vaddr + offset, len);
- if (needs_clflush)
- drm_clflush_virt_range(vaddr + offset, length);
- ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
- kunmap_atomic(vaddr);
- }
- if (ret == 0)
- return 0;
+ kunmap(page);
- return shmem_pread_slow(page, offset, length, user_data,
- page_do_bit17_swizzling, needs_clflush);
+ return ret ? -EFAULT : 0;
}
static int
@@ -1104,15 +1006,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
{
char __user *user_data;
u64 remain;
- unsigned int obj_do_bit17_swizzling;
unsigned int needs_clflush;
unsigned int idx, offset;
int ret;
- obj_do_bit17_swizzling = 0;
- if (i915_gem_object_needs_bit17_swizzle(obj))
- obj_do_bit17_swizzling = BIT(17);
-
ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
if (ret)
return ret;
@@ -1130,7 +1027,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
ret = shmem_pread(page, offset, length, user_data,
- page_to_phys(page) & obj_do_bit17_swizzling,
needs_clflush);
if (ret)
break;
@@ -1174,6 +1070,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = &i915->ggtt;
+ intel_wakeref_t wakeref;
struct drm_mm_node node;
struct i915_vma *vma;
void __user *user_data;
@@ -1184,7 +1081,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- intel_runtime_pm_get(i915);
+ wakeref = intel_runtime_pm_get(i915);
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
PIN_MAPPABLE |
PIN_NONFAULT |
@@ -1257,7 +1154,7 @@ out_unpin:
i915_vma_unpin(vma);
}
out_unlock:
- intel_runtime_pm_put(i915);
+ intel_runtime_pm_put(i915, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
return ret;
@@ -1358,6 +1255,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = &i915->ggtt;
+ intel_wakeref_t wakeref;
struct drm_mm_node node;
struct i915_vma *vma;
u64 remain, offset;
@@ -1376,13 +1274,14 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
* This easily dwarfs any performance advantage from
* using the cache bypass of indirect GGTT access.
*/
- if (!intel_runtime_pm_get_if_in_use(i915)) {
+ wakeref = intel_runtime_pm_get_if_in_use(i915);
+ if (!wakeref) {
ret = -EFAULT;
goto out_unlock;
}
} else {
/* No backing pages, no fallback, we must force GGTT access */
- intel_runtime_pm_get(i915);
+ wakeref = intel_runtime_pm_get(i915);
}
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
@@ -1464,39 +1363,12 @@ out_unpin:
i915_vma_unpin(vma);
}
out_rpm:
- intel_runtime_pm_put(i915);
+ intel_runtime_pm_put(i915, wakeref);
out_unlock:
mutex_unlock(&i915->drm.struct_mutex);
return ret;
}
-static int
-shmem_pwrite_slow(struct page *page, int offset, int length,
- char __user *user_data,
- bool page_do_bit17_swizzling,
- bool needs_clflush_before,
- bool needs_clflush_after)
-{
- char *vaddr;
- int ret;
-
- vaddr = kmap(page);
- if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
- shmem_clflush_swizzled_range(vaddr + offset, length,
- page_do_bit17_swizzling);
- if (page_do_bit17_swizzling)
- ret = __copy_from_user_swizzled(vaddr, offset, user_data,
- length);
- else
- ret = __copy_from_user(vaddr + offset, user_data, length);
- if (needs_clflush_after)
- shmem_clflush_swizzled_range(vaddr + offset, length,
- page_do_bit17_swizzling);
- kunmap(page);
-
- return ret ? -EFAULT : 0;
-}
-
/* Per-page copy function for the shmem pwrite fastpath.
* Flushes invalid cachelines before writing to the target if
* needs_clflush_before is set and flushes out any written cachelines after
@@ -1504,31 +1376,24 @@ shmem_pwrite_slow(struct page *page, int offset, int length,
*/
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
- bool page_do_bit17_swizzling,
bool needs_clflush_before,
bool needs_clflush_after)
{
+ char *vaddr;
int ret;
- ret = -ENODEV;
- if (!page_do_bit17_swizzling) {
- char *vaddr = kmap_atomic(page);
+ vaddr = kmap(page);
- if (needs_clflush_before)
- drm_clflush_virt_range(vaddr + offset, len);
- ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
- if (needs_clflush_after)
- drm_clflush_virt_range(vaddr + offset, len);
+ if (needs_clflush_before)
+ drm_clflush_virt_range(vaddr + offset, len);
- kunmap_atomic(vaddr);
- }
- if (ret == 0)
- return ret;
+ ret = __copy_from_user(vaddr + offset, user_data, len);
+ if (!ret && needs_clflush_after)
+ drm_clflush_virt_range(vaddr + offset, len);
- return shmem_pwrite_slow(page, offset, len, user_data,
- page_do_bit17_swizzling,
- needs_clflush_before,
- needs_clflush_after);
+ kunmap(page);
+
+ return ret ? -EFAULT : 0;
}
static int
@@ -1538,7 +1403,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
struct drm_i915_private *i915 = to_i915(obj->base.dev);
void __user *user_data;
u64 remain;
- unsigned int obj_do_bit17_swizzling;
unsigned int partial_cacheline_write;
unsigned int needs_clflush;
unsigned int offset, idx;
@@ -1553,10 +1417,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- obj_do_bit17_swizzling = 0;
- if (i915_gem_object_needs_bit17_swizzle(obj))
- obj_do_bit17_swizzling = BIT(17);
-
/* If we don't overwrite a cacheline completely we need to be
* careful to have up-to-date data by first clflushing. Don't
* overcomplicate things and flush the entire patch.
@@ -1573,7 +1433,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
ret = shmem_pwrite(page, offset, length, user_data,
- page_to_phys(page) & obj_do_bit17_swizzling,
(offset | length) & partial_cacheline_write,
needs_clflush & CLFLUSH_AFTER);
if (ret)
@@ -1677,23 +1536,21 @@ err:
static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *i915;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct list_head *list;
struct i915_vma *vma;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ mutex_lock(&i915->ggtt.vm.mutex);
for_each_ggtt_vma(vma, obj) {
- if (i915_vma_is_active(vma))
- continue;
-
if (!drm_mm_node_allocated(&vma->node))
continue;
- list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+ list_move_tail(&vma->vm_link, &vma->vm->bound_list);
}
+ mutex_unlock(&i915->ggtt.vm.mutex);
- i915 = to_i915(obj->base.dev);
spin_lock(&i915->mm.obj_lock);
list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
list_move_tail(&obj->mm.link, list);
@@ -1713,8 +1570,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_gem_set_domain *args = data;
struct drm_i915_gem_object *obj;
- uint32_t read_domains = args->read_domains;
- uint32_t write_domain = args->write_domain;
+ u32 read_domains = args->read_domains;
+ u32 write_domain = args->write_domain;
int err;
/* Only handle setting domains to types used by the CPU. */
@@ -1824,6 +1681,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
return 0;
}
+static inline bool
+__vma_matches(struct vm_area_struct *vma, struct file *filp,
+ unsigned long addr, unsigned long size)
+{
+ if (vma->vm_file != filp)
+ return false;
+
+ return vma->vm_start == addr && (vma->vm_end - vma->vm_start) == size;
+}
+
/**
* i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
* it is mapped to.
@@ -1873,6 +1740,9 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
addr = vm_mmap(obj->base.filp, 0, args->size,
PROT_READ | PROT_WRITE, MAP_SHARED,
args->offset);
+ if (IS_ERR_VALUE(addr))
+ goto err;
+
if (args->flags & I915_MMAP_WC) {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -1882,23 +1752,28 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
return -EINTR;
}
vma = find_vma(mm, addr);
- if (vma)
+ if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
vma->vm_page_prot =
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
else
addr = -ENOMEM;
up_write(&mm->mmap_sem);
+ if (IS_ERR_VALUE(addr))
+ goto err;
/* This may race, but that's ok, it only gets set */
WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
}
i915_gem_object_put(obj);
- if (IS_ERR((void *)addr))
- return addr;
- args->addr_ptr = (uint64_t) addr;
+ args->addr_ptr = (u64)addr;
return 0;
+
+err:
+ i915_gem_object_put(obj);
+
+ return addr;
}
static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
@@ -2009,6 +1884,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
bool write = area->vm_flags & VM_WRITE;
+ intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
int ret;
@@ -2038,7 +1914,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
if (ret)
goto err;
- intel_runtime_pm_get(dev_priv);
+ wakeref = intel_runtime_pm_get(dev_priv);
ret = i915_mutex_lock_interruptible(dev);
if (ret)
@@ -2116,7 +1992,7 @@ err_unpin:
err_unlock:
mutex_unlock(&dev->struct_mutex);
err_rpm:
- intel_runtime_pm_put(dev_priv);
+ intel_runtime_pm_put(dev_priv, wakeref);
i915_gem_object_unpin_pages(obj);
err:
switch (ret) {
@@ -2189,6 +2065,7 @@ void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ intel_wakeref_t wakeref;
/* Serialisation between user GTT access and our code depends upon
* revoking the CPU's PTE whilst the mutex is held. The next user
@@ -2199,7 +2076,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
* wakeref.
*/
lockdep_assert_held(&i915->drm.struct_mutex);
- intel_runtime_pm_get(i915);
+ wakeref = intel_runtime_pm_get(i915);
if (!obj->userfault_count)
goto out;
@@ -2216,7 +2093,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
wmb();
out:
- intel_runtime_pm_put(i915);
+ intel_runtime_pm_put(i915, wakeref);
}
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
@@ -2296,8 +2173,8 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
int
i915_gem_mmap_gtt(struct drm_file *file,
struct drm_device *dev,
- uint32_t handle,
- uint64_t *offset)
+ u32 handle,
+ u64 *offset)
{
struct drm_i915_gem_object *obj;
int ret;
@@ -2444,8 +2321,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
struct sg_table *pages;
pages = fetch_and_zero(&obj->mm.pages);
- if (!pages)
- return NULL;
+ if (IS_ERR_OR_NULL(pages))
+ return pages;
spin_lock(&i915->mm.obj_lock);
list_del(&obj->mm.link);
@@ -2469,22 +2346,23 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
return pages;
}
-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
- enum i915_mm_subclass subclass)
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+ enum i915_mm_subclass subclass)
{
struct sg_table *pages;
+ int ret;
if (i915_gem_object_has_pinned_pages(obj))
- return;
+ return -EBUSY;
GEM_BUG_ON(obj->bind_count);
- if (!i915_gem_object_has_pages(obj))
- return;
/* May be called by shrinker from within get_pages() (on another bo) */
mutex_lock_nested(&obj->mm.lock, subclass);
- if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
+ if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
+ ret = -EBUSY;
goto unlock;
+ }
/*
* ->put_pages might need to allocate memory for the bit17 swizzle
@@ -2492,11 +2370,24 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
* lists early.
*/
pages = __i915_gem_object_unset_pages(obj);
+
+ /*
+ * XXX Temporary hijinx to avoid updating all backends to handle
+ * NULL pages. In the future, when we have more asynchronous
+ * get_pages backends we should be better able to handle the
+ * cancellation of the async task in a more uniform manner.
+ */
+ if (!pages && !i915_gem_object_needs_async_cancel(obj))
+ pages = ERR_PTR(-EINVAL);
+
if (!IS_ERR(pages))
obj->ops->put_pages(obj, pages);
+ ret = 0;
unlock:
mutex_unlock(&obj->mm.lock);
+
+ return ret;
}
bool i915_sg_trim(struct sg_table *orig_st)
@@ -3000,59 +2891,12 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
return 0;
}
-static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
- const struct i915_gem_context *ctx)
+static bool match_ring(struct i915_request *rq)
{
- unsigned int score;
- unsigned long prev_hang;
+ struct drm_i915_private *dev_priv = rq->i915;
+ u32 ring = I915_READ(RING_START(rq->engine->mmio_base));
- if (i915_gem_context_is_banned(ctx))
- score = I915_CLIENT_SCORE_CONTEXT_BAN;
- else
- score = 0;
-
- prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
- if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
- score += I915_CLIENT_SCORE_HANG_FAST;
-
- if (score) {
- atomic_add(score, &file_priv->ban_score);
-
- DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
- ctx->name, score,
- atomic_read(&file_priv->ban_score));
- }
-}
-
-static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
-{
- unsigned int score;
- bool banned, bannable;
-
- atomic_inc(&ctx->guilty_count);
-
- bannable = i915_gem_context_is_bannable(ctx);
- score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
- banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
-
- /* Cool contexts don't accumulate client ban score */
- if (!bannable)
- return;
-
- if (banned) {
- DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
- ctx->name, atomic_read(&ctx->guilty_count),
- score);
- i915_gem_context_set_banned(ctx);
- }
-
- if (!IS_ERR_OR_NULL(ctx->file_priv))
- i915_gem_client_mark_guilty(ctx->file_priv, ctx);
-}
-
-static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
-{
- atomic_inc(&ctx->active_count);
+ return ring == i915_ggtt_offset(rq->ring->vma);
}
struct i915_request *
@@ -3074,9 +2918,16 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
*/
spin_lock_irqsave(&engine->timeline.lock, flags);
list_for_each_entry(request, &engine->timeline.requests, link) {
- if (__i915_request_completed(request, request->global_seqno))
+ if (i915_request_completed(request))
continue;
+ if (!i915_request_started(request))
+ break;
+
+ /* More than one preemptible request may match! */
+ if (!match_ring(request))
+ break;
+
active = request;
break;
}
@@ -3085,366 +2936,6 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
return active;
}
-/*
- * Ensure irq handler finishes, and not run again.
- * Also return the active request so that we only search for it once.
- */
-struct i915_request *
-i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
-{
- struct i915_request *request;
-
- /*
- * During the reset sequence, we must prevent the engine from
- * entering RC6. As the context state is undefined until we restart
- * the engine, if it does enter RC6 during the reset, the state
- * written to the powercontext is undefined and so we may lose
- * GPU state upon resume, i.e. fail to restart after a reset.
- */
- intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
-
- request = engine->reset.prepare(engine);
- if (request && request->fence.error == -EIO)
- request = ERR_PTR(-EIO); /* Previous reset failed! */
-
- return request;
-}
-
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
-{
- struct intel_engine_cs *engine;
- struct i915_request *request;
- enum intel_engine_id id;
- int err = 0;
-
- for_each_engine(engine, dev_priv, id) {
- request = i915_gem_reset_prepare_engine(engine);
- if (IS_ERR(request)) {
- err = PTR_ERR(request);
- continue;
- }
-
- engine->hangcheck.active_request = request;
- }
-
- i915_gem_revoke_fences(dev_priv);
- intel_uc_sanitize(dev_priv);
-
- return err;
-}
-
-static void engine_skip_context(struct i915_request *request)
-{
- struct intel_engine_cs *engine = request->engine;
- struct i915_gem_context *hung_ctx = request->gem_context;
- struct i915_timeline *timeline = request->timeline;
- unsigned long flags;
-
- GEM_BUG_ON(timeline == &engine->timeline);
-
- spin_lock_irqsave(&engine->timeline.lock, flags);
- spin_lock(&timeline->lock);
-
- list_for_each_entry_continue(request, &engine->timeline.requests, link)
- if (request->gem_context == hung_ctx)
- i915_request_skip(request, -EIO);
-
- list_for_each_entry(request, &timeline->requests, link)
- i915_request_skip(request, -EIO);
-
- spin_unlock(&timeline->lock);
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
-/* Returns the request if it was guilty of the hang */
-static struct i915_request *
-i915_gem_reset_request(struct intel_engine_cs *engine,
- struct i915_request *request,
- bool stalled)
-{
- /* The guilty request will get skipped on a hung engine.
- *
- * Users of client default contexts do not rely on logical
- * state preserved between batches so it is safe to execute
- * queued requests following the hang. Non default contexts
- * rely on preserved state, so skipping a batch loses the
- * evolution of the state and it needs to be considered corrupted.
- * Executing more queued batches on top of corrupted state is
- * risky. But we take the risk by trying to advance through
- * the queued requests in order to make the client behaviour
- * more predictable around resets, by not throwing away random
- * amount of batches it has prepared for execution. Sophisticated
- * clients can use gem_reset_stats_ioctl and dma fence status
- * (exported via sync_file info ioctl on explicit fences) to observe
- * when it loses the context state and should rebuild accordingly.
- *
- * The context ban, and ultimately the client ban, mechanism are safety
- * valves if client submission ends up resulting in nothing more than
- * subsequent hangs.
- */
-
- if (i915_request_completed(request)) {
- GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
- engine->name, request->global_seqno,
- request->fence.context, request->fence.seqno,
- intel_engine_get_seqno(engine));
- stalled = false;
- }
-
- if (stalled) {
- i915_gem_context_mark_guilty(request->gem_context);
- i915_request_skip(request, -EIO);
-
- /* If this context is now banned, skip all pending requests. */
- if (i915_gem_context_is_banned(request->gem_context))
- engine_skip_context(request);
- } else {
- /*
- * Since this is not the hung engine, it may have advanced
- * since the hang declaration. Double check by refinding
- * the active request at the time of the reset.
- */
- request = i915_gem_find_active_request(engine);
- if (request) {
- unsigned long flags;
-
- i915_gem_context_mark_innocent(request->gem_context);
- dma_fence_set_error(&request->fence, -EAGAIN);
-
- /* Rewind the engine to replay the incomplete rq */
- spin_lock_irqsave(&engine->timeline.lock, flags);
- request = list_prev_entry(request, link);
- if (&request->link == &engine->timeline.requests)
- request = NULL;
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
- }
- }
-
- return request;
-}
-
-void i915_gem_reset_engine(struct intel_engine_cs *engine,
- struct i915_request *request,
- bool stalled)
-{
- /*
- * Make sure this write is visible before we re-enable the interrupt
- * handlers on another CPU, as tasklet_enable() resolves to just
- * a compiler barrier which is insufficient for our purpose here.
- */
- smp_store_mb(engine->irq_posted, 0);
-
- if (request)
- request = i915_gem_reset_request(engine, request, stalled);
-
- /* Setup the CS to resume from the breadcrumb of the hung request */
- engine->reset.reset(engine, request);
-}
-
-void i915_gem_reset(struct drm_i915_private *dev_priv,
- unsigned int stalled_mask)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
- i915_retire_requests(dev_priv);
-
- for_each_engine(engine, dev_priv, id) {
- struct intel_context *ce;
-
- i915_gem_reset_engine(engine,
- engine->hangcheck.active_request,
- stalled_mask & ENGINE_MASK(id));
- ce = fetch_and_zero(&engine->last_retired_context);
- if (ce)
- intel_context_unpin(ce);
-
- /*
- * Ostensibily, we always want a context loaded for powersaving,
- * so if the engine is idle after the reset, send a request
- * to load our scratch kernel_context.
- *
- * More mysteriously, if we leave the engine idle after a reset,
- * the next userspace batch may hang, with what appears to be
- * an incoherent read by the CS (presumably stale TLB). An
- * empty request appears sufficient to paper over the glitch.
- */
- if (intel_engine_is_idle(engine)) {
- struct i915_request *rq;
-
- rq = i915_request_alloc(engine,
- dev_priv->kernel_context);
- if (!IS_ERR(rq))
- i915_request_add(rq);
- }
- }
-
- i915_gem_restore_fences(dev_priv);
-}
-
-void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
-{
- engine->reset.finish(engine);
-
- intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
-}
-
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
- for_each_engine(engine, dev_priv, id) {
- engine->hangcheck.active_request = NULL;
- i915_gem_reset_finish_engine(engine);
- }
-}
-
-static void nop_submit_request(struct i915_request *request)
-{
- unsigned long flags;
-
- GEM_TRACE("%s fence %llx:%d -> -EIO\n",
- request->engine->name,
- request->fence.context, request->fence.seqno);
- dma_fence_set_error(&request->fence, -EIO);
-
- spin_lock_irqsave(&request->engine->timeline.lock, flags);
- __i915_request_submit(request);
- intel_engine_init_global_seqno(request->engine, request->global_seqno);
- spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
-}
-
-void i915_gem_set_wedged(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- GEM_TRACE("start\n");
-
- if (GEM_SHOW_DEBUG()) {
- struct drm_printer p = drm_debug_printer(__func__);
-
- for_each_engine(engine, i915, id)
- intel_engine_dump(engine, &p, "%s\n", engine->name);
- }
-
- if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
- goto out;
-
- /*
- * First, stop submission to hw, but do not yet complete requests by
- * rolling the global seqno forward (since this would complete requests
- * for which we haven't set the fence error to EIO yet).
- */
- for_each_engine(engine, i915, id)
- i915_gem_reset_prepare_engine(engine);
-
- /* Even if the GPU reset fails, it should still stop the engines */
- if (INTEL_GEN(i915) >= 5)
- intel_gpu_reset(i915, ALL_ENGINES);
-
- for_each_engine(engine, i915, id) {
- engine->submit_request = nop_submit_request;
- engine->schedule = NULL;
- }
- i915->caps.scheduler = 0;
-
- /*
- * Make sure no request can slip through without getting completed by
- * either this call here to intel_engine_init_global_seqno, or the one
- * in nop_submit_request.
- */
- synchronize_rcu();
-
- /* Mark all executing requests as skipped */
- for_each_engine(engine, i915, id)
- engine->cancel_requests(engine);
-
- for_each_engine(engine, i915, id) {
- i915_gem_reset_finish_engine(engine);
- intel_engine_wakeup(engine);
- }
-
-out:
- GEM_TRACE("end\n");
-
- wake_up_all(&i915->gpu_error.reset_queue);
-}
-
-bool i915_gem_unset_wedged(struct drm_i915_private *i915)
-{
- struct i915_timeline *tl;
-
- lockdep_assert_held(&i915->drm.struct_mutex);
- if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
- return true;
-
- GEM_TRACE("start\n");
-
- /*
- * Before unwedging, make sure that all pending operations
- * are flushed and errored out - we may have requests waiting upon
- * third party fences. We marked all inflight requests as EIO, and
- * every execbuf since returned EIO, for consistency we want all
- * the currently pending requests to also be marked as EIO, which
- * is done inside our nop_submit_request - and so we must wait.
- *
- * No more can be submitted until we reset the wedged bit.
- */
- list_for_each_entry(tl, &i915->gt.timelines, link) {
- struct i915_request *rq;
-
- rq = i915_gem_active_peek(&tl->last_request,
- &i915->drm.struct_mutex);
- if (!rq)
- continue;
-
- /*
- * We can't use our normal waiter as we want to
- * avoid recursively trying to handle the current
- * reset. The basic dma_fence_default_wait() installs
- * a callback for dma_fence_signal(), which is
- * triggered by our nop handler (indirectly, the
- * callback enables the signaler thread which is
- * woken by the nop_submit_request() advancing the seqno
- * and when the seqno passes the fence, the signaler
- * then signals the fence waking us up).
- */
- if (dma_fence_default_wait(&rq->fence, true,
- MAX_SCHEDULE_TIMEOUT) < 0)
- return false;
- }
- i915_retire_requests(i915);
- GEM_BUG_ON(i915->gt.active_requests);
-
- if (!intel_gpu_reset(i915, ALL_ENGINES))
- intel_engines_sanitize(i915);
-
- /*
- * Undo nop_submit_request. We prevent all new i915 requests from
- * being queued (by disallowing execbuf whilst wedged) so having
- * waited for all active requests above, we know the system is idle
- * and do not have to worry about a thread being inside
- * engine->submit_request() as we swap over. So unlike installing
- * the nop_submit_request on reset, we can do this from normal
- * context and do not require stop_machine().
- */
- intel_engines_reset_default_submission(i915);
- i915_gem_contexts_lost(i915);
-
- GEM_TRACE("end\n");
-
- smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
- clear_bit(I915_WEDGED, &i915->gpu_error.flags);
-
- return true;
-}
-
static void
i915_gem_retire_work_handler(struct work_struct *work)
{
@@ -3547,7 +3038,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
GEM_BUG_ON(i915->gt.active_requests);
for_each_engine(engine, i915, id) {
- GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request));
+ GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
GEM_BUG_ON(engine->last_retired_context !=
to_intel_context(i915->kernel_context, engine));
}
@@ -3766,33 +3257,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
return ret;
}
-static long wait_for_timeline(struct i915_timeline *tl,
- unsigned int flags, long timeout)
-{
- struct i915_request *rq;
-
- rq = i915_gem_active_get_unlocked(&tl->last_request);
- if (!rq)
- return timeout;
-
- /*
- * "Race-to-idle".
- *
- * Switching to the kernel context is often used a synchronous
- * step prior to idling, e.g. in suspend for flushing all
- * current operations to memory before sleeping. These we
- * want to complete as quickly as possible to avoid prolonged
- * stalls, so allow the gpu to boost to maximum clocks.
- */
- if (flags & I915_WAIT_FOR_IDLE_BOOST)
- gen6_rps_boost(rq, NULL);
-
- timeout = i915_request_wait(rq, flags, timeout);
- i915_request_put(rq);
-
- return timeout;
-}
-
static int wait_for_engines(struct drm_i915_private *i915)
{
if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
@@ -3806,6 +3270,52 @@ static int wait_for_engines(struct drm_i915_private *i915)
return 0;
}
+static long
+wait_for_timelines(struct drm_i915_private *i915,
+ unsigned int flags, long timeout)
+{
+ struct i915_gt_timelines *gt = &i915->gt.timelines;
+ struct i915_timeline *tl;
+
+ if (!READ_ONCE(i915->gt.active_requests))
+ return timeout;
+
+ mutex_lock(&gt->mutex);
+ list_for_each_entry(tl, &gt->active_list, link) {
+ struct i915_request *rq;
+
+ rq = i915_active_request_get_unlocked(&tl->last_request);
+ if (!rq)
+ continue;
+
+ mutex_unlock(&gt->mutex);
+
+ /*
+ * "Race-to-idle".
+ *
+ * Switching to the kernel context is often used a synchronous
+ * step prior to idling, e.g. in suspend for flushing all
+ * current operations to memory before sleeping. These we
+ * want to complete as quickly as possible to avoid prolonged
+ * stalls, so allow the gpu to boost to maximum clocks.
+ */
+ if (flags & I915_WAIT_FOR_IDLE_BOOST)
+ gen6_rps_boost(rq, NULL);
+
+ timeout = i915_request_wait(rq, flags, timeout);
+ i915_request_put(rq);
+ if (timeout < 0)
+ return timeout;
+
+ /* restart after reacquiring the lock */
+ mutex_lock(&gt->mutex);
+ tl = list_entry(&gt->active_list, typeof(*tl), link);
+ }
+ mutex_unlock(&gt->mutex);
+
+ return timeout;
+}
+
int i915_gem_wait_for_idle(struct drm_i915_private *i915,
unsigned int flags, long timeout)
{
@@ -3817,17 +3327,15 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
if (!READ_ONCE(i915->gt.awake))
return 0;
+ timeout = wait_for_timelines(i915, flags, timeout);
+ if (timeout < 0)
+ return timeout;
+
if (flags & I915_WAIT_LOCKED) {
- struct i915_timeline *tl;
int err;
lockdep_assert_held(&i915->drm.struct_mutex);
- list_for_each_entry(tl, &i915->gt.timelines, link) {
- timeout = wait_for_timeline(tl, flags, timeout);
- if (timeout < 0)
- return timeout;
- }
if (GEM_SHOW_DEBUG() && !timeout) {
/* Presume that timeout was non-zero to begin with! */
dev_warn(&i915->drm.pdev->dev,
@@ -3841,17 +3349,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
i915_retire_requests(i915);
GEM_BUG_ON(i915->gt.active_requests);
- } else {
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, i915, id) {
- struct i915_timeline *tl = &engine->timeline;
-
- timeout = wait_for_timeline(tl, flags, timeout);
- if (timeout < 0)
- return timeout;
- }
}
return 0;
@@ -4037,7 +3534,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
* reading an invalid PTE on older architectures.
*/
restart:
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ list_for_each_entry(vma, &obj->vma.list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
@@ -4115,7 +3612,7 @@ restart:
*/
}
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ list_for_each_entry(vma, &obj->vma.list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
@@ -4125,7 +3622,7 @@ restart:
}
}
- list_for_each_entry(vma, &obj->vma_list, obj_link)
+ list_for_each_entry(vma, &obj->vma.list, obj_link)
vma->node.color = cache_level;
i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true; /* Always invalidate stale cachelines */
@@ -4688,7 +4185,8 @@ out:
}
static void
-frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request)
+frontbuffer_retire(struct i915_active_request *active,
+ struct i915_request *request)
{
struct drm_i915_gem_object *obj =
container_of(active, typeof(*obj), frontbuffer_write);
@@ -4701,7 +4199,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
{
mutex_init(&obj->mm.lock);
- INIT_LIST_HEAD(&obj->vma_list);
+ spin_lock_init(&obj->vma.lock);
+ INIT_LIST_HEAD(&obj->vma.list);
+
INIT_LIST_HEAD(&obj->lut_list);
INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4713,7 +4213,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->resv = &obj->__builtin_resv;
obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
- init_request_active(&obj->frontbuffer_write, frontbuffer_retire);
+ i915_active_request_init(&obj->frontbuffer_write,
+ NULL, frontbuffer_retire);
obj->mm.madv = I915_MADV_WILLNEED;
INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
@@ -4856,8 +4357,9 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
struct llist_node *freed)
{
struct drm_i915_gem_object *obj, *on;
+ intel_wakeref_t wakeref;
- intel_runtime_pm_get(i915);
+ wakeref = intel_runtime_pm_get(i915);
llist_for_each_entry_safe(obj, on, freed, freed) {
struct i915_vma *vma, *vn;
@@ -4866,14 +4368,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
mutex_lock(&i915->drm.struct_mutex);
GEM_BUG_ON(i915_gem_object_is_active(obj));
- list_for_each_entry_safe(vma, vn,
- &obj->vma_list, obj_link) {
+ list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
GEM_BUG_ON(i915_vma_is_active(vma));
vma->flags &= ~I915_VMA_PIN_MASK;
i915_vma_destroy(vma);
}
- GEM_BUG_ON(!list_empty(&obj->vma_list));
- GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
+ GEM_BUG_ON(!list_empty(&obj->vma.list));
+ GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
/* This serializes freeing with the shrinker. Since the free
* is delayed, first by RCU then by the workqueue, we want the
@@ -4918,7 +4419,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
if (on)
cond_resched();
}
- intel_runtime_pm_put(i915);
+ intel_runtime_pm_put(i915, wakeref);
}
static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
@@ -5027,13 +4528,11 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
void i915_gem_sanitize(struct drm_i915_private *i915)
{
- int err;
+ intel_wakeref_t wakeref;
GEM_TRACE("\n");
- mutex_lock(&i915->drm.struct_mutex);
-
- intel_runtime_pm_get(i915);
+ wakeref = intel_runtime_pm_get(i915);
intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
/*
@@ -5053,28 +4552,28 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
* it may impact the display and we are uncertain about the stability
* of the reset, so this could be applied to even earlier gen.
*/
- err = -ENODEV;
- if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
- err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
- if (!err)
- intel_engines_sanitize(i915);
+ intel_engines_sanitize(i915, false);
intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
- intel_runtime_pm_put(i915);
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_lock(&i915->drm.struct_mutex);
i915_gem_contexts_lost(i915);
mutex_unlock(&i915->drm.struct_mutex);
}
int i915_gem_suspend(struct drm_i915_private *i915)
{
+ intel_wakeref_t wakeref;
int ret;
GEM_TRACE("\n");
- intel_runtime_pm_get(i915);
+ wakeref = intel_runtime_pm_get(i915);
intel_suspend_gt_powersave(i915);
+ flush_workqueue(i915->wq);
+
mutex_lock(&i915->drm.struct_mutex);
/*
@@ -5104,11 +4603,9 @@ int i915_gem_suspend(struct drm_i915_private *i915)
i915_retire_requests(i915); /* ensure we flush after wedging */
mutex_unlock(&i915->drm.struct_mutex);
+ i915_reset_flush(i915);
- intel_uc_suspend(i915);
-
- cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
- cancel_delayed_work_sync(&i915->gt.retire_work);
+ drain_delayed_work(&i915->gt.retire_work);
/*
* As the idle_work is rearming if it detects a race, play safe and
@@ -5116,6 +4613,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)
*/
drain_delayed_work(&i915->gt.idle_work);
+ intel_uc_suspend(i915);
+
/*
* Assert that we successfully flushed all the work and
* reset the GPU back to its idle, low power state.
@@ -5124,12 +4623,12 @@ int i915_gem_suspend(struct drm_i915_private *i915)
if (WARN_ON(!intel_engines_are_idle(i915)))
i915_gem_set_wedged(i915); /* no hope, discard everything */
- intel_runtime_pm_put(i915);
+ intel_runtime_pm_put(i915, wakeref);
return 0;
err_unlock:
mutex_unlock(&i915->drm.struct_mutex);
- intel_runtime_pm_put(i915);
+ intel_runtime_pm_put(i915, wakeref);
return ret;
}
@@ -5223,15 +4722,15 @@ void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
DISP_TILE_SURFACE_SWIZZLING);
- if (IS_GEN5(dev_priv))
+ if (IS_GEN(dev_priv, 5))
return;
I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
- if (IS_GEN6(dev_priv))
+ if (IS_GEN(dev_priv, 6))
I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
- else if (IS_GEN7(dev_priv))
+ else if (IS_GEN(dev_priv, 7))
I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
- else if (IS_GEN8(dev_priv))
+ else if (IS_GEN(dev_priv, 8))
I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
else
BUG();
@@ -5253,10 +4752,10 @@ static void init_unused_rings(struct drm_i915_private *dev_priv)
init_unused_ring(dev_priv, SRB1_BASE);
init_unused_ring(dev_priv, SRB2_BASE);
init_unused_ring(dev_priv, SRB3_BASE);
- } else if (IS_GEN2(dev_priv)) {
+ } else if (IS_GEN(dev_priv, 2)) {
init_unused_ring(dev_priv, SRB0_BASE);
init_unused_ring(dev_priv, SRB1_BASE);
- } else if (IS_GEN3(dev_priv)) {
+ } else if (IS_GEN(dev_priv, 3)) {
init_unused_ring(dev_priv, PRB1_BASE);
init_unused_ring(dev_priv, PRB2_BASE);
}
@@ -5552,6 +5051,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
dev_priv->gt.cleanup_engine = intel_engine_cleanup;
}
+ i915_timelines_init(dev_priv);
+
ret = i915_gem_init_userptr(dev_priv);
if (ret)
return ret;
@@ -5580,7 +5081,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
}
ret = i915_gem_init_scratch(dev_priv,
- IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE);
+ IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
if (ret) {
GEM_BUG_ON(ret == -EIO);
goto err_ggtt;
@@ -5674,8 +5175,10 @@ err_unlock:
err_uc_misc:
intel_uc_fini_misc(dev_priv);
- if (ret != -EIO)
+ if (ret != -EIO) {
i915_gem_cleanup_userptr(dev_priv);
+ i915_timelines_fini(dev_priv);
+ }
if (ret == -EIO) {
mutex_lock(&dev_priv->drm.struct_mutex);
@@ -5726,6 +5229,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
intel_uc_fini_misc(dev_priv);
i915_gem_cleanup_userptr(dev_priv);
+ i915_timelines_fini(dev_priv);
i915_gem_drain_freed_objects(dev_priv);
@@ -5828,7 +5332,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
if (!dev_priv->priorities)
goto err_dependencies;
- INIT_LIST_HEAD(&dev_priv->gt.timelines);
INIT_LIST_HEAD(&dev_priv->gt.active_rings);
INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
@@ -5840,6 +5343,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
i915_gem_idle_work_handler);
init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
+ mutex_init(&dev_priv->gpu_error.wedge_mutex);
atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
@@ -5871,7 +5375,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
WARN_ON(dev_priv->mm.object_count);
- WARN_ON(!list_empty(&dev_priv->gt.timelines));
kmem_cache_destroy(dev_priv->priorities);
kmem_cache_destroy(dev_priv->dependencies);