From d865110cc2345d67752cd7e1350b391c34feb2aa Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 7 Jan 2013 21:47:33 +0200 Subject: drm/i915: merge get_gtt_alignment/get_unfenced_gtt_alignment() The two functions are rather similar, so merge them. Signed-off-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 44 +++++++---------------------------------- 1 file changed, 7 insertions(+), 37 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e6cc020ea32c..2166b61053c6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1463,16 +1463,15 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) * Return the required GTT alignment for an object, taking into account * potential fence register mapping. */ -static uint32_t -i915_gem_get_gtt_alignment(struct drm_device *dev, - uint32_t size, - int tiling_mode) +uint32_t +i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, + int tiling_mode, bool fenced) { /* * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ - if (INTEL_INFO(dev)->gen >= 4 || + if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || tiling_mode == I915_TILING_NONE) return 4096; @@ -1483,35 +1482,6 @@ i915_gem_get_gtt_alignment(struct drm_device *dev, return i915_gem_get_gtt_size(dev, size, tiling_mode); } -/** - * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an - * unfenced object - * @dev: the device - * @size: size of the object - * @tiling_mode: tiling mode of the object - * - * Return the required GTT alignment for an object, only taking into account - * unfenced tiled surface requirements. - */ -uint32_t -i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, - uint32_t size, - int tiling_mode) -{ - /* - * Minimum alignment is 4k (GTT page size) for sane hw. - */ - if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) || - tiling_mode == I915_TILING_NONE) - return 4096; - - /* Previous hardware however needs to be aligned to a power-of-two - * tile height. The simplest method for determining this is to reuse - * the power-of-tile object size. - */ - return i915_gem_get_gtt_size(dev, size, tiling_mode); -} - static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; @@ -2934,11 +2904,11 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, obj->tiling_mode); fence_alignment = i915_gem_get_gtt_alignment(dev, obj->base.size, - obj->tiling_mode); + obj->tiling_mode, true); unfenced_alignment = - i915_gem_get_unfenced_gtt_alignment(dev, + i915_gem_get_gtt_alignment(dev, obj->base.size, - obj->tiling_mode); + obj->tiling_mode, false); if (alignment == 0) alignment = map_and_fenceable ? fence_alignment : -- cgit v1.2.3-59-g8ed1b From 56c844e539f1f6f5768c5f73f119e6f4aed9d320 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 7 Jan 2013 21:47:34 +0200 Subject: drm/i915: merge {i965, sandybridge}_write_fence_reg() The two functions are rather similar, so merge them. Signed-off-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 44 ++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2166b61053c6..718574eb66de 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2507,52 +2507,38 @@ int i915_gpu_idle(struct drm_device *dev) return 0; } -static void sandybridge_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - uint64_t val; - - if (obj) { - u32 size = obj->gtt_space->size; - - val = (uint64_t)((obj->gtt_offset + size - 4096) & - 0xfffff000) << 32; - val |= obj->gtt_offset & 0xfffff000; - val |= (uint64_t)((obj->stride / 128) - 1) << - SANDYBRIDGE_FENCE_PITCH_SHIFT; - - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I965_FENCE_TILING_Y_SHIFT; - val |= I965_FENCE_REG_VALID; - } else - val = 0; - - I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val); - POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8); -} - static void i965_write_fence_reg(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj) { drm_i915_private_t *dev_priv = dev->dev_private; + int fence_reg; + int fence_pitch_shift; uint64_t val; + if (INTEL_INFO(dev)->gen >= 6) { + fence_reg = FENCE_REG_SANDYBRIDGE_0; + fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; + } else { + fence_reg = FENCE_REG_965_0; + fence_pitch_shift = I965_FENCE_PITCH_SHIFT; + } + if (obj) { u32 size = obj->gtt_space->size; val = (uint64_t)((obj->gtt_offset + size - 4096) & 0xfffff000) << 32; val |= obj->gtt_offset & 0xfffff000; - val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; + val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; if (obj->tiling_mode == I915_TILING_Y) val |= 1 << I965_FENCE_TILING_Y_SHIFT; val |= I965_FENCE_REG_VALID; } else val = 0; - I915_WRITE64(FENCE_REG_965_0 + reg * 8, val); - POSTING_READ(FENCE_REG_965_0 + reg * 8); + fence_reg += reg * 8; + I915_WRITE64(fence_reg, val); + POSTING_READ(fence_reg); } static void i915_write_fence_reg(struct drm_device *dev, int reg, @@ -2636,7 +2622,7 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, { switch (INTEL_INFO(dev)->gen) { case 7: - case 6: sandybridge_write_fence_reg(dev, reg, obj); break; + case 6: case 5: case 4: i965_write_fence_reg(dev, reg, obj); break; case 3: i915_write_fence_reg(dev, reg, obj); break; -- cgit v1.2.3-59-g8ed1b From 0fa877965194fa79fe87944844185d90cfc35352 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 7 Jan 2013 21:47:35 +0200 Subject: drm/i915: use gtt_get_size() instead of open coding it Signed-off-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_tiling.c | 13 +------------ 3 files changed, 4 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0b09361d37ae..35ecabc711ed 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1564,6 +1564,8 @@ void i915_gem_detach_phys_object(struct drm_device *dev, void i915_gem_free_all_phys_object(struct drm_device *dev); void i915_gem_release(struct drm_device *dev, struct drm_file *file); +uint32_t +i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode); uint32_t i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, int tiling_mode, bool fenced); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 718574eb66de..313bdbaba3cd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1435,7 +1435,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) obj->fault_mappable = false; } -static uint32_t +uint32_t i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) { uint32_t gtt_size; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index cb71ded7122e..e76f0d8470a1 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -272,18 +272,7 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) return false; } - /* - * Previous chips need to be aligned to the size of the smallest - * fence register that can contain the object. - */ - if (INTEL_INFO(obj->base.dev)->gen == 3) - size = 1024*1024; - else - size = 512*1024; - - while (size < obj->base.size) - size <<= 1; - + size = i915_gem_get_gtt_size(obj->base.dev, obj->base.size, tiling_mode); if (obj->gtt_space->size != size) return false; -- cgit v1.2.3-59-g8ed1b From dd624afd533bdc87b8c10835515a0c8b2b9868b1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Jan 2013 12:39:35 +0000 Subject: drm/i915: Add a debug interface to forcibly evict and shrink our object caches As a means to investigate some bad system behaviour related to the purging of the active, inactive and unbound lists, it is useful to be able to manually control when those lists should be cleared. v2: use _safe list iterators as we kick objects from the list as we walk. Signed-off-by: Chris Wilson [danvet: Add a small comment explaining why we don't need to check and wait for gpu resets, acked by Chris on irc.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 104 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 2 +- 3 files changed, 106 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f7d88e99ebf0..f94418bd1ed2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1776,6 +1776,102 @@ static const struct file_operations i915_ring_stop_fops = { .llseek = default_llseek, }; +#define DROP_UNBOUND 0x1 +#define DROP_BOUND 0x2 +#define DROP_RETIRE 0x4 +#define DROP_ACTIVE 0x8 +#define DROP_ALL (DROP_UNBOUND | \ + DROP_BOUND | \ + DROP_RETIRE | \ + DROP_ACTIVE) +static ssize_t +i915_drop_caches_read(struct file *filp, + char __user *ubuf, + size_t max, + loff_t *ppos) +{ + char buf[20]; + int len; + + len = snprintf(buf, sizeof(buf), "0x%08x\n", DROP_ALL); + if (len > sizeof(buf)) + len = sizeof(buf); + + return simple_read_from_buffer(ubuf, max, ppos, buf, len); +} + +static ssize_t +i915_drop_caches_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + struct drm_device *dev = filp->private_data; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj, *next; + char buf[20]; + int val = 0, ret; + + if (cnt > 0) { + if (cnt > sizeof(buf) - 1) + return -EINVAL; + + if (copy_from_user(buf, ubuf, cnt)) + return -EFAULT; + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 0); + } + + DRM_DEBUG_DRIVER("Dropping caches: 0x%08x\n", val); + + /* No need to check and wait for gpu resets, only libdrm auto-restarts + * on ioctls on -EAGAIN. */ + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + if (val & DROP_ACTIVE) { + ret = i915_gpu_idle(dev); + if (ret) + goto unlock; + } + + if (val & (DROP_RETIRE | DROP_ACTIVE)) + i915_gem_retire_requests(dev); + + if (val & DROP_BOUND) { + list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, mm_list) + if (obj->pin_count == 0) { + ret = i915_gem_object_unbind(obj); + if (ret) + goto unlock; + } + } + + if (val & DROP_UNBOUND) { + list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list) + if (obj->pages_pin_count == 0) { + ret = i915_gem_object_put_pages(obj); + if (ret) + goto unlock; + } + } + +unlock: + mutex_unlock(&dev->struct_mutex); + + return ret ?: cnt; +} + +static const struct file_operations i915_drop_caches_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = i915_drop_caches_read, + .write = i915_drop_caches_write, + .llseek = default_llseek, +}; + static ssize_t i915_max_freq_read(struct file *filp, char __user *ubuf, @@ -2172,6 +2268,12 @@ int i915_debugfs_init(struct drm_minor *minor) if (ret) return ret; + ret = i915_debugfs_create(minor->debugfs_root, minor, + "i915_gem_drop_caches", + &i915_drop_caches_fops); + if (ret) + return ret; + ret = i915_debugfs_create(minor->debugfs_root, minor, "i915_error_state", &i915_error_state_fops); @@ -2203,6 +2305,8 @@ void i915_debugfs_cleanup(struct drm_minor *minor) 1, minor); drm_debugfs_remove_files((struct drm_info_list *) &i915_cache_sharing_fops, 1, minor); + drm_debugfs_remove_files((struct drm_info_list *) &i915_drop_caches_fops, + 1, minor); drm_debugfs_remove_files((struct drm_info_list *) &i915_ring_stop_fops, 1, minor); drm_debugfs_remove_files((struct drm_info_list *) &i915_error_state_fops, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 35ecabc711ed..8f5b2816fc03 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1445,6 +1445,7 @@ int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, bool nonblocking); void i915_gem_object_unpin(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_unbind(struct drm_i915_gem_object *obj); +int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); void i915_gem_lastclose(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 313bdbaba3cd..9d33fb3f8d42 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1662,7 +1662,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) kfree(obj->pages); } -static int +int i915_gem_object_put_pages(struct drm_i915_gem_object *obj) { const struct drm_i915_gem_object_ops *ops = obj->ops; -- cgit v1.2.3-59-g8ed1b From 43e28f092b2fa4ebc46bdc210134a80610815785 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Jan 2013 10:53:09 +0000 Subject: drm/i915: Bail if we attempt to allocate pages for a purged object Move the existing checking inside bind_to_gtt() to the more appropriate layer in order to prevent recreation of the pages after they have been explicitly truncated. Signed-off-by: Chris Wilson Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9d33fb3f8d42..a2bb18914329 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1828,6 +1828,11 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) if (obj->pages) return 0; + if (obj->madv != I915_MADV_WILLNEED) { + DRM_ERROR("Attempting to obtain a purgeable object\n"); + return -EINVAL; + } + BUG_ON(obj->pages_pin_count); ret = ops->get_pages(obj); @@ -2440,7 +2445,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { drm_i915_private_t *dev_priv = obj->base.dev->dev_private; - int ret = 0; + int ret; if (obj->gtt_space == NULL) return 0; @@ -2880,11 +2885,6 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, bool mappable, fenceable; int ret; - if (obj->madv != I915_MADV_WILLNEED) { - DRM_ERROR("Attempting to bind a purgeable object\n"); - return -EINVAL; - } - fence_size = i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode); -- cgit v1.2.3-59-g8ed1b From 5d4545aef561ad47f91bcf75814af20c104b5a9e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 17 Jan 2013 12:45:15 -0800 Subject: drm/i915: Create a gtt structure The purpose of the gtt structure is to help isolate our gtt specific properties from the rest of the code (in doing so it help us finish the isolation from the AGP connection). The following members are pulled out (and renamed): gtt_start gtt_total gtt_mappable_end gtt_mappable gtt_base_addr gsm The gtt structure will serve as a nice place to put gen specific gtt routines in upcoming patches. As far as what else I feel belongs in this structure: it is meant to encapsulate the GTT's physical properties. This is why I've not added fields which track various drm_mm properties, or things like gtt_mtrr (which is itself a pretty transient field). Reviewed-by: Rodrigo Vivi [Ben modified commit messages] Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_dma.c | 20 ++++++++++---------- drivers/gpu/drm/i915/i915_drv.h | 29 +++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gem.c | 14 +++++++------- drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 24 ++++++++++++------------ drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_fb.c | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 4 ++-- 12 files changed, 61 insertions(+), 48 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 35d326d70fab..773b23ecc83b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -259,8 +259,8 @@ static int i915_gem_object_info(struct seq_file *m, void* data) count, size); seq_printf(m, "%zu [%zu] gtt total\n", - dev_priv->mm.gtt_total, - dev_priv->mm.gtt_mappable_end - dev_priv->mm.gtt_start); + dev_priv->gtt.total, + dev_priv->gtt.mappable_end - dev_priv->gtt.start); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 442118293883..bb622135798a 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1076,7 +1076,7 @@ static int i915_set_status_page(struct drm_device *dev, void *data, ring->status_page.gfx_addr = hws->addr & (0x1ffff<<12); dev_priv->dri1.gfx_hws_cpu_addr = - ioremap_wc(dev_priv->mm.gtt_base_addr + hws->addr, 4096); + ioremap_wc(dev_priv->gtt.mappable_base + hws->addr, 4096); if (dev_priv->dri1.gfx_hws_cpu_addr == NULL) { i915_dma_cleanup(dev); ring->status_page.gfx_addr = 0; @@ -1543,17 +1543,17 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) } aperture_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; - dev_priv->mm.gtt_base_addr = dev_priv->mm.gtt->gma_bus_addr; + dev_priv->gtt.mappable_base = dev_priv->mm.gtt->gma_bus_addr; - dev_priv->mm.gtt_mapping = - io_mapping_create_wc(dev_priv->mm.gtt_base_addr, + dev_priv->gtt.mappable = + io_mapping_create_wc(dev_priv->gtt.mappable_base, aperture_size); - if (dev_priv->mm.gtt_mapping == NULL) { + if (dev_priv->gtt.mappable == NULL) { ret = -EIO; goto out_rmmap; } - i915_mtrr_setup(dev_priv, dev_priv->mm.gtt_base_addr, + i915_mtrr_setup(dev_priv, dev_priv->gtt.mappable_base, aperture_size); /* The i915 workqueue is primarily used for batched retirement of @@ -1658,11 +1658,11 @@ out_gem_unload: out_mtrrfree: if (dev_priv->mm.gtt_mtrr >= 0) { mtrr_del(dev_priv->mm.gtt_mtrr, - dev_priv->mm.gtt_base_addr, + dev_priv->gtt.mappable_base, aperture_size); dev_priv->mm.gtt_mtrr = -1; } - io_mapping_free(dev_priv->mm.gtt_mapping); + io_mapping_free(dev_priv->gtt.mappable); out_rmmap: pci_iounmap(dev->pdev, dev_priv->regs); put_gmch: @@ -1696,10 +1696,10 @@ int i915_driver_unload(struct drm_device *dev) /* Cancel the retire work handler, which should be idle now. */ cancel_delayed_work_sync(&dev_priv->mm.retire_work); - io_mapping_free(dev_priv->mm.gtt_mapping); + io_mapping_free(dev_priv->gtt.mappable); if (dev_priv->mm.gtt_mtrr >= 0) { mtrr_del(dev_priv->mm.gtt_mtrr, - dev_priv->mm.gtt_base_addr, + dev_priv->gtt.mappable_base, dev_priv->mm.gtt->gtt_mappable_entries * PAGE_SIZE); dev_priv->mm.gtt_mtrr = -1; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6e2c10b65c8d..f3f2e5e1393f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -364,6 +364,25 @@ struct intel_device_info { u8 has_llc:1; }; +/* The Graphics Translation Table is the way in which GEN hardware translates a + * Graphics Virtual Address into a Physical Address. In addition to the normal + * collateral associated with any va->pa translations GEN hardware also has a + * portion of the GTT which can be mapped by the CPU and remain both coherent + * and correct (in cases like swizzling). That region is referred to as GMADR in + * the spec. + */ +struct i915_gtt { + unsigned long start; /* Start offset of used GTT */ + size_t total; /* Total size GTT can map */ + + unsigned long mappable_end; /* End offset that we can CPU map */ + struct io_mapping *mappable; /* Mapping to our CPU mappable region */ + phys_addr_t mappable_base; /* PA of our GMADR */ + + /** "Graphics Stolen Memory" holds the global PTEs */ + void __iomem *gsm; +}; + #define I915_PPGTT_PD_ENTRIES 512 #define I915_PPGTT_PT_ENTRIES 1024 struct i915_hw_ppgtt { @@ -781,6 +800,8 @@ typedef struct drm_i915_private { /* Register state */ bool modeset_on_lid; + struct i915_gtt gtt; + struct { /** Bridge to intel-gtt-ko */ struct intel_gtt *gtt; @@ -799,15 +820,8 @@ typedef struct drm_i915_private { struct list_head unbound_list; /** Usable portion of the GTT for GEM */ - unsigned long gtt_start; - unsigned long gtt_mappable_end; unsigned long stolen_base; /* limited to low memory (32-bit) */ - /** "Graphics Stolen Memory" holds the global PTEs */ - void __iomem *gsm; - - struct io_mapping *gtt_mapping; - phys_addr_t gtt_base_addr; int gtt_mtrr; /** PPGTT used for aliasing the PPGTT with the GTT */ @@ -885,7 +899,6 @@ typedef struct drm_i915_private { struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT]; /* accounting, useful for userland debugging */ - size_t gtt_total; size_t object_memory; u32 object_count; } mm; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a2bb18914329..51fdf16181a7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -186,7 +186,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, pinned += obj->gtt_space->size; mutex_unlock(&dev->struct_mutex); - args->aper_size = dev_priv->mm.gtt_total; + args->aper_size = dev_priv->gtt.total; args->aper_available_size = args->aper_size - pinned; return 0; @@ -637,7 +637,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, * source page isn't available. Return the error and we'll * retry in the slow path. */ - if (fast_user_write(dev_priv->mm.gtt_mapping, page_base, + if (fast_user_write(dev_priv->gtt.mappable, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; goto out_unpin; @@ -1362,7 +1362,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) obj->fault_mappable = true; - pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) + + pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) + page_offset; /* Finally, remap it using the new GTT offset */ @@ -1544,7 +1544,7 @@ i915_gem_mmap_gtt(struct drm_file *file, goto unlock; } - if (obj->base.size > dev_priv->mm.gtt_mappable_end) { + if (obj->base.size > dev_priv->gtt.mappable_end) { ret = -E2BIG; goto out; } @@ -2910,7 +2910,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, * before evicting everything in a vain attempt to find space. */ if (obj->base.size > - (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { + (map_and_fenceable ? dev_priv->gtt.mappable_end : dev_priv->gtt.total)) { DRM_ERROR("Attempting to bind an object larger than the aperture\n"); return -E2BIG; } @@ -2931,7 +2931,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, if (map_and_fenceable) ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node, size, alignment, obj->cache_level, - 0, dev_priv->mm.gtt_mappable_end); + 0, dev_priv->gtt.mappable_end); else ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node, size, alignment, obj->cache_level); @@ -2971,7 +2971,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, (node->start & (fence_alignment - 1)) == 0; mappable = - obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; + obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end; obj->map_and_fenceable = mappable && fenceable; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 776a3225184c..c86d5d9356fd 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -80,7 +80,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size, if (mappable) drm_mm_init_scan_with_range(&dev_priv->mm.gtt_space, min_size, alignment, cache_level, - 0, dev_priv->mm.gtt_mappable_end); + 0, dev_priv->gtt.mappable_end); else drm_mm_init_scan(&dev_priv->mm.gtt_space, min_size, alignment, cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f5a11ecf5494..27269103b621 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -281,7 +281,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Map the page containing the relocation we're going to perform. */ reloc->offset += obj->gtt_offset; - reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, + reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, reloc->offset & PAGE_MASK); reloc_entry = (uint32_t __iomem *) (reloc_page + (reloc->offset & ~PAGE_MASK)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8c42ddd467b6..61bfb12e1016 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -290,7 +290,7 @@ void i915_gem_init_ppgtt(struct drm_device *dev) return; - pd_addr = (gtt_pte_t __iomem*)dev_priv->mm.gsm + ppgtt->pd_offset/sizeof(gtt_pte_t); + pd_addr = (gtt_pte_t __iomem*)dev_priv->gtt.gsm + ppgtt->pd_offset/sizeof(gtt_pte_t); for (i = 0; i < ppgtt->num_pd_entries; i++) { dma_addr_t pt_addr; @@ -367,7 +367,7 @@ static void i915_ggtt_clear_range(struct drm_device *dev, { struct drm_i915_private *dev_priv = dev->dev_private; gtt_pte_t scratch_pte; - gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->mm.gsm + first_entry; + gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry; int i; @@ -393,8 +393,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) struct drm_i915_gem_object *obj; /* First fill our portion of the GTT with scratch pages */ - i915_ggtt_clear_range(dev, dev_priv->mm.gtt_start / PAGE_SIZE, - dev_priv->mm.gtt_total / PAGE_SIZE); + i915_ggtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE, + dev_priv->gtt.total / PAGE_SIZE); list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) { i915_gem_clflush_object(obj); @@ -433,7 +433,7 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj, const int first_entry = obj->gtt_space->start >> PAGE_SHIFT; const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry; gtt_pte_t __iomem *gtt_entries = - (gtt_pte_t __iomem *)dev_priv->mm.gsm + first_entry; + (gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; int unused, i = 0; unsigned int len, m = 0; dma_addr_t addr; @@ -556,9 +556,9 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, obj->has_global_gtt_mapping = 1; } - dev_priv->mm.gtt_start = start; - dev_priv->mm.gtt_mappable_end = mappable_end; - dev_priv->mm.gtt_total = end - start; + dev_priv->gtt.start = start; + dev_priv->gtt.mappable_end = mappable_end; + dev_priv->gtt.total = end - start; /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &dev_priv->mm.gtt_space, @@ -752,9 +752,9 @@ int i915_gem_gtt_init(struct drm_device *dev) goto err_out; } - dev_priv->mm.gsm = ioremap_wc(gtt_bus_addr, - dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t)); - if (!dev_priv->mm.gsm) { + dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, + dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t)); + if (!dev_priv->gtt.gsm) { DRM_ERROR("Failed to map the gtt page table\n"); teardown_scratch_page(dev); ret = -ENOMEM; @@ -778,7 +778,7 @@ err_out: void i915_gem_gtt_fini(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - iounmap(dev_priv->mm.gsm); + iounmap(dev_priv->gtt.gsm); teardown_scratch_page(dev); if (INTEL_INFO(dev)->gen < 6) intel_gmch_remove(); diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index e76f0d8470a1..abcba2f5a788 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -357,7 +357,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, obj->map_and_fenceable = obj->gtt_space == NULL || - (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end && + (obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end && i915_gem_object_fence_ok(obj, args->tiling_mode)); /* Rebind if we need a change of alignment */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 202813128441..cc49a6ddc052 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -939,7 +939,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv, goto unwind; local_irq_save(flags); - if (reloc_offset < dev_priv->mm.gtt_mappable_end && + if (reloc_offset < dev_priv->gtt.mappable_end && src->has_global_gtt_mapping) { void __iomem *s; @@ -948,7 +948,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv, * captures what the GPU read. */ - s = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, + s = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, reloc_offset); memcpy_fromio(d, s, PAGE_SIZE); io_mapping_unmap_atomic(s); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 40b6b5e9d6ef..e4c5067a54d3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8687,7 +8687,7 @@ void intel_modeset_init(struct drm_device *dev) dev->mode_config.max_width = 8192; dev->mode_config.max_height = 8192; } - dev->mode_config.fb_base = dev_priv->mm.gtt_base_addr; + dev->mode_config.fb_base = dev_priv->gtt.mappable_base; DRM_DEBUG_KMS("%d display pipe%s available.\n", dev_priv->num_pipe, dev_priv->num_pipe > 1 ? "s" : ""); diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 71d55801c0d9..ce02af8ca96e 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -142,7 +142,7 @@ static int intelfb_create(struct intel_fbdev *ifbdev, info->fix.smem_len = size; info->screen_base = - ioremap_wc(dev_priv->mm.gtt_base_addr + obj->gtt_offset, + ioremap_wc(dev_priv->gtt.mappable_base + obj->gtt_offset, size); if (!info->screen_base) { ret = -ENOSPC; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index fabe0acf808d..ba978bf93a2e 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -195,7 +195,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay) if (OVERLAY_NEEDS_PHYSICAL(overlay->dev)) regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_obj->handle->vaddr; else - regs = io_mapping_map_wc(dev_priv->mm.gtt_mapping, + regs = io_mapping_map_wc(dev_priv->gtt.mappable, overlay->reg_bo->gtt_offset); return regs; @@ -1434,7 +1434,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay *overlay) regs = (struct overlay_registers __iomem *) overlay->reg_bo->phys_obj->handle->vaddr; else - regs = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, + regs = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, overlay->reg_bo->gtt_offset); return regs; -- cgit v1.2.3-59-g8ed1b From 93d187993b783c68383a884091a600d9ad499ea6 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 17 Jan 2013 12:45:17 -0800 Subject: drm/i915: Remove use of gtt_mappable_entries Mappable_end, ie. size is almost always what you want as opposed to the number of entries. Since we already have that information, we can scrap the number of entries and only calculate it when needed. If gtt_start is !0, this will have slightly different behavior. This difference can only occur in DRI1, and exists when we try to kick out the firmware fb. The new code seems like a bugfix to me. The other case where we've changed the behavior is during init we check the mappable region against our current known upper and lower limits (64MB, and 512MB). This now matches the comment, and makes things more convenient after removing gtt_mappable_entries. Also worth noting is the setting of mappable_end is taken out of setup because we do it earlier now in the DRI2 case and therefore need to add that tiny hunk to support the DRI1 IOCTL. v2: Move up mappable end to before legacy AGP init v3: Add the dev_priv inclusion here from previous rebase error in patch 5 Reviewed-by: Rodrigo Vivi (v2) Signed-off-by: Ben Widawsky [danvet: squash in fix for a printk format flag mismatch warning.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_dma.c | 8 ++++---- drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 15 +++++++-------- drivers/gpu/drm/i915/intel_fb.c | 3 +-- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 773b23ecc83b..90a6fc506dd5 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -258,7 +258,7 @@ static int i915_gem_object_info(struct seq_file *m, void* data) seq_printf(m, "%u fault mappable objects, %zu bytes\n", count, size); - seq_printf(m, "%zu [%zu] gtt total\n", + seq_printf(m, "%zu [%lu] gtt total\n", dev_priv->gtt.total, dev_priv->gtt.mappable_end - dev_priv->gtt.start); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 468d2a0fc378..3f70178c63ca 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1427,8 +1427,8 @@ static void i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv) return; ap->ranges[0].base = dev_priv->gtt.mappable_base; - ap->ranges[0].size = - dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; + ap->ranges[0].size = dev_priv->gtt.mappable_end - dev_priv->gtt.start; + primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; @@ -1542,7 +1542,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto put_gmch; } - aperture_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; + aperture_size = dev_priv->gtt.mappable_end; dev_priv->gtt.mappable = io_mapping_create_wc(dev_priv->gtt.mappable_base, @@ -1699,7 +1699,7 @@ int i915_driver_unload(struct drm_device *dev) if (dev_priv->mm.gtt_mtrr >= 0) { mtrr_del(dev_priv->mm.gtt_mtrr, dev_priv->gtt.mappable_base, - dev_priv->mm.gtt->gtt_mappable_entries * PAGE_SIZE); + dev_priv->gtt.mappable_end); dev_priv->mm.gtt_mtrr = -1; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 51fdf16181a7..e4132ffb7609 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -149,6 +149,7 @@ int i915_gem_init_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_init *args = data; if (drm_core_check_feature(dev, DRIVER_MODESET)) @@ -165,6 +166,7 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data, mutex_lock(&dev->struct_mutex); i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end, args->gtt_end); + dev_priv->gtt.mappable_end = args->gtt_end; mutex_unlock(&dev->struct_mutex); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0b8930577953..0f0db02c1e56 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -557,7 +557,6 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, } dev_priv->gtt.start = start; - dev_priv->gtt.mappable_end = mappable_end; dev_priv->gtt.total = end - start; /* Clear any non-preallocated blocks */ @@ -596,7 +595,7 @@ void i915_gem_init_global_gtt(struct drm_device *dev) int ret; gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT; - mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; + mappable_size = dev_priv->gtt.mappable_end; if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { /* PPGTT pdes are stolen from global gtt ptes, so shrink the @@ -692,6 +691,7 @@ int i915_gem_gtt_init(struct drm_device *dev) int ret; dev_priv->gtt.mappable_base = pci_resource_start(dev->pdev, 2); + dev_priv->gtt.mappable_end = pci_resource_len(dev->pdev, 2); /* On modern platforms we need not worry ourself with the legacy * hostbridge query stuff. Skip it entirely @@ -735,14 +735,13 @@ int i915_gem_gtt_init(struct drm_device *dev) else dev_priv->mm.gtt->stolen_size = gen7_get_stolen_size(snb_gmch_ctl); - dev_priv->mm.gtt->gtt_mappable_entries = pci_resource_len(dev->pdev, 2) >> PAGE_SHIFT; /* 64/512MB is the current min/max we actually know of, but this is just a * coarse sanity check. */ - if ((dev_priv->mm.gtt->gtt_mappable_entries >> 8) < 64 || - dev_priv->mm.gtt->gtt_mappable_entries > dev_priv->mm.gtt->gtt_total_entries) { - DRM_ERROR("Unknown GMADR entries (%d)\n", - dev_priv->mm.gtt->gtt_mappable_entries); + if ((dev_priv->gtt.mappable_end < (64<<20) || + (dev_priv->gtt.mappable_end > (512<<20)))) { + DRM_ERROR("Unknown GMADR size (%lx)\n", + dev_priv->gtt.mappable_end); ret = -ENXIO; goto err_out; } @@ -764,7 +763,7 @@ int i915_gem_gtt_init(struct drm_device *dev) /* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */ DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8); - DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8); + DRM_DEBUG_DRIVER("GMADR size = %ldM\n", dev_priv->gtt.mappable_end >> 20); DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20); return 0; diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index ce02af8ca96e..ce5f54498426 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -135,8 +135,7 @@ static int intelfb_create(struct intel_fbdev *ifbdev, goto out_unpin; } info->apertures->ranges[0].base = dev->mode_config.fb_base; - info->apertures->ranges[0].size = - dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; + info->apertures->ranges[0].size = dev_priv->gtt.mappable_end; info->fix.smem_start = dev->mode_config.fb_base + obj->gtt_offset; info->fix.smem_len = size; -- cgit v1.2.3-59-g8ed1b From 99584db33ba4f864777e2cfef5329ed1bf13f714 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 14 Nov 2012 17:14:04 +0100 Subject: drm/i915: extract hangcheck/reset/error_state state into substruct This has been sprinkled all over the place in dev_priv. I think it'd be good to also move all the code into a separate file like i915_gem_error.c, but that's for another patch. Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 10 +++--- drivers/gpu/drm/i915/i915_dma.c | 6 ++-- drivers/gpu/drm/i915/i915_drv.c | 8 ++--- drivers/gpu/drm/i915/i915_drv.h | 39 +++++++++++++--------- drivers/gpu/drm/i915/i915_gem.c | 10 +++--- drivers/gpu/drm/i915/i915_irq.c | 59 ++++++++++++++++++--------------- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 7 files changed, 73 insertions(+), 61 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 90a6fc506dd5..3b1bf4e70d94 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -814,11 +814,11 @@ static int i915_error_state_open(struct inode *inode, struct file *file) error_priv->dev = dev; - spin_lock_irqsave(&dev_priv->error_lock, flags); - error_priv->error = dev_priv->first_error; + spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); + error_priv->error = dev_priv->gpu_error.first_error; if (error_priv->error) kref_get(&error_priv->error->ref); - spin_unlock_irqrestore(&dev_priv->error_lock, flags); + spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags); return single_open(file, i915_error_state, error_priv); } @@ -1727,7 +1727,7 @@ i915_ring_stop_read(struct file *filp, int len; len = snprintf(buf, sizeof(buf), - "0x%08x\n", dev_priv->stop_rings); + "0x%08x\n", dev_priv->gpu_error.stop_rings); if (len > sizeof(buf)) len = sizeof(buf); @@ -1763,7 +1763,7 @@ i915_ring_stop_write(struct file *filp, if (ret) return ret; - dev_priv->stop_rings = val; + dev_priv->gpu_error.stop_rings = val; mutex_unlock(&dev->struct_mutex); return cnt; diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 3f70178c63ca..11c7aa80e29b 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1605,7 +1605,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) pci_enable_msi(dev->pdev); spin_lock_init(&dev_priv->irq_lock); - spin_lock_init(&dev_priv->error_lock); + spin_lock_init(&dev_priv->gpu_error.lock); spin_lock_init(&dev_priv->rps.lock); mutex_init(&dev_priv->dpio_lock); @@ -1725,8 +1725,8 @@ int i915_driver_unload(struct drm_device *dev) } /* Free error state after interrupts are fully disabled. */ - del_timer_sync(&dev_priv->hangcheck_timer); - cancel_work_sync(&dev_priv->error_work); + del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); + cancel_work_sync(&dev_priv->gpu_error.work); i915_destroy_error_state(dev); if (dev->pdev->msi_enabled) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c8cbc32fe8db..3ff8e73f4341 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -779,9 +779,9 @@ int intel_gpu_reset(struct drm_device *dev) } /* Also reset the gpu hangman. */ - if (dev_priv->stop_rings) { + if (dev_priv->gpu_error.stop_rings) { DRM_DEBUG("Simulated gpu hang, resetting stop_rings\n"); - dev_priv->stop_rings = 0; + dev_priv->gpu_error.stop_rings = 0; if (ret == -ENODEV) { DRM_ERROR("Reset not implemented, but ignoring " "error for simulated gpu hangs\n"); @@ -820,12 +820,12 @@ int i915_reset(struct drm_device *dev) i915_gem_reset(dev); ret = -ENODEV; - if (get_seconds() - dev_priv->last_gpu_reset < 5) + if (get_seconds() - dev_priv->gpu_error.last_reset < 5) DRM_ERROR("GPU hanging too fast, declaring wedged!\n"); else ret = intel_gpu_reset(dev); - dev_priv->last_gpu_reset = get_seconds(); + dev_priv->gpu_error.last_reset = get_seconds(); if (ret) { DRM_ERROR("Failed to reset chip.\n"); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ea3226852ea8..dfe0e747c4f7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -766,6 +766,28 @@ struct i915_gem_mm { u32 object_count; }; +struct i915_gpu_error { + /* For hangcheck timer */ +#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ +#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) + struct timer_list hangcheck_timer; + int hangcheck_count; + uint32_t last_acthd[I915_NUM_RINGS]; + uint32_t prev_instdone[I915_NUM_INSTDONE_REG]; + + /* For reset and error_state handling. */ + spinlock_t lock; + /* Protected by the above dev->gpu_error.lock. */ + struct drm_i915_error_state *first_error; + struct work_struct work; + struct completion completion; + + unsigned long last_reset; + + /* For gpu hang simulation. */ + unsigned int stop_rings; +}; + typedef struct drm_i915_private { struct drm_device *dev; struct kmem_cache *slab; @@ -829,16 +851,6 @@ typedef struct drm_i915_private { int num_pipe; int num_pch_pll; - /* For hangcheck timer */ -#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ -#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) - struct timer_list hangcheck_timer; - int hangcheck_count; - uint32_t last_acthd[I915_NUM_RINGS]; - uint32_t prev_instdone[I915_NUM_INSTDONE_REG]; - - unsigned int stop_rings; - unsigned long cfb_size; unsigned int cfb_fb; enum plane cfb_plane; @@ -886,11 +898,6 @@ typedef struct drm_i915_private { unsigned int fsb_freq, mem_freq, is_ddr3; - spinlock_t error_lock; - /* Protected by dev->error_lock. */ - struct drm_i915_error_state *first_error; - struct work_struct error_work; - struct completion error_completion; struct workqueue_struct *wq; /* Display functions */ @@ -949,7 +956,7 @@ typedef struct drm_i915_private { struct drm_mm_node *compressed_fb; struct drm_mm_node *compressed_llb; - unsigned long last_gpu_reset; + struct i915_gpu_error gpu_error; /* list of fbdev register on this device */ struct intel_fbdev *fbdev; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e4132ffb7609..95e022e7a26e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -90,7 +90,7 @@ static int i915_gem_wait_for_error(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct completion *x = &dev_priv->error_completion; + struct completion *x = &dev_priv->gpu_error.completion; unsigned long flags; int ret; @@ -943,7 +943,7 @@ i915_gem_check_wedge(struct drm_i915_private *dev_priv, bool interruptible) { if (atomic_read(&dev_priv->mm.wedged)) { - struct completion *x = &dev_priv->error_completion; + struct completion *x = &dev_priv->gpu_error.completion; bool recovery_complete; unsigned long flags; @@ -2045,7 +2045,7 @@ i915_add_request(struct intel_ring_buffer *ring, if (!dev_priv->mm.suspended) { if (i915_enable_hangcheck) { - mod_timer(&dev_priv->hangcheck_timer, + mod_timer(&dev_priv->gpu_error.hangcheck_timer, round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); } if (was_empty) { @@ -3803,7 +3803,7 @@ i915_gem_idle(struct drm_device *dev) * And not confound mm.suspended! */ dev_priv->mm.suspended = 1; - del_timer_sync(&dev_priv->hangcheck_timer); + del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); i915_kernel_lost_context(dev); i915_gem_cleanup_ringbuffer(dev); @@ -4064,7 +4064,7 @@ i915_gem_load(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); INIT_DELAYED_WORK(&dev_priv->mm.retire_work, i915_gem_retire_work_handler); - init_completion(&dev_priv->error_completion); + init_completion(&dev_priv->gpu_error.completion); /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ if (IS_GEN3(dev)) { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index cc49a6ddc052..c768ebdf8a27 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -356,8 +356,8 @@ static void notify_ring(struct drm_device *dev, wake_up_all(&ring->irq_queue); if (i915_enable_hangcheck) { - dev_priv->hangcheck_count = 0; - mod_timer(&dev_priv->hangcheck_timer, + dev_priv->gpu_error.hangcheck_count = 0; + mod_timer(&dev_priv->gpu_error.hangcheck_timer, round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); } } @@ -863,7 +863,7 @@ done: static void i915_error_work_func(struct work_struct *work) { drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, - error_work); + gpu_error.work); struct drm_device *dev = dev_priv->dev; char *error_event[] = { "ERROR=1", NULL }; char *reset_event[] = { "RESET=1", NULL }; @@ -878,7 +878,7 @@ static void i915_error_work_func(struct work_struct *work) atomic_set(&dev_priv->mm.wedged, 0); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event); } - complete_all(&dev_priv->error_completion); + complete_all(&dev_priv->gpu_error.completion); } } @@ -1255,9 +1255,9 @@ static void i915_capture_error_state(struct drm_device *dev) unsigned long flags; int i, pipe; - spin_lock_irqsave(&dev_priv->error_lock, flags); - error = dev_priv->first_error; - spin_unlock_irqrestore(&dev_priv->error_lock, flags); + spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); + error = dev_priv->gpu_error.first_error; + spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags); if (error) return; @@ -1341,12 +1341,12 @@ static void i915_capture_error_state(struct drm_device *dev) error->overlay = intel_overlay_capture_error_state(dev); error->display = intel_display_capture_error_state(dev); - spin_lock_irqsave(&dev_priv->error_lock, flags); - if (dev_priv->first_error == NULL) { - dev_priv->first_error = error; + spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); + if (dev_priv->gpu_error.first_error == NULL) { + dev_priv->gpu_error.first_error = error; error = NULL; } - spin_unlock_irqrestore(&dev_priv->error_lock, flags); + spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags); if (error) i915_error_state_free(&error->ref); @@ -1358,10 +1358,10 @@ void i915_destroy_error_state(struct drm_device *dev) struct drm_i915_error_state *error; unsigned long flags; - spin_lock_irqsave(&dev_priv->error_lock, flags); - error = dev_priv->first_error; - dev_priv->first_error = NULL; - spin_unlock_irqrestore(&dev_priv->error_lock, flags); + spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); + error = dev_priv->gpu_error.first_error; + dev_priv->gpu_error.first_error = NULL; + spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags); if (error) kref_put(&error->ref, i915_error_state_free); @@ -1482,7 +1482,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged) i915_report_and_clear_eir(dev); if (wedged) { - INIT_COMPLETION(dev_priv->error_completion); + INIT_COMPLETION(dev_priv->gpu_error.completion); atomic_set(&dev_priv->mm.wedged, 1); /* @@ -1492,7 +1492,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged) wake_up_all(&ring->irq_queue); } - queue_work(dev_priv->wq, &dev_priv->error_work); + queue_work(dev_priv->wq, &dev_priv->gpu_error.work); } static void i915_pageflip_stall_check(struct drm_device *dev, int pipe) @@ -1723,7 +1723,7 @@ static bool i915_hangcheck_hung(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - if (dev_priv->hangcheck_count++ > 1) { + if (dev_priv->gpu_error.hangcheck_count++ > 1) { bool hung = true; DRM_ERROR("Hangcheck timer elapsed... GPU hung\n"); @@ -1782,25 +1782,29 @@ void i915_hangcheck_elapsed(unsigned long data) goto repeat; } - dev_priv->hangcheck_count = 0; + dev_priv->gpu_error.hangcheck_count = 0; return; } i915_get_extra_instdone(dev, instdone); - if (memcmp(dev_priv->last_acthd, acthd, sizeof(acthd)) == 0 && - memcmp(dev_priv->prev_instdone, instdone, sizeof(instdone)) == 0) { + if (memcmp(dev_priv->gpu_error.last_acthd, acthd, + sizeof(acthd)) == 0 && + memcmp(dev_priv->gpu_error.prev_instdone, instdone, + sizeof(instdone)) == 0) { if (i915_hangcheck_hung(dev)) return; } else { - dev_priv->hangcheck_count = 0; + dev_priv->gpu_error.hangcheck_count = 0; - memcpy(dev_priv->last_acthd, acthd, sizeof(acthd)); - memcpy(dev_priv->prev_instdone, instdone, sizeof(instdone)); + memcpy(dev_priv->gpu_error.last_acthd, acthd, + sizeof(acthd)); + memcpy(dev_priv->gpu_error.prev_instdone, instdone, + sizeof(instdone)); } repeat: /* Reset timer case chip hangs without another request being added */ - mod_timer(&dev_priv->hangcheck_timer, + mod_timer(&dev_priv->gpu_error.hangcheck_timer, round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); } @@ -2769,11 +2773,12 @@ void intel_irq_init(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func); - INIT_WORK(&dev_priv->error_work, i915_error_work_func); + INIT_WORK(&dev_priv->gpu_error.work, i915_error_work_func); INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); - setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed, + setup_timer(&dev_priv->gpu_error.hangcheck_timer, + i915_hangcheck_elapsed, (unsigned long) dev); pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ce1d07487402..d6b06aa4c05c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1491,7 +1491,7 @@ void intel_ring_advance(struct intel_ring_buffer *ring) struct drm_i915_private *dev_priv = ring->dev->dev_private; ring->tail &= ring->size - 1; - if (dev_priv->stop_rings & intel_ring_flag(ring)) + if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring)) return; ring->write_tail(ring, ring->tail); } -- cgit v1.2.3-59-g8ed1b From 33196deddacc7790defb9a7e84659e0362d4da7a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 14 Nov 2012 17:14:05 +0100 Subject: drm/i915: move wedged to the other gpu error handling stuff And to make Ben Widawsky happier, use the gpu_error instead of the entire device as the argument in some functions. Drop the outdated comment on ->wedged for now, a follow-up patch will change the semantics and add a proper comment again. Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 13 +++---------- drivers/gpu/drm/i915/i915_gem.c | 34 ++++++++++++++++----------------- drivers/gpu/drm/i915/i915_irq.c | 6 +++--- drivers/gpu/drm/i915/intel_display.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++++-- 6 files changed, 30 insertions(+), 35 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3b1bf4e70d94..e1b7eaf60d24 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1672,7 +1672,7 @@ i915_wedged_read(struct file *filp, len = snprintf(buf, sizeof(buf), "wedged : %d\n", - atomic_read(&dev_priv->mm.wedged)); + atomic_read(&dev_priv->gpu_error.wedged)); if (len > sizeof(buf)) len = sizeof(buf); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dfe0e747c4f7..62da6c7a4884 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -744,15 +744,6 @@ struct i915_gem_mm { */ int suspended; - /** - * Flag if the hardware appears to be wedged. - * - * This is set when attempts to idle the device timeout. - * It prevents command submission from occurring and makes - * every pending request fail - */ - atomic_t wedged; - /** Bit 6 swizzling required for X tiling */ uint32_t bit_6_swizzle_x; /** Bit 6 swizzling required for Y tiling */ @@ -784,6 +775,8 @@ struct i915_gpu_error { unsigned long last_reset; + atomic_t wedged; + /* For gpu hang simulation. */ unsigned int stop_rings; }; @@ -1548,7 +1541,7 @@ i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) void i915_gem_retire_requests(struct drm_device *dev); void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring); -int __must_check i915_gem_check_wedge(struct drm_i915_private *dev_priv, +int __must_check i915_gem_check_wedge(struct i915_gpu_error *error, bool interruptible); void i915_gem_reset(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 95e022e7a26e..04b2f92eb456 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -87,14 +87,13 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, } static int -i915_gem_wait_for_error(struct drm_device *dev) +i915_gem_wait_for_error(struct i915_gpu_error *error) { - struct drm_i915_private *dev_priv = dev->dev_private; - struct completion *x = &dev_priv->gpu_error.completion; + struct completion *x = &error->completion; unsigned long flags; int ret; - if (!atomic_read(&dev_priv->mm.wedged)) + if (!atomic_read(&error->wedged)) return 0; /* @@ -110,7 +109,7 @@ i915_gem_wait_for_error(struct drm_device *dev) return ret; } - if (atomic_read(&dev_priv->mm.wedged)) { + if (atomic_read(&error->wedged)) { /* GPU is hung, bump the completion count to account for * the token we just consumed so that we never hit zero and * end up waiting upon a subsequent completion event that @@ -125,9 +124,10 @@ i915_gem_wait_for_error(struct drm_device *dev) int i915_mutex_lock_interruptible(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; int ret; - ret = i915_gem_wait_for_error(dev); + ret = i915_gem_wait_for_error(&dev_priv->gpu_error); if (ret) return ret; @@ -939,11 +939,11 @@ unlock: } int -i915_gem_check_wedge(struct drm_i915_private *dev_priv, +i915_gem_check_wedge(struct i915_gpu_error *error, bool interruptible) { - if (atomic_read(&dev_priv->mm.wedged)) { - struct completion *x = &dev_priv->gpu_error.completion; + if (atomic_read(&error->wedged)) { + struct completion *x = &error->completion; bool recovery_complete; unsigned long flags; @@ -1025,7 +1025,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, #define EXIT_COND \ (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ - atomic_read(&dev_priv->mm.wedged)) + atomic_read(&dev_priv->gpu_error.wedged)) do { if (interruptible) end = wait_event_interruptible_timeout(ring->irq_queue, @@ -1035,7 +1035,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, end = wait_event_timeout(ring->irq_queue, EXIT_COND, timeout_jiffies); - ret = i915_gem_check_wedge(dev_priv, interruptible); + ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); if (ret) end = ret; } while (end == 0 && wait_forever); @@ -1081,7 +1081,7 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) BUG_ON(!mutex_is_locked(&dev->struct_mutex)); BUG_ON(seqno == 0); - ret = i915_gem_check_wedge(dev_priv, interruptible); + ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); if (ret) return ret; @@ -1146,7 +1146,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (seqno == 0) return 0; - ret = i915_gem_check_wedge(dev_priv, true); + ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); if (ret) return ret; @@ -1379,7 +1379,7 @@ out: /* If this -EIO is due to a gpu hang, give the reset code a * chance to clean up the mess. Otherwise return the proper * SIGBUS. */ - if (!atomic_read(&dev_priv->mm.wedged)) + if (!atomic_read(&dev_priv->gpu_error.wedged)) return VM_FAULT_SIGBUS; case -EAGAIN: /* Give the error handler a chance to run and move the @@ -3390,7 +3390,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) u32 seqno = 0; int ret; - if (atomic_read(&dev_priv->mm.wedged)) + if (atomic_read(&dev_priv->gpu_error.wedged)) return -EIO; spin_lock(&file_priv->mm.lock); @@ -3978,9 +3978,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, if (drm_core_check_feature(dev, DRIVER_MODESET)) return 0; - if (atomic_read(&dev_priv->mm.wedged)) { + if (atomic_read(&dev_priv->gpu_error.wedged)) { DRM_ERROR("Reenabling wedged hardware, good luck\n"); - atomic_set(&dev_priv->mm.wedged, 0); + atomic_set(&dev_priv->gpu_error.wedged, 0); } mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c768ebdf8a27..f2c0016fa533 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -871,11 +871,11 @@ static void i915_error_work_func(struct work_struct *work) kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event); - if (atomic_read(&dev_priv->mm.wedged)) { + if (atomic_read(&dev_priv->gpu_error.wedged)) { DRM_DEBUG_DRIVER("resetting chip\n"); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event); if (!i915_reset(dev)) { - atomic_set(&dev_priv->mm.wedged, 0); + atomic_set(&dev_priv->gpu_error.wedged, 0); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event); } complete_all(&dev_priv->gpu_error.completion); @@ -1483,7 +1483,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged) if (wedged) { INIT_COMPLETION(dev_priv->gpu_error.completion); - atomic_set(&dev_priv->mm.wedged, 1); + atomic_set(&dev_priv->gpu_error.wedged, 1); /* * Wakeup waiting processes so they don't hang diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b35902e5d925..160aa5f76118 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2223,7 +2223,7 @@ intel_finish_fb(struct drm_framebuffer *old_fb) WARN_ON(waitqueue_active(&dev_priv->pending_flip_queue)); wait_event(dev_priv->pending_flip_queue, - atomic_read(&dev_priv->mm.wedged) || + atomic_read(&dev_priv->gpu_error.wedged) || atomic_read(&obj->pending_flip) == 0); /* Big Hammer, we also need to ensure that any pending @@ -2871,7 +2871,7 @@ static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) unsigned long flags; bool pending; - if (atomic_read(&dev_priv->mm.wedged)) + if (atomic_read(&dev_priv->gpu_error.wedged)) return false; spin_lock_irqsave(&dev->event_lock, flags); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d6b06aa4c05c..9438bcd50678 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1371,7 +1371,8 @@ static int ring_wait_for_space(struct intel_ring_buffer *ring, int n) msleep(1); - ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible); + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); if (ret) return ret; } while (!time_after(jiffies, end)); @@ -1460,7 +1461,8 @@ int intel_ring_begin(struct intel_ring_buffer *ring, drm_i915_private_t *dev_priv = ring->dev->dev_private; int ret; - ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible); + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); if (ret) return ret; -- cgit v1.2.3-59-g8ed1b From 308887aad14c4ecc3fc10a3c58ec42641c5e4423 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 14 Nov 2012 17:14:06 +0100 Subject: drm/i915: fix reset handling in the throttle ioctl While auditing the code I've noticed one place (the throttle ioctl) which does not yet wait for the reset handler to complete and doesn't properly decode the wedge state into -EAGAIN/-EIO. Fix this up by calling the right helpers. This might explain the oddball "my compositor just died in a successfull gpu reset" reports. Or maybe not, since current mesa doesn't use this ioctl to throttle command submission. The throttle ioctl doesn't take the struct_mutex, so to avoid busy-looping with -EAGAIN while a reset is in process, check for errors first and wait for the handler to complete if a reset is pending by calling i915_gem_wait_for_error. Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 04b2f92eb456..c96a501b8205 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3390,8 +3390,13 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) u32 seqno = 0; int ret; - if (atomic_read(&dev_priv->gpu_error.wedged)) - return -EIO; + ret = i915_gem_wait_for_error(&dev_priv->gpu_error); + if (ret) + return ret; + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); + if (ret) + return ret; spin_lock(&file_priv->mm.lock); list_for_each_entry(request, &file_priv->mm.request_list, client_list) { -- cgit v1.2.3-59-g8ed1b From 1f83fee08d625f8d0130f9fe5ef7b17c2e022f3c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 15 Nov 2012 17:17:22 +0100 Subject: drm/i915: clear up wedged transitions We have two important transitions of the wedged state in the current code: - 0 -> 1: This means a hang has been detected, and signals to everyone that they please get of any locks, so that the reset work item can do its job. - 1 -> 0: The reset handler has completed. Now the last transition mixes up two states: "Reset completed and successful" and "Reset failed". To distinguish these two we do some tricks with the reset completion, but I simply could not convince myself that this doesn't race under odd circumstances. Hence split this up, and add a new terminal state indicating that the hw is gone for good. Also add explicit #defines for both states, update comments. v2: Split out the reset handling bugfix for the throttle ioctl. v3: s/tmp/wedged/ sugested by Chris Wilson. Also fixup up a rebase error which prevented this patch from actually compiling. v4: To unify the wedged state with the reset counter, keep the reset-in-progress state just as a flag. The terminally-wedged state is now denoted with a big number. v5: Add a comment to the reset_counter special values explaining that WEDGED & RESET_IN_PROGRESS needs to be true for the code to be correct. v6: Fixup logic errors introduced with the wedged+reset_counter unification. Since WEDGED implies reset-in-progress (in a way we're terminally stuck in the dead-but-reset-not-completed state), we need ensure that we check for this everywhere. The specific bug was in wait_for_error, which would simply have timed out. v7: Extract an inline i915_reset_in_progress helper to make the code more readable. Also annote the reset-in-progress case with an unlikely, to help the compiler optimize the fastpath. Do the same for the terminally wedged case with i915_terminally_wedged. Reviewed-by: Damien Lespiau Signed-Off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 40 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_gem.c | 49 +++++++++++++----------------------- drivers/gpu/drm/i915/i915_irq.c | 23 +++++++++++------ drivers/gpu/drm/i915/intel_display.c | 4 +-- 5 files changed, 74 insertions(+), 44 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e1b7eaf60d24..384f19368a1d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1672,7 +1672,7 @@ i915_wedged_read(struct file *filp, len = snprintf(buf, sizeof(buf), "wedged : %d\n", - atomic_read(&dev_priv->gpu_error.wedged)); + atomic_read(&dev_priv->gpu_error.reset_counter)); if (len > sizeof(buf)) len = sizeof(buf); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 62da6c7a4884..c84743bb6937 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -771,11 +771,37 @@ struct i915_gpu_error { /* Protected by the above dev->gpu_error.lock. */ struct drm_i915_error_state *first_error; struct work_struct work; - struct completion completion; unsigned long last_reset; - atomic_t wedged; + /** + * State variable controlling the reset flow + * + * Upper bits are for the reset counter. + * + * Lowest bit controls the reset state machine: Set means a reset is in + * progress. This state will (presuming we don't have any bugs) decay + * into either unset (successful reset) or the special WEDGED value (hw + * terminally sour). All waiters on the reset_queue will be woken when + * that happens. + */ + atomic_t reset_counter; + + /** + * Special values/flags for reset_counter + * + * Note that the code relies on + * I915_WEDGED & I915_RESET_IN_PROGRESS_FLAG + * being true. + */ +#define I915_RESET_IN_PROGRESS_FLAG 1 +#define I915_WEDGED 0xffffffff + + /** + * Waitqueue to signal when the reset has completed. Used by clients + * that wait for dev_priv->mm.wedged to settle. + */ + wait_queue_head_t reset_queue; /* For gpu hang simulation. */ unsigned int stop_rings; @@ -1543,6 +1569,16 @@ void i915_gem_retire_requests(struct drm_device *dev); void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring); int __must_check i915_gem_check_wedge(struct i915_gpu_error *error, bool interruptible); +static inline bool i915_reset_in_progress(struct i915_gpu_error *error) +{ + return unlikely(atomic_read(&error->reset_counter) + & I915_RESET_IN_PROGRESS_FLAG); +} + +static inline bool i915_terminally_wedged(struct i915_gpu_error *error) +{ + return atomic_read(&error->reset_counter) == I915_WEDGED; +} void i915_gem_reset(struct drm_device *dev); void i915_gem_clflush_object(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c96a501b8205..2ca901194824 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -89,36 +89,32 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, static int i915_gem_wait_for_error(struct i915_gpu_error *error) { - struct completion *x = &error->completion; - unsigned long flags; int ret; - if (!atomic_read(&error->wedged)) +#define EXIT_COND (!i915_reset_in_progress(error)) + if (EXIT_COND) return 0; + /* GPU is already declared terminally dead, give up. */ + if (i915_terminally_wedged(error)) + return -EIO; + /* * Only wait 10 seconds for the gpu reset to complete to avoid hanging * userspace. If it takes that long something really bad is going on and * we should simply try to bail out and fail as gracefully as possible. */ - ret = wait_for_completion_interruptible_timeout(x, 10*HZ); + ret = wait_event_interruptible_timeout(error->reset_queue, + EXIT_COND, + 10*HZ); if (ret == 0) { DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); return -EIO; } else if (ret < 0) { return ret; } +#undef EXIT_COND - if (atomic_read(&error->wedged)) { - /* GPU is hung, bump the completion count to account for - * the token we just consumed so that we never hit zero and - * end up waiting upon a subsequent completion event that - * will never happen. - */ - spin_lock_irqsave(&x->wait.lock, flags); - x->done++; - spin_unlock_irqrestore(&x->wait.lock, flags); - } return 0; } @@ -942,23 +938,14 @@ int i915_gem_check_wedge(struct i915_gpu_error *error, bool interruptible) { - if (atomic_read(&error->wedged)) { - struct completion *x = &error->completion; - bool recovery_complete; - unsigned long flags; - - /* Give the error handler a chance to run. */ - spin_lock_irqsave(&x->wait.lock, flags); - recovery_complete = x->done > 0; - spin_unlock_irqrestore(&x->wait.lock, flags); - + if (i915_reset_in_progress(error)) { /* Non-interruptible callers can't handle -EAGAIN, hence return * -EIO unconditionally for these. */ if (!interruptible) return -EIO; - /* Recovery complete, but still wedged means reset failure. */ - if (recovery_complete) + /* Recovery complete, but the reset failed ... */ + if (i915_terminally_wedged(error)) return -EIO; return -EAGAIN; @@ -1025,7 +1012,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, #define EXIT_COND \ (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ - atomic_read(&dev_priv->gpu_error.wedged)) + i915_reset_in_progress(&dev_priv->gpu_error)) do { if (interruptible) end = wait_event_interruptible_timeout(ring->irq_queue, @@ -1379,7 +1366,7 @@ out: /* If this -EIO is due to a gpu hang, give the reset code a * chance to clean up the mess. Otherwise return the proper * SIGBUS. */ - if (!atomic_read(&dev_priv->gpu_error.wedged)) + if (i915_terminally_wedged(&dev_priv->gpu_error)) return VM_FAULT_SIGBUS; case -EAGAIN: /* Give the error handler a chance to run and move the @@ -3983,9 +3970,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, if (drm_core_check_feature(dev, DRIVER_MODESET)) return 0; - if (atomic_read(&dev_priv->gpu_error.wedged)) { + if (i915_reset_in_progress(&dev_priv->gpu_error)) { DRM_ERROR("Reenabling wedged hardware, good luck\n"); - atomic_set(&dev_priv->gpu_error.wedged, 0); + atomic_set(&dev_priv->gpu_error.reset_counter, 0); } mutex_lock(&dev->struct_mutex); @@ -4069,7 +4056,7 @@ i915_gem_load(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); INIT_DELAYED_WORK(&dev_priv->mm.retire_work, i915_gem_retire_work_handler); - init_completion(&dev_priv->gpu_error.completion); + init_waitqueue_head(&dev_priv->gpu_error.reset_queue); /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ if (IS_GEN3(dev)) { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f2c0016fa533..4562c5406ef8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -862,8 +862,10 @@ done: */ static void i915_error_work_func(struct work_struct *work) { - drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, - gpu_error.work); + struct i915_gpu_error *error = container_of(work, struct i915_gpu_error, + work); + drm_i915_private_t *dev_priv = container_of(error, drm_i915_private_t, + gpu_error); struct drm_device *dev = dev_priv->dev; char *error_event[] = { "ERROR=1", NULL }; char *reset_event[] = { "RESET=1", NULL }; @@ -871,14 +873,18 @@ static void i915_error_work_func(struct work_struct *work) kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event); - if (atomic_read(&dev_priv->gpu_error.wedged)) { + if (i915_reset_in_progress(error)) { DRM_DEBUG_DRIVER("resetting chip\n"); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event); + if (!i915_reset(dev)) { - atomic_set(&dev_priv->gpu_error.wedged, 0); + atomic_set(&error->reset_counter, 0); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event); + } else { + atomic_set(&error->reset_counter, I915_WEDGED); } - complete_all(&dev_priv->gpu_error.completion); + + wake_up_all(&dev_priv->gpu_error.reset_queue); } } @@ -1482,11 +1488,12 @@ void i915_handle_error(struct drm_device *dev, bool wedged) i915_report_and_clear_eir(dev); if (wedged) { - INIT_COMPLETION(dev_priv->gpu_error.completion); - atomic_set(&dev_priv->gpu_error.wedged, 1); + atomic_set(&dev_priv->gpu_error.reset_counter, + I915_RESET_IN_PROGRESS_FLAG); /* - * Wakeup waiting processes so they don't hang + * Wakeup waiting processes so that the reset work item + * doesn't deadlock trying to grab various locks. */ for_each_ring(ring, dev_priv, i) wake_up_all(&ring->irq_queue); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 160aa5f76118..77254460a5cb 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2223,7 +2223,7 @@ intel_finish_fb(struct drm_framebuffer *old_fb) WARN_ON(waitqueue_active(&dev_priv->pending_flip_queue)); wait_event(dev_priv->pending_flip_queue, - atomic_read(&dev_priv->gpu_error.wedged) || + i915_reset_in_progress(&dev_priv->gpu_error) || atomic_read(&obj->pending_flip) == 0); /* Big Hammer, we also need to ensure that any pending @@ -2871,7 +2871,7 @@ static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) unsigned long flags; bool pending; - if (atomic_read(&dev_priv->gpu_error.wedged)) + if (i915_reset_in_progress(&dev_priv->gpu_error)) return false; spin_lock_irqsave(&dev->event_lock, flags); -- cgit v1.2.3-59-g8ed1b From d0a57789d5ec807fc218151b2fb2de4da30fbef5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Oct 2012 19:24:37 +0100 Subject: drm/i915: Only insert the mb() before updating the fence parameter With a fence, we only need to insert a memory barrier around the actual fence alteration for CPU accesses through the GTT. Performing the barrier in flush-fence was inserting unnecessary and expensive barriers for never fenced objects. Note removing the barriers from flush-fence, which was effectively a barrier before every direct access through the GTT, revealed that we where missing a barrier before the first access through the GTT. Lack of that barrier was sufficient to cause GPU hangs. v2: Add a couple more comments to explain the new barriers Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2ca901194824..ce706555d011 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2611,9 +2611,22 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg, POSTING_READ(FENCE_REG_830_0 + reg * 4); } +inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) +{ + return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; +} + static void i915_gem_write_fence(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj) { + struct drm_i915_private *dev_priv = dev->dev_private; + + /* Ensure that all CPU reads are completed before installing a fence + * and all writes before removing the fence. + */ + if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) + mb(); + switch (INTEL_INFO(dev)->gen) { case 7: case 6: @@ -2623,6 +2636,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, case 2: i830_write_fence_reg(dev, reg, obj); break; default: BUG(); } + + /* And similarly be paranoid that no direct access to this region + * is reordered to before the fence is installed. + */ + if (i915_gem_object_needs_mb(obj)) + mb(); } static inline int fence_number(struct drm_i915_private *dev_priv, @@ -2652,7 +2671,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, } static int -i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) +i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) { if (obj->last_fenced_seqno) { int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); @@ -2662,12 +2681,6 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) obj->last_fenced_seqno = 0; } - /* Ensure that all CPU reads are completed before installing a fence - * and all writes before removing the fence. - */ - if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) - mb(); - obj->fenced_gpu_access = false; return 0; } @@ -2678,7 +2691,7 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj) struct drm_i915_private *dev_priv = obj->base.dev->dev_private; int ret; - ret = i915_gem_object_flush_fence(obj); + ret = i915_gem_object_wait_fence(obj); if (ret) return ret; @@ -2752,7 +2765,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) * will need to serialise the write to the associated fence register? */ if (obj->fence_dirty) { - ret = i915_gem_object_flush_fence(obj); + ret = i915_gem_object_wait_fence(obj); if (ret) return ret; } @@ -2773,7 +2786,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) if (reg->obj) { struct drm_i915_gem_object *old = reg->obj; - ret = i915_gem_object_flush_fence(old); + ret = i915_gem_object_wait_fence(old); if (ret) return ret; @@ -3068,6 +3081,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) i915_gem_object_flush_cpu_write_domain(obj); + /* Serialise direct access to this object with the barriers for + * coherent writes from the GPU, by effectively invalidating the + * GTT domain upon first access. + */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) + mb(); + old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; -- cgit v1.2.3-59-g8ed1b From 97c809fd9cf5e914322b53773ad0d67efe503fde Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 9 Oct 2012 19:24:38 +0100 Subject: drm/i915: Only apply the mb() when flushing the GTT domain during a finish Now that we seem to have brought order to the GTT barriers, the last one to review is the terminal barrier before we unbind the buffer from the GTT. This needs to only be performed if the buffer still resides in the GTT domain, and so we can skip some needless barriers otherwise. Signed-off-by: Chris Wilson Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ce706555d011..5bb370fdc99c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2407,15 +2407,15 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) { u32 old_write_domain, old_read_domains; - /* Act a barrier for all accesses through the GTT */ - mb(); - /* Force a pagefault for domain tracking on next user access */ i915_gem_release_mmap(obj); if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) return; + /* Wait for any direct GTT access to complete */ + mb(); + old_read_domains = obj->base.read_domains; old_write_domain = obj->base.write_domain; -- cgit v1.2.3-59-g8ed1b From f69061bedd6ea63f271fe97914364def2f33fc6b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 6 Dec 2012 09:01:42 +0100 Subject: drm/i915: create a race-free reset detection With the previous patch the state transition handling of the reset code itself is now (hopefully) race free and solid. But that still leaves out everyone else - with the various lock-free wait paths we have there's the possibility that the reset happens between the point where we read the seqno we should wait on and the actual wait. And if __wait_seqno then never sees the RESET_IN_PROGRESS state, we'll happily wait for a seqno which will in all likelyhood never signal. In practice this is not a big problem since the X server gets constantly interrupted, and can then submit more work (hopefully) to unblock everyone else: As soon as a new seqno write lands, all waiters will unblock. But running the i-g-t reset testcase ZZ_hangman can expose this race, especially on slower hw with fewer cpu cores. Now looking forward to ARB_robustness and friends that's not the best possible behaviour, hence this patch adds a reset_counter to be able to detect any reset, even if a given thread never observed the in-progress state. The important part is to correctly order things: - The write side needs to increment the counter after any seqno gets reset. Hence we need to do that at the end of the reset work, and again wake everyone up. We also need to place a barrier in between any possible seqno changes and the counter increment, since any unlock operations only guarantee that nothing leaks out, but not that at later load operation gets moved ahead. - On the read side we need to ensure that no reset can sneak in and invalidate the seqno. In all cases we can use the one-sided barrier that unlock operations guarantee (of the lock protecting the respective seqno/ring pair) to ensure correct ordering. Hence it is sufficient to place the atomic read before the mutex/spin_unlock and no additional barriers are required. The end-result of all this is that we need to wake up everyone twice in a reset operation: - First, before the reset starts, to get any lockholders of the locks, so that the reset can proceed. - Second, after the reset is completed, to allow waiters to properly and reliably detect the reset condition and bail out. I admit that this entire reset_counter thing smells a bit like overkill, but I think it's justified since it makes it really explicit what the bail-out condition is. And we need a reset counter anyway to implement ARB_robustness, and imo with finer-grained locking on the horizont this is the most resilient scheme I could think of. v2: Drop spurious change in the wait_for_error EXIT_COND - we only need to wait until we leave the reset-in-progress wedged state. v3: Don't play tricks with barriers in the throttle ioctl, the spin_unlock is barrier enough. I've also considered using a little helper to grab the current reset_counter, but then decided that hiding the atomic_read isn't a great idea, since having it explicitly show up in the code is a nice remainder to reviews to check the memory barriers. v4: Add a comment to explain why we need to fall through in __wait_seqno in the end variable assignments. v5: Review from Damien: - s/smb/smp/ in a comment - don't increment the reset counter after we've set it to WEDGED. Now we (again) properly wedge the gpu when the reset fails. Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 11 +++++++++-- drivers/gpu/drm/i915/i915_gem.c | 36 +++++++++++++++++++++++++++++++----- drivers/gpu/drm/i915/i915_irq.c | 30 +++++++++++++++++++++++++----- 3 files changed, 65 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c84743bb6937..56ece50910a8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -775,9 +775,16 @@ struct i915_gpu_error { unsigned long last_reset; /** - * State variable controlling the reset flow + * State variable and reset counter controlling the reset flow * - * Upper bits are for the reset counter. + * Upper bits are for the reset counter. This counter is used by the + * wait_seqno code to race-free noticed that a reset event happened and + * that it needs to restart the entire ioctl (since most likely the + * seqno it waited for won't ever signal anytime soon). + * + * This is important for lock-free wait paths, where no contended lock + * naturally enforces the correct ordering between the bail-out of the + * waiter and the gpu reset work code. * * Lowest bit controls the reset state machine: Set means a reset is in * progress. This state will (presuming we don't have any bugs) decay diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5bb370fdc99c..5226ebcac33a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -976,13 +976,22 @@ i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) * __wait_seqno - wait until execution of seqno has finished * @ring: the ring expected to report seqno * @seqno: duh! + * @reset_counter: reset sequence associated with the given seqno * @interruptible: do an interruptible wait (normally yes) * @timeout: in - how long to wait (NULL forever); out - how much time remaining * + * Note: It is of utmost importance that the passed in seqno and reset_counter + * values have been read by the caller in an smp safe manner. Where read-side + * locks are involved, it is sufficient to read the reset_counter before + * unlocking the lock that protects the seqno. For lockless tricks, the + * reset_counter _must_ be read before, and an appropriate smp_rmb must be + * inserted. + * * Returns 0 if the seqno was found within the alloted time. Else returns the * errno with remaining time filled in timeout argument. */ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, + unsigned reset_counter, bool interruptible, struct timespec *timeout) { drm_i915_private_t *dev_priv = ring->dev->dev_private; @@ -1012,7 +1021,8 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, #define EXIT_COND \ (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ - i915_reset_in_progress(&dev_priv->gpu_error)) + i915_reset_in_progress(&dev_priv->gpu_error) || \ + reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) do { if (interruptible) end = wait_event_interruptible_timeout(ring->irq_queue, @@ -1022,6 +1032,13 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, end = wait_event_timeout(ring->irq_queue, EXIT_COND, timeout_jiffies); + /* We need to check whether any gpu reset happened in between + * the caller grabbing the seqno and now ... */ + if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) + end = -EAGAIN; + + /* ... but upgrade the -EGAIN to an -EIO if the gpu is truely + * gone. */ ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); if (ret) end = ret; @@ -1076,7 +1093,9 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) if (ret) return ret; - return __wait_seqno(ring, seqno, interruptible, NULL); + return __wait_seqno(ring, seqno, + atomic_read(&dev_priv->gpu_error.reset_counter), + interruptible, NULL); } /** @@ -1123,6 +1142,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring = obj->ring; + unsigned reset_counter; u32 seqno; int ret; @@ -1141,8 +1161,9 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (ret) return ret; + reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); mutex_unlock(&dev->struct_mutex); - ret = __wait_seqno(ring, seqno, true, NULL); + ret = __wait_seqno(ring, seqno, reset_counter, true, NULL); mutex_lock(&dev->struct_mutex); i915_gem_retire_requests_ring(ring); @@ -2297,10 +2318,12 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_wait *args = data; struct drm_i915_gem_object *obj; struct intel_ring_buffer *ring = NULL; struct timespec timeout_stack, *timeout = NULL; + unsigned reset_counter; u32 seqno = 0; int ret = 0; @@ -2341,9 +2364,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } drm_gem_object_unreference(&obj->base); + reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); mutex_unlock(&dev->struct_mutex); - ret = __wait_seqno(ring, seqno, true, timeout); + ret = __wait_seqno(ring, seqno, reset_counter, true, timeout); if (timeout) { WARN_ON(!timespec_valid(timeout)); args->timeout_ns = timespec_to_ns(timeout); @@ -3394,6 +3418,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) unsigned long recent_enough = jiffies - msecs_to_jiffies(20); struct drm_i915_gem_request *request; struct intel_ring_buffer *ring = NULL; + unsigned reset_counter; u32 seqno = 0; int ret; @@ -3413,12 +3438,13 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) ring = request->ring; seqno = request->seqno; } + reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); spin_unlock(&file_priv->mm.lock); if (seqno == 0) return 0; - ret = __wait_seqno(ring, seqno, true, NULL); + ret = __wait_seqno(ring, seqno, reset_counter, true, NULL); if (ret == 0) queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4562c5406ef8..f833f2c155f8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -867,9 +867,11 @@ static void i915_error_work_func(struct work_struct *work) drm_i915_private_t *dev_priv = container_of(error, drm_i915_private_t, gpu_error); struct drm_device *dev = dev_priv->dev; + struct intel_ring_buffer *ring; char *error_event[] = { "ERROR=1", NULL }; char *reset_event[] = { "RESET=1", NULL }; char *reset_done_event[] = { "ERROR=0", NULL }; + int i, ret; kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event); @@ -877,13 +879,31 @@ static void i915_error_work_func(struct work_struct *work) DRM_DEBUG_DRIVER("resetting chip\n"); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event); - if (!i915_reset(dev)) { - atomic_set(&error->reset_counter, 0); - kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event); + ret = i915_reset(dev); + + if (ret == 0) { + /* + * After all the gem state is reset, increment the reset + * counter and wake up everyone waiting for the reset to + * complete. + * + * Since unlock operations are a one-sided barrier only, + * we need to insert a barrier here to order any seqno + * updates before + * the counter increment. + */ + smp_mb__before_atomic_inc(); + atomic_inc(&dev_priv->gpu_error.reset_counter); + + kobject_uevent_env(&dev->primary->kdev.kobj, + KOBJ_CHANGE, reset_done_event); } else { atomic_set(&error->reset_counter, I915_WEDGED); } + for_each_ring(ring, dev_priv, i) + wake_up_all(&ring->irq_queue); + wake_up_all(&dev_priv->gpu_error.reset_queue); } } @@ -1488,8 +1508,8 @@ void i915_handle_error(struct drm_device *dev, bool wedged) i915_report_and_clear_eir(dev); if (wedged) { - atomic_set(&dev_priv->gpu_error.reset_counter, - I915_RESET_IN_PROGRESS_FLAG); + atomic_set_mask(I915_RESET_IN_PROGRESS_FLAG, + &dev_priv->gpu_error.reset_counter); /* * Wakeup waiting processes so that the reset work item -- cgit v1.2.3-59-g8ed1b From 99433931950f33039d9e1a52b4ed9af3f1b58e84 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 22 Jan 2013 14:12:17 +0200 Subject: drm/i915: use gem_set_seqno() on hardware init When machine was rebooted or module was reloaded, gem_hw_init() set last_seqno to be identical to next_seqno. This lead to situation that waits for first ever request always passed immediately regardless if it was actually executed. Use gem_set_seqno() to be consistent how hw is initialized on init, wrap and on resume. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 6 ++++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5226ebcac33a..801f77ece72e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3941,8 +3941,6 @@ i915_gem_init_hw(struct drm_device *dev) i915_gem_init_swizzling(dev); - dev_priv->next_seqno = dev_priv->last_seqno = (u32)~0 - 0x1000; - ret = intel_init_render_ring_buffer(dev); if (ret) return ret; @@ -3959,6 +3957,10 @@ i915_gem_init_hw(struct drm_device *dev) goto cleanup_bsd_ring; } + ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); + if (ret) + return ret; + /* * XXX: There was some w/a described somewhere suggesting loading * contexts before PPGTT. diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9438bcd50678..dc6ae2fa1cee 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1223,8 +1223,6 @@ static int intel_init_ring_buffer(struct drm_device *dev, if (IS_I830(ring->dev) || IS_845G(ring->dev)) ring->effective_size -= 128; - intel_ring_init_seqno(ring, dev_priv->last_seqno); - return 0; err_unmap: -- cgit v1.2.3-59-g8ed1b From 725a5b54028916cd2511a251c5b5b13d1715addc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Jan 2013 11:02:57 +0000 Subject: drm/i915: Only run idle processing from i915_gem_retire_requests_worker When adding the fb idle detection to mark-inactive, it was forgotten that userspace can drive the processing of retire-requests. We assumed that it would be principally driven by the retire requests worker, running once every second whilst active and so we would get the deferred timer for free. Instead we spend too many CPU cycles reclocking the LVDS preventing real work from being done. Signed-off-by: Chris Wilson Reported-and-tested-by: Alexander Lam Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=58843 Cc: stable@vger.kernel.org Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 3 --- drivers/gpu/drm/i915/intel_display.c | 12 +++--------- drivers/gpu/drm/i915/intel_drv.h | 3 +-- 3 files changed, 4 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 801f77ece72e..d7461771be2d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1899,9 +1899,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); BUG_ON(!obj->active); - if (obj->pin_count) /* are we a framebuffer? */ - intel_mark_fb_idle(obj); - list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); list_del_init(&obj->ring_list); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 56c51ddf54ec..0d45487040d2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6725,11 +6725,6 @@ void intel_mark_busy(struct drm_device *dev) void intel_mark_idle(struct drm_device *dev) { -} - -void intel_mark_fb_busy(struct drm_i915_gem_object *obj) -{ - struct drm_device *dev = obj->base.dev; struct drm_crtc *crtc; if (!i915_powersave) @@ -6739,12 +6734,11 @@ void intel_mark_fb_busy(struct drm_i915_gem_object *obj) if (!crtc->fb) continue; - if (to_intel_framebuffer(crtc->fb)->obj == obj) - intel_increase_pllclock(crtc); + intel_decrease_pllclock(crtc); } } -void intel_mark_fb_idle(struct drm_i915_gem_object *obj) +void intel_mark_fb_busy(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; struct drm_crtc *crtc; @@ -6757,7 +6751,7 @@ void intel_mark_fb_idle(struct drm_i915_gem_object *obj) continue; if (to_intel_framebuffer(crtc->fb)->obj == obj) - intel_decrease_pllclock(crtc); + intel_increase_pllclock(crtc); } } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index fcdfe42e434c..13afb37d8dec 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -452,9 +452,8 @@ extern bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, extern void intel_dvo_init(struct drm_device *dev); extern void intel_tv_init(struct drm_device *dev); extern void intel_mark_busy(struct drm_device *dev); -extern void intel_mark_idle(struct drm_device *dev); extern void intel_mark_fb_busy(struct drm_i915_gem_object *obj); -extern void intel_mark_fb_idle(struct drm_i915_gem_object *obj); +extern void intel_mark_idle(struct drm_device *dev); extern bool intel_lvds_init(struct drm_device *dev); extern bool intel_is_dual_link_lvds(struct drm_device *dev); extern void intel_dp_init(struct drm_device *dev, int output_reg, -- cgit v1.2.3-59-g8ed1b