From 0e6d6ec02f867fe8d1f785312b7343b21052322f Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 12 Mar 2014 10:41:32 +0100 Subject: drm/ttm: Work around performance regression with VM_PFNMAP A performance regression was introduced in TTM in linux 3.13 when we started using VM_PFNMAP for shared mappings. In theory this should've been faster due to less page book-keeping but it appears like VM_PFNMAP + x86 PAT + write-combine is a particularly cpu-hungry combination, as seen by largely increased cpu-usage on r200 GL video playback. Until we've sorted out why, revert to always use VM_MIXEDMAP. Reference: freedesktop.org bugzilla bug #75719 Reported-and-tested-by: Acked-by: Alex Deucher Signed-off-by: Thomas Hellstrom Cc: stable@vger.kernel.org --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 801231c9ae48..0ce48e5a9cb4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -339,11 +339,13 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, vma->vm_private_data = bo; /* - * PFNMAP is faster than MIXEDMAP due to reduced page - * administration. So use MIXEDMAP only if private VMA, where - * we need to support COW. + * We'd like to use VM_PFNMAP on shared mappings, where + * (vma->vm_flags & VM_SHARED) != 0, for performance reasons, + * but for some reason VM_PFNMAP + x86 PAT + write-combine is very + * bad for performance. Until that has been sorted out, use + * VM_MIXEDMAP on all mappings. See freedesktop.org bug #75719 */ - vma->vm_flags |= (vma->vm_flags & VM_SHARED) ? VM_PFNMAP : VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP; vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; return 0; out_unref: @@ -359,7 +361,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) vma->vm_ops = &ttm_bo_vm_ops; vma->vm_private_data = ttm_bo_reference(bo); - vma->vm_flags |= (vma->vm_flags & VM_SHARED) ? VM_PFNMAP : VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP; vma->vm_flags |= VM_IO | VM_DONTEXPAND; return 0; } -- cgit v1.2.3-59-g8ed1b From 9ef7506f7eff3fc42724269f62e30164c141661f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 12 Mar 2014 10:59:37 -0400 Subject: drm/ttm: don't oops if no invalidate_caches() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few of the simpler TTM drivers (cirrus, ast, mgag200) do not implement this function. Yet can end up somehow with an evicted bo: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [< (null)>] (null) PGD 16e761067 PUD 16e6cf067 PMD 0 Oops: 0010 [#1] SMP Modules linked in: bnep bluetooth rfkill fuse ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables sg btrfs zlib_deflate raid6_pq xor dm_queue_length iTCO_wdt iTCO_vendor_support coretemp kvm dcdbas dm_service_time microcode serio_raw pcspkr lpc_ich mfd_core i7core_edac edac_core ses enclosure ipmi_si ipmi_msghandler shpchp acpi_power_meter mperf nfsd auth_rpcgss nfs_acl lockd uinput sunrpc dm_multipath xfs libcrc32c ata_generic pata_acpi sr_mod cdrom sd_mod usb_storage mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit lpfc drm_kms_helper ttm crc32c_intel ata_piix bfa drm ixgbe libata i2c_core mdio crc_t10dif ptp crct10dif_common pps_core scsi_transport_fc dca scsi_tgt megaraid_sas bnx2 dm_mirror dm_region_hash dm_log dm_mod CPU: 16 PID: 2572 Comm: X Not tainted 3.10.0-86.el7.x86_64 #1 Hardware name: Dell Inc. PowerEdge R810/0H235N, BIOS 0.3.0 11/14/2009 task: ffff8801799dabc0 ti: ffff88016c884000 task.ti: ffff88016c884000 RIP: 0010:[<0000000000000000>] [< (null)>] (null) RSP: 0018:ffff88016c885ad8 EFLAGS: 00010202 RAX: ffffffffa04e94c0 RBX: ffff880178937a20 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000240004 RDI: ffff880178937a00 RBP: ffff88016c885b60 R08: 00000000000171a0 R09: ffff88007cf171a0 R10: ffffea0005842540 R11: ffffffff810487b9 R12: ffff880178937b30 R13: ffff880178937a00 R14: ffff88016c885b78 R15: ffff880179929400 FS: 00007f81ba2ef980(0000) GS:ffff88007cf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000016e763000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffffffffa0306fae ffff8801799295c0 0000000000260004 0000000000000001 ffff88016c885b60 ffffffffa0307669 00ff88007cf17738 ffff88017cf17700 ffff880178937a00 ffff880100000000 ffff880100000000 0000000079929400 Call Trace: [] ? ttm_bo_handle_move_mem+0x54e/0x5b0 [ttm] [] ? ttm_bo_mem_space+0x169/0x340 [ttm] [] ttm_bo_move_buffer+0x117/0x130 [ttm] [] ? perf_event_init_context+0x141/0x220 [] ttm_bo_validate+0xc1/0x130 [ttm] [] mgag200_bo_pin+0x87/0xc0 [mgag200] [] mga_crtc_cursor_set+0x474/0xbb0 [mgag200] [] ? __mem_cgroup_commit_charge+0x152/0x3b0 [] ? mutex_lock+0x12/0x2f [] drm_mode_cursor_common+0x123/0x170 [drm] [] drm_mode_cursor_ioctl+0x41/0x50 [drm] [] drm_ioctl+0x502/0x630 [drm] [] ? __do_page_fault+0x1f4/0x510 [] ? __restore_xstate_sig+0x218/0x4f0 [] do_vfs_ioctl+0x2e5/0x4d0 [] ? file_has_perm+0x8e/0xa0 [] SyS_ioctl+0x81/0xa0 [] system_call_fastpath+0x16/0x1b Code: Bad RIP value. RIP [< (null)>] (null) RSP CR2: 0000000000000000 Signed-off-by: Rob Clark Reviewed-by: Jérôme Glisse Reviewed-by: Thomas Hellstrom Cc: stable@vger.kernel.org --- drivers/gpu/drm/ttm/ttm_bo.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a06651309388..214b7992a3aa 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -351,9 +351,11 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, moved: if (bo->evicted) { - ret = bdev->driver->invalidate_caches(bdev, bo->mem.placement); - if (ret) - pr_err("Can not flush read caches\n"); + if (bdev->driver->invalidate_caches) { + ret = bdev->driver->invalidate_caches(bdev, bo->mem.placement); + if (ret) + pr_err("Can not flush read caches\n"); + } bo->evicted = false; } -- cgit v1.2.3-59-g8ed1b From 7848865914c6a63ead674f0f5604b77df7d3874f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Mar 2014 15:02:30 -0400 Subject: drm/radeon: fix runpm disabling on non-PX harder Make sure runtime pm is disabled on non-PX hardware. Should fix powerdown problems without displays attached. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_kms.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 2aecd6dc2610..66ed3ea71440 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -33,6 +33,13 @@ #include #include #include + +#if defined(CONFIG_VGA_SWITCHEROO) +bool radeon_is_px(void); +#else +static inline bool radeon_is_px(void) { return false; } +#endif + /** * radeon_driver_unload_kms - Main unload function for KMS. * @@ -130,7 +137,8 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } - if (radeon_runtime_pm != 0) { + if ((radeon_runtime_pm == 1) || + ((radeon_runtime_pm == -1) && radeon_is_px())) { pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); pm_runtime_set_active(dev->dev); -- cgit v1.2.3-59-g8ed1b From 7b1bbe883b3ed962ca2be4daf321f318f5091340 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Mar 2014 15:15:58 -0400 Subject: drm/radeon/cik: properly set sdma ring status on disable When we disable the rings, set the status properly. If not other code pathes may try and use the rings which are not functional at this point. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik_sdma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 1ecb3f1070e3..5d7a5fa3741a 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -264,6 +264,8 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev) WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0); } + rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; } /** -- cgit v1.2.3-59-g8ed1b From 07ae78c9798b79bad3d3adf983c94ba23fde54d4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Mar 2014 15:26:34 -0400 Subject: drm/radeon/cik: stop the sdma engines in the enable() function We always stop the rings when disabling the engines so just call the stop functions directly from the sdma enable function. This way the rings' status is set correctly on suspend so there are no problems on resume. Fixes resume failures that result in acceleration getting disabled. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik_sdma.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 5d7a5fa3741a..94626ea90fa5 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -293,6 +293,11 @@ void cik_sdma_enable(struct radeon_device *rdev, bool enable) u32 me_cntl, reg_offset; int i; + if (enable == false) { + cik_sdma_gfx_stop(rdev); + cik_sdma_rlc_stop(rdev); + } + for (i = 0; i < 2; i++) { if (i == 0) reg_offset = SDMA0_REGISTER_OFFSET; @@ -422,10 +427,6 @@ static int cik_sdma_load_microcode(struct radeon_device *rdev) if (!rdev->sdma_fw) return -EINVAL; - /* stop the gfx rings and rlc compute queues */ - cik_sdma_gfx_stop(rdev); - cik_sdma_rlc_stop(rdev); - /* halt the MEs */ cik_sdma_enable(rdev, false); @@ -494,9 +495,6 @@ int cik_sdma_resume(struct radeon_device *rdev) */ void cik_sdma_fini(struct radeon_device *rdev) { - /* stop the gfx rings and rlc compute queues */ - cik_sdma_gfx_stop(rdev); - cik_sdma_rlc_stop(rdev); /* halt the MEs */ cik_sdma_enable(rdev, false); radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); -- cgit v1.2.3-59-g8ed1b From b2b3d8d952e4f8d6ac2ce80be96b937f29f6e42e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Mar 2014 16:20:44 -0400 Subject: drm/radeon/cik: properly set compute ring status on disable When we disable the rings, set the status properly. If not other code pathes may try and use the rings which are not functional at this point. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e22be8458d92..bbb17841a9e5 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4134,8 +4134,11 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) { if (enable) WREG32(CP_MEC_CNTL, 0); - else + else { WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; + } udelay(50); } -- cgit v1.2.3-59-g8ed1b From 4b0c82529b92cda4723cf1d09e1e2ee9b9ae96f1 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 12 Mar 2014 20:42:31 +0100 Subject: drm/vmwgfx: Fix a surface reference corner-case in legacy emulation mode If running on a gb-object capable device with a non-gb capable surface exporter (X server) and a gb capable surface referencing client (GL driver), the referencing client expects to find a shareable backing buffer attached to the surface at reference time. This may not be the case if the surface has not yet been validated. This would cause the surface reference IOCTL to return an error. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 82468d902915..e7af580ab977 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -830,6 +830,24 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, if (unlikely(ret != 0)) goto out_unlock; + /* + * A gb-aware client referencing a shared surface will + * expect a backup buffer to be present. + */ + if (dev_priv->has_mob && req->shareable) { + uint32_t backup_handle; + + ret = vmw_user_dmabuf_alloc(dev_priv, tfile, + res->backup_size, + true, + &backup_handle, + &res->backup); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); + goto out_unlock; + } + } + tmp = vmw_resource_reference(&srf->res); ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime, req->shareable, VMW_RES_SURFACE, -- cgit v1.2.3-59-g8ed1b