aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
diff options
context:
space:
mode:
author: Dave Airlie <airlied@redhat.com> 2019-12-04 10:13:16 +1000
committer: Dave Airlie <airlied@redhat.com> 2019-12-04 10:13:16 +1000
commit: 909a60652624b837aff16825197c6a21d84fec82 (patch)
tree: e565f940a2b1a135915c001490956376dc711085 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
parent: Merge tag 'drm-intel-next-fixes-2019-11-28' of git://anongit.freedesktop.org/drm/drm-intel into drm-next (diff)
parent: drm/radeon: fix r1xx/r2xx register checker for POT textures (diff)
download: linux-dev-909a60652624b837aff16825197c6a21d84fec82.tar.xz
download: linux-dev-909a60652624b837aff16825197c6a21d84fec82.zip
Merge tag 'drm-next-5.5-2019-12-03' of git://people.freedesktop.org/~agd5f/linux into drm-next
drm-next-5.5-2019-12-03:

amdgpu:
- Fix vram lost handling with BACO on VI/CI asics
- DC fixes for Navi14
- Misc gfx10 fixes
- SR-IOV fixes
- Fix driver unload
- Fix XGMI limits on Arcturus

amdkfd:
- Enable KFD on PPC
- Optimize KFD page table reservations

radeon:
- Fix register checker for r1xx/r2xx

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191203204135.5437-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15
1 file changed, 14 insertions, 1 deletion
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ae6f5446262c..12dbcfaa34b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -105,11 +105,24 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
+/* Estimate page table size needed to represent a given memory size.
+ *
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for each
+ * 2MB of memory (factor 256K, >> 18). ROCm user mode tries to optimize
+ * for 2MB pages for TLB efficiency. However, small allocations and
+ * fragmented system memory still need some 4KB pages. We choose a
+ * compromise that should work in most cases without reserving too
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
+ */
+#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+
static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain, bool sg)
{
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
- uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
int ret = 0;
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,