summaryrefslogtreecommitdiffstats
path: root/sys/dev/pci/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
diff options
context:
space:
mode:
authorjsg <jsg@openbsd.org>2020-06-08 04:47:57 +0000
committerjsg <jsg@openbsd.org>2020-06-08 04:47:57 +0000
commitc349dbc7938c71a30e13c1be4acc1976165f4630 (patch)
tree8798187dfd7a927a15123e8dad31b782b074baa8 /sys/dev/pci/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
parentThe errcheck() function treats an errno of ERANGE or EDOM as something (diff)
downloadwireguard-openbsd-c349dbc7938c71a30e13c1be4acc1976165f4630.tar.xz
wireguard-openbsd-c349dbc7938c71a30e13c1be4acc1976165f4630.zip
update drm to linux 5.7
adds kernel support for amdgpu: vega20, raven2, renoir, navi10, navi14 inteldrm: icelake, tigerlake Thanks to the OpenBSD Foundation for sponsoring this work, kettenis@ for helping, patrick@ for helping adapt rockchip drm and many developers for testing.
Diffstat (limited to 'sys/dev/pci/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r--sys/dev/pci/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c898
1 files changed, 527 insertions, 371 deletions
diff --git a/sys/dev/pci/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/sys/dev/pci/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a5905a9b0d2..b5a5cab9244 100644
--- a/sys/dev/pci/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/sys/dev/pci/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -19,21 +19,17 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
+#include <linux/dma-buf.h>
#include <linux/list.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
-#include <drm/drmP.h>
+#include <linux/sched/task.h>
+
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_amdkfd.h"
-
-/* Special VM and GART address alignment needed for VI pre-Fiji due to
- * a HW bug.
- */
-#define VI_BO_SIZE_ALIGN (0x8000)
+#include "amdgpu_dma_buf.h"
+#include <uapi/linux/kfd_ioctl.h>
/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
@@ -46,9 +42,9 @@
/* Impose limit on how much memory KFD can use */
static struct {
uint64_t max_system_mem_limit;
- uint64_t max_userptr_mem_limit;
+ uint64_t max_ttm_mem_limit;
int64_t system_mem_used;
- int64_t userptr_mem_used;
+ int64_t ttm_mem_used;
spinlock_t mem_limit_lock;
} kfd_mem_limit;
@@ -90,8 +86,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
}
/* Set memory usage limits. Current, limits are
- * System (kernel) memory - 3/8th System RAM
- * Userptr memory - 3/4th System RAM
+ * System (TTM + userptr) memory - 15/16th System RAM
+ * TTM memory - 3/8th System RAM
*/
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
@@ -103,48 +99,74 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
mem *= si.mem_unit;
mtx_init(&kfd_mem_limit.mem_limit_lock, IPL_TTY);
- kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
- kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
- pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+ kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
+ pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
- (kfd_mem_limit.max_userptr_mem_limit >> 20));
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
}
-static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
- uint64_t size, u32 domain)
+/* Estimate page table size needed to represent a given memory size
+ *
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
+ * for 2MB pages for TLB efficiency. However, small allocations and
+ * fragmented system memory still need some 4KB pages. We choose a
+ * compromise that should work in most cases without reserving too
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
+ */
+#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+
+static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 domain, bool sg)
{
- size_t acc_size;
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
sizeof(struct amdgpu_bo));
- spin_lock(&kfd_mem_limit.mem_limit_lock);
+ vram_needed = 0;
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
- if (kfd_mem_limit.system_mem_used + (acc_size + size) >
- kfd_mem_limit.max_system_mem_limit) {
- ret = -ENOMEM;
- goto err_no_mem;
- }
- kfd_mem_limit.system_mem_used += (acc_size + size);
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
- if ((kfd_mem_limit.system_mem_used + acc_size >
- kfd_mem_limit.max_system_mem_limit) ||
- (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
- kfd_mem_limit.max_userptr_mem_limit)) {
- ret = -ENOMEM;
- goto err_no_mem;
- }
- kfd_mem_limit.system_mem_used += acc_size;
- kfd_mem_limit.userptr_mem_used += size;
+ /* TTM GTT memory */
+ system_mem_needed = acc_size + size;
+ ttm_mem_needed = acc_size + size;
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+ /* Userptr */
+ system_mem_needed = acc_size + size;
+ ttm_mem_needed = acc_size;
+ } else {
+ /* VRAM and SG */
+ system_mem_needed = acc_size;
+ ttm_mem_needed = acc_size;
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM)
+ vram_needed = size;
}
-err_no_mem:
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+
+ if ((kfd_mem_limit.system_mem_used + system_mem_needed >
+ kfd_mem_limit.max_system_mem_limit) ||
+ (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+ kfd_mem_limit.max_ttm_mem_limit) ||
+ (adev->kfd.vram_used + vram_needed >
+ adev->gmc.real_vram_size - reserved_for_pt)) {
+ ret = -ENOMEM;
+ } else {
+ kfd_mem_limit.system_mem_used += system_mem_needed;
+ kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+ adev->kfd.vram_used += vram_needed;
+ }
+
spin_unlock(&kfd_mem_limit.mem_limit_lock);
return ret;
}
-static void unreserve_system_mem_limit(struct amdgpu_device *adev,
- uint64_t size, u32 domain)
+static void unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 domain, bool sg)
{
size_t acc_size;
@@ -154,71 +176,62 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
- } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+ kfd_mem_limit.ttm_mem_used -= (acc_size + size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+ kfd_mem_limit.system_mem_used -= (acc_size + size);
+ kfd_mem_limit.ttm_mem_used -= acc_size;
+ } else {
kfd_mem_limit.system_mem_used -= acc_size;
- kfd_mem_limit.userptr_mem_used -= size;
+ kfd_mem_limit.ttm_mem_used -= acc_size;
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ adev->kfd.vram_used -= size;
+ WARN_ONCE(adev->kfd.vram_used < 0,
+ "kfd VRAM memory accounting unbalanced");
+ }
}
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
- WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
- "kfd userptr memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
+ "kfd TTM memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
{
- spin_lock(&kfd_mem_limit.mem_limit_lock);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ u32 domain = bo->preferred_domains;
+ bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
- kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
- kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
- } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
- kfd_mem_limit.system_mem_used -=
- (bo->tbo.acc_size + amdgpu_bo_size(bo));
+ domain = AMDGPU_GEM_DOMAIN_CPU;
+ sg = false;
}
- WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
- "kfd system memory accounting unbalanced");
- WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
- "kfd userptr memory accounting unbalanced");
- spin_unlock(&kfd_mem_limit.mem_limit_lock);
+ unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
}
-/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
+/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
* reservation object.
*
* @bo: [IN] Remove eviction fence(s) from this BO
- * @ef: [IN] If ef is specified, then this eviction fence is removed if it
+ * @ef: [IN] This eviction fence is removed if it
* is present in the shared list.
- * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
- * from BO's reservation object shared list.
- * @ef_count: [OUT] Number of fences in ef_list.
*
- * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
- * called to restore the eviction fences and to avoid memory leak. This is
- * useful for shared BOs.
* NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
*/
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
- struct amdgpu_amdkfd_fence *ef,
- struct amdgpu_amdkfd_fence ***ef_list,
- unsigned int *ef_count)
+ struct amdgpu_amdkfd_fence *ef)
{
- struct reservation_object *resv = bo->tbo.resv;
- struct reservation_object_list *old, *new;
+ struct dma_resv *resv = bo->tbo.base.resv;
+ struct dma_resv_list *old, *new;
unsigned int i, j, k;
- if (!ef && !ef_list)
+ if (!ef)
return -EINVAL;
- if (ef_list) {
- *ef_list = NULL;
- *ef_count = 0;
- }
-
- old = reservation_object_get_list(resv);
+ old = dma_resv_get_list(resv);
if (!old)
return 0;
@@ -234,10 +247,9 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct dma_fence *f;
f = rcu_dereference_protected(old->shared[i],
- reservation_object_held(resv));
+ dma_resv_held(resv));
- if ((ef && f->context == ef->base.context) ||
- (!ef && to_amdgpu_amdkfd_fence(f)))
+ if (f->context == ef->base.context)
RCU_INIT_POINTER(new->shared[--j], f);
else
RCU_INIT_POINTER(new->shared[k++], f);
@@ -245,21 +257,6 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
new->shared_max = old->shared_max;
new->shared_count = k;
- if (!ef) {
- unsigned int count = old->shared_count - j;
-
- /* Alloc memory for count number of eviction fence pointers.
- * Fill the ef_list array and ef_count
- */
- *ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL);
- *ef_count = count;
-
- if (!*ef_list) {
- kfree(new);
- return -ENOMEM;
- }
- }
-
/* Install the new fence list, seqcount provides the barriers */
preempt_disable();
write_seqcount_begin(&resv->seq);
@@ -272,45 +269,48 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct dma_fence *f;
f = rcu_dereference_protected(new->shared[i],
- reservation_object_held(resv));
- if (!ef)
- (*ef_list)[k++] = to_amdgpu_amdkfd_fence(f);
- else
- dma_fence_put(f);
+ dma_resv_held(resv));
+ dma_fence_put(f);
}
kfree_rcu(old, rcu);
return 0;
}
-/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
- * reservation object.
- *
- * @bo: [IN] Add eviction fences to this BO
- * @ef_list: [IN] List of eviction fences to be added
- * @ef_count: [IN] Number of fences in ef_list.
- *
- * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
- * function.
- */
-static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
- struct amdgpu_amdkfd_fence **ef_list,
- unsigned int ef_count)
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
{
- int i;
+ struct amdgpu_bo *root = bo;
+ struct amdgpu_vm_bo_base *vm_bo;
+ struct amdgpu_vm *vm;
+ struct amdkfd_process_info *info;
+ struct amdgpu_amdkfd_fence *ef;
+ int ret;
- if (!ef_list || !ef_count)
- return;
+ /* we can always get vm_bo from root PD bo.*/
+ while (root->parent)
+ root = root->parent;
- for (i = 0; i < ef_count; i++) {
- amdgpu_bo_fence(bo, &ef_list[i]->base, true);
- /* Re-adding the fence takes an additional reference. Drop that
- * reference.
- */
- dma_fence_put(&ef_list[i]->base);
- }
+ vm_bo = root->vm_bo;
+ if (!vm_bo)
+ return 0;
+
+ vm = vm_bo->vm;
+ if (!vm)
+ return 0;
+
+ info = vm->process_info;
+ if (!info || !info->eviction_fence)
+ return 0;
+
+ ef = container_of(dma_fence_get(&info->eviction_fence->base),
+ struct amdgpu_amdkfd_fence, base);
+
+ BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
+ ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
+ dma_resv_unlock(bo->tbo.base.resv);
- kfree(ef_list);
+ dma_fence_put(&ef->base);
+ return ret;
}
static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
@@ -328,18 +328,8 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
goto validate_fail;
- if (wait) {
- struct amdgpu_amdkfd_fence **ef_list;
- unsigned int ef_count;
-
- ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
- &ef_count);
- if (ret)
- goto validate_fail;
-
- ttm_bo_wait(&bo->tbo, false, false);
- amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
- }
+ if (wait)
+ amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
validate_fail:
return ret;
@@ -364,7 +354,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
struct amdgpu_bo *pd = vm->root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
struct amdgpu_vm_parser param;
- uint64_t addr, flags = AMDGPU_PTE_VALID;
int ret;
param.domain = AMDGPU_GEM_DOMAIN_VRAM;
@@ -383,9 +372,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
return ret;
}
- addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
- amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
- vm->pd_phys_addr = addr;
+ vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
if (vm->use_cpu_for_update) {
ret = amdgpu_bo_kmap(pd, NULL);
@@ -398,34 +385,50 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
return 0;
}
-static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
- struct dma_fence *f)
-{
- int ret = amdgpu_sync_fence(adev, sync, f, false);
-
- /* Sync objects can't handle multiple GPUs (contexts) updating
- * sync->last_vm_update. Fortunately we don't need it for
- * KFD's purposes, so we can just drop that fence.
- */
- if (sync->last_vm_update) {
- dma_fence_put(sync->last_vm_update);
- sync->last_vm_update = NULL;
- }
-
- return ret;
-}
-
static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
struct amdgpu_bo *pd = vm->root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
int ret;
- ret = amdgpu_vm_update_directories(adev, vm);
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
if (ret)
return ret;
- return sync_vm_fence(adev, sync, vm->last_update);
+ return amdgpu_sync_fence(sync, vm->last_update, false);
+}
+
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
+{
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
+ uint32_t mapping_flags;
+
+ mapping_flags = AMDGPU_VM_PAGE_READABLE;
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ if (bo_adev == adev)
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ } else {
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ }
+ break;
+ default:
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ }
+
+ return amdgpu_gem_va_map_flags(adev, mapping_flags);
}
/* add_bo_to_vm - Add a BO to a VM
@@ -446,7 +449,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
{
int ret;
struct kfd_bo_va_list *bo_va_entry;
- struct amdgpu_bo *pd = vm->root.base.bo;
struct amdgpu_bo *bo = mem->bo;
uint64_t va = mem->va;
struct list_head *list_bo_va = &mem->bo_va_list;
@@ -477,42 +479,23 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
}
bo_va_entry->va = va;
- bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
- mem->mapping_flags);
+ bo_va_entry->pte_flags = get_pte_flags(adev, mem);
bo_va_entry->kgd_dev = (void *)adev;
list_add(&bo_va_entry->bo_list, list_bo_va);
if (p_bo_va_entry)
*p_bo_va_entry = bo_va_entry;
- /* Allocate new page tables if needed and validate
- * them. Clearing of new page tables and validate need to wait
- * on move fences. We don't want that to trigger the eviction
- * fence, so remove it temporarily.
- */
- amdgpu_amdkfd_remove_eviction_fence(pd,
- vm->process_info->eviction_fence,
- NULL, NULL);
-
- ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
- if (ret) {
- pr_err("Failed to allocate pts, err=%d\n", ret);
- goto err_alloc_pts;
- }
-
+ /* Allocate validate page tables if needed */
ret = vm_validate_pt_pd_bos(vm);
if (ret) {
pr_err("validate_pt_pd_bos() failed\n");
goto err_alloc_pts;
}
- /* Add the eviction fence back */
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
-
return 0;
err_alloc_pts:
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
list_del(&bo_va_entry->bo_list);
err_vmadd:
@@ -539,7 +522,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
struct amdgpu_bo *bo = mem->bo;
INIT_LIST_HEAD(&entry->head);
- entry->shared = true;
+ entry->num_shared = 1;
entry->bo = &bo->tbo;
mutex_lock(&process_info->lock);
if (userptr)
@@ -549,6 +532,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
mutex_unlock(&process_info->lock);
}
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
+ struct amdkfd_process_info *process_info)
+{
+ struct ttm_validate_buffer *bo_list_entry;
+
+ bo_list_entry = &mem->validate_list;
+ mutex_lock(&process_info->lock);
+ list_del(&bo_list_entry->head);
+ mutex_unlock(&process_info->lock);
+}
+
/* Initializes user pages. It registers the MMU notifier and validates
* the userptr BO in the GTT domain.
*
@@ -561,8 +555,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
*
* Returns 0 for success, negative errno for errors.
*/
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
- uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
{
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
@@ -584,28 +577,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
goto out;
}
- /* If no restore worker is running concurrently, user_pages
- * should not be allocated
- */
- WARN(mem->user_pages, "Leaking user_pages array");
-
- mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
- sizeof(struct vm_page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!mem->user_pages) {
- pr_err("%s: Failed to allocate pages array\n", __func__);
- ret = -ENOMEM;
- goto unregister_out;
- }
-
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
- goto free_out;
+ goto unregister_out;
}
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
-
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__);
@@ -618,11 +595,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
amdgpu_bo_unreserve(bo);
release_out:
- if (ret)
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-free_out:
- kvfree(mem->user_pages);
- mem->user_pages = NULL;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
@@ -678,11 +651,9 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
if (!ctx->vm_pd)
return -ENOMEM;
- ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
- ctx->kfd_bo.tv.shared = true;
- ctx->kfd_bo.user_pages = NULL;
+ ctx->kfd_bo.tv.num_shared = 1;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
@@ -743,11 +714,9 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
return -ENOMEM;
}
- ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
- ctx->kfd_bo.tv.shared = true;
- ctx->kfd_bo.user_pages = NULL;
+ ctx->kfd_bo.tv.num_shared = 1;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
i = 0;
@@ -813,25 +782,12 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
{
struct amdgpu_bo_va *bo_va = entry->bo_va;
struct amdgpu_vm *vm = bo_va->base.vm;
- struct amdgpu_bo *pd = vm->root.base.bo;
- /* Remove eviction fence from PD (and thereby from PTs too as
- * they share the resv. object). Otherwise during PT update
- * job (see amdgpu_vm_bo_update_mapping), eviction fence would
- * get added to job->sync object and job execution would
- * trigger the eviction fence.
- */
- amdgpu_amdkfd_remove_eviction_fence(pd,
- vm->process_info->eviction_fence,
- NULL, NULL);
amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
- /* Add the eviction fence back */
- amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
-
- sync_vm_fence(adev, sync, bo_va->last_pt_update);
+ amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
return 0;
}
@@ -841,13 +797,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
struct amdgpu_sync *sync)
{
int ret;
- struct amdgpu_vm *vm;
- struct amdgpu_bo_va *bo_va;
- struct amdgpu_bo *bo;
-
- bo_va = entry->bo_va;
- vm = bo_va->base.vm;
- bo = bo_va->base.bo;
+ struct amdgpu_bo_va *bo_va = entry->bo_va;
/* Update the page tables */
ret = amdgpu_vm_bo_update(adev, bo_va, false);
@@ -856,7 +806,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
return ret;
}
- return sync_vm_fence(adev, sync, bo_va->last_pt_update);
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -891,6 +841,24 @@ update_gpuvm_pte_failed:
return ret;
}
+static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
+{
+ struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+ if (!sg)
+ return NULL;
+ if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+ kfree(sg);
+ return NULL;
+ }
+ sg->sgl->dma_address = addr;
+ sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->sgl->dma_length = size;
+#endif
+ return sg;
+}
+
static int process_validate_vms(struct amdkfd_process_info *process_info)
{
struct amdgpu_vm *peer_vm;
@@ -906,6 +874,26 @@ static int process_validate_vms(struct amdkfd_process_info *process_info)
return 0;
}
+static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
+ struct amdgpu_sync *sync)
+{
+ struct amdgpu_vm *peer_vm;
+ int ret;
+
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_bo *pd = peer_vm->root.base.bo;
+
+ ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
+ AMDGPU_SYNC_NE_OWNER,
+ AMDGPU_FENCE_OWNER_KFD);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int process_update_pds(struct amdkfd_process_info *process_info,
struct amdgpu_sync *sync)
{
@@ -968,9 +956,13 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
pr_err("validate_pt_pd_bos() failed\n");
goto validate_pd_fail;
}
- ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
+ ret = amdgpu_bo_sync_wait(vm->root.base.bo,
+ AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
goto wait_pd_fail;
+ ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1);
+ if (ret)
+ goto reserve_shared_fail;
amdgpu_bo_fence(vm->root.base.bo,
&vm->process_info->eviction_fence->base, true);
amdgpu_bo_unreserve(vm->root.base.bo);
@@ -984,6 +976,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
return 0;
+reserve_shared_fail:
wait_pd_fail:
validate_pd_fail:
amdgpu_bo_unreserve(vm->root.base.bo);
@@ -1003,8 +996,8 @@ create_evict_fence_fail:
return ret;
}
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
- void **process_info,
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
+ void **vm, void **process_info,
struct dma_fence **ef)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -1016,7 +1009,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
return -ENOMEM;
/* Initialize AMDGPU part of the VM */
- ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+ ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
if (ret) {
pr_err("Failed init vm ret %d\n", ret);
goto amdgpu_vm_init_fail;
@@ -1039,7 +1032,7 @@ amdgpu_vm_init_fail:
}
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
- struct file *filp,
+ struct file *filp, unsigned int pasid,
void **vm, void **process_info,
struct dma_fence **ef)
{
@@ -1054,7 +1047,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
return -EINVAL;
/* Convert VM into a compute VM */
- ret = amdgpu_vm_make_compute(adev, avm);
+ ret = amdgpu_vm_make_compute(adev, avm, pasid);
if (ret)
return ret;
@@ -1088,6 +1081,8 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
list_del(&vm->vm_list_node);
mutex_unlock(&process_info->lock);
+ vm->process_info = NULL;
+
/* Release per-process resources when last compute VM is destroyed */
if (!process_info->n_vms) {
WARN_ON(!list_empty(&process_info->kfd_bo_list));
@@ -1117,11 +1112,34 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
kfree(vm);
}
-uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+ if (WARN_ON(!kgd || !vm))
+ return;
+
+ pr_debug("Releasing process vm %p\n", vm);
+
+ /* The original pasid of amdgpu vm has already been
+ * released during making a amdgpu vm to a compute vm
+ * The current pasid is managed by kfd and will be
+ * released on kfd process destroy. Set amdgpu pasid
+ * to 0 to avoid duplicate release.
+ */
+ amdgpu_vm_release_compute(adev, avm);
+}
+
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
{
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ struct amdgpu_bo *pd = avm->root.base.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
- return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ if (adev->asic_type < CHIP_VEGA10)
+ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ return avm->pd_phys_addr;
}
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
@@ -1131,44 +1149,57 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ enum ttm_bo_type bo_type = ttm_bo_type_device;
+ struct sg_table *sg = NULL;
uint64_t user_addr = 0;
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
- int byte_align;
u32 domain, alloc_domain;
u64 alloc_flags;
- uint32_t mapping_flags;
int ret;
/*
* Check on which domain to allocate BO
*/
- if (flags & ALLOC_MEM_FLAGS_VRAM) {
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
- alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
- alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+ alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
- } else if (flags & ALLOC_MEM_FLAGS_GTT) {
+ } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
- } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
+ } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
alloc_flags = 0;
if (!offset || !*offset)
return -EINVAL;
- user_addr = *offset;
+ user_addr = untagged_addr(*offset);
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
+ bo_type = ttm_bo_type_sg;
+ alloc_flags = 0;
+ if (size > UINT_MAX)
+ return -EINVAL;
+ sg = create_doorbell_sg(*offset, size);
+ if (!sg)
+ return -ENOMEM;
} else {
return -EINVAL;
}
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
- if (!*mem)
- return -ENOMEM;
+ if (!*mem) {
+ ret = -ENOMEM;
+ goto err;
+ }
INIT_LIST_HEAD(&(*mem)->bo_va_list);
- rw_init(&(*mem)->lock, "gpuvm");
- (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
+ rw_init(&(*mem)->lock, "gpuvma");
+ (*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
/* Workaround for AQL queue wraparound bug. Map the same
* memory twice. That means we only actually allocate half
@@ -1177,30 +1208,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if ((*mem)->aql_queue)
size = size >> 1;
- /* Workaround for TLB bug on older VI chips */
- byte_align = (adev->family == AMDGPU_FAMILY_VI &&
- adev->asic_type != CHIP_FIJI &&
- adev->asic_type != CHIP_POLARIS10 &&
- adev->asic_type != CHIP_POLARIS11) ?
- VI_BO_SIZE_ALIGN : 1;
-
- mapping_flags = AMDGPU_VM_PAGE_READABLE;
- if (flags & ALLOC_MEM_FLAGS_WRITABLE)
- mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
- if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
- mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
- if (flags & ALLOC_MEM_FLAGS_COHERENT)
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
- else
- mapping_flags |= AMDGPU_VM_MTYPE_NC;
- (*mem)->mapping_flags = mapping_flags;
+ (*mem)->alloc_flags = flags;
amdgpu_sync_create(&(*mem)->sync);
- ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
+ ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
if (ret) {
pr_debug("Insufficient system memory\n");
- goto err_reserve_system_mem;
+ goto err_reserve_limit;
}
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
@@ -1208,10 +1223,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
memset(&bp, 0, sizeof(bp));
bp.size = size;
- bp.byte_align = byte_align;
+ bp.byte_align = 1;
bp.domain = alloc_domain;
bp.flags = alloc_flags;
- bp.type = ttm_bo_type_device;
+ bp.type = bo_type;
bp.resv = NULL;
ret = amdgpu_bo_create(adev, &bp, &bo);
if (ret) {
@@ -1219,6 +1234,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
domain_string(alloc_domain), ret);
goto err_bo_create;
}
+ if (bo_type == ttm_bo_type_sg) {
+ bo->tbo.sg = sg;
+ bo->tbo.ttm->sg = sg;
+ }
bo->kfd_bo = *mem;
(*mem)->bo = bo;
if (user_addr)
@@ -1231,13 +1250,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
- ret = init_user_pages(*mem, current->mm, user_addr);
- if (ret) {
- mutex_lock(&avm->process_info->lock);
- list_del(&(*mem)->validate_list.head);
- mutex_unlock(&avm->process_info->lock);
+ ret = init_user_pages(*mem, user_addr);
+ if (ret)
goto allocate_init_user_pages_failed;
- }
}
if (offset)
@@ -1246,14 +1261,20 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
return 0;
allocate_init_user_pages_failed:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
amdgpu_bo_unref(&bo);
/* Don't unreserve system mem limit twice */
- goto err_reserve_system_mem;
+ goto err_reserve_limit;
err_bo_create:
- unreserve_system_mem_limit(adev, size, alloc_domain);
-err_reserve_system_mem:
+ unreserve_mem_limit(adev, size, alloc_domain, !!sg);
+err_reserve_limit:
mutex_destroy(&(*mem)->lock);
kfree(*mem);
+err:
+ if (sg) {
+ sg_free_table(sg);
+ kfree(sg);
+ }
return ret;
}
@@ -1290,15 +1311,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
- /* Free user pages if necessary */
- if (mem->user_pages) {
- pr_debug("%s: Freeing user_pages array\n", __func__);
- if (mem->user_pages[0])
- release_pages(mem->user_pages,
- mem->bo->tbo.ttm->num_pages);
- kvfree(mem->user_pages);
- }
-
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@@ -1308,8 +1320,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
* attached
*/
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
- process_info->eviction_fence,
- NULL, NULL);
+ process_info->eviction_fence);
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
mem->va + bo_size * (1 + mem->aql_queue));
@@ -1323,8 +1334,16 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Free the sync object */
amdgpu_sync_free(&mem->sync);
+ /* If the SG is not NULL, it's one we created for a doorbell or mmio
+ * remap BO. We need to free it.
+ */
+ if (mem->bo->tbo.sg) {
+ sg_free_table(mem->bo->tbo.sg);
+ kfree(mem->bo->tbo.sg);
+ }
+
/* Free the BO*/
- amdgpu_bo_unref(&mem->bo);
+ drm_gem_object_put_unlocked(&mem->bo->tbo.base);
mutex_destroy(&mem->lock);
kfree(mem);
@@ -1387,7 +1406,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
* the queues are still stopped and we can leave mapping for
* the next restore worker
*/
- if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
+ bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
is_invalid_userptr = true;
if (check_if_add_bo_to_vm(avm, mem)) {
@@ -1429,7 +1449,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
is_invalid_userptr);
if (ret) {
- pr_err("Failed to map radeon bo to gpuvm\n");
+ pr_err("Failed to map bo to gpuvm\n");
goto map_bo_to_gpuvm_failed;
}
@@ -1527,8 +1547,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
if (mem->mapped_to_gpu_memory == 0 &&
!amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
- process_info->eviction_fence,
- NULL, NULL);
+ process_info->eviction_fence);
unreserve_out:
unreserve_bo_and_vms(&ctx, false, false);
@@ -1589,7 +1608,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
}
amdgpu_amdkfd_remove_eviction_fence(
- bo, mem->process_info->eviction_fence, NULL, NULL);
+ bo, mem->process_info->eviction_fence);
list_del_init(&mem->validate_list.head);
if (size)
@@ -1624,6 +1643,64 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
return 0;
}
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+ struct dma_buf *dma_buf,
+ uint64_t va, void *vm,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *bo;
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
+ /* Can't handle non-graphics buffers */
+ return -EINVAL;
+
+ obj = dma_buf->priv;
+ if (obj->dev->dev_private != adev)
+ /* Can't handle buffers from other devices */
+ return -EINVAL;
+
+ bo = gem_to_amdgpu_bo(obj);
+ if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT)))
+ /* Only VRAM and GTT BOs are supported */
+ return -EINVAL;
+
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem)
+ return -ENOMEM;
+
+ if (size)
+ *size = amdgpu_bo_size(bo);
+
+ if (mmap_offset)
+ *mmap_offset = amdgpu_bo_mmap_offset(bo);
+
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
+ rw_init(&(*mem)->lock, "gpuvmi");
+
+ (*mem)->alloc_flags =
+ ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
+ | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
+ | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
+
+ drm_gem_object_get(&bo->tbo.base);
+ (*mem)->bo = bo;
+ (*mem)->va = va;
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+ (*mem)->mapped_to_gpu_memory = 0;
+ (*mem)->process_info = avm->process_info;
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
+ amdgpu_sync_create(&(*mem)->sync);
+
+ return 0;
+}
+
/* Evict a userptr BO by stopping the queues if necessary
*
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
@@ -1639,14 +1716,14 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
struct mm_struct *mm)
{
struct amdkfd_process_info *process_info = mem->process_info;
- int invalid, evicted_bos;
+ int evicted_bos;
int r = 0;
- invalid = atomic_inc_return(&mem->invalid);
+ atomic_inc(&mem->invalid);
evicted_bos = atomic_inc_return(&process_info->evicted_bos);
if (evicted_bos == 1) {
/* First eviction, stop the queues */
- r = kgd2kfd->quiesce_mm(mm);
+ r = kgd2kfd_quiesce_mm(mm);
if (r)
pr_err("Failed to quiesce KFD\n");
schedule_delayed_work(&process_info->restore_userptr_work,
@@ -1711,36 +1788,24 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
bo = mem->bo;
- if (!mem->user_pages) {
- mem->user_pages =
- kvmalloc_array(bo->tbo.ttm->num_pages,
- sizeof(struct vm_page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!mem->user_pages) {
- pr_err("%s: Failed to allocate pages array\n",
- __func__);
- return -ENOMEM;
- }
- } else if (mem->user_pages[0]) {
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
- }
-
/* Get updated user pages */
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
- mem->user_pages);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
if (ret) {
- mem->user_pages[0] = NULL;
- pr_info("%s: Failed to get user pages: %d\n",
+ pr_debug("%s: Failed to get user pages: %d\n",
__func__, ret);
- /* Pretend it succeeded. It will fail later
- * with a VM fault if the GPU tries to access
- * it. Better than hanging indefinitely with
- * stalled user mode queues.
- */
+
+ /* Return error -EBUSY or -ENOMEM, retry restore */
+ return ret;
}
+ /*
+ * FIXME: Cannot ignore the return code, must hold
+ * notifier_lock
+ */
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+
/* Mark the BO as valid unless it was invalidated
- * again concurrently
+ * again concurrently.
*/
if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
return -EAGAIN;
@@ -1773,7 +1838,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
GFP_KERNEL);
if (!pd_bo_list_entries) {
pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_no_mem;
}
INIT_LIST_HEAD(&resv_list);
@@ -1790,27 +1856,17 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
validate_list.head) {
list_add_tail(&mem->resv_list.head, &resv_list);
mem->resv_list.bo = mem->validate_list.bo;
- mem->resv_list.shared = mem->validate_list.shared;
+ mem->resv_list.num_shared = mem->validate_list.num_shared;
}
/* Reserve all BOs and page tables for validation */
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
WARN(!list_empty(&duplicates), "Duplicates should be empty");
if (ret)
- goto out;
+ goto out_free;
amdgpu_sync_create(&sync);
- /* Avoid triggering eviction fences when unmapping invalid
- * userptr BOs (waits for all fences, doesn't use
- * FENCE_OWNER_VM)
- */
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
- vm_list_node)
- amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
- process_info->eviction_fence,
- NULL, NULL);
-
ret = process_validate_vms(process_info);
if (ret)
goto unreserve_out;
@@ -1823,10 +1879,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
bo = mem->bo;
- /* Copy pages array and validate the BO if we got user pages */
- if (mem->user_pages[0]) {
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
- mem->user_pages);
+ /* Validate the BO if we got user pages */
+ if (bo->tbo.ttm->pages[0]) {
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret) {
@@ -1835,13 +1889,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
- /* Validate succeeded, now the BO owns the pages, free
- * our copy of the pointer array. Put this BO back on
- * the userptr_valid_list. If we need to revalidate
- * it, we need to start from scratch.
- */
- kvfree(mem->user_pages);
- mem->user_pages = NULL;
list_move_tail(&mem->validate_list.head,
&process_info->userptr_valid_list);
@@ -1871,15 +1918,12 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
ret = process_update_pds(process_info, &sync);
unreserve_out:
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
- vm_list_node)
- amdgpu_bo_fence(peer_vm->root.base.bo,
- &process_info->eviction_fence->base, true);
ttm_eu_backoff_reservation(&ticket, &resv_list);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);
-out:
+out_free:
kfree(pd_bo_list_entries);
+out_no_mem:
return ret;
}
@@ -1938,12 +1982,13 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
evicted_bos)
goto unlock_out;
evicted_bos = 0;
- if (kgd2kfd->resume_mm(mm)) {
+ if (kgd2kfd_resume_mm(mm)) {
pr_err("%s: Failed to resume KFD\n", __func__);
/* No recovery from this failure. Probably the CP is
* hanging. No point trying again.
*/
}
+
unlock_out:
mutex_unlock(&process_info->lock);
mmput(mm);
@@ -2009,7 +2054,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
list_add_tail(&mem->resv_list.head, &ctx.list);
mem->resv_list.bo = mem->validate_list.bo;
- mem->resv_list.shared = mem->validate_list.shared;
+ mem->resv_list.num_shared = mem->validate_list.num_shared;
}
ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
@@ -2026,13 +2071,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
if (ret)
goto validate_map_fail;
- /* Wait for PD/PTs validate to finish */
- /* FIXME: I think this isn't needed */
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
- vm_list_node) {
- struct amdgpu_bo *bo = peer_vm->root.base.bo;
-
- ttm_bo_wait(&bo->tbo, false, false);
+ ret = process_sync_pds_resv(process_info, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
+ goto validate_map_fail;
}
/* Validate BOs and map them to GPUVM (update VM page tables). */
@@ -2048,7 +2090,11 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
pr_debug("Memory eviction: Validate BOs failed. Try again\n");
goto validate_map_fail;
}
-
+ ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
+ if (ret) {
+ pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
+ goto validate_map_fail;
+ }
list_for_each_entry(bo_va_entry, &mem->bo_va_list,
bo_list) {
ret = update_gpuvm_pte((struct amdgpu_device *)
@@ -2069,6 +2115,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
goto validate_map_fail;
}
+ /* Wait for validate and PT updates to finish */
amdgpu_sync_wait(&sync_obj, false);
/* Release old eviction fence and create new one, because fence only
@@ -2087,10 +2134,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
process_info->eviction_fence = new_fence;
*ef = dma_fence_get(&new_fence->base);
- /* Wait for validate to finish and attach new eviction fence */
- list_for_each_entry(mem, &process_info->kfd_bo_list,
- validate_list.head)
- ttm_bo_wait(&mem->bo->tbo, false, false);
+ /* Attach new eviction fence to all BOs */
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list.head)
amdgpu_bo_fence(mem->bo,
@@ -2112,3 +2156,115 @@ ttm_reserve_fail:
kfree(pd_bo_list);
return ret;
}
+
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
+{
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+ struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
+ int ret;
+
+ if (!info || !gws)
+ return -EINVAL;
+
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem)
+ return -ENOMEM;
+
+ rw_init(&(*mem)->lock, "aggws");
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
+ (*mem)->bo = amdgpu_bo_ref(gws_bo);
+ (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
+ (*mem)->process_info = process_info;
+ add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
+ amdgpu_sync_create(&(*mem)->sync);
+
+
+ /* Validate gws bo the first time it is added to process */
+ mutex_lock(&(*mem)->process_info->lock);
+ ret = amdgpu_bo_reserve(gws_bo, false);
+ if (unlikely(ret)) {
+ pr_err("Reserve gws bo failed %d\n", ret);
+ goto bo_reservation_failure;
+ }
+
+ ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
+ if (ret) {
+ pr_err("GWS BO validate failed %d\n", ret);
+ goto bo_validation_failure;
+ }
+ /* GWS resource is shared b/t amdgpu and amdkfd
+ * Add process eviction fence to bo so they can
+ * evict each other.
+ */
+ ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
+ if (ret)
+ goto reserve_shared_fail;
+ amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
+ amdgpu_bo_unreserve(gws_bo);
+ mutex_unlock(&(*mem)->process_info->lock);
+
+ return ret;
+
+reserve_shared_fail:
+bo_validation_failure:
+ amdgpu_bo_unreserve(gws_bo);
+bo_reservation_failure:
+ mutex_unlock(&(*mem)->process_info->lock);
+ amdgpu_sync_free(&(*mem)->sync);
+ remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
+ amdgpu_bo_unref(&gws_bo);
+ mutex_destroy(&(*mem)->lock);
+ kfree(*mem);
+ *mem = NULL;
+ return ret;
+}
+
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
+{
+ int ret;
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+ struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+ struct amdgpu_bo *gws_bo = kgd_mem->bo;
+
+ /* Remove BO from process's validate list so restore worker won't touch
+ * it anymore
+ */
+ remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
+
+ ret = amdgpu_bo_reserve(gws_bo, false);
+ if (unlikely(ret)) {
+ pr_err("Reserve gws bo failed %d\n", ret);
+ //TODO add BO back to validate_list?
+ return ret;
+ }
+ amdgpu_amdkfd_remove_eviction_fence(gws_bo,
+ process_info->eviction_fence);
+ amdgpu_bo_unreserve(gws_bo);
+ amdgpu_sync_free(&kgd_mem->sync);
+ amdgpu_bo_unref(&gws_bo);
+ mutex_destroy(&kgd_mem->lock);
+ kfree(mem);
+ return 0;
+}
+
+/* Returns GPU-specific tiling mode information */
+int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+ struct tile_config *config)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+ config->num_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ config->macro_tile_config_ptr =
+ adev->gfx.config.macrotile_mode_array;
+ config->num_macro_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+ /* Those values are not set from GFX9 onwards */
+ config->num_banks = adev->gfx.config.num_banks;
+ config->num_ranks = adev->gfx.config.num_ranks;
+
+ return 0;
+}