diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_migrate.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 231 |
1 files changed, 149 insertions, 82 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index ed5385137f48..22b077ac9a19 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -24,6 +24,7 @@ #include <linux/hmm.h> #include <linux/dma-direction.h> #include <linux/dma-mapping.h> +#include <linux/migrate.h> #include "amdgpu_sync.h" #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -32,11 +33,12 @@ #include "kfd_priv.h" #include "kfd_svm.h" #include "kfd_migrate.h" +#include "kfd_smi_events.h" #ifdef dev_fmt #undef dev_fmt #endif -#define dev_fmt(fmt) "kfd_migrate: %s: " fmt, __func__ +#define dev_fmt(fmt) "kfd_migrate: " fmt static uint64_t svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr) @@ -86,10 +88,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, cpu_addr = &job->ibs[0].ptr[num_dw]; - r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr); - if (r) - goto error_free; - + amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr); r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); if (r) @@ -224,8 +223,7 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn) page = pfn_to_page(pfn); svm_range_bo_ref(prange->svm_bo); page->zone_device_data = prange->svm_bo; - get_page(page); - lock_page(page); + zone_device_page_init(page); } static void @@ -299,7 +297,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, dma_addr_t *scratch) { - uint64_t npages = migrate->cpages; + uint64_t npages = migrate->npages; struct device *dev = adev->dev; struct amdgpu_res_cursor cursor; dma_addr_t *src; @@ -315,7 +313,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, r = svm_range_vram_node_new(adev, prange, true); if (r) { - dev_err(adev->dev, "fail %d to alloc vram\n", r); + dev_dbg(adev->dev, "fail %d to alloc vram\n", r); goto out; } @@ -324,17 +322,19 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, for (i = j = 0; i < npages; i++) { struct page *spage; + dst[i] = cursor.start + (j << PAGE_SHIFT); + migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); + svm_migrate_get_vram_page(prange, migrate->dst[i]); + migrate->dst[i] = migrate_pfn(migrate->dst[i]); + spage = migrate_pfn_to_page(migrate->src[i]); if (spage && !is_zone_device_page(spage)) { - dst[i] = cursor.start + (j << PAGE_SHIFT); - migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); - svm_migrate_get_vram_page(prange, migrate->dst[i]); - migrate->dst[i] = migrate_pfn(migrate->dst[i]); src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_TO_DEVICE); r = dma_mapping_error(dev, src[i]); if (r) { - dev_err(adev->dev, "fail %d dma_map_page\n", r); + dev_err(adev->dev, "%s: fail %d dma_map_page\n", + __func__, r); goto out_free_vram_pages; } } else { @@ -346,7 +346,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, mfence); if (r) goto out_free_vram_pages; - amdgpu_res_next(&cursor, j << PAGE_SHIFT); + amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT); j = 0; } else { amdgpu_res_next(&cursor, PAGE_SIZE); @@ -365,7 +365,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, if (r) goto out_free_vram_pages; amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE); - j= 0; + j = 0; } else { j++; } @@ -404,15 +404,15 @@ out: static long svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, - uint64_t end) + uint64_t end, uint32_t trigger) { + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end - start) >> PAGE_SHIFT; struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; - struct migrate_vma migrate; + struct migrate_vma migrate = { 0 }; unsigned long cpages = 0; dma_addr_t *scratch; - size_t size; void *buf; int r = -ENOMEM; @@ -423,9 +423,9 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate.flags = MIGRATE_VMA_SELECT_SYSTEM; migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); - size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); - size *= npages; - buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO); + buf = kvcalloc(npages, + 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t), + GFP_KERNEL); if (!buf) goto out; @@ -433,10 +433,15 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate.dst = migrate.src + npages; scratch = (dma_addr_t *)(migrate.dst + npages); + kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid, + start >> PAGE_SHIFT, end >> PAGE_SHIFT, + 0, adev->kfd.dev->id, prange->prefetch_loc, + prange->preferred_loc, trigger); + r = migrate_vma_setup(&migrate); if (r) { - dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r, - prange->start, prange->last); + dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n", + __func__, r, prange->start, prange->last); goto out_free; } @@ -461,6 +466,10 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); + kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid, + start >> PAGE_SHIFT, end >> PAGE_SHIFT, + 0, adev->kfd.dev->id, trigger); + svm_range_dma_unmap(adev->dev, scratch, 0, npages); svm_range_free_dma_mappings(prange); @@ -482,6 +491,7 @@ out: * @prange: range structure * @best_loc: the device to migrate to * @mm: the process mm structure + * @trigger: reason of migration * * Context: Process context, caller hold mmap read lock, svms lock, prange lock * @@ -490,7 +500,7 @@ out: */ static int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm) + struct mm_struct *mm, uint32_t trigger) { unsigned long addr, start, end; struct vm_area_struct *vma; @@ -513,9 +523,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, prange->start, prange->last, best_loc); - /* FIXME: workaround for page locking bug with invalid pages */ - svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev)); - start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; @@ -527,7 +534,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, break; next = min(vma->vm_end, end); - r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next); + r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger); if (r < 0) { pr_debug("failed %ld to migrate\n", r); break; @@ -593,7 +600,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, continue; } src[i] = svm_migrate_addr(adev, spage); - if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) { + if (j > 0 && src[i] != src[i - 1] + PAGE_SIZE) { r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j, FROM_VRAM_TO_RAM, @@ -614,7 +621,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); r = dma_mapping_error(dev, dst[i]); if (r) { - dev_err(adev->dev, "fail %d dma_map_page\n", r); + dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r); goto out_oom; } @@ -640,18 +647,35 @@ out_oom: return r; } +/** + * svm_migrate_vma_to_ram - migrate range inside one vma from device to system + * + * @adev: amdgpu device to migrate from + * @prange: svm range structure + * @vma: vm_area_struct that range [start, end] belongs to + * @start: range start virtual address in pages + * @end: range end virtual address in pages + * + * Context: Process context, caller hold mmap read lock, prange->migrate_mutex + * + * Return: + * 0 - success with all pages migrated + * negative values - indicate error + * positive values - partial migration, number of pages not migrated + */ static long svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, - struct vm_area_struct *vma, uint64_t start, uint64_t end) + struct vm_area_struct *vma, uint64_t start, uint64_t end, + uint32_t trigger, struct page *fault_page) { + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end - start) >> PAGE_SHIFT; unsigned long upages = npages; unsigned long cpages = 0; struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; - struct migrate_vma migrate; + struct migrate_vma migrate = { 0 }; dma_addr_t *scratch; - size_t size; void *buf; int r = -ENOMEM; @@ -659,23 +683,32 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, migrate.vma = vma; migrate.start = start; migrate.end = end; - migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); + if (adev->gmc.xgmi.connected_to_cpu) + migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT; + else + migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; - size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); - size *= npages; - buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO); + buf = kvcalloc(npages, + 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t), + GFP_KERNEL); if (!buf) goto out; migrate.src = buf; migrate.dst = migrate.src + npages; + migrate.fault_page = fault_page; scratch = (dma_addr_t *)(migrate.dst + npages); + kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid, + start >> PAGE_SHIFT, end >> PAGE_SHIFT, + adev->kfd.dev->id, 0, prange->prefetch_loc, + prange->preferred_loc, trigger); + r = migrate_vma_setup(&migrate); if (r) { - dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r, - prange->start, prange->last); + dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n", + __func__, r, prange->start, prange->last); goto out_free; } @@ -702,6 +735,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); + + kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid, + start >> PAGE_SHIFT, end >> PAGE_SHIFT, + adev->kfd.dev->id, 0, trigger); + svm_range_dma_unmap(adev->dev, scratch, 0, npages); out_free: @@ -711,8 +749,6 @@ out: pdd = svm_range_get_pdd_by_adev(prange, adev); if (pdd) WRITE_ONCE(pdd->page_out, pdd->page_out + cpages); - - return upages; } return r ? r : upages; } @@ -721,13 +757,15 @@ out: * svm_migrate_vram_to_ram - migrate svm range from device to system * @prange: range structure * @mm: process mm, use current->mm if NULL + * @trigger: reason of migration * - * Context: Process context, caller hold mmap read lock, svms lock, prange lock + * Context: Process context, caller hold mmap read lock, prange->migrate_mutex * * Return: * 0 - OK, otherwise error code */ -int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, + uint32_t trigger, struct page *fault_page) { struct amdgpu_device *adev; struct vm_area_struct *vma; @@ -761,13 +799,17 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) unsigned long next; vma = find_vma(mm, addr); - if (!vma || addr < vma->vm_start) + if (!vma || addr < vma->vm_start) { + pr_debug("failed to find vma for prange %p\n", prange); + r = -EFAULT; break; + } next = min(vma->vm_end, end); - r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next); + r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger, + fault_page); if (r < 0) { - pr_debug("failed %ld to migrate\n", r); + pr_debug("failed %ld to migrate prange %p\n", r, prange); break; } else { upages += r; @@ -775,7 +817,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) addr = next; } - if (!upages) { + if (r >= 0 && !upages) { svm_range_vram_node_free(prange); prange->actual_loc = 0; } @@ -788,6 +830,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) * @prange: range structure * @best_loc: the device to migrate to * @mm: process mm, use current->mm if NULL + * @trigger: reason of migration * * Context: Process context, caller hold mmap read lock, svms lock, prange lock * @@ -796,7 +839,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) */ static int svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm) + struct mm_struct *mm, uint32_t trigger) { int r, retries = 3; @@ -808,7 +851,7 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc); do { - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL); if (r) return r; } while (prange->actual_loc && --retries); @@ -816,17 +859,17 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, if (prange->actual_loc) return -EDEADLK; - return svm_migrate_ram_to_vram(prange, best_loc, mm); + return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); } int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm) + struct mm_struct *mm, uint32_t trigger) { if (!prange->actual_loc) - return svm_migrate_ram_to_vram(prange, best_loc, mm); + return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger); else - return svm_migrate_vram_to_vram(prange, best_loc, mm); + return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger); } @@ -843,7 +886,7 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) { unsigned long addr = vmf->address; - struct vm_area_struct *vma; + struct svm_range_bo *svm_bo; enum svm_work_list_ops op; struct svm_range *parent; struct svm_range *prange; @@ -851,29 +894,42 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) struct mm_struct *mm; int r = 0; - vma = vmf->vma; - mm = vma->vm_mm; + svm_bo = vmf->page->zone_device_data; + if (!svm_bo) { + pr_debug("failed get device page at addr 0x%lx\n", addr); + return VM_FAULT_SIGBUS; + } + if (!mmget_not_zero(svm_bo->eviction_fence->mm)) { + pr_debug("addr 0x%lx of process mm is destroyed\n", addr); + return VM_FAULT_SIGBUS; + } - p = kfd_lookup_process_by_mm(vma->vm_mm); + mm = svm_bo->eviction_fence->mm; + if (mm != vmf->vma->vm_mm) + pr_debug("addr 0x%lx is COW mapping in child process\n", addr); + + p = kfd_lookup_process_by_mm(mm); if (!p) { pr_debug("failed find process at fault address 0x%lx\n", addr); - return VM_FAULT_SIGBUS; + r = VM_FAULT_SIGBUS; + goto out_mmput; } if (READ_ONCE(p->svms.faulting_task) == current) { pr_debug("skipping ram migration\n"); - kfd_unref_process(p); - return 0; + r = 0; + goto out_unref_process; } - addr >>= PAGE_SHIFT; + pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr); + addr >>= PAGE_SHIFT; mutex_lock(&p->svms.lock); prange = svm_range_from_addr(&p->svms, addr, &parent); if (!prange) { - pr_debug("cannot find svm range at 0x%lx\n", addr); + pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr); r = -EFAULT; - goto out; + goto out_unlock_svms; } mutex_lock(&parent->migrate_mutex); @@ -895,10 +951,12 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) goto out_unlock_prange; } - r = svm_migrate_vram_to_ram(prange, mm); + r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm, + KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, + vmf->page); if (r) - pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, - prange, prange->start, prange->last); + pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n", + r, prange->svms, prange, prange->start, prange->last); /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ if (p->xnack_enabled && parent == prange) @@ -912,12 +970,13 @@ out_unlock_prange: if (prange != parent) mutex_unlock(&prange->migrate_mutex); mutex_unlock(&parent->migrate_mutex); -out: +out_unlock_svms: mutex_unlock(&p->svms.lock); - kfd_unref_process(p); - +out_unref_process: pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); - + kfd_unref_process(p); +out_mmput: + mmput(mm); return r ? VM_FAULT_SIGBUS : 0; } @@ -933,7 +992,7 @@ int svm_migrate_init(struct amdgpu_device *adev) { struct kfd_dev *kfddev = adev->kfd.dev; struct dev_pagemap *pgmap; - struct resource *res; + struct resource *res = NULL; unsigned long size; void *r; @@ -948,28 +1007,34 @@ int svm_migrate_init(struct amdgpu_device *adev) * should remove reserved size */ size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20); - res = devm_request_free_mem_region(adev->dev, &iomem_resource, size); - if (IS_ERR(res)) - return -ENOMEM; + if (adev->gmc.xgmi.connected_to_cpu) { + pgmap->range.start = adev->gmc.aper_base; + pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 1; + pgmap->type = MEMORY_DEVICE_COHERENT; + } else { + res = devm_request_free_mem_region(adev->dev, &iomem_resource, size); + if (IS_ERR(res)) + return -ENOMEM; + pgmap->range.start = res->start; + pgmap->range.end = res->end; + pgmap->type = MEMORY_DEVICE_PRIVATE; + } - pgmap->type = MEMORY_DEVICE_PRIVATE; pgmap->nr_range = 1; - pgmap->range.start = res->start; - pgmap->range.end = res->end; pgmap->ops = &svm_migrate_pgmap_ops; pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev); - pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; - + pgmap->flags = 0; /* Device manager releases device-specific resources, memory region and * pgmap when driver disconnects from device. */ r = devm_memremap_pages(adev->dev, pgmap); if (IS_ERR(r)) { pr_err("failed to register HMM device memory\n"); - /* Disable SVM support capability */ pgmap->type = 0; - devm_release_mem_region(adev->dev, res->start, resource_size(res)); + if (pgmap->type == MEMORY_DEVICE_PRIVATE) + devm_release_mem_region(adev->dev, res->start, + res->end - res->start + 1); return PTR_ERR(r); } @@ -978,6 +1043,8 @@ int svm_migrate_init(struct amdgpu_device *adev) amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size)); + svm_range_set_max_pages(adev); + pr_info("HMM registered %ldMB device memory\n", size >> 20); return 0; |