Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
26 files changed, 287 insertions, 784 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 48155060a57c..61474627a32c 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -38,11 +38,9 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v9.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v10.o \ $(AMDKFD_PATH)/kfd_kernel_queue.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_cik.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_v10.o \ $(AMDKFD_PATH)/kfd_packet_manager.o \ + $(AMDKFD_PATH)/kfd_packet_manager_vi.o \ + $(AMDKFD_PATH)/kfd_packet_manager_v9.o \ $(AMDKFD_PATH)/kfd_process_queue_manager.o \ $(AMDKFD_PATH)/kfd_device_queue_manager.o \ $(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 1544007af34a..3f0300e53727 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -42,6 +42,7 @@ static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); +static int kfd_release(struct inode *, struct file *); static int kfd_mmap(struct file *, struct vm_area_struct *); static const char kfd_dev_name[] = "kfd"; @@ -51,6 +52,7 @@ static const struct file_operations kfd_fops = { .unlocked_ioctl = kfd_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = kfd_open, + .release = kfd_release, .mmap = kfd_mmap, }; @@ -124,8 +126,13 @@ static int kfd_open(struct inode *inode, struct file *filep) if (IS_ERR(process)) return PTR_ERR(process); - if (kfd_is_locked()) + if (kfd_is_locked()) { + kfd_unref_process(process); return -EAGAIN; + } + + /* filep now owns the reference returned by kfd_create_process */ + filep->private_data = process; dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", process->pasid, process->is_32bit_user_mode); @@ -133,6 +140,16 @@ static int kfd_open(struct inode *inode, struct file *filep) return 0; } +static int kfd_release(struct inode *inode, struct file *filep) +{ + struct kfd_process *process = filep->private_data; + + if (process) + kfd_unref_process(process); + + return 0; +} + static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, void *data) { @@ -258,6 +275,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, unsigned int queue_id; struct kfd_process_device *pdd; struct queue_properties q_properties; + uint32_t doorbell_offset_in_process = 0; memset(&q_properties, 0, sizeof(struct queue_properties)); @@ -286,7 +304,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id); + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, + &doorbell_offset_in_process); if (err != 0) goto err_create_queue; @@ -296,14 +315,11 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, /* Return gpu_id as doorbell offset for mmap usage */ args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL; args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); - args->doorbell_offset <<= PAGE_SHIFT; if (KFD_IS_SOC15(dev->device_info->asic_family)) - /* On SOC15 ASICs, doorbell allocation must be - * per-device, and independent from the per-process - * queue_id. Return the doorbell offset within the - * doorbell aperture to user mode. 
+ /* On SOC15 ASICs, include the doorbell offset within the + * process doorbell frame, which is 2 pages. */ - args->doorbell_offset |= q_properties.doorbell_off; + args->doorbell_offset |= doorbell_offset_in_process; mutex_unlock(&p->mutex); @@ -1312,10 +1328,9 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, /* MMIO is mapped through kfd device * Generate a kfd mmap offset */ - if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { - args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id); - args->mmap_offset <<= PAGE_SHIFT; - } + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) + args->mmap_offset = KFD_MMAP_TYPE_MMIO + | KFD_MMAP_GPU_ID(args->gpu_id); return 0; @@ -1803,9 +1818,14 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg); - process = kfd_get_process(current); - if (IS_ERR(process)) { - dev_dbg(kfd_device, "no process\n"); + /* Get the process struct from the filep. Only the process + * that opened /dev/kfd can use the file descriptor. Child + * processes need to create their own KFD device context. + */ + process = filep->private_data; + if (process->lead_thread != current->group_leader) { + dev_dbg(kfd_device, "Using KFD FD in wrong process\n"); + retcode = -EBADF; goto err_i1; } @@ -1899,20 +1919,19 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) { struct kfd_process *process; struct kfd_dev *dev = NULL; - unsigned long vm_pgoff; + unsigned long mmap_offset; unsigned int gpu_id; process = kfd_get_process(current); if (IS_ERR(process)) return PTR_ERR(process); - vm_pgoff = vma->vm_pgoff; - vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff); - gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff); + mmap_offset = vma->vm_pgoff << PAGE_SHIFT; + gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset); if (gpu_id) dev = kfd_device_by_id(gpu_id); - switch (vm_pgoff & KFD_MMAP_TYPE_MASK) { + switch (mmap_offset & KFD_MMAP_TYPE_MASK) { case KFD_MMAP_TYPE_DOORBELL: if (!dev) return -ENODEV; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index d59f2cd056c6..27bcc5b472f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -72,11 +72,11 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, * The receive packet buff will be sitting on the Indirect Buffer * and in the PQ we put the IB packet + sync packet(s). 
*/ - status = kq->ops.acquire_packet_buffer(kq, + status = kq_acquire_packet_buffer(kq, pq_packets_size_in_bytes / sizeof(uint32_t), &ib_packet_buff); if (status) { - pr_err("acquire_packet_buffer failed\n"); + pr_err("kq_acquire_packet_buffer failed\n"); return status; } @@ -115,7 +115,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, if (status) { pr_err("Failed to allocate GART memory\n"); - kq->ops.rollback_packet(kq); + kq_rollback_packet(kq); return status; } @@ -151,7 +151,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, rm_packet->data_lo = QUEUESTATE__ACTIVE; - kq->ops.submit_packet(kq); + kq_submit_packet(kq); /* Wait till CP writes sync code: */ status = amdkfd_fence_wait_timeout( @@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) properties.type = KFD_QUEUE_TYPE_DIQ; status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, - &properties, &qid); + &properties, &qid, NULL); if (status) { pr_err("Failed to create DIQ\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c index 15c523027285..511712c2e382 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -93,7 +93,7 @@ void kfd_debugfs_init(void) kfd_debugfs_hqds_by_device, &kfd_debugfs_fops); debugfs_create_file("rls", S_IFREG | 0444, debugfs_root, kfd_debugfs_rls_by_device, &kfd_debugfs_fops); - debugfs_create_file("hang_hws", S_IFREG | 0644, debugfs_root, + debugfs_create_file("hang_hws", S_IFREG | 0200, debugfs_root, NULL, &kfd_debugfs_hang_hws_fops); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 4fa8834ce7cb..2a9e40131735 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -728,6 +728,9 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd) { if (!kfd->init_complete) return 0; + + kfd->dqm->ops.pre_reset(kfd->dqm); + kgd2kfd_suspend(kfd); kfd_signal_reset_event(kfd); @@ -742,7 +745,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd) int kgd2kfd_post_reset(struct kfd_dev *kfd) { - int ret, count; + int ret; if (!kfd->init_complete) return 0; @@ -750,7 +753,7 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) ret = kfd_resume(kfd); if (ret) return ret; - count = atomic_dec_return(&kfd_locked); + atomic_dec(&kfd_locked); atomic_set(&kfd->sram_ecc_flag, 0); @@ -822,6 +825,21 @@ dqm_start_error: return err; } +static inline void kfd_queue_work(struct workqueue_struct *wq, + struct work_struct *work) +{ + int cpu, new_cpu; + + cpu = new_cpu = smp_processor_id(); + do { + new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; + if (cpu_to_node(new_cpu) == numa_node_id()) + break; + } while (cpu != new_cpu); + + queue_work_on(new_cpu, wq, work); +} + /* This is called directly from KGD at ISR. */ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { @@ -844,7 +862,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) patched_ihre, &is_patched) && enqueue_ih_ring_entry(kfd, is_patched ? 
patched_ihre : ih_ring_entry)) - queue_work(kfd->ih_wq, &kfd->interrupt_work); + kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work); spin_unlock_irqrestore(&kfd->interrupt_lock, flags); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 984c2f2b24b6..80d22bf702e8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -170,7 +170,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) } q->properties.doorbell_off = - kfd_doorbell_id_to_offset(dev, q->process, + kfd_get_doorbell_dw_offset_in_bar(dev, q->process, q->doorbell_id); return 0; @@ -930,11 +930,11 @@ static void uninitialize(struct device_queue_manager *dqm) for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) kfree(dqm->mqd_mgrs[i]); mutex_destroy(&dqm->lock_hidden); - kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); } static int start_nocpsch(struct device_queue_manager *dqm) { + pr_info("SW scheduler is used"); init_interrupts(dqm); if (dqm->dev->device_info->asic_family == CHIP_HAWAII) @@ -947,12 +947,19 @@ static int start_nocpsch(struct device_queue_manager *dqm) static int stop_nocpsch(struct device_queue_manager *dqm) { if (dqm->dev->device_info->asic_family == CHIP_HAWAII) - pm_uninit(&dqm->packets); + pm_uninit(&dqm->packets, false); dqm->sched_running = false; return 0; } +static void pre_reset(struct device_queue_manager *dqm) +{ + dqm_lock(dqm); + dqm->is_resetting = true; + dqm_unlock(dqm); +} + static int allocate_sdma_queue(struct device_queue_manager *dqm, struct queue *q) { @@ -1100,6 +1107,7 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm_lock(dqm); /* clear hang status when driver try to start the hw scheduler */ dqm->is_hws_hang = false; + dqm->is_resetting = false; dqm->sched_running = true; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); dqm_unlock(dqm); @@ -1107,20 +1115,24 @@ static int start_cpsch(struct device_queue_manager *dqm) return 0; fail_allocate_vidmem: fail_set_sched_resources: - pm_uninit(&dqm->packets); + pm_uninit(&dqm->packets, false); fail_packet_manager_init: return retval; } static int stop_cpsch(struct device_queue_manager *dqm) { + bool hanging; + dqm_lock(dqm); - unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + if (!dqm->is_hws_hang) + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + hanging = dqm->is_hws_hang || dqm->is_resetting; dqm->sched_running = false; dqm_unlock(dqm); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); - pm_uninit(&dqm->packets); + pm_uninit(&dqm->packets, hanging); return 0; } @@ -1225,16 +1237,18 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, list_add(&q->list, &qpd->queues_list); qpd->queue_count++; + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; + if (q->properties.is_active) { dqm->queue_count++; retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) - dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. 
@@ -1352,8 +1366,17 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, /* should be timed out */ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, queue_preemption_timeout_ms); - if (retval) + if (retval) { + pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); + dqm->is_hws_hang = true; + /* It's possible we're detecting a HWS hang in the + * middle of a GPU reset. No need to schedule another + * reset in this case. + */ + if (!dqm->is_resetting) + schedule_work(&dqm->hw_exception_work); return retval; + } pm_release_ib(&dqm->packets); dqm->active_runlist = false; @@ -1371,12 +1394,8 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, if (dqm->is_hws_hang) return -EIO; retval = unmap_queues_cpsch(dqm, filter, filter_param); - if (retval) { - pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); - dqm->is_hws_hang = true; - schedule_work(&dqm->hw_exception_work); + if (retval) return retval; - } return map_queues_cpsch(dqm); } @@ -1595,7 +1614,7 @@ static int get_wave_state(struct device_queue_manager *dqm, goto dqm_unlock; } - mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE]; + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; if (!mqd_mgr->get_wave_state) { r = -EINVAL; @@ -1770,6 +1789,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.initialize = initialize_cpsch; dqm->ops.start = start_cpsch; dqm->ops.stop = stop_cpsch; + dqm->ops.pre_reset = pre_reset; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; dqm->ops.register_process = register_process; @@ -1788,6 +1808,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) /* initialize dqm for no cp scheduling */ dqm->ops.start = start_nocpsch; dqm->ops.stop = stop_nocpsch; + dqm->ops.pre_reset = pre_reset; dqm->ops.create_queue = create_queue_nocpsch; dqm->ops.destroy_queue = destroy_queue_nocpsch; dqm->ops.update_queue = update_queue; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index a8c37e6da027..871d3b628d2d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -104,6 +104,7 @@ struct device_queue_manager_ops { int (*initialize)(struct device_queue_manager *dqm); int (*start)(struct device_queue_manager *dqm); int (*stop)(struct device_queue_manager *dqm); + void (*pre_reset)(struct device_queue_manager *dqm); void (*uninitialize)(struct device_queue_manager *dqm); int (*create_kernel_queue)(struct device_queue_manager *dqm, struct kernel_queue *kq, @@ -190,7 +191,6 @@ struct device_queue_manager { /* the pasid mapping for each kfd vmid */ uint16_t vmid_pasid[VMID_NUM]; uint64_t pipelines_addr; - struct kfd_mem_obj *pipeline_mem; uint64_t fence_gpu_addr; unsigned int *fence_addr; struct kfd_mem_obj *fence_mem; @@ -199,6 +199,7 @@ struct device_queue_manager { /* hw exception */ bool is_hws_hang; + bool is_resetting; struct work_struct hw_exception_work; struct kfd_mem_obj hiq_sdma_mqd; bool sched_running; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index ebe79bf00145..8e0c00b9555e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -91,7 +91,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) kfd->doorbell_base = 
kfd->shared_resources.doorbell_physical_address + doorbell_start_offset; - kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32); + kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32); kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, kfd_doorbell_process_slice(kfd)); @@ -103,8 +103,8 @@ int kfd_doorbell_init(struct kfd_dev *kfd) pr_debug("doorbell base == 0x%08lX\n", (uintptr_t)kfd->doorbell_base); - pr_debug("doorbell_id_offset == 0x%08lX\n", - kfd->doorbell_id_offset); + pr_debug("doorbell_base_dw_offset == 0x%08lX\n", + kfd->doorbell_base_dw_offset); pr_debug("doorbell_process_limit == 0x%08lX\n", doorbell_process_limit); @@ -185,7 +185,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, * Calculating the kernel doorbell offset using the first * doorbell page. */ - *doorbell_off = kfd->doorbell_id_offset + inx; + *doorbell_off = kfd->doorbell_base_dw_offset + inx; pr_debug("Get kernel queue doorbell\n" " doorbell offset == 0x%08X\n" @@ -225,17 +225,17 @@ void write_kernel_doorbell64(void __iomem *db, u64 value) } } -unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, +unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, struct kfd_process *process, unsigned int doorbell_id) { /* - * doorbell_id_offset accounts for doorbells taken by KGD. + * doorbell_base_dw_offset accounts for doorbells taken by KGD. * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to * the process's doorbells. The offset returned is in dword * units regardless of the ASIC-dependent doorbell size. */ - return kfd->doorbell_id_offset + + return kfd->doorbell_base_dw_offset + process->doorbell_index * kfd_doorbell_process_slice(kfd) / sizeof(u32) + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 908081c85de1..1f8365575b12 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ret = create_signal_event(devkfd, p, ev); if (!ret) { *event_page_offset = KFD_MMAP_TYPE_EVENTS; - *event_page_offset <<= PAGE_SHIFT; *event_slot_index = ev->event_id; } break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index 193e2835bd4d..8d871514671e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -62,9 +62,6 @@ int kfd_iommu_device_init(struct kfd_dev *kfd) struct amd_iommu_device_info iommu_info; unsigned int pasid_limit; int err; - struct kfd_topology_device *top_dev; - - top_dev = kfd_topology_device_by_id(kfd->id); if (!kfd->device_info->needs_iommu_device) return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 11d244891393..bae706462f96 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -34,7 +34,10 @@ #define PM4_COUNT_ZERO (((1 << 15) - 1) << 16) -static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, +/* Initialize a kernel queue, including allocations of GART memory + * needed for the queue. 
+ */ +static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size) { struct queue_properties prop; @@ -87,9 +90,17 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->pq_kernel_addr = kq->pq->cpu_ptr; kq->pq_gpu_addr = kq->pq->gpu_addr; - retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size); - if (!retval) - goto err_eop_allocate_vidmem; + /* For CIK family asics, kq->eop_mem is not needed */ + if (dev->device_info->asic_family > CHIP_MULLINS) { + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); + if (retval != 0) + goto err_eop_allocate_vidmem; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; + kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; + + memset(kq->eop_kernel_addr, 0, PAGE_SIZE); + } retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel), &kq->rptr_mem); @@ -183,9 +194,10 @@ err_get_kernel_doorbell: } -static void uninitialize(struct kernel_queue *kq) +/* Uninitialize a kernel queue and free all its memory usages. */ +static void kq_uninitialize(struct kernel_queue *kq, bool hanging) { - if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) + if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && !hanging) kq->mqd_mgr->destroy_mqd(kq->mqd_mgr, kq->queue->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, @@ -200,14 +212,19 @@ static void uninitialize(struct kernel_queue *kq) kfd_gtt_sa_free(kq->dev, kq->rptr_mem); kfd_gtt_sa_free(kq->dev, kq->wptr_mem); - kq->ops_asic_specific.uninitialize(kq); + + /* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free() + * is able to handle NULL properly. + */ + kfd_gtt_sa_free(kq->dev, kq->eop_mem); + kfd_gtt_sa_free(kq->dev, kq->pq); kfd_release_kernel_doorbell(kq->dev, kq->queue->properties.doorbell_ptr); uninit_queue(kq->queue); } -static int acquire_packet_buffer(struct kernel_queue *kq, +int kq_acquire_packet_buffer(struct kernel_queue *kq, size_t packet_size_in_dwords, unsigned int **buffer_ptr) { size_t available_size; @@ -268,7 +285,7 @@ err_no_space: return -ENOMEM; } -static void submit_packet(struct kernel_queue *kq) +void kq_submit_packet(struct kernel_queue *kq) { #ifdef DEBUG int i; @@ -280,11 +297,18 @@ static void submit_packet(struct kernel_queue *kq) } pr_debug("\n"); #endif - - kq->ops_asic_specific.submit_packet(kq); + if (kq->dev->device_info->doorbell_size == 8) { + *kq->wptr64_kernel = kq->pending_wptr64; + write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, + kq->pending_wptr64); + } else { + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); + } } -static void rollback_packet(struct kernel_queue *kq) +void kq_rollback_packet(struct kernel_queue *kq) { if (kq->dev->device_info->doorbell_size == 8) { kq->pending_wptr64 = *kq->wptr64_kernel; @@ -304,60 +328,18 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, if (!kq) return NULL; - kq->ops.initialize = initialize; - kq->ops.uninitialize = uninitialize; - kq->ops.acquire_packet_buffer = acquire_packet_buffer; - kq->ops.submit_packet = submit_packet; - kq->ops.rollback_packet = rollback_packet; - - switch (dev->device_info->asic_family) { - case CHIP_CARRIZO: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - kernel_queue_init_vi(&kq->ops_asic_specific); - break; - - case CHIP_KAVERI: - case CHIP_HAWAII: - kernel_queue_init_cik(&kq->ops_asic_specific); - break; - - case CHIP_VEGA10: - case CHIP_VEGA12: - case 
CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - kernel_queue_init_v9(&kq->ops_asic_specific); - break; - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - kernel_queue_init_v10(&kq->ops_asic_specific); - break; - default: - WARN(1, "Unexpected ASIC family %u", - dev->device_info->asic_family); - goto out_free; - } - - if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) + if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) return kq; pr_err("Failed to init kernel queue\n"); -out_free: kfree(kq); return NULL; } -void kernel_queue_uninit(struct kernel_queue *kq) +void kernel_queue_uninit(struct kernel_queue *kq, bool hanging) { - kq->ops.uninitialize(kq); + kq_uninitialize(kq, hanging); kfree(kq); } @@ -377,7 +359,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) return; } - retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer); + retval = kq_acquire_packet_buffer(kq, 5, &buffer); if (unlikely(retval != 0)) { pr_err(" Failed to acquire packet buffer\n"); pr_err("Kernel queue test failed\n"); @@ -385,7 +367,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) } for (i = 0; i < 5; i++) buffer[i] = kq->nop_packet; - kq->ops.submit_packet(kq); + kq_submit_packet(kq); pr_err("Ending kernel queue test\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index 365fc674fea4..f4cfe9f1871c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -29,45 +29,28 @@ #include "kfd_priv.h" /** - * struct kernel_queue_ops - * - * @initialize: Initialize a kernel queue, including allocations of GART memory - * needed for the queue. - * - * @uninitialize: Uninitialize a kernel queue and free all its memory usages. - * - * @acquire_packet_buffer: Returns a pointer to the location in the kernel + * kq_acquire_packet_buffer: Returns a pointer to the location in the kernel * queue ring buffer where the calling function can write its packet. It is * Guaranteed that there is enough space for that packet. It also updates the * pending write pointer to that location so subsequent calls to * acquire_packet_buffer will get a correct write pointer * - * @submit_packet: Update the write pointer and doorbell of a kernel queue. - * - * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel - * queue are equal, which means the CP has read all the submitted packets. + * kq_submit_packet: Update the write pointer and doorbell of a kernel queue. * - * @rollback_packet: This routine is called if we failed to build an acquired + * kq_rollback_packet: This routine is called if we failed to build an acquired * packet for some reason. 
It just overwrites the pending wptr with the current * one * */ -struct kernel_queue_ops { - bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev, - enum kfd_queue_type type, unsigned int queue_size); - void (*uninitialize)(struct kernel_queue *kq); - int (*acquire_packet_buffer)(struct kernel_queue *kq, - size_t packet_size_in_dwords, - unsigned int **buffer_ptr); - void (*submit_packet)(struct kernel_queue *kq); - void (*rollback_packet)(struct kernel_queue *kq); -}; +int kq_acquire_packet_buffer(struct kernel_queue *kq, + size_t packet_size_in_dwords, + unsigned int **buffer_ptr); +void kq_submit_packet(struct kernel_queue *kq); +void kq_rollback_packet(struct kernel_queue *kq); -struct kernel_queue { - struct kernel_queue_ops ops; - struct kernel_queue_ops ops_asic_specific; +struct kernel_queue { /* data */ struct kfd_dev *dev; struct mqd_manager *mqd_mgr; @@ -99,9 +82,4 @@ struct kernel_queue { struct list_head list; }; -void kernel_queue_init_cik(struct kernel_queue_ops *ops); -void kernel_queue_init_vi(struct kernel_queue_ops *ops); -void kernel_queue_init_v9(struct kernel_queue_ops *ops); -void kernel_queue_init_v10(struct kernel_queue_ops *ops); - #endif /* KFD_KERNEL_QUEUE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c deleted file mode 100644 index 19e54acb4125..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "kfd_kernel_queue.h" - -static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, - enum kfd_queue_type type, unsigned int queue_size); -static void uninitialize_cik(struct kernel_queue *kq); -static void submit_packet_cik(struct kernel_queue *kq); - -void kernel_queue_init_cik(struct kernel_queue_ops *ops) -{ - ops->initialize = initialize_cik; - ops->uninitialize = uninitialize_cik; - ops->submit_packet = submit_packet_cik; -} - -static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, - enum kfd_queue_type type, unsigned int queue_size) -{ - return true; -} - -static void uninitialize_cik(struct kernel_queue *kq) -{ -} - -static void submit_packet_cik(struct kernel_queue *kq) -{ - *kq->wptr_kernel = kq->pending_wptr; - write_kernel_doorbell(kq->queue->properties.doorbell_ptr, - kq->pending_wptr); -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c deleted file mode 100644 index aed32ab7102e..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c +++ /dev/null @@ -1,348 +0,0 @@ -/* - * Copyright 2018 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "kfd_kernel_queue.h" -#include "kfd_device_queue_manager.h" -#include "kfd_pm4_headers_ai.h" -#include "kfd_pm4_opcodes.h" -#include "gc/gc_10_1_0_sh_mask.h" - -static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev, - enum kfd_queue_type type, unsigned int queue_size); -static void uninitialize_v10(struct kernel_queue *kq); -static void submit_packet_v10(struct kernel_queue *kq); - -void kernel_queue_init_v10(struct kernel_queue_ops *ops) -{ - ops->initialize = initialize_v10; - ops->uninitialize = uninitialize_v10; - ops->submit_packet = submit_packet_v10; -} - -static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev, - enum kfd_queue_type type, unsigned int queue_size) -{ - int retval; - - retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); - if (retval != 0) - return false; - - kq->eop_gpu_addr = kq->eop_mem->gpu_addr; - kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; - - memset(kq->eop_kernel_addr, 0, PAGE_SIZE); - - return true; -} - -static void uninitialize_v10(struct kernel_queue *kq) -{ - kfd_gtt_sa_free(kq->dev, kq->eop_mem); -} - -static void submit_packet_v10(struct kernel_queue *kq) -{ - *kq->wptr64_kernel = kq->pending_wptr64; - write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, - kq->pending_wptr64); -} - -static int pm_map_process_v10(struct packet_manager *pm, - uint32_t *buffer, struct qcm_process_device *qpd) -{ - struct pm4_mes_map_process *packet; - uint64_t vm_page_table_base_addr = qpd->page_table_base; - - packet = (struct pm4_mes_map_process *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_map_process)); - - packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, - sizeof(struct pm4_mes_map_process)); - packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; - packet->bitfields2.process_quantum = 1; - packet->bitfields2.pasid = qpd->pqm->process->pasid; - packet->bitfields14.gds_size = qpd->gds_size; - packet->bitfields14.num_gws = qpd->num_gws; - packet->bitfields14.num_oac = qpd->num_oac; - packet->bitfields14.sdma_enable = 1; - - packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; - - packet->sh_mem_config = qpd->sh_mem_config; - packet->sh_mem_bases = qpd->sh_mem_bases; - if (qpd->tba_addr) { - packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8); - packet->sq_shader_tba_hi = (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) | - upper_32_bits(qpd->tba_addr >> 8); - packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8); - packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8); - } - - packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); - packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); - - packet->vm_context_page_table_base_addr_lo32 = - lower_32_bits(vm_page_table_base_addr); - packet->vm_context_page_table_base_addr_hi32 = - upper_32_bits(vm_page_table_base_addr); - - return 0; -} - -static int pm_runlist_v10(struct packet_manager *pm, uint32_t *buffer, - uint64_t ib, size_t ib_size_in_dwords, bool chain) -{ - struct pm4_mes_runlist *packet; - - int concurrent_proc_cnt = 0; - struct kfd_dev *kfd = pm->dqm->dev; - - /* Determine the number of processes to map together to HW: - * it can not exceed the number of VMIDs available to the - * scheduler, and it is determined by the smaller of the number - * of processes in the runlist and kfd module parameter - * hws_max_conc_proc. - * Note: the arbitration between the number of VMIDs and - * hws_max_conc_proc has been done in - * kgd2kfd_device_init(). 
- */ - concurrent_proc_cnt = min(pm->dqm->processes_count, - kfd->max_proc_per_quantum); - - - packet = (struct pm4_mes_runlist *)buffer; - - memset(buffer, 0, sizeof(struct pm4_mes_runlist)); - packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST, - sizeof(struct pm4_mes_runlist)); - - packet->bitfields4.ib_size = ib_size_in_dwords; - packet->bitfields4.chain = chain ? 1 : 0; - packet->bitfields4.offload_polling = 0; - packet->bitfields4.valid = 1; - packet->bitfields4.process_cnt = concurrent_proc_cnt; - packet->ordinal2 = lower_32_bits(ib); - packet->ib_base_hi = upper_32_bits(ib); - - return 0; -} - -static int pm_map_queues_v10(struct packet_manager *pm, uint32_t *buffer, - struct queue *q, bool is_static) -{ - struct pm4_mes_map_queues *packet; - bool use_static = is_static; - - packet = (struct pm4_mes_map_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); - - packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, - sizeof(struct pm4_mes_map_queues)); - packet->bitfields2.num_queues = 1; - packet->bitfields2.queue_sel = - queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; - - packet->bitfields2.engine_sel = - engine_sel__mes_map_queues__compute_vi; - packet->bitfields2.queue_type = - queue_type__mes_map_queues__normal_compute_vi; - - switch (q->properties.type) { - case KFD_QUEUE_TYPE_COMPUTE: - if (use_static) - packet->bitfields2.queue_type = - queue_type__mes_map_queues__normal_latency_static_queue_vi; - break; - case KFD_QUEUE_TYPE_DIQ: - packet->bitfields2.queue_type = - queue_type__mes_map_queues__debug_interface_queue_vi; - break; - case KFD_QUEUE_TYPE_SDMA: - case KFD_QUEUE_TYPE_SDMA_XGMI: - packet->bitfields2.engine_sel = q->properties.sdma_engine_id + - engine_sel__mes_map_queues__sdma0_vi; - use_static = false; /* no static queues under SDMA */ - break; - default: - WARN(1, "queue type %d\n", q->properties.type); - return -EINVAL; - } - packet->bitfields3.doorbell_offset = - q->properties.doorbell_off; - - packet->mqd_addr_lo = - lower_32_bits(q->gart_mqd_addr); - - packet->mqd_addr_hi = - upper_32_bits(q->gart_mqd_addr); - - packet->wptr_addr_lo = - lower_32_bits((uint64_t)q->properties.write_ptr); - - packet->wptr_addr_hi = - upper_32_bits((uint64_t)q->properties.write_ptr); - - return 0; -} - -static int pm_unmap_queues_v10(struct packet_manager *pm, uint32_t *buffer, - enum kfd_queue_type type, - enum kfd_unmap_queues_filter filter, - uint32_t filter_param, bool reset, - unsigned int sdma_engine) -{ - struct pm4_mes_unmap_queues *packet; - - packet = (struct pm4_mes_unmap_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); - - packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES, - sizeof(struct pm4_mes_unmap_queues)); - switch (type) { - case KFD_QUEUE_TYPE_COMPUTE: - case KFD_QUEUE_TYPE_DIQ: - packet->bitfields2.engine_sel = - engine_sel__mes_unmap_queues__compute; - break; - case KFD_QUEUE_TYPE_SDMA: - case KFD_QUEUE_TYPE_SDMA_XGMI: - packet->bitfields2.engine_sel = - engine_sel__mes_unmap_queues__sdma0 + sdma_engine; - break; - default: - WARN(1, "queue type %d\n", type); - break; - } - - if (reset) - packet->bitfields2.action = - action__mes_unmap_queues__reset_queues; - else - packet->bitfields2.action = - action__mes_unmap_queues__preempt_queues; - - switch (filter) { - case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_specified_queues; - packet->bitfields2.num_queues = 1; - packet->bitfields3b.doorbell_offset0 = 
filter_param; - break; - case KFD_UNMAP_QUEUES_FILTER_BY_PASID: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; - packet->bitfields3a.pasid = filter_param; - break; - case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_queues; - break; - case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: - /* in this case, we do not preempt static queues */ - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_non_static_queues; - break; - default: - WARN(1, "filter %d\n", filter); - break; - } - - return 0; - -} - -static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer, - uint64_t fence_address, uint32_t fence_value) -{ - struct pm4_mes_query_status *packet; - - packet = (struct pm4_mes_query_status *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_query_status)); - - - packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, - sizeof(struct pm4_mes_query_status)); - - packet->bitfields2.context_id = 0; - packet->bitfields2.interrupt_sel = - interrupt_sel__mes_query_status__completion_status; - packet->bitfields2.command = - command__mes_query_status__fence_only_after_write_ack; - - packet->addr_hi = upper_32_bits((uint64_t)fence_address); - packet->addr_lo = lower_32_bits((uint64_t)fence_address); - packet->data_hi = upper_32_bits((uint64_t)fence_value); - packet->data_lo = lower_32_bits((uint64_t)fence_value); - - return 0; -} - - -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer) -{ - struct pm4_mec_release_mem *packet; - - WARN_ON(!buffer); - - packet = (struct pm4_mec_release_mem *)buffer; - memset(buffer, 0, sizeof(struct pm4_mec_release_mem)); - - packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM, - sizeof(struct pm4_mec_release_mem)); - - packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; - packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe; - packet->bitfields2.tcl1_action_ena = 1; - packet->bitfields2.tc_action_ena = 1; - packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru; - - packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low; - packet->bitfields3.int_sel = - int_sel__mec_release_mem__send_interrupt_after_write_confirm; - - packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; - packet->address_hi = upper_32_bits(gpu_addr); - - packet->data_lo = 0; - - return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int); -} - -const struct packet_manager_funcs kfd_v10_pm_funcs = { - .map_process = pm_map_process_v10, - .runlist = pm_runlist_v10, - .set_resources = pm_set_resources_vi, - .map_queues = pm_map_queues_v10, - .unmap_queues = pm_unmap_queues_v10, - .query_status = pm_query_status_v10, - .release_mem = pm_release_mem_v10, - .map_process_size = sizeof(struct pm4_mes_map_process), - .runlist_size = sizeof(struct pm4_mes_runlist), - .set_resources_size = sizeof(struct pm4_mes_set_resources), - .map_queues_size = sizeof(struct pm4_mes_map_queues), - .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), - .query_status_size = sizeof(struct pm4_mes_query_status), - .release_mem_size = sizeof(struct pm4_mec_release_mem) -}; - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 28876aceb14b..19f0fe547c57 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -374,7 +374,6 @@ struct mqd_manager 
*mqd_manager_init_cik(enum KFD_MQD_TYPE type, switch (type) { case KFD_MQD_TYPE_CP: - case KFD_MQD_TYPE_COMPUTE: mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd; mqd->free_mqd = free_mqd; @@ -401,7 +400,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, #endif break; case KFD_MQD_TYPE_DIQ: - mqd->allocate_mqd = allocate_hiq_mqd; + mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; @@ -442,7 +441,7 @@ struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, mqd = mqd_manager_init_cik(type, dev); if (!mqd) return NULL; - if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE)) + if (type == KFD_MQD_TYPE_CP) mqd->update_mqd = update_mqd_hawaii; return mqd; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 4a236b2c2354..d1d68a51bfb8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -66,6 +66,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3); } +static void set_priority(struct v10_compute_mqd *m, struct queue_properties *q) +{ + m->cp_hqd_pipe_priority = pipe_priority_map[q->priority]; + m->cp_hqd_queue_priority = q->priority; +} + static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, struct queue_properties *q) { @@ -109,9 +115,6 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; - m->cp_hqd_pipe_priority = 1; - m->cp_hqd_queue_priority = 15; - if (q->format == KFD_QUEUE_FORMAT_AQL) { m->cp_hqd_aql_control = 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; @@ -150,6 +153,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, return r; } +static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + queue_id, p->doorbell_off); +} + static void update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { @@ -208,11 +219,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_ctx_save_control = 0; update_cu_mask(mm, mqd, q); + set_priority(m, q); - q->is_active = (q->queue_size > 0 && - q->queue_address != 0 && - q->queue_percent > 0 && - !q->is_evicted); + q->is_active = QUEUE_IS_ACTIVE(*q); } static int destroy_mqd(struct mqd_manager *mm, void *mqd, @@ -247,18 +256,22 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, { struct v10_compute_mqd *m; - /* Control stack is located one page after MQD. */ - void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE); - m = get_mqd(mqd); + /* Control stack is written backwards, while workgroup context data + * is written forwards. Both starts from m->cp_hqd_cntl_stack_size. + * Current position is at m->cp_hqd_cntl_stack_offset and + * m->cp_hqd_wg_state_offset, respectively. 
+ */ *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - m->cp_hqd_cntl_stack_offset; *save_area_used_size = m->cp_hqd_wg_state_offset - m->cp_hqd_cntl_stack_size; - if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size)) - return -EFAULT; + /* Control stack is not copied to user mode for GFXv10 because + * it's part of the context save area that is already + * accessible to user mode + */ return 0; } @@ -277,18 +290,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; } -static void update_mqd_hiq(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) -{ - struct v10_compute_mqd *m; - - update_mqd(mm, mqd, q); - - /* TODO: what's the point? update_mqd already does this. */ - m = get_mqd(mqd); - m->cp_hqd_vmid = q->vmid; -} - static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -340,11 +341,7 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdma_queue_id = q->sdma_queue_id; m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT; - - q->is_active = (q->queue_size > 0 && - q->queue_address != 0 && - q->queue_percent > 0 && - !q->is_evicted); + q->is_active = QUEUE_IS_ACTIVE(*q); } /* @@ -392,7 +389,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) return NULL; @@ -400,7 +397,6 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, switch (type) { case KFD_MQD_TYPE_CP: - case KFD_MQD_TYPE_COMPUTE: pr_debug("%s@%i\n", __func__, __LINE__); mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd; @@ -421,8 +417,8 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, mqd->allocate_mqd = allocate_hiq_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; - mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->load_mqd = hiq_load_mqd_kiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v10_compute_mqd); @@ -432,11 +428,11 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, pr_debug("%s@%i\n", __func__, __LINE__); break; case KFD_MQD_TYPE_DIQ: - mqd->allocate_mqd = allocate_hiq_mqd; + mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v10_compute_mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index d3380c5bdbde..436b7f518979 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -92,7 +92,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, * instead of sub-allocation function. 
*/ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { - mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); if (!mqd_mem_obj) return NULL; retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, @@ -191,6 +191,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, wptr_shift, 0, mms); } +static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, + queue_id, p->doorbell_off); +} + static void update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { @@ -302,7 +310,8 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - m->cp_hqd_cntl_stack_offset; - *save_area_used_size = m->cp_hqd_wg_state_offset; + *save_area_used_size = m->cp_hqd_wg_state_offset - + m->cp_hqd_cntl_stack_size; if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size)) return -EFAULT; @@ -324,18 +333,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; } -static void update_mqd_hiq(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) -{ - struct v9_mqd *m; - - update_mqd(mm, mqd, q); - - /* TODO: what's the point? update_mqd already does this. */ - m = get_mqd(mqd); - m->cp_hqd_vmid = q->vmid; -} - static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -443,7 +440,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, switch (type) { case KFD_MQD_TYPE_CP: - case KFD_MQD_TYPE_COMPUTE: mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd; mqd->free_mqd = free_mqd; @@ -461,8 +457,8 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->allocate_mqd = allocate_hiq_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; - mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->load_mqd = hiq_load_mqd_kiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v9_mqd); @@ -471,11 +467,11 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, #endif break; case KFD_MQD_TYPE_DIQ: - mqd->allocate_mqd = allocate_hiq_mqd; + mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v9_mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 7d144f56f421..a5e8ff1e5945 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -312,11 +312,7 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, static void update_mqd_hiq(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { - struct vi_mqd *m; __update_mqd(mm, mqd, q, MTYPE_UC, 0); - - m = get_mqd(mqd); - m->cp_hqd_vmid = q->vmid; } static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, @@ -425,7 +421,6 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, switch (type) { case KFD_MQD_TYPE_CP: - case KFD_MQD_TYPE_COMPUTE: mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = 
init_mqd; mqd->free_mqd = free_mqd; @@ -453,7 +448,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, #endif break; case KFD_MQD_TYPE_DIQ: - mqd->allocate_mqd = allocate_hiq_mqd; + mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; @@ -494,7 +489,7 @@ struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, mqd = mqd_manager_init_vi(type, dev); if (!mqd) return NULL; - if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE)) + if (type == KFD_MQD_TYPE_CP) mqd->update_mqd = update_mqd_tonga; return mqd; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 83ef4b3dd2fb..dc406e6dee23 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -241,12 +241,10 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_RAVEN: case CHIP_RENOIR: case CHIP_ARCTURUS: - pm->pmf = &kfd_v9_pm_funcs; - break; case CHIP_NAVI10: case CHIP_NAVI12: case CHIP_NAVI14: - pm->pmf = &kfd_v10_pm_funcs; + pm->pmf = &kfd_v9_pm_funcs; break; default: WARN(1, "Unexpected ASIC family %u", @@ -266,10 +264,10 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) return 0; } -void pm_uninit(struct packet_manager *pm) +void pm_uninit(struct packet_manager *pm, bool hanging) { mutex_destroy(&pm->lock); - kernel_queue_uninit(pm->priv_queue); + kernel_queue_uninit(pm->priv_queue, hanging); } int pm_send_set_resources(struct packet_manager *pm, @@ -280,7 +278,7 @@ int pm_send_set_resources(struct packet_manager *pm, size = pm->pmf->set_resources_size; mutex_lock(&pm->lock); - pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + kq_acquire_packet_buffer(pm->priv_queue, size / sizeof(uint32_t), (unsigned int **)&buffer); if (!buffer) { @@ -291,9 +289,9 @@ int pm_send_set_resources(struct packet_manager *pm, retval = pm->pmf->set_resources(pm, buffer, res); if (!retval) - pm->priv_queue->ops.submit_packet(pm->priv_queue); + kq_submit_packet(pm->priv_queue); else - pm->priv_queue->ops.rollback_packet(pm->priv_queue); + kq_rollback_packet(pm->priv_queue); out: mutex_unlock(&pm->lock); @@ -318,7 +316,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t); mutex_lock(&pm->lock); - retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + retval = kq_acquire_packet_buffer(pm->priv_queue, packet_size_dwords, &rl_buffer); if (retval) goto fail_acquire_packet_buffer; @@ -328,14 +326,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) if (retval) goto fail_create_runlist; - pm->priv_queue->ops.submit_packet(pm->priv_queue); + kq_submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); return retval; fail_create_runlist: - pm->priv_queue->ops.rollback_packet(pm->priv_queue); + kq_rollback_packet(pm->priv_queue); fail_acquire_packet_buffer: mutex_unlock(&pm->lock); fail_create_runlist_ib: @@ -354,7 +352,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, size = pm->pmf->query_status_size; mutex_lock(&pm->lock); - pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + kq_acquire_packet_buffer(pm->priv_queue, size / sizeof(uint32_t), (unsigned int **)&buffer); if (!buffer) { pr_err("Failed to allocate buffer on kernel queue\n"); @@ -364,9 +362,9 @@ int pm_send_query_status(struct packet_manager *pm, 
uint64_t fence_address, retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value); if (!retval) - pm->priv_queue->ops.submit_packet(pm->priv_queue); + kq_submit_packet(pm->priv_queue); else - pm->priv_queue->ops.rollback_packet(pm->priv_queue); + kq_rollback_packet(pm->priv_queue); out: mutex_unlock(&pm->lock); @@ -383,7 +381,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, size = pm->pmf->unmap_queues_size; mutex_lock(&pm->lock); - pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + kq_acquire_packet_buffer(pm->priv_queue, size / sizeof(uint32_t), (unsigned int **)&buffer); if (!buffer) { pr_err("Failed to allocate buffer on kernel queue\n"); @@ -394,9 +392,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param, reset, sdma_engine); if (!retval) - pm->priv_queue->ops.submit_packet(pm->priv_queue); + kq_submit_packet(pm->priv_queue); else - pm->priv_queue->ops.rollback_packet(pm->priv_queue); + kq_rollback_packet(pm->priv_queue); out: mutex_unlock(&pm->lock); @@ -441,7 +439,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm) size = pm->pmf->query_status_size; mutex_lock(&pm->lock); - pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + kq_acquire_packet_buffer(pm->priv_queue, size / sizeof(uint32_t), (unsigned int **)&buffer); if (!buffer) { pr_err("Failed to allocate buffer on kernel queue\n"); @@ -449,7 +447,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm) goto out; } memset(buffer, 0x55, size); - pm->priv_queue->ops.submit_packet(pm->priv_queue); + kq_submit_packet(pm->priv_queue); pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.", buffer[0], buffer[1], buffer[2], buffer[3], diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 9a4bafb2e175..2de01009f1b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -25,47 +25,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_ai.h" #include "kfd_pm4_opcodes.h" - -static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, - enum kfd_queue_type type, unsigned int queue_size); -static void uninitialize_v9(struct kernel_queue *kq); -static void submit_packet_v9(struct kernel_queue *kq); - -void kernel_queue_init_v9(struct kernel_queue_ops *ops) -{ - ops->initialize = initialize_v9; - ops->uninitialize = uninitialize_v9; - ops->submit_packet = submit_packet_v9; -} - -static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, - enum kfd_queue_type type, unsigned int queue_size) -{ - int retval; - - retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); - if (retval) - return false; - - kq->eop_gpu_addr = kq->eop_mem->gpu_addr; - kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; - - memset(kq->eop_kernel_addr, 0, PAGE_SIZE); - - return true; -} - -static void uninitialize_v9(struct kernel_queue *kq) -{ - kfd_gtt_sa_free(kq->dev, kq->eop_mem); -} - -static void submit_packet_v9(struct kernel_queue *kq) -{ - *kq->wptr64_kernel = kq->pending_wptr64; - write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, - kq->pending_wptr64); -} +#include "gc/gc_10_1_0_sh_mask.h" static int pm_map_process_v9(struct packet_manager *pm, uint32_t *buffer, struct qcm_process_device *qpd) @@ -90,10 +50,17 @@ static int pm_map_process_v9(struct packet_manager *pm, packet->sh_mem_config = qpd->sh_mem_config; 
 	packet->sh_mem_bases = qpd->sh_mem_bases;
-	packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
-	packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
-	packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
-	packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+	if (qpd->tba_addr) {
+		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+		/* On GFX9, unlike GFX10, bit TRAP_EN of SQ_SHADER_TBA_HI is
+		 * not defined, so setting it won't do any harm.
+		 */
+		packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8)
+				| 1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT;
+
+		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+	}
 
 	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
 	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
@@ -341,35 +308,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
 	return 0;
 }
 
-
-static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
-{
-	struct pm4_mec_release_mem *packet;
-
-	packet = (struct pm4_mec_release_mem *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
-
-	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
-					sizeof(struct pm4_mec_release_mem));
-
-	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
-	packet->bitfields2.tcl1_action_ena = 1;
-	packet->bitfields2.tc_action_ena = 1;
-	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
-
-	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
-	packet->bitfields3.int_sel =
-		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
-
-	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
-	packet->address_hi = upper_32_bits(gpu_addr);
-
-	packet->data_lo = 0;
-
-	return 0;
-}
-
 const struct packet_manager_funcs kfd_v9_pm_funcs = {
 	.map_process = pm_map_process_v9,
 	.runlist = pm_runlist_v9,
@@ -377,12 +315,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
 	.map_queues = pm_map_queues_v9,
 	.unmap_queues = pm_unmap_queues_v9,
 	.query_status = pm_query_status_v9,
-	.release_mem = pm_release_mem_v9,
+	.release_mem = NULL,
 	.map_process_size = sizeof(struct pm4_mes_map_process),
 	.runlist_size = sizeof(struct pm4_mes_runlist),
 	.set_resources_size = sizeof(struct pm4_mes_set_resources),
 	.map_queues_size = sizeof(struct pm4_mes_map_queues),
 	.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
 	.query_status_size = sizeof(struct pm4_mes_query_status),
-	.release_mem_size = sizeof(struct pm4_mec_release_mem)
+	.release_mem_size = 0,
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index 2adaf40027eb..bed4d0ccb6b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -26,47 +26,6 @@
 #include "kfd_pm4_headers_vi.h"
 #include "kfd_pm4_opcodes.h"
 
-static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
-		enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_vi(struct kernel_queue *kq);
-static void submit_packet_vi(struct kernel_queue *kq);
-
-void kernel_queue_init_vi(struct kernel_queue_ops *ops)
-{
-	ops->initialize = initialize_vi;
-	ops->uninitialize = uninitialize_vi;
-	ops->submit_packet = submit_packet_vi;
-}
-
-static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
-		enum kfd_queue_type type, unsigned int queue_size)
-{
-	int retval;
-
-	retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
-	if (retval != 0)
-		return false;
-
-	kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
-	kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
-
-	memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
-
-	return true;
-}
-
-static void uninitialize_vi(struct kernel_queue *kq)
-{
-	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
-}
-
-static void submit_packet_vi(struct kernel_queue *kq)
-{
-	*kq->wptr_kernel = kq->pending_wptr;
-	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
-				kq->pending_wptr);
-}
-
 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
 {
 	union PM4_MES_TYPE_3_HEADER header;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 060a9e8b301e..6af1b5881f43 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -59,24 +59,21 @@
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  * defines are w.r.t to PAGE_SIZE
  */
-#define KFD_MMAP_TYPE_SHIFT	(62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_SHIFT	62
 #define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_MMIO	(0x0ULL << KFD_MMAP_TYPE_SHIFT)
 
-#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_SHIFT 46
 #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
 				<< KFD_MMAP_GPU_ID_SHIFT)
 #define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
 				& KFD_MMAP_GPU_ID_MASK)
-#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
+#define KFD_MMAP_GET_GPU_ID(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
 				>> KFD_MMAP_GPU_ID_SHIFT)
 
-#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
-#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
-
 /*
  * When working with cp scheduler we should assign the HIQ manually or via
  * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
@@ -238,9 +235,10 @@ struct kfd_dev {
 					   * KFD. It is aligned for mapping
 					   * into user mode
 					   */
-	size_t doorbell_id_offset;	/* Doorbell offset (from KFD doorbell
-					 * to HW doorbell, GFX reserved some
-					 * at the start)
+	size_t doorbell_base_dw_offset;	/* Offset from the start of the PCI
+					 * doorbell BAR to the first KFD
+					 * doorbell in dwords. GFX reserves
+					 * the segment before this offset.
 					 */
 	u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
 					   * page used by kernel queue
@@ -510,8 +508,7 @@ struct queue {
  * Please read the kfd_mqd_manager.h description.
  */
 enum KFD_MQD_TYPE {
-	KFD_MQD_TYPE_COMPUTE = 0,	/* for no cp scheduling */
-	KFD_MQD_TYPE_HIQ,		/* for hiq */
+	KFD_MQD_TYPE_HIQ = 0,		/* for hiq */
 	KFD_MQD_TYPE_CP,		/* for cp queues and diq */
 	KFD_MQD_TYPE_SDMA,		/* for sdma queues */
 	KFD_MQD_TYPE_DIQ,		/* for diq */
@@ -818,7 +815,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
 u32 read_kernel_doorbell(u32 __iomem *db);
 void write_kernel_doorbell(void __iomem *db, u32 value);
 void write_kernel_doorbell64(void __iomem *db, u64 value);
-unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
+unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
 					struct kfd_process *process,
 					unsigned int doorbell_id);
 phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
@@ -886,7 +883,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
 void device_queue_manager_uninit(struct device_queue_manager *dqm);
 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 					enum kfd_queue_type type);
-void kernel_queue_uninit(struct kernel_queue *kq);
+void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
 int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);
 
 /* Process Queue Manager */
@@ -904,7 +901,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			    struct kfd_dev *dev,
 			    struct file *f,
 			    struct queue_properties *properties,
-			    unsigned int *qid);
+			    unsigned int *qid,
+			    uint32_t *p_doorbell_offset_in_process);
 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
 			struct queue_properties *p);
@@ -972,10 +970,9 @@ struct packet_manager_funcs {
 
 extern const struct packet_manager_funcs kfd_vi_pm_funcs;
 extern const struct packet_manager_funcs kfd_v9_pm_funcs;
-extern const struct packet_manager_funcs kfd_v10_pm_funcs;
 
 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
-void pm_uninit(struct packet_manager *pm);
+void pm_uninit(struct packet_manager *pm, bool hanging);
 int pm_send_set_resources(struct packet_manager *pm,
 				struct scheduling_resources *res);
 int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
@@ -991,9 +988,6 @@ void pm_release_ib(struct packet_manager *pm);
 /* Following PM funcs can be shared among VI and AI */
 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
 
-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
-				struct scheduling_resources *res);
-
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 10f9af5784f2..25b90f70aecd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -32,6 +32,7 @@
 #include <linux/mman.h>
 #include <linux/file.h>
 #include "amdgpu_amdkfd.h"
+#include "amdgpu.h"
 
 struct mm_struct;
@@ -324,6 +325,8 @@ struct kfd_process *kfd_create_process(struct file *filep)
 				(int)process->lead_thread->pid);
 	}
 out:
+	if (!IS_ERR(process))
+		kref_get(&process->ref);
 	mutex_unlock(&kfd_processes_mutex);
 
 	return process;
@@ -560,8 +563,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
 		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
 			continue;
 
-		offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
-			<< PAGE_SHIFT;
+		offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
 		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
 			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
 			MAP_SHARED, offset);
@@ -1151,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 void kfd_flush_tlb(struct kfd_process_device *pdd)
 {
 	struct kfd_dev *dev = pdd->dev;
-	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
 
 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
 		/* Nothing to flush until a VMID is assigned, which
 		 * only happens when the first queue is created.
 		 */
 		if (pdd->qpd.vmid)
-			f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+							pdd->qpd.vmid);
 	} else {
-		f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
+		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+						pdd->process->pasid);
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 2659d226c056..31fcd1b51f00 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -162,7 +162,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
 	pqm->queue_slot_bitmap = NULL;
 }
 
-static int create_cp_queue(struct process_queue_manager *pqm,
+static int init_user_queue(struct process_queue_manager *pqm,
 				struct kfd_dev *dev, struct queue **q,
 				struct queue_properties *q_properties,
 				struct file *f, unsigned int qid)
@@ -192,7 +192,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			    struct kfd_dev *dev,
 			    struct file *f,
 			    struct queue_properties *properties,
-			    unsigned int *qid)
+			    unsigned int *qid,
+			    uint32_t *p_doorbell_offset_in_process)
 {
 	int retval;
 	struct kfd_process_device *pdd;
@@ -250,7 +251,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			goto err_create_queue;
 		}
 
-		retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
+		retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
 		if (retval != 0)
 			goto err_create_queue;
 		pqn->q = q;
@@ -271,7 +272,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			goto err_create_queue;
 		}
 
-		retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
+		retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
 		if (retval != 0)
 			goto err_create_queue;
 		pqn->q = q;
@@ -303,12 +304,15 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		goto err_create_queue;
 	}
 
-	if (q)
+	if (q && p_doorbell_offset_in_process)
 		/* Return the doorbell offset within the doorbell page
 		 * to the caller so it can be passed up to user mode
 		 * (in bytes).
+		 * There are always 1024 doorbells per process, so in case
+		 * of 8-byte doorbells, there are two doorbell pages per
+		 * process.
 		 */
-		properties->doorbell_off =
+		*p_doorbell_offset_in_process =
 			(q->properties.doorbell_off * sizeof(uint32_t)) &
 			(kfd_doorbell_process_slice(dev) - 1);
@@ -370,7 +374,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 		/* destroy kernel queue (DIQ) */
 		dqm = pqn->kq->dev->dqm;
 		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
-		kernel_queue_uninit(pqn->kq);
+		kernel_queue_uninit(pqn->kq, false);
 	}
 
 	if (pqn->q) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 69bd0628fdc6..203c823d65f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -486,6 +486,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.num_sdma_engines);
 	sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines",
 			dev->node_props.num_sdma_xgmi_engines);
+	sysfs_show_32bit_prop(buffer, "num_sdma_queues_per_engine",
+			dev->node_props.num_sdma_queues_per_engine);
+	sysfs_show_32bit_prop(buffer, "num_cp_queues",
+			dev->node_props.num_cp_queues);
 
 	if (dev->gpu) {
 		log_max_watch_addr =
@@ -1309,9 +1313,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
 	dev->node_props.num_sdma_xgmi_engines =
 				gpu->device_info->num_xgmi_sdma_engines;
+	dev->node_props.num_sdma_queues_per_engine =
+				gpu->device_info->num_sdma_queues_per_engine;
 	dev->node_props.num_gws = (hws_gws_support &&
 		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
 		amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
+	dev->node_props.num_cp_queues = get_queues_num(dev->gpu->dqm);
 
 	kfd_fill_mem_clk_max_info(dev);
 	kfd_fill_iolink_non_crat_info(dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 15843e0fc756..74e9b1682af8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -81,6 +81,8 @@ struct kfd_node_properties {
 	int32_t  drm_render_minor;
 	uint32_t num_sdma_engines;
 	uint32_t num_sdma_xgmi_engines;
+	uint32_t num_sdma_queues_per_engine;
+	uint32_t num_cp_queues;
 	char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
 };
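
Editorial note on the kfd_priv.h hunk above: the mmap-offset encoding moves from page units to byte units, with the mapping type at bits 63:62 and the GPU ID at bits 61:46 of the 64-bit offset itself. That is why the PAGE_SHIFT terms and the KFD_MMAP_OFFSET_VALUE_* helpers disappear and why kfd_process.c no longer shifts the reserved-memory offset left by PAGE_SHIFT. The stand-alone user-space sketch below merely mirrors those macros to show the packing; it is an illustration, not driver code, and the KFD_GPU_ID_HASH_WIDTH value of 16 and the sample gpu_id are assumptions made only for this example.

	#include <stdint.h>
	#include <stdio.h>

	#define KFD_GPU_ID_HASH_WIDTH	16	/* assumed here for the sketch */

	#define KFD_MMAP_TYPE_SHIFT	62
	#define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
	#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)

	#define KFD_MMAP_GPU_ID_SHIFT	46
	#define KFD_MMAP_GPU_ID_MASK	(((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
					 << KFD_MMAP_GPU_ID_SHIFT)
	#define KFD_MMAP_GPU_ID(gpu_id)	((((uint64_t)(gpu_id)) << KFD_MMAP_GPU_ID_SHIFT) \
					 & KFD_MMAP_GPU_ID_MASK)
	#define KFD_MMAP_GET_GPU_ID(offset)	(((offset) & KFD_MMAP_GPU_ID_MASK) \
						 >> KFD_MMAP_GPU_ID_SHIFT)

	int main(void)
	{
		uint32_t gpu_id = 0x1234;	/* hypothetical GPU ID, illustration only */

		/* Encode: type in bits 63:62, GPU ID in bits 61:46.  The result
		 * is a plain byte offset; no PAGE_SHIFT scaling is applied. */
		uint64_t offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(gpu_id);

		/* Decode the two fields back out of the offset. */
		printf("type=0x%llx gpu_id=0x%llx\n",
		       (unsigned long long)((offset & KFD_MMAP_TYPE_MASK) >> KFD_MMAP_TYPE_SHIFT),
		       (unsigned long long)KFD_MMAP_GET_GPU_ID(offset));

		return 0;
	}

The same encode/decode pattern applies to the other KFD_MMAP_TYPE_* values defined in that hunk (events, reserved memory, MMIO).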