aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c59
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c24
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c55
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_iommu.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c100
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h40
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c53
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c348
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c66
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c34
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c36
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c (renamed from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c)90
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c (renamed from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c)41
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h32
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h2
26 files changed, 287 insertions, 784 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 48155060a57c..61474627a32c 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -38,11 +38,9 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
$(AMDKFD_PATH)/kfd_kernel_queue.o \
- $(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
- $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
- $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
- $(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
+ $(AMDKFD_PATH)/kfd_packet_manager_vi.o \
+ $(AMDKFD_PATH)/kfd_packet_manager_v9.o \
$(AMDKFD_PATH)/kfd_process_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 1544007af34a..3f0300e53727 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -42,6 +42,7 @@
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
+static int kfd_release(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);
static const char kfd_dev_name[] = "kfd";
@@ -51,6 +52,7 @@ static const struct file_operations kfd_fops = {
.unlocked_ioctl = kfd_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.open = kfd_open,
+ .release = kfd_release,
.mmap = kfd_mmap,
};
@@ -124,8 +126,13 @@ static int kfd_open(struct inode *inode, struct file *filep)
if (IS_ERR(process))
return PTR_ERR(process);
- if (kfd_is_locked())
+ if (kfd_is_locked()) {
+ kfd_unref_process(process);
return -EAGAIN;
+ }
+
+ /* filep now owns the reference returned by kfd_create_process */
+ filep->private_data = process;
dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
process->pasid, process->is_32bit_user_mode);
@@ -133,6 +140,16 @@ static int kfd_open(struct inode *inode, struct file *filep)
return 0;
}
+static int kfd_release(struct inode *inode, struct file *filep)
+{
+ struct kfd_process *process = filep->private_data;
+
+ if (process)
+ kfd_unref_process(process);
+
+ return 0;
+}
+
static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
void *data)
{
@@ -258,6 +275,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
unsigned int queue_id;
struct kfd_process_device *pdd;
struct queue_properties q_properties;
+ uint32_t doorbell_offset_in_process = 0;
memset(&q_properties, 0, sizeof(struct queue_properties));
@@ -286,7 +304,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
p->pasid,
dev->id);
- err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
+ err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
+ &doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@@ -296,14 +315,11 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
- args->doorbell_offset <<= PAGE_SHIFT;
if (KFD_IS_SOC15(dev->device_info->asic_family))
- /* On SOC15 ASICs, doorbell allocation must be
- * per-device, and independent from the per-process
- * queue_id. Return the doorbell offset within the
- * doorbell aperture to user mode.
+ /* On SOC15 ASICs, include the doorbell offset within the
+ * process doorbell frame, which is 2 pages.
*/
- args->doorbell_offset |= q_properties.doorbell_off;
+ args->doorbell_offset |= doorbell_offset_in_process;
mutex_unlock(&p->mutex);
@@ -1312,10 +1328,9 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
/* MMIO is mapped through kfd device
* Generate a kfd mmap offset
*/
- if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
- args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
- args->mmap_offset <<= PAGE_SHIFT;
- }
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
+ args->mmap_offset = KFD_MMAP_TYPE_MMIO
+ | KFD_MMAP_GPU_ID(args->gpu_id);
return 0;
@@ -1803,9 +1818,14 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
- process = kfd_get_process(current);
- if (IS_ERR(process)) {
- dev_dbg(kfd_device, "no process\n");
+ /* Get the process struct from the filep. Only the process
+ * that opened /dev/kfd can use the file descriptor. Child
+ * processes need to create their own KFD device context.
+ */
+ process = filep->private_data;
+ if (process->lead_thread != current->group_leader) {
+ dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
+ retcode = -EBADF;
goto err_i1;
}
@@ -1899,20 +1919,19 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
struct kfd_dev *dev = NULL;
- unsigned long vm_pgoff;
+ unsigned long mmap_offset;
unsigned int gpu_id;
process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
- vm_pgoff = vma->vm_pgoff;
- vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
- gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
+ mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
+ gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
if (gpu_id)
dev = kfd_device_by_id(gpu_id);
- switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+ switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
case KFD_MMAP_TYPE_DOORBELL:
if (!dev)
return -ENODEV;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index d59f2cd056c6..27bcc5b472f6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -72,11 +72,11 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
* The receive packet buff will be sitting on the Indirect Buffer
* and in the PQ we put the IB packet + sync packet(s).
*/
- status = kq->ops.acquire_packet_buffer(kq,
+ status = kq_acquire_packet_buffer(kq,
pq_packets_size_in_bytes / sizeof(uint32_t),
&ib_packet_buff);
if (status) {
- pr_err("acquire_packet_buffer failed\n");
+ pr_err("kq_acquire_packet_buffer failed\n");
return status;
}
@@ -115,7 +115,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
if (status) {
pr_err("Failed to allocate GART memory\n");
- kq->ops.rollback_packet(kq);
+ kq_rollback_packet(kq);
return status;
}
@@ -151,7 +151,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
rm_packet->data_lo = QUEUESTATE__ACTIVE;
- kq->ops.submit_packet(kq);
+ kq_submit_packet(kq);
/* Wait till CP writes sync code: */
status = amdkfd_fence_wait_timeout(
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
properties.type = KFD_QUEUE_TYPE_DIQ;
status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
- &properties, &qid);
+ &properties, &qid, NULL);
if (status) {
pr_err("Failed to create DIQ\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index 15c523027285..511712c2e382 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -93,7 +93,7 @@ void kfd_debugfs_init(void)
kfd_debugfs_hqds_by_device, &kfd_debugfs_fops);
debugfs_create_file("rls", S_IFREG | 0444, debugfs_root,
kfd_debugfs_rls_by_device, &kfd_debugfs_fops);
- debugfs_create_file("hang_hws", S_IFREG | 0644, debugfs_root,
+ debugfs_create_file("hang_hws", S_IFREG | 0200, debugfs_root,
NULL, &kfd_debugfs_hang_hws_fops);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 4fa8834ce7cb..2a9e40131735 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -728,6 +728,9 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
if (!kfd->init_complete)
return 0;
+
+ kfd->dqm->ops.pre_reset(kfd->dqm);
+
kgd2kfd_suspend(kfd);
kfd_signal_reset_event(kfd);
@@ -742,7 +745,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
- int ret, count;
+ int ret;
if (!kfd->init_complete)
return 0;
@@ -750,7 +753,7 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
ret = kfd_resume(kfd);
if (ret)
return ret;
- count = atomic_dec_return(&kfd_locked);
+ atomic_dec(&kfd_locked);
atomic_set(&kfd->sram_ecc_flag, 0);
@@ -822,6 +825,21 @@ dqm_start_error:
return err;
}
+static inline void kfd_queue_work(struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ int cpu, new_cpu;
+
+ cpu = new_cpu = smp_processor_id();
+ do {
+ new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
+ if (cpu_to_node(new_cpu) == numa_node_id())
+ break;
+ } while (cpu != new_cpu);
+
+ queue_work_on(new_cpu, wq, work);
+}
+
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
@@ -844,7 +862,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
patched_ihre, &is_patched)
&& enqueue_ih_ring_entry(kfd,
is_patched ? patched_ihre : ih_ring_entry))
- queue_work(kfd->ih_wq, &kfd->interrupt_work);
+ kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);
spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 984c2f2b24b6..80d22bf702e8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -170,7 +170,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
}
q->properties.doorbell_off =
- kfd_doorbell_id_to_offset(dev, q->process,
+ kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
q->doorbell_id);
return 0;
@@ -930,11 +930,11 @@ static void uninitialize(struct device_queue_manager *dqm)
for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
kfree(dqm->mqd_mgrs[i]);
mutex_destroy(&dqm->lock_hidden);
- kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}
static int start_nocpsch(struct device_queue_manager *dqm)
{
+ pr_info("SW scheduler is used");
init_interrupts(dqm);
if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
@@ -947,12 +947,19 @@ static int start_nocpsch(struct device_queue_manager *dqm)
static int stop_nocpsch(struct device_queue_manager *dqm)
{
if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
- pm_uninit(&dqm->packets);
+ pm_uninit(&dqm->packets, false);
dqm->sched_running = false;
return 0;
}
+static void pre_reset(struct device_queue_manager *dqm)
+{
+ dqm_lock(dqm);
+ dqm->is_resetting = true;
+ dqm_unlock(dqm);
+}
+
static int allocate_sdma_queue(struct device_queue_manager *dqm,
struct queue *q)
{
@@ -1100,6 +1107,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
dqm_lock(dqm);
/* clear hang status when driver try to start the hw scheduler */
dqm->is_hws_hang = false;
+ dqm->is_resetting = false;
dqm->sched_running = true;
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
dqm_unlock(dqm);
@@ -1107,20 +1115,24 @@ static int start_cpsch(struct device_queue_manager *dqm)
return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
- pm_uninit(&dqm->packets);
+ pm_uninit(&dqm->packets, false);
fail_packet_manager_init:
return retval;
}
static int stop_cpsch(struct device_queue_manager *dqm)
{
+ bool hanging;
+
dqm_lock(dqm);
- unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ if (!dqm->is_hws_hang)
+ unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ hanging = dqm->is_hws_hang || dqm->is_resetting;
dqm->sched_running = false;
dqm_unlock(dqm);
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
- pm_uninit(&dqm->packets);
+ pm_uninit(&dqm->packets, hanging);
return 0;
}
@@ -1225,16 +1237,18 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
+
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ dqm->sdma_queue_count++;
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ dqm->xgmi_sdma_queue_count++;
+
if (q->properties.is_active) {
dqm->queue_count++;
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
}
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
- dqm->sdma_queue_count++;
- else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
- dqm->xgmi_sdma_queue_count++;
/*
* Unconditionally increment this counter, regardless of the queue's
* type or whether the queue is active.
@@ -1352,8 +1366,17 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
/* should be timed out */
retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
queue_preemption_timeout_ms);
- if (retval)
+ if (retval) {
+ pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
+ dqm->is_hws_hang = true;
+ /* It's possible we're detecting a HWS hang in the
+ * middle of a GPU reset. No need to schedule another
+ * reset in this case.
+ */
+ if (!dqm->is_resetting)
+ schedule_work(&dqm->hw_exception_work);
return retval;
+ }
pm_release_ib(&dqm->packets);
dqm->active_runlist = false;
@@ -1371,12 +1394,8 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
if (dqm->is_hws_hang)
return -EIO;
retval = unmap_queues_cpsch(dqm, filter, filter_param);
- if (retval) {
- pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
- dqm->is_hws_hang = true;
- schedule_work(&dqm->hw_exception_work);
+ if (retval)
return retval;
- }
return map_queues_cpsch(dqm);
}
@@ -1595,7 +1614,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
goto dqm_unlock;
}
- mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
+ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
if (!mqd_mgr->get_wave_state) {
r = -EINVAL;
@@ -1770,6 +1789,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.initialize = initialize_cpsch;
dqm->ops.start = start_cpsch;
dqm->ops.stop = stop_cpsch;
+ dqm->ops.pre_reset = pre_reset;
dqm->ops.destroy_queue = destroy_queue_cpsch;
dqm->ops.update_queue = update_queue;
dqm->ops.register_process = register_process;
@@ -1788,6 +1808,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
/* initialize dqm for no cp scheduling */
dqm->ops.start = start_nocpsch;
dqm->ops.stop = stop_nocpsch;
+ dqm->ops.pre_reset = pre_reset;
dqm->ops.create_queue = create_queue_nocpsch;
dqm->ops.destroy_queue = destroy_queue_nocpsch;
dqm->ops.update_queue = update_queue;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index a8c37e6da027..871d3b628d2d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -104,6 +104,7 @@ struct device_queue_manager_ops {
int (*initialize)(struct device_queue_manager *dqm);
int (*start)(struct device_queue_manager *dqm);
int (*stop)(struct device_queue_manager *dqm);
+ void (*pre_reset)(struct device_queue_manager *dqm);
void (*uninitialize)(struct device_queue_manager *dqm);
int (*create_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
@@ -190,7 +191,6 @@ struct device_queue_manager {
/* the pasid mapping for each kfd vmid */
uint16_t vmid_pasid[VMID_NUM];
uint64_t pipelines_addr;
- struct kfd_mem_obj *pipeline_mem;
uint64_t fence_gpu_addr;
unsigned int *fence_addr;
struct kfd_mem_obj *fence_mem;
@@ -199,6 +199,7 @@ struct device_queue_manager {
/* hw exception */
bool is_hws_hang;
+ bool is_resetting;
struct work_struct hw_exception_work;
struct kfd_mem_obj hiq_sdma_mqd;
bool sched_running;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index ebe79bf00145..8e0c00b9555e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -91,7 +91,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
doorbell_start_offset;
- kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
+ kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
kfd_doorbell_process_slice(kfd));
@@ -103,8 +103,8 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
pr_debug("doorbell base == 0x%08lX\n",
(uintptr_t)kfd->doorbell_base);
- pr_debug("doorbell_id_offset == 0x%08lX\n",
- kfd->doorbell_id_offset);
+ pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
+ kfd->doorbell_base_dw_offset);
pr_debug("doorbell_process_limit == 0x%08lX\n",
doorbell_process_limit);
@@ -185,7 +185,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
* Calculating the kernel doorbell offset using the first
* doorbell page.
*/
- *doorbell_off = kfd->doorbell_id_offset + inx;
+ *doorbell_off = kfd->doorbell_base_dw_offset + inx;
pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n"
@@ -225,17 +225,17 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
}
}
-unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
+unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int doorbell_id)
{
/*
- * doorbell_id_offset accounts for doorbells taken by KGD.
+ * doorbell_base_dw_offset accounts for doorbells taken by KGD.
* index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
* the process's doorbells. The offset returned is in dword
* units regardless of the ASIC-dependent doorbell size.
*/
- return kfd->doorbell_id_offset +
+ return kfd->doorbell_base_dw_offset +
process->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 908081c85de1..1f8365575b12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -346,7 +346,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
- *event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->event_id;
}
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 193e2835bd4d..8d871514671e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -62,9 +62,6 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
struct amd_iommu_device_info iommu_info;
unsigned int pasid_limit;
int err;
- struct kfd_topology_device *top_dev;
-
- top_dev = kfd_topology_device_by_id(kfd->id);
if (!kfd->device_info->needs_iommu_device)
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 11d244891393..bae706462f96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -34,7 +34,10 @@
#define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
-static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
+/* Initialize a kernel queue, including allocations of GART memory
+ * needed for the queue.
+ */
+static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size)
{
struct queue_properties prop;
@@ -87,9 +90,17 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->pq_kernel_addr = kq->pq->cpu_ptr;
kq->pq_gpu_addr = kq->pq->gpu_addr;
- retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
- if (!retval)
- goto err_eop_allocate_vidmem;
+ /* For CIK family asics, kq->eop_mem is not needed */
+ if (dev->device_info->asic_family > CHIP_MULLINS) {
+ retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
+ if (retval != 0)
+ goto err_eop_allocate_vidmem;
+
+ kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
+ kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
+
+ memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
+ }
retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
&kq->rptr_mem);
@@ -183,9 +194,10 @@ err_get_kernel_doorbell:
}
-static void uninitialize(struct kernel_queue *kq)
+/* Uninitialize a kernel queue and free all its memory usages. */
+static void kq_uninitialize(struct kernel_queue *kq, bool hanging)
{
- if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
+ if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && !hanging)
kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
kq->queue->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
@@ -200,14 +212,19 @@ static void uninitialize(struct kernel_queue *kq)
kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
- kq->ops_asic_specific.uninitialize(kq);
+
+ /* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free()
+ * is able to handle NULL properly.
+ */
+ kfd_gtt_sa_free(kq->dev, kq->eop_mem);
+
kfd_gtt_sa_free(kq->dev, kq->pq);
kfd_release_kernel_doorbell(kq->dev,
kq->queue->properties.doorbell_ptr);
uninit_queue(kq->queue);
}
-static int acquire_packet_buffer(struct kernel_queue *kq,
+int kq_acquire_packet_buffer(struct kernel_queue *kq,
size_t packet_size_in_dwords, unsigned int **buffer_ptr)
{
size_t available_size;
@@ -268,7 +285,7 @@ err_no_space:
return -ENOMEM;
}
-static void submit_packet(struct kernel_queue *kq)
+void kq_submit_packet(struct kernel_queue *kq)
{
#ifdef DEBUG
int i;
@@ -280,11 +297,18 @@ static void submit_packet(struct kernel_queue *kq)
}
pr_debug("\n");
#endif
-
- kq->ops_asic_specific.submit_packet(kq);
+ if (kq->dev->device_info->doorbell_size == 8) {
+ *kq->wptr64_kernel = kq->pending_wptr64;
+ write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
+ kq->pending_wptr64);
+ } else {
+ *kq->wptr_kernel = kq->pending_wptr;
+ write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+ kq->pending_wptr);
+ }
}
-static void rollback_packet(struct kernel_queue *kq)
+void kq_rollback_packet(struct kernel_queue *kq)
{
if (kq->dev->device_info->doorbell_size == 8) {
kq->pending_wptr64 = *kq->wptr64_kernel;
@@ -304,60 +328,18 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
if (!kq)
return NULL;
- kq->ops.initialize = initialize;
- kq->ops.uninitialize = uninitialize;
- kq->ops.acquire_packet_buffer = acquire_packet_buffer;
- kq->ops.submit_packet = submit_packet;
- kq->ops.rollback_packet = rollback_packet;
-
- switch (dev->device_info->asic_family) {
- case CHIP_CARRIZO:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- kernel_queue_init_vi(&kq->ops_asic_specific);
- break;
-
- case CHIP_KAVERI:
- case CHIP_HAWAII:
- kernel_queue_init_cik(&kq->ops_asic_specific);
- break;
-
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- case CHIP_RENOIR:
- case CHIP_ARCTURUS:
- kernel_queue_init_v9(&kq->ops_asic_specific);
- break;
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_NAVI14:
- kernel_queue_init_v10(&kq->ops_asic_specific);
- break;
- default:
- WARN(1, "Unexpected ASIC family %u",
- dev->device_info->asic_family);
- goto out_free;
- }
-
- if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
+ if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
return kq;
pr_err("Failed to init kernel queue\n");
-out_free:
kfree(kq);
return NULL;
}
-void kernel_queue_uninit(struct kernel_queue *kq)
+void kernel_queue_uninit(struct kernel_queue *kq, bool hanging)
{
- kq->ops.uninitialize(kq);
+ kq_uninitialize(kq, hanging);
kfree(kq);
}
@@ -377,7 +359,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
return;
}
- retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
+ retval = kq_acquire_packet_buffer(kq, 5, &buffer);
if (unlikely(retval != 0)) {
pr_err(" Failed to acquire packet buffer\n");
pr_err("Kernel queue test failed\n");
@@ -385,7 +367,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
}
for (i = 0; i < 5; i++)
buffer[i] = kq->nop_packet;
- kq->ops.submit_packet(kq);
+ kq_submit_packet(kq);
pr_err("Ending kernel queue test\n");
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 365fc674fea4..f4cfe9f1871c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -29,45 +29,28 @@
#include "kfd_priv.h"
/**
- * struct kernel_queue_ops
- *
- * @initialize: Initialize a kernel queue, including allocations of GART memory
- * needed for the queue.
- *
- * @uninitialize: Uninitialize a kernel queue and free all its memory usages.
- *
- * @acquire_packet_buffer: Returns a pointer to the location in the kernel
+ * kq_acquire_packet_buffer: Returns a pointer to the location in the kernel
* queue ring buffer where the calling function can write its packet. It is
* Guaranteed that there is enough space for that packet. It also updates the
* pending write pointer to that location so subsequent calls to
* acquire_packet_buffer will get a correct write pointer
*
- * @submit_packet: Update the write pointer and doorbell of a kernel queue.
- *
- * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
- * queue are equal, which means the CP has read all the submitted packets.
+ * kq_submit_packet: Update the write pointer and doorbell of a kernel queue.
*
- * @rollback_packet: This routine is called if we failed to build an acquired
+ * kq_rollback_packet: This routine is called if we failed to build an acquired
* packet for some reason. It just overwrites the pending wptr with the current
* one
*
*/
-struct kernel_queue_ops {
- bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev,
- enum kfd_queue_type type, unsigned int queue_size);
- void (*uninitialize)(struct kernel_queue *kq);
- int (*acquire_packet_buffer)(struct kernel_queue *kq,
- size_t packet_size_in_dwords,
- unsigned int **buffer_ptr);
- void (*submit_packet)(struct kernel_queue *kq);
- void (*rollback_packet)(struct kernel_queue *kq);
-};
+int kq_acquire_packet_buffer(struct kernel_queue *kq,
+ size_t packet_size_in_dwords,
+ unsigned int **buffer_ptr);
+void kq_submit_packet(struct kernel_queue *kq);
+void kq_rollback_packet(struct kernel_queue *kq);
-struct kernel_queue {
- struct kernel_queue_ops ops;
- struct kernel_queue_ops ops_asic_specific;
+struct kernel_queue {
/* data */
struct kfd_dev *dev;
struct mqd_manager *mqd_mgr;
@@ -99,9 +82,4 @@ struct kernel_queue {
struct list_head list;
};
-void kernel_queue_init_cik(struct kernel_queue_ops *ops);
-void kernel_queue_init_vi(struct kernel_queue_ops *ops);
-void kernel_queue_init_v9(struct kernel_queue_ops *ops);
-void kernel_queue_init_v10(struct kernel_queue_ops *ops);
-
#endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
deleted file mode 100644
index 19e54acb4125..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-
-static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
- enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_cik(struct kernel_queue *kq);
-static void submit_packet_cik(struct kernel_queue *kq);
-
-void kernel_queue_init_cik(struct kernel_queue_ops *ops)
-{
- ops->initialize = initialize_cik;
- ops->uninitialize = uninitialize_cik;
- ops->submit_packet = submit_packet_cik;
-}
-
-static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
- enum kfd_queue_type type, unsigned int queue_size)
-{
- return true;
-}
-
-static void uninitialize_cik(struct kernel_queue *kq)
-{
-}
-
-static void submit_packet_cik(struct kernel_queue *kq)
-{
- *kq->wptr_kernel = kq->pending_wptr;
- write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
- kq->pending_wptr);
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
deleted file mode 100644
index aed32ab7102e..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-#include "kfd_device_queue_manager.h"
-#include "kfd_pm4_headers_ai.h"
-#include "kfd_pm4_opcodes.h"
-#include "gc/gc_10_1_0_sh_mask.h"
-
-static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
- enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v10(struct kernel_queue *kq);
-static void submit_packet_v10(struct kernel_queue *kq);
-
-void kernel_queue_init_v10(struct kernel_queue_ops *ops)
-{
- ops->initialize = initialize_v10;
- ops->uninitialize = uninitialize_v10;
- ops->submit_packet = submit_packet_v10;
-}
-
-static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
- enum kfd_queue_type type, unsigned int queue_size)
-{
- int retval;
-
- retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
- if (retval != 0)
- return false;
-
- kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
- kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
-
- memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
-
- return true;
-}
-
-static void uninitialize_v10(struct kernel_queue *kq)
-{
- kfd_gtt_sa_free(kq->dev, kq->eop_mem);
-}
-
-static void submit_packet_v10(struct kernel_queue *kq)
-{
- *kq->wptr64_kernel = kq->pending_wptr64;
- write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
- kq->pending_wptr64);
-}
-
-static int pm_map_process_v10(struct packet_manager *pm,
- uint32_t *buffer, struct qcm_process_device *qpd)
-{
- struct pm4_mes_map_process *packet;
- uint64_t vm_page_table_base_addr = qpd->page_table_base;
-
- packet = (struct pm4_mes_map_process *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
- packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
- sizeof(struct pm4_mes_map_process));
- packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
- packet->bitfields2.process_quantum = 1;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
- packet->bitfields14.gds_size = qpd->gds_size;
- packet->bitfields14.num_gws = qpd->num_gws;
- packet->bitfields14.num_oac = qpd->num_oac;
- packet->bitfields14.sdma_enable = 1;
-
- packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
-
- packet->sh_mem_config = qpd->sh_mem_config;
- packet->sh_mem_bases = qpd->sh_mem_bases;
- if (qpd->tba_addr) {
- packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
- packet->sq_shader_tba_hi = (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) |
- upper_32_bits(qpd->tba_addr >> 8);
- packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
- packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
- }
-
- packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
- packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
-
- packet->vm_context_page_table_base_addr_lo32 =
- lower_32_bits(vm_page_table_base_addr);
- packet->vm_context_page_table_base_addr_hi32 =
- upper_32_bits(vm_page_table_base_addr);
-
- return 0;
-}
-
-static int pm_runlist_v10(struct packet_manager *pm, uint32_t *buffer,
- uint64_t ib, size_t ib_size_in_dwords, bool chain)
-{
- struct pm4_mes_runlist *packet;
-
- int concurrent_proc_cnt = 0;
- struct kfd_dev *kfd = pm->dqm->dev;
-
- /* Determine the number of processes to map together to HW:
- * it can not exceed the number of VMIDs available to the
- * scheduler, and it is determined by the smaller of the number
- * of processes in the runlist and kfd module parameter
- * hws_max_conc_proc.
- * Note: the arbitration between the number of VMIDs and
- * hws_max_conc_proc has been done in
- * kgd2kfd_device_init().
- */
- concurrent_proc_cnt = min(pm->dqm->processes_count,
- kfd->max_proc_per_quantum);
-
-
- packet = (struct pm4_mes_runlist *)buffer;
-
- memset(buffer, 0, sizeof(struct pm4_mes_runlist));
- packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
- sizeof(struct pm4_mes_runlist));
-
- packet->bitfields4.ib_size = ib_size_in_dwords;
- packet->bitfields4.chain = chain ? 1 : 0;
- packet->bitfields4.offload_polling = 0;
- packet->bitfields4.valid = 1;
- packet->bitfields4.process_cnt = concurrent_proc_cnt;
- packet->ordinal2 = lower_32_bits(ib);
- packet->ib_base_hi = upper_32_bits(ib);
-
- return 0;
-}
-
-static int pm_map_queues_v10(struct packet_manager *pm, uint32_t *buffer,
- struct queue *q, bool is_static)
-{
- struct pm4_mes_map_queues *packet;
- bool use_static = is_static;
-
- packet = (struct pm4_mes_map_queues *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
-
- packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
- sizeof(struct pm4_mes_map_queues));
- packet->bitfields2.num_queues = 1;
- packet->bitfields2.queue_sel =
- queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
-
- packet->bitfields2.engine_sel =
- engine_sel__mes_map_queues__compute_vi;
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__normal_compute_vi;
-
- switch (q->properties.type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- if (use_static)
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__normal_latency_static_queue_vi;
- break;
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__debug_interface_queue_vi;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- case KFD_QUEUE_TYPE_SDMA_XGMI:
- packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
- engine_sel__mes_map_queues__sdma0_vi;
- use_static = false; /* no static queues under SDMA */
- break;
- default:
- WARN(1, "queue type %d\n", q->properties.type);
- return -EINVAL;
- }
- packet->bitfields3.doorbell_offset =
- q->properties.doorbell_off;
-
- packet->mqd_addr_lo =
- lower_32_bits(q->gart_mqd_addr);
-
- packet->mqd_addr_hi =
- upper_32_bits(q->gart_mqd_addr);
-
- packet->wptr_addr_lo =
- lower_32_bits((uint64_t)q->properties.write_ptr);
-
- packet->wptr_addr_hi =
- upper_32_bits((uint64_t)q->properties.write_ptr);
-
- return 0;
-}
-
-static int pm_unmap_queues_v10(struct packet_manager *pm, uint32_t *buffer,
- enum kfd_queue_type type,
- enum kfd_unmap_queues_filter filter,
- uint32_t filter_param, bool reset,
- unsigned int sdma_engine)
-{
- struct pm4_mes_unmap_queues *packet;
-
- packet = (struct pm4_mes_unmap_queues *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
-
- packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
- sizeof(struct pm4_mes_unmap_queues));
- switch (type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__compute;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- case KFD_QUEUE_TYPE_SDMA_XGMI:
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
- break;
- default:
- WARN(1, "queue type %d\n", type);
- break;
- }
-
- if (reset)
- packet->bitfields2.action =
- action__mes_unmap_queues__reset_queues;
- else
- packet->bitfields2.action =
- action__mes_unmap_queues__preempt_queues;
-
- switch (filter) {
- case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
- packet->bitfields2.num_queues = 1;
- packet->bitfields3b.doorbell_offset0 = filter_param;
- break;
- case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
- packet->bitfields3a.pasid = filter_param;
- break;
- case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__unmap_all_queues;
- break;
- case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
- /* in this case, we do not preempt static queues */
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
- break;
- default:
- WARN(1, "filter %d\n", filter);
- break;
- }
-
- return 0;
-
-}
-
-static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
- uint64_t fence_address, uint32_t fence_value)
-{
- struct pm4_mes_query_status *packet;
-
- packet = (struct pm4_mes_query_status *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_query_status));
-
-
- packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
- sizeof(struct pm4_mes_query_status));
-
- packet->bitfields2.context_id = 0;
- packet->bitfields2.interrupt_sel =
- interrupt_sel__mes_query_status__completion_status;
- packet->bitfields2.command =
- command__mes_query_status__fence_only_after_write_ack;
-
- packet->addr_hi = upper_32_bits((uint64_t)fence_address);
- packet->addr_lo = lower_32_bits((uint64_t)fence_address);
- packet->data_hi = upper_32_bits((uint64_t)fence_value);
- packet->data_lo = lower_32_bits((uint64_t)fence_value);
-
- return 0;
-}
-
-
-static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
-{
- struct pm4_mec_release_mem *packet;
-
- WARN_ON(!buffer);
-
- packet = (struct pm4_mec_release_mem *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
-
- packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
- sizeof(struct pm4_mec_release_mem));
-
- packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
- packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
- packet->bitfields2.tcl1_action_ena = 1;
- packet->bitfields2.tc_action_ena = 1;
- packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
-
- packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
- packet->bitfields3.int_sel =
- int_sel__mec_release_mem__send_interrupt_after_write_confirm;
-
- packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
- packet->address_hi = upper_32_bits(gpu_addr);
-
- packet->data_lo = 0;
-
- return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
-}
-
-const struct packet_manager_funcs kfd_v10_pm_funcs = {
- .map_process = pm_map_process_v10,
- .runlist = pm_runlist_v10,
- .set_resources = pm_set_resources_vi,
- .map_queues = pm_map_queues_v10,
- .unmap_queues = pm_unmap_queues_v10,
- .query_status = pm_query_status_v10,
- .release_mem = pm_release_mem_v10,
- .map_process_size = sizeof(struct pm4_mes_map_process),
- .runlist_size = sizeof(struct pm4_mes_runlist),
- .set_resources_size = sizeof(struct pm4_mes_set_resources),
- .map_queues_size = sizeof(struct pm4_mes_map_queues),
- .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .query_status_size = sizeof(struct pm4_mes_query_status),
- .release_mem_size = sizeof(struct pm4_mec_release_mem)
-};
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 28876aceb14b..19f0fe547c57 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -374,7 +374,6 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
switch (type) {
case KFD_MQD_TYPE_CP:
- case KFD_MQD_TYPE_COMPUTE:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
mqd->free_mqd = free_mqd;
@@ -401,7 +400,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
#endif
break;
case KFD_MQD_TYPE_DIQ:
- mqd->allocate_mqd = allocate_hiq_mqd;
+ mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
@@ -442,7 +441,7 @@ struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
mqd = mqd_manager_init_cik(type, dev);
if (!mqd)
return NULL;
- if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
+ if (type == KFD_MQD_TYPE_CP)
mqd->update_mqd = update_mqd_hawaii;
return mqd;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 4a236b2c2354..d1d68a51bfb8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -66,6 +66,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
}
+static void set_priority(struct v10_compute_mqd *m, struct queue_properties *q)
+{
+ m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+ m->cp_hqd_queue_priority = q->priority;
+}
+
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
{
@@ -109,9 +115,6 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
- m->cp_hqd_pipe_priority = 1;
- m->cp_hqd_queue_priority = 15;
-
if (q->format == KFD_QUEUE_FORMAT_AQL) {
m->cp_hqd_aql_control =
1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
@@ -150,6 +153,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
return r;
}
+static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
+ queue_id, p->doorbell_off);
+}
+
static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
@@ -208,11 +219,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_ctx_save_control = 0;
update_cu_mask(mm, mqd, q);
+ set_priority(m, q);
- q->is_active = (q->queue_size > 0 &&
- q->queue_address != 0 &&
- q->queue_percent > 0 &&
- !q->is_evicted);
+ q->is_active = QUEUE_IS_ACTIVE(*q);
}
static int destroy_mqd(struct mqd_manager *mm, void *mqd,
@@ -247,18 +256,22 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
{
struct v10_compute_mqd *m;
- /* Control stack is located one page after MQD. */
- void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
-
m = get_mqd(mqd);
+ /* Control stack is written backwards, while workgroup context data
+ * is written forwards. Both starts from m->cp_hqd_cntl_stack_size.
+ * Current position is at m->cp_hqd_cntl_stack_offset and
+ * m->cp_hqd_wg_state_offset, respectively.
+ */
*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
m->cp_hqd_cntl_stack_offset;
*save_area_used_size = m->cp_hqd_wg_state_offset -
m->cp_hqd_cntl_stack_size;
- if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
- return -EFAULT;
+ /* Control stack is not copied to user mode for GFXv10 because
+ * it's part of the context save area that is already
+ * accessible to user mode
+ */
return 0;
}
@@ -277,18 +290,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
-static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
-{
- struct v10_compute_mqd *m;
-
- update_mqd(mm, mqd, q);
-
- /* TODO: what's the point? update_mqd already does this. */
- m = get_mqd(mqd);
- m->cp_hqd_vmid = q->vmid;
-}
-
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -340,11 +341,7 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdma_queue_id = q->sdma_queue_id;
m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
-
- q->is_active = (q->queue_size > 0 &&
- q->queue_address != 0 &&
- q->queue_percent > 0 &&
- !q->is_evicted);
+ q->is_active = QUEUE_IS_ACTIVE(*q);
}
/*
@@ -392,7 +389,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL;
- mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
+ mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
if (!mqd)
return NULL;
@@ -400,7 +397,6 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
switch (type) {
case KFD_MQD_TYPE_CP:
- case KFD_MQD_TYPE_COMPUTE:
pr_debug("%s@%i\n", __func__, __LINE__);
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
@@ -421,8 +417,8 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_hiq_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd;
- mqd->update_mqd = update_mqd_hiq;
+ mqd->load_mqd = hiq_load_mqd_kiq;
+ mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
@@ -432,11 +428,11 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
pr_debug("%s@%i\n", __func__, __LINE__);
break;
case KFD_MQD_TYPE_DIQ:
- mqd->allocate_mqd = allocate_hiq_mqd;
+ mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
- mqd->update_mqd = update_mqd_hiq;
+ mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index d3380c5bdbde..436b7f518979 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -92,7 +92,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
* instead of sub-allocation function.
*/
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
- mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
+ mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
@@ -191,6 +191,14 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
wptr_shift, 0, mms);
}
+static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id,
+ queue_id, p->doorbell_off);
+}
+
static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
@@ -302,7 +310,8 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
m->cp_hqd_cntl_stack_offset;
- *save_area_used_size = m->cp_hqd_wg_state_offset;
+ *save_area_used_size = m->cp_hqd_wg_state_offset -
+ m->cp_hqd_cntl_stack_size;
if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
return -EFAULT;
@@ -324,18 +333,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
}
-static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
-{
- struct v9_mqd *m;
-
- update_mqd(mm, mqd, q);
-
- /* TODO: what's the point? update_mqd already does this. */
- m = get_mqd(mqd);
- m->cp_hqd_vmid = q->vmid;
-}
-
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -443,7 +440,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
switch (type) {
case KFD_MQD_TYPE_CP:
- case KFD_MQD_TYPE_COMPUTE:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
mqd->free_mqd = free_mqd;
@@ -461,8 +457,8 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_hiq_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
- mqd->load_mqd = load_mqd;
- mqd->update_mqd = update_mqd_hiq;
+ mqd->load_mqd = hiq_load_mqd_kiq;
+ mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd);
@@ -471,11 +467,11 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
#endif
break;
case KFD_MQD_TYPE_DIQ:
- mqd->allocate_mqd = allocate_hiq_mqd;
+ mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
- mqd->update_mqd = update_mqd_hiq;
+ mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 7d144f56f421..a5e8ff1e5945 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -312,11 +312,7 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
{
- struct vi_mqd *m;
__update_mqd(mm, mqd, q, MTYPE_UC, 0);
-
- m = get_mqd(mqd);
- m->cp_hqd_vmid = q->vmid;
}
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
@@ -425,7 +421,6 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
switch (type) {
case KFD_MQD_TYPE_CP:
- case KFD_MQD_TYPE_COMPUTE:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
mqd->free_mqd = free_mqd;
@@ -453,7 +448,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
#endif
break;
case KFD_MQD_TYPE_DIQ:
- mqd->allocate_mqd = allocate_hiq_mqd;
+ mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
@@ -494,7 +489,7 @@ struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
mqd = mqd_manager_init_vi(type, dev);
if (!mqd)
return NULL;
- if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
+ if (type == KFD_MQD_TYPE_CP)
mqd->update_mqd = update_mqd_tonga;
return mqd;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 83ef4b3dd2fb..dc406e6dee23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -241,12 +241,10 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_RAVEN:
case CHIP_RENOIR:
case CHIP_ARCTURUS:
- pm->pmf = &kfd_v9_pm_funcs;
- break;
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
- pm->pmf = &kfd_v10_pm_funcs;
+ pm->pmf = &kfd_v9_pm_funcs;
break;
default:
WARN(1, "Unexpected ASIC family %u",
@@ -266,10 +264,10 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
return 0;
}
-void pm_uninit(struct packet_manager *pm)
+void pm_uninit(struct packet_manager *pm, bool hanging)
{
mutex_destroy(&pm->lock);
- kernel_queue_uninit(pm->priv_queue);
+ kernel_queue_uninit(pm->priv_queue, hanging);
}
int pm_send_set_resources(struct packet_manager *pm,
@@ -280,7 +278,7 @@ int pm_send_set_resources(struct packet_manager *pm,
size = pm->pmf->set_resources_size;
mutex_lock(&pm->lock);
- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t),
(unsigned int **)&buffer);
if (!buffer) {
@@ -291,9 +289,9 @@ int pm_send_set_resources(struct packet_manager *pm,
retval = pm->pmf->set_resources(pm, buffer, res);
if (!retval)
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ kq_submit_packet(pm->priv_queue);
else
- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
+ kq_rollback_packet(pm->priv_queue);
out:
mutex_unlock(&pm->lock);
@@ -318,7 +316,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
mutex_lock(&pm->lock);
- retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ retval = kq_acquire_packet_buffer(pm->priv_queue,
packet_size_dwords, &rl_buffer);
if (retval)
goto fail_acquire_packet_buffer;
@@ -328,14 +326,14 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
if (retval)
goto fail_create_runlist;
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ kq_submit_packet(pm->priv_queue);
mutex_unlock(&pm->lock);
return retval;
fail_create_runlist:
- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
+ kq_rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
mutex_unlock(&pm->lock);
fail_create_runlist_ib:
@@ -354,7 +352,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
size = pm->pmf->query_status_size;
mutex_lock(&pm->lock);
- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
@@ -364,9 +362,9 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
if (!retval)
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ kq_submit_packet(pm->priv_queue);
else
- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
+ kq_rollback_packet(pm->priv_queue);
out:
mutex_unlock(&pm->lock);
@@ -383,7 +381,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
size = pm->pmf->unmap_queues_size;
mutex_lock(&pm->lock);
- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
@@ -394,9 +392,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
reset, sdma_engine);
if (!retval)
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ kq_submit_packet(pm->priv_queue);
else
- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
+ kq_rollback_packet(pm->priv_queue);
out:
mutex_unlock(&pm->lock);
@@ -441,7 +439,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
size = pm->pmf->query_status_size;
mutex_lock(&pm->lock);
- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
@@ -449,7 +447,7 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
goto out;
}
memset(buffer, 0x55, size);
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ kq_submit_packet(pm->priv_queue);
pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
buffer[0], buffer[1], buffer[2], buffer[3],
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 9a4bafb2e175..2de01009f1b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -25,47 +25,7 @@
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_ai.h"
#include "kfd_pm4_opcodes.h"
-
-static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
- enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v9(struct kernel_queue *kq);
-static void submit_packet_v9(struct kernel_queue *kq);
-
-void kernel_queue_init_v9(struct kernel_queue_ops *ops)
-{
- ops->initialize = initialize_v9;
- ops->uninitialize = uninitialize_v9;
- ops->submit_packet = submit_packet_v9;
-}
-
-static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
- enum kfd_queue_type type, unsigned int queue_size)
-{
- int retval;
-
- retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
- if (retval)
- return false;
-
- kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
- kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
-
- memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
-
- return true;
-}
-
-static void uninitialize_v9(struct kernel_queue *kq)
-{
- kfd_gtt_sa_free(kq->dev, kq->eop_mem);
-}
-
-static void submit_packet_v9(struct kernel_queue *kq)
-{
- *kq->wptr64_kernel = kq->pending_wptr64;
- write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
- kq->pending_wptr64);
-}
+#include "gc/gc_10_1_0_sh_mask.h"
static int pm_map_process_v9(struct packet_manager *pm,
uint32_t *buffer, struct qcm_process_device *qpd)
@@ -90,10 +50,17 @@ static int pm_map_process_v9(struct packet_manager *pm,
packet->sh_mem_config = qpd->sh_mem_config;
packet->sh_mem_bases = qpd->sh_mem_bases;
- packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
- packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
- packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
- packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+ if (qpd->tba_addr) {
+ packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+ /* On GFX9, unlike GFX10, bit TRAP_EN of SQ_SHADER_TBA_HI is
+ * not defined, so setting it won't do any harm.
+ */
+ packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8)
+ | 1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT;
+
+ packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+ packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+ }
packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
@@ -341,35 +308,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
-
-static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
-{
- struct pm4_mec_release_mem *packet;
-
- packet = (struct pm4_mec_release_mem *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
-
- packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
- sizeof(struct pm4_mec_release_mem));
-
- packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
- packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
- packet->bitfields2.tcl1_action_ena = 1;
- packet->bitfields2.tc_action_ena = 1;
- packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
-
- packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
- packet->bitfields3.int_sel =
- int_sel__mec_release_mem__send_interrupt_after_write_confirm;
-
- packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
- packet->address_hi = upper_32_bits(gpu_addr);
-
- packet->data_lo = 0;
-
- return 0;
-}
-
const struct packet_manager_funcs kfd_v9_pm_funcs = {
.map_process = pm_map_process_v9,
.runlist = pm_runlist_v9,
@@ -377,12 +315,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
.query_status = pm_query_status_v9,
- .release_mem = pm_release_mem_v9,
+ .release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process),
.runlist_size = sizeof(struct pm4_mes_runlist),
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
.query_status_size = sizeof(struct pm4_mes_query_status),
- .release_mem_size = sizeof(struct pm4_mec_release_mem)
+ .release_mem_size = 0,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index 2adaf40027eb..bed4d0ccb6b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -26,47 +26,6 @@
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_opcodes.h"
-static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
- enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_vi(struct kernel_queue *kq);
-static void submit_packet_vi(struct kernel_queue *kq);
-
-void kernel_queue_init_vi(struct kernel_queue_ops *ops)
-{
- ops->initialize = initialize_vi;
- ops->uninitialize = uninitialize_vi;
- ops->submit_packet = submit_packet_vi;
-}
-
-static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
- enum kfd_queue_type type, unsigned int queue_size)
-{
- int retval;
-
- retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
- if (retval != 0)
- return false;
-
- kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
- kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
-
- memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
-
- return true;
-}
-
-static void uninitialize_vi(struct kernel_queue *kq)
-{
- kfd_gtt_sa_free(kq->dev, kq->eop_mem);
-}
-
-static void submit_packet_vi(struct kernel_queue *kq)
-{
- *kq->wptr_kernel = kq->pending_wptr;
- write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
- kq->pending_wptr);
-}
-
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
{
union PM4_MES_TYPE_3_HEADER header;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 060a9e8b301e..6af1b5881f43 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -59,24 +59,21 @@
* NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
* defines are w.r.t to PAGE_SIZE
*/
-#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_SHIFT 62
#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
-#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_SHIFT 46
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
<< KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
& KFD_MMAP_GPU_ID_MASK)
-#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
+#define KFD_MMAP_GET_GPU_ID(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
>> KFD_MMAP_GPU_ID_SHIFT)
-#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
-#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
-
/*
* When working with cp scheduler we should assign the HIQ manually or via
* the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
@@ -238,9 +235,10 @@ struct kfd_dev {
* KFD. It is aligned for mapping
* into user mode
*/
- size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell
- * to HW doorbell, GFX reserved some
- * at the start)
+ size_t doorbell_base_dw_offset; /* Offset from the start of the PCI
+ * doorbell BAR to the first KFD
+ * doorbell in dwords. GFX reserves
+ * the segment before this offset.
*/
u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
* page used by kernel queue
@@ -510,8 +508,7 @@ struct queue {
* Please read the kfd_mqd_manager.h description.
*/
enum KFD_MQD_TYPE {
- KFD_MQD_TYPE_COMPUTE = 0, /* for no cp scheduling */
- KFD_MQD_TYPE_HIQ, /* for hiq */
+ KFD_MQD_TYPE_HIQ = 0, /* for hiq */
KFD_MQD_TYPE_CP, /* for cp queues and diq */
KFD_MQD_TYPE_SDMA, /* for sdma queues */
KFD_MQD_TYPE_DIQ, /* for diq */
@@ -818,7 +815,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(void __iomem *db, u32 value);
void write_kernel_doorbell64(void __iomem *db, u64 value);
-unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
+unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
@@ -886,7 +883,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
enum kfd_queue_type type);
-void kernel_queue_uninit(struct kernel_queue *kq);
+void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);
/* Process Queue Manager */
@@ -904,7 +901,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev,
struct file *f,
struct queue_properties *properties,
- unsigned int *qid);
+ unsigned int *qid,
+ uint32_t *p_doorbell_offset_in_process);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
@@ -972,10 +970,9 @@ struct packet_manager_funcs {
extern const struct packet_manager_funcs kfd_vi_pm_funcs;
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
-extern const struct packet_manager_funcs kfd_v10_pm_funcs;
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
-void pm_uninit(struct packet_manager *pm);
+void pm_uninit(struct packet_manager *pm, bool hanging);
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res);
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
@@ -991,9 +988,6 @@ void pm_release_ib(struct packet_manager *pm);
/* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
- struct scheduling_resources *res);
-
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 10f9af5784f2..25b90f70aecd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -32,6 +32,7 @@
#include <linux/mman.h>
#include <linux/file.h>
#include "amdgpu_amdkfd.h"
+#include "amdgpu.h"
struct mm_struct;
@@ -324,6 +325,8 @@ struct kfd_process *kfd_create_process(struct file *filep)
(int)process->lead_thread->pid);
}
out:
+ if (!IS_ERR(process))
+ kref_get(&process->ref);
mutex_unlock(&kfd_processes_mutex);
return process;
@@ -560,8 +563,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
continue;
- offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
- << PAGE_SHIFT;
+ offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
MAP_SHARED, offset);
@@ -1151,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
void kfd_flush_tlb(struct kfd_process_device *pdd)
{
struct kfd_dev *dev = pdd->dev;
- const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
/* Nothing to flush until a VMID is assigned, which
* only happens when the first queue is created.
*/
if (pdd->qpd.vmid)
- f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+ amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+ pdd->qpd.vmid);
} else {
- f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
+ amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+ pdd->process->pasid);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 2659d226c056..31fcd1b51f00 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -162,7 +162,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
pqm->queue_slot_bitmap = NULL;
}
-static int create_cp_queue(struct process_queue_manager *pqm,
+static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev, struct queue **q,
struct queue_properties *q_properties,
struct file *f, unsigned int qid)
@@ -192,7 +192,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev,
struct file *f,
struct queue_properties *properties,
- unsigned int *qid)
+ unsigned int *qid,
+ uint32_t *p_doorbell_offset_in_process)
{
int retval;
struct kfd_process_device *pdd;
@@ -250,7 +251,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -271,7 +272,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -303,12 +304,15 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- if (q)
+ if (q && p_doorbell_offset_in_process)
/* Return the doorbell offset within the doorbell page
* to the caller so it can be passed up to user mode
* (in bytes).
+ * There are always 1024 doorbells per process, so in case
+ * of 8-byte doorbells, there are two doorbell pages per
+ * process.
*/
- properties->doorbell_off =
+ *p_doorbell_offset_in_process =
(q->properties.doorbell_off * sizeof(uint32_t)) &
(kfd_doorbell_process_slice(dev) - 1);
@@ -370,7 +374,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
/* destroy kernel queue (DIQ) */
dqm = pqn->kq->dev->dqm;
dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
- kernel_queue_uninit(pqn->kq);
+ kernel_queue_uninit(pqn->kq, false);
}
if (pqn->q) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 69bd0628fdc6..203c823d65f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -486,6 +486,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.num_sdma_engines);
sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines",
dev->node_props.num_sdma_xgmi_engines);
+ sysfs_show_32bit_prop(buffer, "num_sdma_queues_per_engine",
+ dev->node_props.num_sdma_queues_per_engine);
+ sysfs_show_32bit_prop(buffer, "num_cp_queues",
+ dev->node_props.num_cp_queues);
if (dev->gpu) {
log_max_watch_addr =
@@ -1309,9 +1313,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
dev->node_props.num_sdma_xgmi_engines =
gpu->device_info->num_xgmi_sdma_engines;
+ dev->node_props.num_sdma_queues_per_engine =
+ gpu->device_info->num_sdma_queues_per_engine;
dev->node_props.num_gws = (hws_gws_support &&
dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
+ dev->node_props.num_cp_queues = get_queues_num(dev->gpu->dqm);
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 15843e0fc756..74e9b1682af8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -81,6 +81,8 @@ struct kfd_node_properties {
int32_t drm_render_minor;
uint32_t num_sdma_engines;
uint32_t num_sdma_xgmi_engines;
+ uint32_t num_sdma_queues_per_engine;
+ uint32_t num_cp_queues;
char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};