aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorPhilip Yang <Philip.Yang@amd.com>2024-11-26 11:33:15 -0500
committerAlex Deucher <alexander.deucher@amd.com>2024-12-18 12:39:07 -0500
commit34db5a32617d102e8042151bb87590e43c97132e (patch)
treec6e752e4208514bb16a697901bf6daf735d56afd
parentdrm/amdgpu: Optimize gfx v9 GPU page fault handling (diff)
downloadwireguard-linux-34db5a32617d102e8042151bb87590e43c97132e.tar.xz
wireguard-linux-34db5a32617d102e8042151bb87590e43c97132e.zip
drm/amdkfd: Queue interrupt work to different CPU
For CPX mode, each KFD node has interrupt worker to process ih_fifo to send events to user space. Currently all interrupt workers of same adev queue to same CPU, all workers execution are actually serialized and this cause KFD ih_fifo overflow when CPU usage is high. Use per-GPU unbounded highpri queue with number of workers equals to number of partitions, let queue_work select the next CPU round robin among the local CPUs of same NUMA. Signed-off-by: Philip Yang <Philip.Yang@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to '')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c25
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c25
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h3
3 files changed, 20 insertions, 33 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9b49563f2c42..a29374c86405 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -649,6 +649,14 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
struct kfd_node *knode;
unsigned int i;
+ /*
+ * flush_work ensures that there are no outstanding
+ * work-queue items that will access interrupt_ring. New work items
+ * can't be created because we stopped interrupt handling above.
+ */
+ flush_workqueue(kfd->ih_wq);
+ destroy_workqueue(kfd->ih_wq);
+
for (i = 0; i < num_nodes; i++) {
knode = kfd->nodes[i];
device_queue_manager_uninit(knode->dqm);
@@ -1066,21 +1074,6 @@ static int kfd_resume(struct kfd_node *node)
return err;
}
-static inline void kfd_queue_work(struct workqueue_struct *wq,
- struct work_struct *work)
-{
- int cpu, new_cpu;
-
- cpu = new_cpu = smp_processor_id();
- do {
- new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
- if (cpu_to_node(new_cpu) == numa_node_id())
- break;
- } while (cpu != new_cpu);
-
- queue_work_on(new_cpu, wq, work);
-}
-
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
@@ -1106,7 +1099,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
patched_ihre, &is_patched)
&& enqueue_ih_ring_entry(node,
is_patched ? patched_ihre : ih_ring_entry)) {
- kfd_queue_work(node->ih_wq, &node->interrupt_work);
+ queue_work(node->kfd->ih_wq, &node->interrupt_work);
spin_unlock_irqrestore(&node->interrupt_lock, flags);
return;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 8e00800f3207..6beb786c582a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -62,11 +62,14 @@ int kfd_interrupt_init(struct kfd_node *node)
return r;
}
- node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
- if (unlikely(!node->ih_wq)) {
- kfifo_free(&node->ih_fifo);
- dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
- return -ENOMEM;
+ if (!node->kfd->ih_wq) {
+ node->kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND,
+ node->kfd->num_nodes);
+ if (unlikely(!node->kfd->ih_wq)) {
+ kfifo_free(&node->ih_fifo);
+ dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
+ return -ENOMEM;
+ }
}
spin_lock_init(&node->interrupt_lock);
@@ -96,16 +99,6 @@ void kfd_interrupt_exit(struct kfd_node *node)
spin_lock_irqsave(&node->interrupt_lock, flags);
node->interrupts_active = false;
spin_unlock_irqrestore(&node->interrupt_lock, flags);
-
- /*
- * flush_work ensures that there are no outstanding
- * work-queue items that will access interrupt_ring. New work items
- * can't be created because we stopped interrupt handling above.
- */
- flush_workqueue(node->ih_wq);
-
- destroy_workqueue(node->ih_wq);
-
kfifo_free(&node->ih_fifo);
}
@@ -155,7 +148,7 @@ static void interrupt_wq(struct work_struct *work)
/* If we spent more than a second processing signals,
* reschedule the worker to avoid soft-lockup warnings
*/
- queue_work(dev->ih_wq, &dev->interrupt_work);
+ queue_work(dev->kfd->ih_wq, &dev->interrupt_work);
break;
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index e529fdc1b422..c32b255c0eb2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -274,7 +274,6 @@ struct kfd_node {
/* Interrupts */
struct kfifo ih_fifo;
- struct workqueue_struct *ih_wq;
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
@@ -367,6 +366,8 @@ struct kfd_dev {
struct kfd_node *nodes[MAX_KFD_NODES];
unsigned int num_nodes;
+ struct workqueue_struct *ih_wq;
+
/* Kernel doorbells for KFD device */
struct amdgpu_bo *doorbells;