author	Philip Yang <Philip.Yang@amd.com>	2024-11-22 17:36:15 -0500
committer	Alex Deucher <alexander.deucher@amd.com>	2024-12-18 12:39:07 -0500
commit	f607b2b867bbef8a3a76de8d0eccf7429782bdca (patch)
tree	76ace342da8d893ccebe11158f104971e8714ff0
parent	drm/amdgpu: partially revert "reduce reset time" (diff)
drm/amdkfd: KFD interrupt access ih_fifo data in-place
When running CPX mode with 4 streams/queues per KFD node, the KFD interrupt
handler has to cope with 40000 to 80000 interrupts per second and becomes the
performance bottleneck. Remove the kfifo_out memcpy overhead by accessing the
ih_fifo data in-place and advancing the read pointer with kfifo_skip_count.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c	35
1 file changed, 14 insertions, 21 deletions
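
For context, the sketch below illustrates the in-place kfifo pattern this
patch switches to, using the same kfifo calls that appear in the diff
(kfifo_is_full, kfifo_in, kfifo_is_empty, kfifo_out_linear_ptr,
kfifo_skip_count): the consumer borrows a pointer into the fifo's own
storage, processes the entry where it sits, and only then advances the read
pointer. The struct my_dev fields and the process_entry() helper are
illustrative stand-ins, not code from the driver.

/* Hedged sketch of the in-place kfifo consume pattern (kernel context).
 * my_dev, entry_size and process_entry() are stand-ins; only the kfifo
 * calls mirror the ones used in the patch.
 */
#include <linux/kfifo.h>

struct my_dev {
	struct kfifo fifo;	/* byte fifo holding fixed-size IH entries */
	u32 entry_size;		/* size of one ring entry in bytes */
};

static void process_entry(const u32 *entry)
{
	/* stand-in for the real interrupt handling */
}

/* Producer: drop the entry when the fifo is full instead of copying part of it. */
static bool my_enqueue(struct my_dev *dev, const void *entry)
{
	if (kfifo_is_full(&dev->fifo))
		return false;

	kfifo_in(&dev->fifo, entry, dev->entry_size);
	return true;
}

/* Consumer: borrow a pointer into the fifo buffer, free the slot only after use. */
static void my_drain(struct my_dev *dev)
{
	u32 *entry;

	while (!kfifo_is_empty(&dev->fifo)) {
		/* Returns a pointer to the data in place; no memcpy into a local buffer. */
		if (kfifo_out_linear_ptr(&dev->fifo, &entry, dev->entry_size) !=
		    dev->entry_size)
			break;

		process_entry(entry);

		/* Advance the read pointer; the producer may now reuse this space. */
		kfifo_skip_count(&dev->fifo, dev->entry_size);
	}
}

Note that kfifo_out_linear_ptr only returns contiguously stored data, so
this pattern assumes, as the driver does, that the fifo size is a multiple
of the entry size and an entry never straddles the buffer wrap-around.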
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 9b6b6e882593..e7412de9a0ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -114,50 +114,43 @@ void kfd_interrupt_exit(struct kfd_node *node)
*/
bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
{
- int count;
-
- count = kfifo_in(&node->ih_fifo, ih_ring_entry,
- node->kfd->device_info.ih_ring_entry_size);
- if (count != node->kfd->device_info.ih_ring_entry_size) {
+ if (kfifo_is_full(&node->ih_fifo)) {
dev_dbg_ratelimited(node->adev->dev,
- "Interrupt ring overflow, dropping interrupt %d\n",
- count);
+ "Interrupt ring overflow, dropping interrupt\n");
return false;
}
+ kfifo_in(&node->ih_fifo, ih_ring_entry, node->kfd->device_info.ih_ring_entry_size);
return true;
}
/*
* Assumption: single reader/writer. This function is not re-entrant
*/
-static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry)
+static bool dequeue_ih_ring_entry(struct kfd_node *node, u32 **ih_ring_entry)
{
int count;
- count = kfifo_out(&node->ih_fifo, ih_ring_entry,
- node->kfd->device_info.ih_ring_entry_size);
-
- WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size);
+ if (kfifo_is_empty(&node->ih_fifo))
+ return false;
+ count = kfifo_out_linear_ptr(&node->ih_fifo, ih_ring_entry,
+ node->kfd->device_info.ih_ring_entry_size);
+ WARN_ON(count != node->kfd->device_info.ih_ring_entry_size);
return count == node->kfd->device_info.ih_ring_entry_size;
}
static void interrupt_wq(struct work_struct *work)
{
- struct kfd_node *dev = container_of(work, struct kfd_node,
- interrupt_work);
- uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
+ struct kfd_node *dev = container_of(work, struct kfd_node, interrupt_work);
+ uint32_t *ih_ring_entry;
unsigned long start_jiffies = jiffies;
- if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
- dev_err_once(dev->adev->dev, "Ring entry too small\n");
- return;
- }
-
- while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
+ while (dequeue_ih_ring_entry(dev, &ih_ring_entry)) {
dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
+ kfifo_skip_count(&dev->ih_fifo, dev->kfd->device_info.ih_ring_entry_size);
+
if (time_is_before_jiffies(start_jiffies + HZ)) {
/* If we spent more than a second processing signals,
* reschedule the worker to avoid soft-lockup warnings