aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
authorFelix Kuehling <Felix.Kuehling@amd.com>2019-04-23 23:32:56 -0400
committerAlex Deucher <alexander.deucher@amd.com>2019-05-24 12:21:03 -0500
commit32cce8bc86032cc0c24086d6ce3ccf8e05e686cf (patch)
tree817ee45996815473350973851ff2f0239b68d53a /drivers/gpu/drm/amd/amdkfd
parentdrm/amdkfd: Delete alloc_format field from map_queue struct (diff)
downloadlinux-dev-32cce8bc86032cc0c24086d6ce3ccf8e05e686cf.tar.xz
linux-dev-32cce8bc86032cc0c24086d6ce3ccf8e05e686cf.zip
drm/amdkfd: Fix a circular lock dependency
Fix a circular lock dependency exposed under userptr memory pressure. The DQM lock is the only one taken inside the MMU notifier. We need to make sure that no reclaim is done under this lock, and that no other locks are taken under which reclaim is possible. Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Reviewed-by: Philip Yang <Philip.Yang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c33
1 files changed, 29 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 1562590d837e..0bfdb141b6e7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -794,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm,
retval = dqm->asic_ops.update_qpd(dqm, qpd);
dqm->processes_count++;
- kfd_inc_compute_active(dqm->dev);
dqm_unlock(dqm);
+ /* Outside the DQM lock because under the DQM lock we can't do
+ * reclaim or take other locks that others hold while reclaiming.
+ */
+ kfd_inc_compute_active(dqm->dev);
+
return retval;
}
@@ -818,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
- kfd_dec_compute_active(dqm->dev);
goto out;
}
}
@@ -826,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm,
retval = 1;
out:
dqm_unlock(dqm);
+
+ /* Outside the DQM lock because under the DQM lock we can't do
+ * reclaim or take other locks that others hold while reclaiming.
+ */
+ if (!retval)
+ kfd_dec_compute_active(dqm->dev);
+
return retval;
}
@@ -1519,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
struct queue *q, *next;
struct device_process_node *cur, *next_dpn;
int retval = 0;
+ bool found = false;
dqm_lock(dqm);
@@ -1537,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
- kfd_dec_compute_active(dqm->dev);
+ found = true;
break;
}
}
dqm_unlock(dqm);
+
+ /* Outside the DQM lock because under the DQM lock we can't do
+ * reclaim or take other locks that others hold while reclaiming.
+ */
+ if (found)
+ kfd_dec_compute_active(dqm->dev);
+
return retval;
}
@@ -1588,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
struct device_process_node *cur, *next_dpn;
enum kfd_unmap_queues_filter filter =
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
+ bool found = false;
retval = 0;
@@ -1624,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
- kfd_dec_compute_active(dqm->dev);
+ found = true;
break;
}
}
@@ -1638,6 +1657,12 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
dqm_unlock(dqm);
+ /* Outside the DQM lock because under the DQM lock we can't do
+ * reclaim or take other locks that others hold while reclaiming.
+ */
+ if (found)
+ kfd_dec_compute_active(dqm->dev);
+
/* Lastly, free mqd resources.
* Do uninit_mqd() after dqm_unlock to avoid circular locking.
*/