diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 246 |
1 files changed, 233 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d27221ddcdeb..40695d52e9a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -25,6 +25,7 @@ #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> +#include <linux/mmu_context.h> #include <linux/slab.h> #include <linux/amd-iommu.h> #include <linux/notifier.h> @@ -76,6 +77,178 @@ struct kfd_procfs_tree { static struct kfd_procfs_tree procfs; +/* + * Structure for SDMA activity tracking + */ +struct kfd_sdma_activity_handler_workarea { + struct work_struct sdma_activity_work; + struct kfd_process_device *pdd; + uint64_t sdma_activity_counter; +}; + +struct temp_sdma_queue_list { + uint64_t rptr; + uint64_t sdma_val; + unsigned int queue_id; + struct list_head list; +}; + +static void kfd_sdma_activity_worker(struct work_struct *work) +{ + struct kfd_sdma_activity_handler_workarea *workarea; + struct kfd_process_device *pdd; + uint64_t val; + struct mm_struct *mm; + struct queue *q; + struct qcm_process_device *qpd; + struct device_queue_manager *dqm; + int ret = 0; + struct temp_sdma_queue_list sdma_q_list; + struct temp_sdma_queue_list *sdma_q, *next; + + workarea = container_of(work, struct kfd_sdma_activity_handler_workarea, + sdma_activity_work); + if (!workarea) + return; + + pdd = workarea->pdd; + if (!pdd) + return; + dqm = pdd->dev->dqm; + qpd = &pdd->qpd; + if (!dqm || !qpd) + return; + /* + * Total SDMA activity is current SDMA activity + past SDMA activity + * Past SDMA count is stored in pdd. + * To get the current activity counters for all active SDMA queues, + * we loop over all SDMA queues and get their counts from user-space. + * + * We cannot call get_user() with dqm_lock held as it can cause + * a circular lock dependency situation. To read the SDMA stats, + * we need to do the following: + * + * 1. 
Create a temporary list of SDMA queue nodes from the qpd->queues_list, + * with dqm_lock/dqm_unlock(). + * 2. Call get_user() for each node in temporary list without dqm_lock. + * Save the SDMA count for each node and also add the count to the total + * SDMA count counter. + * It's possible, during this step, a few SDMA queue nodes got deleted + * from the qpd->queues_list. + * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted. + * If any node got deleted, its SDMA count would be captured in the sdma + * past activity counter. So subtract the SDMA counter stored in step 2 + * for this node from the total SDMA count. + */ + INIT_LIST_HEAD(&sdma_q_list.list); + + /* + * Create the temp list of all SDMA queues + */ + dqm_lock(dqm); + + list_for_each_entry(q, &qpd->queues_list, list) { + if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) && + (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI)) + continue; + + sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL); + if (!sdma_q) { + dqm_unlock(dqm); + goto cleanup; + } + + INIT_LIST_HEAD(&sdma_q->list); + sdma_q->rptr = (uint64_t)q->properties.read_ptr; + sdma_q->queue_id = q->properties.queue_id; + list_add_tail(&sdma_q->list, &sdma_q_list.list); + } + + /* + * If the temp list is empty, then no SDMA queue nodes were found in + * qpd->queues_list. Return the past activity count as the total sdma + * count + */ + if (list_empty(&sdma_q_list.list)) { + workarea->sdma_activity_counter = pdd->sdma_past_activity_counter; + dqm_unlock(dqm); + return; + } + + dqm_unlock(dqm); + + /* + * Get the usage count for each SDMA queue in temp_list. 
+ */ + mm = get_task_mm(pdd->process->lead_thread); + if (!mm) + goto cleanup; + + kthread_use_mm(mm); + + list_for_each_entry(sdma_q, &sdma_q_list.list, list) { + val = 0; + ret = read_sdma_queue_counter(sdma_q->rptr, &val); + if (ret) { + pr_debug("Failed to read SDMA queue active counter for queue id: %d", + sdma_q->queue_id); + } else { + sdma_q->sdma_val = val; + workarea->sdma_activity_counter += val; + } + } + + kthread_unuse_mm(mm); + mmput(mm); + + /* + * Do a second iteration over qpd->queues_list to check if any SDMA + * nodes got deleted while fetching SDMA counter. + */ + dqm_lock(dqm); + + workarea->sdma_activity_counter += pdd->sdma_past_activity_counter; + + list_for_each_entry(q, &qpd->queues_list, list) { + if (list_empty(&sdma_q_list.list)) + break; + + if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) && + (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI)) + continue; + + list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) { + if (((uint64_t)q->properties.read_ptr == sdma_q->rptr) && + (sdma_q->queue_id == q->properties.queue_id)) { + list_del(&sdma_q->list); + kfree(sdma_q); + break; + } + } + } + + dqm_unlock(dqm); + + /* + * If temp list is not empty, it implies some queues got deleted + * from qpd->queues_list during SDMA usage read. Subtract the SDMA + * count for each node from the total SDMA count. 
+ */ + list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) { + workarea->sdma_activity_counter -= sdma_q->sdma_val; + list_del(&sdma_q->list); + kfree(sdma_q); + } + + return; + +cleanup: + list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) { + list_del(&sdma_q->list); + kfree(sdma_q); + } +} + static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, char *buffer) { @@ -87,8 +260,24 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, } else if (strncmp(attr->name, "vram_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_vram); - if (pdd) - return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); + return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); + } else if (strncmp(attr->name, "sdma_", 5) == 0) { + struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, + attr_sdma); + struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler; + + INIT_WORK(&sdma_activity_work_handler.sdma_activity_work, + kfd_sdma_activity_worker); + + sdma_activity_work_handler.pdd = pdd; + + schedule_work(&sdma_activity_work_handler.sdma_activity_work); + + flush_work(&sdma_activity_work_handler.sdma_activity_work); + + return snprintf(buffer, PAGE_SIZE, "%llu\n", + (sdma_activity_work_handler.sdma_activity_counter)/ + SDMA_ACTIVITY_DIVISOR); } else { pr_err("Invalid attribute"); return -EINVAL; @@ -210,7 +399,24 @@ int kfd_procfs_add_queue(struct queue *q) return 0; } -int kfd_procfs_add_vram_usage(struct kfd_process *p) +static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr, + char *name) +{ + int ret = 0; + + if (!p || !attr || !name) + return -EINVAL; + + attr->name = name; + attr->mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(attr); + + ret = sysfs_create_file(p->kobj, attr); + + return ret; +} + +static int kfd_procfs_add_sysfs_files(struct kfd_process *p) { int ret = 0; 
struct kfd_process_device *pdd; @@ -221,17 +427,25 @@ int kfd_procfs_add_vram_usage(struct kfd_process *p) if (!p->kobj) return -EFAULT; - /* Create proc/<pid>/vram_<gpuid> file for each GPU */ + /* + * Create sysfs files for each GPU: + * - proc/<pid>/vram_<gpuid> + * - proc/<pid>/sdma_<gpuid> + */ list_for_each_entry(pdd, &p->per_device_data, per_device_list) { - snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u", + snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u", pdd->dev->id); - pdd->attr_vram.name = pdd->vram_filename; - pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&pdd->attr_vram); - ret = sysfs_create_file(p->kobj, &pdd->attr_vram); + ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename); if (ret) pr_warn("Creating vram usage for gpu id %d failed", (int)pdd->dev->id); + + snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u", + pdd->dev->id); + ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename); + if (ret) + pr_warn("Creating sdma usage for gpu id %d failed", + (int)pdd->dev->id); } return ret; @@ -428,6 +642,7 @@ struct kfd_process *kfd_create_process(struct file *filep) (int)process->lead_thread->pid); if (ret) { pr_warn("Creating procfs pid directory failed"); + kobject_put(process->kobj); goto out; } @@ -444,9 +659,9 @@ struct kfd_process *kfd_create_process(struct file *filep) if (!process->kobj_queues) pr_warn("Creating KFD proc/queues folder failed"); - ret = kfd_procfs_add_vram_usage(process); + ret = kfd_procfs_add_sysfs_files(process); if (ret) - pr_warn("Creating vram usage file for pid %d failed", + pr_warn("Creating sysfs usage file for pid %d failed", (int)process->lead_thread->pid); } out: @@ -597,8 +812,10 @@ static void kfd_process_wq_release(struct work_struct *work) kobject_put(p->kobj_queues); p->kobj_queues = NULL; - list_for_each_entry(pdd, &p->per_device_data, per_device_list) + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { 
sysfs_remove_file(p->kobj, &pdd->attr_vram); + sysfs_remove_file(p->kobj, &pdd->attr_sdma); + } kobject_del(p->kobj); kobject_put(p->kobj); @@ -906,6 +1123,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, pdd->already_dequeued = false; pdd->runtime_inuse = false; pdd->vram_usage = 0; + pdd->sdma_past_activity_counter = 0; list_add(&pdd->per_device_list, &p->per_device_data); /* Init idr used for memory handle translation */ @@ -1002,8 +1220,10 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, */ if (!pdd->runtime_inuse) { err = pm_runtime_get_sync(dev->ddev->dev); - if (err < 0) + if (err < 0) { + pm_runtime_put_autosuspend(dev->ddev->dev); return ERR_PTR(err); + } } err = kfd_iommu_bind_process_to_device(pdd); |