diff options
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 37 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 |
3 files changed, 39 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 95fc5668195c..6af6deeda523 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2049,6 +2049,14 @@ static int criu_checkpoint(struct file *filep, goto exit_unlock; } + /* Confirm all process queues are evicted */ + if (!p->queues_paused) { + pr_err("Cannot dump process when queues are not in evicted state\n"); + /* CRIU plugin did not call op PROCESS_INFO before checkpointing */ + ret = -EINVAL; + goto exit_unlock; + } + criu_get_process_object_info(p, &num_bos, &priv_size); if (num_bos != args->num_bos || @@ -2388,7 +2396,24 @@ static int criu_unpause(struct file *filep, struct kfd_process *p, struct kfd_ioctl_criu_args *args) { - return 0; + int ret; + + mutex_lock(&p->mutex); + + if (!p->queues_paused) { + mutex_unlock(&p->mutex); + return -EINVAL; + } + + ret = kfd_process_restore_queues(p); + if (ret) + pr_err("Failed to unpause queues ret:%d\n", ret); + else + p->queues_paused = false; + + mutex_unlock(&p->mutex); + + return ret; } static int criu_resume(struct file *filep, @@ -2440,6 +2465,12 @@ static int criu_process_info(struct file *filep, goto err_unlock; } + ret = kfd_process_evict_queues(p); + if (ret) + goto err_unlock; + + p->queues_paused = true; + args->pid = task_pid_nr_ns(p->lead_thread, task_active_pid_ns(p->lead_thread)); @@ -2447,6 +2478,10 @@ static int criu_process_info(struct file *filep, dev_dbg(kfd_device, "Num of bos:%u\n", args->num_bos); err_unlock: + if (ret) { + kfd_process_restore_queues(p); + p->queues_paused = false; + } mutex_unlock(&p->mutex); return ret; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9b347247055c..677f21447112 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -877,6 +877,8 @@ struct kfd_process { bool xnack_enabled; atomic_t poison; + /* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */ + bool queues_paused; }; #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index b3198e186622..0649064b8e95 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1384,6 +1384,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) process->mm = thread->mm; process->lead_thread = thread->group_leader; process->n_pdds = 0; + process->queues_paused = false; INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); process->last_restore_timestamp = get_jiffies_64(); |