From c4381d0ee81930097e94e55d1c23f85798ffd093 Mon Sep 17 00:00:00 2001
From: Bokun Zhang
Date: Wed, 12 Jan 2022 10:34:11 -0500
Subject: drm/amdgpu: Add interface to load SRIOV cap FW

- Add an interface to load the SRIOV cap FW. If the FW does not
  exist, simply skip this FW loading routine. This FW is only loaded
  under SRIOV; other driver configurations are not affected. Adding
  this interface makes it easier for us to prepare the SRIOV Linux
  guest driver for different users.

- Update the sysfs interface to read the cap FW version.

- Refactor the PSP FW loading routine under SRIOV to use a unified
  switch statement instead of an if statement.

- Remove the redundant amdgpu_sriov_vf() check in the FW loading
  routine.

Acked-by: Monk Liu
Acked-by: Guchun Chen
Signed-off-by: Bokun Zhang
Signed-off-by: Alex Deucher
---
 include/uapi/drm/amdgpu_drm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 0b94ec7b73e7..be4f9111f478 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -728,6 +728,8 @@ struct drm_amdgpu_cs_chunk_data {
 #define AMDGPU_INFO_FW_DMCUB		0x14
 	/* Subquery id: Query TOC firmware version */
 	#define AMDGPU_INFO_FW_TOC	0x15
+	/* Subquery id: Query CAP firmware version */
+	#define AMDGPU_INFO_FW_CAP	0x16
 
 /* number of bytes moved for TTM migration */
 #define AMDGPU_INFO_NUM_BYTES_MOVED	0x0f
--
cgit v1.2.3-59-g8ed1b


From 8cda7a4f96e435be2fd074009d69521d973d7d31 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Fri, 7 Jan 2022 17:57:33 -0500
Subject: drm/amdgpu/UAPI: add new CTX OP to get/set stable pstates

Add a new CTX ioctl operation to set stable pstates for profiling.
When creating traces for tools like RGP, using SPM, or doing
performance profiling, a special stable profiling power state must be
enabled on the GPU. These profiling states set fixed clocks and
disable certain other power features, such as powergating, which may
impact the results.

Historically, these profiling pstates were enabled via sysfs; this
adds an interface to enable them via the CTX ioctl from the
application. Since the power state is global, only one application
can set it at a time: if multiple applications try to use it, only
the first will get it, and the ioctl will return -EBUSY for the
others. The sysfs interface will override whatever has been set by
this interface.
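As a rough illustration of the intended use (this snippet is not part of the patch), an application could request a profiling pstate for an existing context via the DRM_IOCTL_AMDGPU_CTX ioctl. In this hedged sketch, "fd" (an open render-node descriptor) and "ctx_id" (an already-created context) are assumed to come from the application's normal libdrm setup, and the include path for the uapi header may vary by distribution:

/*
 * Userspace sketch: request the peak stable pstate on an existing context.
 * "fd" and "ctx_id" are assumed to be provided by the caller.
 */
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/amdgpu_drm.h>

static int request_peak_pstate(int fd, uint32_t ctx_id)
{
	union drm_amdgpu_ctx args;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_SET_STABLE_PSTATE;
	args.in.ctx_id = ctx_id;
	args.in.flags = AMDGPU_CTX_STABLE_PSTATE_PEAK;

	/* Fails with errno == EBUSY if another context already owns the
	 * stable pstate. */
	if (drmIoctl(fd, DRM_IOCTL_AMDGPU_CTX, &args))
		return -errno;

	return 0;
}

Passing AMDGPU_CTX_STABLE_PSTATE_NONE releases the state again, and AMDGPU_CTX_OP_GET_STABLE_PSTATE (with in.flags set to 0) reports the current selection in out.pstate.flags.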
Mesa MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/207 v2: don't default r = 0; v3: rebase on Evan's PM cleanup Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 129 ++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 5 ++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 + include/uapi/drm/amdgpu_drm.h | 17 +++- 6 files changed, 153 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 468003583b2a..2c929fa40379 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -237,6 +237,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); ctx->init_priority = priority; ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; + ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; return 0; } @@ -255,6 +256,86 @@ static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) kfree(entity); } +static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, + u32 *stable_pstate) +{ + struct amdgpu_device *adev = ctx->adev; + enum amd_dpm_forced_level current_level; + + if (!ctx) + return -EINVAL; + + current_level = amdgpu_dpm_get_performance_level(adev); + + switch (current_level) { + case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK; + break; + default: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; + break; + } + return 0; +} + +static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, + u32 stable_pstate) +{ + struct amdgpu_device *adev = ctx->adev; + enum amd_dpm_forced_level level; + int r; + + if (!ctx) + return -EINVAL; + + mutex_lock(&adev->pm.stable_pstate_ctx_lock); + if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) { + r = -EBUSY; + goto done; + } + + switch (stable_pstate) { + case AMDGPU_CTX_STABLE_PSTATE_NONE: + level = AMD_DPM_FORCED_LEVEL_AUTO; + break; + case AMDGPU_CTX_STABLE_PSTATE_STANDARD: + level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD; + break; + case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK: + level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK; + break; + case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK: + level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK; + break; + case AMDGPU_CTX_STABLE_PSTATE_PEAK: + level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; + break; + default: + r = -EINVAL; + goto done; + } + + r = amdgpu_dpm_force_performance_level(adev, level); + + if (level == AMD_DPM_FORCED_LEVEL_AUTO) + adev->pm.stable_pstate_ctx = NULL; + else + adev->pm.stable_pstate_ctx = ctx; +done: + mutex_unlock(&adev->pm.stable_pstate_ctx_lock); + + return r; +} + static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); @@ -270,7 +351,7 @@ static void amdgpu_ctx_fini(struct kref *ref) ctx->entities[i][j] = NULL; } } - + amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE); mutex_destroy(&ctx->lock); kfree(ctx); } @@ -467,11 +548,41 @@ static int amdgpu_ctx_query2(struct 
amdgpu_device *adev, return 0; } + + +static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv, uint32_t id, + bool set, u32 *stable_pstate) +{ + struct amdgpu_ctx *ctx; + struct amdgpu_ctx_mgr *mgr; + int r; + + if (!fpriv) + return -EINVAL; + + mgr = &fpriv->ctx_mgr; + mutex_lock(&mgr->lock); + ctx = idr_find(&mgr->ctx_handles, id); + if (!ctx) { + mutex_unlock(&mgr->lock); + return -EINVAL; + } + + if (set) + r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate); + else + r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate); + + mutex_unlock(&mgr->lock); + return r; +} + int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { int r; - uint32_t id; + uint32_t id, stable_pstate; int32_t priority; union drm_amdgpu_ctx *args = data; @@ -500,6 +611,20 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, case AMDGPU_CTX_OP_QUERY_STATE2: r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; + case AMDGPU_CTX_OP_GET_STABLE_PSTATE: + if (args->in.flags) + return -EINVAL; + r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate); + args->out.pstate.flags = stable_pstate; + break; + case AMDGPU_CTX_OP_SET_STABLE_PSTATE: + if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK) + return -EINVAL; + stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK; + if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK) + return -EINVAL; + r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index a44b8b8ed39c..142f2f87d44c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -53,6 +53,7 @@ struct amdgpu_ctx { atomic_t guilty; unsigned long ras_counter_ce; unsigned long ras_counter_ue; + uint32_t stable_pstate; }; struct amdgpu_ctx_mgr { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6ba57ad88640..660fb4085c97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3510,6 +3510,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, init_rwsem(&adev->reset_sem); mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); + mutex_init(&adev->pm.stable_pstate_ctx_lock); amdgpu_device_init_apu_flags(adev); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 837a31a46596..d68e7132da2c 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -336,11 +336,16 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, return ret; } + mutex_lock(&adev->pm.stable_pstate_ctx_lock); if (amdgpu_dpm_force_performance_level(adev, level)) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); + mutex_unlock(&adev->pm.stable_pstate_ctx_lock); return -EINVAL; } + /* override whatever a user ctx may have set */ + adev->pm.stable_pstate_ctx = NULL; + mutex_unlock(&adev->pm.stable_pstate_ctx_lock); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 2766b88ecf96..5cc05110cdae 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -338,6 +338,9 @@ struct amdgpu_pm { uint32_t smu_debug_mask; bool pp_force_state_enabled; + + struct mutex 
stable_pstate_ctx_lock;
+	struct amdgpu_ctx       *stable_pstate_ctx;
 };
 
 u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index be4f9111f478..76b580d10a52 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -206,6 +206,8 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_OP_FREE_CTX	2
 #define AMDGPU_CTX_OP_QUERY_STATE	3
 #define AMDGPU_CTX_OP_QUERY_STATE2	4
+#define AMDGPU_CTX_OP_GET_STABLE_PSTATE	5
+#define AMDGPU_CTX_OP_SET_STABLE_PSTATE	6
 
 /* GPU reset status */
 #define AMDGPU_CTX_NO_RESET		0
@@ -238,10 +240,18 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_PRIORITY_HIGH        512
 #define AMDGPU_CTX_PRIORITY_VERY_HIGH   1023
 
+/* select a stable profiling pstate for perfmon tools */
+#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK  0xf
+#define AMDGPU_CTX_STABLE_PSTATE_NONE  0
+#define AMDGPU_CTX_STABLE_PSTATE_STANDARD  1
+#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK  2
+#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
+#define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
+
 struct drm_amdgpu_ctx_in {
 	/** AMDGPU_CTX_OP_* */
 	__u32	op;
-	/** For future use, no flags defined so far */
+	/** Flags */
 	__u32	flags;
 	__u32	ctx_id;
 	/** AMDGPU_CTX_PRIORITY_* */
@@ -262,6 +272,11 @@ union drm_amdgpu_ctx_out {
 		/** Reset status since the last call of the ioctl. */
 		__u32	reset_status;
 	} state;
+
+	struct {
+		__u32	flags;
+		__u32	_pad;
+	} pstate;
 };
 
 union drm_amdgpu_ctx {
--
cgit v1.2.3-59-g8ed1b


From 3698807094ecae945436921325f5c309d1123f11 Mon Sep 17 00:00:00 2001
From: Rajneesh Bhardwaj
Date: Tue, 24 Aug 2021 16:13:41 -0400
Subject: drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs

Checkpoint-Restore In Userspace (CRIU) is a powerful tool that can
snapshot a running process and later restore it on the same or a
remote machine, but it expects processes that have a device file
(e.g. a GPU) associated with them to provide the driver support
needed to assist CRIU and its extensible plugin interface. Thus, in
order to support checkpoint-restore of any ROCm process, the AMD
Radeon Open Compute kernel driver needs to provide a set of new APIs
that expose the necessary VRAM metadata and its contents to a
userspace component (the CRIU plugin), which can store them in the
form of image files.

This introduces new ioctls which will be used to checkpoint and
restore any KFD-bound user process. KFD normally only allows ioctl
calls from the same process that opened the KFD file descriptor.
Since these ioctls are expected to be called from a KFD CRIU plugin
that is ptrace-attached to the target and holds the
CAP_CHECKPOINT_RESTORE capability on its file descriptors, modify KFD
to allow such calls.
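To make the expected calling convention concrete, here is a hedged sketch (not part of this patch) of the checkpoint-side sequence documented later in the uapi header: PROCESS_INFO, then CHECKPOINT, then UNPAUSE. "kfd_fd" is assumed to be the target's /dev/kfd file descriptor obtained by the CRIU plugin, and the caller is assumed to be ptrace-attached with CAP_CHECKPOINT_RESTORE (or CAP_SYS_ADMIN); the actual dumping of the buckets to image files is elided:

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <linux/kfd_ioctl.h>

static int kfd_criu_checkpoint(int kfd_fd, pid_t target_pid)
{
	struct kfd_ioctl_criu_args args;
	void *dev_buckets, *bo_buckets, *priv_data;
	int ret = 0;

	memset(&args, 0, sizeof(args));
	args.op  = KFD_CRIU_OP_PROCESS_INFO;
	args.pid = target_pid;
	/* Pauses the target, evicts its queues and reports how much room
	 * the CHECKPOINT op will need. */
	if (ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args) < 0)
		return -errno;

	dev_buckets = calloc(args.num_devices, sizeof(struct kfd_criu_device_bucket));
	bo_buckets  = calloc(args.num_bos, sizeof(struct kfd_criu_bo_bucket));
	priv_data   = malloc(args.priv_data_size);

	if (dev_buckets && bo_buckets && priv_data) {
		args.op        = KFD_CRIU_OP_CHECKPOINT;
		args.devices   = (uintptr_t)dev_buckets;
		args.bos       = (uintptr_t)bo_buckets;
		args.priv_data = (uintptr_t)priv_data;
		/* On success the plugin would now write the filled buckets
		 * and private data out to image files. */
		if (ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args) < 0)
			ret = -errno;
	} else {
		ret = -ENOMEM;
	}

	/* Queues stay evicted after PROCESS_INFO until an explicit UNPAUSE. */
	args.op = KFD_CRIU_OP_UNPAUSE;
	if (ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args) < 0 && !ret)
		ret = -errno;

	free(dev_buckets);
	free(bo_buckets);
	free(priv_data);
	return ret;
}

The restore side mirrors this with RESTORE followed by RESUME, as described in the uapi comments below.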
(API redesigned by David Yat Sin) Suggested-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: David Yat Sin Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 98 +++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 65 ++++++++++++++++++++- include/uapi/linux/kfd_ioctl.h | 81 +++++++++++++++++++++++++- 3 files changed, 241 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 214a2c67fba4..90e6d9e335a5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include "kfd_priv.h" @@ -1859,6 +1860,75 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) } #endif +static int criu_checkpoint(struct file *filep, + struct kfd_process *p, + struct kfd_ioctl_criu_args *args) +{ + return 0; +} + +static int criu_restore(struct file *filep, + struct kfd_process *p, + struct kfd_ioctl_criu_args *args) +{ + return 0; +} + +static int criu_unpause(struct file *filep, + struct kfd_process *p, + struct kfd_ioctl_criu_args *args) +{ + return 0; +} + +static int criu_resume(struct file *filep, + struct kfd_process *p, + struct kfd_ioctl_criu_args *args) +{ + return 0; +} + +static int criu_process_info(struct file *filep, + struct kfd_process *p, + struct kfd_ioctl_criu_args *args) +{ + return 0; +} + +static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data) +{ + struct kfd_ioctl_criu_args *args = data; + int ret; + + dev_dbg(kfd_device, "CRIU operation: %d\n", args->op); + switch (args->op) { + case KFD_CRIU_OP_PROCESS_INFO: + ret = criu_process_info(filep, p, args); + break; + case KFD_CRIU_OP_CHECKPOINT: + ret = criu_checkpoint(filep, p, args); + break; + case KFD_CRIU_OP_UNPAUSE: + ret = criu_unpause(filep, p, args); + break; + case KFD_CRIU_OP_RESTORE: + ret = criu_restore(filep, p, args); + break; + case KFD_CRIU_OP_RESUME: + ret = criu_resume(filep, p, args); + break; + default: + dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op); + ret = -EINVAL; + break; + } + + if (ret) + dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret); + + return ret; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -1962,6 +2032,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, kfd_ioctl_set_xnack_mode, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP, + kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE), + }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) @@ -1976,6 +2050,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) char *kdata = NULL; unsigned int usize, asize; int retcode = -EINVAL; + bool ptrace_attached = false; if (nr >= AMDKFD_CORE_IOCTL_COUNT) goto err_i1; @@ -2001,7 +2076,15 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) * processes need to create their own KFD device context. 
*/ process = filep->private_data; - if (process->lead_thread != current->group_leader) { + + rcu_read_lock(); + if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) && + ptrace_parent(process->lead_thread) == current) + ptrace_attached = true; + rcu_read_unlock(); + + if (process->lead_thread != current->group_leader + && !ptrace_attached) { dev_dbg(kfd_device, "Using KFD FD in wrong process\n"); retcode = -EBADF; goto err_i1; @@ -2016,6 +2099,19 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) goto err_i1; } + /* + * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support + * CAP_CHECKPOINT_RESTORE, so we also allow access if CAP_SYS_ADMIN as CAP_SYS_ADMIN is a + * more priviledged access. + */ + if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) { + if (!capable(CAP_CHECKPOINT_RESTORE) && + !capable(CAP_SYS_ADMIN)) { + retcode = -EACCES; + goto err_i1; + } + } + if (cmd & (IOC_IN | IOC_OUT)) { if (asize <= sizeof(stack_kdata)) { kdata = stack_kdata; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ea68f3b3a4e9..f928878196ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -121,7 +121,26 @@ */ #define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512 - +/** + * enum kfd_ioctl_flags - KFD ioctl flags + * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how + * userspace can use a given ioctl. + */ +enum kfd_ioctl_flags { + /* + * @KFD_IOC_FLAG_CHECKPOINT_RESTORE: + * Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially + * perform privileged operations and load arbitrary data into MQDs and + * eventually HQD registers when the queue is mapped by HWS. In order to + * prevent this we should perform additional security checks. + * + * This is equivalent to callers with the CHECKPOINT_RESTORE capability. + * + * Note: Since earlier versions of docker do not support CHECKPOINT_RESTORE, + * we also allow ioctls with SYS_ADMIN capability. 
+ */ + KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0), +}; /* * Kernel module parameter to specify maximum number of supported queues per * device @@ -1006,6 +1025,50 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd, uint64_t tba_addr, uint64_t tma_addr); +/* CRIU */ +/* + * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of the CRIU private + * structures: + * kfd_criu_process_priv_data + * kfd_criu_device_priv_data + * kfd_criu_bo_priv_data + * kfd_criu_queue_priv_data + * kfd_criu_event_priv_data + * kfd_criu_svm_range_priv_data + */ + +#define KFD_CRIU_PRIV_VERSION 1 + +struct kfd_criu_process_priv_data { + uint32_t version; +}; + +struct kfd_criu_device_priv_data { + /* For future use */ + uint64_t reserved; +}; + +struct kfd_criu_bo_priv_data { + uint64_t reserved; +}; + +struct kfd_criu_svm_range_priv_data { + uint32_t object_type; + uint32_t reserved; +}; + +struct kfd_criu_queue_priv_data { + uint32_t object_type; + uint32_t reserved; +}; + +struct kfd_criu_event_priv_data { + uint32_t object_type; + uint32_t reserved; +}; + +/* CRIU - End */ + /* Queue Context Management */ int init_queue(struct queue **q, const struct queue_properties *properties); void uninit_queue(struct queue *q); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af96af174dc4..49429a6c42fc 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -468,6 +468,82 @@ struct kfd_ioctl_smi_events_args { __u32 anon_fd; /* from KFD */ }; +/************************************************************************************************** + * CRIU IOCTLs (Checkpoint Restore In Userspace) + * + * When checkpointing a process, the userspace application will perform: + * 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts + * all the queues. + * 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges) + * 3. UNPAUSE op to un-evict all the queues + * + * When restoring a process, the CRIU userspace application will perform: + * + * 1. RESTORE op to restore process contents + * 2. RESUME op to start the process + * + * Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User + * application needs to perform an UNPAUSE operation after calling PROCESS_INFO. + */ + +enum kfd_criu_op { + KFD_CRIU_OP_PROCESS_INFO, + KFD_CRIU_OP_CHECKPOINT, + KFD_CRIU_OP_UNPAUSE, + KFD_CRIU_OP_RESTORE, + KFD_CRIU_OP_RESUME, +}; + +/** + * kfd_ioctl_criu_args - Arguments perform CRIU operation + * @devices: [in/out] User pointer to memory location for devices information. + * This is an array of type kfd_criu_device_bucket. + * @bos: [in/out] User pointer to memory location for BOs information + * This is an array of type kfd_criu_bo_bucket. + * @priv_data: [in/out] User pointer to memory location for private data + * @priv_data_size: [in/out] Size of priv_data in bytes + * @num_devices: [in/out] Number of GPUs used by process. Size of @devices array. + * @num_bos [in/out] Number of BOs used by process. Size of @bos array. + * @num_objects: [in/out] Number of objects used by process. Objects are opaque to + * user application. 
+ * @pid: [in/out] PID of the process being checkpointed + * @op [in] Type of operation (kfd_criu_op) + * + * Return: 0 on success, -errno on failure + */ +struct kfd_ioctl_criu_args { + __u64 devices; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 bos; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_devices; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_bos; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_objects; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 pid; /* Used during ops: PROCESS_INFO, RESUME */ + __u32 op; +}; + +struct kfd_criu_device_bucket { + __u32 user_gpu_id; + __u32 actual_gpu_id; + __u32 drm_fd; + __u32 pad; +}; + +struct kfd_criu_bo_bucket { + __u64 addr; + __u64 size; + __u64 offset; + __u64 restored_offset; /* During restore, updated offset for BO */ + __u32 gpu_id; /* This is the user_gpu_id */ + __u32 alloc_flags; + __u32 dmabuf_fd; + __u32 pad; +}; + +/* CRIU IOCTLs - END */ +/**************************************************************************************************/ + /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -742,7 +818,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +#define AMDKFD_IOC_CRIU_OP \ + AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x23 #endif -- cgit v1.2.3-59-g8ed1b From 692996f2bef7aa1737e07554255ba0d9a73fb750 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 18 Jan 2022 01:47:58 -0500 Subject: drm/amdkfd: Bump up KFD API version for CRIU - Change KFD minor version to 7 for CRIU Proposed userspace changes: https://github.com/RadeonOpenCompute/criu Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 49429a6c42fc..e6a56c146920 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -32,9 +32,10 @@ * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 - Checkpoint Restore (CRIU) API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 6 +#define KFD_IOCTL_MINOR_VERSION 7 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.2.3-59-g8ed1b From 5bdd3eb253544b1e80f904e1205699d0a126d2d6 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 4 Feb 2022 11:58:32 -0500 Subject: drm/amdkfd: Remove unused old debugger implementation Cleanup the kfd code by removing the unused old debugger implementation. The address watch was only ever implemented in the upstream driver for GFXv7 (Kaveri). The user mode tools runtime using this API was never open-sourced. Work on the old debugger prototype that used this API has been discontinued years ago. Only a small piece of resetting wavefronts is kept and is moved to kfd_device_queue_manager.c. 
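Since the CRIU ioctl only exists from interface version 1.7 onward (see the version bump above), a plugin would typically probe the KFD interface version before issuing AMDKFD_IOC_CRIU_OP. A minimal sketch (not from these patches), assuming an already-open /dev/kfd descriptor:

#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int kfd_supports_criu(int kfd_fd)
{
	struct kfd_ioctl_get_version_args ver = {0};

	if (ioctl(kfd_fd, AMDKFD_IOC_GET_VERSION, &ver) < 0)
		return 0;

	/* CRIU ops were added with interface version 1.7. */
	return ver.major_version == 1 && ver.minor_version >= 7;
}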
Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 3 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 3 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 24 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 25 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 96 --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 24 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 10 - drivers/gpu/drm/amd/amdkfd/Makefile | 2 - drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 290 +------ drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 845 --------------------- drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h | 230 ------ drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 158 ---- drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 293 ------- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 - .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 59 ++ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 35 + drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 12 - drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 - drivers/gpu/drm/amd/amdkfd/kfd_process.c | 19 - drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 9 - include/uapi/linux/kfd_ioctl.h | 8 +- 22 files changed, 106 insertions(+), 2070 deletions(-) delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index 46cd4ee6bafb..c8935d718207 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -37,10 +37,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy, - .address_watch_disable = kgd_gfx_v9_address_watch_disable, - .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, - .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index abe93b3ff765..4191af5a3f13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -289,10 +289,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy, - .address_watch_disable = kgd_gfx_v9_address_watch_disable, - .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, - .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 7b7f4b2764c1..9378fc79e9ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -671,20 +671,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int kgd_address_watch_disable(struct amdgpu_device *adev) -{ - return 0; -} - -static int kgd_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -709,13 +695,6 @@ static int kgd_wave_control_execute(struct amdgpu_device *adev, return 0; } -static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return 0; -} - static void set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -767,10 +746,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = set_vm_context_page_table_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 1f37d3574001..e9c80ce13f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -582,21 +582,6 @@ static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd, return 0; } - -static int address_watch_disable_v10_3(struct amdgpu_device *adev) -{ - return 0; -} - -static int address_watch_execute_v10_3(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - static int wave_control_execute_v10_3(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -621,13 +606,6 @@ static int wave_control_execute_v10_3(struct amdgpu_device *adev, return 0; } -static uint32_t address_watch_get_offset_v10_3(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return 0; -} - static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -809,10 +787,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3, .hqd_destroy = hqd_destroy_v10_3, .hqd_sdma_destroy = hqd_sdma_destroy_v10_3, - .address_watch_disable = address_watch_disable_v10_3, - .address_watch_execute = address_watch_execute_v10_3, .wave_control_execute = wave_control_execute_v10_3, - .address_watch_get_offset = address_watch_get_offset_v10_3, .get_atc_vmid_pasid_mapping_info = NULL, .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, .program_trap_handler_settings = program_trap_handler_settings_v10_3, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 36528dad7684..65552bb7d2f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -45,43 +45,6 @@ enum { MAX_WATCH_ADDRESSES = 4 }; -enum { - 
ADDRESS_WATCH_REG_ADDR_HI = 0, - ADDRESS_WATCH_REG_ADDR_LO, - ADDRESS_WATCH_REG_CNTL, - ADDRESS_WATCH_REG_MAX -}; - -/* not defined in the CI/KV reg file */ -enum { - ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, - ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, - ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, - /* extend the mask to 26 bits to match the low address field */ - ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, - ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF -}; - -static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { - mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL, - mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL, - mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL, - mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL -}; - -union TCP_WATCH_CNTL_BITS { - struct { - uint32_t mask:24; - uint32_t vmid:4; - uint32_t atc:1; - uint32_t mode:2; - uint32_t valid:1; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { @@ -529,55 +492,6 @@ static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, return 0; } -static int kgd_address_watch_disable(struct amdgpu_device *adev) -{ - union TCP_WATCH_CNTL_BITS cntl; - unsigned int i; - - cntl.u32All = 0; - - cntl.bitfields.valid = 0; - cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; - cntl.bitfields.atc = 1; - - /* Turning off this address until we set all the registers */ - for (i = 0; i < MAX_WATCH_ADDRESSES; i++) - WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], cntl.u32All); - - return 0; -} - -static int kgd_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - union TCP_WATCH_CNTL_BITS cntl; - - cntl.u32All = cntl_val; - - /* Turning off this watch point until we set all the registers */ - cntl.bitfields.valid = 0; - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], cntl.u32All); - - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_HI], addr_hi); - - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_LO], addr_lo); - - /* Enable the watch point */ - cntl.bitfields.valid = 1; - - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], cntl.u32All); - - return 0; -} - static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -602,13 +516,6 @@ static int kgd_wave_control_execute(struct amdgpu_device *adev, return 0; } -static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; -} - static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { @@ -665,10 +572,7 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, 
.set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 52832cd69a93..9dc5f2a0cc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -538,20 +538,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int kgd_address_watch_disable(struct amdgpu_device *adev) -{ - return 0; -} - -static int kgd_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -576,13 +562,6 @@ static int kgd_wave_control_execute(struct amdgpu_device *adev, return 0; } -static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return 0; -} - static void set_scratch_backing_va(struct amdgpu_device *adev, uint64_t va, uint32_t vmid) { @@ -614,10 +593,7 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 1abf662a0e91..53895a41932e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -622,20 +622,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -int kgd_gfx_v9_address_watch_disable(struct amdgpu_device *adev) -{ - return 0; -} - -int kgd_gfx_v9_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -660,13 +646,6 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, return 0; } -uint32_t kgd_gfx_v9_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return 0; -} - void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -888,10 +867,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_gfx_v9_address_watch_disable, - .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, - .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 24be49df26fd..c7ed3bc9053c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -46,19 +46,9 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); -int kgd_gfx_v9_address_watch_disable(struct amdgpu_device *adev); -int kgd_gfx_v9_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd); -uint32_t kgd_gfx_v9_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset); - bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index c4f3aff11072..19cfbf9577b4 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -51,8 +51,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_events.o \ $(AMDKFD_PATH)/cik_event_interrupt.o \ $(AMDKFD_PATH)/kfd_int_process_v9.o \ - $(AMDKFD_PATH)/kfd_dbgdev.o \ - $(AMDKFD_PATH)/kfd_dbgmgr.o \ $(AMDKFD_PATH)/kfd_smi_events.o \ $(AMDKFD_PATH)/kfd_crat.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 64e3b4e3a712..13e46631e289 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -39,7 +39,6 @@ #include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" -#include "kfd_dbgmgr.h" #include "kfd_svm.h" #include "amdgpu_amdkfd.h" #include "kfd_smi_events.h" @@ -580,299 +579,26 @@ err_pdd: static int kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) { - struct kfd_ioctl_dbg_register_args *args = data; - struct kfd_dev *dev; - struct kfd_dbgmgr *dbgmgr_ptr; - struct kfd_process_device *pdd; - bool create_ok; - long status = 0; - - mutex_lock(&p->mutex); - pdd = kfd_process_device_data_by_id(p, args->gpu_id); - if (!pdd) { - status = -EINVAL; - goto err_pdd; - } - dev = pdd->dev; - - if (dev->adev->asic_type == CHIP_CARRIZO) { - pr_debug("kfd_ioctl_dbg_register not supported on CZ\n"); - status = -EINVAL; - goto err_chip_unsupp; - } - - mutex_lock(kfd_get_dbgmgr_mutex()); - - /* - * make sure that we have pdd, if this the first queue created for - * this process - */ - pdd = kfd_bind_process_to_device(dev, p); - if (IS_ERR(pdd)) { - status = PTR_ERR(pdd); - goto out; - } - - if (!dev->dbgmgr) { - /* In case of a legal call, we have no dbgmgr yet */ - create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev); - if (create_ok) { - status = kfd_dbgmgr_register(dbgmgr_ptr, p); - if (status != 0) - kfd_dbgmgr_destroy(dbgmgr_ptr); - else - dev->dbgmgr = dbgmgr_ptr; - } - } else { - pr_debug("debugger already registered\n"); - status = -EINVAL; - } - -out: - mutex_unlock(kfd_get_dbgmgr_mutex()); -err_pdd: -err_chip_unsupp: - mutex_unlock(&p->mutex); - - return status; + return -EPERM; } static int kfd_ioctl_dbg_unregister(struct file *filep, struct kfd_process *p, void *data) { - struct kfd_ioctl_dbg_unregister_args *args = data; - struct kfd_process_device *pdd; - struct kfd_dev *dev; - long status; - - mutex_lock(&p->mutex); - pdd = 
kfd_process_device_data_by_id(p, args->gpu_id); - mutex_unlock(&p->mutex); - if (!pdd || !pdd->dev->dbgmgr) - return -EINVAL; - - dev = pdd->dev; - - if (dev->adev->asic_type == CHIP_CARRIZO) { - pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n"); - return -EINVAL; - } - - mutex_lock(kfd_get_dbgmgr_mutex()); - - status = kfd_dbgmgr_unregister(dev->dbgmgr, p); - if (!status) { - kfd_dbgmgr_destroy(dev->dbgmgr); - dev->dbgmgr = NULL; - } - - mutex_unlock(kfd_get_dbgmgr_mutex()); - - return status; + return -EPERM; } -/* - * Parse and generate variable size data structure for address watch. - * Total size of the buffer and # watch points is limited in order - * to prevent kernel abuse. (no bearing to the much smaller HW limitation - * which is enforced by dbgdev module) - * please also note that the watch address itself are not "copied from user", - * since it be set into the HW in user mode values. - * - */ static int kfd_ioctl_dbg_address_watch(struct file *filep, struct kfd_process *p, void *data) { - struct kfd_ioctl_dbg_address_watch_args *args = data; - struct kfd_dev *dev; - struct kfd_process_device *pdd; - struct dbg_address_watch_info aw_info; - unsigned char *args_buff; - long status; - void __user *cmd_from_user; - uint64_t watch_mask_value = 0; - unsigned int args_idx = 0; - - memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info)); - - mutex_lock(&p->mutex); - pdd = kfd_process_device_data_by_id(p, args->gpu_id); - mutex_unlock(&p->mutex); - if (!pdd) { - pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); - return -EINVAL; - } - dev = pdd->dev; - - if (dev->adev->asic_type == CHIP_CARRIZO) { - pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); - return -EINVAL; - } - cmd_from_user = (void __user *) args->content_ptr; - - /* Validate arguments */ - - if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) || - (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) || - (cmd_from_user == NULL)) - return -EINVAL; - - /* this is the actual buffer to work with */ - args_buff = memdup_user(cmd_from_user, - args->buf_size_in_bytes - sizeof(*args)); - if (IS_ERR(args_buff)) - return PTR_ERR(args_buff); - - aw_info.process = p; - - aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx])); - args_idx += sizeof(aw_info.num_watch_points); - - aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx]; - args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points; - - /* - * set watch address base pointer to point on the array base - * within args_buff - */ - aw_info.watch_address = (uint64_t *) &args_buff[args_idx]; - - /* skip over the addresses buffer */ - args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points; - - if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) { - status = -EINVAL; - goto out; - } - - watch_mask_value = (uint64_t) args_buff[args_idx]; - - if (watch_mask_value > 0) { - /* - * There is an array of masks. 
- * set watch mask base pointer to point on the array base - * within args_buff - */ - aw_info.watch_mask = (uint64_t *) &args_buff[args_idx]; - - /* skip over the masks buffer */ - args_idx += sizeof(aw_info.watch_mask) * - aw_info.num_watch_points; - } else { - /* just the NULL mask, set to NULL and skip over it */ - aw_info.watch_mask = NULL; - args_idx += sizeof(aw_info.watch_mask); - } - - if (args_idx >= args->buf_size_in_bytes - sizeof(args)) { - status = -EINVAL; - goto out; - } - - /* Currently HSA Event is not supported for DBG */ - aw_info.watch_event = NULL; - - mutex_lock(kfd_get_dbgmgr_mutex()); - - status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info); - - mutex_unlock(kfd_get_dbgmgr_mutex()); - -out: - kfree(args_buff); - - return status; + return -EPERM; } /* Parse and generate fixed size data structure for wave control */ static int kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, void *data) { - struct kfd_ioctl_dbg_wave_control_args *args = data; - struct kfd_dev *dev; - struct kfd_process_device *pdd; - struct dbg_wave_control_info wac_info; - unsigned char *args_buff; - uint32_t computed_buff_size; - long status; - void __user *cmd_from_user; - unsigned int args_idx = 0; - - memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info)); - - /* we use compact form, independent of the packing attribute value */ - computed_buff_size = sizeof(*args) + - sizeof(wac_info.mode) + - sizeof(wac_info.operand) + - sizeof(wac_info.dbgWave_msg.DbgWaveMsg) + - sizeof(wac_info.dbgWave_msg.MemoryVA) + - sizeof(wac_info.trapId); - - mutex_lock(&p->mutex); - pdd = kfd_process_device_data_by_id(p, args->gpu_id); - mutex_unlock(&p->mutex); - if (!pdd) { - pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); - return -EINVAL; - } - dev = pdd->dev; - - if (dev->adev->asic_type == CHIP_CARRIZO) { - pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); - return -EINVAL; - } - - /* input size must match the computed "compact" size */ - if (args->buf_size_in_bytes != computed_buff_size) { - pr_debug("size mismatch, computed : actual %u : %u\n", - args->buf_size_in_bytes, computed_buff_size); - return -EINVAL; - } - - cmd_from_user = (void __user *) args->content_ptr; - - if (cmd_from_user == NULL) - return -EINVAL; - - /* copy the entire buffer from user */ - - args_buff = memdup_user(cmd_from_user, - args->buf_size_in_bytes - sizeof(*args)); - if (IS_ERR(args_buff)) - return PTR_ERR(args_buff); - - /* move ptr to the start of the "pay-load" area */ - wac_info.process = p; - - wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx])); - args_idx += sizeof(wac_info.operand); - - wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx])); - args_idx += sizeof(wac_info.mode); - - wac_info.trapId = *((uint32_t *)(&args_buff[args_idx])); - args_idx += sizeof(wac_info.trapId); - - wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = - *((uint32_t *)(&args_buff[args_idx])); - wac_info.dbgWave_msg.MemoryVA = NULL; - - mutex_lock(kfd_get_dbgmgr_mutex()); - - pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n", - wac_info.process, wac_info.operand, - wac_info.mode, wac_info.trapId, - wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); - - status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info); - - pr_debug("Returned status of dbg manager is %ld\n", status); - - mutex_unlock(kfd_get_dbgmgr_mutex()); - - kfree(args_buff); - - return status; + return -EPERM; } static int 
kfd_ioctl_get_clock_counters(struct file *filep, @@ -2900,16 +2626,16 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS, kfd_ioctl_wait_events, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER, + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED, kfd_ioctl_dbg_register, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER, + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED, kfd_ioctl_dbg_unregister, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH, + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED, kfd_ioctl_dbg_address_watch, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED, kfd_ioctl_dbg_wave_control, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c deleted file mode 100644 index 8eca9ed3ab36..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ /dev/null @@ -1,845 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "kfd_pm4_headers.h" -#include "kfd_pm4_headers_diq.h" -#include "kfd_kernel_queue.h" -#include "kfd_priv.h" -#include "kfd_pm4_opcodes.h" -#include "cik_regs.h" -#include "kfd_dbgmgr.h" -#include "kfd_dbgdev.h" -#include "kfd_device_queue_manager.h" - -static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) -{ - dev->kfd2kgd->address_watch_disable(dev->adev); -} - -static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, - u32 pasid, uint64_t vmid0_address, - uint32_t *packet_buff, size_t size_in_bytes) -{ - struct pm4__release_mem *rm_packet; - struct pm4__indirect_buffer_pasid *ib_packet; - struct kfd_mem_obj *mem_obj; - size_t pq_packets_size_in_bytes; - union ULARGE_INTEGER *largep; - union ULARGE_INTEGER addr; - struct kernel_queue *kq; - uint64_t *rm_state; - unsigned int *ib_packet_buff; - int status; - - if (WARN_ON(!size_in_bytes)) - return -EINVAL; - - kq = dbgdev->kq; - - pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + - sizeof(struct pm4__indirect_buffer_pasid); - - /* - * We acquire a buffer from DIQ - * The receive packet buff will be sitting on the Indirect Buffer - * and in the PQ we put the IB packet + sync packet(s). 
- */ - status = kq_acquire_packet_buffer(kq, - pq_packets_size_in_bytes / sizeof(uint32_t), - &ib_packet_buff); - if (status) { - pr_err("kq_acquire_packet_buffer failed\n"); - return status; - } - - memset(ib_packet_buff, 0, pq_packets_size_in_bytes); - - ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); - - ib_packet->header.count = 3; - ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; - ib_packet->header.type = PM4_TYPE_3; - - largep = (union ULARGE_INTEGER *) &vmid0_address; - - ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; - ib_packet->bitfields3.ib_base_hi = largep->u.high_part; - - ib_packet->control = (1 << 23) | (1 << 31) | - ((size_in_bytes / 4) & 0xfffff); - - ib_packet->bitfields5.pasid = pasid; - - /* - * for now we use release mem for GPU-CPU synchronization - * Consider WaitRegMem + WriteData as a better alternative - * we get a GART allocations ( gpu/cpu mapping), - * for the sync variable, and wait until: - * (a) Sync with HW - * (b) Sync var is written by CP to mem. - */ - rm_packet = (struct pm4__release_mem *) (ib_packet_buff + - (sizeof(struct pm4__indirect_buffer_pasid) / - sizeof(unsigned int))); - - status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), - &mem_obj); - - if (status) { - pr_err("Failed to allocate GART memory\n"); - kq_rollback_packet(kq); - return status; - } - - rm_state = (uint64_t *) mem_obj->cpu_ptr; - - *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; - - rm_packet->header.opcode = IT_RELEASE_MEM; - rm_packet->header.type = PM4_TYPE_3; - rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2; - - rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; - rm_packet->bitfields2.event_index = - event_index___release_mem__end_of_pipe; - - rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; - rm_packet->bitfields2.atc = 0; - rm_packet->bitfields2.tc_wb_action_ena = 1; - - addr.quad_part = mem_obj->gpu_addr; - - rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; - rm_packet->address_hi = addr.u.high_part; - - rm_packet->bitfields3.data_sel = - data_sel___release_mem__send_64_bit_data; - - rm_packet->bitfields3.int_sel = - int_sel___release_mem__send_data_after_write_confirm; - - rm_packet->bitfields3.dst_sel = - dst_sel___release_mem__memory_controller; - - rm_packet->data_lo = QUEUESTATE__ACTIVE; - - kq_submit_packet(kq); - - /* Wait till CP writes sync code: */ - status = amdkfd_fence_wait_timeout( - rm_state, - QUEUESTATE__ACTIVE, 1500); - - kfd_gtt_sa_free(dbgdev->dev, mem_obj); - - return status; -} - -static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) -{ - /* - * no action is needed in this case, - * just make sure diq will not be used - */ - - dbgdev->kq = NULL; - - return 0; -} - -static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) -{ - struct queue_properties properties; - unsigned int qid; - struct kernel_queue *kq = NULL; - int status; - - properties.type = KFD_QUEUE_TYPE_DIQ; - - status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, - &properties, &qid, NULL, NULL, NULL, NULL); - - if (status) { - pr_err("Failed to create DIQ\n"); - return status; - } - - pr_debug("DIQ Created with queue id: %d\n", qid); - - kq = pqm_get_kernel_queue(dbgdev->pqm, qid); - - if (!kq) { - pr_err("Error getting DIQ\n"); - pqm_destroy_queue(dbgdev->pqm, qid); - return -EFAULT; - } - - dbgdev->kq = kq; - - return status; -} - -static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) -{ - /* disable watch address */ - 
dbgdev_address_watch_disable_nodiq(dbgdev->dev); - return 0; -} - -static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) -{ - /* todo - disable address watch */ - int status; - - status = pqm_destroy_queue(dbgdev->pqm, - dbgdev->kq->queue->properties.queue_id); - dbgdev->kq = NULL; - - return status; -} - -static void dbgdev_address_watch_set_registers( - const struct dbg_address_watch_info *adw_info, - union TCP_WATCH_ADDR_H_BITS *addrHi, - union TCP_WATCH_ADDR_L_BITS *addrLo, - union TCP_WATCH_CNTL_BITS *cntl, - unsigned int index, unsigned int vmid) -{ - union ULARGE_INTEGER addr; - - addr.quad_part = 0; - addrHi->u32All = 0; - addrLo->u32All = 0; - cntl->u32All = 0; - - if (adw_info->watch_mask) - cntl->bitfields.mask = - (uint32_t) (adw_info->watch_mask[index] & - ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); - else - cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; - - addr.quad_part = (unsigned long long) adw_info->watch_address[index]; - - addrHi->bitfields.addr = addr.u.high_part & - ADDRESS_WATCH_REG_ADDHIGH_MASK; - addrLo->bitfields.addr = - (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); - - cntl->bitfields.mode = adw_info->watch_mode[index]; - cntl->bitfields.vmid = (uint32_t) vmid; - /* for now assume it is an ATC address */ - cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; - - pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); - pr_debug("\t\t%20s %08x\n", "set reg add high :", - addrHi->bitfields.addr); - pr_debug("\t\t%20s %08x\n", "set reg add low :", - addrLo->bitfields.addr); -} - -static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, - struct dbg_address_watch_info *adw_info) -{ - union TCP_WATCH_ADDR_H_BITS addrHi; - union TCP_WATCH_ADDR_L_BITS addrLo; - union TCP_WATCH_CNTL_BITS cntl; - struct kfd_process_device *pdd; - unsigned int i; - - /* taking the vmid for that process on the safe way using pdd */ - pdd = kfd_get_process_device_data(dbgdev->dev, - adw_info->process); - if (!pdd) { - pr_err("Failed to get pdd for wave control no DIQ\n"); - return -EFAULT; - } - - addrHi.u32All = 0; - addrLo.u32All = 0; - cntl.u32All = 0; - - if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || - (adw_info->num_watch_points == 0)) { - pr_err("num_watch_points is invalid\n"); - return -EINVAL; - } - - if (!adw_info->watch_mode || !adw_info->watch_address) { - pr_err("adw_info fields are not valid\n"); - return -EINVAL; - } - - for (i = 0; i < adw_info->num_watch_points; i++) { - dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, - &cntl, i, pdd->qpd.vmid); - - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - pr_debug("\t\t%20s %08x\n", "register index :", i); - pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); - pr_debug("\t\t%20s %08x\n", "Address Low is :", - addrLo.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Address high is :", - addrHi.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Address high is :", - addrHi.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Control Mask is :", - cntl.bitfields.mask); - pr_debug("\t\t%20s %08x\n", "Control Mode is :", - cntl.bitfields.mode); - pr_debug("\t\t%20s %08x\n", "Control Vmid is :", - cntl.bitfields.vmid); - pr_debug("\t\t%20s %08x\n", "Control atc is :", - cntl.bitfields.atc); - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - - pdd->dev->kfd2kgd->address_watch_execute( - dbgdev->dev->adev, - i, - cntl.u32All, - addrHi.u32All, - addrLo.u32All); - } - - return 0; -} - -static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, - 
struct dbg_address_watch_info *adw_info) -{ - struct pm4__set_config_reg *packets_vec; - union TCP_WATCH_ADDR_H_BITS addrHi; - union TCP_WATCH_ADDR_L_BITS addrLo; - union TCP_WATCH_CNTL_BITS cntl; - struct kfd_mem_obj *mem_obj; - unsigned int aw_reg_add_dword; - uint32_t *packet_buff_uint; - unsigned int i; - int status; - size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; - /* we do not control the vmid in DIQ mode, just a place holder */ - unsigned int vmid = 0; - - addrHi.u32All = 0; - addrLo.u32All = 0; - cntl.u32All = 0; - - if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || - (adw_info->num_watch_points == 0)) { - pr_err("num_watch_points is invalid\n"); - return -EINVAL; - } - - if (!adw_info->watch_mode || !adw_info->watch_address) { - pr_err("adw_info fields are not valid\n"); - return -EINVAL; - } - - status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); - - if (status) { - pr_err("Failed to allocate GART memory\n"); - return status; - } - - packet_buff_uint = mem_obj->cpu_ptr; - - memset(packet_buff_uint, 0, ib_size); - - packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); - - packets_vec[0].header.count = 1; - packets_vec[0].header.opcode = IT_SET_CONFIG_REG; - packets_vec[0].header.type = PM4_TYPE_3; - packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; - packets_vec[0].bitfields2.insert_vmid = 1; - packets_vec[1].ordinal1 = packets_vec[0].ordinal1; - packets_vec[1].bitfields2.insert_vmid = 0; - packets_vec[2].ordinal1 = packets_vec[0].ordinal1; - packets_vec[2].bitfields2.insert_vmid = 0; - packets_vec[3].ordinal1 = packets_vec[0].ordinal1; - packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; - packets_vec[3].bitfields2.insert_vmid = 1; - - for (i = 0; i < adw_info->num_watch_points; i++) { - dbgdev_address_watch_set_registers(adw_info, - &addrHi, - &addrLo, - &cntl, - i, - vmid); - - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - pr_debug("\t\t%20s %08x\n", "register index :", i); - pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); - pr_debug("\t\t%20s %p\n", "Add ptr is :", - adw_info->watch_address); - pr_debug("\t\t%20s %08llx\n", "Add is :", - adw_info->watch_address[i]); - pr_debug("\t\t%20s %08x\n", "Address Low is :", - addrLo.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Address high is :", - addrHi.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Control Mask is :", - cntl.bitfields.mask); - pr_debug("\t\t%20s %08x\n", "Control Mode is :", - cntl.bitfields.mode); - pr_debug("\t\t%20s %08x\n", "Control Vmid is :", - cntl.bitfields.vmid); - pr_debug("\t\t%20s %08x\n", "Control atc is :", - cntl.bitfields.atc); - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - - aw_reg_add_dword = - dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->adev, - i, - ADDRESS_WATCH_REG_CNTL); - - packets_vec[0].bitfields2.reg_offset = - aw_reg_add_dword - AMD_CONFIG_REG_BASE; - - packets_vec[0].reg_data[0] = cntl.u32All; - - aw_reg_add_dword = - dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->adev, - i, - ADDRESS_WATCH_REG_ADDR_HI); - - packets_vec[1].bitfields2.reg_offset = - aw_reg_add_dword - AMD_CONFIG_REG_BASE; - packets_vec[1].reg_data[0] = addrHi.u32All; - - aw_reg_add_dword = - dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->adev, - i, - ADDRESS_WATCH_REG_ADDR_LO); - - packets_vec[2].bitfields2.reg_offset = - aw_reg_add_dword - AMD_CONFIG_REG_BASE; - packets_vec[2].reg_data[0] = addrLo.u32All; - - /* enable watch flag if address is not zero*/ - if 
(adw_info->watch_address[i] > 0) - cntl.bitfields.valid = 1; - else - cntl.bitfields.valid = 0; - - aw_reg_add_dword = - dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->adev, - i, - ADDRESS_WATCH_REG_CNTL); - - packets_vec[3].bitfields2.reg_offset = - aw_reg_add_dword - AMD_CONFIG_REG_BASE; - packets_vec[3].reg_data[0] = cntl.u32All; - - status = dbgdev_diq_submit_ib( - dbgdev, - adw_info->process->pasid, - mem_obj->gpu_addr, - packet_buff_uint, - ib_size); - - if (status) { - pr_err("Failed to submit IB to DIQ\n"); - break; - } - } - - kfd_gtt_sa_free(dbgdev->dev, mem_obj); - return status; -} - -static int dbgdev_wave_control_set_registers( - struct dbg_wave_control_info *wac_info, - union SQ_CMD_BITS *in_reg_sq_cmd, - union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) -{ - int status = 0; - union SQ_CMD_BITS reg_sq_cmd; - union GRBM_GFX_INDEX_BITS reg_gfx_index; - struct HsaDbgWaveMsgAMDGen2 *pMsg; - - reg_sq_cmd.u32All = 0; - reg_gfx_index.u32All = 0; - pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; - - switch (wac_info->mode) { - /* Send command to single wave */ - case HSA_DBG_WAVEMODE_SINGLE: - /* - * Limit access to the process waves only, - * by setting vmid check - */ - reg_sq_cmd.bits.check_vmid = 1; - reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; - reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; - reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; - - reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; - reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; - reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; - - break; - - /* Send command to all waves with matching VMID */ - case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: - - reg_gfx_index.bits.sh_broadcast_writes = 1; - reg_gfx_index.bits.se_broadcast_writes = 1; - reg_gfx_index.bits.instance_broadcast_writes = 1; - - reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; - - break; - - /* Send command to all CU waves with matching VMID */ - case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: - - reg_sq_cmd.bits.check_vmid = 1; - reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; - - reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; - reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; - reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; - - break; - - default: - return -EINVAL; - } - - switch (wac_info->operand) { - case HSA_DBG_WAVEOP_HALT: - reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT; - break; - - case HSA_DBG_WAVEOP_RESUME: - reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME; - break; - - case HSA_DBG_WAVEOP_KILL: - reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; - break; - - case HSA_DBG_WAVEOP_DEBUG: - reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG; - break; - - case HSA_DBG_WAVEOP_TRAP: - if (wac_info->trapId < MAX_TRAPID) { - reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP; - reg_sq_cmd.bits.trap_id = wac_info->trapId; - } else { - status = -EINVAL; - } - break; - - default: - status = -EINVAL; - break; - } - - if (status == 0) { - *in_reg_sq_cmd = reg_sq_cmd; - *in_reg_gfx_index = reg_gfx_index; - } - - return status; -} - -static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, - struct dbg_wave_control_info *wac_info) -{ - - int status; - union SQ_CMD_BITS reg_sq_cmd; - union GRBM_GFX_INDEX_BITS reg_gfx_index; - struct kfd_mem_obj *mem_obj; - uint32_t *packet_buff_uint; - struct pm4__set_config_reg *packets_vec; - size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; - - reg_sq_cmd.u32All = 0; - - status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd, - &reg_gfx_index); - if (status) { - pr_err("Failed to set wave 
control registers\n"); - return status; - } - - /* we do not control the VMID in DIQ, so reset it to a known value */ - reg_sq_cmd.bits.vm_id = 0; - - pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - - pr_debug("\t\t mode is: %u\n", wac_info->mode); - pr_debug("\t\t operand is: %u\n", wac_info->operand); - pr_debug("\t\t trap id is: %u\n", wac_info->trapId); - pr_debug("\t\t msg value is: %u\n", - wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); - pr_debug("\t\t vmid is: N/A\n"); - - pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); - pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); - pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); - pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); - pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); - pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); - pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); - - pr_debug("\t\t ibw is : %u\n", - reg_gfx_index.bitfields.instance_broadcast_writes); - pr_debug("\t\t ii is : %u\n", - reg_gfx_index.bitfields.instance_index); - pr_debug("\t\t sebw is : %u\n", - reg_gfx_index.bitfields.se_broadcast_writes); - pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); - pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); - pr_debug("\t\t sbw is : %u\n", - reg_gfx_index.bitfields.sh_broadcast_writes); - - pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - - status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); - - if (status != 0) { - pr_err("Failed to allocate GART memory\n"); - return status; - } - - packet_buff_uint = mem_obj->cpu_ptr; - - memset(packet_buff_uint, 0, ib_size); - - packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; - packets_vec[0].header.count = 1; - packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; - packets_vec[0].header.type = PM4_TYPE_3; - packets_vec[0].bitfields2.reg_offset = - GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; - - packets_vec[0].bitfields2.insert_vmid = 0; - packets_vec[0].reg_data[0] = reg_gfx_index.u32All; - - packets_vec[1].header.count = 1; - packets_vec[1].header.opcode = IT_SET_CONFIG_REG; - packets_vec[1].header.type = PM4_TYPE_3; - packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE; - - packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; - packets_vec[1].bitfields2.insert_vmid = 1; - packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; - - /* Restore the GRBM_GFX_INDEX register */ - - reg_gfx_index.u32All = 0; - reg_gfx_index.bits.sh_broadcast_writes = 1; - reg_gfx_index.bits.instance_broadcast_writes = 1; - reg_gfx_index.bits.se_broadcast_writes = 1; - - - packets_vec[2].ordinal1 = packets_vec[0].ordinal1; - packets_vec[2].bitfields2.reg_offset = - GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; - - packets_vec[2].bitfields2.insert_vmid = 0; - packets_vec[2].reg_data[0] = reg_gfx_index.u32All; - - status = dbgdev_diq_submit_ib( - dbgdev, - wac_info->process->pasid, - mem_obj->gpu_addr, - packet_buff_uint, - ib_size); - - if (status) - pr_err("Failed to submit IB to DIQ\n"); - - kfd_gtt_sa_free(dbgdev->dev, mem_obj); - - return status; -} - -static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, - struct dbg_wave_control_info *wac_info) -{ - int status; - union SQ_CMD_BITS reg_sq_cmd; - union GRBM_GFX_INDEX_BITS reg_gfx_index; - struct kfd_process_device *pdd; - - reg_sq_cmd.u32All = 0; - - /* taking the VMID for that process on the safe way using PDD */ - pdd = 
kfd_get_process_device_data(dbgdev->dev, wac_info->process); - - if (!pdd) { - pr_err("Failed to get pdd for wave control no DIQ\n"); - return -EFAULT; - } - status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd, - &reg_gfx_index); - if (status) { - pr_err("Failed to set wave control registers\n"); - return status; - } - - /* for non DIQ we need to patch the VMID: */ - - reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; - - pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - - pr_debug("\t\t mode is: %u\n", wac_info->mode); - pr_debug("\t\t operand is: %u\n", wac_info->operand); - pr_debug("\t\t trap id is: %u\n", wac_info->trapId); - pr_debug("\t\t msg value is: %u\n", - wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); - pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid); - - pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); - pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); - pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); - pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); - pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); - pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); - pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); - - pr_debug("\t\t ibw is : %u\n", - reg_gfx_index.bitfields.instance_broadcast_writes); - pr_debug("\t\t ii is : %u\n", - reg_gfx_index.bitfields.instance_index); - pr_debug("\t\t sebw is : %u\n", - reg_gfx_index.bitfields.se_broadcast_writes); - pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); - pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); - pr_debug("\t\t sbw is : %u\n", - reg_gfx_index.bitfields.sh_broadcast_writes); - - pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - - return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->adev, - reg_gfx_index.u32All, - reg_sq_cmd.u32All); -} - -int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) -{ - int status = 0; - unsigned int vmid; - uint16_t queried_pasid; - union SQ_CMD_BITS reg_sq_cmd; - union GRBM_GFX_INDEX_BITS reg_gfx_index; - struct kfd_process_device *pdd; - struct dbg_wave_control_info wac_info; - int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; - int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; - - reg_sq_cmd.u32All = 0; - status = 0; - - wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; - wac_info.operand = HSA_DBG_WAVEOP_KILL; - - pr_debug("Killing all process wavefronts\n"); - - /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. - * ATC_VMID15_PASID_MAPPING - * to check which VMID the current process is mapped to. 
- */ - - for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { - status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info - (dev->adev, vmid, &queried_pasid); - - if (status && queried_pasid == p->pasid) { - pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", - vmid, p->pasid); - break; - } - } - - if (vmid > last_vmid_to_scan) { - pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); - return -EFAULT; - } - - /* taking the VMID for that process on the safe way using PDD */ - pdd = kfd_get_process_device_data(dev, p); - if (!pdd) - return -EFAULT; - - status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd, - &reg_gfx_index); - if (status != 0) - return -EINVAL; - - /* for non DIQ we need to patch the VMID: */ - reg_sq_cmd.bits.vm_id = vmid; - - dev->kfd2kgd->wave_control_execute(dev->adev, - reg_gfx_index.u32All, - reg_sq_cmd.u32All); - - return 0; -} - -void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, - enum DBGDEV_TYPE type) -{ - pdbgdev->dev = pdev; - pdbgdev->kq = NULL; - pdbgdev->type = type; - pdbgdev->pqm = NULL; - - switch (type) { - case DBGDEV_TYPE_NODIQ: - pdbgdev->dbgdev_register = dbgdev_register_nodiq; - pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; - pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; - pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; - break; - case DBGDEV_TYPE_DIQ: - default: - pdbgdev->dbgdev_register = dbgdev_register_diq; - pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; - pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; - pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; - break; - } - -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h deleted file mode 100644 index 0619c777b47e..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef KFD_DBGDEV_H_ -#define KFD_DBGDEV_H_ - -enum { - SQ_CMD_VMID_OFFSET = 28, - ADDRESS_WATCH_CNTL_OFFSET = 24 -}; - -enum { - PRIV_QUEUE_SYNC_TIME_MS = 200 -}; - -/* CONTEXT reg space definition */ -enum { - CONTEXT_REG_BASE = 0xA000, - CONTEXT_REG_END = 0xA400, - CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE -}; - -/* USER CONFIG reg space definition */ -enum { - USERCONFIG_REG_BASE = 0xC000, - USERCONFIG_REG_END = 0x10000, - USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE -}; - -/* CONFIG reg space definition */ -enum { - AMD_CONFIG_REG_BASE = 0x2000, /* in dwords */ - AMD_CONFIG_REG_END = 0x2B00, - AMD_CONFIG_REG_SIZE = AMD_CONFIG_REG_END - AMD_CONFIG_REG_BASE -}; - -/* SH reg space definition */ -enum { - SH_REG_BASE = 0x2C00, - SH_REG_END = 0x3000, - SH_REG_SIZE = SH_REG_END - SH_REG_BASE -}; - -/* SQ_CMD definitions */ -#define SQ_CMD 0x8DEC - -enum SQ_IND_CMD_CMD { - SQ_IND_CMD_CMD_NULL = 0x00000000, - SQ_IND_CMD_CMD_HALT = 0x00000001, - SQ_IND_CMD_CMD_RESUME = 0x00000002, - SQ_IND_CMD_CMD_KILL = 0x00000003, - SQ_IND_CMD_CMD_DEBUG = 0x00000004, - SQ_IND_CMD_CMD_TRAP = 0x00000005, -}; - -enum SQ_IND_CMD_MODE { - SQ_IND_CMD_MODE_SINGLE = 0x00000000, - SQ_IND_CMD_MODE_BROADCAST = 0x00000001, - SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002, - SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003, - SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004, -}; - -union SQ_IND_INDEX_BITS { - struct { - uint32_t wave_id:4; - uint32_t simd_id:2; - uint32_t thread_id:6; - uint32_t:1; - uint32_t force_read:1; - uint32_t read_timeout:1; - uint32_t unindexed:1; - uint32_t index:16; - - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union SQ_IND_CMD_BITS { - struct { - uint32_t data:32; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union SQ_CMD_BITS { - struct { - uint32_t cmd:3; - uint32_t:1; - uint32_t mode:3; - uint32_t check_vmid:1; - uint32_t trap_id:3; - uint32_t:5; - uint32_t wave_id:4; - uint32_t simd_id:2; - uint32_t:2; - uint32_t queue_id:3; - uint32_t:1; - uint32_t vm_id:4; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union SQ_IND_DATA_BITS { - struct { - uint32_t data:32; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union GRBM_GFX_INDEX_BITS { - struct { - uint32_t instance_index:8; - uint32_t sh_index:8; - uint32_t se_index:8; - uint32_t:5; - uint32_t sh_broadcast_writes:1; - uint32_t instance_broadcast_writes:1; - uint32_t se_broadcast_writes:1; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union TCP_WATCH_ADDR_H_BITS { - struct { - uint32_t addr:16; - uint32_t:16; - - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union TCP_WATCH_ADDR_L_BITS { - struct { - uint32_t:6; - uint32_t addr:26; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -enum { - QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */ - QUEUESTATE__ACTIVE_COMPLETION_PENDING, - QUEUESTATE__ACTIVE -}; - -union ULARGE_INTEGER { - struct { - uint32_t low_part; - uint32_t high_part; - } u; - unsigned long long quad_part; -}; - - -#define KFD_CIK_VMID_START_OFFSET (8) -#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8)) - - -void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, - enum DBGDEV_TYPE type); - -union TCP_WATCH_CNTL_BITS { - struct { - uint32_t mask:24; - uint32_t vmid:4; - uint32_t atc:1; - uint32_t 
mode:2; - uint32_t valid:1; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -enum { - ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, - ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, - ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, - /* extend the mask to 26 bits in order to match the low address field */ - ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, - ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF -}; - -enum { - MAX_TRAPID = 8, /* 3 bits in the bitfield. */ - MAX_WATCH_ADDRESSES = 4 -}; - -enum { - ADDRESS_WATCH_REG_ADDR_HI = 0, - ADDRESS_WATCH_REG_ADDR_LO, - ADDRESS_WATCH_REG_CNTL, - ADDRESS_WATCH_REG_MAX -}; - -#endif /* KFD_DBGDEV_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c deleted file mode 100644 index 9bfa50633654..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ -#include -#include -#include -#include -#include -#include - -#include "kfd_priv.h" -#include "cik_regs.h" -#include "kfd_pm4_headers.h" -#include "kfd_pm4_headers_diq.h" -#include "kfd_dbgmgr.h" -#include "kfd_dbgdev.h" -#include "kfd_device_queue_manager.h" - -static DEFINE_MUTEX(kfd_dbgmgr_mutex); - -struct mutex *kfd_get_dbgmgr_mutex(void) -{ - return &kfd_dbgmgr_mutex; -} - - -static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) -{ - kfree(pmgr->dbgdev); - - pmgr->dbgdev = NULL; - pmgr->pasid = 0; - pmgr->dev = NULL; -} - -void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) -{ - if (pmgr) { - kfd_dbgmgr_uninitialize(pmgr); - kfree(pmgr); - } -} - -bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) -{ - enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; - struct kfd_dbgmgr *new_buff; - - if (WARN_ON(!pdev->init_complete)) - return false; - - new_buff = kfd_alloc_struct(new_buff); - if (!new_buff) { - pr_err("Failed to allocate dbgmgr instance\n"); - return false; - } - - new_buff->pasid = 0; - new_buff->dev = pdev; - new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev); - if (!new_buff->dbgdev) { - pr_err("Failed to allocate dbgdev instance\n"); - kfree(new_buff); - return false; - } - - /* get actual type of DBGDevice cpsch or not */ - if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) - type = DBGDEV_TYPE_NODIQ; - - kfd_dbgdev_init(new_buff->dbgdev, pdev, type); - *ppmgr = new_buff; - - return true; -} - -long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) -{ - if (pmgr->pasid != 0) { - pr_debug("H/W debugger is already active using pasid 0x%x\n", - pmgr->pasid); - return -EBUSY; - } - - /* remember pasid */ - pmgr->pasid = p->pasid; - - /* provide the pqm for diq generation */ - pmgr->dbgdev->pqm = &p->pqm; - - /* activate the actual registering */ - pmgr->dbgdev->dbgdev_register(pmgr->dbgdev); - - return 0; -} - -long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) -{ - /* Is the requests coming from the already registered process? */ - if (pmgr->pasid != p->pasid) { - pr_debug("H/W debugger is not registered by calling pasid 0x%x\n", - p->pasid); - return -EINVAL; - } - - pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev); - - pmgr->pasid = 0; - - return 0; -} - -long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, - struct dbg_wave_control_info *wac_info) -{ - /* Is the requests coming from the already registered process? */ - if (pmgr->pasid != wac_info->process->pasid) { - pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n", - wac_info->process->pasid); - return -EINVAL; - } - - return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info); -} - -long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, - struct dbg_address_watch_info *adw_info) -{ - /* Is the requests coming from the already registered process? */ - if (pmgr->pasid != adw_info->process->pasid) { - pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n", - adw_info->process->pasid); - return -EINVAL; - } - - return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev, - adw_info); -} - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h deleted file mode 100644 index f9c6df1fdc5c..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef KFD_DBGMGR_H_ -#define KFD_DBGMGR_H_ - -#include "kfd_priv.h" - -/* must align with hsakmttypes definition */ -#pragma pack(push, 4) - -enum HSA_DBG_WAVEOP { - HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ - HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ - HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ - HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */ - HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */ - HSA_DBG_NUM_WAVEOP = 5, - HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF -}; - -enum HSA_DBG_WAVEMODE { - /* send command to a single wave */ - HSA_DBG_WAVEMODE_SINGLE = 0, - /* - * Broadcast to all wavefronts of all processes is not - * supported for HSA user mode - */ - - /* send to waves within current process */ - HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, - /* send to waves within current process on CU */ - HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, - HSA_DBG_NUM_WAVEMODE = 3, - HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF -}; - -enum HSA_DBG_WAVEMSG_TYPE { - HSA_DBG_WAVEMSG_AUTO = 0, - HSA_DBG_WAVEMSG_USER = 1, - HSA_DBG_WAVEMSG_ERROR = 2, - HSA_DBG_NUM_WAVEMSG, - HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF -}; - -enum HSA_DBG_WATCH_MODE { - HSA_DBG_WATCH_READ = 0, /* Read operations only */ - HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */ - HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */ - HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */ - HSA_DBG_WATCH_NUM, - HSA_DBG_WATCH_SIZE = 0xFFFFFFFF -}; - -/* This structure is hardware specific and may change in the future */ -struct HsaDbgWaveMsgAMDGen2 { - union { - struct ui32 { - uint32_t UserData:8; /* user data */ - uint32_t ShaderArray:1; /* Shader array */ - uint32_t Priv:1; /* Privileged */ - uint32_t Reserved0:4; /* Reserved, should be 0 */ - uint32_t WaveId:4; /* wave id */ - uint32_t SIMD:2; /* SIMD id */ - uint32_t HSACU:4; /* Compute unit */ - uint32_t ShaderEngine:2;/* Shader engine */ - uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */ - uint32_t Reserved1:4; /* Reserved, should be 0 */ - } ui32; - uint32_t Value; - }; - uint32_t Reserved2; -}; - -union HsaDbgWaveMessageAMD { - struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2; - /* for future HsaDbgWaveMsgAMDGen3; */ -}; - -struct HsaDbgWaveMessage { - void *MemoryVA; /* ptr to associated host-accessible data */ - union HsaDbgWaveMessageAMD DbgWaveMsg; -}; - -/* - * TODO: This definitions to be MOVED to kfd_event, once it is implemented. 
- * - * HSA sync primitive, Event and HW Exception notification API definitions. - * The API functions allow the runtime to define a so-called sync-primitive, - * a SW object combining a user-mode provided "syncvar" and a scheduler event - * that can be signaled through a defined GPU interrupt. A syncvar is - * a process virtual memory location of a certain size that can be accessed - * by CPU and GPU shader code within the process to set and query the content - * within that memory. The definition of the content is determined by the HSA - * runtime and potentially GPU shader code interfacing with the HSA runtime. - * The syncvar values may be commonly written through an PM4 WRITE_DATA packet - * in the user mode instruction stream. The OS scheduler event is typically - * associated and signaled by an interrupt issued by the GPU, but other HSA - * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced - * by the KFD by this mechanism, too. - */ - -/* these are the new definitions for events */ -enum HSA_EVENTTYPE { - HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */ - HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ - HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change - * (start/stop) - */ - HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ - HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ - HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ - HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */ - HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state - * (EOP pm4) - */ - /* ... */ - HSA_EVENTTYPE_MAXID, - HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF -}; - -/* Sub-definitions for various event types: Syncvar */ -struct HsaSyncVar { - union SyncVar { - void *UserData; /* pointer to user mode data */ - uint64_t UserDataPtrValue; /* 64bit compatibility of value */ - } SyncVar; - uint64_t SyncVarSize; -}; - -/* Sub-definitions for various event types: NodeChange */ - -enum HSA_EVENTTYPE_NODECHANGE_FLAGS { - HSA_EVENTTYPE_NODECHANGE_ADD = 0, - HSA_EVENTTYPE_NODECHANGE_REMOVE = 1, - HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF -}; - -struct HsaNodeChange { - /* HSA node added/removed on the platform */ - enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; -}; - -/* Sub-definitions for various event types: DeviceStateChange */ -enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS { - /* device started (and available) */ - HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, - /* device stopped (i.e. 
unavailable) */ - HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, - HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF -}; - -enum HSA_DEVICE { - HSA_DEVICE_CPU = 0, - HSA_DEVICE_GPU = 1, - MAX_HSA_DEVICE = 2 -}; - -struct HsaDeviceStateChange { - uint32_t NodeId; /* F-NUMA node that contains the device */ - enum HSA_DEVICE Device; /* device type: GPU or CPU */ - enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */ -}; - -struct HsaEventData { - enum HSA_EVENTTYPE EventType; /* event type */ - union EventData { - /* - * return data associated with HSA_EVENTTYPE_SIGNAL - * and other events - */ - struct HsaSyncVar SyncVar; - - /* data associated with HSA_EVENTTYPE_NODE_CHANGE */ - struct HsaNodeChange NodeChangeState; - - /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */ - struct HsaDeviceStateChange DeviceState; - } EventData; - - /* the following data entries are internal to the KFD & thunk itself */ - - /* internal thunk store for Event data (OsEventHandle) */ - uint64_t HWData1; - /* internal thunk store for Event data (HWAddress) */ - uint64_t HWData2; - /* internal thunk store for Event data (HWData) */ - uint32_t HWData3; -}; - -struct HsaEventDescriptor { - /* event type to allocate */ - enum HSA_EVENTTYPE EventType; - /* H-NUMA node containing GPU device that is event source */ - uint32_t NodeId; - /* pointer to user mode syncvar data, syncvar->UserDataPtrValue - * may be NULL - */ - struct HsaSyncVar SyncVar; -}; - -struct HsaEvent { - uint32_t EventId; - struct HsaEventData EventData; -}; - -#pragma pack(pop) - -enum DBGDEV_TYPE { - DBGDEV_TYPE_ILLEGAL = 0, - DBGDEV_TYPE_NODIQ = 1, - DBGDEV_TYPE_DIQ = 2, - DBGDEV_TYPE_TEST = 3 -}; - -struct dbg_address_watch_info { - struct kfd_process *process; - enum HSA_DBG_WATCH_MODE *watch_mode; - uint64_t *watch_address; - uint64_t *watch_mask; - struct HsaEvent *watch_event; - uint32_t num_watch_points; -}; - -struct dbg_wave_control_info { - struct kfd_process *process; - uint32_t trapId; - enum HSA_DBG_WAVEOP operand; - enum HSA_DBG_WAVEMODE mode; - struct HsaDbgWaveMessage dbgWave_msg; -}; - -struct kfd_dbgdev { - - /* The device that owns this data. */ - struct kfd_dev *dev; - - /* kernel queue for DIQ */ - struct kernel_queue *kq; - - /* a pointer to the pqm of the calling process */ - struct process_queue_manager *pqm; - - /* type of debug device ( DIQ, non DIQ, etc. 
) */ - enum DBGDEV_TYPE type; - - /* virtualized function pointers to device dbg */ - int (*dbgdev_register)(struct kfd_dbgdev *dbgdev); - int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev); - int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev, - struct dbg_address_watch_info *adw_info); - int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev, - struct dbg_wave_control_info *wac_info); - -}; - -struct kfd_dbgmgr { - u32 pasid; - struct kfd_dev *dev; - struct kfd_dbgdev *dbgdev; -}; - -/* prototypes for debug manager functions */ -struct mutex *kfd_get_dbgmgr_mutex(void); -void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr); -bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev); -long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p); -long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p); -long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, - struct dbg_wave_control_info *wac_info); -long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, - struct dbg_address_watch_info *adw_info); -#endif /* KFD_DBGMGR_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 5a47f437b455..dbb877fba724 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -575,8 +575,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, if (kfd_resume(kfd)) goto kfd_resume_error; - kfd->dbgmgr = NULL; - if (kfd_topology_add_device(kfd)) { dev_err(kfd_device, "Error adding device to topology\n"); goto kfd_topology_add_device_error; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 219acc818eb4..7f6f1a842b0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -480,6 +480,65 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm, dqm->allocated_queues[q->pipe] |= (1 << q->queue); } +#define SQ_IND_CMD_CMD_KILL 0x00000003 +#define SQ_IND_CMD_MODE_BROADCAST 0x00000001 + +static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) +{ + int status = 0; + unsigned int vmid; + uint16_t queried_pasid; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct kfd_process_device *pdd; + int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; + int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; + + reg_sq_cmd.u32All = 0; + reg_gfx_index.u32All = 0; + + pr_debug("Killing all process wavefronts\n"); + + /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. + * ATC_VMID15_PASID_MAPPING + * to check which VMID the current process is mapped to. 
+ */ + + for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { + status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info + (dev->adev, vmid, &queried_pasid); + + if (status && queried_pasid == p->pasid) { + pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", + vmid, p->pasid); + break; + } + } + + if (vmid > last_vmid_to_scan) { + pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); + return -EFAULT; + } + + /* taking the VMID for that process on the safe way using PDD */ + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) + return -EFAULT; + + reg_gfx_index.bits.sh_broadcast_writes = 1; + reg_gfx_index.bits.se_broadcast_writes = 1; + reg_gfx_index.bits.instance_broadcast_writes = 1; + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; + reg_sq_cmd.bits.vm_id = vmid; + + dev->kfd2kgd->wave_control_execute(dev->adev, + reg_gfx_index.u32All, + reg_sq_cmd.u32All); + + return 0; +} + /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked * to avoid asynchronized access */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index a7d2e3323977..05a9c17daa3e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -39,6 +39,41 @@ struct device_process_node { struct list_head list; }; +union SQ_CMD_BITS { + struct { + uint32_t cmd:3; + uint32_t:1; + uint32_t mode:3; + uint32_t check_vmid:1; + uint32_t trap_id:3; + uint32_t:5; + uint32_t wave_id:4; + uint32_t simd_id:2; + uint32_t:2; + uint32_t queue_id:3; + uint32_t:1; + uint32_t vm_id:4; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union GRBM_GFX_INDEX_BITS { + struct { + uint32_t instance_index:8; + uint32_t sh_index:8; + uint32_t se_index:8; + uint32_t:5; + uint32_t sh_broadcast_writes:1; + uint32_t instance_broadcast_writes:1; + uint32_t se_broadcast_writes:1; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + /** * struct device_queue_manager_ops * diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index 66ad8d0b8f7f..fe62407dacb2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -30,7 +30,6 @@ #include #include #include "kfd_priv.h" -#include "kfd_dbgmgr.h" #include "kfd_topology.h" #include "kfd_iommu.h" @@ -163,17 +162,6 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid) pr_debug("Unbinding process 0x%x from IOMMU\n", pasid); - mutex_lock(kfd_get_dbgmgr_mutex()); - - if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { - if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { - kfd_dbgmgr_destroy(dev->dbgmgr); - dev->dbgmgr = NULL; - } - } - - mutex_unlock(kfd_get_dbgmgr_mutex()); - mutex_lock(&p->mutex); pdd = kfd_get_process_device_data(dev, p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 74fb4762c66e..b6790a637f5c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -300,9 +300,6 @@ struct kfd_dev { */ bool interrupts_active; - /* Debug manager */ - struct kfd_dbgmgr *dbgmgr; - /* Firmware versions */ uint16_t mec_fw_version; uint16_t mec2_fw_version; @@ -1331,8 +1328,6 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid); void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); -int 
dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); - bool kfd_is_locked(void); /* Compute profile */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 8e2780d2f735..8c6a48add76e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -40,7 +40,6 @@ struct mm_struct; #include "kfd_priv.h" #include "kfd_device_queue_manager.h" -#include "kfd_dbgmgr.h" #include "kfd_iommu.h" #include "kfd_svm.h" @@ -1158,7 +1157,6 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct kfd_process *p; - int i; /* * The kfd_process structure can not be free because the @@ -1178,23 +1176,6 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, mutex_lock(&p->mutex); - /* Iterate over all process device data structures and if the - * pdd is in debug mode, we should first force unregistration, - * then we will be able to destroy the queues - */ - for (i = 0; i < p->n_pdds; i++) { - struct kfd_dev *dev = p->pdds[i]->dev; - - mutex_lock(kfd_get_dbgmgr_mutex()); - if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { - if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { - kfd_dbgmgr_destroy(dev->dbgmgr); - dev->dbgmgr = NULL; - } - } - mutex_unlock(kfd_get_dbgmgr_mutex()); - } - kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index ac941f62cbed..2f60cf35a444 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -269,18 +269,9 @@ struct kfd2kgd_calls { int (*hqd_sdma_destroy)(struct amdgpu_device *adev, void *mqd, unsigned int timeout); - int (*address_watch_disable)(struct amdgpu_device *adev); - int (*address_watch_execute)(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); int (*wave_control_execute)(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd); - uint32_t (*address_watch_get_offset)(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset); bool (*get_atc_vmid_pasid_mapping_info)(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index e6a56c146920..6e4268f5e482 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -756,16 +756,16 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_WAIT_EVENTS \ AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) -#define AMDKFD_IOC_DBG_REGISTER \ +#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED \ AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) -#define AMDKFD_IOC_DBG_UNREGISTER \ +#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED \ AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) -#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ +#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED \ AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) -#define AMDKFD_IOC_DBG_WAVE_CONTROL \ +#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ -- cgit v1.2.3-59-g8ed1b
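
The wave-reset hunk folded into kfd_device_queue_manager.c above reduces to packing two 32-bit registers: GRBM_GFX_INDEX selects which shader engines, shader arrays and CU instances are addressed, and SQ_CMD carries the command, mode and VMID, after which both words are handed to the kfd2kgd wave_control_execute hook. The following user-space sketch only illustrates that packing and is not kernel code: the unions are trimmed copies of the ones added to kfd_device_queue_manager.h, the VMID value 8 is an arbitrary example, and it assumes the LSB-first bitfield layout used by GCC and Clang on little-endian targets, which is what the driver itself relies on.

/* Illustrative only: packs the same broadcast-KILL request that the new
 * dbgdev_wave_reset_wavefronts() builds, and prints the raw register
 * values that wave_control_execute() would receive.
 */
#include <stdint.h>
#include <stdio.h>

#define SQ_IND_CMD_CMD_KILL       0x00000003
#define SQ_IND_CMD_MODE_BROADCAST 0x00000001

/* Trimmed copies of the unions added to kfd_device_queue_manager.h. */
union SQ_CMD_BITS {
	struct {
		uint32_t cmd:3;
		uint32_t:1;
		uint32_t mode:3;
		uint32_t check_vmid:1;
		uint32_t trap_id:3;
		uint32_t:5;
		uint32_t wave_id:4;
		uint32_t simd_id:2;
		uint32_t:2;
		uint32_t queue_id:3;
		uint32_t:1;
		uint32_t vm_id:4;
	} bits;
	uint32_t u32All;
};

union GRBM_GFX_INDEX_BITS {
	struct {
		uint32_t instance_index:8;
		uint32_t sh_index:8;
		uint32_t se_index:8;
		uint32_t:5;
		uint32_t sh_broadcast_writes:1;
		uint32_t instance_broadcast_writes:1;
		uint32_t se_broadcast_writes:1;
	} bits;
	uint32_t u32All;
};

int main(void)
{
	union SQ_CMD_BITS reg_sq_cmd = { .u32All = 0 };
	union GRBM_GFX_INDEX_BITS reg_gfx_index = { .u32All = 0 };
	unsigned int vmid = 8;	/* example value, not taken from the patch */

	/* Address every shader engine, shader array and CU instance. */
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;

	/* Broadcast a KILL to all waves that belong to this VMID. */
	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
	reg_sq_cmd.bits.vm_id = vmid;

	/* These two words are what wave_control_execute() receives. */
	printf("GRBM_GFX_INDEX = 0x%08x\n", (unsigned int)reg_gfx_index.u32All);
	printf("SQ_CMD         = 0x%08x\n", (unsigned int)reg_sq_cmd.u32All);
	return 0;
}

On a typical little-endian build this prints 0xe0000000 for GRBM_GFX_INDEX (the three broadcast bits) and 0x80000013 for SQ_CMD (KILL command, broadcast mode, VMID 8).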