From c4381d0ee81930097e94e55d1c23f85798ffd093 Mon Sep 17 00:00:00 2001 From: Bokun Zhang Date: Wed, 12 Jan 2022 10:34:11 -0500 Subject: drm/amdgpu: Add interface to load SRIOV cap FW - Add interface to load SRIOV cap FW. If the FW does not exist, simply skip this FW loading routine. This FW will only be loaded under SRIOV. Other driver configuration will not be affected. By adding this interface, it will make us easier to prepare SRIOV Linux guest driver for different users. - Update sysfs interface to read cap FW version. - Refactor PSP FW loading routine under SRIOV to use a unified SWITCH statement instead of using IF statement - Remove redundant amdgpu_sriov_vf() check in FW loading routine Acked-by: Monk Liu Acked-by: Guchun Chen Signed-off-by: Bokun Zhang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 0b94ec7b73e7..be4f9111f478 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -728,6 +728,8 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_DMCUB 0x14 /* Subquery id: Query TOC firmware version */ #define AMDGPU_INFO_FW_TOC 0x15 + /* Subquery id: Query CAP firmware version */ + #define AMDGPU_INFO_FW_CAP 0x16 /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f -- cgit v1.2.3-59-g8ed1b From 8cda7a4f96e435be2fd074009d69521d973d7d31 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Jan 2022 17:57:33 -0500 Subject: drm/amdgpu/UAPI: add new CTX OP to get/set stable pstates Add a new CTX ioctl operation to set stable pstates for profiling. When creating traces for tools like RGP or using SPM or doing performance profiling, it's required to enable a special stable profiling power state on the GPU. These profiling states set fixed clocks and disable certain other power features like powergating which may impact the results. Historically, these profiling pstates were enabled via sysfs, but this adds an interface to enable it via the CTX ioctl from the application. Since the power state is global only one application can set it at a time, so if multiple applications try and use it only the first will get it, the ioctl will return -EBUSY for others. The sysfs interface will override whatever has been set by this interface. Mesa MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/207 v2: don't default r = 0; v3: rebase on Evan's PM cleanup Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 129 ++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 5 ++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 + include/uapi/drm/amdgpu_drm.h | 17 +++- 6 files changed, 153 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 468003583b2a..2c929fa40379 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -237,6 +237,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); ctx->init_priority = priority; ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; + ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; return 0; } @@ -255,6 +256,86 @@ static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) kfree(entity); } +static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, + u32 *stable_pstate) +{ + struct amdgpu_device *adev = ctx->adev; + enum amd_dpm_forced_level current_level; + + if (!ctx) + return -EINVAL; + + current_level = amdgpu_dpm_get_performance_level(adev); + + switch (current_level) { + case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK; + break; + default: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; + break; + } + return 0; +} + +static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, + u32 stable_pstate) +{ + struct amdgpu_device *adev = ctx->adev; + enum amd_dpm_forced_level level; + int r; + + if (!ctx) + return -EINVAL; + + mutex_lock(&adev->pm.stable_pstate_ctx_lock); + if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) { + r = -EBUSY; + goto done; + } + + switch (stable_pstate) { + case AMDGPU_CTX_STABLE_PSTATE_NONE: + level = AMD_DPM_FORCED_LEVEL_AUTO; + break; + case AMDGPU_CTX_STABLE_PSTATE_STANDARD: + level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD; + break; + case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK: + level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK; + break; + case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK: + level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK; + break; + case AMDGPU_CTX_STABLE_PSTATE_PEAK: + level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; + break; + default: + r = -EINVAL; + goto done; + } + + r = amdgpu_dpm_force_performance_level(adev, level); + + if (level == AMD_DPM_FORCED_LEVEL_AUTO) + adev->pm.stable_pstate_ctx = NULL; + else + adev->pm.stable_pstate_ctx = ctx; +done: + mutex_unlock(&adev->pm.stable_pstate_ctx_lock); + + return r; +} + static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); @@ -270,7 +351,7 @@ static void amdgpu_ctx_fini(struct kref *ref) ctx->entities[i][j] = NULL; } } - + amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE); mutex_destroy(&ctx->lock); kfree(ctx); } @@ -467,11 +548,41 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, return 0; } + + +static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv, uint32_t id, + bool set, u32 *stable_pstate) +{ + struct amdgpu_ctx *ctx; + struct amdgpu_ctx_mgr *mgr; + int r; + + if (!fpriv) + return -EINVAL; + + mgr = &fpriv->ctx_mgr; + mutex_lock(&mgr->lock); + ctx = idr_find(&mgr->ctx_handles, id); + if (!ctx) { + mutex_unlock(&mgr->lock); + return -EINVAL; + } + + if (set) + r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate); + else + r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate); + + mutex_unlock(&mgr->lock); + return r; +} + int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { int r; - uint32_t id; + uint32_t id, stable_pstate; int32_t priority; union drm_amdgpu_ctx *args = data; @@ -500,6 +611,20 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, case AMDGPU_CTX_OP_QUERY_STATE2: r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; + case AMDGPU_CTX_OP_GET_STABLE_PSTATE: + if (args->in.flags) + return -EINVAL; + r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate); + args->out.pstate.flags = stable_pstate; + break; + case AMDGPU_CTX_OP_SET_STABLE_PSTATE: + if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK) + return -EINVAL; + stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK; + if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK) + return -EINVAL; + r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index a44b8b8ed39c..142f2f87d44c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -53,6 +53,7 @@ struct amdgpu_ctx { atomic_t guilty; unsigned long ras_counter_ce; unsigned long ras_counter_ue; + uint32_t stable_pstate; }; struct amdgpu_ctx_mgr { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6ba57ad88640..660fb4085c97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3510,6 +3510,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, init_rwsem(&adev->reset_sem); mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); + mutex_init(&adev->pm.stable_pstate_ctx_lock); amdgpu_device_init_apu_flags(adev); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 837a31a46596..d68e7132da2c 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -336,11 +336,16 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, return ret; } + mutex_lock(&adev->pm.stable_pstate_ctx_lock); if (amdgpu_dpm_force_performance_level(adev, level)) { pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); + mutex_unlock(&adev->pm.stable_pstate_ctx_lock); return -EINVAL; } + /* override whatever a user ctx may have set */ + adev->pm.stable_pstate_ctx = NULL; + mutex_unlock(&adev->pm.stable_pstate_ctx_lock); pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 2766b88ecf96..5cc05110cdae 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -338,6 +338,9 @@ struct amdgpu_pm { uint32_t smu_debug_mask; bool pp_force_state_enabled; + + struct mutex stable_pstate_ctx_lock; + struct amdgpu_ctx *stable_pstate_ctx; }; u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index be4f9111f478..76b580d10a52 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -206,6 +206,8 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_FREE_CTX 2 #define AMDGPU_CTX_OP_QUERY_STATE 3 #define AMDGPU_CTX_OP_QUERY_STATE2 4 +#define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 +#define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 @@ -238,10 +240,18 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_PRIORITY_HIGH 512 #define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 +/* select a stable profiling pstate for perfmon tools */ +#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK 0xf +#define AMDGPU_CTX_STABLE_PSTATE_NONE 0 +#define AMDGPU_CTX_STABLE_PSTATE_STANDARD 1 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK 2 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 +#define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; - /** For future use, no flags defined so far */ + /** Flags */ __u32 flags; __u32 ctx_id; /** AMDGPU_CTX_PRIORITY_* */ @@ -262,6 +272,11 @@ union drm_amdgpu_ctx_out { /** Reset status since the last call of the ioctl. */ __u32 reset_status; } state; + + struct { + __u32 flags; + __u32 _pad; + } pstate; }; union drm_amdgpu_ctx { -- cgit v1.2.3-59-g8ed1b From 06feec6005c9d9500cd286ec440aabf8b2ddd94d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Jan 2022 22:50:39 +0300 Subject: ASoC: hdmi-codec: Fix OOB memory accesses Correct size of iec_status array by changing it to the size of status array of the struct snd_aes_iec958. This fixes out-of-bounds slab read accesses made by memcpy() of the hdmi-codec driver. This problem is reported by KASAN. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20220112195039.1329-1-digetx@gmail.com Signed-off-by: Mark Brown --- include/uapi/sound/asound.h | 4 +++- sound/soc/codecs/hdmi-codec.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index ff7e638221c5..228279ea0670 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -56,8 +56,10 @@ * * ****************************************************************************/ +#define AES_IEC958_STATUS_SIZE 24 + struct snd_aes_iec958 { - unsigned char status[24]; /* AES/IEC958 channel status bits */ + unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */ unsigned char subcode[147]; /* AES/IEC958 subcode bits */ unsigned char pad; /* nothing */ unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */ diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index b61f980cabdc..b07607a9ecea 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -277,7 +277,7 @@ struct hdmi_codec_priv { bool busy; struct snd_soc_jack *jack; unsigned int jack_status; - u8 iec_status[5]; + u8 iec_status[AES_IEC958_STATUS_SIZE]; }; static const struct snd_soc_dapm_widget hdmi_widgets[] = { -- cgit v1.2.3-59-g8ed1b From f6c6804c43fa18d3cee64b55490dfbd3bef1363a Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 28 Jan 2022 15:40:25 +0000 Subject: kvm: Move KVM_GET_XSAVE2 IOCTL definition at the end of kvm.h This way we can more easily find the next free IOCTL number when adding new IOCTLs. Fixes: be50b2065dfa ("kvm: x86: Add support for getting/setting expanded xstate buffer") Signed-off-by: Janosch Frank Message-Id: <20220128154025.102666-1-frankja@linux.ibm.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b46bcdb0cab1..5191b57e1562 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1624,9 +1624,6 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -/* Available with KVM_CAP_XSAVE2 */ -#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) - struct kvm_s390_pv_sec_parm { __u64 origin; __u64 length; @@ -2048,4 +2045,7 @@ struct kvm_stats_desc { #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3-59-g8ed1b From ddecd22878601a606d160680fa85802b75d92eb6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 31 Jan 2022 11:34:07 +0100 Subject: perf: uapi: Document perf_event_attr::sig_data truncation on 32 bit architectures Due to the alignment requirements of siginfo_t, as described in 3ddb3fd8cdb0 ("signal, perf: Fix siginfo_t by avoiding u64 on 32-bit architectures"), siginfo_t::si_perf_data is limited to an unsigned long. However, perf_event_attr::sig_data is an u64, to avoid having to deal with compat conversions. Due to being an u64, it may not immediately be clear to users that sig_data is truncated on 32 bit architectures. Add a comment to explicitly point this out, and hopefully help some users save time by not having to deduce themselves what's happening. Reported-by: Dmitry Vyukov Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Link: https://lore.kernel.org/r/20220131103407.1971678-3-elver@google.com --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1b65042ab1db..82858b697c05 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -465,6 +465,8 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via * siginfo_t::si_perf_data, e.g. to permit user to identify the event. + * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be + * truncated accordingly on 32 bit architectures. */ __u64 sig_data; }; -- cgit v1.2.3-59-g8ed1b From c86d86131ab75696fc52d98571148842e067d620 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 2 Feb 2022 06:09:04 +0300 Subject: Partially revert "net/smc: Add netlink net namespace support" The change of sizeof(struct smc_diag_linkinfo) by commit 79d39fc503b4 ("net/smc: Add netlink net namespace support") introduced an ABI regression: since struct smc_diag_lgrinfo contains an object of type "struct smc_diag_linkinfo", offset of all subsequent members of struct smc_diag_lgrinfo was changed by that change. As result, applications compiled with the old version of struct smc_diag_linkinfo will receive garbage in struct smc_diag_lgrinfo.role if the kernel implements this new version of struct smc_diag_linkinfo. Fix this regression by reverting the part of commit 79d39fc503b4 that changes struct smc_diag_linkinfo. After all, there is SMC_GEN_NETLINK interface which is good enough, so there is probably no need to touch the smc_diag ABI in the first place. Fixes: 79d39fc503b4 ("net/smc: Add netlink net namespace support") Signed-off-by: Dmitry V. Levin Reviewed-by: Karsten Graul Link: https://lore.kernel.org/r/20220202030904.GA9742@altlinux.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/smc_diag.h | 11 +++++------ net/smc/smc_diag.c | 2 -- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index c7008d87f1a4..8cb3a6fef553 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -84,12 +84,11 @@ struct smc_diag_conninfo { /* SMC_DIAG_LINKINFO */ struct smc_diag_linkinfo { - __u8 link_id; /* link identifier */ - __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ - __u8 ibport; /* RDMA device port number */ - __u8 gid[40]; /* local GID */ - __u8 peer_gid[40]; /* peer GID */ - __aligned_u64 net_cookie; /* RDMA device net namespace */ + __u8 link_id; /* link identifier */ + __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ + __u8 ibport; /* RDMA device port number */ + __u8 gid[40]; /* local GID */ + __u8 peer_gid[40]; /* peer GID */ }; struct smc_diag_lgrinfo { diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index b8898c787d23..1fca2f90a9c7 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -146,13 +146,11 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_link *link = smc->conn.lnk; - struct net *net = read_pnet(&link->smcibdev->ibdev->coredev.rdma_net); struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, .lnk[0].ibport = link->ibport, .lnk[0].link_id = link->link_id, - .lnk[0].net_cookie = net->net_cookie, }; memcpy(linfo.lnk[0].ibname, -- cgit v1.2.3-59-g8ed1b From 164666fa66669d437bdcc8d5f1744a2aee73be41 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Mon, 31 Jan 2022 12:23:07 -0500 Subject: Improve docs for IOCTL_GNTDEV_MAP_GRANT_REF --------------cKY3Ggs6VDUCSn4I6iN78sHA Content-Type: multipart/mixed; boundary="------------g0T69ASidFiPhh4eOY4XzIg1" --------------g0T69ASidFiPhh4eOY4XzIg1 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable The current implementation of gntdev guarantees that the first call to IOCTL_GNTDEV_MAP_GRANT_REF will set @index to 0. This is required to use gntdev for Wayland, which is a future desire of Qubes OS. Additionally, requesting zero grants results in an error, but this was not documented either. Document both of these. Signed-off-by: Demi Marie Obenour Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/f66c5a4e-2034-00b5-a635-6983bd999c07@gmail.com Signed-off-by: Juergen Gross --- include/uapi/xen/gntdev.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/xen/gntdev.h b/include/uapi/xen/gntdev.h index 9ac5515b9bc2..7a7145395c09 100644 --- a/include/uapi/xen/gntdev.h +++ b/include/uapi/xen/gntdev.h @@ -47,7 +47,13 @@ struct ioctl_gntdev_grant_ref { /* * Inserts the grant references into the mapping table of an instance * of gntdev. N.B. This does not perform the mapping, which is deferred - * until mmap() is called with @index as the offset. + * until mmap() is called with @index as the offset. @index should be + * considered opaque to userspace, with one exception: if no grant + * references have ever been inserted into the mapping table of this + * instance, @index will be set to 0. This is necessary to use gntdev + * with userspace APIs that expect a file descriptor that can be + * mmap()'d at offset 0, such as Wayland. If @count is set to 0, this + * ioctl will fail. */ #define IOCTL_GNTDEV_MAP_GRANT_REF \ _IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref)) -- cgit v1.2.3-59-g8ed1b From d1ca60efc53d665cf89ed847a14a510a81770b81 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 2 Feb 2022 12:00:56 +0100 Subject: netfilter: ctnetlink: disable helper autoassign When userspace, e.g. conntrackd, inserts an entry with a specified helper, its possible that the helper is lost immediately after its added: ctnetlink_create_conntrack -> nf_ct_helper_ext_add + assign helper -> ctnetlink_setup_nat -> ctnetlink_parse_nat_setup -> parse_nat_setup -> nfnetlink_parse_nat_setup -> nf_nat_setup_info -> nf_conntrack_alter_reply -> __nf_ct_try_assign_helper ... and __nf_ct_try_assign_helper will zero the helper again. Set IPS_HELPER bit to bypass auto-assign logic, its unwanted, just like when helper is assigned via ruleset. Dropped old 'not strictly necessary' comment, it referred to use of rcu_assign_pointer() before it got replaced by RCU_INIT_POINTER(). NB: Fixes tag intentionally incorrect, this extends the referenced commit, but this change won't build without IPS_HELPER introduced there. Fixes: 6714cf5465d280 ("netfilter: nf_conntrack: fix explicit helper attachment and NAT") Reported-by: Pham Thanh Tuyen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- net/netfilter/nf_conntrack_netlink.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 4b3395082d15..26071021e986 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -106,7 +106,7 @@ enum ip_conntrack_status { IPS_NAT_CLASH = IPS_UNTRACKED, #endif - /* Conntrack got a helper explicitly attached via CT target. */ + /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ac438370f94a..7032402ffd33 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2311,7 +2311,8 @@ ctnetlink_create_conntrack(struct net *net, if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); - /* not in hash table yet so not strictly necessary */ + /* disable helper auto-assignment for this entry */ + ct->status |= IPS_HELPER; RCU_INIT_POINTER(help->helper, helper); } } else { -- cgit v1.2.3-59-g8ed1b From a3574119826d9a4ef807fb973cf5150c3b90da43 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 3 Feb 2022 13:38:06 +0000 Subject: drm: document struct drm_mode_fb_cmd2 Follow-up for the DRM_IOCTL_MODE_GETFB2 docs. v2: (Daniel Stone) - Replace fourcc.org with drm_fourcc.h because this is the authoritative source and the website may have mismatches. - Drop assumption that offsets will generally be 0. - Mention that unused entries must be zero'ed out. v3: (Pekka) - Mention that a handle can be re-used - Add unit for pitches/offsets Signed-off-by: Simon Ser Reviewed-by: Daniel Vetter Acked-by: Pekka Paalanen Cc: Daniel Stone Reviewed-by: Daniel Stone Link: https://patchwork.freedesktop.org/patch/msgid/20220203133753.261507-1-contact@emersion.fr --- include/uapi/drm/drm_mode.h | 88 ++++++++++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 28 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index e1e351682872..0a0d56a6158e 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -663,41 +663,73 @@ struct drm_mode_fb_cmd { #define DRM_MODE_FB_INTERLACED (1<<0) /* for interlaced framebuffers */ #define DRM_MODE_FB_MODIFIERS (1<<1) /* enables ->modifer[] */ +/** + * struct drm_mode_fb_cmd2 - Frame-buffer metadata. + * + * This struct holds frame-buffer metadata. There are two ways to use it: + * + * - User-space can fill this struct and perform a &DRM_IOCTL_MODE_ADDFB2 + * ioctl to register a new frame-buffer. The new frame-buffer object ID will + * be set by the kernel in @fb_id. + * - User-space can set @fb_id and perform a &DRM_IOCTL_MODE_GETFB2 ioctl to + * fetch metadata about an existing frame-buffer. + * + * In case of planar formats, this struct allows up to 4 buffer objects with + * offsets and pitches per plane. The pitch and offset order is dictated by the + * format FourCC as defined by ``drm_fourcc.h``, e.g. NV12 is described as: + * + * YUV 4:2:0 image with a plane of 8 bit Y samples followed by an + * interleaved U/V plane containing 8 bit 2x2 subsampled colour difference + * samples. + * + * So it would consist of a Y plane at ``offsets[0]`` and a UV plane at + * ``offsets[1]``. + * + * To accommodate tiled, compressed, etc formats, a modifier can be specified. + * For more information see the "Format Modifiers" section. Note that even + * though it looks like we have a modifier per-plane, we in fact do not. The + * modifier for each plane must be identical. Thus all combinations of + * different data layouts for multi-plane formats must be enumerated as + * separate modifiers. + * + * All of the entries in @handles, @pitches, @offsets and @modifier must be + * zero when unused. Warning, for @offsets and @modifier zero can't be used to + * figure out whether the entry is used or not since it's a valid value (a zero + * offset is common, and a zero modifier is &DRM_FORMAT_MOD_LINEAR). + */ struct drm_mode_fb_cmd2 { + /** @fb_id: Object ID of the frame-buffer. */ __u32 fb_id; + /** @width: Width of the frame-buffer. */ __u32 width; + /** @height: Height of the frame-buffer. */ __u32 height; - __u32 pixel_format; /* fourcc code from drm_fourcc.h */ - __u32 flags; /* see above flags */ + /** + * @pixel_format: FourCC format code, see ``DRM_FORMAT_*`` constants in + * ``drm_fourcc.h``. + */ + __u32 pixel_format; + /** + * @flags: Frame-buffer flags (see &DRM_MODE_FB_INTERLACED and + * &DRM_MODE_FB_MODIFIERS). + */ + __u32 flags; - /* - * In case of planar formats, this ioctl allows up to 4 - * buffer objects with offsets and pitches per plane. - * The pitch and offset order is dictated by the fourcc, - * e.g. NV12 (https://fourcc.org/yuv.php#NV12) is described as: - * - * YUV 4:2:0 image with a plane of 8 bit Y samples - * followed by an interleaved U/V plane containing - * 8 bit 2x2 subsampled colour difference samples. - * - * So it would consist of Y as offsets[0] and UV as - * offsets[1]. Note that offsets[0] will generally - * be 0 (but this is not required). - * - * To accommodate tiled, compressed, etc formats, a - * modifier can be specified. The default value of zero - * indicates "native" format as specified by the fourcc. - * Vendor specific modifier token. Note that even though - * it looks like we have a modifier per-plane, we in fact - * do not. The modifier for each plane must be identical. - * Thus all combinations of different data layouts for - * multi plane formats must be enumerated as separate - * modifiers. + /** + * @handles: GEM buffer handle, one per plane. Set to 0 if the plane is + * unused. The same handle can be used for multiple planes. */ __u32 handles[4]; - __u32 pitches[4]; /* pitch for each plane */ - __u32 offsets[4]; /* offset of each plane */ - __u64 modifier[4]; /* ie, tiling, compress */ + /** @pitches: Pitch (aka. stride) in bytes, one per plane. */ + __u32 pitches[4]; + /** @offsets: Offset into the buffer in bytes, one per plane. */ + __u32 offsets[4]; + /** + * @modifier: Format modifier, one per plane. See ``DRM_FORMAT_MOD_*`` + * constants in ``drm_fourcc.h``. All planes must use the same + * modifier. Ignored unless &DRM_MODE_FB_MODIFIERS is set in @flags. + */ + __u64 modifier[4]; }; #define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01 -- cgit v1.2.3-59-g8ed1b From 3698807094ecae945436921325f5c309d1123f11 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 24 Aug 2021 16:13:41 -0400 Subject: drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs Checkpoint-Restore in userspace (CRIU) is a powerful tool that can snapshot a running process and later restore it on same or a remote machine but expects the processes that have a device file (e.g. GPU) associated with them, provide necessary driver support to assist CRIU and its extensible plugin interface. Thus, In order to support the Checkpoint-Restore of any ROCm process, the AMD Radeon Open Compute Kernel driver, needs to provide a set of new APIs that provide necessary VRAM metadata and its contents to a userspace component (CRIU plugin) that can store it in form of image files. This introduces some new ioctls which will be used to checkpoint-Restore any KFD bound user process. KFD only allows ioctl calls from the same process that opened the KFD file descriptor. Since these ioctls are expected to be called from a KFD criu plugin which has elevated ptrace attached privileges and CAP_CHECKPOINT_RESTORE capabilities attached with the file descriptors so modify KFD to allow such calls. (API redesigned by David Yat Sin) Suggested-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: David Yat Sin Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 98 +++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 65 ++++++++++++++++++++- include/uapi/linux/kfd_ioctl.h | 81 +++++++++++++++++++++++++- 3 files changed, 241 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 214a2c67fba4..90e6d9e335a5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include "kfd_priv.h" @@ -1859,6 +1860,75 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) } #endif +static int criu_checkpoint(struct file *filep, + struct kfd_process *p, + struct kfd_ioctl_criu_args *args) +{ + return 0; +} + +static int criu_restore(struct file *filep, + struct kfd_process *p, + struct kfd_ioctl_criu_args *args) +{ + return 0; +} + +static int criu_unpause(struct file *filep, + struct kfd_process *p, + struct kfd_ioctl_criu_args *args) +{ + return 0; +} + +static int criu_resume(struct file *filep, + struct kfd_process *p, + struct kfd_ioctl_criu_args *args) +{ + return 0; +} + +static int criu_process_info(struct file *filep, + struct kfd_process *p, + struct kfd_ioctl_criu_args *args) +{ + return 0; +} + +static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data) +{ + struct kfd_ioctl_criu_args *args = data; + int ret; + + dev_dbg(kfd_device, "CRIU operation: %d\n", args->op); + switch (args->op) { + case KFD_CRIU_OP_PROCESS_INFO: + ret = criu_process_info(filep, p, args); + break; + case KFD_CRIU_OP_CHECKPOINT: + ret = criu_checkpoint(filep, p, args); + break; + case KFD_CRIU_OP_UNPAUSE: + ret = criu_unpause(filep, p, args); + break; + case KFD_CRIU_OP_RESTORE: + ret = criu_restore(filep, p, args); + break; + case KFD_CRIU_OP_RESUME: + ret = criu_resume(filep, p, args); + break; + default: + dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op); + ret = -EINVAL; + break; + } + + if (ret) + dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret); + + return ret; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -1962,6 +2032,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, kfd_ioctl_set_xnack_mode, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP, + kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE), + }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) @@ -1976,6 +2050,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) char *kdata = NULL; unsigned int usize, asize; int retcode = -EINVAL; + bool ptrace_attached = false; if (nr >= AMDKFD_CORE_IOCTL_COUNT) goto err_i1; @@ -2001,7 +2076,15 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) * processes need to create their own KFD device context. */ process = filep->private_data; - if (process->lead_thread != current->group_leader) { + + rcu_read_lock(); + if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) && + ptrace_parent(process->lead_thread) == current) + ptrace_attached = true; + rcu_read_unlock(); + + if (process->lead_thread != current->group_leader + && !ptrace_attached) { dev_dbg(kfd_device, "Using KFD FD in wrong process\n"); retcode = -EBADF; goto err_i1; @@ -2016,6 +2099,19 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) goto err_i1; } + /* + * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support + * CAP_CHECKPOINT_RESTORE, so we also allow access if CAP_SYS_ADMIN as CAP_SYS_ADMIN is a + * more priviledged access. + */ + if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) { + if (!capable(CAP_CHECKPOINT_RESTORE) && + !capable(CAP_SYS_ADMIN)) { + retcode = -EACCES; + goto err_i1; + } + } + if (cmd & (IOC_IN | IOC_OUT)) { if (asize <= sizeof(stack_kdata)) { kdata = stack_kdata; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ea68f3b3a4e9..f928878196ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -121,7 +121,26 @@ */ #define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512 - +/** + * enum kfd_ioctl_flags - KFD ioctl flags + * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how + * userspace can use a given ioctl. + */ +enum kfd_ioctl_flags { + /* + * @KFD_IOC_FLAG_CHECKPOINT_RESTORE: + * Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially + * perform privileged operations and load arbitrary data into MQDs and + * eventually HQD registers when the queue is mapped by HWS. In order to + * prevent this we should perform additional security checks. + * + * This is equivalent to callers with the CHECKPOINT_RESTORE capability. + * + * Note: Since earlier versions of docker do not support CHECKPOINT_RESTORE, + * we also allow ioctls with SYS_ADMIN capability. + */ + KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0), +}; /* * Kernel module parameter to specify maximum number of supported queues per * device @@ -1006,6 +1025,50 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd, uint64_t tba_addr, uint64_t tma_addr); +/* CRIU */ +/* + * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of the CRIU private + * structures: + * kfd_criu_process_priv_data + * kfd_criu_device_priv_data + * kfd_criu_bo_priv_data + * kfd_criu_queue_priv_data + * kfd_criu_event_priv_data + * kfd_criu_svm_range_priv_data + */ + +#define KFD_CRIU_PRIV_VERSION 1 + +struct kfd_criu_process_priv_data { + uint32_t version; +}; + +struct kfd_criu_device_priv_data { + /* For future use */ + uint64_t reserved; +}; + +struct kfd_criu_bo_priv_data { + uint64_t reserved; +}; + +struct kfd_criu_svm_range_priv_data { + uint32_t object_type; + uint32_t reserved; +}; + +struct kfd_criu_queue_priv_data { + uint32_t object_type; + uint32_t reserved; +}; + +struct kfd_criu_event_priv_data { + uint32_t object_type; + uint32_t reserved; +}; + +/* CRIU - End */ + /* Queue Context Management */ int init_queue(struct queue **q, const struct queue_properties *properties); void uninit_queue(struct queue *q); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af96af174dc4..49429a6c42fc 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -468,6 +468,82 @@ struct kfd_ioctl_smi_events_args { __u32 anon_fd; /* from KFD */ }; +/************************************************************************************************** + * CRIU IOCTLs (Checkpoint Restore In Userspace) + * + * When checkpointing a process, the userspace application will perform: + * 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts + * all the queues. + * 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges) + * 3. UNPAUSE op to un-evict all the queues + * + * When restoring a process, the CRIU userspace application will perform: + * + * 1. RESTORE op to restore process contents + * 2. RESUME op to start the process + * + * Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User + * application needs to perform an UNPAUSE operation after calling PROCESS_INFO. + */ + +enum kfd_criu_op { + KFD_CRIU_OP_PROCESS_INFO, + KFD_CRIU_OP_CHECKPOINT, + KFD_CRIU_OP_UNPAUSE, + KFD_CRIU_OP_RESTORE, + KFD_CRIU_OP_RESUME, +}; + +/** + * kfd_ioctl_criu_args - Arguments perform CRIU operation + * @devices: [in/out] User pointer to memory location for devices information. + * This is an array of type kfd_criu_device_bucket. + * @bos: [in/out] User pointer to memory location for BOs information + * This is an array of type kfd_criu_bo_bucket. + * @priv_data: [in/out] User pointer to memory location for private data + * @priv_data_size: [in/out] Size of priv_data in bytes + * @num_devices: [in/out] Number of GPUs used by process. Size of @devices array. + * @num_bos [in/out] Number of BOs used by process. Size of @bos array. + * @num_objects: [in/out] Number of objects used by process. Objects are opaque to + * user application. + * @pid: [in/out] PID of the process being checkpointed + * @op [in] Type of operation (kfd_criu_op) + * + * Return: 0 on success, -errno on failure + */ +struct kfd_ioctl_criu_args { + __u64 devices; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 bos; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_devices; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_bos; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_objects; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 pid; /* Used during ops: PROCESS_INFO, RESUME */ + __u32 op; +}; + +struct kfd_criu_device_bucket { + __u32 user_gpu_id; + __u32 actual_gpu_id; + __u32 drm_fd; + __u32 pad; +}; + +struct kfd_criu_bo_bucket { + __u64 addr; + __u64 size; + __u64 offset; + __u64 restored_offset; /* During restore, updated offset for BO */ + __u32 gpu_id; /* This is the user_gpu_id */ + __u32 alloc_flags; + __u32 dmabuf_fd; + __u32 pad; +}; + +/* CRIU IOCTLs - END */ +/**************************************************************************************************/ + /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -742,7 +818,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +#define AMDKFD_IOC_CRIU_OP \ + AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x23 #endif -- cgit v1.2.3-59-g8ed1b From 692996f2bef7aa1737e07554255ba0d9a73fb750 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 18 Jan 2022 01:47:58 -0500 Subject: drm/amdkfd: Bump up KFD API version for CRIU - Change KFD minor version to 7 for CRIU Proposed userspace changes: https://github.com/RadeonOpenCompute/criu Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 49429a6c42fc..e6a56c146920 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -32,9 +32,10 @@ * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 - Checkpoint Restore (CRIU) API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 6 +#define KFD_IOCTL_MINOR_VERSION 7 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.2.3-59-g8ed1b From 5bdd3eb253544b1e80f904e1205699d0a126d2d6 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 4 Feb 2022 11:58:32 -0500 Subject: drm/amdkfd: Remove unused old debugger implementation Cleanup the kfd code by removing the unused old debugger implementation. The address watch was only ever implemented in the upstream driver for GFXv7 (Kaveri). The user mode tools runtime using this API was never open-sourced. Work on the old debugger prototype that used this API has been discontinued years ago. Only a small piece of resetting wavefronts is kept and is moved to kfd_device_queue_manager.c. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 3 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 3 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 24 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 25 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 96 --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 24 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 10 - drivers/gpu/drm/amd/amdkfd/Makefile | 2 - drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 290 +------ drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 845 --------------------- drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h | 230 ------ drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 158 ---- drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 293 ------- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 - .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 59 ++ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 35 + drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 12 - drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 - drivers/gpu/drm/amd/amdkfd/kfd_process.c | 19 - drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 9 - include/uapi/linux/kfd_ioctl.h | 8 +- 22 files changed, 106 insertions(+), 2070 deletions(-) delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index 46cd4ee6bafb..c8935d718207 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -37,10 +37,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy, - .address_watch_disable = kgd_gfx_v9_address_watch_disable, - .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, - .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index abe93b3ff765..4191af5a3f13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -289,10 +289,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy, - .address_watch_disable = kgd_gfx_v9_address_watch_disable, - .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, - .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 7b7f4b2764c1..9378fc79e9ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -671,20 +671,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int kgd_address_watch_disable(struct amdgpu_device *adev) -{ - return 0; -} - -static int kgd_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -709,13 +695,6 @@ static int kgd_wave_control_execute(struct amdgpu_device *adev, return 0; } -static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return 0; -} - static void set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -767,10 +746,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = set_vm_context_page_table_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 1f37d3574001..e9c80ce13f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -582,21 +582,6 @@ static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd, return 0; } - -static int address_watch_disable_v10_3(struct amdgpu_device *adev) -{ - return 0; -} - -static int address_watch_execute_v10_3(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - static int wave_control_execute_v10_3(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -621,13 +606,6 @@ static int wave_control_execute_v10_3(struct amdgpu_device *adev, return 0; } -static uint32_t address_watch_get_offset_v10_3(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return 0; -} - static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -809,10 +787,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3, .hqd_destroy = hqd_destroy_v10_3, .hqd_sdma_destroy = hqd_sdma_destroy_v10_3, - .address_watch_disable = address_watch_disable_v10_3, - .address_watch_execute = address_watch_execute_v10_3, .wave_control_execute = wave_control_execute_v10_3, - .address_watch_get_offset = address_watch_get_offset_v10_3, .get_atc_vmid_pasid_mapping_info = NULL, .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, .program_trap_handler_settings = program_trap_handler_settings_v10_3, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 36528dad7684..65552bb7d2f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -45,43 +45,6 @@ enum { MAX_WATCH_ADDRESSES = 4 }; -enum { - ADDRESS_WATCH_REG_ADDR_HI = 0, - ADDRESS_WATCH_REG_ADDR_LO, - ADDRESS_WATCH_REG_CNTL, - ADDRESS_WATCH_REG_MAX -}; - -/* not defined in the CI/KV reg file */ -enum { - ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, - ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, - ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, - /* extend the mask to 26 bits to match the low address field */ - ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, - ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF -}; - -static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { - mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL, - mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL, - mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL, - mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL -}; - -union TCP_WATCH_CNTL_BITS { - struct { - uint32_t mask:24; - uint32_t vmid:4; - uint32_t atc:1; - uint32_t mode:2; - uint32_t valid:1; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { @@ -529,55 +492,6 @@ static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, return 0; } -static int kgd_address_watch_disable(struct amdgpu_device *adev) -{ - union TCP_WATCH_CNTL_BITS cntl; - unsigned int i; - - cntl.u32All = 0; - - cntl.bitfields.valid = 0; - cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; - cntl.bitfields.atc = 1; - - /* Turning off this address until we set all the registers */ - for (i = 0; i < MAX_WATCH_ADDRESSES; i++) - WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], cntl.u32All); - - return 0; -} - -static int kgd_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - union TCP_WATCH_CNTL_BITS cntl; - - cntl.u32All = cntl_val; - - /* Turning off this watch point until we set all the registers */ - cntl.bitfields.valid = 0; - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], cntl.u32All); - - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_HI], addr_hi); - - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_LO], addr_lo); - - /* Enable the watch point */ - cntl.bitfields.valid = 1; - - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], cntl.u32All); - - return 0; -} - static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -602,13 +516,6 @@ static int kgd_wave_control_execute(struct amdgpu_device *adev, return 0; } -static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; -} - static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { @@ -665,10 +572,7 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 52832cd69a93..9dc5f2a0cc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -538,20 +538,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int kgd_address_watch_disable(struct amdgpu_device *adev) -{ - return 0; -} - -static int kgd_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -576,13 +562,6 @@ static int kgd_wave_control_execute(struct amdgpu_device *adev, return 0; } -static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return 0; -} - static void set_scratch_backing_va(struct amdgpu_device *adev, uint64_t va, uint32_t vmid) { @@ -614,10 +593,7 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 1abf662a0e91..53895a41932e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -622,20 +622,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -int kgd_gfx_v9_address_watch_disable(struct amdgpu_device *adev) -{ - return 0; -} - -int kgd_gfx_v9_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -660,13 +646,6 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, return 0; } -uint32_t kgd_gfx_v9_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return 0; -} - void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -888,10 +867,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_gfx_v9_address_watch_disable, - .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, - .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 24be49df26fd..c7ed3bc9053c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -46,19 +46,9 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); -int kgd_gfx_v9_address_watch_disable(struct amdgpu_device *adev); -int kgd_gfx_v9_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd); -uint32_t kgd_gfx_v9_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset); - bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index c4f3aff11072..19cfbf9577b4 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -51,8 +51,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_events.o \ $(AMDKFD_PATH)/cik_event_interrupt.o \ $(AMDKFD_PATH)/kfd_int_process_v9.o \ - $(AMDKFD_PATH)/kfd_dbgdev.o \ - $(AMDKFD_PATH)/kfd_dbgmgr.o \ $(AMDKFD_PATH)/kfd_smi_events.o \ $(AMDKFD_PATH)/kfd_crat.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 64e3b4e3a712..13e46631e289 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -39,7 +39,6 @@ #include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" -#include "kfd_dbgmgr.h" #include "kfd_svm.h" #include "amdgpu_amdkfd.h" #include "kfd_smi_events.h" @@ -580,299 +579,26 @@ err_pdd: static int kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) { - struct kfd_ioctl_dbg_register_args *args = data; - struct kfd_dev *dev; - struct kfd_dbgmgr *dbgmgr_ptr; - struct kfd_process_device *pdd; - bool create_ok; - long status = 0; - - mutex_lock(&p->mutex); - pdd = kfd_process_device_data_by_id(p, args->gpu_id); - if (!pdd) { - status = -EINVAL; - goto err_pdd; - } - dev = pdd->dev; - - if (dev->adev->asic_type == CHIP_CARRIZO) { - pr_debug("kfd_ioctl_dbg_register not supported on CZ\n"); - status = -EINVAL; - goto err_chip_unsupp; - } - - mutex_lock(kfd_get_dbgmgr_mutex()); - - /* - * make sure that we have pdd, if this the first queue created for - * this process - */ - pdd = kfd_bind_process_to_device(dev, p); - if (IS_ERR(pdd)) { - status = PTR_ERR(pdd); - goto out; - } - - if (!dev->dbgmgr) { - /* In case of a legal call, we have no dbgmgr yet */ - create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev); - if (create_ok) { - status = kfd_dbgmgr_register(dbgmgr_ptr, p); - if (status != 0) - kfd_dbgmgr_destroy(dbgmgr_ptr); - else - dev->dbgmgr = dbgmgr_ptr; - } - } else { - pr_debug("debugger already registered\n"); - status = -EINVAL; - } - -out: - mutex_unlock(kfd_get_dbgmgr_mutex()); -err_pdd: -err_chip_unsupp: - mutex_unlock(&p->mutex); - - return status; + return -EPERM; } static int kfd_ioctl_dbg_unregister(struct file *filep, struct kfd_process *p, void *data) { - struct kfd_ioctl_dbg_unregister_args *args = data; - struct kfd_process_device *pdd; - struct kfd_dev *dev; - long status; - - mutex_lock(&p->mutex); - pdd = kfd_process_device_data_by_id(p, args->gpu_id); - mutex_unlock(&p->mutex); - if (!pdd || !pdd->dev->dbgmgr) - return -EINVAL; - - dev = pdd->dev; - - if (dev->adev->asic_type == CHIP_CARRIZO) { - pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n"); - return -EINVAL; - } - - mutex_lock(kfd_get_dbgmgr_mutex()); - - status = kfd_dbgmgr_unregister(dev->dbgmgr, p); - if (!status) { - kfd_dbgmgr_destroy(dev->dbgmgr); - dev->dbgmgr = NULL; - } - - mutex_unlock(kfd_get_dbgmgr_mutex()); - - return status; + return -EPERM; } -/* - * Parse and generate variable size data structure for address watch. - * Total size of the buffer and # watch points is limited in order - * to prevent kernel abuse. (no bearing to the much smaller HW limitation - * which is enforced by dbgdev module) - * please also note that the watch address itself are not "copied from user", - * since it be set into the HW in user mode values. - * - */ static int kfd_ioctl_dbg_address_watch(struct file *filep, struct kfd_process *p, void *data) { - struct kfd_ioctl_dbg_address_watch_args *args = data; - struct kfd_dev *dev; - struct kfd_process_device *pdd; - struct dbg_address_watch_info aw_info; - unsigned char *args_buff; - long status; - void __user *cmd_from_user; - uint64_t watch_mask_value = 0; - unsigned int args_idx = 0; - - memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info)); - - mutex_lock(&p->mutex); - pdd = kfd_process_device_data_by_id(p, args->gpu_id); - mutex_unlock(&p->mutex); - if (!pdd) { - pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); - return -EINVAL; - } - dev = pdd->dev; - - if (dev->adev->asic_type == CHIP_CARRIZO) { - pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); - return -EINVAL; - } - cmd_from_user = (void __user *) args->content_ptr; - - /* Validate arguments */ - - if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) || - (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) || - (cmd_from_user == NULL)) - return -EINVAL; - - /* this is the actual buffer to work with */ - args_buff = memdup_user(cmd_from_user, - args->buf_size_in_bytes - sizeof(*args)); - if (IS_ERR(args_buff)) - return PTR_ERR(args_buff); - - aw_info.process = p; - - aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx])); - args_idx += sizeof(aw_info.num_watch_points); - - aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx]; - args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points; - - /* - * set watch address base pointer to point on the array base - * within args_buff - */ - aw_info.watch_address = (uint64_t *) &args_buff[args_idx]; - - /* skip over the addresses buffer */ - args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points; - - if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) { - status = -EINVAL; - goto out; - } - - watch_mask_value = (uint64_t) args_buff[args_idx]; - - if (watch_mask_value > 0) { - /* - * There is an array of masks. - * set watch mask base pointer to point on the array base - * within args_buff - */ - aw_info.watch_mask = (uint64_t *) &args_buff[args_idx]; - - /* skip over the masks buffer */ - args_idx += sizeof(aw_info.watch_mask) * - aw_info.num_watch_points; - } else { - /* just the NULL mask, set to NULL and skip over it */ - aw_info.watch_mask = NULL; - args_idx += sizeof(aw_info.watch_mask); - } - - if (args_idx >= args->buf_size_in_bytes - sizeof(args)) { - status = -EINVAL; - goto out; - } - - /* Currently HSA Event is not supported for DBG */ - aw_info.watch_event = NULL; - - mutex_lock(kfd_get_dbgmgr_mutex()); - - status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info); - - mutex_unlock(kfd_get_dbgmgr_mutex()); - -out: - kfree(args_buff); - - return status; + return -EPERM; } /* Parse and generate fixed size data structure for wave control */ static int kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, void *data) { - struct kfd_ioctl_dbg_wave_control_args *args = data; - struct kfd_dev *dev; - struct kfd_process_device *pdd; - struct dbg_wave_control_info wac_info; - unsigned char *args_buff; - uint32_t computed_buff_size; - long status; - void __user *cmd_from_user; - unsigned int args_idx = 0; - - memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info)); - - /* we use compact form, independent of the packing attribute value */ - computed_buff_size = sizeof(*args) + - sizeof(wac_info.mode) + - sizeof(wac_info.operand) + - sizeof(wac_info.dbgWave_msg.DbgWaveMsg) + - sizeof(wac_info.dbgWave_msg.MemoryVA) + - sizeof(wac_info.trapId); - - mutex_lock(&p->mutex); - pdd = kfd_process_device_data_by_id(p, args->gpu_id); - mutex_unlock(&p->mutex); - if (!pdd) { - pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); - return -EINVAL; - } - dev = pdd->dev; - - if (dev->adev->asic_type == CHIP_CARRIZO) { - pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); - return -EINVAL; - } - - /* input size must match the computed "compact" size */ - if (args->buf_size_in_bytes != computed_buff_size) { - pr_debug("size mismatch, computed : actual %u : %u\n", - args->buf_size_in_bytes, computed_buff_size); - return -EINVAL; - } - - cmd_from_user = (void __user *) args->content_ptr; - - if (cmd_from_user == NULL) - return -EINVAL; - - /* copy the entire buffer from user */ - - args_buff = memdup_user(cmd_from_user, - args->buf_size_in_bytes - sizeof(*args)); - if (IS_ERR(args_buff)) - return PTR_ERR(args_buff); - - /* move ptr to the start of the "pay-load" area */ - wac_info.process = p; - - wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx])); - args_idx += sizeof(wac_info.operand); - - wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx])); - args_idx += sizeof(wac_info.mode); - - wac_info.trapId = *((uint32_t *)(&args_buff[args_idx])); - args_idx += sizeof(wac_info.trapId); - - wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = - *((uint32_t *)(&args_buff[args_idx])); - wac_info.dbgWave_msg.MemoryVA = NULL; - - mutex_lock(kfd_get_dbgmgr_mutex()); - - pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n", - wac_info.process, wac_info.operand, - wac_info.mode, wac_info.trapId, - wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); - - status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info); - - pr_debug("Returned status of dbg manager is %ld\n", status); - - mutex_unlock(kfd_get_dbgmgr_mutex()); - - kfree(args_buff); - - return status; + return -EPERM; } static int kfd_ioctl_get_clock_counters(struct file *filep, @@ -2900,16 +2626,16 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS, kfd_ioctl_wait_events, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER, + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED, kfd_ioctl_dbg_register, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER, + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED, kfd_ioctl_dbg_unregister, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH, + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED, kfd_ioctl_dbg_address_watch, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED, kfd_ioctl_dbg_wave_control, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c deleted file mode 100644 index 8eca9ed3ab36..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ /dev/null @@ -1,845 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "kfd_pm4_headers.h" -#include "kfd_pm4_headers_diq.h" -#include "kfd_kernel_queue.h" -#include "kfd_priv.h" -#include "kfd_pm4_opcodes.h" -#include "cik_regs.h" -#include "kfd_dbgmgr.h" -#include "kfd_dbgdev.h" -#include "kfd_device_queue_manager.h" - -static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) -{ - dev->kfd2kgd->address_watch_disable(dev->adev); -} - -static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, - u32 pasid, uint64_t vmid0_address, - uint32_t *packet_buff, size_t size_in_bytes) -{ - struct pm4__release_mem *rm_packet; - struct pm4__indirect_buffer_pasid *ib_packet; - struct kfd_mem_obj *mem_obj; - size_t pq_packets_size_in_bytes; - union ULARGE_INTEGER *largep; - union ULARGE_INTEGER addr; - struct kernel_queue *kq; - uint64_t *rm_state; - unsigned int *ib_packet_buff; - int status; - - if (WARN_ON(!size_in_bytes)) - return -EINVAL; - - kq = dbgdev->kq; - - pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + - sizeof(struct pm4__indirect_buffer_pasid); - - /* - * We acquire a buffer from DIQ - * The receive packet buff will be sitting on the Indirect Buffer - * and in the PQ we put the IB packet + sync packet(s). - */ - status = kq_acquire_packet_buffer(kq, - pq_packets_size_in_bytes / sizeof(uint32_t), - &ib_packet_buff); - if (status) { - pr_err("kq_acquire_packet_buffer failed\n"); - return status; - } - - memset(ib_packet_buff, 0, pq_packets_size_in_bytes); - - ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); - - ib_packet->header.count = 3; - ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; - ib_packet->header.type = PM4_TYPE_3; - - largep = (union ULARGE_INTEGER *) &vmid0_address; - - ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; - ib_packet->bitfields3.ib_base_hi = largep->u.high_part; - - ib_packet->control = (1 << 23) | (1 << 31) | - ((size_in_bytes / 4) & 0xfffff); - - ib_packet->bitfields5.pasid = pasid; - - /* - * for now we use release mem for GPU-CPU synchronization - * Consider WaitRegMem + WriteData as a better alternative - * we get a GART allocations ( gpu/cpu mapping), - * for the sync variable, and wait until: - * (a) Sync with HW - * (b) Sync var is written by CP to mem. - */ - rm_packet = (struct pm4__release_mem *) (ib_packet_buff + - (sizeof(struct pm4__indirect_buffer_pasid) / - sizeof(unsigned int))); - - status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), - &mem_obj); - - if (status) { - pr_err("Failed to allocate GART memory\n"); - kq_rollback_packet(kq); - return status; - } - - rm_state = (uint64_t *) mem_obj->cpu_ptr; - - *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; - - rm_packet->header.opcode = IT_RELEASE_MEM; - rm_packet->header.type = PM4_TYPE_3; - rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2; - - rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; - rm_packet->bitfields2.event_index = - event_index___release_mem__end_of_pipe; - - rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; - rm_packet->bitfields2.atc = 0; - rm_packet->bitfields2.tc_wb_action_ena = 1; - - addr.quad_part = mem_obj->gpu_addr; - - rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; - rm_packet->address_hi = addr.u.high_part; - - rm_packet->bitfields3.data_sel = - data_sel___release_mem__send_64_bit_data; - - rm_packet->bitfields3.int_sel = - int_sel___release_mem__send_data_after_write_confirm; - - rm_packet->bitfields3.dst_sel = - dst_sel___release_mem__memory_controller; - - rm_packet->data_lo = QUEUESTATE__ACTIVE; - - kq_submit_packet(kq); - - /* Wait till CP writes sync code: */ - status = amdkfd_fence_wait_timeout( - rm_state, - QUEUESTATE__ACTIVE, 1500); - - kfd_gtt_sa_free(dbgdev->dev, mem_obj); - - return status; -} - -static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) -{ - /* - * no action is needed in this case, - * just make sure diq will not be used - */ - - dbgdev->kq = NULL; - - return 0; -} - -static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) -{ - struct queue_properties properties; - unsigned int qid; - struct kernel_queue *kq = NULL; - int status; - - properties.type = KFD_QUEUE_TYPE_DIQ; - - status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, - &properties, &qid, NULL, NULL, NULL, NULL); - - if (status) { - pr_err("Failed to create DIQ\n"); - return status; - } - - pr_debug("DIQ Created with queue id: %d\n", qid); - - kq = pqm_get_kernel_queue(dbgdev->pqm, qid); - - if (!kq) { - pr_err("Error getting DIQ\n"); - pqm_destroy_queue(dbgdev->pqm, qid); - return -EFAULT; - } - - dbgdev->kq = kq; - - return status; -} - -static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) -{ - /* disable watch address */ - dbgdev_address_watch_disable_nodiq(dbgdev->dev); - return 0; -} - -static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) -{ - /* todo - disable address watch */ - int status; - - status = pqm_destroy_queue(dbgdev->pqm, - dbgdev->kq->queue->properties.queue_id); - dbgdev->kq = NULL; - - return status; -} - -static void dbgdev_address_watch_set_registers( - const struct dbg_address_watch_info *adw_info, - union TCP_WATCH_ADDR_H_BITS *addrHi, - union TCP_WATCH_ADDR_L_BITS *addrLo, - union TCP_WATCH_CNTL_BITS *cntl, - unsigned int index, unsigned int vmid) -{ - union ULARGE_INTEGER addr; - - addr.quad_part = 0; - addrHi->u32All = 0; - addrLo->u32All = 0; - cntl->u32All = 0; - - if (adw_info->watch_mask) - cntl->bitfields.mask = - (uint32_t) (adw_info->watch_mask[index] & - ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); - else - cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; - - addr.quad_part = (unsigned long long) adw_info->watch_address[index]; - - addrHi->bitfields.addr = addr.u.high_part & - ADDRESS_WATCH_REG_ADDHIGH_MASK; - addrLo->bitfields.addr = - (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); - - cntl->bitfields.mode = adw_info->watch_mode[index]; - cntl->bitfields.vmid = (uint32_t) vmid; - /* for now assume it is an ATC address */ - cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; - - pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); - pr_debug("\t\t%20s %08x\n", "set reg add high :", - addrHi->bitfields.addr); - pr_debug("\t\t%20s %08x\n", "set reg add low :", - addrLo->bitfields.addr); -} - -static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, - struct dbg_address_watch_info *adw_info) -{ - union TCP_WATCH_ADDR_H_BITS addrHi; - union TCP_WATCH_ADDR_L_BITS addrLo; - union TCP_WATCH_CNTL_BITS cntl; - struct kfd_process_device *pdd; - unsigned int i; - - /* taking the vmid for that process on the safe way using pdd */ - pdd = kfd_get_process_device_data(dbgdev->dev, - adw_info->process); - if (!pdd) { - pr_err("Failed to get pdd for wave control no DIQ\n"); - return -EFAULT; - } - - addrHi.u32All = 0; - addrLo.u32All = 0; - cntl.u32All = 0; - - if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || - (adw_info->num_watch_points == 0)) { - pr_err("num_watch_points is invalid\n"); - return -EINVAL; - } - - if (!adw_info->watch_mode || !adw_info->watch_address) { - pr_err("adw_info fields are not valid\n"); - return -EINVAL; - } - - for (i = 0; i < adw_info->num_watch_points; i++) { - dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, - &cntl, i, pdd->qpd.vmid); - - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - pr_debug("\t\t%20s %08x\n", "register index :", i); - pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); - pr_debug("\t\t%20s %08x\n", "Address Low is :", - addrLo.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Address high is :", - addrHi.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Address high is :", - addrHi.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Control Mask is :", - cntl.bitfields.mask); - pr_debug("\t\t%20s %08x\n", "Control Mode is :", - cntl.bitfields.mode); - pr_debug("\t\t%20s %08x\n", "Control Vmid is :", - cntl.bitfields.vmid); - pr_debug("\t\t%20s %08x\n", "Control atc is :", - cntl.bitfields.atc); - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - - pdd->dev->kfd2kgd->address_watch_execute( - dbgdev->dev->adev, - i, - cntl.u32All, - addrHi.u32All, - addrLo.u32All); - } - - return 0; -} - -static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, - struct dbg_address_watch_info *adw_info) -{ - struct pm4__set_config_reg *packets_vec; - union TCP_WATCH_ADDR_H_BITS addrHi; - union TCP_WATCH_ADDR_L_BITS addrLo; - union TCP_WATCH_CNTL_BITS cntl; - struct kfd_mem_obj *mem_obj; - unsigned int aw_reg_add_dword; - uint32_t *packet_buff_uint; - unsigned int i; - int status; - size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; - /* we do not control the vmid in DIQ mode, just a place holder */ - unsigned int vmid = 0; - - addrHi.u32All = 0; - addrLo.u32All = 0; - cntl.u32All = 0; - - if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || - (adw_info->num_watch_points == 0)) { - pr_err("num_watch_points is invalid\n"); - return -EINVAL; - } - - if (!adw_info->watch_mode || !adw_info->watch_address) { - pr_err("adw_info fields are not valid\n"); - return -EINVAL; - } - - status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); - - if (status) { - pr_err("Failed to allocate GART memory\n"); - return status; - } - - packet_buff_uint = mem_obj->cpu_ptr; - - memset(packet_buff_uint, 0, ib_size); - - packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); - - packets_vec[0].header.count = 1; - packets_vec[0].header.opcode = IT_SET_CONFIG_REG; - packets_vec[0].header.type = PM4_TYPE_3; - packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; - packets_vec[0].bitfields2.insert_vmid = 1; - packets_vec[1].ordinal1 = packets_vec[0].ordinal1; - packets_vec[1].bitfields2.insert_vmid = 0; - packets_vec[2].ordinal1 = packets_vec[0].ordinal1; - packets_vec[2].bitfields2.insert_vmid = 0; - packets_vec[3].ordinal1 = packets_vec[0].ordinal1; - packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; - packets_vec[3].bitfields2.insert_vmid = 1; - - for (i = 0; i < adw_info->num_watch_points; i++) { - dbgdev_address_watch_set_registers(adw_info, - &addrHi, - &addrLo, - &cntl, - i, - vmid); - - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - pr_debug("\t\t%20s %08x\n", "register index :", i); - pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); - pr_debug("\t\t%20s %p\n", "Add ptr is :", - adw_info->watch_address); - pr_debug("\t\t%20s %08llx\n", "Add is :", - adw_info->watch_address[i]); - pr_debug("\t\t%20s %08x\n", "Address Low is :", - addrLo.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Address high is :", - addrHi.bitfields.addr); - pr_debug("\t\t%20s %08x\n", "Control Mask is :", - cntl.bitfields.mask); - pr_debug("\t\t%20s %08x\n", "Control Mode is :", - cntl.bitfields.mode); - pr_debug("\t\t%20s %08x\n", "Control Vmid is :", - cntl.bitfields.vmid); - pr_debug("\t\t%20s %08x\n", "Control atc is :", - cntl.bitfields.atc); - pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); - - aw_reg_add_dword = - dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->adev, - i, - ADDRESS_WATCH_REG_CNTL); - - packets_vec[0].bitfields2.reg_offset = - aw_reg_add_dword - AMD_CONFIG_REG_BASE; - - packets_vec[0].reg_data[0] = cntl.u32All; - - aw_reg_add_dword = - dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->adev, - i, - ADDRESS_WATCH_REG_ADDR_HI); - - packets_vec[1].bitfields2.reg_offset = - aw_reg_add_dword - AMD_CONFIG_REG_BASE; - packets_vec[1].reg_data[0] = addrHi.u32All; - - aw_reg_add_dword = - dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->adev, - i, - ADDRESS_WATCH_REG_ADDR_LO); - - packets_vec[2].bitfields2.reg_offset = - aw_reg_add_dword - AMD_CONFIG_REG_BASE; - packets_vec[2].reg_data[0] = addrLo.u32All; - - /* enable watch flag if address is not zero*/ - if (adw_info->watch_address[i] > 0) - cntl.bitfields.valid = 1; - else - cntl.bitfields.valid = 0; - - aw_reg_add_dword = - dbgdev->dev->kfd2kgd->address_watch_get_offset( - dbgdev->dev->adev, - i, - ADDRESS_WATCH_REG_CNTL); - - packets_vec[3].bitfields2.reg_offset = - aw_reg_add_dword - AMD_CONFIG_REG_BASE; - packets_vec[3].reg_data[0] = cntl.u32All; - - status = dbgdev_diq_submit_ib( - dbgdev, - adw_info->process->pasid, - mem_obj->gpu_addr, - packet_buff_uint, - ib_size); - - if (status) { - pr_err("Failed to submit IB to DIQ\n"); - break; - } - } - - kfd_gtt_sa_free(dbgdev->dev, mem_obj); - return status; -} - -static int dbgdev_wave_control_set_registers( - struct dbg_wave_control_info *wac_info, - union SQ_CMD_BITS *in_reg_sq_cmd, - union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) -{ - int status = 0; - union SQ_CMD_BITS reg_sq_cmd; - union GRBM_GFX_INDEX_BITS reg_gfx_index; - struct HsaDbgWaveMsgAMDGen2 *pMsg; - - reg_sq_cmd.u32All = 0; - reg_gfx_index.u32All = 0; - pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; - - switch (wac_info->mode) { - /* Send command to single wave */ - case HSA_DBG_WAVEMODE_SINGLE: - /* - * Limit access to the process waves only, - * by setting vmid check - */ - reg_sq_cmd.bits.check_vmid = 1; - reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; - reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; - reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; - - reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; - reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; - reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; - - break; - - /* Send command to all waves with matching VMID */ - case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: - - reg_gfx_index.bits.sh_broadcast_writes = 1; - reg_gfx_index.bits.se_broadcast_writes = 1; - reg_gfx_index.bits.instance_broadcast_writes = 1; - - reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; - - break; - - /* Send command to all CU waves with matching VMID */ - case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: - - reg_sq_cmd.bits.check_vmid = 1; - reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; - - reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; - reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; - reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; - - break; - - default: - return -EINVAL; - } - - switch (wac_info->operand) { - case HSA_DBG_WAVEOP_HALT: - reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT; - break; - - case HSA_DBG_WAVEOP_RESUME: - reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME; - break; - - case HSA_DBG_WAVEOP_KILL: - reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; - break; - - case HSA_DBG_WAVEOP_DEBUG: - reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG; - break; - - case HSA_DBG_WAVEOP_TRAP: - if (wac_info->trapId < MAX_TRAPID) { - reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP; - reg_sq_cmd.bits.trap_id = wac_info->trapId; - } else { - status = -EINVAL; - } - break; - - default: - status = -EINVAL; - break; - } - - if (status == 0) { - *in_reg_sq_cmd = reg_sq_cmd; - *in_reg_gfx_index = reg_gfx_index; - } - - return status; -} - -static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, - struct dbg_wave_control_info *wac_info) -{ - - int status; - union SQ_CMD_BITS reg_sq_cmd; - union GRBM_GFX_INDEX_BITS reg_gfx_index; - struct kfd_mem_obj *mem_obj; - uint32_t *packet_buff_uint; - struct pm4__set_config_reg *packets_vec; - size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; - - reg_sq_cmd.u32All = 0; - - status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, - ®_gfx_index); - if (status) { - pr_err("Failed to set wave control registers\n"); - return status; - } - - /* we do not control the VMID in DIQ, so reset it to a known value */ - reg_sq_cmd.bits.vm_id = 0; - - pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - - pr_debug("\t\t mode is: %u\n", wac_info->mode); - pr_debug("\t\t operand is: %u\n", wac_info->operand); - pr_debug("\t\t trap id is: %u\n", wac_info->trapId); - pr_debug("\t\t msg value is: %u\n", - wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); - pr_debug("\t\t vmid is: N/A\n"); - - pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); - pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); - pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); - pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); - pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); - pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); - pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); - - pr_debug("\t\t ibw is : %u\n", - reg_gfx_index.bitfields.instance_broadcast_writes); - pr_debug("\t\t ii is : %u\n", - reg_gfx_index.bitfields.instance_index); - pr_debug("\t\t sebw is : %u\n", - reg_gfx_index.bitfields.se_broadcast_writes); - pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); - pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); - pr_debug("\t\t sbw is : %u\n", - reg_gfx_index.bitfields.sh_broadcast_writes); - - pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - - status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); - - if (status != 0) { - pr_err("Failed to allocate GART memory\n"); - return status; - } - - packet_buff_uint = mem_obj->cpu_ptr; - - memset(packet_buff_uint, 0, ib_size); - - packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; - packets_vec[0].header.count = 1; - packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; - packets_vec[0].header.type = PM4_TYPE_3; - packets_vec[0].bitfields2.reg_offset = - GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; - - packets_vec[0].bitfields2.insert_vmid = 0; - packets_vec[0].reg_data[0] = reg_gfx_index.u32All; - - packets_vec[1].header.count = 1; - packets_vec[1].header.opcode = IT_SET_CONFIG_REG; - packets_vec[1].header.type = PM4_TYPE_3; - packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE; - - packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; - packets_vec[1].bitfields2.insert_vmid = 1; - packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; - - /* Restore the GRBM_GFX_INDEX register */ - - reg_gfx_index.u32All = 0; - reg_gfx_index.bits.sh_broadcast_writes = 1; - reg_gfx_index.bits.instance_broadcast_writes = 1; - reg_gfx_index.bits.se_broadcast_writes = 1; - - - packets_vec[2].ordinal1 = packets_vec[0].ordinal1; - packets_vec[2].bitfields2.reg_offset = - GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; - - packets_vec[2].bitfields2.insert_vmid = 0; - packets_vec[2].reg_data[0] = reg_gfx_index.u32All; - - status = dbgdev_diq_submit_ib( - dbgdev, - wac_info->process->pasid, - mem_obj->gpu_addr, - packet_buff_uint, - ib_size); - - if (status) - pr_err("Failed to submit IB to DIQ\n"); - - kfd_gtt_sa_free(dbgdev->dev, mem_obj); - - return status; -} - -static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, - struct dbg_wave_control_info *wac_info) -{ - int status; - union SQ_CMD_BITS reg_sq_cmd; - union GRBM_GFX_INDEX_BITS reg_gfx_index; - struct kfd_process_device *pdd; - - reg_sq_cmd.u32All = 0; - - /* taking the VMID for that process on the safe way using PDD */ - pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); - - if (!pdd) { - pr_err("Failed to get pdd for wave control no DIQ\n"); - return -EFAULT; - } - status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, - ®_gfx_index); - if (status) { - pr_err("Failed to set wave control registers\n"); - return status; - } - - /* for non DIQ we need to patch the VMID: */ - - reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; - - pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - - pr_debug("\t\t mode is: %u\n", wac_info->mode); - pr_debug("\t\t operand is: %u\n", wac_info->operand); - pr_debug("\t\t trap id is: %u\n", wac_info->trapId); - pr_debug("\t\t msg value is: %u\n", - wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); - pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid); - - pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); - pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); - pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); - pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); - pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); - pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); - pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); - - pr_debug("\t\t ibw is : %u\n", - reg_gfx_index.bitfields.instance_broadcast_writes); - pr_debug("\t\t ii is : %u\n", - reg_gfx_index.bitfields.instance_index); - pr_debug("\t\t sebw is : %u\n", - reg_gfx_index.bitfields.se_broadcast_writes); - pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); - pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); - pr_debug("\t\t sbw is : %u\n", - reg_gfx_index.bitfields.sh_broadcast_writes); - - pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); - - return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->adev, - reg_gfx_index.u32All, - reg_sq_cmd.u32All); -} - -int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) -{ - int status = 0; - unsigned int vmid; - uint16_t queried_pasid; - union SQ_CMD_BITS reg_sq_cmd; - union GRBM_GFX_INDEX_BITS reg_gfx_index; - struct kfd_process_device *pdd; - struct dbg_wave_control_info wac_info; - int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; - int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; - - reg_sq_cmd.u32All = 0; - status = 0; - - wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; - wac_info.operand = HSA_DBG_WAVEOP_KILL; - - pr_debug("Killing all process wavefronts\n"); - - /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. - * ATC_VMID15_PASID_MAPPING - * to check which VMID the current process is mapped to. - */ - - for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { - status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info - (dev->adev, vmid, &queried_pasid); - - if (status && queried_pasid == p->pasid) { - pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", - vmid, p->pasid); - break; - } - } - - if (vmid > last_vmid_to_scan) { - pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); - return -EFAULT; - } - - /* taking the VMID for that process on the safe way using PDD */ - pdd = kfd_get_process_device_data(dev, p); - if (!pdd) - return -EFAULT; - - status = dbgdev_wave_control_set_registers(&wac_info, ®_sq_cmd, - ®_gfx_index); - if (status != 0) - return -EINVAL; - - /* for non DIQ we need to patch the VMID: */ - reg_sq_cmd.bits.vm_id = vmid; - - dev->kfd2kgd->wave_control_execute(dev->adev, - reg_gfx_index.u32All, - reg_sq_cmd.u32All); - - return 0; -} - -void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, - enum DBGDEV_TYPE type) -{ - pdbgdev->dev = pdev; - pdbgdev->kq = NULL; - pdbgdev->type = type; - pdbgdev->pqm = NULL; - - switch (type) { - case DBGDEV_TYPE_NODIQ: - pdbgdev->dbgdev_register = dbgdev_register_nodiq; - pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; - pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; - pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; - break; - case DBGDEV_TYPE_DIQ: - default: - pdbgdev->dbgdev_register = dbgdev_register_diq; - pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; - pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; - pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; - break; - } - -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h deleted file mode 100644 index 0619c777b47e..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef KFD_DBGDEV_H_ -#define KFD_DBGDEV_H_ - -enum { - SQ_CMD_VMID_OFFSET = 28, - ADDRESS_WATCH_CNTL_OFFSET = 24 -}; - -enum { - PRIV_QUEUE_SYNC_TIME_MS = 200 -}; - -/* CONTEXT reg space definition */ -enum { - CONTEXT_REG_BASE = 0xA000, - CONTEXT_REG_END = 0xA400, - CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE -}; - -/* USER CONFIG reg space definition */ -enum { - USERCONFIG_REG_BASE = 0xC000, - USERCONFIG_REG_END = 0x10000, - USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE -}; - -/* CONFIG reg space definition */ -enum { - AMD_CONFIG_REG_BASE = 0x2000, /* in dwords */ - AMD_CONFIG_REG_END = 0x2B00, - AMD_CONFIG_REG_SIZE = AMD_CONFIG_REG_END - AMD_CONFIG_REG_BASE -}; - -/* SH reg space definition */ -enum { - SH_REG_BASE = 0x2C00, - SH_REG_END = 0x3000, - SH_REG_SIZE = SH_REG_END - SH_REG_BASE -}; - -/* SQ_CMD definitions */ -#define SQ_CMD 0x8DEC - -enum SQ_IND_CMD_CMD { - SQ_IND_CMD_CMD_NULL = 0x00000000, - SQ_IND_CMD_CMD_HALT = 0x00000001, - SQ_IND_CMD_CMD_RESUME = 0x00000002, - SQ_IND_CMD_CMD_KILL = 0x00000003, - SQ_IND_CMD_CMD_DEBUG = 0x00000004, - SQ_IND_CMD_CMD_TRAP = 0x00000005, -}; - -enum SQ_IND_CMD_MODE { - SQ_IND_CMD_MODE_SINGLE = 0x00000000, - SQ_IND_CMD_MODE_BROADCAST = 0x00000001, - SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002, - SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003, - SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004, -}; - -union SQ_IND_INDEX_BITS { - struct { - uint32_t wave_id:4; - uint32_t simd_id:2; - uint32_t thread_id:6; - uint32_t:1; - uint32_t force_read:1; - uint32_t read_timeout:1; - uint32_t unindexed:1; - uint32_t index:16; - - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union SQ_IND_CMD_BITS { - struct { - uint32_t data:32; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union SQ_CMD_BITS { - struct { - uint32_t cmd:3; - uint32_t:1; - uint32_t mode:3; - uint32_t check_vmid:1; - uint32_t trap_id:3; - uint32_t:5; - uint32_t wave_id:4; - uint32_t simd_id:2; - uint32_t:2; - uint32_t queue_id:3; - uint32_t:1; - uint32_t vm_id:4; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union SQ_IND_DATA_BITS { - struct { - uint32_t data:32; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union GRBM_GFX_INDEX_BITS { - struct { - uint32_t instance_index:8; - uint32_t sh_index:8; - uint32_t se_index:8; - uint32_t:5; - uint32_t sh_broadcast_writes:1; - uint32_t instance_broadcast_writes:1; - uint32_t se_broadcast_writes:1; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union TCP_WATCH_ADDR_H_BITS { - struct { - uint32_t addr:16; - uint32_t:16; - - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -union TCP_WATCH_ADDR_L_BITS { - struct { - uint32_t:6; - uint32_t addr:26; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -enum { - QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */ - QUEUESTATE__ACTIVE_COMPLETION_PENDING, - QUEUESTATE__ACTIVE -}; - -union ULARGE_INTEGER { - struct { - uint32_t low_part; - uint32_t high_part; - } u; - unsigned long long quad_part; -}; - - -#define KFD_CIK_VMID_START_OFFSET (8) -#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8)) - - -void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, - enum DBGDEV_TYPE type); - -union TCP_WATCH_CNTL_BITS { - struct { - uint32_t mask:24; - uint32_t vmid:4; - uint32_t atc:1; - uint32_t mode:2; - uint32_t valid:1; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - -enum { - ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, - ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, - ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, - /* extend the mask to 26 bits in order to match the low address field */ - ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, - ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF -}; - -enum { - MAX_TRAPID = 8, /* 3 bits in the bitfield. */ - MAX_WATCH_ADDRESSES = 4 -}; - -enum { - ADDRESS_WATCH_REG_ADDR_HI = 0, - ADDRESS_WATCH_REG_ADDR_LO, - ADDRESS_WATCH_REG_CNTL, - ADDRESS_WATCH_REG_MAX -}; - -#endif /* KFD_DBGDEV_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c deleted file mode 100644 index 9bfa50633654..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ -#include -#include -#include -#include -#include -#include - -#include "kfd_priv.h" -#include "cik_regs.h" -#include "kfd_pm4_headers.h" -#include "kfd_pm4_headers_diq.h" -#include "kfd_dbgmgr.h" -#include "kfd_dbgdev.h" -#include "kfd_device_queue_manager.h" - -static DEFINE_MUTEX(kfd_dbgmgr_mutex); - -struct mutex *kfd_get_dbgmgr_mutex(void) -{ - return &kfd_dbgmgr_mutex; -} - - -static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) -{ - kfree(pmgr->dbgdev); - - pmgr->dbgdev = NULL; - pmgr->pasid = 0; - pmgr->dev = NULL; -} - -void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) -{ - if (pmgr) { - kfd_dbgmgr_uninitialize(pmgr); - kfree(pmgr); - } -} - -bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) -{ - enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; - struct kfd_dbgmgr *new_buff; - - if (WARN_ON(!pdev->init_complete)) - return false; - - new_buff = kfd_alloc_struct(new_buff); - if (!new_buff) { - pr_err("Failed to allocate dbgmgr instance\n"); - return false; - } - - new_buff->pasid = 0; - new_buff->dev = pdev; - new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev); - if (!new_buff->dbgdev) { - pr_err("Failed to allocate dbgdev instance\n"); - kfree(new_buff); - return false; - } - - /* get actual type of DBGDevice cpsch or not */ - if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) - type = DBGDEV_TYPE_NODIQ; - - kfd_dbgdev_init(new_buff->dbgdev, pdev, type); - *ppmgr = new_buff; - - return true; -} - -long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) -{ - if (pmgr->pasid != 0) { - pr_debug("H/W debugger is already active using pasid 0x%x\n", - pmgr->pasid); - return -EBUSY; - } - - /* remember pasid */ - pmgr->pasid = p->pasid; - - /* provide the pqm for diq generation */ - pmgr->dbgdev->pqm = &p->pqm; - - /* activate the actual registering */ - pmgr->dbgdev->dbgdev_register(pmgr->dbgdev); - - return 0; -} - -long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) -{ - /* Is the requests coming from the already registered process? */ - if (pmgr->pasid != p->pasid) { - pr_debug("H/W debugger is not registered by calling pasid 0x%x\n", - p->pasid); - return -EINVAL; - } - - pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev); - - pmgr->pasid = 0; - - return 0; -} - -long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, - struct dbg_wave_control_info *wac_info) -{ - /* Is the requests coming from the already registered process? */ - if (pmgr->pasid != wac_info->process->pasid) { - pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n", - wac_info->process->pasid); - return -EINVAL; - } - - return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info); -} - -long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, - struct dbg_address_watch_info *adw_info) -{ - /* Is the requests coming from the already registered process? */ - if (pmgr->pasid != adw_info->process->pasid) { - pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n", - adw_info->process->pasid); - return -EINVAL; - } - - return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev, - adw_info); -} - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h deleted file mode 100644 index f9c6df1fdc5c..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef KFD_DBGMGR_H_ -#define KFD_DBGMGR_H_ - -#include "kfd_priv.h" - -/* must align with hsakmttypes definition */ -#pragma pack(push, 4) - -enum HSA_DBG_WAVEOP { - HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ - HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ - HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ - HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */ - HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */ - HSA_DBG_NUM_WAVEOP = 5, - HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF -}; - -enum HSA_DBG_WAVEMODE { - /* send command to a single wave */ - HSA_DBG_WAVEMODE_SINGLE = 0, - /* - * Broadcast to all wavefronts of all processes is not - * supported for HSA user mode - */ - - /* send to waves within current process */ - HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, - /* send to waves within current process on CU */ - HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, - HSA_DBG_NUM_WAVEMODE = 3, - HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF -}; - -enum HSA_DBG_WAVEMSG_TYPE { - HSA_DBG_WAVEMSG_AUTO = 0, - HSA_DBG_WAVEMSG_USER = 1, - HSA_DBG_WAVEMSG_ERROR = 2, - HSA_DBG_NUM_WAVEMSG, - HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF -}; - -enum HSA_DBG_WATCH_MODE { - HSA_DBG_WATCH_READ = 0, /* Read operations only */ - HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */ - HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */ - HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */ - HSA_DBG_WATCH_NUM, - HSA_DBG_WATCH_SIZE = 0xFFFFFFFF -}; - -/* This structure is hardware specific and may change in the future */ -struct HsaDbgWaveMsgAMDGen2 { - union { - struct ui32 { - uint32_t UserData:8; /* user data */ - uint32_t ShaderArray:1; /* Shader array */ - uint32_t Priv:1; /* Privileged */ - uint32_t Reserved0:4; /* Reserved, should be 0 */ - uint32_t WaveId:4; /* wave id */ - uint32_t SIMD:2; /* SIMD id */ - uint32_t HSACU:4; /* Compute unit */ - uint32_t ShaderEngine:2;/* Shader engine */ - uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */ - uint32_t Reserved1:4; /* Reserved, should be 0 */ - } ui32; - uint32_t Value; - }; - uint32_t Reserved2; -}; - -union HsaDbgWaveMessageAMD { - struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2; - /* for future HsaDbgWaveMsgAMDGen3; */ -}; - -struct HsaDbgWaveMessage { - void *MemoryVA; /* ptr to associated host-accessible data */ - union HsaDbgWaveMessageAMD DbgWaveMsg; -}; - -/* - * TODO: This definitions to be MOVED to kfd_event, once it is implemented. - * - * HSA sync primitive, Event and HW Exception notification API definitions. - * The API functions allow the runtime to define a so-called sync-primitive, - * a SW object combining a user-mode provided "syncvar" and a scheduler event - * that can be signaled through a defined GPU interrupt. A syncvar is - * a process virtual memory location of a certain size that can be accessed - * by CPU and GPU shader code within the process to set and query the content - * within that memory. The definition of the content is determined by the HSA - * runtime and potentially GPU shader code interfacing with the HSA runtime. - * The syncvar values may be commonly written through an PM4 WRITE_DATA packet - * in the user mode instruction stream. The OS scheduler event is typically - * associated and signaled by an interrupt issued by the GPU, but other HSA - * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced - * by the KFD by this mechanism, too. - */ - -/* these are the new definitions for events */ -enum HSA_EVENTTYPE { - HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */ - HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ - HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change - * (start/stop) - */ - HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ - HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ - HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ - HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */ - HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state - * (EOP pm4) - */ - /* ... */ - HSA_EVENTTYPE_MAXID, - HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF -}; - -/* Sub-definitions for various event types: Syncvar */ -struct HsaSyncVar { - union SyncVar { - void *UserData; /* pointer to user mode data */ - uint64_t UserDataPtrValue; /* 64bit compatibility of value */ - } SyncVar; - uint64_t SyncVarSize; -}; - -/* Sub-definitions for various event types: NodeChange */ - -enum HSA_EVENTTYPE_NODECHANGE_FLAGS { - HSA_EVENTTYPE_NODECHANGE_ADD = 0, - HSA_EVENTTYPE_NODECHANGE_REMOVE = 1, - HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF -}; - -struct HsaNodeChange { - /* HSA node added/removed on the platform */ - enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; -}; - -/* Sub-definitions for various event types: DeviceStateChange */ -enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS { - /* device started (and available) */ - HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, - /* device stopped (i.e. unavailable) */ - HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, - HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF -}; - -enum HSA_DEVICE { - HSA_DEVICE_CPU = 0, - HSA_DEVICE_GPU = 1, - MAX_HSA_DEVICE = 2 -}; - -struct HsaDeviceStateChange { - uint32_t NodeId; /* F-NUMA node that contains the device */ - enum HSA_DEVICE Device; /* device type: GPU or CPU */ - enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */ -}; - -struct HsaEventData { - enum HSA_EVENTTYPE EventType; /* event type */ - union EventData { - /* - * return data associated with HSA_EVENTTYPE_SIGNAL - * and other events - */ - struct HsaSyncVar SyncVar; - - /* data associated with HSA_EVENTTYPE_NODE_CHANGE */ - struct HsaNodeChange NodeChangeState; - - /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */ - struct HsaDeviceStateChange DeviceState; - } EventData; - - /* the following data entries are internal to the KFD & thunk itself */ - - /* internal thunk store for Event data (OsEventHandle) */ - uint64_t HWData1; - /* internal thunk store for Event data (HWAddress) */ - uint64_t HWData2; - /* internal thunk store for Event data (HWData) */ - uint32_t HWData3; -}; - -struct HsaEventDescriptor { - /* event type to allocate */ - enum HSA_EVENTTYPE EventType; - /* H-NUMA node containing GPU device that is event source */ - uint32_t NodeId; - /* pointer to user mode syncvar data, syncvar->UserDataPtrValue - * may be NULL - */ - struct HsaSyncVar SyncVar; -}; - -struct HsaEvent { - uint32_t EventId; - struct HsaEventData EventData; -}; - -#pragma pack(pop) - -enum DBGDEV_TYPE { - DBGDEV_TYPE_ILLEGAL = 0, - DBGDEV_TYPE_NODIQ = 1, - DBGDEV_TYPE_DIQ = 2, - DBGDEV_TYPE_TEST = 3 -}; - -struct dbg_address_watch_info { - struct kfd_process *process; - enum HSA_DBG_WATCH_MODE *watch_mode; - uint64_t *watch_address; - uint64_t *watch_mask; - struct HsaEvent *watch_event; - uint32_t num_watch_points; -}; - -struct dbg_wave_control_info { - struct kfd_process *process; - uint32_t trapId; - enum HSA_DBG_WAVEOP operand; - enum HSA_DBG_WAVEMODE mode; - struct HsaDbgWaveMessage dbgWave_msg; -}; - -struct kfd_dbgdev { - - /* The device that owns this data. */ - struct kfd_dev *dev; - - /* kernel queue for DIQ */ - struct kernel_queue *kq; - - /* a pointer to the pqm of the calling process */ - struct process_queue_manager *pqm; - - /* type of debug device ( DIQ, non DIQ, etc. ) */ - enum DBGDEV_TYPE type; - - /* virtualized function pointers to device dbg */ - int (*dbgdev_register)(struct kfd_dbgdev *dbgdev); - int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev); - int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev, - struct dbg_address_watch_info *adw_info); - int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev, - struct dbg_wave_control_info *wac_info); - -}; - -struct kfd_dbgmgr { - u32 pasid; - struct kfd_dev *dev; - struct kfd_dbgdev *dbgdev; -}; - -/* prototypes for debug manager functions */ -struct mutex *kfd_get_dbgmgr_mutex(void); -void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr); -bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev); -long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p); -long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p); -long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, - struct dbg_wave_control_info *wac_info); -long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, - struct dbg_address_watch_info *adw_info); -#endif /* KFD_DBGMGR_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 5a47f437b455..dbb877fba724 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -575,8 +575,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, if (kfd_resume(kfd)) goto kfd_resume_error; - kfd->dbgmgr = NULL; - if (kfd_topology_add_device(kfd)) { dev_err(kfd_device, "Error adding device to topology\n"); goto kfd_topology_add_device_error; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 219acc818eb4..7f6f1a842b0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -480,6 +480,65 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm, dqm->allocated_queues[q->pipe] |= (1 << q->queue); } +#define SQ_IND_CMD_CMD_KILL 0x00000003 +#define SQ_IND_CMD_MODE_BROADCAST 0x00000001 + +static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) +{ + int status = 0; + unsigned int vmid; + uint16_t queried_pasid; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct kfd_process_device *pdd; + int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; + int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; + + reg_sq_cmd.u32All = 0; + reg_gfx_index.u32All = 0; + + pr_debug("Killing all process wavefronts\n"); + + /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. + * ATC_VMID15_PASID_MAPPING + * to check which VMID the current process is mapped to. + */ + + for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { + status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info + (dev->adev, vmid, &queried_pasid); + + if (status && queried_pasid == p->pasid) { + pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", + vmid, p->pasid); + break; + } + } + + if (vmid > last_vmid_to_scan) { + pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); + return -EFAULT; + } + + /* taking the VMID for that process on the safe way using PDD */ + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) + return -EFAULT; + + reg_gfx_index.bits.sh_broadcast_writes = 1; + reg_gfx_index.bits.se_broadcast_writes = 1; + reg_gfx_index.bits.instance_broadcast_writes = 1; + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; + reg_sq_cmd.bits.vm_id = vmid; + + dev->kfd2kgd->wave_control_execute(dev->adev, + reg_gfx_index.u32All, + reg_sq_cmd.u32All); + + return 0; +} + /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked * to avoid asynchronized access */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index a7d2e3323977..05a9c17daa3e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -39,6 +39,41 @@ struct device_process_node { struct list_head list; }; +union SQ_CMD_BITS { + struct { + uint32_t cmd:3; + uint32_t:1; + uint32_t mode:3; + uint32_t check_vmid:1; + uint32_t trap_id:3; + uint32_t:5; + uint32_t wave_id:4; + uint32_t simd_id:2; + uint32_t:2; + uint32_t queue_id:3; + uint32_t:1; + uint32_t vm_id:4; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union GRBM_GFX_INDEX_BITS { + struct { + uint32_t instance_index:8; + uint32_t sh_index:8; + uint32_t se_index:8; + uint32_t:5; + uint32_t sh_broadcast_writes:1; + uint32_t instance_broadcast_writes:1; + uint32_t se_broadcast_writes:1; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + /** * struct device_queue_manager_ops * diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index 66ad8d0b8f7f..fe62407dacb2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -30,7 +30,6 @@ #include #include #include "kfd_priv.h" -#include "kfd_dbgmgr.h" #include "kfd_topology.h" #include "kfd_iommu.h" @@ -163,17 +162,6 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid) pr_debug("Unbinding process 0x%x from IOMMU\n", pasid); - mutex_lock(kfd_get_dbgmgr_mutex()); - - if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { - if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { - kfd_dbgmgr_destroy(dev->dbgmgr); - dev->dbgmgr = NULL; - } - } - - mutex_unlock(kfd_get_dbgmgr_mutex()); - mutex_lock(&p->mutex); pdd = kfd_get_process_device_data(dev, p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 74fb4762c66e..b6790a637f5c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -300,9 +300,6 @@ struct kfd_dev { */ bool interrupts_active; - /* Debug manager */ - struct kfd_dbgmgr *dbgmgr; - /* Firmware versions */ uint16_t mec_fw_version; uint16_t mec2_fw_version; @@ -1331,8 +1328,6 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid); void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); -int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); - bool kfd_is_locked(void); /* Compute profile */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 8e2780d2f735..8c6a48add76e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -40,7 +40,6 @@ struct mm_struct; #include "kfd_priv.h" #include "kfd_device_queue_manager.h" -#include "kfd_dbgmgr.h" #include "kfd_iommu.h" #include "kfd_svm.h" @@ -1158,7 +1157,6 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct kfd_process *p; - int i; /* * The kfd_process structure can not be free because the @@ -1178,23 +1176,6 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, mutex_lock(&p->mutex); - /* Iterate over all process device data structures and if the - * pdd is in debug mode, we should first force unregistration, - * then we will be able to destroy the queues - */ - for (i = 0; i < p->n_pdds; i++) { - struct kfd_dev *dev = p->pdds[i]->dev; - - mutex_lock(kfd_get_dbgmgr_mutex()); - if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { - if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { - kfd_dbgmgr_destroy(dev->dbgmgr); - dev->dbgmgr = NULL; - } - } - mutex_unlock(kfd_get_dbgmgr_mutex()); - } - kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index ac941f62cbed..2f60cf35a444 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -269,18 +269,9 @@ struct kfd2kgd_calls { int (*hqd_sdma_destroy)(struct amdgpu_device *adev, void *mqd, unsigned int timeout); - int (*address_watch_disable)(struct amdgpu_device *adev); - int (*address_watch_execute)(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); int (*wave_control_execute)(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd); - uint32_t (*address_watch_get_offset)(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset); bool (*get_atc_vmid_pasid_mapping_info)(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index e6a56c146920..6e4268f5e482 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -756,16 +756,16 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_WAIT_EVENTS \ AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) -#define AMDKFD_IOC_DBG_REGISTER \ +#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED \ AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) -#define AMDKFD_IOC_DBG_UNREGISTER \ +#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED \ AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) -#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ +#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED \ AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) -#define AMDKFD_IOC_DBG_WAVE_CONTROL \ +#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ -- cgit v1.2.3-59-g8ed1b From 79aa0367385ceaf5351ea77ea1fb66136739ea9d Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 15 Feb 2022 18:54:38 -0500 Subject: drm/amdkfd: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reference: https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays CC: Changcheng Deng Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 6e4268f5e482..baec5a41de3e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -673,7 +673,7 @@ struct kfd_ioctl_svm_args { __u32 op; __u32 nattr; /* Variable length array of attributes */ - struct kfd_ioctl_svm_attribute attrs[0]; + struct kfd_ioctl_svm_attribute attrs[]; }; /** -- cgit v1.2.3-59-g8ed1b From a65dbf7cded724a5ed4a5e1a718616b048ca0c34 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 23 Dec 2021 09:35:37 +0800 Subject: drm/amdgpu/gfx10: Add GC 10.3.7 Support Needed to properly initialize GC 10.3.7. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 +++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 53 ++++++++++++++++++++++++++- include/uapi/drm/amdgpu_drm.h | 1 + 3 files changed, 58 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 0274de05e333..7c7e28fd912e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1424,6 +1424,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); break; default: @@ -1766,6 +1767,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): adev->family = AMDGPU_FAMILY_YC; break; + case IP_VERSION(10, 3, 7): + adev->family = AMDGPU_FAMILY_GC_10_3_7; + break; default: return -EINVAL; } @@ -1778,6 +1782,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 1, 4): case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->flags |= AMD_IS_APU; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index dfbe65c1ae0b..95b8f76d0e7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -258,6 +258,13 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_ce.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -3408,6 +3415,31 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) }; +static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf000003f, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3646,6 +3678,11 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_0_cyan_skillfish, (const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish)); break; + case IP_VERSION(10, 3, 7): + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_7, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_7)); + break; default: break; } @@ -3835,6 +3872,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.cp_fw_write_wait = true; break; default: @@ -3962,6 +4000,9 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) else chip_name = "cyan_skillfish"; break; + case IP_VERSION(10, 3, 7): + chip_name = "gc_10_3_7"; + break; default: BUG(); } @@ -4558,6 +4599,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4695,6 +4737,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -6207,6 +6250,7 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index); WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); @@ -6344,6 +6388,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0); break; default: @@ -6358,6 +6403,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); @@ -7185,6 +7231,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) break; case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): return true; default: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); @@ -7220,6 +7267,7 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << GRBM_CAM_DATA__CAM_ADDR__SHIFT) | @@ -7672,6 +7720,7 @@ static int gfx_v10_0_early_init(void *handle) case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid; break; default: @@ -9430,6 +9479,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case IP_VERSION(10, 1, 2): @@ -9522,7 +9572,8 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3))) && + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 7))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; mask = 1; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 76b580d10a52..55fa660e35f4 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1151,6 +1151,7 @@ struct drm_amdgpu_info_video_caps { #define AMDGPU_FAMILY_NV 143 /* Navi10 */ #define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ #define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #if defined(__cplusplus) } -- cgit v1.2.3-59-g8ed1b From 874bfdfa4735cbb1b0d6e0c6157c712a312371a1 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Thu, 10 Feb 2022 14:42:09 -0500 Subject: drm/amdgpu: add gc 10.3.6 support this patch adds gc 10.3.6 support. Signed-off-by: Yifan Zhang Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 6 ++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 87 ++++++++++++++++++++++++++- include/uapi/drm/amdgpu_drm.h | 1 + 3 files changed, 91 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 1ba0b055d44b..3501ade93f44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1421,6 +1421,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); @@ -1559,6 +1560,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); break; default: @@ -1765,6 +1767,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): adev->family = AMDGPU_FAMILY_YC; break; + case IP_VERSION(10, 3, 6): + adev->family = AMDGPU_FAMILY_GC_10_3_6; + break; case IP_VERSION(10, 3, 7): adev->family = AMDGPU_FAMILY_GC_10_3_7; break; @@ -1780,6 +1785,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 1, 4): case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 7): adev->flags |= AMD_IS_APU; break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 95b8f76d0e7a..90158289cd30 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -106,6 +106,12 @@ #define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1 #define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026 #define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX 1 + +#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6 0x002d +#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6 0x002e +#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6_BASE_IDX 1 + #define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441 #define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1 #define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261 @@ -258,6 +264,13 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_ce.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec2.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_rlc.bin"); + MODULE_FIRMWARE("amdgpu/gc_10_3_7_ce.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_me.bin"); @@ -3415,6 +3428,32 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) }; +static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000) +}; + static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4), @@ -3678,6 +3717,11 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_0_cyan_skillfish, (const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish)); break; + case IP_VERSION(10, 3, 6): + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_6, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_6)); + break; case IP_VERSION(10, 3, 7): soc15_program_register_sequence(adev, golden_settings_gc_10_3_7, @@ -3871,6 +3915,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.cp_fw_write_wait = true; @@ -3993,6 +4038,9 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): chip_name = "yellow_carp"; break; + case IP_VERSION(10, 3, 6): + chip_name = "gc_10_3_6"; + break; case IP_VERSION(10, 1, 3): case IP_VERSION(10, 1, 4): if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) @@ -4598,6 +4646,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.config.max_hw_contexts = 8; @@ -4736,6 +4785,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; @@ -4975,7 +5025,8 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3))) && + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); @@ -6249,6 +6300,7 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, @@ -6387,6 +6439,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0); @@ -6402,6 +6455,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, @@ -6501,6 +6555,7 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; @@ -7231,6 +7286,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) break; case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 7): return true; default: @@ -7266,6 +7322,7 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ @@ -7587,6 +7644,7 @@ static int gfx_v10_0_soft_reset(void *handle) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid)) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, @@ -7654,6 +7712,21 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) preempt_enable(); clock = clock_lo | (clock_hi << 32ULL); break; + case IP_VERSION(10, 3, 6): + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6); + /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; default: preempt_disable(); clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); @@ -7719,6 +7792,7 @@ static int gfx_v10_0_early_init(void *handle) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid; @@ -7781,6 +7855,7 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); @@ -7817,6 +7892,7 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); break; @@ -8271,6 +8347,7 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh; WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data); break; @@ -8339,6 +8416,7 @@ static int gfx_v10_0_set_powergating_state(void *handle, break; case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): gfx_v10_cntl_pg(adev, enable); amdgpu_gfx_off_ctrl(adev, enable); break; @@ -8365,6 +8443,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -9478,6 +9557,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; @@ -9572,8 +9652,9 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 7))) && + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 7))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; mask = 1; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 55fa660e35f4..1d65c1fbc4ec 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1151,6 +1151,7 @@ struct drm_amdgpu_info_video_caps { #define AMDGPU_FAMILY_NV 143 /* Navi10 */ #define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ #define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ #define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #if defined(__cplusplus) -- cgit v1.2.3-59-g8ed1b From 93b71801a8274cd9511557faf04365a5de487197 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Feb 2022 09:06:54 -0500 Subject: KVM: PPC: reserve capability 210 for KVM_CAP_PPC_AIL_MODE_3 Add KVM_CAP_PPC_AIL_MODE_3 to advertise the capability to set the AIL resource mode to 3 with the H_SET_MODE hypercall. This capability differs between processor types and KVM types (PR, HV, Nested HV), and affects guest-visible behaviour. QEMU will implement a cap-ail-mode-3 to control this behaviour[1], and use the KVM CAP if available to determine KVM support[2]. Reviewed-by: Fabiano Rosas Signed-off-by: Nicholas Piggin Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 14 ++++++++++++++ include/uapi/linux/kvm.h | 1 + tools/include/uapi/linux/kvm.h | 1 + 3 files changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a4267104db50..9954568c7eab 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6997,6 +6997,20 @@ indicated by the fd to the VM this is called on. This is intended to support intra-host migration of VMs between userspace VMMs, upgrading the VMM process without interrupting the guest. +7.30 KVM_CAP_PPC_AIL_MODE_3 +------------------------------- + +:Capability: KVM_CAP_PPC_AIL_MODE_3 +:Architectures: ppc +:Type: vm + +This capability indicates that the kernel supports the mode 3 setting for the +"Address Translation Mode on Interrupt" aka "Alternate Interrupt Location" +resource that is controlled with the H_SET_MODE hypercall. + +This capability allows a guest kernel to use a better-performance mode for +handling interrupts and system calls. + 8. Other capabilities. ====================== diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5191b57e1562..507ee1f2aa96 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1134,6 +1134,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_PPC_AIL_MODE_3 210 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 5191b57e1562..507ee1f2aa96 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1134,6 +1134,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_PPC_AIL_MODE_3 210 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-59-g8ed1b