aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
authorZhenyu Wang <zhenyuw@linux.intel.com>2020-02-20 16:23:37 +0800
committerZhenyu Wang <zhenyuw@linux.intel.com>2020-02-20 16:23:37 +0800
commitc95baf12f5077419db01313ab61c2aac007d40cd (patch)
tree8c2aed3b89aecfb100b0546b601b7c7ae513a974 /drivers/gpu/drm/amd/amdgpu
parentdrm/i915/gvt: remove unused vblank_done completion (diff)
parentdrm/i915/dp: Add all tiled and port sync conns to modeset (diff)
downloadwireguard-linux-c95baf12f5077419db01313ab61c2aac007d40cd.tar.xz
wireguard-linux-c95baf12f5077419db01313ab61c2aac007d40cd.zip
Merge drm-intel-next-queued into gvt-next
Backmerge to pull in https://patchwork.freedesktop.org/patch/353621/?series=73544&rev=1 Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h157
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c214
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c416
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c251
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c249
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c347
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c127
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c244
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c326
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c159
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c566
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_df.h65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c220
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c217
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c312
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c211
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c211
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c124
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c443
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c84
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h101
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c74
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c1113
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c821
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h114
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c697
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c226
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_test.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c379
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c152
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c223
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h94
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c143
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c450
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c100
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/arct_reg_init.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_i2c.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c177
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c302
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c375
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c1971
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c978
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c286
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c661
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c586
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c827
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c641
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c241
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c1068
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c384
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c233
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c151
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c107
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c441
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.c83
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c347
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c114
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c273
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_0.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_0.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.c276
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c592
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c624
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c915
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.h1
190 files changed, 17068 insertions, 8398 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 2e98c016cb47..9375e7f12420 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: MIT
config DRM_AMDGPU_SI
bool "Enable amdgpu support for SI parts"
depends on DRM_AMDGPU
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 00962a659009..c2bbcdd9c875 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -53,8 +53,9 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
- amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
- amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o
+ amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
+ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
+ amdgpu_umc.o smu_v11_0_i2c.o
amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
@@ -67,7 +68,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \
- arct_reg_init.o navi12_reg_init.o
+ arct_reg_init.o navi12_reg_init.o mxgpu_nv.o
# add DF block
amdgpu-y += \
@@ -83,7 +84,7 @@ amdgpu-y += \
# add UMC block
amdgpu-y += \
- umc_v6_1.o
+ umc_v6_1.o umc_v6_0.o
# add IH block
amdgpu-y += \
@@ -119,6 +120,7 @@ amdgpu-y += \
amdgpu_rlc.o \
gfx_v8_0.o \
gfx_v9_0.o \
+ gfx_v9_4.o \
gfx_v10_0.o
# add async DMA block
@@ -146,12 +148,16 @@ amdgpu-y += \
vce_v3_0.o \
vce_v4_0.o
-# add VCN block
+# add VCN and JPEG block
amdgpu-y += \
amdgpu_vcn.o \
vcn_v1_0.o \
vcn_v2_0.o \
- vcn_v2_5.o
+ vcn_v2_5.o \
+ amdgpu_jpeg.o \
+ jpeg_v1_0.o \
+ jpeg_v2_0.o \
+ jpeg_v2_5.o
# add ATHUB block
amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index bd37df5dd6d0..da3bcff61b97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -69,10 +69,12 @@
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
#include "amdgpu_vcn.h"
+#include "amdgpu_jpeg.h"
#include "amdgpu_mn.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gfx.h"
#include "amdgpu_sdma.h"
+#include "amdgpu_nbio.h"
#include "amdgpu_dm.h"
#include "amdgpu_virt.h"
#include "amdgpu_csa.h"
@@ -88,6 +90,7 @@
#include "amdgpu_mes.h"
#include "amdgpu_umc.h"
#include "amdgpu_mmhub.h"
+#include "amdgpu_df.h"
#define MAX_GPU_INSTANCE 16
@@ -106,6 +109,8 @@ struct amdgpu_mgpu_info
uint32_t num_apu;
};
+#define AMDGPU_MAX_TIMEOUT_PARAM_LENGTH 256
+
/*
* Modules parameters.
*/
@@ -122,6 +127,7 @@ extern int amdgpu_disp_priority;
extern int amdgpu_hw_i2c;
extern int amdgpu_pcie_gen2;
extern int amdgpu_msi;
+extern char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
extern int amdgpu_dpm;
extern int amdgpu_fw_load_type;
extern int amdgpu_aspm;
@@ -135,6 +141,7 @@ extern int amdgpu_vm_fragment_size;
extern int amdgpu_vm_fault_stop;
extern int amdgpu_vm_debug;
extern int amdgpu_vm_update_mode;
+extern int amdgpu_exp_hw_support;
extern int amdgpu_dc;
extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
@@ -146,11 +153,7 @@ extern uint amdgpu_sdma_phase_quantum;
extern char *amdgpu_disable_cu;
extern char *amdgpu_virtual_display;
extern uint amdgpu_pp_feature_mask;
-extern int amdgpu_ngg;
-extern int amdgpu_prim_buf_per_se;
-extern int amdgpu_pos_buf_per_se;
-extern int amdgpu_cntl_sb_buf_per_se;
-extern int amdgpu_param_buf_per_se;
+extern uint amdgpu_force_long_training;
extern int amdgpu_job_hang_limit;
extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
@@ -167,6 +170,12 @@ extern int amdgpu_mcbp;
extern int amdgpu_discovery;
extern int amdgpu_mes;
extern int amdgpu_noretry;
+extern int amdgpu_force_asic_type;
+#ifdef CONFIG_HSA_AMD
+extern int sched_policy;
+#else
+static const int sched_policy = KFD_SCHED_POLICY_HWS;
+#endif
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
@@ -283,6 +292,9 @@ struct amdgpu_ip_block_version {
const struct amd_ip_funcs *funcs;
};
+#define HW_REV(_Major, _Minor, _Rev) \
+ ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev)))
+
struct amdgpu_ip_block {
struct amdgpu_ip_block_status status;
const struct amdgpu_ip_block_version *version;
@@ -425,7 +437,6 @@ struct amdgpu_fpriv {
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
-int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
@@ -477,7 +488,6 @@ struct amdgpu_cs_parser {
uint64_t bytes_moved_vis_threshold;
uint64_t bytes_moved;
uint64_t bytes_moved_vis;
- struct amdgpu_bo_list_entry *evictable;
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
@@ -580,6 +590,8 @@ struct amdgpu_asic_funcs {
bool (*need_reset_on_init)(struct amdgpu_device *adev);
/* PCIe replay counter */
uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
+ /* device supports BACO */
+ bool (*supports_baco)(struct amdgpu_device *adev);
};
/*
@@ -624,6 +636,10 @@ struct amdgpu_fw_vram_usage {
u64 size;
struct amdgpu_bo *reserved_bo;
void *va;
+
+ /* GDDR6 training support flag.
+ */
+ bool mem_train_support;
};
/*
@@ -644,91 +660,11 @@ typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
-
-/*
- * amdgpu nbio functions
- *
- */
-struct nbio_hdp_flush_reg {
- u32 ref_and_mask_cp0;
- u32 ref_and_mask_cp1;
- u32 ref_and_mask_cp2;
- u32 ref_and_mask_cp3;
- u32 ref_and_mask_cp4;
- u32 ref_and_mask_cp5;
- u32 ref_and_mask_cp6;
- u32 ref_and_mask_cp7;
- u32 ref_and_mask_cp8;
- u32 ref_and_mask_cp9;
- u32 ref_and_mask_sdma0;
- u32 ref_and_mask_sdma1;
- u32 ref_and_mask_sdma2;
- u32 ref_and_mask_sdma3;
- u32 ref_and_mask_sdma4;
- u32 ref_and_mask_sdma5;
- u32 ref_and_mask_sdma6;
- u32 ref_and_mask_sdma7;
-};
-
struct amdgpu_mmio_remap {
u32 reg_offset;
resource_size_t bus_addr;
};
-struct amdgpu_nbio_funcs {
- const struct nbio_hdp_flush_reg *hdp_flush_reg;
- u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
- u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
- u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
- u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
- u32 (*get_rev_id)(struct amdgpu_device *adev);
- void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
- void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
- u32 (*get_memsize)(struct amdgpu_device *adev);
- void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
- bool use_doorbell, int doorbell_index, int doorbell_size);
- void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
- int doorbell_index, int instance);
- void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
- bool enable);
- void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
- bool enable);
- void (*ih_doorbell_range)(struct amdgpu_device *adev,
- bool use_doorbell, int doorbell_index);
- void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
- bool enable);
- void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
- bool enable);
- void (*get_clockgating_state)(struct amdgpu_device *adev,
- u32 *flags);
- void (*ih_control)(struct amdgpu_device *adev);
- void (*init_registers)(struct amdgpu_device *adev);
- void (*detect_hw_virt)(struct amdgpu_device *adev);
- void (*remap_hdp_registers)(struct amdgpu_device *adev);
-};
-
-struct amdgpu_df_funcs {
- void (*sw_init)(struct amdgpu_device *adev);
- void (*enable_broadcast_mode)(struct amdgpu_device *adev,
- bool enable);
- u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
- u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
- void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
- bool enable);
- void (*get_clockgating_state)(struct amdgpu_device *adev,
- u32 *flags);
- void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
- bool enable);
- int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
- int is_enable);
- int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
- int is_disable);
- void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
- uint64_t *count);
- uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
- void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
- uint32_t ficadl_val, uint32_t ficadh_val);
-};
/* Define the HW IP blocks will be used in driver , add more if necessary */
enum amd_hw_ip_block_type {
GC_HWIP = 1,
@@ -748,6 +684,7 @@ enum amd_hw_ip_block_type {
MP1_HWIP,
UVD_HWIP,
VCN_HWIP = UVD_HWIP,
+ JPEG_HWIP = VCN_HWIP,
VCE_HWIP,
DF_HWIP,
DCE_HWIP,
@@ -813,6 +750,7 @@ struct amdgpu_device {
uint8_t *bios;
uint32_t bios_size;
struct amdgpu_bo *stolen_vga_memory;
+ struct amdgpu_bo *discovery_memory;
uint32_t bios_scratch_reg_offset;
uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH];
@@ -921,6 +859,12 @@ struct amdgpu_device {
u32 cg_flags;
u32 pg_flags;
+ /* nbio */
+ struct amdgpu_nbio nbio;
+
+ /* mmhub */
+ struct amdgpu_mmhub mmhub;
+
/* gfx */
struct amdgpu_gfx gfx;
@@ -936,6 +880,9 @@ struct amdgpu_device {
/* vcn */
struct amdgpu_vcn vcn;
+ /* jpeg */
+ struct amdgpu_jpeg jpeg;
+
/* firmwares */
struct amdgpu_firmware firmware;
@@ -961,6 +908,9 @@ struct amdgpu_device {
bool enable_mes;
struct amdgpu_mes mes;
+ /* df */
+ struct amdgpu_df df;
+
struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
int num_ip_blocks;
struct mutex mn_lock;
@@ -974,10 +924,6 @@ struct amdgpu_device {
/* soc15 register offset based on ip, instance and segment */
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
- const struct amdgpu_nbio_funcs *nbio_funcs;
- const struct amdgpu_df_funcs *df_funcs;
- const struct amdgpu_mmhub_funcs *mmhub_funcs;
-
/* delayed work_func for deferring clockgating during resume */
struct delayed_work delayed_init_work;
@@ -1006,11 +952,11 @@ struct amdgpu_device {
struct mutex lock_reset;
struct amdgpu_doorbell_index doorbell_index;
+ struct mutex notifier_lock;
+
int asic_reset_res;
struct work_struct xgmi_reset_work;
- bool in_baco_reset;
-
long gfx_timeout;
long sdma_timeout;
long video_timeout;
@@ -1018,6 +964,14 @@ struct amdgpu_device {
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+ /* device pstate */
+ int pstate;
+ /* enable runtime pm on the device */
+ bool runpm;
+
+ bool pm_sysfs_en;
+ bool ucode_sysfs_en;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1032,6 +986,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
void amdgpu_device_fini(struct amdgpu_device *adev);
int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+ uint32_t *buf, size_t size, bool write);
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
uint32_t acc_flags);
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
@@ -1053,10 +1009,14 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define AMDGPU_REGS_IDX (1<<0)
#define AMDGPU_REGS_NO_KIQ (1<<1)
+#define AMDGPU_REGS_KIQ (1<<2)
#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
+#define RREG32_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_KIQ)
+#define WREG32_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_KIQ)
+
#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1151,6 +1111,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
+#define amdgpu_asic_supports_baco(adev) (adev)->asic_funcs->supports_baco((adev))
+
#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
/* Common functions */
@@ -1167,9 +1129,12 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 *registers,
const u32 array_size);
-bool amdgpu_device_is_px(struct drm_device *dev);
+bool amdgpu_device_supports_boco(struct drm_device *dev);
+bool amdgpu_device_supports_baco(struct drm_device *dev);
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
+int amdgpu_device_baco_enter(struct drm_device *dev);
+int amdgpu_device_baco_exit(struct drm_device *dev);
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
@@ -1207,8 +1172,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
void amdgpu_driver_postclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
-int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
-int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
+int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
+int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 82155ac3288a..12247a32f9ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -527,7 +527,7 @@ static int acp_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = state == AMD_PG_STATE_GATE ? true : false;
+ bool enable = (state == AMD_PG_STATE_GATE);
if (adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->set_powergating_by_smu)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 07eb29885372..8609287620ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -63,45 +63,10 @@ void amdgpu_amdkfd_fini(void)
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
- const struct kfd2kgd_calls *kfd2kgd;
-
- switch (adev->asic_type) {
-#ifdef CONFIG_DRM_AMDGPU_CIK
- case CHIP_KAVERI:
- case CHIP_HAWAII:
- kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
- break;
-#endif
- case CHIP_CARRIZO:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
- break;
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
- break;
- case CHIP_ARCTURUS:
- kfd2kgd = amdgpu_amdkfd_arcturus_get_functions();
- break;
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions();
- break;
- default:
- dev_info(adev->dev, "kfd not supported on this ASIC\n");
- return;
- }
+ bool vf = amdgpu_sriov_vf(adev);
adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
- adev->pdev, kfd2kgd);
+ adev->pdev, adev->asic_type, vf);
if (adev->kfd.dev)
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
@@ -165,14 +130,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->gfx.mec.queue_bitmap,
KGD_MAX_QUEUES);
- /* remove the KIQ bit as well */
- if (adev->gfx.kiq.ring.sched.ready)
- clear_bit(amdgpu_gfx_mec_queue_to_bit(adev,
- adev->gfx.kiq.ring.me - 1,
- adev->gfx.kiq.ring.pipe,
- adev->gfx.kiq.ring.queue),
- gpu_resources.queue_bitmap);
-
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
* nbits is not compile time constant
*/
@@ -202,7 +159,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->doorbell_index.last_non_cp;
}
- kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
+ kgd2kfd_device_init(adev->kfd.dev, adev->ddev, &gpu_resources);
}
}
@@ -656,15 +613,9 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- if (is_support_sw_smu(adev))
- smu_switch_power_profile(&adev->smu,
- PP_SMC_POWER_PROFILE_COMPUTE,
- !idle);
- else if (adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->switch_power_profile)
- amdgpu_dpm_switch_power_profile(adev,
- PP_SMC_POWER_PROFILE_COMPUTE,
- !idle);
+ amdgpu_dpm_switch_power_profile(adev,
+ PP_SMC_POWER_PROFILE_COMPUTE,
+ !idle);
}
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
@@ -677,6 +628,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ if (adev->family == AMDGPU_FAMILY_AI) {
+ int i;
+
+ for (i = 0; i < adev->num_vmhubs; i++)
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+ } else {
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+ }
+
+ return 0;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ uint32_t flush_type = 0;
+ bool all_hub = false;
+
+ if (adev->gmc.xgmi.num_physical_nodes &&
+ adev->asic_type == CHIP_VEGA20)
+ flush_type = 2;
+
+ if (adev->family == AMDGPU_FAMILY_AI)
+ all_hub = true;
+
+ return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
+}
+
bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
@@ -709,38 +692,14 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
return 0;
}
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
-{
- return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
-{
- return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
-{
- return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void)
-{
- return NULL;
-}
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void)
-{
- return NULL;
-}
-
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
- const struct kfd2kgd_calls *f2g)
+ unsigned int asic_type, bool vf)
{
return NULL;
}
bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources)
{
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index e519df3fd2b6..47b0f2957d1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -57,7 +57,7 @@ struct kgd_mem {
unsigned int mapped_to_gpu_memory;
uint64_t va;
- uint32_t mapping_flags;
+ uint32_t alloc_flags;
atomic_t invalid;
struct amdkfd_process_info *process_info;
@@ -136,12 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t *ib_cmd, uint32_t ib_len);
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void);
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void);
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid);
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
@@ -179,10 +175,17 @@ uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
+/* Read user wptr from a specified user address space with page fault
+ * disabled. The memory must be pinned and mapped to the hardware when
+ * this is called in hqd_load functions, so it should never fault in
+ * the first place. This resolves a circular lock dependency involving
+ * four locks, including the DQM lock and mmap_sem.
+ */
#define read_user_wptr(mmptr, wptr, dst) \
({ \
bool valid = false; \
if ((mmptr) && (wptr)) { \
+ pagefault_disable(); \
if ((mmptr) == current->mm) { \
valid = !get_user((dst), (wptr)); \
} else if (current->mm == NULL) { \
@@ -190,6 +193,7 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
valid = !get_user((dst), (wptr)); \
unuse_mm(mmptr); \
} \
+ pagefault_enable(); \
} \
valid; \
})
@@ -240,8 +244,9 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
int kgd2kfd_init(void);
void kgd2kfd_exit(void);
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
- const struct kfd2kgd_calls *f2g);
+ unsigned int asic_type, bool vf);
bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ struct drm_device *ddev,
const struct kgd2kfd_shared_resources *gpu_resources);
void kgd2kfd_device_exit(struct kfd_dev *kfd);
void kgd2kfd_suspend(struct kfd_dev *kfd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index c79aaebeeaf0..4bcc175a149d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -19,10 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
-#undef pr_fmt
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
@@ -50,6 +46,8 @@
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_amdkfd_gfx_v9.h"
+#include "gfxhub_v1_0.h"
+#include "mmhub_v9_4.h"
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
@@ -69,62 +67,60 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
return (struct v9_sdma_mqd *)mqd;
}
-static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
{
- uint32_t base[8] = {
- SOC15_REG_OFFSET(SDMA0, 0,
- mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
- SOC15_REG_OFFSET(SDMA1, 0,
- mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL,
- SOC15_REG_OFFSET(SDMA2, 0,
- mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL,
- SOC15_REG_OFFSET(SDMA3, 0,
- mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL,
- SOC15_REG_OFFSET(SDMA4, 0,
- mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL,
- SOC15_REG_OFFSET(SDMA5, 0,
- mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL,
- SOC15_REG_OFFSET(SDMA6, 0,
- mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL,
- SOC15_REG_OFFSET(SDMA7, 0,
- mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL
- };
- uint32_t retval;
-
- retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
- mmSDMA0_RLC0_RB_CNTL);
-
- pr_debug("sdma base address: 0x%x\n", retval);
-
- return retval;
-}
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
-static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
- u32 instance, u32 offset)
-{
- switch (instance) {
+ switch (engine_id) {
+ default:
+ dev_warn(adev->dev,
+ "Invalid sdma engine id (%d), using engine id 0\n",
+ engine_id);
+ /* fall through */
case 0:
- return (adev->reg_offset[SDMA0_HWIP][0][0] + offset);
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
case 1:
- return (adev->reg_offset[SDMA1_HWIP][0][1] + offset);
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
+ break;
case 2:
- return (adev->reg_offset[SDMA2_HWIP][0][1] + offset);
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
+ mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+ break;
case 3:
- return (adev->reg_offset[SDMA3_HWIP][0][1] + offset);
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
+ mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
+ break;
case 4:
- return (adev->reg_offset[SDMA4_HWIP][0][1] + offset);
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
+ mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
+ break;
case 5:
- return (adev->reg_offset[SDMA5_HWIP][0][1] + offset);
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
+ mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
+ break;
case 6:
- return (adev->reg_offset[SDMA6_HWIP][0][1] + offset);
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
+ mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
+ break;
case 7:
- return (adev->reg_offset[SDMA7_HWIP][0][1] + offset);
- default:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
+ mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
break;
}
- return 0;
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
}
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
@@ -132,70 +128,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
- uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+ uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
uint64_t __user *wptr64 = (uint64_t __user *)wptr;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- sdmax_gfx_context_cntl = sdma_v4_0_get_reg_offset(adev,
- m->sdma_engine_id, mmSDMA0_GFX_CONTEXT_CNTL);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
- data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- data = RREG32(sdmax_gfx_context_cntl);
- data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(sdmax_gfx_context_cntl, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
m->sdmax_rlcx_doorbell_offset);
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
if (read_user_wptr(mm, wptr64, data64)) {
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
lower_32_bits(data64));
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
upper_32_bits(data64));
} else {
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
m->sdmax_rlcx_rb_base_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdmax_rlcx_rb_rptr_addr_lo);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdmax_rlcx_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
RB_ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
return 0;
}
@@ -205,7 +198,8 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
uint32_t (**dump)[2], uint32_t *n_regs)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
@@ -215,15 +209,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return -ENOMEM;
for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -235,14 +229,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
@@ -255,44 +249,63 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
while (true) {
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
- m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
m->sdmax_rlcx_rb_rptr_hi =
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
return 0;
}
-static const struct kfd2kgd_calls kfd2kgd = {
+static void kgd_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
+
+ gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+const struct kfd2kgd_calls arcturus_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
.init_interrupts = kgd_gfx_v9_init_interrupts,
.hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
.hqd_dump = kgd_gfx_v9_hqd_dump,
.hqd_sdma_dump = kgd_hqd_sdma_dump,
@@ -304,20 +317,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
- .get_atc_vmid_pasid_mapping_pasid =
- kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
- .get_atc_vmid_pasid_mapping_valid =
- kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
- .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.get_tile_config = kgd_gfx_v9_get_tile_config,
- .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
- .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
- .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
+ .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void)
-{
- return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index d10f483f5e27..a7b17c8deb00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -19,18 +19,9 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#undef pr_fmt
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
-#include <linux/module.h>
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
-#include <linux/firmware.h>
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
-#include "amdgpu_ucode.h"
-#include "soc15_hw_ip.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "navi10_enum.h"
@@ -42,6 +33,7 @@
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"
+#include "gfxhub_v2_0.h"
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -50,63 +42,6 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
};
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t sh_mem_config,
- uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases);
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
- unsigned int vmid);
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm);
-static int kgd_hqd_dump(struct kgd_dev *kgd,
- uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
- uint32_t __user *wptr, struct mm_struct *mm);
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
- uint32_t engine_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
- enum kfd_preempt_type reset_type,
- unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int utimeout);
-#if 0
-static uint32_t get_watch_base_addr(struct amdgpu_device *adev);
-#endif
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
- uint32_t gfx_index_val,
- uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid);
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base);
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
-
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
*/
@@ -139,37 +74,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
return 0;
}
-static const struct kfd2kgd_calls kfd2kgd = {
- .program_sh_mem_settings = kgd_program_sh_mem_settings,
- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_interrupts = kgd_init_interrupts,
- .hqd_load = kgd_hqd_load,
- .hqd_sdma_load = kgd_hqd_sdma_load,
- .hqd_dump = kgd_hqd_dump,
- .hqd_sdma_dump = kgd_hqd_sdma_dump,
- .hqd_is_occupied = kgd_hqd_is_occupied,
- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
- .hqd_destroy = kgd_hqd_destroy,
- .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
- .wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
- .get_atc_vmid_pasid_mapping_pasid =
- get_atc_vmid_pasid_mapping_pasid,
- .get_atc_vmid_pasid_mapping_valid =
- get_atc_vmid_pasid_mapping_valid,
- .invalidate_tlbs = invalidate_tlbs,
- .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
- .set_vm_context_page_table_base = set_vm_context_page_table_base,
- .get_tile_config = amdgpu_amdkfd_get_tile_config,
-};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions()
-{
- return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
return (struct amdgpu_device *)kgd;
@@ -203,13 +107,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
lock_srbm(kgd, mec, pipe, queue_id, 0);
}
-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
- unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
- queue_id) & 31;
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
- return ((uint32_t)1) << bit;
+ return 1ull << bit;
}
static void release_queue(struct kgd_dev *kgd)
@@ -250,11 +154,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
ATC_VMID0_PASID_MAPPING__VALID_MASK;
pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping);
- /*
- * need to do this twice, once for gfx and once for mmhub
- * for ATC add 16 to VMID for mmhub, for IH different registers.
- * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
- */
pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid);
WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
@@ -306,11 +205,11 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
return 0;
}
-static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
{
- uint32_t base[2] = {
+ uint32_t sdma_engine_reg_base[2] = {
SOC15_REG_OFFSET(SDMA0, 0,
mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
/* On gfx10, mmSDMA1_xxx registers are defined NOT based
@@ -322,12 +221,12 @@ static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
SOC15_REG_OFFSET(SDMA1, 0,
mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL
};
- uint32_t retval;
- retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
- mmSDMA0_RLC0_RB_CNTL);
+ uint32_t retval = sdma_engine_reg_base[engine_id]
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
- pr_debug("sdma base address: 0x%x\n", retval);
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, retval);
return retval;
}
@@ -369,21 +268,6 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
acquire_queue(kgd, pipe_id, queue_id);
- /* HIQ is set during driver init period with vmid set to 0*/
- if (m->cp_hqd_vmid == 0) {
- uint32_t value, mec, pipe;
-
- mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
- pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
- pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
- mec, pipe, queue_id);
- value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
- value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
- ((mec << 5) | (pipe << 3) | queue_id | 0x80));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
- }
-
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
@@ -433,9 +317,10 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
lower_32_bits((uint64_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uint64_t)wptr));
- pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
- get_queue_mask(adev, pipe_id, queue_id));
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
@@ -451,6 +336,59 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
return 0;
}
+static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct v10_compute_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ release_queue(kgd);
+
+ return r;
+}
+
static int kgd_hqd_dump(struct kgd_dev *kgd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
@@ -488,72 +426,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
- uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+ uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
uint64_t __user *wptr64 = (uint64_t __user *)wptr;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- pr_debug("sdma load base addr %x for engine %d, queue %d\n", sdma_base_addr, m->sdma_engine_id, m->sdma_queue_id);
- sdmax_gfx_context_cntl = m->sdma_engine_id ?
- SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
- SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
- data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- data = RREG32(sdmax_gfx_context_cntl);
- data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(sdmax_gfx_context_cntl, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
m->sdmax_rlcx_doorbell_offset);
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
if (read_user_wptr(mm, wptr64, data64)) {
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
lower_32_bits(data64));
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
upper_32_bits(data64));
} else {
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
m->sdmax_rlcx_rb_base_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdmax_rlcx_rb_rptr_addr_lo);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdmax_rlcx_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
RB_ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
return 0;
}
@@ -563,28 +496,26 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
uint32_t (**dump)[2], uint32_t *n_regs)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
- pr_debug("sdma dump engine id %d queue_id %d\n", engine_id, queue_id);
- pr_debug("sdma base addr %x\n", sdma_base_addr);
-
*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -618,14 +549,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
@@ -746,122 +677,52 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
while (true) {
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
- m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
m->sdmax_rlcx_rb_rptr_hi =
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
return 0;
}
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint8_t vmid, uint16_t *p_pasid)
{
- uint32_t reg;
+ uint32_t value;
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid)
-{
- uint32_t reg;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
- + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
-}
-
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
-{
- signed long r;
- uint32_t seq;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
- spin_lock(&adev->gfx.kiq.ring_lock);
- amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
- amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
- amdgpu_ring_write(ring,
- PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
- PACKET3_INVALIDATE_TLBS_PASID(pasid));
- amdgpu_fence_emit_polling(ring, &seq);
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
-
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- DRM_ERROR("wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
-}
-
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
- if (amdgpu_emu_mode == 0 && ring->sched.ready)
- return invalidate_tlbs_with_kiq(adev, pasid);
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
- if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
- if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
- == pasid) {
- amdgpu_gmc_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB_0, 0);
- break;
- }
- }
- }
-
- return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid %d\n", vmid);
- return 0;
- }
-
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
- return 0;
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
static int kgd_address_watch_disable(struct kgd_dev *kgd)
@@ -914,7 +775,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint64_t base = page_table_base | AMDGPU_PTE_VALID;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
@@ -922,18 +782,30 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
return;
}
- /* TODO: take advantage of per-process address space size. For
- * now, all processes share the same address space size, like
- * on GFX8 and older.
- */
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
- lower_32_bits(adev->vm_manager.max_pfn - 1));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
- upper_32_bits(adev->vm_manager.max_pfn - 1));
-
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+ /* SDMA is on gfxhub as well for Navi1* series */
+ gfxhub_v2_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}
+
+const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hiq_mqd_load = kgd_hiq_mqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_info =
+ get_atc_vmid_pasid_mapping_info,
+ .get_tile_config = amdgpu_amdkfd_get_tile_config,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .get_hive_id = amdgpu_amdkfd_get_hive_id,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 5f459bf5f622..8f052e98a3c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -20,8 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
#include <linux/mmu_context.h>
#include "amdgpu.h"
@@ -86,65 +84,6 @@ union TCP_WATCH_CNTL_BITS {
float f32All;
};
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
- uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
-
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
- unsigned int vmid);
-
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm);
-static int kgd_hqd_dump(struct kgd_dev *kgd,
- uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
- uint32_t __user *wptr, struct mm_struct *mm);
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
- uint32_t engine_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
-
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
- enum kfd_preempt_type reset_type,
- unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int utimeout);
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
- uint32_t gfx_index_val,
- uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid);
-
-static void set_scratch_backing_va(struct kgd_dev *kgd,
- uint64_t va, uint32_t vmid);
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base);
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
-static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
-
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
*/
@@ -170,37 +109,6 @@ static int get_tile_config(struct kgd_dev *kgd,
return 0;
}
-static const struct kfd2kgd_calls kfd2kgd = {
- .program_sh_mem_settings = kgd_program_sh_mem_settings,
- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_interrupts = kgd_init_interrupts,
- .hqd_load = kgd_hqd_load,
- .hqd_sdma_load = kgd_hqd_sdma_load,
- .hqd_dump = kgd_hqd_dump,
- .hqd_sdma_dump = kgd_hqd_sdma_dump,
- .hqd_is_occupied = kgd_hqd_is_occupied,
- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
- .hqd_destroy = kgd_hqd_destroy,
- .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
- .wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
- .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
- .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
- .set_scratch_backing_va = set_scratch_backing_va,
- .get_tile_config = get_tile_config,
- .set_vm_context_page_table_base = set_vm_context_page_table_base,
- .invalidate_tlbs = invalidate_tlbs,
- .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
- .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
-};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
-{
- return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
return (struct amdgpu_device *)kgd;
@@ -303,14 +211,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
return 0;
}
-static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+static inline uint32_t get_sdma_rlc_reg_offset(struct cik_sdma_rlc_registers *m)
{
uint32_t retval;
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
- pr_debug("sdma base address: 0x%x\n", retval);
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
+ m->sdma_engine_id, m->sdma_queue_id, retval);
return retval;
}
@@ -413,60 +322,52 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
unsigned long end_jiffies;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t data;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
- data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- if (m->sdma_engine_id) {
- data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
- data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
- } else {
- data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
- data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
- }
data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdma_rlc_rb_rptr);
if (read_user_wptr(mm, wptr, data))
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
else
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
m->sdma_rlc_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
m->sdma_rlc_virtual_addr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
m->sdma_rlc_rb_base_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdma_rlc_rb_rptr_addr_lo);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdma_rlc_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
RB_ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
return 0;
}
@@ -524,13 +425,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
- sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
@@ -645,32 +546,34 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct cik_sdma_rlc_registers *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
while (true) {
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
- m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdma_rlc_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
return 0;
}
@@ -758,24 +661,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
}
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint8_t vmid, uint16_t *p_pasid)
{
- uint32_t reg;
+ uint32_t value;
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid)
-{
- uint32_t reg;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
- reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
static void set_scratch_backing_va(struct kgd_dev *kgd,
@@ -801,45 +696,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
lower_32_bits(page_table_base));
}
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid;
- unsigned int tmp;
-
- if (adev->in_gpu_reset)
- return -EIO;
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
-
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
- }
-
- return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid\n");
- return 0;
- }
-
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- return 0;
-}
-
/**
* read_vmid_from_vmfault_reg - read vmid from register
*
@@ -855,3 +711,26 @@ static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
}
+
+const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .get_tile_config = get_tile_config,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 6d2f61449606..19a10db93d68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -20,9 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <linux/module.h>
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
#include <linux/mmu_context.h>
#include "amdgpu.h"
@@ -44,62 +41,6 @@ enum hqd_dequeue_request_type {
RESET_WAVES
};
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t sh_mem_config,
- uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases);
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
- unsigned int vmid);
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm);
-static int kgd_hqd_dump(struct kgd_dev *kgd,
- uint32_t pipe_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
- uint32_t __user *wptr, struct mm_struct *mm);
-static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
- uint32_t engine_id, uint32_t queue_id,
- uint32_t (**dump)[2], uint32_t *n_regs);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
- enum kfd_preempt_type reset_type,
- unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int utimeout);
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
- uint32_t gfx_index_val,
- uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid);
-static void set_scratch_backing_va(struct kgd_dev *kgd,
- uint64_t va, uint32_t vmid);
-static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base);
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
-
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
*/
@@ -125,38 +66,6 @@ static int get_tile_config(struct kgd_dev *kgd,
return 0;
}
-static const struct kfd2kgd_calls kfd2kgd = {
- .program_sh_mem_settings = kgd_program_sh_mem_settings,
- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_interrupts = kgd_init_interrupts,
- .hqd_load = kgd_hqd_load,
- .hqd_sdma_load = kgd_hqd_sdma_load,
- .hqd_dump = kgd_hqd_dump,
- .hqd_sdma_dump = kgd_hqd_sdma_dump,
- .hqd_is_occupied = kgd_hqd_is_occupied,
- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
- .hqd_destroy = kgd_hqd_destroy,
- .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
- .wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
- .get_atc_vmid_pasid_mapping_pasid =
- get_atc_vmid_pasid_mapping_pasid,
- .get_atc_vmid_pasid_mapping_valid =
- get_atc_vmid_pasid_mapping_valid,
- .set_scratch_backing_va = set_scratch_backing_va,
- .get_tile_config = get_tile_config,
- .set_vm_context_page_table_base = set_vm_context_page_table_base,
- .invalidate_tlbs = invalidate_tlbs,
- .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
-};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
-{
- return (struct kfd2kgd_calls *)&kfd2kgd;
-}
-
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
return (struct amdgpu_device *)kgd;
@@ -260,13 +169,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
return 0;
}
-static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
+static inline uint32_t get_sdma_rlc_reg_offset(struct vi_sdma_mqd *m)
{
uint32_t retval;
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
- pr_debug("sdma base address: 0x%x\n", retval);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
+ m->sdma_engine_id, m->sdma_queue_id, retval);
return retval;
}
@@ -398,59 +309,51 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
unsigned long end_jiffies;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t data;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
- data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- if (m->sdma_engine_id) {
- data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
- data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
- } else {
- data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
- data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
- }
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
if (read_user_wptr(mm, wptr, data))
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
else
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
m->sdmax_rlcx_virtual_addr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
m->sdmax_rlcx_rb_base_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdmax_rlcx_rb_rptr_addr_lo);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdmax_rlcx_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
RB_ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
return 0;
}
@@ -517,13 +420,13 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
- sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
@@ -641,54 +544,48 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct vi_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
while (true) {
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
- m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
return 0;
}
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint8_t vmid, uint16_t *p_pasid)
{
- uint32_t reg;
+ uint32_t value;
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid)
-{
- uint32_t reg;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
- reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
static int kgd_address_watch_disable(struct kgd_dev *kgd)
@@ -760,41 +657,25 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
lower_32_bits(page_table_base));
}
-static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid;
- unsigned int tmp;
-
- if (adev->in_gpu_reset)
- return -EIO;
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
-
- tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
- (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- break;
- }
- }
-
- return 0;
-}
-
-static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid %d\n", vmid);
- return -EINVAL;
- }
-
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- RREG32(mmVM_INVALIDATE_RESPONSE);
- return 0;
-}
+const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_info =
+ get_atc_vmid_pasid_mapping_info,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .get_tile_config = get_tile_config,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index e262f2ac07a3..8562afe5b761 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -19,17 +19,10 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
-#include <linux/module.h>
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
#include <linux/mmu_context.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
-#include "soc15_hw_ip.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
@@ -47,11 +40,7 @@
#include "soc15d.h"
#include "mmhub_v1_0.h"
#include "gfxhub_v1_0.h"
-#include "gmc_v9_0.h"
-
-#define V9_PIPE_PER_MEC (4)
-#define V9_QUEUES_PER_PIPE_MEC (8)
enum hqd_dequeue_request_type {
NO_ACTION = 0,
@@ -114,13 +103,13 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
lock_srbm(kgd, mec, pipe, queue_id, 0);
}
-static uint32_t get_queue_mask(struct amdgpu_device *adev,
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
- unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
- queue_id) & 31;
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
- return ((uint32_t)1) << bit;
+ return 1ull << bit;
}
static void release_queue(struct kgd_dev *kgd)
@@ -226,22 +215,21 @@ int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
return 0;
}
-static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
{
- uint32_t base[2] = {
+ uint32_t sdma_engine_reg_base[2] = {
SOC15_REG_OFFSET(SDMA0, 0,
mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
SOC15_REG_OFFSET(SDMA1, 0,
mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
};
- uint32_t retval;
+ uint32_t retval = sdma_engine_reg_base[engine_id]
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
- retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
- mmSDMA0_RLC0_RB_CNTL);
-
- pr_debug("sdma base address: 0x%x\n", retval);
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, retval);
return retval;
}
@@ -270,21 +258,6 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
acquire_queue(kgd, pipe_id, queue_id);
- /* HIQ is set during driver init period with vmid set to 0*/
- if (m->cp_hqd_vmid == 0) {
- uint32_t value, mec, pipe;
-
- mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
- pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
- pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
- mec, pipe, queue_id);
- value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
- value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
- ((mec << 5) | (pipe << 3) | queue_id | 0x80));
- WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
- }
-
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
@@ -335,7 +308,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
- get_queue_mask(adev, pipe_id, queue_id));
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
@@ -351,6 +324,59 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
return 0;
}
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ struct v9_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ release_queue(kgd);
+
+ return r;
+}
+
int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
@@ -388,71 +414,67 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
- uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+ uint32_t sdma_rlc_reg_offset;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
uint64_t __user *wptr64 = (uint64_t __user *)wptr;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- sdmax_gfx_context_cntl = m->sdma_engine_id ?
- SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
- SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
- data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- data = RREG32(sdmax_gfx_context_cntl);
- data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
- RESUME_CTX, 0);
- WREG32(sdmax_gfx_context_cntl, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
m->sdmax_rlcx_doorbell_offset);
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
if (read_user_wptr(mm, wptr64, data64)) {
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
lower_32_bits(data64));
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
upper_32_bits(data64));
} else {
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
m->sdmax_rlcx_rb_rptr);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
m->sdmax_rlcx_rb_base_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdmax_rlcx_rb_rptr_addr_lo);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdmax_rlcx_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
RB_ENABLE, 1);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
return 0;
}
@@ -462,7 +484,8 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
uint32_t (**dump)[2], uint32_t *n_regs)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
@@ -472,15 +495,15 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
return -ENOMEM;
for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
- DUMP_REG(sdma_base_addr + reg);
+ DUMP_REG(sdma_rlc_reg_offset + reg);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
@@ -514,14 +537,14 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
@@ -584,151 +607,52 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v9_sdma_mqd *m;
- uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_reg_offset;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
m->sdma_queue_id);
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
while (true) {
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
- if (time_after(jiffies, end_jiffies))
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
return -ETIME;
+ }
usleep_range(500, 1000);
}
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
- m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
m->sdmax_rlcx_rb_rptr_hi =
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
return 0;
}
-bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid)
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint8_t vmid, uint16_t *p_pasid)
{
- uint32_t reg;
+ uint32_t value;
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
-uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid)
-{
- uint32_t reg;
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
- + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
-}
-
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
- uint32_t flush_type)
-{
- signed long r;
- uint32_t seq;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-
- spin_lock(&adev->gfx.kiq.ring_lock);
- amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
- amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
- amdgpu_ring_write(ring,
- PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
- PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
- PACKET3_INVALIDATE_TLBS_PASID(pasid) |
- PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
- amdgpu_fence_emit_polling(ring, &seq);
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq.ring_lock);
-
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
- if (r < 1) {
- DRM_ERROR("wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid, i;
- struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
- uint32_t flush_type = 0;
-
- if (adev->in_gpu_reset)
- return -EIO;
- if (adev->gmc.xgmi.num_physical_nodes &&
- adev->asic_type == CHIP_VEGA20)
- flush_type = 2;
-
- if (ring->sched.ready)
- return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
-
- for (vmid = 0; vmid < 16; vmid++) {
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
- continue;
- if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
- if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
- == pasid) {
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, vmid,
- i, flush_type);
- break;
- }
- }
- }
-
- return 0;
-}
-
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int i;
-
- if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
- pr_err("non kfd vmid %d\n", vmid);
- return 0;
- }
-
- /* Use legacy mode tlb invalidation.
- *
- * Currently on Raven the code below is broken for anything but
- * legacy mode due to a MMHUB power gating problem. A workaround
- * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
- * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
- * bit.
- *
- * TODO 1: agree on the right set of invalidation registers for
- * KFD use. Use the last one for now. Invalidate both GC and
- * MMHUB.
- *
- * TODO 2: support range-based invalidation, requires kfg2kgd
- * interface change
- */
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-
- return 0;
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)
@@ -777,17 +701,8 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
return 0;
}
-void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd,
- uint64_t va, uint32_t vmid)
-{
- /* No longer needed on GFXv9. The scratch base address is
- * passed to the shader by the CP. It's the user mode driver's
- * responsibility.
- */
-}
-
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base)
+static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
+ uint32_t vmid, uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -797,25 +712,17 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmi
return;
}
- /* TODO: take advantage of per-process address space size. For
- * now, all processes share the same address space size, like
- * on GFX8 and older.
- */
- if (adev->asic_type == CHIP_ARCTURUS) {
- /* Two MMHUBs */
- mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base);
- mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base);
- } else
- mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+ mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}
-static const struct kfd2kgd_calls kfd2kgd = {
+const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
.init_interrupts = kgd_gfx_v9_init_interrupts,
.hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
.hqd_dump = kgd_gfx_v9_hqd_dump,
.hqd_sdma_dump = kgd_hqd_sdma_dump,
@@ -827,19 +734,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.address_watch_execute = kgd_gfx_v9_address_watch_execute,
.wave_control_execute = kgd_gfx_v9_wave_control_execute,
.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
- .get_atc_vmid_pasid_mapping_pasid =
- kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
- .get_atc_vmid_pasid_mapping_valid =
- kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
- .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.get_tile_config = kgd_gfx_v9_get_tile_config,
.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
- .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
- .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
};
-
-struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
-{
- return (struct kfd2kgd_calls *)&kfd2kgd;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index 26d8879bff9d..63d3e6683dfe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -33,6 +33,9 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm);
+int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off);
int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs);
@@ -55,15 +58,7 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
unsigned int watch_point_id,
unsigned int reg_offset);
-bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- uint8_t vmid);
-uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid);
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint64_t page_table_base);
-void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd,
- uint64_t va, uint32_t vmid);
-int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+ uint8_t vmid, uint16_t *p_pasid);
int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
struct tile_config *config);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6d021ecc8d59..fa8ac9d19a7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -19,9 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
-#define pr_fmt(fmt) "kfd2kgd: " fmt
-
#include <linux/dma-buf.h>
#include <linux/list.h>
#include <linux/pagemap.h>
@@ -33,11 +30,6 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_dma_buf.h"
-/* Special VM and GART address alignment needed for VI pre-Fiji due to
- * a HW bug.
- */
-#define VI_BO_SIZE_ALIGN (0x8000)
-
/* BO flag to indicate a KFD userptr BO */
#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
@@ -93,7 +85,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
}
/* Set memory usage limits. Current, limits are
- * System (TTM + userptr) memory - 3/4th System RAM
+ * System (TTM + userptr) memory - 15/16th System RAM
* TTM memory - 3/8th System RAM
*/
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
@@ -106,18 +98,31 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
- kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
+/* Estimate page table size needed to represent a given memory size
+ *
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
+ * for 2MB pages for TLB efficiency. However, small allocations and
+ * fragmented system memory still need some 4KB pages. We choose a
+ * compromise that should work in most cases without reserving too
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
+ */
+#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+
static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain, bool sg)
{
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
- uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
int ret = 0;
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
@@ -349,11 +354,44 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
int ret;
- ret = amdgpu_vm_update_directories(adev, vm);
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
if (ret)
return ret;
- return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
+ return amdgpu_sync_fence(sync, vm->last_update, false);
+}
+
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
+{
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT;
+ uint32_t mapping_flags;
+
+ mapping_flags = AMDGPU_VM_PAGE_READABLE;
+ if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+ if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) {
+ if (bo_adev == adev)
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ } else {
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ }
+ break;
+ default:
+ mapping_flags |= coherent ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ }
+
+ return amdgpu_gem_va_map_flags(adev, mapping_flags);
}
/* add_bo_to_vm - Add a BO to a VM
@@ -404,8 +442,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
}
bo_va_entry->va = va;
- bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
- mem->mapping_flags);
+ bo_va_entry->pte_flags = get_pte_flags(adev, mem);
bo_va_entry->kgd_dev = (void *)adev;
list_add(&bo_va_entry->bo_list, list_bo_va);
@@ -481,8 +518,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
*
* Returns 0 for success, negative errno for errors.
*/
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
- uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
{
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
@@ -586,7 +622,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
- false, &ctx->duplicates, true);
+ false, &ctx->duplicates);
if (!ret)
ctx->reserved = true;
else {
@@ -659,7 +695,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
}
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
- false, &ctx->duplicates, true);
+ false, &ctx->duplicates);
if (!ret)
ctx->reserved = true;
else
@@ -714,7 +750,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
- amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+ amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
return 0;
}
@@ -733,7 +769,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
return ret;
}
- return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -1079,10 +1115,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
uint64_t user_addr = 0;
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
- int byte_align;
u32 domain, alloc_domain;
u64 alloc_flags;
- uint32_t mapping_flags;
int ret;
/*
@@ -1135,25 +1169,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if ((*mem)->aql_queue)
size = size >> 1;
- /* Workaround for TLB bug on older VI chips */
- byte_align = (adev->family == AMDGPU_FAMILY_VI &&
- adev->asic_type != CHIP_FIJI &&
- adev->asic_type != CHIP_POLARIS10 &&
- adev->asic_type != CHIP_POLARIS11 &&
- adev->asic_type != CHIP_POLARIS12 &&
- adev->asic_type != CHIP_VEGAM) ?
- VI_BO_SIZE_ALIGN : 1;
-
- mapping_flags = AMDGPU_VM_PAGE_READABLE;
- if (flags & ALLOC_MEM_FLAGS_WRITABLE)
- mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
- if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
- mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
- if (flags & ALLOC_MEM_FLAGS_COHERENT)
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
- else
- mapping_flags |= AMDGPU_VM_MTYPE_NC;
- (*mem)->mapping_flags = mapping_flags;
+ (*mem)->alloc_flags = flags;
amdgpu_sync_create(&(*mem)->sync);
@@ -1168,7 +1184,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
memset(&bp, 0, sizeof(bp));
bp.size = size;
- bp.byte_align = byte_align;
+ bp.byte_align = 1;
bp.domain = alloc_domain;
bp.flags = alloc_flags;
bp.type = bo_type;
@@ -1195,7 +1211,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
- ret = init_user_pages(*mem, current->mm, user_addr);
+ ret = init_user_pages(*mem, user_addr);
if (ret)
goto allocate_init_user_pages_failed;
}
@@ -1626,9 +1642,10 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
INIT_LIST_HEAD(&(*mem)->bo_va_list);
mutex_init(&(*mem)->lock);
- (*mem)->mapping_flags =
- AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
- AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC;
+ (*mem)->alloc_flags =
+ ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+ ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT) |
+ ALLOC_MEM_FLAGS_WRITABLE | ALLOC_MEM_FLAGS_EXECUTABLE;
(*mem)->bo = amdgpu_bo_ref(bo);
(*mem)->va = va;
@@ -1657,10 +1674,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
struct mm_struct *mm)
{
struct amdkfd_process_info *process_info = mem->process_info;
- int invalid, evicted_bos;
+ int evicted_bos;
int r = 0;
- invalid = atomic_inc_return(&mem->invalid);
+ atomic_inc(&mem->invalid);
evicted_bos = atomic_inc_return(&process_info->evicted_bos);
if (evicted_bos == 1) {
/* First eviction, stop the queues */
@@ -1739,6 +1756,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
return ret;
}
+ /*
+ * FIXME: Cannot ignore the return code, must hold
+ * notifier_lock
+ */
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
/* Mark the BO as valid unless it was invalidated
@@ -1797,8 +1818,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
/* Reserve all BOs and page tables for validation */
- ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates,
- true);
+ ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
WARN(!list_empty(&duplicates), "Duplicates should be empty");
if (ret)
goto out_free;
@@ -1996,7 +2016,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
- false, &duplicate_save, true);
+ false, &duplicate_save);
if (ret) {
pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
goto ttm_reserve_fail;
@@ -2028,7 +2048,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
pr_debug("Memory eviction: Validate BOs failed. Try again\n");
goto validate_map_fail;
}
- ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
+ ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
@@ -2109,6 +2129,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem
return -ENOMEM;
mutex_init(&(*mem)->lock);
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
(*mem)->bo = amdgpu_bo_ref(gws_bo);
(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
(*mem)->process_info = process_info;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 1c9d40f97a9b..fdd52d86a4d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -338,17 +338,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
path_size += le16_to_cpu(path->usSize);
if (device_support & le16_to_cpu(path->usDeviceTag)) {
- uint8_t con_obj_id, con_obj_num, con_obj_type;
-
- con_obj_id =
+ uint8_t con_obj_id =
(le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK)
>> OBJECT_ID_SHIFT;
- con_obj_num =
- (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK)
- >> ENUM_ID_SHIFT;
- con_obj_type =
- (le16_to_cpu(path->usConnObjectId) &
- OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
/* Skip TV/CV support */
if ((le16_to_cpu(path->usDeviceTag) ==
@@ -373,15 +365,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
router.ddc_valid = false;
router.cd_valid = false;
for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) {
- uint8_t grph_obj_id, grph_obj_num, grph_obj_type;
-
- grph_obj_id =
- (le16_to_cpu(path->usGraphicObjIds[j]) &
- OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
- grph_obj_num =
- (le16_to_cpu(path->usGraphicObjIds[j]) &
- ENUM_ID_MASK) >> ENUM_ID_SHIFT;
- grph_obj_type =
+ uint8_t grph_obj_type =
(le16_to_cpu(path->usGraphicObjIds[j]) &
OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
@@ -2038,6 +2022,11 @@ int amdgpu_atombios_init(struct amdgpu_device *adev)
if (adev->is_atom_fw) {
amdgpu_atomfirmware_scratch_regs_init(adev);
amdgpu_atomfirmware_allocate_fb_scratch(adev);
+ ret = amdgpu_atomfirmware_get_mem_train_info(adev);
+ if (ret) {
+ DRM_ERROR("Failed to get mem train fb location.\n");
+ return ret;
+ }
} else {
amdgpu_atombios_scratch_regs_init(adev);
amdgpu_atombios_allocate_fb_scratch(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index daf687428cdb..58f9d8c3a17a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -27,6 +27,7 @@
#include "amdgpu_atomfirmware.h"
#include "atom.h"
#include "atombios.h"
+#include "soc15_hw_ip.h"
bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
{
@@ -120,65 +121,14 @@ union vram_info {
struct atom_vram_info_header_v2_3 v23;
struct atom_vram_info_header_v2_4 v24;
};
-/*
- * Return vram width from integrated system info table, if available,
- * or 0 if not.
- */
-int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev)
-{
- struct amdgpu_mode_info *mode_info = &adev->mode_info;
- int index;
- u16 data_offset, size;
- union igp_info *igp_info;
- union vram_info *vram_info;
- u32 mem_channel_number;
- u32 mem_channel_width;
- u8 frev, crev;
- if (adev->flags & AMD_IS_APU)
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- integratedsysteminfo);
- else
- index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- vram_info);
-
- /* get any igp specific overrides */
- if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
- &frev, &crev, &data_offset)) {
- if (adev->flags & AMD_IS_APU) {
- igp_info = (union igp_info *)
- (mode_info->atom_context->bios + data_offset);
- switch (crev) {
- case 11:
- mem_channel_number = igp_info->v11.umachannelnumber;
- /* channel width is 64 */
- return mem_channel_number * 64;
- default:
- return 0;
- }
- } else {
- vram_info = (union vram_info *)
- (mode_info->atom_context->bios + data_offset);
- switch (crev) {
- case 3:
- mem_channel_number = vram_info->v23.vram_module[0].channel_num;
- mem_channel_width = vram_info->v23.vram_module[0].channel_width;
- return mem_channel_number * (1 << mem_channel_width);
- case 4:
- mem_channel_number = vram_info->v24.vram_module[0].channel_num;
- mem_channel_width = vram_info->v24.vram_module[0].channel_width;
- return mem_channel_number * (1 << mem_channel_width);
- default:
- return 0;
- }
- }
- }
-
- return 0;
-}
+union vram_module {
+ struct atom_vram_module_v9 v9;
+ struct atom_vram_module_v10 v10;
+};
-static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
- int atom_mem_type)
+static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
+ int atom_mem_type)
{
int vram_type;
@@ -219,19 +169,25 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
return vram_type;
}
-/*
- * Return vram type from either integrated system info table
- * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not
- */
-int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
+
+
+int
+amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type,
+ int *vram_vendor)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
- int index;
+ int index, i = 0;
u16 data_offset, size;
union igp_info *igp_info;
union vram_info *vram_info;
+ union vram_module *vram_module;
u8 frev, crev;
u8 mem_type;
+ u8 mem_vendor;
+ u32 mem_channel_number;
+ u32 mem_channel_width;
+ u32 module_id;
if (adev->flags & AMD_IS_APU)
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
@@ -239,6 +195,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
else
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
vram_info);
+
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size,
&frev, &crev, &data_offset)) {
@@ -247,25 +204,67 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
(mode_info->atom_context->bios + data_offset);
switch (crev) {
case 11:
+ mem_channel_number = igp_info->v11.umachannelnumber;
+ /* channel width is 64 */
+ if (vram_width)
+ *vram_width = mem_channel_number * 64;
mem_type = igp_info->v11.memorytype;
- return convert_atom_mem_type_to_vram_type(adev, mem_type);
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
default:
- return 0;
+ return -EINVAL;
}
} else {
vram_info = (union vram_info *)
(mode_info->atom_context->bios + data_offset);
+ module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
switch (crev) {
case 3:
- mem_type = vram_info->v23.vram_module[0].memory_type;
- return convert_atom_mem_type_to_vram_type(adev, mem_type);
+ if (module_id > vram_info->v23.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v23.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
case 4:
- mem_type = vram_info->v24.vram_module[0].memory_type;
- return convert_atom_mem_type_to_vram_type(adev, mem_type);
+ if (module_id > vram_info->v24.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v24.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v10.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v10.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v10.channel_num;
+ mem_channel_width = vram_module->v10.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
default:
- return 0;
+ return -EINVAL;
}
}
+
}
return 0;
@@ -464,3 +463,108 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
}
return -EINVAL;
}
+
+/*
+ * Check if VBIOS supports GDDR6 training data save/restore
+ */
+static bool gddr6_mem_train_vbios_support(struct amdgpu_device *adev)
+{
+ uint16_t data_offset;
+ int index;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL,
+ NULL, NULL, &data_offset)) {
+ struct atom_firmware_info_v3_1 *firmware_info =
+ (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios +
+ data_offset);
+
+ DRM_DEBUG("atom firmware capability:0x%08x.\n",
+ le32_to_cpu(firmware_info->firmware_capability));
+
+ if (le32_to_cpu(firmware_info->firmware_capability) &
+ ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING)
+ return true;
+ }
+
+ return false;
+}
+
+static int gddr6_mem_train_support(struct amdgpu_device *adev)
+{
+ int ret;
+ uint32_t major, minor, revision, hw_v;
+
+ if (gddr6_mem_train_vbios_support(adev)) {
+ amdgpu_discovery_get_ip_version(adev, MP0_HWID, &major, &minor, &revision);
+ hw_v = HW_REV(major, minor, revision);
+ /*
+ * treat 0 revision as a special case since register for MP0 and MMHUB is missing
+ * for some Navi10 A0, preventing driver from discovering the hwip information since
+ * none of the functions will be initialized, it should not cause any problems
+ */
+ switch (hw_v) {
+ case HW_REV(11, 0, 0):
+ case HW_REV(11, 0, 5):
+ ret = 1;
+ break;
+ default:
+ DRM_ERROR("memory training vbios supports but psp hw(%08x)"
+ " doesn't support!\n", hw_v);
+ ret = -1;
+ break;
+ }
+ } else {
+ ret = 0;
+ hw_v = -1;
+ }
+
+
+ DRM_DEBUG("mp0 hw_v %08x, ret:%d.\n", hw_v, ret);
+ return ret;
+}
+
+int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ int index;
+ uint8_t frev, crev;
+ uint16_t data_offset, size;
+ int ret;
+
+ adev->fw_vram_usage.mem_train_support = false;
+
+ if (adev->asic_type != CHIP_NAVI10 &&
+ adev->asic_type != CHIP_NAVI14)
+ return 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = gddr6_mem_train_support(adev);
+ if (ret == -1)
+ return -EINVAL;
+ else if (ret == 0)
+ return 0;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ vram_usagebyfirmware);
+ ret = amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev,
+ &data_offset);
+ if (ret == 0) {
+ DRM_ERROR("parse data header failed.\n");
+ return -EINVAL;
+ }
+
+ DRM_DEBUG("atom firmware common table header size:0x%04x, frev:0x%02x,"
+ " crev:0x%02x, data_offset:0x%04x.\n", size, frev, crev, data_offset);
+ /* only support 2.1+ */
+ if (((uint16_t)frev << 8 | crev) < 0x0201) {
+ DRM_ERROR("frev:0x%02x, crev:0x%02x < 2.1 !\n", frev, crev);
+ return -EINVAL;
+ }
+
+ adev->fw_vram_usage.mem_train_support = true;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 5ec6f92f353c..434fe2fa0089 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -29,8 +29,9 @@
bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev);
void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
-int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
-int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type, int *vram_vendor);
+int amdgpu_atomfirmware_get_mem_train_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 649e68c4479b..d1495e1c9289 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
{
unsigned long start_jiffies;
unsigned long end_jiffies;
- struct dma_fence *fence = NULL;
+ struct dma_fence *fence;
int i, r;
start_jiffies = jiffies;
@@ -44,16 +44,14 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
if (r)
goto exit_do_move;
r = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
if (r)
goto exit_do_move;
- dma_fence_put(fence);
}
end_jiffies = jiffies;
r = jiffies_to_msecs(end_jiffies - start_jiffies);
exit_do_move:
- if (fence)
- dma_fence_put(fence);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 7bcf86c61999..85b0515c0fdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -140,7 +140,12 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
return 0;
error_free:
- while (i--) {
+ for (i = 0; i < last_entry; ++i) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
+
+ amdgpu_bo_unref(&bo);
+ }
+ for (i = first_userptr; i < num_entries; ++i) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
amdgpu_bo_unref(&bo);
@@ -270,7 +275,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
r = amdgpu_bo_create_list_entry_array(&args->in, &info);
if (r)
- goto error_free;
+ return r;
switch (args->in.operation) {
case AMDGPU_BO_LIST_OP_CREATE:
@@ -283,8 +288,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
mutex_unlock(&fpriv->bo_list_lock);
if (r < 0) {
- amdgpu_bo_list_put(list);
- return r;
+ goto error_put_list;
}
handle = r;
@@ -306,9 +310,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
mutex_unlock(&fpriv->bo_list_lock);
if (IS_ERR(old)) {
- amdgpu_bo_list_put(list);
r = PTR_ERR(old);
- goto error_free;
+ goto error_put_list;
}
amdgpu_bo_list_put(old);
@@ -325,8 +328,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
return 0;
+error_put_list:
+ amdgpu_bo_list_put(list);
+
error_free:
- if (info)
- kvfree(info);
+ kvfree(info);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index d8729285f731..a62cbc8199de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -1019,8 +1019,12 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
*/
if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) {
struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *list_amdgpu_connector;
- list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) {
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(list_connector,
+ &iter) {
if (connector == list_connector)
continue;
list_amdgpu_connector = to_amdgpu_connector(list_connector);
@@ -1037,6 +1041,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
}
}
}
+ drm_connector_list_iter_end(&iter);
}
}
}
@@ -1494,6 +1499,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector;
struct amdgpu_connector_atom_dig *amdgpu_dig_connector;
struct drm_encoder *encoder;
@@ -1508,10 +1514,12 @@ amdgpu_connector_add(struct amdgpu_device *adev,
return;
/* see if we already added it */
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->connector_id == connector_id) {
amdgpu_connector->devices |= supported_device;
+ drm_connector_list_iter_end(&iter);
return;
}
if (amdgpu_connector->ddc_bus && i2c_bus->valid) {
@@ -1526,6 +1534,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
}
}
}
+ drm_connector_list_iter_end(&iter);
/* check if it's a dp bridge */
list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2e53feed40e2..a52a084158b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -35,6 +35,7 @@
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
+#include "amdgpu_ras.h"
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_fence *data,
@@ -449,75 +450,12 @@ retry:
return r;
}
-/* Last resort, try to evict something from the current working set */
-static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
- struct amdgpu_bo *validated)
-{
- uint32_t domain = validated->allowed_domains;
- struct ttm_operation_ctx ctx = { true, false };
- int r;
-
- if (!p->evictable)
- return false;
-
- for (;&p->evictable->tv.head != &p->validated;
- p->evictable = list_prev_entry(p->evictable, tv.head)) {
-
- struct amdgpu_bo_list_entry *candidate = p->evictable;
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- bool update_bytes_moved_vis;
- uint32_t other;
-
- /* If we reached our current BO we can forget it */
- if (bo == validated)
- break;
-
- /* We can't move pinned BOs here */
- if (bo->pin_count)
- continue;
-
- other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-
- /* Check if this BO is in one of the domains we need space for */
- if (!(other & domain))
- continue;
-
- /* Check if we can move this BO somewhere else */
- other = bo->allowed_domains & ~domain;
- if (!other)
- continue;
-
- /* Good we can try to move this BO somewhere else */
- update_bytes_moved_vis =
- !amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- amdgpu_bo_in_cpu_visible_vram(bo);
- amdgpu_bo_placement_from_domain(bo, other);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- p->bytes_moved += ctx.bytes_moved;
- if (update_bytes_moved_vis)
- p->bytes_moved_vis += ctx.bytes_moved;
-
- if (unlikely(r))
- break;
-
- p->evictable = list_prev_entry(p->evictable, tv.head);
- list_move(&candidate->tv.head, &p->validated);
-
- return true;
- }
-
- return false;
-}
-
static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
{
struct amdgpu_cs_parser *p = param;
int r;
- do {
- r = amdgpu_cs_bo_validate(p, bo);
- } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
+ r = amdgpu_cs_bo_validate(p, bo);
if (r)
return r;
@@ -536,7 +474,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
list_for_each_entry(lobj, validated, tv.head) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
- bool binding_userptr = false;
struct mm_struct *usermm;
usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
@@ -553,20 +490,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
lobj->user_pages);
- binding_userptr = true;
}
- if (p->evictable == lobj)
- p->evictable = NULL;
-
r = amdgpu_cs_validate(p, bo);
if (r)
return r;
- if (binding_userptr) {
- kvfree(lobj->user_pages);
- lobj->user_pages = NULL;
- }
+ kvfree(lobj->user_pages);
+ lobj->user_pages = NULL;
}
return 0;
}
@@ -607,8 +538,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->tv.num_shared = 2;
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
- if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@@ -650,7 +579,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
- &duplicates, false);
+ &duplicates);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
@@ -661,9 +590,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
&p->bytes_moved_vis_threshold);
p->bytes_moved = 0;
p->bytes_moved_vis = 0;
- p->evictable = list_last_entry(&p->validated,
- struct amdgpu_bo_list_entry,
- tv.head);
r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
amdgpu_cs_validate, p);
@@ -869,29 +795,23 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(adev, &p->job->sync,
- fpriv->prt_va->last_pt_update, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
if (r)
return r;
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
- struct dma_fence *f;
-
bo_va = fpriv->csa_va;
BUG_ON(!bo_va);
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
- f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
if (r)
return r;
}
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct dma_fence *f;
-
/* ignore duplicates */
bo = ttm_to_amdgpu_bo(e->tv.bo);
if (!bo)
@@ -905,8 +825,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
if (r)
return r;
}
@@ -915,11 +834,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_vm_update_directories(adev, vm);
+ r = amdgpu_vm_update_pdes(adev, vm, false);
if (r)
return r;
- r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
+ r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
if (r)
return r;
@@ -990,6 +909,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (parser->entity && parser->entity != entity)
return -EINVAL;
+ /* Return if there is no run queue associated with this entity.
+ * Possibly because of disabled HW IP*/
+ if (entity->rq == NULL)
+ return -EINVAL;
+
parser->entity = entity;
ring = to_amdgpu_ring(entity->rq->sched);
@@ -1061,7 +985,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}
- r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+ r = amdgpu_sync_fence(&p->job->sync, fence, true);
dma_fence_put(fence);
if (r)
return r;
@@ -1083,7 +1007,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
return r;
}
- r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
+ r = amdgpu_sync_fence(&p->job->sync, fence, true);
dma_fence_put(fence);
return r;
@@ -1291,11 +1215,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
- /* No memory allocation is allowed while holding the mn lock.
- * p->mn is hold until amdgpu_cs_submit is finished and fence is added
- * to BOs.
+ /* No memory allocation is allowed while holding the notifier lock.
+ * The lock is held until amdgpu_cs_submit is finished and fence is
+ * added to BOs.
*/
- amdgpu_mn_lock(p->mn);
+ mutex_lock(&p->adev->notifier_lock);
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
@@ -1310,7 +1234,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
goto error_abort;
}
- job->owner = p->filp;
p->fence = dma_fence_get(&job->base.s_fence->finished);
amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
@@ -1338,13 +1261,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
- amdgpu_mn_unlock(p->mn);
+ mutex_unlock(&p->adev->notifier_lock);
return 0;
error_abort:
drm_sched_job_cleanup(&job->base);
- amdgpu_mn_unlock(p->mn);
+ mutex_unlock(&p->adev->notifier_lock);
error_unlock:
amdgpu_job_free(job);
@@ -1359,6 +1282,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
bool reserved_buffers = false;
int i, r;
+ if (amdgpu_ras_intr_triggered())
+ return -EHWPOISON;
+
if (!adev->accel_working)
return -EBUSY;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 35a8d3c96fc9..08047bc4d588 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -80,7 +80,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
list_add(&csa_tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
- r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, false);
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
if (r) {
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 6614d8a6f4c8..94a6c42f29ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
[AMDGPU_HW_IP_VCN_JPEG] = 1,
};
-static int amdgpu_ctx_total_num_entities(void)
-{
- unsigned i, num_entities = 0;
-
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
- num_entities += amdgpu_ctx_num_entities[i];
-
- return num_entities;
-}
-
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
+ if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+ return -EINVAL;
+
/* NORMAL and below are accessible by everyone */
if (priority <= DRM_SCHED_PRIORITY_NORMAL)
return 0;
@@ -68,47 +61,94 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
+{
+ struct amdgpu_device *adev = ctx->adev;
+ struct amdgpu_ctx_entity *entity;
+ struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+ unsigned num_scheds = 0;
+ enum drm_sched_priority priority;
+ int r;
+
+ entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+ GFP_KERNEL);
+ if (!entity)
+ return -ENOMEM;
+
+ entity->sequence = 1;
+ priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ switch (hw_ip) {
+ case AMDGPU_HW_IP_GFX:
+ sched = &adev->gfx.gfx_ring[0].sched;
+ scheds = &sched;
+ num_scheds = 1;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ scheds = adev->gfx.compute_sched;
+ num_scheds = adev->gfx.num_compute_sched;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ scheds = adev->sdma.sdma_sched;
+ num_scheds = adev->sdma.num_sdma_sched;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ sched = &adev->uvd.inst[0].ring.sched;
+ scheds = &sched;
+ num_scheds = 1;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ sched = &adev->vce.ring[0].sched;
+ scheds = &sched;
+ num_scheds = 1;
+ break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ sched = &adev->uvd.inst[0].ring_enc[0].sched;
+ scheds = &sched;
+ num_scheds = 1;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ scheds = adev->vcn.vcn_dec_sched;
+ num_scheds = adev->vcn.num_vcn_dec_sched;
+ break;
+ case AMDGPU_HW_IP_VCN_ENC:
+ scheds = adev->vcn.vcn_enc_sched;
+ num_scheds = adev->vcn.num_vcn_enc_sched;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ scheds = adev->jpeg.jpeg_sched;
+ num_scheds = adev->jpeg.num_jpeg_sched;
+ break;
+ }
+
+ r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+ &ctx->guilty);
+ if (r)
+ goto error_free_entity;
+
+ ctx->entities[hw_ip][ring] = entity;
+ return 0;
+
+error_free_entity:
+ kfree(entity);
+
+ return r;
+}
+
static int amdgpu_ctx_init(struct amdgpu_device *adev,
enum drm_sched_priority priority,
struct drm_file *filp,
struct amdgpu_ctx *ctx)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
- unsigned i, j, k;
int r;
- if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
- return -EINVAL;
-
r = amdgpu_ctx_priority_permit(filp, priority);
if (r)
return r;
memset(ctx, 0, sizeof(*ctx));
- ctx->adev = adev;
-
- ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
- sizeof(struct dma_fence*), GFP_KERNEL);
- if (!ctx->fences)
- return -ENOMEM;
-
- ctx->entities[0] = kcalloc(num_entities,
- sizeof(struct amdgpu_ctx_entity),
- GFP_KERNEL);
- if (!ctx->entities[0]) {
- r = -ENOMEM;
- goto error_free_fences;
- }
-
- for (i = 0; i < num_entities; ++i) {
- struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
- entity->sequence = 1;
- entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
- }
- for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
- ctx->entities[i] = ctx->entities[i - 1] +
- amdgpu_ctx_num_entities[i - 1];
+ ctx->adev = adev;
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
@@ -120,114 +160,49 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
ctx->init_priority = priority;
ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
- for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
- struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
- struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
- unsigned num_rings = 0;
- unsigned num_rqs = 0;
-
- switch (i) {
- case AMDGPU_HW_IP_GFX:
- rings[0] = &adev->gfx.gfx_ring[0];
- num_rings = 1;
- break;
- case AMDGPU_HW_IP_COMPUTE:
- for (j = 0; j < adev->gfx.num_compute_rings; ++j)
- rings[j] = &adev->gfx.compute_ring[j];
- num_rings = adev->gfx.num_compute_rings;
- break;
- case AMDGPU_HW_IP_DMA:
- for (j = 0; j < adev->sdma.num_instances; ++j)
- rings[j] = &adev->sdma.instance[j].ring;
- num_rings = adev->sdma.num_instances;
- break;
- case AMDGPU_HW_IP_UVD:
- rings[0] = &adev->uvd.inst[0].ring;
- num_rings = 1;
- break;
- case AMDGPU_HW_IP_VCE:
- rings[0] = &adev->vce.ring[0];
- num_rings = 1;
- break;
- case AMDGPU_HW_IP_UVD_ENC:
- rings[0] = &adev->uvd.inst[0].ring_enc[0];
- num_rings = 1;
- break;
- case AMDGPU_HW_IP_VCN_DEC:
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
- rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
- }
- break;
- case AMDGPU_HW_IP_VCN_ENC:
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
- for (k = 0; k < adev->vcn.num_enc_rings; ++k)
- rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
- }
- break;
- case AMDGPU_HW_IP_VCN_JPEG:
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
- rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg;
- }
- break;
- }
+ return 0;
- for (j = 0; j < num_rings; ++j) {
- if (!rings[j]->adev)
- continue;
+}
- rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
- }
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
- for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
- r = drm_sched_entity_init(&ctx->entities[i][j].entity,
- rqs, num_rqs, &ctx->guilty);
- if (r)
- goto error_cleanup_entities;
- }
+ int i;
- return 0;
+ if (!entity)
+ return;
-error_cleanup_entities:
- for (i = 0; i < num_entities; ++i)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
- kfree(ctx->entities[0]);
+ for (i = 0; i < amdgpu_sched_jobs; ++i)
+ dma_fence_put(entity->fences[i]);
-error_free_fences:
- kfree(ctx->fences);
- ctx->fences = NULL;
- return r;
+ kfree(entity);
}
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
if (!adev)
return;
- for (i = 0; i < num_entities; ++i)
- for (j = 0; j < amdgpu_sched_jobs; ++j)
- dma_fence_put(ctx->entities[0][i].fences[j]);
- kfree(ctx->fences);
- kfree(ctx->entities[0]);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+ amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+ ctx->entities[i][j] = NULL;
+ }
+ }
mutex_destroy(&ctx->lock);
-
kfree(ctx);
}
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
u32 ring, struct drm_sched_entity **entity)
{
+ int r;
+
if (hw_ip >= AMDGPU_HW_IP_NUM) {
DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
return -EINVAL;
@@ -244,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
return -EINVAL;
}
- *entity = &ctx->entities[hw_ip][ring].entity;
+ if (ctx->entities[hw_ip][ring] == NULL) {
+ r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+ if (r)
+ return r;
+ }
+
+ *entity = &ctx->entities[hw_ip][ring]->entity;
return 0;
}
@@ -284,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
- unsigned num_entities;
- u32 i;
+ u32 i, j;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i][j])
+ continue;
- num_entities = amdgpu_ctx_total_num_entities();
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+ drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+ }
+ }
amdgpu_ctx_fini(ref);
}
@@ -521,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
enum drm_sched_priority ctx_prio;
- unsigned i;
+ unsigned i, j;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+ if (!ctx->entities[i][j])
+ continue;
- drm_sched_entity_set_priority(entity, ctx_prio);
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_set_priority(entity, ctx_prio);
+ }
}
}
@@ -569,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
- for (i = 0; i < num_entities; i++) {
- struct drm_sched_entity *entity;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
- entity = &ctx->entities[0][i].entity;
- timeout = drm_sched_entity_flush(entity, timeout);
+ entity = &ctx->entities[i][j]->entity;
+ timeout = drm_sched_entity_flush(entity, timeout);
+ }
}
}
mutex_unlock(&mgr->lock);
@@ -591,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
- unsigned num_entities = amdgpu_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
- uint32_t id, i;
+ uint32_t id, i, j;
idp = &mgr->ctx_handles;
@@ -604,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
continue;
}
- for (i = 0; i < num_entities; i++)
- drm_sched_entity_fini(&ctx->entities[0][i].entity);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
+
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_fini(entity);
+ }
+ }
}
}
@@ -627,3 +627,45 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->lock);
}
+
+void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched;
+ adev->gfx.num_gfx_sched++;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
+ adev->gfx.num_compute_sched++;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched;
+ adev->sdma.num_sdma_sched++;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
+ &adev->vcn.inst[i].ring_dec.sched;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j)
+ adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
+ &adev->vcn.inst[i].ring_enc[j].sched;
+ }
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+ adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
+ &adev->jpeg.inst[i].ring_dec.sched;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index da808633732b..de490f183af2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -29,10 +29,12 @@ struct drm_device;
struct drm_file;
struct amdgpu_fpriv;
+#define AMDGPU_MAX_ENTITY_NUM 4
+
struct amdgpu_ctx_entity {
uint64_t sequence;
- struct dma_fence **fences;
struct drm_sched_entity entity;
+ struct dma_fence *fences[];
};
struct amdgpu_ctx {
@@ -42,8 +44,7 @@ struct amdgpu_ctx {
unsigned reset_counter_query;
uint32_t vram_lost_counter;
spinlock_t ring_lock;
- struct dma_fence **fences;
- struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
+ struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
bool preamble_presented;
enum drm_sched_priority init_priority;
enum drm_sched_priority override_priority;
@@ -87,4 +88,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
+
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 5652cc72ed3a..f24ed9a1a3e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -26,6 +26,7 @@
#include <linux/kthread.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
#include <drm/drm_debugfs.h>
@@ -129,7 +130,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
sh_bank = 0xFFFFFFFF;
if (instance_bank == 0x3FF)
instance_bank = 0xFFFFFFFF;
- use_bank = 1;
+ use_bank = true;
} else if (*pos & (1ULL << 61)) {
me = (*pos & GENMASK_ULL(33, 24)) >> 24;
@@ -137,17 +138,24 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
vmid = (*pos & GENMASK_ULL(58, 54)) >> 54;
- use_ring = 1;
+ use_ring = true;
} else {
- use_bank = use_ring = 0;
+ use_bank = use_ring = false;
}
*pos &= (1UL << 22) - 1;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
if (use_bank) {
if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
- (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
+ (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
+ }
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se_bank,
sh_bank, instance_bank);
@@ -193,6 +201,9 @@ end:
if (pm_pg_lock)
mutex_unlock(&adev->pm.mutex);
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -237,13 +248,20 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
value = RREG32_PCIE(*pos >> 2);
r = put_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
result += 4;
buf += 4;
@@ -251,6 +269,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -276,12 +297,19 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
WREG32_PCIE(*pos >> 2, value);
@@ -291,6 +319,9 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -316,13 +347,20 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
value = RREG32_DIDT(*pos >> 2);
r = put_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
result += 4;
buf += 4;
@@ -330,6 +368,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -355,12 +396,19 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
WREG32_DIDT(*pos >> 2, value);
@@ -370,6 +418,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -395,13 +446,20 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
value = RREG32_SMC(*pos);
r = put_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
result += 4;
buf += 4;
@@ -409,6 +467,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -434,12 +495,19 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
while (size) {
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r)
+ if (r) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return r;
+ }
WREG32_SMC(*pos, value);
@@ -449,6 +517,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
size -= 4;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return result;
}
@@ -572,7 +643,16 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
idx = *pos >> 2;
valuesize = sizeof(values);
+
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -633,6 +713,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
simd = (*pos & GENMASK_ULL(44, 37)) >> 37;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -644,6 +728,9 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
mutex_unlock(&adev->grbm_idx_mutex);
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (!x)
return -EINVAL;
@@ -711,6 +798,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
if (!data)
return -ENOMEM;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
/* switch to the specific se/sh/cu */
mutex_lock(&adev->grbm_idx_mutex);
amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -726,6 +817,9 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
mutex_unlock(&adev->grbm_idx_mutex);
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
while (size) {
uint32_t value;
@@ -859,6 +953,13 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
struct amdgpu_device *adev = dev->dev_private;
int r = 0, i;
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ return r;
+
+ /* Avoid accidently unparking the sched thread during GPU reset */
+ mutex_lock(&adev->lock_reset);
+
/* hold on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -884,6 +985,11 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
kthread_unpark(ring->sched.thread);
}
+ mutex_unlock(&adev->lock_reset);
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
return 0;
}
@@ -902,8 +1008,17 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ return r;
seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
return 0;
}
@@ -912,8 +1027,17 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ return r;
seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
return 0;
}
@@ -1036,6 +1160,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
if (!fences)
return -ENOMEM;
+ /* Avoid accidently unparking the sched thread during GPU reset */
+ mutex_lock(&adev->lock_reset);
+
/* stop the scheduler */
kthread_park(ring->sched.thread);
@@ -1075,10 +1202,11 @@ failure:
/* restart the scheduler */
kthread_unpark(ring->sched.thread);
+ mutex_unlock(&adev->lock_reset);
+
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
- if (fences)
- kfree(fences);
+ kfree(fences);
return 0;
}
@@ -1090,8 +1218,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
adev->debugfs_preempt =
debugfs_create_file("amdgpu_preempt_ib", 0600,
- adev->ddev->primary->debugfs_root,
- (void *)adev, &fops_ib_preempt);
+ adev->ddev->primary->debugfs_root, adev,
+ &fops_ib_preempt);
if (!(adev->debugfs_preempt)) {
DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
return -EIO;
@@ -1103,8 +1231,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev)
{
- if (adev->debugfs_preempt)
- debugfs_remove(adev->debugfs_preempt);
+ debugfs_remove(adev->debugfs_preempt);
}
#else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5a1939dbd4e3..39cd545976b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -65,6 +65,9 @@
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
+#include <linux/suspend.h>
+#include <drm/task_barrier.h>
+
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
@@ -78,7 +81,7 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
-static const char *amdgpu_asic_name[] = {
+const char *amdgpu_asic_name[] = {
"TAHITI",
"PITCAIRN",
"VERDE",
@@ -135,14 +138,14 @@ static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
- * amdgpu_device_is_px - Is the device is a dGPU with HG/PX power control
+ * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
*
* @dev: drm_device pointer
*
* Returns true if the device is a dGPU with HG/PX power control,
* otherwise return false.
*/
-bool amdgpu_device_is_px(struct drm_device *dev)
+bool amdgpu_device_supports_boco(struct drm_device *dev)
{
struct amdgpu_device *adev = dev->dev_private;
@@ -151,6 +154,51 @@ bool amdgpu_device_is_px(struct drm_device *dev)
return false;
}
+/**
+ * amdgpu_device_supports_baco - Does the device support BACO
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device supporte BACO,
+ * otherwise return false.
+ */
+bool amdgpu_device_supports_baco(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+
+ return amdgpu_asic_supports_baco(adev);
+}
+
+/**
+ * VRAM access helper functions.
+ *
+ * amdgpu_device_vram_access - read/write a buffer in vram
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, sizeof(@buf) must > @size
+ * @write: true - write to vram, otherwise - read from vram
+ */
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+ uint32_t *buf, size_t size, bool write)
+{
+ uint64_t last;
+ unsigned long flags;
+
+ last = size - 4;
+ for (last += pos; pos <= last; pos += 4) {
+ spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+ WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
+ if (write)
+ WREG32_NO_KIQ(mmMM_DATA, *buf++);
+ else
+ *buf++ = RREG32_NO_KIQ(mmMM_DATA);
+ spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+ }
+}
+
/*
* MMIO register access helper functions.
*/
@@ -168,8 +216,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
{
uint32_t ret;
- if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
- return amdgpu_virt_kiq_rreg(adev, reg);
+ if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
+ return amdgpu_kiq_rreg(adev, reg);
if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
@@ -246,8 +294,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
adev->last_mm_index = v;
}
- if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
- return amdgpu_virt_kiq_wreg(adev, reg, v);
+ if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
+ return amdgpu_kiq_wreg(adev, reg, v);
if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
@@ -937,7 +985,7 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
struct sysinfo si;
- bool is_os_64 = (sizeof(void *) == 8) ? true : false;
+ bool is_os_64 = (sizeof(void *) == 8);
uint64_t total_memory;
uint64_t dram_size_seven_GB = 0x1B8000000;
uint64_t dram_size_three_GB = 0xB8000000;
@@ -984,8 +1032,6 @@ def_value:
*/
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
- int ret = 0;
-
if (amdgpu_sched_jobs < 4) {
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs);
@@ -1023,15 +1069,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_device_check_block_size(adev);
- ret = amdgpu_device_get_job_timeout_settings(adev);
- if (ret) {
- dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
- return ret;
- }
-
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
- return ret;
+ return 0;
}
/**
@@ -1046,8 +1086,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
struct drm_device *dev = pci_get_drvdata(pdev);
+ int r;
- if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
+ if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
return;
if (state == VGA_SWITCHEROO_ON) {
@@ -1055,7 +1096,12 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
/* don't suspend or resume card normally */
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
- amdgpu_device_resume(dev, true, true);
+ pci_set_power_state(dev->pdev, PCI_D0);
+ pci_restore_state(dev->pdev);
+ r = pci_enable_device(dev->pdev);
+ if (r)
+ DRM_WARN("pci_enable_device failed (%d)\n", r);
+ amdgpu_device_resume(dev, true);
dev->switch_power_state = DRM_SWITCH_POWER_ON;
drm_kms_helper_poll_enable(dev);
@@ -1063,7 +1109,11 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
pr_info("amdgpu: switched off\n");
drm_kms_helper_poll_disable(dev);
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
- amdgpu_device_suspend(dev, true, true);
+ amdgpu_device_suspend(dev, true);
+ pci_save_state(dev->pdev);
+ /* Shut down the device */
+ pci_disable_device(dev->pdev);
+ pci_set_power_state(dev->pdev, PCI_D3cold);
dev->switch_power_state = DRM_SWITCH_POWER_OFF;
}
}
@@ -1469,6 +1519,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
+ goto parse_soc_bounding_box;
+
adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
@@ -1496,14 +1549,18 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
adev->gfx.config.num_packer_per_sc =
le32_to_cpu(gpu_info_fw->num_packer_per_sc);
}
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
+
+parse_soc_bounding_box:
+ /*
+ * soc bounding box info is not integrated in disocovery table,
+ * we always need to parse it from gpu info firmware.
+ */
if (hdr->version_minor == 2) {
const struct gpu_info_firmware_v1_2 *gpu_info_fw =
(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
}
-#endif
break;
}
default:
@@ -1613,6 +1670,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (r)
return r;
+ if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
+ amdgpu_discovery_get_gfx_info(adev);
+
amdgpu_amdkfd_device_probe(adev);
if (amdgpu_sriov_vf(adev)) {
@@ -1622,7 +1682,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
}
adev->pm.pp_feature = amdgpu_pp_feature_mask;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1749,7 +1809,8 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
}
}
- r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
+ r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
return r;
}
@@ -1816,6 +1877,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
}
}
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_init_data_exchange(adev);
+
r = amdgpu_ib_pool_init(adev);
if (r) {
dev_err(adev->dev, "IB initialization failed (%d).\n", r);
@@ -1839,16 +1903,26 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
goto init_failed;
+ /*
+ * retired pages will be loaded from eeprom and reserved here,
+ * it should be called after amdgpu_device_ip_hw_init_phase2 since
+ * for some ASICs the RAS EEPROM code relies on SMU fully functioning
+ * for I2C communication which only true at this point.
+ * recovery_init may fail, but it can free all resources allocated by
+ * itself and its failure should not stop amdgpu init process.
+ *
+ * Note: theoretically, this should be called before all vram allocations
+ * to protect retired page from abusing
+ */
+ amdgpu_ras_recovery_init(adev);
+
if (adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_add_device(adev);
amdgpu_amdkfd_device_init(adev);
init_failed:
- if (amdgpu_sriov_vf(adev)) {
- if (!r)
- amdgpu_virt_init_data_exchange(adev);
+ if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, true);
- }
return r;
}
@@ -1887,6 +1961,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
* amdgpu_device_set_cg_state - set clockgating for amdgpu device
*
* @adev: amdgpu_device pointer
+ * @state: clockgating state (gate or ungate)
*
* The list of all the hardware IPs that make up the asic is walked and the
* set_clockgating_state callbacks are run.
@@ -1911,6 +1986,7 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1941,6 +2017,7 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_powergating_state) {
/* enable powergating to save power */
r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
@@ -2006,6 +2083,7 @@ out:
*/
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
+ struct amdgpu_gpu_instance *gpu_instance;
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -2031,8 +2109,39 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
- /* set to low pstate by default */
- amdgpu_xgmi_set_pstate(adev, 0);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset device p-state to low as this was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive get initialized.
+ *
+ * However, it's unknown how many device in the hive in advance.
+ * As this is counted one by one during devices initializations.
+ *
+ * So, we wait for all XGMI interlinked devices initialized.
+ * This may bring some delays as those devices may come from
+ * different hives. But that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
+ if (r) {
+ DRM_ERROR("pstate setting failed (%d).\n", r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
return 0;
}
@@ -2220,6 +2329,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
/* displays are handled in phase1 */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
continue;
+ /* PSP lost connection when err_event_athub occurs */
+ if (amdgpu_ras_intr_triggered() &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
+ adev->ip_blocks[i].status.hw = false;
+ continue;
+ }
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
@@ -2230,18 +2345,11 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
adev->ip_blocks[i].status.hw = false;
/* handle putting the SMC in the appropriate state */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
- if (is_support_sw_smu(adev)) {
- /* todo */
- } else if (adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->set_mp1_state) {
- r = adev->powerplay.pp_funcs->set_mp1_state(
- adev->powerplay.pp_handle,
- adev->mp1_state);
- if (r) {
- DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
- adev->mp1_state, r);
- return r;
- }
+ r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
+ if (r) {
+ DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
+ adev->mp1_state, r);
+ return r;
}
}
@@ -2324,7 +2432,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_GFX,
AMD_IP_BLOCK_TYPE_SDMA,
AMD_IP_BLOCK_TYPE_UVD,
- AMD_IP_BLOCK_TYPE_VCE
+ AMD_IP_BLOCK_TYPE_VCE,
+ AMD_IP_BLOCK_TYPE_VCN
};
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
@@ -2339,7 +2448,11 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
block->status.hw)
continue;
- r = block->version->funcs->hw_init(adev);
+ if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
+ r = block->version->funcs->resume(adev);
+ else
+ r = block->version->funcs->hw_init(adev);
+
DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
if (r)
return r;
@@ -2511,20 +2624,19 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
case CHIP_RAVEN:
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
case CHIP_RENOIR:
#endif
return amdgpu_dc != 0;
#endif
default:
+ if (amdgpu_dc > 0)
+ DRM_INFO("Display Core has been requested via kernel parameter "
+ "but isn't supported by ASIC, ignoring\n");
return false;
}
}
@@ -2549,13 +2661,110 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
{
struct amdgpu_device *adev =
container_of(__work, struct amdgpu_device, xgmi_reset_work);
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+
+ /* It's a bug to not have a hive within this function */
+ if (WARN_ON(!hive))
+ return;
+
+ /*
+ * Use task barrier to synchronize all xgmi reset works across the
+ * hive. task_barrier_enter and task_barrier_exit will block
+ * until all the threads running the xgmi reset works reach
+ * those points. task_barrier_full will do both blocks.
+ */
+ if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+
+ task_barrier_enter(&hive->tb);
+ adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
+
+ if (adev->asic_reset_res)
+ goto fail;
+
+ task_barrier_exit(&hive->tb);
+ adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
+
+ if (adev->asic_reset_res)
+ goto fail;
+ } else {
+
+ task_barrier_full(&hive->tb);
+ adev->asic_reset_res = amdgpu_asic_reset(adev);
+ }
- adev->asic_reset_res = amdgpu_asic_reset(adev);
+fail:
if (adev->asic_reset_res)
DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
adev->asic_reset_res, adev->ddev->unique);
}
+static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
+{
+ char *input = amdgpu_lockup_timeout;
+ char *timeout_setting = NULL;
+ int index = 0;
+ long timeout;
+ int ret = 0;
+
+ /*
+ * By default timeout for non compute jobs is 10000.
+ * And there is no timeout enforced on compute jobs.
+ * In SR-IOV or passthrough mode, timeout for compute
+ * jobs are 10000 by default.
+ */
+ adev->gfx_timeout = msecs_to_jiffies(10000);
+ adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+ if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
+ adev->compute_timeout = adev->gfx_timeout;
+ else
+ adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+
+ if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+ while ((timeout_setting = strsep(&input, ",")) &&
+ strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+ ret = kstrtol(timeout_setting, 0, &timeout);
+ if (ret)
+ return ret;
+
+ if (timeout == 0) {
+ index++;
+ continue;
+ } else if (timeout < 0) {
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ } else {
+ timeout = msecs_to_jiffies(timeout);
+ }
+
+ switch (index++) {
+ case 0:
+ adev->gfx_timeout = timeout;
+ break;
+ case 1:
+ adev->compute_timeout = timeout;
+ break;
+ case 2:
+ adev->sdma_timeout = timeout;
+ break;
+ case 3:
+ adev->video_timeout = timeout;
+ break;
+ default:
+ break;
+ }
+ }
+ /*
+ * There is only one value specified and
+ * it should apply to all non-compute jobs.
+ */
+ if (index == 1) {
+ adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+ if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
+ adev->compute_timeout = adev->gfx_timeout;
+ }
+ }
+
+ return ret;
+}
/**
* amdgpu_device_init - initialize the driver
@@ -2575,7 +2784,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
uint32_t flags)
{
int r, i;
- bool runtime = false;
+ bool boco = false;
u32 max_MBps;
adev->shutdown = false;
@@ -2583,7 +2792,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->ddev = ddev;
adev->pdev = pdev;
adev->flags = flags;
- adev->asic_type = flags & AMD_ASIC_MASK;
+
+ if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
+ adev->asic_type = amdgpu_force_asic_type;
+ else
+ adev->asic_type = flags & AMD_ASIC_MASK;
+
adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
if (amdgpu_emu_mode == 1)
adev->usec_timeout *= 2;
@@ -2593,7 +2807,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
- adev->vm_manager.vm_pte_num_rqs = 0;
+ adev->vm_manager.vm_pte_num_scheds = 0;
adev->gmc.gmc_funcs = NULL;
adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
@@ -2633,8 +2847,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
mutex_init(&adev->lock_reset);
- mutex_init(&adev->virt.dpm_mutex);
mutex_init(&adev->psp.mutex);
+ mutex_init(&adev->notifier_lock);
r = amdgpu_device_check_arguments(adev);
if (r)
@@ -2726,6 +2940,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
+ r = amdgpu_device_get_job_timeout_settings(adev);
+ if (r) {
+ dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+ return r;
+ }
+
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
@@ -2734,12 +2954,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* ignore it */
vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
- if (amdgpu_device_is_px(ddev))
- runtime = true;
- if (!pci_is_thunderbolt_attached(adev->pdev))
+ if (amdgpu_device_supports_boco(ddev))
+ boco = true;
+ if (amdgpu_has_atpx() &&
+ (amdgpu_is_atpx_hybrid() ||
+ amdgpu_has_atpx_dgpu_power_cntl()) &&
+ !pci_is_thunderbolt_attached(adev->pdev))
vga_switcheroo_register_client(adev->pdev,
- &amdgpu_switcheroo_ops, runtime);
- if (runtime)
+ &amdgpu_switcheroo_ops, boco);
+ if (boco)
vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
if (amdgpu_emu_mode == 1) {
@@ -2826,11 +3049,17 @@ fence_driver_init:
}
dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
- if (amdgpu_virt_request_full_gpu(adev, false))
- amdgpu_virt_release_full_gpu(adev, false);
goto failed;
}
+ DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se,
+ adev->gfx.config.max_cu_per_sh,
+ adev->gfx.cu_info.number);
+
+ amdgpu_ctx_init_sched(adev);
+
adev->accel_working = true;
amdgpu_vm_check_compute_bug(adev);
@@ -2845,16 +3074,19 @@ fence_driver_init:
amdgpu_fbdev_init(adev);
- if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
- amdgpu_pm_virt_sysfs_init(adev);
-
r = amdgpu_pm_sysfs_init(adev);
- if (r)
+ if (r) {
+ adev->pm_sysfs_en = false;
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
+ } else
+ adev->pm_sysfs_en = true;
r = amdgpu_ucode_sysfs_init(adev);
- if (r)
+ if (r) {
+ adev->ucode_sysfs_en = false;
DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+ } else
+ adev->ucode_sysfs_en = true;
r = amdgpu_debugfs_gem_init(adev);
if (r)
@@ -2885,6 +3117,13 @@ fence_driver_init:
DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
}
+ /*
+ * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
+ * Otherwise the mgpu fan boost feature will be skipped due to the
+ * gpu instance is counted less.
+ */
+ amdgpu_register_gpu_instance(adev);
+
/* enable clockgating, etc. after ib tests, etc. since some blocks require
* explicit gating rather than handling it automatically.
*/
@@ -2916,7 +3155,7 @@ fence_driver_init:
failed:
amdgpu_vf_error_trans_all(adev);
- if (runtime)
+ if (boco)
vga_switcheroo_fini_domain_pm_ops(adev->dev);
return r;
@@ -2935,7 +3174,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
int r;
DRM_INFO("amdgpu: finishing device.\n");
+ flush_delayed_work(&adev->delayed_init_work);
adev->shutdown = true;
+
/* disable all interrupts */
amdgpu_irq_disable_all(adev);
if (adev->mode_info.mode_config_initialized){
@@ -2945,7 +3186,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
drm_atomic_helper_shutdown(adev->ddev);
}
amdgpu_fence_driver_fini(adev);
- amdgpu_pm_sysfs_fini(adev);
+ if (adev->pm_sysfs_en)
+ amdgpu_pm_sysfs_fini(adev);
amdgpu_fbdev_fini(adev);
r = amdgpu_device_ip_fini(adev);
if (adev->firmware.gpu_info_fw) {
@@ -2953,7 +3195,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->firmware.gpu_info_fw = NULL;
}
adev->accel_working = false;
- cancel_delayed_work_sync(&adev->delayed_init_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
@@ -2963,9 +3204,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
kfree(adev->bios);
adev->bios = NULL;
- if (!pci_is_thunderbolt_attached(adev->pdev))
+ if (amdgpu_has_atpx() &&
+ (amdgpu_is_atpx_hybrid() ||
+ amdgpu_has_atpx_dgpu_power_cntl()) &&
+ !pci_is_thunderbolt_attached(adev->pdev))
vga_switcheroo_unregister_client(adev->pdev);
- if (adev->flags & AMD_IS_PX)
+ if (amdgpu_device_supports_boco(adev->ddev))
vga_switcheroo_fini_domain_pm_ops(adev->dev);
vga_client_register(adev->pdev, NULL, NULL, NULL);
if (adev->rio_mem)
@@ -2974,12 +3218,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
iounmap(adev->rmmio);
adev->rmmio = NULL;
amdgpu_device_doorbell_fini(adev);
- if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
- amdgpu_pm_virt_sysfs_fini(adev);
amdgpu_debugfs_regs_cleanup(adev);
device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
- amdgpu_ucode_sysfs_fini(adev);
+ if (adev->ucode_sysfs_en)
+ amdgpu_ucode_sysfs_fini(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
amdgpu_debugfs_preempt_cleanup(adev);
@@ -3002,11 +3245,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
* Returns 0 for success or an error on failure.
* Called at driver suspend.
*/
-int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
+int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
struct amdgpu_device *adev;
struct drm_crtc *crtc;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
int r;
if (dev == NULL || dev->dev_private == NULL) {
@@ -3029,9 +3273,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
if (!amdgpu_device_has_dc_support(adev)) {
/* turn off display hw */
drm_modeset_lock_all(dev);
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
- }
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ drm_helper_connector_dpms(connector,
+ DRM_MODE_DPMS_OFF);
+ drm_connector_list_iter_end(&iter);
drm_modeset_unlock_all(dev);
/* unpin the front buffers and cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -3082,17 +3328,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
*/
amdgpu_bo_evict_vram(adev);
- pci_save_state(dev->pdev);
- if (suspend) {
- /* Shut down the device */
- pci_disable_device(dev->pdev);
- pci_set_power_state(dev->pdev, PCI_D3hot);
- } else {
- r = amdgpu_asic_reset(adev);
- if (r)
- DRM_ERROR("amdgpu asic reset failed\n");
- }
-
return 0;
}
@@ -3107,9 +3342,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
* Returns 0 for success or an error on failure.
* Called at driver resume.
*/
-int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
+int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_device *adev = dev->dev_private;
struct drm_crtc *crtc;
int r = 0;
@@ -3117,14 +3353,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
- if (resume) {
- pci_set_power_state(dev->pdev, PCI_D0);
- pci_restore_state(dev->pdev);
- r = pci_enable_device(dev->pdev);
- if (r)
- return r;
- }
-
/* post card */
if (amdgpu_device_need_post(adev)) {
r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -3180,9 +3408,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
/* turn on display hw */
drm_modeset_lock_all(dev);
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
- }
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ drm_helper_connector_dpms(connector,
+ DRM_MODE_DPMS_ON);
+ drm_connector_list_iter_end(&iter);
+
drm_modeset_unlock_all(dev);
}
amdgpu_fbdev_set_suspend(adev, 0);
@@ -3459,13 +3691,12 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (r)
return r;
- amdgpu_amdkfd_pre_reset(adev);
-
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
goto error;
+ amdgpu_virt_init_data_exchange(adev);
/* we need recover gart prior to run SMC/CP/SDMA resume */
amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
@@ -3483,7 +3714,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
amdgpu_amdkfd_post_reset(adev);
error:
- amdgpu_virt_init_data_exchange(adev);
amdgpu_virt_release_full_gpu(adev, true);
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
amdgpu_inc_vram_lost(adev);
@@ -3529,6 +3759,11 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_RAVEN:
+ case CHIP_ARCTURUS:
+ case CHIP_RENOIR:
+ case CHIP_NAVI10:
+ case CHIP_NAVI14:
+ case CHIP_NAVI12:
break;
default:
goto disabled;
@@ -3605,7 +3840,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
/* For XGMI run all resets in parallel to speed up the process */
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
- if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
+ if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
r = -EALREADY;
} else
r = amdgpu_asic_reset(tmp_adev);
@@ -3617,7 +3852,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
}
}
- /* For XGMI wait for all PSP resets to complete before proceed */
+ /* For XGMI wait for all resets to complete before proceed */
if (!r) {
list_for_each_entry(tmp_adev, device_list_handle,
gmc.xgmi.head) {
@@ -3628,14 +3863,11 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
break;
}
}
-
- list_for_each_entry(tmp_adev, device_list_handle,
- gmc.xgmi.head) {
- amdgpu_ras_reserve_bad_pages(tmp_adev);
- }
}
}
+ if (!r && amdgpu_ras_intr_triggered())
+ amdgpu_ras_intr_cleared();
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
if (need_full_reset) {
@@ -3724,7 +3956,7 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
mutex_lock(&adev->lock_reset);
atomic_inc(&adev->gpu_reset_counter);
- adev->in_gpu_reset = 1;
+ adev->in_gpu_reset = true;
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1:
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
@@ -3736,25 +3968,18 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
adev->mp1_state = PP_MP1_STATE_NONE;
break;
}
- /* Block kfd: SRIOV would do it separately */
- if (!amdgpu_sriov_vf(adev))
- amdgpu_amdkfd_pre_reset(adev);
return true;
}
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
- /*unlock kfd: SRIOV would do it separately */
- if (!amdgpu_sriov_vf(adev))
- amdgpu_amdkfd_post_reset(adev);
amdgpu_vf_error_trans_all(adev);
adev->mp1_state = PP_MP1_STATE_NONE;
- adev->in_gpu_reset = 0;
+ adev->in_gpu_reset = false;
mutex_unlock(&adev->lock_reset);
}
-
/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
@@ -3774,11 +3999,28 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive = NULL;
struct amdgpu_device *tmp_adev = NULL;
int i, r = 0;
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
+ bool use_baco =
+ (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
+ true : false;
+
+ /*
+ * Flush RAM to disk so that after reboot
+ * the user can read log and see why the system rebooted.
+ */
+ if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
+
+ DRM_WARN("Emergency reboot.");
+
+ ksys_sync_helper();
+ emergency_restart();
+ }
need_full_reset = job_signaled = false;
INIT_LIST_HEAD(&device_list);
- dev_info(adev->dev, "GPU reset begin!\n");
+ dev_info(adev->dev, "GPU %s begin!\n",
+ (in_ras_intr && !use_baco) ? "jobs stop":"reset");
cancel_delayed_work_sync(&adev->delayed_init_work);
@@ -3805,9 +4047,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
return 0;
}
+ /* Block kfd: SRIOV would do it separately */
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_amdkfd_pre_reset(adev);
+
/* Build list of devices to reset */
if (adev->gmc.xgmi.num_physical_nodes > 1) {
if (!hive) {
+ /*unlock kfd: SRIOV would do it separately */
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_amdkfd_post_reset(adev);
amdgpu_device_unlock_adev(adev);
return -ENODEV;
}
@@ -3823,17 +4072,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
device_list_handle = &device_list;
}
- /*
- * Mark these ASICs to be reseted as untracked first
- * And add them back after reset completed
- */
- list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
- amdgpu_unregister_gpu_instance(tmp_adev);
-
/* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+ if (tmp_adev != adev) {
+ amdgpu_device_lock_adev(tmp_adev, false);
+ if (!amdgpu_sriov_vf(tmp_adev))
+ amdgpu_amdkfd_pre_reset(tmp_adev);
+ }
+
+ /*
+ * Mark these ASICs to be reseted as untracked first
+ * And add them back after reset completed
+ */
+ amdgpu_unregister_gpu_instance(tmp_adev);
+
/* disable ras on ALL IPs */
- if (amdgpu_device_ip_need_full_reset(tmp_adev))
+ if (!(in_ras_intr && !use_baco) &&
+ amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -3843,10 +4098,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
continue;
drm_sched_stop(&ring->sched, job ? &job->base : NULL);
+
+ if (in_ras_intr && !use_baco)
+ amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
}
}
+ if (in_ras_intr && !use_baco)
+ goto skip_sched_resume;
+
/*
* Must check guilty signal here since after this point all old
* HW fences are force signaled.
@@ -3857,9 +4118,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
dma_fence_is_signaled(job->base.s_fence->parent))
job_signaled = true;
- if (!amdgpu_device_ip_need_full_reset(adev))
- device_list_handle = &device_list;
-
if (job_signaled) {
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
goto skip_hw_reset;
@@ -3881,7 +4139,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
if (tmp_adev == adev)
continue;
- amdgpu_device_lock_adev(tmp_adev, false);
r = amdgpu_device_pre_asic_reset(tmp_adev,
NULL,
&need_full_reset);
@@ -3909,6 +4166,7 @@ skip_hw_reset:
/* Post ASIC reset for all devs .*/
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
@@ -3930,12 +4188,18 @@ skip_hw_reset:
if (r) {
/* bad news, how to tell it to userspace ? */
- dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
+ dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
} else {
- dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
+ dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
}
+ }
+skip_sched_resume:
+ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+ /*unlock kfd: SRIOV would do it separately */
+ if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
+ amdgpu_amdkfd_post_reset(tmp_adev);
amdgpu_device_unlock_adev(tmp_adev);
}
@@ -4083,3 +4347,35 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
+int amdgpu_device_baco_enter(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_device_supports_baco(adev->ddev))
+ return -ENOTSUPP;
+
+ if (ras && ras->supported)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
+
+ return amdgpu_dpm_baco_enter(adev);
+}
+
+int amdgpu_device_baco_exit(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (!amdgpu_device_supports_baco(adev->ddev))
+ return -ENOTSUPP;
+
+ ret = amdgpu_dpm_baco_exit(adev);
+ if (ret)
+ return ret;
+
+ if (ras && ras->supported)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
new file mode 100644
index 000000000000..057f6ea645d7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DF_H__
+#define __AMDGPU_DF_H__
+
+struct amdgpu_df_hash_status {
+ bool hash_64k;
+ bool hash_2m;
+ bool hash_1g;
+};
+
+struct amdgpu_df_funcs {
+ void (*sw_init)(struct amdgpu_device *adev);
+ void (*sw_fini)(struct amdgpu_device *adev);
+ void (*enable_broadcast_mode)(struct amdgpu_device *adev,
+ bool enable);
+ u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
+ u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
+ void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+ bool enable);
+ void (*get_clockgating_state)(struct amdgpu_device *adev,
+ u32 *flags);
+ void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
+ bool enable);
+ int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
+ int is_enable);
+ int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
+ int is_disable);
+ void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
+ uint64_t *count);
+ uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
+ void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
+ uint32_t ficadl_val, uint32_t ficadh_val);
+ uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev,
+ uint32_t df_inst);
+ uint32_t (*get_df_inst_id)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_df {
+ struct amdgpu_df_hash_status hash_status;
+ const struct amdgpu_df_funcs *funcs;
+};
+
+#endif /* __AMDGPU_DF_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 1481899f86c1..f95092741c38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -134,20 +134,10 @@ static int hw_id_map[MAX_HWIP] = {
static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
{
- uint32_t *p = (uint32_t *)binary;
uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
- uint64_t pos = vram_size - BINARY_MAX_SIZE;
- unsigned long flags;
-
- while (pos < vram_size) {
- spin_lock_irqsave(&adev->mmio_idx_lock, flags);
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
- WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
- *p++ = RREG32_NO_KIQ(mmMM_DATA);
- spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
- pos += 4;
- }
+ uint64_t pos = vram_size - DISCOVERY_TMR_SIZE;
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, DISCOVERY_TMR_SIZE, false);
return 0;
}
@@ -179,7 +169,7 @@ int amdgpu_discovery_init(struct amdgpu_device *adev)
uint16_t checksum;
int r;
- adev->discovery = kzalloc(BINARY_MAX_SIZE, GFP_KERNEL);
+ adev->discovery = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL);
if (!adev->discovery)
return -ENOMEM;
@@ -333,7 +323,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
}
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
- int *major, int *minor)
+ int *major, int *minor, int *revision)
{
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
@@ -369,6 +359,8 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
*major = ip->major;
if (minor)
*minor = ip->minor;
+ if (revision)
+ *revision = ip->revision;
return 0;
}
ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 85b8c4d4d576..ba78e15d9b05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -24,11 +24,13 @@
#ifndef __AMDGPU_DISCOVERY__
#define __AMDGPU_DISCOVERY__
+#define DISCOVERY_TMR_SIZE (64 << 10)
+
int amdgpu_discovery_init(struct amdgpu_device *adev);
void amdgpu_discovery_fini(struct amdgpu_device *adev);
int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
- int *major, int *minor);
+ int *major, int *minor, int *revision);
int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 1d4aaa9580f4..6d520a3eec40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -370,11 +370,13 @@ void amdgpu_display_print_display_setup(struct drm_device *dev)
struct amdgpu_connector *amdgpu_connector;
struct drm_encoder *encoder;
struct amdgpu_encoder *amdgpu_encoder;
+ struct drm_connector_list_iter iter;
uint32_t devices;
int i = 0;
+ drm_connector_list_iter_begin(dev, &iter);
DRM_INFO("AMDGPU Display Connectors\n");
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
DRM_INFO("Connector %d:\n", i);
DRM_INFO(" %s\n", connector->name);
@@ -438,6 +440,7 @@ void amdgpu_display_print_display_setup(struct drm_device *dev)
}
i++;
}
+ drm_connector_list_iter_end(&iter);
}
/**
@@ -510,13 +513,23 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
* will not allow USWC mappings.
* Also, don't allow GTT domain if the BO doens't have USWC falg set.
*/
- if (adev->asic_type >= CHIP_CARRIZO &&
- adev->asic_type <= CHIP_RAVEN &&
- (adev->flags & AMD_IS_APU) &&
- (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
+ if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
amdgpu_bo_support_uswc(bo_flags) &&
- amdgpu_device_asic_has_dc_support(adev->asic_type))
- domain |= AMDGPU_GEM_DOMAIN_GTT;
+ amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+ switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ domain |= AMDGPU_GEM_DOMAIN_GTT;
+ break;
+ case CHIP_RAVEN:
+ /* enable S/G on PCO and RV2 */
+ if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
+ domain |= AMDGPU_GEM_DOMAIN_GTT;
+ break;
+ default:
+ break;
+ }
+ }
#endif
return domain;
@@ -687,7 +700,6 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct amdgpu_encoder *amdgpu_encoder;
struct drm_connector *connector;
- struct amdgpu_connector *amdgpu_connector;
u32 src_v = 1, dst_v = 1;
u32 src_h = 1, dst_h = 1;
@@ -699,7 +711,6 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
continue;
amdgpu_encoder = to_amdgpu_encoder(encoder);
connector = amdgpu_get_connector_for_encoder(encoder);
- amdgpu_connector = to_amdgpu_connector(connector);
/* set scaling */
if (amdgpu_encoder->rmx_type == RMX_OFF)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 61f108ec2b5c..a59cd47aa6c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -34,27 +34,12 @@
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_gem.h"
+#include "amdgpu_dma_buf.h"
#include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
/**
- * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
- * implementation
- * @obj: GEM buffer object (BO)
- *
- * Returns:
- * A scatter/gather table for the pinned pages of the BO's memory.
- */
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- int npages = bo->tbo.num_pages;
-
- return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
-}
-
-/**
* amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
* @obj: GEM BO
*
@@ -179,92 +164,126 @@ err_fences_put:
}
/**
- * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
- * @dma_buf: Shared DMA buffer
+ * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
+ *
+ * @dmabuf: DMA-buf where we attach to
+ * @attach: attachment to add
+ *
+ * Add the attachment as user to the exported DMA-buf.
+ */
+static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = dmabuf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ int r;
+
+ if (attach->dev->driver == adev->dev->driver)
+ return 0;
+
+ r = amdgpu_bo_reserve(bo, false);
+ if (unlikely(r != 0))
+ return r;
+
+ /*
+ * We only create shared fences for internal use, but importers
+ * of the dmabuf rely on exclusive fences for implicitly
+ * tracking write hazards. As any of the current fences may
+ * correspond to a write, we need to convert all existing
+ * fences on the reservation object into a single exclusive
+ * fence.
+ */
+ r = __dma_resv_make_exclusive(bo->tbo.base.resv);
+ if (r)
+ return r;
+
+ bo->prime_shared_count++;
+ amdgpu_bo_unreserve(bo);
+ return 0;
+}
+
+/**
+ * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
+ *
+ * @dmabuf: DMA-buf where we remove the attachment from
+ * @attach: the attachment to remove
+ *
+ * Called when an attachment is removed from the DMA-buf.
+ */
+static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = dmabuf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
+ bo->prime_shared_count--;
+}
+
+/**
+ * amdgpu_dma_buf_map - &dma_buf_ops.map_dma_buf implementation
* @attach: DMA-buf attachment
+ * @dir: DMA direction
*
* Makes sure that the shared DMA buffer can be accessed by the target device.
* For now, simply pins it to the GTT domain, where it should be accessible by
* all DMA devices.
*
* Returns:
- * 0 on success or a negative error code on failure.
+ * sg_table filled with the DMA addresses to use or ERR_PRT with negative error
+ * code.
*/
-static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
- struct dma_buf_attachment *attach)
+static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
+ enum dma_data_direction dir)
{
+ struct dma_buf *dma_buf = attach->dmabuf;
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct sg_table *sgt;
long r;
- r = drm_gem_map_attach(dma_buf, attach);
- if (r)
- return r;
-
- r = amdgpu_bo_reserve(bo, false);
- if (unlikely(r != 0))
- goto error_detach;
-
-
- if (attach->dev->driver != adev->dev->driver) {
- /*
- * We only create shared fences for internal use, but importers
- * of the dmabuf rely on exclusive fences for implicitly
- * tracking write hazards. As any of the current fences may
- * correspond to a write, we need to convert all existing
- * fences on the reservation object into a single exclusive
- * fence.
- */
- r = __dma_resv_make_exclusive(bo->tbo.base.resv);
- if (r)
- goto error_unreserve;
- }
-
- /* pin buffer into GTT */
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r)
- goto error_unreserve;
+ return ERR_PTR(r);
- if (attach->dev->driver != adev->dev->driver)
- bo->prime_shared_count++;
+ sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages);
+ if (IS_ERR(sgt))
+ return sgt;
-error_unreserve:
- amdgpu_bo_unreserve(bo);
+ if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+ DMA_ATTR_SKIP_CPU_SYNC))
+ goto error_free;
-error_detach:
- if (r)
- drm_gem_map_detach(dma_buf, attach);
- return r;
+ return sgt;
+
+error_free:
+ sg_free_table(sgt);
+ kfree(sgt);
+ return ERR_PTR(-ENOMEM);
}
/**
- * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
- * @dma_buf: Shared DMA buffer
+ * amdgpu_dma_buf_unmap - &dma_buf_ops.unmap_dma_buf implementation
* @attach: DMA-buf attachment
+ * @sgt: sg_table to unmap
+ * @dir: DMA direction
*
* This is called when a shared DMA buffer no longer needs to be accessible by
* another device. For now, simply unpins the buffer from GTT.
*/
-static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
- struct dma_buf_attachment *attach)
+static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
+ struct sg_table *sgt,
+ enum dma_data_direction dir)
{
- struct drm_gem_object *obj = dma_buf->priv;
+ struct drm_gem_object *obj = attach->dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- int ret = 0;
-
- ret = amdgpu_bo_reserve(bo, true);
- if (unlikely(ret != 0))
- goto error;
+ dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
+ sg_free_table(sgt);
+ kfree(sgt);
amdgpu_bo_unpin(bo);
- if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
- bo->prime_shared_count--;
- amdgpu_bo_unreserve(bo);
-
-error:
- drm_gem_map_detach(dma_buf, attach);
}
/**
@@ -308,10 +327,11 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
}
const struct dma_buf_ops amdgpu_dmabuf_ops = {
- .attach = amdgpu_dma_buf_map_attach,
- .detach = amdgpu_dma_buf_map_detach,
- .map_dma_buf = drm_gem_map_dma_buf,
- .unmap_dma_buf = drm_gem_unmap_dma_buf,
+ .dynamic_mapping = true,
+ .attach = amdgpu_dma_buf_attach,
+ .detach = amdgpu_dma_buf_detach,
+ .map_dma_buf = amdgpu_dma_buf_map,
+ .unmap_dma_buf = amdgpu_dma_buf_unmap,
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
@@ -321,7 +341,6 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
/**
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
- * @dev: DRM device
* @gobj: GEM BO
* @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
*
@@ -341,40 +360,35 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
return ERR_PTR(-EPERM);
buf = drm_gem_prime_export(gobj, flags);
- if (!IS_ERR(buf)) {
- buf->file->f_mapping = gobj->dev->anon_inode->i_mapping;
+ if (!IS_ERR(buf))
buf->ops = &amdgpu_dmabuf_ops;
- }
return buf;
}
/**
- * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
- * implementation
+ * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
+ *
* @dev: DRM device
- * @attach: DMA-buf attachment
- * @sg: Scatter/gather table
+ * @dma_buf: DMA-buf
*
- * Imports shared DMA buffer memory exported by another device.
+ * Creates an empty SG BO for DMA-buf import.
*
* Returns:
* A new GEM BO of the given DRM device, representing the memory
* described by the given DMA-buf attachment and scatter/gather table.
*/
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
- struct dma_buf_attachment *attach,
- struct sg_table *sg)
+static struct drm_gem_object *
+amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
{
- struct dma_resv *resv = attach->dmabuf->resv;
+ struct dma_resv *resv = dma_buf->resv;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_bo *bo;
struct amdgpu_bo_param bp;
int ret;
memset(&bp, 0, sizeof(bp));
- bp.size = attach->dmabuf->size;
+ bp.size = dma_buf->size;
bp.byte_align = PAGE_SIZE;
bp.domain = AMDGPU_GEM_DOMAIN_CPU;
bp.flags = 0;
@@ -385,11 +399,9 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
if (ret)
goto error;
- bo->tbo.sg = sg;
- bo->tbo.ttm->sg = sg;
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
- if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
bo->prime_shared_count = 1;
dma_resv_unlock(resv);
@@ -405,15 +417,15 @@ error:
* @dev: DRM device
* @dma_buf: Shared DMA buffer
*
- * The main work is done by the &drm_gem_prime_import helper, which in turn
- * uses &amdgpu_gem_prime_import_sg_table.
+ * Import a dma_buf into a the driver and potentially create a new GEM object.
*
* Returns:
* GEM BO representing the shared DMA buffer for the given device.
*/
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
- struct dma_buf *dma_buf)
+ struct dma_buf *dma_buf)
{
+ struct dma_buf_attachment *attach;
struct drm_gem_object *obj;
if (dma_buf->ops == &amdgpu_dmabuf_ops) {
@@ -428,5 +440,17 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
}
}
- return drm_gem_prime_import(dev, dma_buf);
+ obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
+ if (IS_ERR(obj))
+ return obj;
+
+ attach = dma_buf_dynamic_attach(dma_buf, dev->dev, true);
+ if (IS_ERR(attach)) {
+ drm_gem_object_put(obj);
+ return ERR_CAST(attach);
+ }
+
+ get_dma_buf(dma_buf);
+ obj->import_attach = attach;
+ return obj;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
index 5012e6ab58f1..ec447a7b6b28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -25,11 +25,6 @@
#include <drm/drm_gem.h>
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
- struct dma_buf_attachment *attach,
- struct sg_table *sg);
struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
int flags);
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index 5803fcbae22f..a2e8c3dfb4f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -911,7 +911,8 @@ int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low)
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK,
low ? &clk_freq : NULL,
- !low ? &clk_freq : NULL);
+ !low ? &clk_freq : NULL,
+ true);
if (ret)
return 0;
return clk_freq * 100;
@@ -928,7 +929,8 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low)
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK,
low ? &clk_freq : NULL,
- !low ? &clk_freq : NULL);
+ !low ? &clk_freq : NULL,
+ true);
if (ret)
return 0;
return clk_freq * 100;
@@ -944,20 +946,63 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
bool swsmu = is_support_sw_smu(adev);
switch (block_type) {
- case AMD_IP_BLOCK_TYPE_GFX:
case AMD_IP_BLOCK_TYPE_UVD:
- case AMD_IP_BLOCK_TYPE_VCN:
case AMD_IP_BLOCK_TYPE_VCE:
+ if (swsmu) {
+ ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
+ } else if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu) {
+ /*
+ * TODO: need a better lock mechanism
+ *
+ * Here adev->pm.mutex lock protection is enforced on
+ * UVD and VCE cases only. Since for other cases, there
+ * may be already lock protection in amdgpu_pm.c.
+ * This is a quick fix for the deadlock issue below.
+ * NFO: task ocltst:2028 blocked for more than 120 seconds.
+ * Tainted: G OE 5.0.0-37-generic #40~18.04.1-Ubuntu
+ * echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+ * cltst D 0 2028 2026 0x00000000
+ * all Trace:
+ * __schedule+0x2c0/0x870
+ * schedule+0x2c/0x70
+ * schedule_preempt_disabled+0xe/0x10
+ * __mutex_lock.isra.9+0x26d/0x4e0
+ * __mutex_lock_slowpath+0x13/0x20
+ * ? __mutex_lock_slowpath+0x13/0x20
+ * mutex_lock+0x2f/0x40
+ * amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu]
+ * gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu]
+ * gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu]
+ * amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu]
+ * pp_dpm_force_performance_level+0xe7/0x100 [amdgpu]
+ * amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu]
+ */
+ mutex_lock(&adev->pm.mutex);
+ ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
+ (adev)->powerplay.pp_handle, block_type, gate));
+ mutex_unlock(&adev->pm.mutex);
+ }
+ break;
+ case AMD_IP_BLOCK_TYPE_GFX:
+ case AMD_IP_BLOCK_TYPE_VCN:
case AMD_IP_BLOCK_TYPE_SDMA:
if (swsmu)
ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
- else
+ else if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu)
ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
(adev)->powerplay.pp_handle, block_type, gate));
break;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ if (swsmu)
+ ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
+ break;
case AMD_IP_BLOCK_TYPE_GMC:
case AMD_IP_BLOCK_TYPE_ACP:
- ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
+ if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu)
+ ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
(adev)->powerplay.pp_handle, block_type, gate));
break;
default:
@@ -966,3 +1011,163 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
return ret;
}
+
+int amdgpu_dpm_baco_enter(struct amdgpu_device *adev)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ void *pp_handle = adev->powerplay.pp_handle;
+ struct smu_context *smu = &adev->smu;
+ int ret = 0;
+
+ if (is_support_sw_smu(adev)) {
+ ret = smu_baco_enter(smu);
+ } else {
+ if (!pp_funcs || !pp_funcs->set_asic_baco_state)
+ return -ENOENT;
+
+ /* enter BACO state */
+ ret = pp_funcs->set_asic_baco_state(pp_handle, 1);
+ }
+
+ return ret;
+}
+
+int amdgpu_dpm_baco_exit(struct amdgpu_device *adev)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ void *pp_handle = adev->powerplay.pp_handle;
+ struct smu_context *smu = &adev->smu;
+ int ret = 0;
+
+ if (is_support_sw_smu(adev)) {
+ ret = smu_baco_exit(smu);
+ } else {
+ if (!pp_funcs || !pp_funcs->set_asic_baco_state)
+ return -ENOENT;
+
+ /* exit BACO state */
+ ret = pp_funcs->set_asic_baco_state(pp_handle, 0);
+ }
+
+ return ret;
+}
+
+int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
+ enum pp_mp1_state mp1_state)
+{
+ int ret = 0;
+
+ if (is_support_sw_smu(adev)) {
+ ret = smu_set_mp1_state(&adev->smu, mp1_state);
+ } else if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_mp1_state) {
+ ret = adev->powerplay.pp_funcs->set_mp1_state(
+ adev->powerplay.pp_handle,
+ mp1_state);
+ }
+
+ return ret;
+}
+
+bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ void *pp_handle = adev->powerplay.pp_handle;
+ struct smu_context *smu = &adev->smu;
+ bool baco_cap;
+
+ if (is_support_sw_smu(adev)) {
+ return smu_baco_is_support(smu);
+ } else {
+ if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
+ return false;
+
+ if (pp_funcs->get_asic_baco_capability(pp_handle, &baco_cap))
+ return false;
+
+ return baco_cap ? true : false;
+ }
+}
+
+int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ void *pp_handle = adev->powerplay.pp_handle;
+ struct smu_context *smu = &adev->smu;
+
+ if (is_support_sw_smu(adev)) {
+ return smu_mode2_reset(smu);
+ } else {
+ if (!pp_funcs || !pp_funcs->asic_reset_mode_2)
+ return -ENOENT;
+
+ return pp_funcs->asic_reset_mode_2(pp_handle);
+ }
+}
+
+int amdgpu_dpm_baco_reset(struct amdgpu_device *adev)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ void *pp_handle = adev->powerplay.pp_handle;
+ struct smu_context *smu = &adev->smu;
+ int ret = 0;
+
+ dev_info(adev->dev, "GPU BACO reset\n");
+
+ if (is_support_sw_smu(adev)) {
+ ret = smu_baco_enter(smu);
+ if (ret)
+ return ret;
+
+ ret = smu_baco_exit(smu);
+ if (ret)
+ return ret;
+ } else {
+ if (!pp_funcs
+ || !pp_funcs->set_asic_baco_state)
+ return -ENOENT;
+
+ /* enter BACO state */
+ ret = pp_funcs->set_asic_baco_state(pp_handle, 1);
+ if (ret)
+ return ret;
+
+ /* exit BACO state */
+ ret = pp_funcs->set_asic_baco_state(pp_handle, 0);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
+ enum PP_SMC_POWER_PROFILE type,
+ bool en)
+{
+ int ret = 0;
+
+ if (is_support_sw_smu(adev))
+ ret = smu_switch_power_profile(&adev->smu, type, en);
+ else if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->switch_power_profile)
+ ret = adev->powerplay.pp_funcs->switch_power_profile(
+ adev->powerplay.pp_handle, type, en);
+
+ return ret;
+}
+
+int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
+ uint32_t pstate)
+{
+ int ret = 0;
+
+ if (is_support_sw_smu_xgmi(adev))
+ ret = smu_set_xgmi_pstate(&adev->smu, pstate);
+ else if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_xgmi_pstate)
+ ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
+ pstate);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 1c5c0fd76dbf..902ca6c00cca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -298,12 +298,6 @@ enum amdgpu_pcie_gen {
#define amdgpu_dpm_get_current_power_state(adev) \
((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle))
-#define amdgpu_smu_get_current_power_state(adev) \
- ((adev)->smu.ppt_funcs->get_current_power_state(&((adev)->smu)))
-
-#define amdgpu_smu_set_power_state(adev) \
- ((adev)->smu.ppt_funcs->set_power_state(&((adev)->smu)))
-
#define amdgpu_dpm_get_pp_num_states(adev, data) \
((adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data))
@@ -347,10 +341,6 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->reset_power_profile_state(\
(adev)->powerplay.pp_handle, request))
-#define amdgpu_dpm_switch_power_profile(adev, type, en) \
- ((adev)->powerplay.pp_funcs->switch_power_profile(\
- (adev)->powerplay.pp_handle, type, en))
-
#define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
(adev)->powerplay.pp_handle, msg_id))
@@ -523,4 +513,24 @@ extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low);
extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low);
+int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev,
+ uint32_t pstate);
+
+int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev,
+ enum PP_SMC_POWER_PROFILE type,
+ bool en);
+
+int amdgpu_dpm_baco_reset(struct amdgpu_device *adev);
+
+int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev);
+
+bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev);
+
+int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
+ enum pp_mp1_state mp1_state);
+
+int amdgpu_dpm_baco_exit(struct amdgpu_device *adev);
+
+int amdgpu_dpm_baco_enter(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index baf32484b820..a9c4edca70c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -43,6 +43,8 @@
#include "amdgpu_amdkfd.h"
+#include "amdgpu_ras.h"
+
/*
* KMS wrapper.
* - 3.0.0 - initial driver
@@ -82,13 +84,12 @@
* - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS.
* - 3.34.0 - Non-DC can flip correctly between buffers with different pitches
* - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask
+ * - 3.36.0 - Allow reading more status registers on si/cik
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 35
+#define KMS_DRIVER_MINOR 36
#define KMS_DRIVER_PATCHLEVEL 0
-#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256
-
int amdgpu_vram_limit = 0;
int amdgpu_vis_vram_limit = 0;
int amdgpu_gart_size = -1; /* auto */
@@ -101,7 +102,7 @@ int amdgpu_disp_priority = 0;
int amdgpu_hw_i2c = 0;
int amdgpu_pcie_gen2 = -1;
int amdgpu_msi = -1;
-char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
+char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
int amdgpu_dpm = -1;
int amdgpu_fw_load_type = -1;
int amdgpu_aspm = -1;
@@ -128,11 +129,7 @@ char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
/* OverDrive(bit 14) disabled by default*/
uint amdgpu_pp_feature_mask = 0xffffbfff;
-int amdgpu_ngg = 0;
-int amdgpu_prim_buf_per_se = 0;
-int amdgpu_pos_buf_per_se = 0;
-int amdgpu_cntl_sb_buf_per_se = 0;
-int amdgpu_param_buf_per_se = 0;
+uint amdgpu_force_long_training = 0;
int amdgpu_job_hang_limit = 0;
int amdgpu_lbpw = -1;
int amdgpu_compute_multipipe = -1;
@@ -145,13 +142,14 @@ int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp = 0;
int amdgpu_discovery = -1;
int amdgpu_mes = 0;
-int amdgpu_noretry = 1;
+int amdgpu_noretry;
+int amdgpu_force_asic_type = -1;
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
};
int amdgpu_ras_enable = -1;
-uint amdgpu_ras_mask = 0xfffffffb;
+uint amdgpu_ras_mask = 0xffffffff;
/**
* DOC: vramlimit (int)
@@ -244,16 +242,21 @@ module_param_named(msi, amdgpu_msi, int, 0444);
*
* The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
* multiple values specified. 0 and negative values are invalidated. They will be adjusted
- * to default timeout.
- * - With one value specified, the setting will apply to all non-compute jobs.
- * - With multiple values specified, the first one will be for GFX. The second one is for Compute.
- * And the third and fourth ones are for SDMA and Video.
+ * to the default timeout.
+ *
+ * - With one value specified, the setting will apply to all non-compute jobs.
+ * - With multiple values specified, the first one will be for GFX.
+ * The second one is for Compute. The third and fourth ones are
+ * for SDMA and Video.
+ *
* By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
* jobs is 10000. And there is no timeout enforced on compute jobs.
*/
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and infinity timeout for compute jobs."
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; "
+ "for passthrough or sriov, 10000 for all jobs."
" 0: keep default value. negative: infinity timeout), "
- "format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
+ "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
+ "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
/**
@@ -392,6 +395,14 @@ MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
/**
+ * DOC: forcelongtraining (uint)
+ * Force long memory training in resume.
+ * The default is zero, indicates short training in resume.
+ */
+MODULE_PARM_DESC(forcelongtraining, "force memory long training");
+module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444);
+
+/**
* DOC: pcie_gen_cap (uint)
* Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
* The default is 0 (automatic for each asic).
@@ -449,42 +460,6 @@ MODULE_PARM_DESC(virtual_display,
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
/**
- * DOC: ngg (int)
- * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled).
- */
-MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))");
-module_param_named(ngg, amdgpu_ngg, int, 0444);
-
-/**
- * DOC: prim_buf_per_se (int)
- * Override the size of Primitive Buffer per Shader Engine in Byte. The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)");
-module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444);
-
-/**
- * DOC: pos_buf_per_se (int)
- * Override the size of Position Buffer per Shader Engine in Byte. The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)");
-module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444);
-
-/**
- * DOC: cntl_sb_buf_per_se (int)
- * Override the size of Control Sideband per Shader Engine in Byte. The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)");
-module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
-
-/**
- * DOC: param_buf_per_se (int)
- * Override the size of Off-Chip Parameter Cache per Shader Engine in Byte.
- * The default is 0 (depending on gfx).
- */
-MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)");
-module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
-
-/**
* DOC: job_hang_limit (int)
* Set how much time allow a job hang and not drop it. The default is 0.
*/
@@ -613,9 +588,19 @@ MODULE_PARM_DESC(mes,
module_param_named(mes, amdgpu_mes, int, 0444);
MODULE_PARM_DESC(noretry,
- "Disable retry faults (0 = retry enabled, 1 = retry disabled (default))");
+ "Disable retry faults (0 = retry enabled (default), 1 = retry disabled)");
module_param_named(noretry, amdgpu_noretry, int, 0644);
+/**
+ * DOC: force_asic_type (int)
+ * A non negative value used to specify the asic type for all supported GPUs.
+ */
+MODULE_PARM_DESC(force_asic_type,
+ "A non negative value used to specify the asic type for all supported GPUs");
+module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
+
+
+
#ifdef CONFIG_HSA_AMD
/**
* DOC: sched_policy (int)
@@ -1013,15 +998,17 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
/* Navi14 */
- {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
/* Renoir */
{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT},
/* Navi12 */
{0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
{0, 0, 0}
};
@@ -1048,6 +1035,41 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
+#ifdef CONFIG_DRM_AMDGPU_SI
+ if (!amdgpu_si_support) {
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ dev_info(&pdev->dev,
+ "SI support provided by radeon.\n");
+ dev_info(&pdev->dev,
+ "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
+ );
+ return -ENODEV;
+ }
+ }
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ if (!amdgpu_cik_support) {
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_KAVERI:
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ dev_info(&pdev->dev,
+ "CIK support provided by radeon.\n");
+ dev_info(&pdev->dev,
+ "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
+ );
+ return -ENODEV;
+ }
+ }
+#endif
+
/* Get rid of things like offb */
ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb");
if (ret)
@@ -1092,7 +1114,10 @@ amdgpu_pci_remove(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
- DRM_ERROR("Device removal is currently not supported outside of fbcon\n");
+#ifdef MODULE
+ if (THIS_MODULE->state != MODULE_STATE_GOING)
+#endif
+ DRM_ERROR("Hotplug removal is not supported\n");
drm_dev_unplug(dev);
drm_dev_put(dev);
pci_disable_device(pdev);
@@ -1105,6 +1130,9 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = dev->dev_private;
+ if (amdgpu_ras_intr_triggered())
+ return;
+
/* if we are running in a VM, make sure the device
* torn down properly on reboot/shutdown.
* unfortunately we can't detect certain
@@ -1119,7 +1147,7 @@ static int amdgpu_pmops_suspend(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
- return amdgpu_device_suspend(drm_dev, true, true);
+ return amdgpu_device_suspend(drm_dev, true);
}
static int amdgpu_pmops_resume(struct device *dev)
@@ -1127,66 +1155,92 @@ static int amdgpu_pmops_resume(struct device *dev)
struct drm_device *drm_dev = dev_get_drvdata(dev);
/* GPU comes up enabled by the bios on resume */
- if (amdgpu_device_is_px(drm_dev)) {
+ if (amdgpu_device_supports_boco(drm_dev) ||
+ amdgpu_device_supports_baco(drm_dev)) {
pm_runtime_disable(dev);
pm_runtime_set_active(dev);
pm_runtime_enable(dev);
}
- return amdgpu_device_resume(drm_dev, true, true);
+ return amdgpu_device_resume(drm_dev, true);
}
static int amdgpu_pmops_freeze(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_dev->dev_private;
+ int r;
- return amdgpu_device_suspend(drm_dev, false, true);
+ r = amdgpu_device_suspend(drm_dev, true);
+ if (r)
+ return r;
+ return amdgpu_asic_reset(adev);
}
static int amdgpu_pmops_thaw(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
- return amdgpu_device_resume(drm_dev, false, true);
+ return amdgpu_device_resume(drm_dev, true);
}
static int amdgpu_pmops_poweroff(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
- return amdgpu_device_suspend(drm_dev, true, true);
+ return amdgpu_device_suspend(drm_dev, true);
}
static int amdgpu_pmops_restore(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
- return amdgpu_device_resume(drm_dev, false, true);
+ return amdgpu_device_resume(drm_dev, true);
}
static int amdgpu_pmops_runtime_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
- int ret;
+ struct amdgpu_device *adev = drm_dev->dev_private;
+ int ret, i;
- if (!amdgpu_device_is_px(drm_dev)) {
+ if (!adev->runpm) {
pm_runtime_forbid(dev);
return -EBUSY;
}
- drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ /* wait for all rings to drain before suspending */
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+ if (ring && ring->sched.ready) {
+ ret = amdgpu_fence_wait_empty(ring);
+ if (ret)
+ return -EBUSY;
+ }
+ }
+
+ if (amdgpu_device_supports_boco(drm_dev))
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
drm_kms_helper_poll_disable(drm_dev);
- ret = amdgpu_device_suspend(drm_dev, false, false);
- pci_save_state(pdev);
- pci_disable_device(pdev);
- pci_ignore_hotplug(pdev);
- if (amdgpu_is_atpx_hybrid())
- pci_set_power_state(pdev, PCI_D3cold);
- else if (!amdgpu_has_atpx_dgpu_power_cntl())
- pci_set_power_state(pdev, PCI_D3hot);
- drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
+ ret = amdgpu_device_suspend(drm_dev, false);
+ if (amdgpu_device_supports_boco(drm_dev)) {
+ /* Only need to handle PCI state in the driver for ATPX
+ * PCI core handles it for _PR3.
+ */
+ if (amdgpu_is_atpx_hybrid()) {
+ pci_ignore_hotplug(pdev);
+ } else {
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+ pci_ignore_hotplug(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
+ }
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
+ } else if (amdgpu_device_supports_baco(drm_dev)) {
+ amdgpu_device_baco_enter(drm_dev);
+ }
return 0;
}
@@ -1195,34 +1249,45 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_dev->dev_private;
int ret;
- if (!amdgpu_device_is_px(drm_dev))
+ if (!adev->runpm)
return -EINVAL;
- drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
-
- if (amdgpu_is_atpx_hybrid() ||
- !amdgpu_has_atpx_dgpu_power_cntl())
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
- ret = pci_enable_device(pdev);
- if (ret)
- return ret;
- pci_set_master(pdev);
+ if (amdgpu_device_supports_boco(drm_dev)) {
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
- ret = amdgpu_device_resume(drm_dev, false, false);
+ /* Only need to handle PCI state in the driver for ATPX
+ * PCI core handles it for _PR3.
+ */
+ if (amdgpu_is_atpx_hybrid()) {
+ pci_set_master(pdev);
+ } else {
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+ pci_set_master(pdev);
+ }
+ } else if (amdgpu_device_supports_baco(drm_dev)) {
+ amdgpu_device_baco_exit(drm_dev);
+ }
+ ret = amdgpu_device_resume(drm_dev, false);
drm_kms_helper_poll_enable(drm_dev);
- drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
+ if (amdgpu_device_supports_boco(drm_dev))
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
return 0;
}
static int amdgpu_pmops_runtime_idle(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_dev->dev_private;
struct drm_crtc *crtc;
- if (!amdgpu_device_is_px(drm_dev)) {
+ if (!adev->runpm) {
pm_runtime_forbid(dev);
return -EBUSY;
}
@@ -1312,66 +1377,6 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
return 0;
}
-int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
-{
- char *input = amdgpu_lockup_timeout;
- char *timeout_setting = NULL;
- int index = 0;
- long timeout;
- int ret = 0;
-
- /*
- * By default timeout for non compute jobs is 10000.
- * And there is no timeout enforced on compute jobs.
- */
- adev->gfx_timeout = msecs_to_jiffies(10000);
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
- adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
-
- if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
- while ((timeout_setting = strsep(&input, ",")) &&
- strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
- ret = kstrtol(timeout_setting, 0, &timeout);
- if (ret)
- return ret;
-
- if (timeout == 0) {
- index++;
- continue;
- } else if (timeout < 0) {
- timeout = MAX_SCHEDULE_TIMEOUT;
- } else {
- timeout = msecs_to_jiffies(timeout);
- }
-
- switch (index++) {
- case 0:
- adev->gfx_timeout = timeout;
- break;
- case 1:
- adev->compute_timeout = timeout;
- break;
- case 2:
- adev->sdma_timeout = timeout;
- break;
- case 3:
- adev->video_timeout = timeout;
- break;
- default:
- break;
- }
- }
- /*
- * There is only one value specified and
- * it should apply to all non-compute jobs.
- */
- if (index == 1)
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
- }
-
- return ret;
-}
-
static bool
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
bool in_vblank_irq, int *vpos, int *hpos,
@@ -1386,7 +1391,8 @@ static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP | DRIVER_ATOMIC |
DRIVER_GEM |
- DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
+ DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
.load = amdgpu_driver_load_kms,
.open = amdgpu_driver_open_kms,
.postclose = amdgpu_driver_postclose_kms,
@@ -1410,8 +1416,6 @@ static struct drm_driver kms_driver = {
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_export = amdgpu_gem_prime_export,
.gem_prime_import = amdgpu_gem_prime_import,
- .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
- .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
.gem_prime_vmap = amdgpu_gem_prime_vmap,
.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
.gem_prime_mmap = amdgpu_gem_prime_mmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
index 571a6dfb473e..61fcf247a638 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -37,12 +37,14 @@ amdgpu_link_encoder_connector(struct drm_device *dev)
{
struct amdgpu_device *adev = dev->dev_private;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector;
struct drm_encoder *encoder;
struct amdgpu_encoder *amdgpu_encoder;
+ drm_connector_list_iter_begin(dev, &iter);
/* walk the list and link encoders to connectors */
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -55,6 +57,7 @@ amdgpu_link_encoder_connector(struct drm_device *dev)
}
}
}
+ drm_connector_list_iter_end(&iter);
}
void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
@@ -62,8 +65,10 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
struct drm_device *dev = encoder->dev;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices;
@@ -72,6 +77,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
amdgpu_connector->devices, encoder->encoder_type);
}
}
+ drm_connector_list_iter_end(&iter);
}
struct drm_connector *
@@ -79,15 +85,20 @@ amdgpu_get_connector_for_encoder(struct drm_encoder *encoder)
{
struct drm_device *dev = encoder->dev;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct drm_connector *connector;
+ struct drm_connector *connector, *found = NULL;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
- if (amdgpu_encoder->active_device & amdgpu_connector->devices)
- return connector;
+ if (amdgpu_encoder->active_device & amdgpu_connector->devices) {
+ found = connector;
+ break;
+ }
}
- return NULL;
+ drm_connector_list_iter_end(&iter);
+ return found;
}
struct drm_connector *
@@ -95,15 +106,20 @@ amdgpu_get_connector_for_encoder_init(struct drm_encoder *encoder)
{
struct drm_device *dev = encoder->dev;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct drm_connector *connector;
+ struct drm_connector *connector, *found = NULL;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
amdgpu_connector = to_amdgpu_connector(connector);
- if (amdgpu_encoder->devices & amdgpu_connector->devices)
- return connector;
+ if (amdgpu_encoder->devices & amdgpu_connector->devices) {
+ found = connector;
+ break;
+ }
}
- return NULL;
+ drm_connector_list_iter_end(&iter);
+ return found;
}
struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 143753d237e7..2672dc64a310 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -69,7 +69,7 @@ amdgpufb_release(struct fb_info *info, int user)
return 0;
}
-static struct fb_ops amdgpufb_ops = {
+static const struct fb_ops amdgpufb_ops = {
.owner = THIS_MODULE,
DRM_FB_HELPER_DEFAULT_OPS,
.fb_open = amdgpufb_open,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 23085b352cf2..3c01252b1e0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -34,6 +34,7 @@
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
+#include <linux/pm_runtime.h>
#include <drm/drm_debugfs.h>
@@ -154,7 +155,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
-
+ pm_runtime_get_noresume(adev->ddev->dev);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
struct dma_fence *old;
@@ -234,6 +235,7 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
bool amdgpu_fence_process(struct amdgpu_ring *ring)
{
struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct amdgpu_device *adev = ring->adev;
uint32_t seq, last_seq;
int r;
@@ -274,6 +276,8 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
BUG();
dma_fence_put(fence);
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
} while (last_seq != seq);
return true;
@@ -462,18 +466,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
timeout = adev->gfx_timeout;
break;
case AMDGPU_RING_TYPE_COMPUTE:
- /*
- * For non-sriov case, no timeout enforce
- * on compute ring by default. Unless user
- * specifies a timeout for compute ring.
- *
- * For sriov case, always use the timeout
- * as gfx ring
- */
- if (!amdgpu_sriov_vf(ring->adev))
- timeout = adev->compute_timeout;
- else
- timeout = adev->gfx_timeout;
+ timeout = adev->compute_timeout;
break;
case AMDGPU_RING_TYPE_SDMA:
timeout = adev->sdma_timeout;
@@ -748,10 +741,18 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ return 0;
seq_printf(m, "gpu recover\n");
amdgpu_device_gpu_recover(adev, NULL);
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 5e8bdded265f..e01e681d2a60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -71,7 +71,7 @@
*/
static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
{
- struct page *dummy_page = adev->mman.bdev.glob->dummy_read_page;
+ struct page *dummy_page = ttm_bo_glob.dummy_read_page;
if (adev->dummy_page_addr)
return 0;
@@ -302,6 +302,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
* @pages: number of pages to bind
* @pagelist: pages to bind
* @dma_addr: DMA addresses of pages
+ * @flags: page table entry flags
*
* Binds the requested pages to the gart page table
* (all asics).
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 8ceb44925947..4277125a79ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -175,7 +175,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
- r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates, false);
+ r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
@@ -527,13 +527,41 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
goto error;
}
- r = amdgpu_vm_update_directories(adev, vm);
+ r = amdgpu_vm_update_pdes(adev, vm, false);
error:
if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
}
+/**
+ * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: GEM UAPI flags
+ *
+ * Returns the GEM UAPI flags mapped into hardware for the ASIC.
+ */
+uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
+{
+ uint64_t pte_flag = 0;
+
+ if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ pte_flag |= AMDGPU_PTE_EXECUTABLE;
+ if (flags & AMDGPU_VM_PAGE_READABLE)
+ pte_flag |= AMDGPU_PTE_READABLE;
+ if (flags & AMDGPU_VM_PAGE_WRITEABLE)
+ pte_flag |= AMDGPU_PTE_WRITEABLE;
+ if (flags & AMDGPU_VM_PAGE_PRT)
+ pte_flag |= AMDGPU_PTE_PRT;
+
+ if (adev->gmc.gmc_funcs->map_mtype)
+ pte_flag |= amdgpu_gmc_map_mtype(adev,
+ flags & AMDGPU_VM_MTYPE_MASK);
+
+ return pte_flag;
+}
+
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
@@ -613,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
- r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates, false);
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
if (r)
goto error_unref;
@@ -631,7 +659,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
- va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
+ va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
va_flags);
@@ -646,7 +674,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->map_size);
break;
case AMDGPU_VA_OP_REPLACE:
- va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
+ va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
va_flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index 0b66d2e6b5d5..e0f025dd1b14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -67,6 +67,7 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
+uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags);
int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index f9bef3154b99..0f960b498792 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -26,6 +26,7 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
+#include "amdgpu_ras.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
@@ -231,12 +232,10 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
- int i, queue, pipe, me;
+ int i, queue, me;
for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
queue = i % adev->gfx.me.num_queue_per_pipe;
- pipe = (i / adev->gfx.me.num_queue_per_pipe)
- % adev->gfx.me.num_pipe_per_me;
me = (i / adev->gfx.me.num_queue_per_pipe)
/ adev->gfx.me.num_pipe_per_me;
@@ -297,7 +296,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
spin_lock_init(&kiq->ring_lock);
- r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs);
+ r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
if (r)
return r;
@@ -320,10 +319,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
return r;
}
-void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq)
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
- amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
+ amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
amdgpu_ring_fini(ring);
}
@@ -456,8 +454,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
}
ring = &adev->gfx.kiq.ring;
- if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring)
- kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]);
kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
@@ -547,12 +543,6 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
return;
- if (!is_support_sw_smu(adev) &&
- (!adev->powerplay.pp_funcs ||
- !adev->powerplay.pp_funcs->set_powergating_by_smu))
- return;
-
-
mutex_lock(&adev->gfx.gfx_off_mutex);
if (!enable)
@@ -569,3 +559,194 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
mutex_unlock(&adev->gfx.gfx_off_mutex);
}
+
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "gfx_err_count",
+ .debugfs_name = "gfx_err_inject",
+ };
+ struct ras_ih_if ih_info = {
+ .cb = amdgpu_gfx_process_ras_data_cb,
+ };
+
+ if (!adev->gfx.ras_if) {
+ adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->gfx.ras_if)
+ return -ENOMEM;
+ adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
+ adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gfx.ras_if->sub_block_index = 0;
+ strcpy(adev->gfx.ras_if->name, "gfx");
+ }
+ fs_info.head = ih_info.head = *adev->gfx.ras_if;
+
+ r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
+ &fs_info, &ih_info);
+ if (r)
+ goto free;
+
+ if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r)
+ goto late_fini;
+ } else {
+ /* free gfx ras_if if ras is not supported */
+ r = 0;
+ goto free;
+ }
+
+ return 0;
+late_fini:
+ amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info);
+free:
+ kfree(adev->gfx.ras_if);
+ adev->gfx.ras_if = NULL;
+ return r;
+}
+
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
+ adev->gfx.ras_if) {
+ struct ras_common_if *ras_if = adev->gfx.ras_if;
+ struct ras_ih_if ih_info = {
+ .head = *ras_if,
+ .cb = amdgpu_gfx_process_ras_data_cb,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
+
+int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ /* TODO ue will trigger an interrupt.
+ *
+ * When “Full RAS” is enabled, the per-IP interrupt sources should
+ * be disabled and the driver should only look for the aggregated
+ * interrupt via sync flood
+ */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ if (adev->gfx.funcs->query_ras_error_count)
+ adev->gfx.funcs->query_ras_error_count(adev, err_data);
+ amdgpu_ras_reset_gpu(adev);
+ }
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->gfx.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ DRM_ERROR("CP ECC ERROR IRQ\n");
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
+
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ BUG_ON(!ring->funcs->emit_rreg);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_rreg(ring, reg);
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for gpu reset case because this way may
+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which cause
+ * gpu_recover() hang there.
+ *
+ * also don't wait anymore for IRQ context
+ * */
+ if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+ goto failed_kiq_read;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_read;
+
+ return adev->wb.wb[kiq->reg_val_offs];
+
+failed_kiq_read:
+ pr_err("failed to read reg:%x\n", reg);
+ return ~0;
+}
+
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ BUG_ON(!ring->funcs->emit_wreg);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_wreg(ring, reg, v);
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for gpu reset case because this way may
+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which cause
+ * gpu_recover() hang there.
+ *
+ * also don't wait anymore for IRQ context
+ * */
+ if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+ goto failed_kiq_write;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_write;
+
+ return;
+
+failed_kiq_write:
+ pr_err("failed to write reg:%x\n", reg);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 6ee4021910e2..ca17ffb01301 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
struct amdgpu_ring *ring,
u64 addr,
u64 seq);
+ void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub);
/* Packet sizes */
int set_resources_size;
int map_queues_size;
int unmap_queues_size;
int query_status_size;
+ int invalidate_tlbs_size;
};
struct amdgpu_kiq {
@@ -90,6 +94,7 @@ struct amdgpu_kiq {
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
const struct kiq_pm4_funcs *pmf;
+ uint32_t reg_val_offs;
};
/*
@@ -201,28 +206,6 @@ struct amdgpu_gfx_funcs {
int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status);
};
-struct amdgpu_ngg_buf {
- struct amdgpu_bo *bo;
- uint64_t gpu_addr;
- uint32_t size;
- uint32_t bo_size;
-};
-
-enum {
- NGG_PRIM = 0,
- NGG_POS,
- NGG_CNTL,
- NGG_PARAM,
- NGG_BUF_MAX
-};
-
-struct amdgpu_ngg {
- struct amdgpu_ngg_buf buf[NGG_BUF_MAX];
- uint32_t gds_reserve_addr;
- uint32_t gds_reserve_size;
- bool init;
-};
-
struct sq_work {
struct work_struct work;
unsigned ih_data;
@@ -247,7 +230,7 @@ struct amdgpu_me {
uint32_t num_me;
uint32_t num_pipe_per_me;
uint32_t num_queue_per_pipe;
- void *mqd_backup[AMDGPU_MAX_GFX_RINGS + 1];
+ void *mqd_backup[AMDGPU_MAX_GFX_RINGS];
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
@@ -289,9 +272,14 @@ struct amdgpu_gfx {
uint32_t mec2_feature_version;
bool mec_fw_write_wait;
bool me_fw_write_wait;
+ bool cp_fw_write_wait;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
+ struct drm_gpu_scheduler *gfx_sched[AMDGPU_MAX_GFX_RINGS];
+ uint32_t num_gfx_sched;
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
+ struct drm_gpu_scheduler *compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
+ uint32_t num_compute_sched;
unsigned num_compute_rings;
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
@@ -311,9 +299,6 @@ struct amdgpu_gfx {
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
- /* NGG */
- struct amdgpu_ngg ngg;
-
/* gfx off */
bool gfx_off_state; /* true: enabled, false: disabled */
struct mutex gfx_off_mutex;
@@ -355,8 +340,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq);
-void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq);
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
@@ -384,5 +368,14 @@ void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
int pipe, int queue);
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
-
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
+int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 5790db61fa2c..5884ab590486 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -27,6 +27,8 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
/**
* amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
@@ -221,7 +223,7 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
u64 size_af, size_bf;
if (amdgpu_sriov_vf(adev)) {
- mc->agp_start = 0xffffffff;
+ mc->agp_start = 0xffffffffffff;
mc->agp_end = 0x0;
mc->agp_size = 0;
@@ -305,3 +307,69 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
gmc->fault_hash[hash].idx = gmc->last_fault++;
return false;
}
+
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
+ r = adev->umc.funcs->ras_late_init(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
+ r = adev->mmhub.funcs->ras_late_init(adev);
+ if (r)
+ return r;
+ }
+
+ return amdgpu_xgmi_ras_late_init(adev);
+}
+
+void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
+{
+ amdgpu_umc_ras_fini(adev);
+ amdgpu_mmhub_ras_fini(adev);
+ amdgpu_xgmi_ras_fini(adev);
+}
+
+ /*
+ * The latest engine allocation on gfx9/10 is:
+ * Engine 2, 3: firmware
+ * Engine 0, 1, 4~16: amdgpu ring,
+ * subject to change when ring number changes
+ * Engine 17: Gart flushes
+ */
+#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
+#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
+
+int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
+ {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
+ GFXHUB_FREE_VM_INV_ENGS_BITMAP};
+ unsigned i;
+ unsigned vmhub, inv_eng;
+
+ for (i = 0; i < adev->num_rings; ++i) {
+ ring = adev->rings[i];
+ vmhub = ring->funcs->vmhub;
+
+ inv_eng = ffs(vm_inv_engs[vmhub]);
+ if (!inv_eng) {
+ dev_err(adev->dev, "no VM inv eng for ring %s\n",
+ ring->name);
+ return -EINVAL;
+ }
+
+ ring->vm_inv_eng = inv_eng - 1;
+ vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
+
+ dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
+ ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index b6e1d98ef01e..d3c27a3c43f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -77,6 +77,7 @@ struct amdgpu_gmc_fault {
struct amdgpu_vmhub {
uint32_t ctx0_ptb_addr_lo32;
uint32_t ctx0_ptb_addr_hi32;
+ uint32_t vm_inv_eng0_sem;
uint32_t vm_inv_eng0_req;
uint32_t vm_inv_eng0_ack;
uint32_t vm_context0_cntl;
@@ -91,6 +92,9 @@ struct amdgpu_gmc_funcs {
/* flush the vm tlb via mmio */
void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type);
+ /* flush the vm tlb via pasid */
+ int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
@@ -99,12 +103,15 @@ struct amdgpu_gmc_funcs {
unsigned pasid);
/* enable/disable PRT support */
void (*set_prt)(struct amdgpu_device *adev, bool enable);
- /* set pte flags based per asic */
- uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
- uint32_t flags);
+ /* map mtype to hardware flags */
+ uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags);
/* get the pde for a given mc addr */
void (*get_vm_pde)(struct amdgpu_device *adev, int level,
u64 *dst, u64 *flags);
+ /* get the pte flags to use for a BO VA mapping */
+ void (*get_vm_pte)(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags);
};
struct amdgpu_xgmi {
@@ -120,21 +127,52 @@ struct amdgpu_xgmi {
/* gpu list in the same hive */
struct list_head head;
bool supported;
+ struct ras_common_if *ras_if;
};
struct amdgpu_gmc {
+ /* FB's physical address in MMIO space (for CPU to
+ * map FB). This is different compared to the agp/
+ * gart/vram_start/end field as the later is from
+ * GPU's view and aper_base is from CPU's view.
+ */
resource_size_t aper_size;
resource_size_t aper_base;
/* for some chips with <= 32MB we need to lie
* about vram size near mc fb location */
u64 mc_vram_size;
u64 visible_vram_size;
+ /* AGP aperture start and end in MC address space
+ * Driver find a hole in the MC address space
+ * to place AGP by setting MC_VM_AGP_BOT/TOP registers
+ * Under VMID0, logical address == MC address. AGP
+ * aperture maps to physical bus or IOVA addressed.
+ * AGP aperture is used to simulate FB in ZFB case.
+ * AGP aperture is also used for page table in system
+ * memory (mainly for APU).
+ *
+ */
u64 agp_size;
u64 agp_start;
u64 agp_end;
+ /* GART aperture start and end in MC address space
+ * Driver find a hole in the MC address space
+ * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR
+ * registers
+ * Under VMID0, logical address inside GART aperture will
+ * be translated through gpuvm gart page table to access
+ * paged system memory
+ */
u64 gart_size;
u64 gart_start;
u64 gart_end;
+ /* Frame buffer aperture of this GPU device. Different from
+ * fb_start (see below), this only covers the local GPU device.
+ * Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios)
+ * and calculate vram_start of this local device by adding an
+ * offset inside the XGMI hive.
+ * Under VMID0, logical address == MC address
+ */
u64 vram_start;
u64 vram_end;
/* FB region , it's same as local vram region in single GPU, in XGMI
@@ -153,6 +191,7 @@ struct amdgpu_gmc {
uint32_t fw_version;
struct amdgpu_irq_src vm_fault;
uint32_t vram_type;
+ uint8_t vram_vendor;
uint32_t srbm_soft_reset;
bool prt_warning;
uint64_t stolen_size;
@@ -177,15 +216,17 @@ struct amdgpu_gmc {
struct amdgpu_xgmi xgmi;
struct amdgpu_irq_src ecc_irq;
- struct ras_common_if *umc_ras_if;
- struct ras_common_if *mmhub_ras_if;
};
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+ ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
+ ((adev), (pasid), (type), (allhub)))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
+#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
-#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
+#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
/**
* amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
@@ -230,5 +271,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid, uint64_t timestamp);
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
+int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 53734da1c2df..3a67f6c046d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
int r;
if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
- return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false);
+ return amdgpu_sync_fence(sync, ring->vmid_wait, false);
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
if (!fences)
@@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
return -ENOMEM;
}
- r = amdgpu_sync_fence(adev, sync, &array->base, false);
+ r = amdgpu_sync_fence(sync, &array->base, false);
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = &array->base;
return r;
@@ -282,7 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
!dma_fence_is_later(updates, (*id)->flushed_updates))
updates = NULL;
- if ((*id)->owner != vm->entity.fence_context ||
+ if ((*id)->owner != vm->direct.fence_context ||
job->vm_pd_addr != (*id)->pd_gpu_addr ||
updates || !(*id)->last_flush ||
((*id)->last_flush->context != fence_context &&
@@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
if (tmp) {
*id = NULL;
- r = amdgpu_sync_fence(adev, sync, tmp, false);
+ r = amdgpu_sync_fence(sync, tmp, false);
return r;
}
needs_flush = true;
@@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+ r = amdgpu_sync_fence(&(*id)->active, fence, false);
if (r)
return r;
@@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
struct dma_fence *flushed;
/* Check all the prerequisites to using this VMID */
- if ((*id)->owner != vm->entity.fence_context)
+ if ((*id)->owner != vm->direct.fence_context)
continue;
if ((*id)->pd_gpu_addr != job->vm_pd_addr)
@@ -375,7 +375,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
/* Good, we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+ r = amdgpu_sync_fence(&(*id)->active, fence, false);
if (r)
return r;
@@ -435,8 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
id = idle;
/* Remember this submission as user of the VMID */
- r = amdgpu_sync_fence(ring->adev, &id->active,
- fence, false);
+ r = amdgpu_sync_fence(&id->active, fence, false);
if (r)
goto error;
@@ -449,7 +448,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
}
id->pd_gpu_addr = job->vm_pd_addr;
- id->owner = vm->entity.fence_context;
+ id->owner = vm->direct.fence_context;
if (job->vm_needs_flush) {
dma_fence_put(id->last_flush);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 6d8f05511aba..111a301ce878 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -66,7 +66,6 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
if (ih->ring == NULL)
return -ENOMEM;
- memset((void *)ih->ring, 0, ih->ring_size + 8);
ih->gpu_addr = dma_addr;
ih->wptr_addr = dma_addr + ih->ring_size;
ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 2a3f5ec298db..5ed4227f304b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -55,6 +55,7 @@
#include "amdgpu_connectors.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_ras.h"
#include <linux/pm_runtime.h>
@@ -87,10 +88,13 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
struct drm_device *dev = adev->ddev;
struct drm_mode_config *mode_config = &dev->mode_config;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
mutex_lock(&mode_config->mutex);
- list_for_each_entry(connector, &mode_config->connector_list, head)
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
amdgpu_connector_hotplug(connector);
+ drm_connector_list_iter_end(&iter);
mutex_unlock(&mode_config->mutex);
/* Just fire off a uevent and let userspace tell us what to do */
drm_helper_hpd_irq_event(dev);
@@ -153,6 +157,22 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
ret = amdgpu_ih_process(adev, &adev->irq.ih);
if (ret == IRQ_HANDLED)
pm_runtime_mark_last_busy(dev->dev);
+
+ /* For the hardware that cannot enable bif ring for both ras_controller_irq
+ * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
+ * register to check whether the interrupt is triggered or not, and properly
+ * ack the interrupt if it is there
+ */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) {
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->handle_ras_controller_intr_no_bifring)
+ adev->nbio.funcs->handle_ras_controller_intr_no_bifring(adev);
+
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring)
+ adev->nbio.funcs->handle_ras_err_event_athub_intr_no_bifring(adev);
+ }
+
return ret;
}
@@ -228,10 +248,19 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
adev->irq.msi_enabled = false;
if (amdgpu_msi_ok(adev)) {
- int ret = pci_enable_msi(adev->pdev);
- if (!ret) {
+ int nvec = pci_msix_vec_count(adev->pdev);
+ unsigned int flags;
+
+ if (nvec <= 0) {
+ flags = PCI_IRQ_MSI;
+ } else {
+ flags = PCI_IRQ_MSI | PCI_IRQ_MSIX;
+ }
+ /* we only need one vector */
+ nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
+ if (nvec > 0) {
adev->irq.msi_enabled = true;
- dev_dbg(adev->dev, "amdgpu: using MSI.\n");
+ dev_dbg(adev->dev, "amdgpu: using MSI/MSI-X.\n");
}
}
@@ -254,7 +283,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2);
adev->irq.installed = true;
- r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
+ /* Use vector 0 for MSI-X */
+ r = drm_irq_install(adev->ddev, pci_irq_vector(adev->pdev, 0));
if (r) {
adev->irq.installed = false;
if (!amdgpu_device_has_dc_support(adev))
@@ -284,7 +314,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
drm_irq_uninstall(adev->ddev);
adev->irq.installed = false;
if (adev->irq.msi_enabled)
- pci_disable_msi(adev->pdev);
+ pci_free_irq_vectors(adev->pdev);
if (!amdgpu_device_has_dc_support(adev))
flush_work(&adev->hotplug_work);
}
@@ -369,7 +399,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
* amdgpu_irq_dispatch - dispatch IRQ to IP blocks
*
* @adev: amdgpu device pointer
- * @entry: interrupt vector pointer
+ * @ih: interrupt ring instance
*
* Dispatches IRQ to IP blocks.
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 9d76e0923a5a..d42be880a236 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -153,7 +153,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
if (r)
return r;
- job->owner = owner;
*f = dma_fence_get(&job->base.s_fence->finished);
amdgpu_job_free_resources(job);
priority = job->base.s_priority;
@@ -193,8 +192,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
fence = amdgpu_sync_get_fence(&job->sync, &explicit);
if (fence && explicit) {
if (drm_sched_dependency_optimized(fence, s_entity)) {
- r = amdgpu_sync_fence(ring->adev, &job->sched_sync,
- fence, false);
+ r = amdgpu_sync_fence(&job->sched_sync, fence, false);
if (r)
DRM_ERROR("Error adding fence (%d)\n", r);
}
@@ -218,7 +216,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
struct dma_fence *fence = NULL, *finished;
struct amdgpu_job *job;
- int r;
+ int r = 0;
job = to_amdgpu_job(sched_job);
finished = &job->base.s_fence->finished;
@@ -243,9 +241,49 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
job->fence = dma_fence_get(fence);
amdgpu_job_free_resources(job);
+
+ fence = r ? ERR_PTR(r) : fence;
return fence;
}
+#define to_drm_sched_job(sched_job) \
+ container_of((sched_job), struct drm_sched_job, queue_node)
+
+void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
+{
+ struct drm_sched_job *s_job;
+ struct drm_sched_entity *s_entity = NULL;
+ int i;
+
+ /* Signal all jobs not yet scheduled */
+ for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
+ struct drm_sched_rq *rq = &sched->sched_rq[i];
+
+ if (!rq)
+ continue;
+
+ spin_lock(&rq->lock);
+ list_for_each_entry(s_entity, &rq->entities, list) {
+ while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
+ struct drm_sched_fence *s_fence = s_job->s_fence;
+
+ dma_fence_signal(&s_fence->scheduled);
+ dma_fence_set_error(&s_fence->finished, -EHWPOISON);
+ dma_fence_signal(&s_fence->finished);
+ }
+ }
+ spin_unlock(&rq->lock);
+ }
+
+ /* Signal all jobs already scheduled to HW */
+ list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
+ struct drm_sched_fence *s_fence = s_job->s_fence;
+
+ dma_fence_set_error(&s_fence->finished, -EHWPOISON);
+ dma_fence_signal(&s_fence->finished);
+ }
+}
+
const struct drm_sched_backend_ops amdgpu_sched_ops = {
.dependency = amdgpu_job_dependency,
.run_job = amdgpu_job_run,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 51e62504c279..3f7b8433d179 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -49,7 +49,6 @@ struct amdgpu_job {
uint32_t preamble_status;
uint32_t preemption_status;
uint32_t num_ibs;
- void *owner;
bool vm_needs_flush;
uint64_t vm_pd_addr;
unsigned vmid;
@@ -76,4 +75,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
void *owner, struct dma_fence **f);
int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct dma_fence **fence);
+
+void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
new file mode 100644
index 000000000000..5727f00afc8e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+
+#define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
+
+int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
+{
+ INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler);
+
+ return 0;
+}
+
+int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec);
+ }
+
+ return 0;
+}
+
+int amdgpu_jpeg_suspend(struct amdgpu_device *adev)
+{
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ return 0;
+}
+
+int amdgpu_jpeg_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, jpeg.idle_work.work);
+ unsigned int fences = 0;
+ unsigned int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec);
+ }
+
+ if (fences == 0)
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+ AMD_PG_STATE_GATE);
+ else
+ schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
+}
+
+void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ if (set_clocks)
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+ AMD_PG_STATE_UNGATE);
+}
+
+void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring)
+{
+ schedule_delayed_work(&ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
+}
+
+int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ const unsigned ib_size_dw = 16;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+
+ ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0);
+ ib->ptr[1] = 0xDEADBEEF;
+ for (i = 2; i < 16; i += 2) {
+ ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+ ib->ptr[i+1] = 0;
+ }
+ ib->length_dw = 16;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ struct dma_fence *fence = NULL;
+ long r = 0;
+
+ r = amdgpu_jpeg_dec_set_reg(ring, 1, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto error;
+ } else if (r < 0) {
+ goto error;
+ } else {
+ r = 0;
+ }
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ dma_fence_put(fence);
+error:
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
new file mode 100644
index 000000000000..bd9ef9cc86de
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_JPEG_H__
+#define __AMDGPU_JPEG_H__
+
+#define AMDGPU_MAX_JPEG_INSTANCES 2
+
+#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
+#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
+
+struct amdgpu_jpeg_reg{
+ unsigned jpeg_pitch;
+};
+
+struct amdgpu_jpeg_inst {
+ struct amdgpu_ring ring_dec;
+ struct amdgpu_irq_src irq;
+ struct amdgpu_jpeg_reg external;
+};
+
+struct amdgpu_jpeg {
+ uint8_t num_jpeg_inst;
+ struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
+ struct amdgpu_jpeg_reg internal;
+ struct drm_gpu_scheduler *jpeg_sched[AMDGPU_MAX_JPEG_INSTANCES];
+ uint32_t num_jpeg_sched;
+ unsigned harvest_config;
+ struct delayed_work idle_work;
+ enum amd_powergating_state cur_state;
+};
+
+int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev);
+int amdgpu_jpeg_suspend(struct amdgpu_device *adev);
+int amdgpu_jpeg_resume(struct amdgpu_device *adev);
+
+void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring);
+
+int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+#endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index f2c097983f48..60591dbc2097 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -91,7 +91,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
- if (amdgpu_device_is_px(dev)) {
+ if (adev->runpm) {
pm_runtime_get_sync(dev->dev);
pm_runtime_forbid(dev->dev);
}
@@ -144,49 +144,13 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
struct amdgpu_device *adev;
int r, acpi_status;
-#ifdef CONFIG_DRM_AMDGPU_SI
- if (!amdgpu_si_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_HAINAN:
- dev_info(dev->dev,
- "SI support provided by radeon.\n");
- dev_info(dev->dev,
- "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- if (!amdgpu_cik_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_KAVERI:
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KABINI:
- case CHIP_MULLINS:
- dev_info(dev->dev,
- "CIK support provided by radeon.\n");
- dev_info(dev->dev,
- "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-
adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
if (adev == NULL) {
return -ENOMEM;
}
dev->dev_private = (void *)adev;
- if ((amdgpu_runtime_pm != 0) &&
- amdgpu_has_atpx() &&
+ if (amdgpu_has_atpx() &&
(amdgpu_is_atpx_hybrid() ||
amdgpu_has_atpx_dgpu_power_cntl()) &&
((flags & AMD_IS_APU) == 0) &&
@@ -205,6 +169,13 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
goto out;
}
+ if (amdgpu_device_supports_boco(dev) &&
+ (amdgpu_runtime_pm != 0)) /* enable runpm by default */
+ adev->runpm = true;
+ else if (amdgpu_device_supports_baco(dev) &&
+ (amdgpu_runtime_pm > 0)) /* enable runpm if runpm=1 */
+ adev->runpm = true;
+
/* Call ACPI methods: require modeset init
* but failure is not fatal
*/
@@ -215,7 +186,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
"Error during ACPI methods call\n");
}
- if (amdgpu_device_is_px(dev)) {
+ if (adev->runpm) {
dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NEVER_SKIP);
pm_runtime_use_autosuspend(dev->dev);
pm_runtime_set_autosuspend_delay(dev->dev, 5000);
@@ -225,11 +196,10 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
pm_runtime_put_autosuspend(dev->dev);
}
- amdgpu_register_gpu_instance(adev);
out:
if (r) {
/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
- if (adev->rmmio && amdgpu_device_is_px(dev))
+ if (adev->rmmio && adev->runpm)
pm_runtime_put_noidle(dev->dev);
amdgpu_driver_unload_kms(dev);
}
@@ -329,6 +299,10 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->dm.dmcu_fw_version;
fw_info->feature = 0;
break;
+ case AMDGPU_INFO_FW_DMCUB:
+ fw_info->ver = adev->dm.dmcub_fw_version;
+ fw_info->feature = 0;
+ break;
default:
return -EINVAL;
}
@@ -432,12 +406,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_VCN_JPEG:
- type = AMD_IP_BLOCK_TYPE_VCN;
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->uvd.harvest_config & (1 << i))
+ type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i))
continue;
- if (adev->vcn.inst[i].ring_jpeg.sched.ready)
+ if (adev->jpeg.inst[i].ring_dec.sched.ready)
++num_rings;
}
ib_start_alignment = 16;
@@ -553,9 +529,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
break;
case AMDGPU_HW_IP_VCN_DEC:
case AMDGPU_HW_IP_VCN_ENC:
- case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+ break;
default:
return -EINVAL;
}
@@ -619,9 +598,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_amdgpu_info_vram_gtt vram_gtt;
vram_gtt.vram_size = adev->gmc.real_vram_size -
- atomic64_read(&adev->vram_pin_size);
- vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size -
- atomic64_read(&adev->visible_pin_size);
+ atomic64_read(&adev->vram_pin_size) -
+ AMDGPU_VM_RESERVED_VRAM;
+ vram_gtt.vram_cpu_accessible_size =
+ min(adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size),
+ vram_gtt.vram_size);
vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
vram_gtt.gtt_size *= PAGE_SIZE;
vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
@@ -634,15 +616,18 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
memset(&mem, 0, sizeof(mem));
mem.vram.total_heap_size = adev->gmc.real_vram_size;
mem.vram.usable_heap_size = adev->gmc.real_vram_size -
- atomic64_read(&adev->vram_pin_size);
+ atomic64_read(&adev->vram_pin_size) -
+ AMDGPU_VM_RESERVED_VRAM;
mem.vram.heap_usage =
amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
adev->gmc.visible_vram_size;
- mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size -
- atomic64_read(&adev->visible_pin_size);
+ mem.cpu_accessible_vram.usable_heap_size =
+ min(adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size),
+ mem.vram.usable_heap_size);
mem.cpu_accessible_vram.heap_usage =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.cpu_accessible_vram.max_allocation =
@@ -685,15 +670,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return -ENOMEM;
alloc_size = info->read_mmr_reg.count * sizeof(*regs);
- for (i = 0; i < info->read_mmr_reg.count; i++)
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < info->read_mmr_reg.count; i++) {
if (amdgpu_asic_read_register(adev, se_num, sh_num,
info->read_mmr_reg.dword_offset + i,
&regs[i])) {
DRM_DEBUG_KMS("unallowed offset %#x\n",
info->read_mmr_reg.dword_offset + i);
kfree(regs);
+ amdgpu_gfx_off_ctrl(adev, true);
return -EFAULT;
}
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
n = copy_to_user(out, regs, min(size, alloc_size));
kfree(regs);
return n ? -EFAULT : 0;
@@ -714,10 +703,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
if (adev->pm.dpm_enabled) {
dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10;
dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10;
- } else if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) &&
- adev->virt.ops->get_pp_clk) {
- dev_info.max_engine_clock = amdgpu_virt_get_sclk(adev, false) * 10;
- dev_info.max_memory_clock = amdgpu_virt_get_mclk(adev, false) * 10;
} else {
dev_info.max_engine_clock = adev->clock.default_sclk * 10;
dev_info.max_memory_clock = adev->clock.default_mclk * 10;
@@ -764,17 +749,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.vce_harvest_config = adev->vce.harvest_config;
dev_info.gc_double_offchip_lds_buf =
adev->gfx.config.double_offchip_lds_buf;
-
- if (amdgpu_ngg) {
- dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr;
- dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size;
- dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr;
- dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size;
- dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr;
- dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size;
- dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr;
- dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size;
- }
dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size;
dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs;
dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
@@ -1003,6 +977,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
/* Ensure IB tests are run on ring */
flush_delayed_work(&adev->delayed_init_work);
+
+ if (amdgpu_ras_intr_triggered()) {
+ DRM_ERROR("RAS Intr triggered, device disabled!!");
+ return -EHWPOISON;
+ }
+
file_priv->driver_priv = NULL;
r = pm_runtime_get_sync(dev->dev);
@@ -1425,6 +1405,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
+ /* DMCUB */
+ query_fw.fw_type = AMDGPU_INFO_FW_DMCUB;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "DMCUB feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
new file mode 100644
index 000000000000..676c48c02d77
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "mmhub_err_count",
+ .debugfs_name = "mmhub_err_inject",
+ };
+
+ if (!adev->mmhub.ras_if) {
+ adev->mmhub.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->mmhub.ras_if)
+ return -ENOMEM;
+ adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB;
+ adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mmhub.ras_if->sub_block_index = 0;
+ strcpy(adev->mmhub.ras_if->name, "mmhub");
+ }
+ ih_info.head = fs_info.head = *adev->mmhub.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if,
+ &fs_info, &ih_info);
+ if (r || !amdgpu_ras_is_supported(adev, adev->mmhub.ras_if->block)) {
+ kfree(adev->mmhub.ras_if);
+ adev->mmhub.ras_if = NULL;
+ }
+
+ return r;
+}
+
+void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
+ adev->mmhub.ras_if) {
+ struct ras_common_if *ras_if = adev->mmhub.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 2d75ecfa199b..1cd78940cf82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -23,9 +23,17 @@
struct amdgpu_mmhub_funcs {
void (*ras_init)(struct amdgpu_device *adev);
+ int (*ras_late_init)(struct amdgpu_device *adev);
void (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
};
+struct amdgpu_mmhub {
+ struct ras_common_if *ras_if;
+ const struct amdgpu_mmhub_funcs *funcs;
+};
+
+int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 31d4deb5d294..828b5167ff12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -51,438 +51,107 @@
#include "amdgpu_amdkfd.h"
/**
- * struct amdgpu_mn_node
+ * amdgpu_mn_invalidate_gfx - callback to notify about mm change
*
- * @it: interval node defining start-last of the affected address range
- * @bos: list of all BOs in the affected address range
- *
- * Manages all BOs which are affected of a certain range of address space.
- */
-struct amdgpu_mn_node {
- struct interval_tree_node it;
- struct list_head bos;
-};
-
-/**
- * amdgpu_mn_destroy - destroy the HMM mirror
- *
- * @work: previously sheduled work item
- *
- * Lazy destroys the notifier from a work item
- */
-static void amdgpu_mn_destroy(struct work_struct *work)
-{
- struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
- struct amdgpu_device *adev = amn->adev;
- struct amdgpu_mn_node *node, *next_node;
- struct amdgpu_bo *bo, *next_bo;
-
- mutex_lock(&adev->mn_lock);
- down_write(&amn->lock);
- hash_del(&amn->node);
- rbtree_postorder_for_each_entry_safe(node, next_node,
- &amn->objects.rb_root, it.rb) {
- list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
- }
- kfree(node);
- }
- up_write(&amn->lock);
- mutex_unlock(&adev->mn_lock);
-
- hmm_mirror_unregister(&amn->mirror);
- kfree(amn);
-}
-
-/**
- * amdgpu_hmm_mirror_release - callback to notify about mm destruction
- *
- * @mirror: the HMM mirror (mm) this callback is about
- *
- * Shedule a work item to lazy destroy HMM mirror.
- */
-static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-
- INIT_WORK(&amn->work, amdgpu_mn_destroy);
- schedule_work(&amn->work);
-}
-
-/**
- * amdgpu_mn_lock - take the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_lock(struct amdgpu_mn *mn)
-{
- if (mn)
- down_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_unlock - drop the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_unlock(struct amdgpu_mn *mn)
-{
- if (mn)
- up_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_read_lock - take the read side lock for this notifier
- *
- * @amn: our notifier
- */
-static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
-{
- if (blockable)
- down_read(&amn->lock);
- else if (!down_read_trylock(&amn->lock))
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * amdgpu_mn_read_unlock - drop the read side lock for this notifier
- *
- * @amn: our notifier
- */
-static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
-{
- up_read(&amn->lock);
-}
-
-/**
- * amdgpu_mn_invalidate_node - unmap all BOs of a node
- *
- * @node: the node with the BOs to unmap
- * @start: start of address range affected
- * @end: end of address range affected
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
- unsigned long start,
- unsigned long end)
+static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct amdgpu_bo *bo;
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
- list_for_each_entry(bo, &node->bos, mn_list) {
-
- if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
- continue;
-
- r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
- true, false, MAX_SCHEDULE_TIMEOUT);
- if (r <= 0)
- DRM_ERROR("(%ld) failed to wait for user bo\n", r);
- }
-}
-
-/**
- * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * Block for operations on BOs to finish and mark pages as accessed and
- * potentially dirty.
- */
-static int
-amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
- const struct mmu_notifier_range *update)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
- unsigned long start = update->start;
- unsigned long end = update->end;
- bool blockable = mmu_notifier_range_blockable(update);
- struct interval_tree_node *it;
+ if (!mmu_notifier_range_blockable(range))
+ return false;
- /* notification is exclusive, but interval is inclusive */
- end -= 1;
+ mutex_lock(&adev->notifier_lock);
- /* TODO we should be able to split locking for interval tree and
- * amdgpu_mn_invalidate_node
- */
- if (amdgpu_mn_read_lock(amn, blockable))
- return -EAGAIN;
+ mmu_interval_set_seq(mni, cur_seq);
- it = interval_tree_iter_first(&amn->objects, start, end);
- while (it) {
- struct amdgpu_mn_node *node;
-
- if (!blockable) {
- amdgpu_mn_read_unlock(amn);
- return -EAGAIN;
- }
-
- node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, start, end);
-
- amdgpu_mn_invalidate_node(node, start, end);
- }
-
- amdgpu_mn_read_unlock(amn);
-
- return 0;
-}
-
-/**
- * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * We temporarily evict all BOs between start and end. This
- * necessitates evicting all user-mode queues of the process. The BOs
- * are restorted in amdgpu_mn_invalidate_range_end_hsa.
- */
-static int
-amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
- const struct mmu_notifier_range *update)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
- unsigned long start = update->start;
- unsigned long end = update->end;
- bool blockable = mmu_notifier_range_blockable(update);
- struct interval_tree_node *it;
-
- /* notification is exclusive, but interval is inclusive */
- end -= 1;
-
- if (amdgpu_mn_read_lock(amn, blockable))
- return -EAGAIN;
-
- it = interval_tree_iter_first(&amn->objects, start, end);
- while (it) {
- struct amdgpu_mn_node *node;
- struct amdgpu_bo *bo;
-
- if (!blockable) {
- amdgpu_mn_read_unlock(amn);
- return -EAGAIN;
- }
-
- node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, start, end);
-
- list_for_each_entry(bo, &node->bos, mn_list) {
- struct kgd_mem *mem = bo->kfd_bo;
-
- if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
- start, end))
- amdgpu_amdkfd_evict_userptr(mem, amn->mm);
- }
- }
-
- amdgpu_mn_read_unlock(amn);
-
- return 0;
+ r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
+ MAX_SCHEDULE_TIMEOUT);
+ mutex_unlock(&adev->notifier_lock);
+ if (r <= 0)
+ DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+ return true;
}
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
-static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
- [AMDGPU_MN_TYPE_GFX] = {
- .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
- .release = amdgpu_hmm_mirror_release
- },
- [AMDGPU_MN_TYPE_HSA] = {
- .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
- .release = amdgpu_hmm_mirror_release
- },
+static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = {
+ .invalidate = amdgpu_mn_invalidate_gfx,
};
/**
- * amdgpu_mn_get - create HMM mirror context
+ * amdgpu_mn_invalidate_hsa - callback to notify about mm change
*
- * @adev: amdgpu device pointer
- * @type: type of MMU notifier context
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
*
- * Creates a HMM mirror context for current->mm.
+ * We temporarily evict the BO attached to this range. This necessitates
+ * evicting all user-mode queues of the process.
*/
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type)
+static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct mm_struct *mm = current->mm;
- struct amdgpu_mn *amn;
- unsigned long key = AMDGPU_MN_KEY(mm, type);
- int r;
-
- mutex_lock(&adev->mn_lock);
- if (down_write_killable(&mm->mmap_sem)) {
- mutex_unlock(&adev->mn_lock);
- return ERR_PTR(-EINTR);
- }
-
- hash_for_each_possible(adev->mn_hash, amn, node, key)
- if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
- goto release_locks;
-
- amn = kzalloc(sizeof(*amn), GFP_KERNEL);
- if (!amn) {
- amn = ERR_PTR(-ENOMEM);
- goto release_locks;
- }
-
- amn->adev = adev;
- amn->mm = mm;
- init_rwsem(&amn->lock);
- amn->type = type;
- amn->objects = RB_ROOT_CACHED;
-
- amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
- r = hmm_mirror_register(&amn->mirror, mm);
- if (r)
- goto free_amn;
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
+ if (!mmu_notifier_range_blockable(range))
+ return false;
-release_locks:
- up_write(&mm->mmap_sem);
- mutex_unlock(&adev->mn_lock);
+ mutex_lock(&adev->notifier_lock);
- return amn;
+ mmu_interval_set_seq(mni, cur_seq);
-free_amn:
- up_write(&mm->mmap_sem);
- mutex_unlock(&adev->mn_lock);
- kfree(amn);
+ amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm);
+ mutex_unlock(&adev->notifier_lock);
- return ERR_PTR(r);
+ return true;
}
+static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = {
+ .invalidate = amdgpu_mn_invalidate_hsa,
+};
+
/**
* amdgpu_mn_register - register a BO for notifier updates
*
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
- * Registers an HMM mirror for the given BO at the specified address.
+ * Registers a mmu_notifier for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
- unsigned long end = addr + amdgpu_bo_size(bo) - 1;
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- enum amdgpu_mn_type type =
- bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
- struct amdgpu_mn *amn;
- struct amdgpu_mn_node *node = NULL, *new_node;
- struct list_head bos;
- struct interval_tree_node *it;
-
- amn = amdgpu_mn_get(adev, type);
- if (IS_ERR(amn))
- return PTR_ERR(amn);
-
- new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
- if (!new_node)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&bos);
-
- down_write(&amn->lock);
-
- while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
- kfree(node);
- node = container_of(it, struct amdgpu_mn_node, it);
- interval_tree_remove(&node->it, &amn->objects);
- addr = min(it->start, addr);
- end = max(it->last, end);
- list_splice(&node->bos, &bos);
- }
-
- if (!node)
- node = new_node;
- else
- kfree(new_node);
-
- bo->mn = amn;
-
- node->it.start = addr;
- node->it.last = end;
- INIT_LIST_HEAD(&node->bos);
- list_splice(&bos, &node->bos);
- list_add(&bo->mn_list, &node->bos);
-
- interval_tree_insert(&node->it, &amn->objects);
-
- up_write(&amn->lock);
-
- return 0;
+ if (bo->kfd_bo)
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm,
+ addr, amdgpu_bo_size(bo),
+ &amdgpu_mn_hsa_ops);
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ amdgpu_bo_size(bo),
+ &amdgpu_mn_gfx_ops);
}
/**
- * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
+ * amdgpu_mn_unregister - unregister a BO for notifier updates
*
* @bo: amdgpu buffer object
*
- * Remove any registration of HMM mirror updates from the buffer object.
+ * Remove any registration of mmu notifier updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_mn *amn;
- struct list_head *head;
-
- mutex_lock(&adev->mn_lock);
-
- amn = bo->mn;
- if (amn == NULL) {
- mutex_unlock(&adev->mn_lock);
+ if (!bo->notifier.mm)
return;
- }
-
- down_write(&amn->lock);
-
- /* save the next list entry for later */
- head = bo->mn_list.next;
-
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
-
- if (list_empty(head)) {
- struct amdgpu_mn_node *node;
-
- node = container_of(head, struct amdgpu_mn_node, bos);
- interval_tree_remove(&node->it, &amn->objects);
- kfree(node);
- }
-
- up_write(&amn->lock);
- mutex_unlock(&adev->mn_lock);
-}
-
-/* flags used by HMM internal, not related to CPU/GPU PTE flags */
-static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
- (1 << 0), /* HMM_PFN_VALID */
- (1 << 1), /* HMM_PFN_WRITE */
- 0 /* HMM_PFN_DEVICE_PRIVATE */
-};
-
-static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
- 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
- 0, /* HMM_PFN_NONE */
- 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
-};
-
-void amdgpu_hmm_init_range(struct hmm_range *range)
-{
- if (range) {
- range->flags = hmm_range_flags;
- range->values = hmm_range_values;
- range->pfn_shift = PAGE_SHIFT;
- }
+ mmu_interval_notifier_remove(&bo->notifier);
+ bo->notifier.mm = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index b8ed68943625..a292238f75eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -30,63 +30,10 @@
#include <linux/workqueue.h>
#include <linux/interval_tree.h>
-enum amdgpu_mn_type {
- AMDGPU_MN_TYPE_GFX,
- AMDGPU_MN_TYPE_HSA,
-};
-
-/**
- * struct amdgpu_mn
- *
- * @adev: amdgpu device pointer
- * @mm: process address space
- * @type: type of MMU notifier
- * @work: destruction work item
- * @node: hash table node to find structure by adev and mn
- * @lock: rw semaphore protecting the notifier nodes
- * @objects: interval tree containing amdgpu_mn_nodes
- * @mirror: HMM mirror function support
- *
- * Data for each amdgpu device and process address space.
- */
-struct amdgpu_mn {
- /* constant after initialisation */
- struct amdgpu_device *adev;
- struct mm_struct *mm;
- enum amdgpu_mn_type type;
-
- /* only used on destruction */
- struct work_struct work;
-
- /* protected by adev->mn_lock */
- struct hlist_node node;
-
- /* objects protected by lock */
- struct rw_semaphore lock;
- struct rb_root_cached objects;
-
-#ifdef CONFIG_HMM_MIRROR
- /* HMM mirror */
- struct hmm_mirror mirror;
-#endif
-};
-
#if defined(CONFIG_HMM_MIRROR)
-void amdgpu_mn_lock(struct amdgpu_mn *mn);
-void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
-void amdgpu_hmm_init_range(struct hmm_range *range);
#else
-static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
-static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type)
-{
- return NULL;
-}
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
new file mode 100644
index 000000000000..7d5c3a9de9ea
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "pcie_bif_err_count",
+ .debugfs_name = "pcie_bif_err_inject",
+ };
+
+ if (!adev->nbio.ras_if) {
+ adev->nbio.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->nbio.ras_if)
+ return -ENOMEM;
+ adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF;
+ adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->nbio.ras_if->sub_block_index = 0;
+ strcpy(adev->nbio.ras_if->name, "pcie_bif");
+ }
+ ih_info.head = fs_info.head = *adev->nbio.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->nbio.ras_if,
+ &fs_info, &ih_info);
+ if (r)
+ goto free;
+
+ if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+ r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0);
+ if (r)
+ goto late_fini;
+ r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ if (r)
+ goto late_fini;
+ } else {
+ r = 0;
+ goto free;
+ }
+
+ return 0;
+late_fini:
+ amdgpu_ras_late_fini(adev, adev->nbio.ras_if, &ih_info);
+free:
+ kfree(adev->nbio.ras_if);
+ adev->nbio.ras_if = NULL;
+ return r;
+}
+
+void amdgpu_nbio_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) &&
+ adev->nbio.ras_if) {
+ struct ras_common_if *ras_if = adev->nbio.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
new file mode 100644
index 000000000000..919bd566ba3c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_NBIO_H__
+#define __AMDGPU_NBIO_H__
+
+/*
+ * amdgpu nbio functions
+ */
+struct nbio_hdp_flush_reg {
+ u32 ref_and_mask_cp0;
+ u32 ref_and_mask_cp1;
+ u32 ref_and_mask_cp2;
+ u32 ref_and_mask_cp3;
+ u32 ref_and_mask_cp4;
+ u32 ref_and_mask_cp5;
+ u32 ref_and_mask_cp6;
+ u32 ref_and_mask_cp7;
+ u32 ref_and_mask_cp8;
+ u32 ref_and_mask_cp9;
+ u32 ref_and_mask_sdma0;
+ u32 ref_and_mask_sdma1;
+ u32 ref_and_mask_sdma2;
+ u32 ref_and_mask_sdma3;
+ u32 ref_and_mask_sdma4;
+ u32 ref_and_mask_sdma5;
+ u32 ref_and_mask_sdma6;
+ u32 ref_and_mask_sdma7;
+};
+
+struct amdgpu_nbio_funcs {
+ const struct nbio_hdp_flush_reg *hdp_flush_reg;
+ u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
+ u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
+ u32 (*get_rev_id)(struct amdgpu_device *adev);
+ void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
+ void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+ u32 (*get_memsize)(struct amdgpu_device *adev);
+ void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size);
+ void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance);
+ void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
+ bool enable);
+ void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
+ bool enable);
+ void (*ih_doorbell_range)(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index);
+ void (*enable_doorbell_interrupt)(struct amdgpu_device *adev,
+ bool enable);
+ void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+ bool enable);
+ void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
+ bool enable);
+ void (*get_clockgating_state)(struct amdgpu_device *adev,
+ u32 *flags);
+ void (*ih_control)(struct amdgpu_device *adev);
+ void (*init_registers)(struct amdgpu_device *adev);
+ void (*detect_hw_virt)(struct amdgpu_device *adev);
+ void (*remap_hdp_registers)(struct amdgpu_device *adev);
+ void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
+ void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
+ int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
+ int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
+ void (*query_ras_error_count)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ int (*ras_late_init)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_nbio {
+ const struct nbio_hdp_flush_reg *hdp_flush_reg;
+ struct amdgpu_irq_src ras_controller_irq;
+ struct amdgpu_irq_src ras_err_event_athub_irq;
+ struct ras_common_if *ras_if;
+ const struct amdgpu_nbio_funcs *funcs;
+};
+
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_nbio_ras_fini(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 1fead0e8b890..e3f16b49e970 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -343,6 +343,70 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
}
/**
+ * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location
+ *
+ * @adev: amdgpu device object
+ * @offset: offset of the BO
+ * @size: size of the BO
+ * @domain: where to place it
+ * @bo_ptr: used to initialize BOs in structures
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Creates a kernel BO at a specific offset in the address space of the domain.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
+ uint64_t offset, uint64_t size, uint32_t domain,
+ struct amdgpu_bo **bo_ptr, void **cpu_addr)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ unsigned int i;
+ int r;
+
+ offset &= PAGE_MASK;
+ size = ALIGN(size, PAGE_SIZE);
+
+ r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr,
+ NULL, cpu_addr);
+ if (r)
+ return r;
+
+ /*
+ * Remove the original mem node and create a new one at the request
+ * position.
+ */
+ if (cpu_addr)
+ amdgpu_bo_kunmap(*bo_ptr);
+
+ ttm_bo_mem_put(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.mem);
+
+ for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
+ (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
+ (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
+ }
+ r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
+ &(*bo_ptr)->tbo.mem, &ctx);
+ if (r)
+ goto error;
+
+ if (cpu_addr) {
+ r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
+ if (r)
+ goto error;
+ }
+
+ amdgpu_bo_unreserve(*bo_ptr);
+ return 0;
+
+error:
+ amdgpu_bo_unreserve(*bo_ptr);
+ amdgpu_bo_unref(bo_ptr);
+ return r;
+}
+
+/**
* amdgpu_bo_free_kernel - free BO for kernel use
*
* @bo: amdgpu BO to free
@@ -451,9 +515,10 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
{
struct ttm_operation_ctx ctx = {
.interruptible = (bp->type != ttm_bo_type_kernel),
- .no_wait_gpu = false,
+ .no_wait_gpu = bp->no_wait_gpu,
.resv = bp->resv,
- .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
+ .flags = bp->type != ttm_bo_type_kernel ?
+ TTM_OPT_FLAG_ALLOW_RES_EVICT : 0
};
struct amdgpu_bo *bo;
unsigned long page_align, size = bp->size;
@@ -1058,7 +1123,10 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
struct vm_area_struct *vma)
{
- return ttm_fbdev_mmap(vma, &bo->tbo);
+ if (vma->vm_pgoff != 0)
+ return -EACCES;
+
+ return ttm_bo_mmap_obj(vma, &bo->tbo);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 658f4c9779b7..36dec51d1ef1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -30,6 +30,9 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
+#ifdef CONFIG_MMU_NOTIFIER
+#include <linux/mmu_notifier.h>
+#endif
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
#define AMDGPU_BO_MAX_PLACEMENTS 3
@@ -41,6 +44,7 @@ struct amdgpu_bo_param {
u32 preferred_domain;
u64 flags;
enum ttm_bo_type type;
+ bool no_wait_gpu;
struct dma_resv *resv;
};
@@ -100,10 +104,12 @@ struct amdgpu_bo {
struct ttm_bo_kmap_obj dma_buf_vmap;
struct amdgpu_mn *mn;
- union {
- struct list_head mn_list;
- struct list_head shadow_list;
- };
+
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_interval_notifier notifier;
+#endif
+
+ struct list_head shadow_list;
struct kgd_mem *kfd_bo;
};
@@ -237,6 +243,9 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
+ uint64_t offset, uint64_t size, uint32_t domain,
+ struct amdgpu_bo **bo_ptr, void **cpu_addr);
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 03930313c263..b03b1eb7ba04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -37,6 +37,7 @@
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
#include <linux/nospec.h>
+#include <linux/pm_runtime.h>
#include "hwmgr.h"
#define WIDTH_4K 3840
@@ -158,10 +159,18 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
enum amd_pm_state_type pm;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
if (adev->smu.ppt_funcs->get_current_power_state)
- pm = amdgpu_smu_get_current_power_state(adev);
+ pm = smu_get_current_power_state(&adev->smu);
else
pm = adev->pm.dpm.user_state;
} else if (adev->powerplay.pp_funcs->get_current_power_state) {
@@ -170,6 +179,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
pm = adev->pm.dpm.user_state;
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return snprintf(buf, PAGE_SIZE, "%s\n",
(pm == POWER_STATE_TYPE_BATTERY) ? "battery" :
(pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance");
@@ -183,6 +195,10 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
enum amd_pm_state_type state;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
if (strncmp("battery", buf, strlen("battery")) == 0)
state = POWER_STATE_TYPE_BATTERY;
@@ -190,10 +206,12 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
state = POWER_STATE_TYPE_BALANCED;
else if (strncmp("performance", buf, strlen("performance")) == 0)
state = POWER_STATE_TYPE_PERFORMANCE;
- else {
- count = -EINVAL;
- goto fail;
- }
+ else
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
mutex_lock(&adev->pm.mutex);
@@ -206,12 +224,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
adev->pm.dpm.user_state = state;
mutex_unlock(&adev->pm.mutex);
- /* Can't set dpm state when the card is off */
- if (!(adev->flags & AMD_IS_PX) ||
- (ddev->switch_power_state == DRM_SWITCH_POWER_ON))
- amdgpu_pm_compute_clocks(adev);
+ amdgpu_pm_compute_clocks(adev);
}
-fail:
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return count;
}
@@ -282,13 +299,14 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
enum amd_dpm_forced_level level = 0xff;
+ int ret;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
return 0;
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return snprintf(buf, PAGE_SIZE, "off\n");
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
level = smu_get_performance_level(&adev->smu);
@@ -297,6 +315,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
else
level = adev->pm.dpm.forced_level;
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return snprintf(buf, PAGE_SIZE, "%s\n",
(level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" :
(level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" :
@@ -320,9 +341,7 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
enum amd_dpm_forced_level current_level = 0xff;
int ret = 0;
- /* Can't force performance level when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
return -EINVAL;
if (strncmp("low", buf, strlen("low")) == 0) {
@@ -344,30 +363,23 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
} else if (strncmp("profile_peak", buf, strlen("profile_peak")) == 0) {
level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
} else {
- count = -EINVAL;
- goto fail;
+ return -EINVAL;
}
- /* handle sriov case here */
- if (amdgpu_sriov_vf(adev)) {
- if (amdgim_is_hwperf(adev) &&
- adev->virt.ops->force_dpm_level) {
- mutex_lock(&adev->pm.mutex);
- adev->virt.ops->force_dpm_level(adev, level);
- mutex_unlock(&adev->pm.mutex);
- return count;
- } else {
- return -EINVAL;
- }
- }
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
current_level = smu_get_performance_level(&adev->smu);
else if (adev->powerplay.pp_funcs->get_performance_level)
current_level = amdgpu_dpm_get_performance_level(adev);
- if (current_level == level)
+ if (current_level == level) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return count;
+ }
/* profile_exit setting is valid only when current mode is in profile mode */
if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD |
@@ -376,29 +388,40 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) &&
(level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) {
pr_err("Currently not in any profile mode!\n");
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
}
if (is_support_sw_smu(adev)) {
ret = smu_force_performance_level(&adev->smu, level);
- if (ret)
- count = -EINVAL;
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+ return -EINVAL;
+ }
} else if (adev->powerplay.pp_funcs->force_performance_level) {
mutex_lock(&adev->pm.mutex);
if (adev->pm.dpm.thermal_active) {
- count = -EINVAL;
mutex_unlock(&adev->pm.mutex);
- goto fail;
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+ return -EINVAL;
}
ret = amdgpu_dpm_force_performance_level(adev, level);
- if (ret)
- count = -EINVAL;
- else
+ if (ret) {
+ mutex_unlock(&adev->pm.mutex);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+ return -EINVAL;
+ } else {
adev->pm.dpm.forced_level = level;
+ }
mutex_unlock(&adev->pm.mutex);
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
-fail:
return count;
}
@@ -411,6 +434,10 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
struct pp_states_info data;
int i, buf_len, ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
ret = smu_get_power_num_states(&adev->smu, &data);
if (ret)
@@ -418,6 +445,9 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
} else if (adev->powerplay.pp_funcs->get_pp_num_states)
amdgpu_dpm_get_pp_num_states(adev, &data);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums);
for (i = 0; i < data.nums; i++)
buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i,
@@ -440,6 +470,13 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
enum amd_pm_state_type pm = 0;
int i = 0, ret = 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
pm = smu_get_current_power_state(smu);
ret = smu_get_power_num_states(smu, &data);
@@ -451,6 +488,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
amdgpu_dpm_get_pp_num_states(adev, &data);
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
for (i = 0; i < data.nums; i++) {
if (pm == data.states[i])
break;
@@ -469,6 +509,9 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
if (adev->pp_force_state_enabled)
return amdgpu_get_pp_cur_state(dev, attr, buf);
else
@@ -486,6 +529,9 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
unsigned long idx;
int ret;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
+
if (strlen(buf) == 1)
adev->pp_force_state_enabled = false;
else if (is_support_sw_smu(adev))
@@ -495,14 +541,18 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
struct pp_states_info data;
ret = kstrtoul(buf, 0, &idx);
- if (ret || idx >= ARRAY_SIZE(data.states)) {
- count = -EINVAL;
- goto fail;
- }
+ if (ret || idx >= ARRAY_SIZE(data.states))
+ return -EINVAL;
+
idx = array_index_nospec(idx, ARRAY_SIZE(data.states));
amdgpu_dpm_get_pp_num_states(adev, &data);
state = data.states[idx];
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
/* only set user selected power states */
if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
state != POWER_STATE_TYPE_DEFAULT) {
@@ -510,8 +560,10 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
AMD_PP_TASK_ENABLE_USER_STATE, &state);
adev->pp_force_state_enabled = true;
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
}
-fail:
+
return count;
}
@@ -533,17 +585,32 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
char *table = NULL;
- int size;
+ int size, ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
size = smu_sys_get_pp_table(&adev->smu, (void **)&table);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
if (size < 0)
return size;
- }
- else if (adev->powerplay.pp_funcs->get_pp_table)
+ } else if (adev->powerplay.pp_funcs->get_pp_table) {
size = amdgpu_dpm_get_pp_table(adev, &table);
- else
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+ if (size < 0)
+ return size;
+ } else {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return 0;
+ }
if (size >= PAGE_SIZE)
size = PAGE_SIZE - 1;
@@ -562,13 +629,26 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int ret = 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count);
- if (ret)
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return ret;
+ }
} else if (adev->powerplay.pp_funcs->set_pp_table)
amdgpu_dpm_set_pp_table(adev, buf, count);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return count;
}
@@ -654,6 +734,9 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
const char delimiter[3] = {' ', '\n', '\0'};
uint32_t type;
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
if (count > 127)
return -EINVAL;
@@ -689,18 +772,28 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
tmp_str++;
}
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
ret = smu_od_edit_dpm_table(&adev->smu, type,
parameter, parameter_size);
- if (ret)
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
+ }
} else {
if (adev->powerplay.pp_funcs->odn_edit_dpm_table) {
ret = amdgpu_dpm_odn_edit_dpm_table(adev, type,
parameter, parameter_size);
- if (ret)
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
+ }
}
if (type == PP_OD_COMMIT_DPM_TABLE) {
@@ -708,12 +801,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
amdgpu_dpm_dispatch_task(adev,
AMD_PP_TASK_READJUST_POWER_STATE,
NULL);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return count;
} else {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
}
}
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return count;
}
@@ -724,24 +823,33 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- uint32_t size = 0;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf);
size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size);
size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size);
size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size);
- return size;
} else if (adev->powerplay.pp_funcs->print_clock_levels) {
size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size);
size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
- return size;
} else {
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+ return size;
}
/**
@@ -770,21 +878,36 @@ static ssize_t amdgpu_set_pp_feature_status(struct device *dev,
uint64_t featuremask;
int ret;
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
ret = kstrtou64(buf, 0, &featuremask);
if (ret)
return -EINVAL;
pr_debug("featuremask = 0x%llx\n", featuremask);
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);
- if (ret)
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
+ }
} else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
- if (ret)
+ if (ret) {
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return -EINVAL;
+ }
}
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
return count;
}
@@ -795,18 +918,31 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
- if (is_support_sw_smu(adev)) {
- return smu_sys_get_pp_feature_mask(&adev->smu, buf);
- } else if (adev->powerplay.pp_funcs->get_ppfeature_status)
- return amdgpu_dpm_get_ppfeature_status(adev, buf);
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
+ if (is_support_sw_smu(adev))
+ size = smu_sys_get_pp_feature_mask(&adev->smu, buf);
+ else if (adev->powerplay.pp_funcs->get_ppfeature_status)
+ size = amdgpu_dpm_get_ppfeature_status(adev, buf);
+ else
+ size = snprintf(buf, PAGE_SIZE, "\n");
- return snprintf(buf, PAGE_SIZE, "\n");
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
/**
- * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk
- * pp_dpm_pcie
+ * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk pp_dpm_pcie
*
* The amdgpu driver provides a sysfs API for adjusting what power levels
* are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk,
@@ -822,9 +958,15 @@ static ssize_t amdgpu_get_pp_feature_status(struct device *dev,
*
* To manually adjust these states, first select manual using
* power_dpm_force_performance_level.
- * Secondly,Enter a new value for each level by inputing a string that
+ * Secondly, enter a new value for each level by inputing a string that
* contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie"
- * E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6.
+ * E.g.,
+ *
+ * .. code-block:: bash
+ *
+ * echo "4 5 6" > pp_dpm_sclk
+ *
+ * will enable sclk levels 4, 5, and 6.
*
* NOTE: change to the dcefclk max dpm level is not supported now
*/
@@ -835,17 +977,27 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
- if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) &&
- adev->virt.ops->get_pp_clk)
- return adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf);
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
/*
@@ -894,18 +1046,25 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
int ret;
uint32_t mask = 0;
- if (amdgpu_sriov_vf(adev))
- return 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
if (ret)
return -EINVAL;
@@ -918,17 +1077,27 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
- if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) &&
- adev->virt.ops->get_pp_clk)
- return adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf);
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
@@ -938,21 +1107,28 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- int ret;
uint32_t mask = 0;
+ int ret;
- if (amdgpu_sriov_vf(adev))
- return 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
if (ret)
return -EINVAL;
@@ -965,13 +1141,27 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
@@ -984,14 +1174,26 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
int ret;
uint32_t mask = 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
+
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask);
+ else
+ ret = 0;
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
if (ret)
return -EINVAL;
@@ -1005,13 +1207,27 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
@@ -1024,14 +1240,26 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
int ret;
uint32_t mask = 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
+
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask);
+ else
+ ret = 0;
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
if (ret)
return -EINVAL;
@@ -1045,13 +1273,27 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
@@ -1064,14 +1306,26 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
int ret;
uint32_t mask = 0;
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask);
+ else
+ ret = 0;
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
if (ret)
return -EINVAL;
@@ -1085,13 +1339,27 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
+ size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
- return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
+ size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
else
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
@@ -1104,14 +1372,26 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
int ret;
uint32_t mask = 0;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
+
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+ else
+ ret = 0;
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
if (ret)
return -EINVAL;
@@ -1126,12 +1406,23 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint32_t value = 0;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK);
else if (adev->powerplay.pp_funcs->get_sclk_od)
value = amdgpu_dpm_get_sclk_od(adev);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
@@ -1145,12 +1436,17 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
int ret;
long int value;
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
ret = kstrtol(buf, 0, &value);
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
+ if (ret)
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value);
@@ -1166,7 +1462,9 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
}
}
-fail:
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return count;
}
@@ -1177,12 +1475,23 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint32_t value = 0;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK);
else if (adev->powerplay.pp_funcs->get_mclk_od)
value = amdgpu_dpm_get_mclk_od(adev);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
@@ -1196,12 +1505,17 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
int ret;
long int value;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
ret = kstrtol(buf, 0, &value);
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
+ if (ret)
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value);
@@ -1217,7 +1531,9 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
}
}
-fail:
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return count;
}
@@ -1247,13 +1563,27 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev))
- return smu_get_power_profile_mode(&adev->smu, buf);
+ size = smu_get_power_profile_mode(&adev->smu, buf);
else if (adev->powerplay.pp_funcs->get_power_profile_mode)
- return amdgpu_dpm_get_power_profile_mode(adev, buf);
+ size = amdgpu_dpm_get_power_profile_mode(adev, buf);
+ else
+ size = snprintf(buf, PAGE_SIZE, "\n");
- return snprintf(buf, PAGE_SIZE, "\n");
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ return size;
}
@@ -1278,7 +1608,10 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
tmp[1] = '\0';
ret = kstrtol(tmp, 0, &profile_mode);
if (ret)
- goto fail;
+ return -EINVAL;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return -EINVAL;
if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
if (count < 2 || count > 127)
@@ -1290,23 +1623,30 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
while (tmp_str[0]) {
sub_str = strsep(&tmp_str, delimiter);
ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
+ if (ret)
+ return -EINVAL;
parameter_size++;
while (isspace(*tmp_str))
tmp_str++;
}
}
parameter[parameter_size] = profile_mode;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev))
- ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size);
+ ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true);
else if (adev->powerplay.pp_funcs->set_power_profile_mode)
ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
if (!ret)
return count;
-fail:
+
return -EINVAL;
}
@@ -1326,10 +1666,20 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int r, value, size = sizeof(value);
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ r = pm_runtime_get_sync(ddev->dev);
+ if (r < 0)
+ return r;
+
/* read the IP busy sensor */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
(void *)&value, &size);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
if (r)
return r;
@@ -1352,10 +1702,20 @@ static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
int r, value, size = sizeof(value);
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ r = pm_runtime_get_sync(ddev->dev);
+ if (r < 0)
+ return r;
+
/* read the IP busy sensor */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
(void *)&value, &size);
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
if (r)
return r;
@@ -1381,8 +1741,20 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint64_t count0, count1;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0)
+ return ret;
amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
+
+ pm_runtime_mark_last_busy(ddev->dev);
+ pm_runtime_put_autosuspend(ddev->dev);
+
return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n",
count0, count1, pcie_get_mps(adev->pdev));
}
@@ -1404,6 +1776,9 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
if (adev->unique_id)
return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
@@ -1467,42 +1842,43 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
int channel = to_sensor_dev_attr(attr)->index;
int r, temp = 0, size = sizeof(temp);
- /* Can't get temperature when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
-
if (channel >= PP_TEMP_MAX)
return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
switch (channel) {
case PP_TEMP_JUNCTION:
/* get current junction temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
(void *)&temp, &size);
- if (r)
- return r;
break;
case PP_TEMP_EDGE:
/* get current edge temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
(void *)&temp, &size);
- if (r)
- return r;
break;
case PP_TEMP_MEM:
/* get current memory temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
(void *)&temp, &size);
- if (r)
- return r;
+ break;
+ default:
+ r = -EINVAL;
break;
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (r)
+ return r;
+
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
@@ -1598,15 +1974,27 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
u32 pwm_mode = 0;
+ int ret;
+
+ ret = pm_runtime_get_sync(adev->ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
pwm_mode = smu_get_fan_control_mode(&adev->smu);
} else {
- if (!adev->powerplay.pp_funcs->get_fan_control_mode)
+ if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
+ }
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return sprintf(buf, "%i\n", pwm_mode);
}
@@ -1616,27 +2004,32 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
size_t count)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- int err;
+ int err, ret;
int value;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
-
err = kstrtoint(buf, 10, &value);
if (err)
return err;
+ ret = pm_runtime_get_sync(adev->ddev->dev);
+ if (ret < 0)
+ return ret;
+
if (is_support_sw_smu(adev)) {
smu_set_fan_control_mode(&adev->smu, value);
} else {
- if (!adev->powerplay.pp_funcs->set_fan_control_mode)
+ if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
+ }
amdgpu_dpm_set_fan_control_mode(adev, value);
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return count;
}
@@ -1663,34 +2056,43 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
u32 value;
u32 pwm_mode;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
+
if (is_support_sw_smu(adev))
pwm_mode = smu_get_fan_control_mode(&adev->smu);
else
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
+
if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
pr_info("manual fan speed control should be enabled first\n");
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
}
err = kstrtou32(buf, 10, &value);
- if (err)
+ if (err) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
+ }
value = (value * 100) / 255;
- if (is_support_sw_smu(adev)) {
+ if (is_support_sw_smu(adev))
err = smu_set_fan_speed_percent(&adev->smu, value);
- if (err)
- return err;
- } else if (adev->powerplay.pp_funcs->set_fan_speed_percent) {
+ else if (adev->powerplay.pp_funcs->set_fan_speed_percent)
err = amdgpu_dpm_set_fan_speed_percent(adev, value);
- if (err)
- return err;
- }
+ else
+ err = -EINVAL;
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (err)
+ return err;
return count;
}
@@ -1703,20 +2105,22 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
int err;
u32 speed = 0;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
- if (is_support_sw_smu(adev)) {
+ if (is_support_sw_smu(adev))
err = smu_get_fan_speed_percent(&adev->smu, &speed);
- if (err)
- return err;
- } else if (adev->powerplay.pp_funcs->get_fan_speed_percent) {
+ else if (adev->powerplay.pp_funcs->get_fan_speed_percent)
err = amdgpu_dpm_get_fan_speed_percent(adev, &speed);
- if (err)
- return err;
- }
+ else
+ err = -EINVAL;
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (err)
+ return err;
speed = (speed * 255) / 100;
@@ -1731,20 +2135,22 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
int err;
u32 speed = 0;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
- if (is_support_sw_smu(adev)) {
+ if (is_support_sw_smu(adev))
err = smu_get_fan_speed_rpm(&adev->smu, &speed);
- if (err)
- return err;
- } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
+ else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed);
- if (err)
- return err;
- }
+ else
+ err = -EINVAL;
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (err)
+ return err;
return sprintf(buf, "%i\n", speed);
}
@@ -1758,8 +2164,16 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
u32 size = sizeof(min_rpm);
int r;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
(void *)&min_rpm, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -1775,8 +2189,16 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
u32 size = sizeof(max_rpm);
int r;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
+
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
(void *)&max_rpm, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -1791,20 +2213,22 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
int err;
u32 rpm = 0;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
- if (is_support_sw_smu(adev)) {
+ if (is_support_sw_smu(adev))
err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
- if (err)
- return err;
- } else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
+ else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm);
- if (err)
- return err;
- }
+ else
+ err = -EINVAL;
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (err)
+ return err;
return sprintf(buf, "%i\n", rpm);
}
@@ -1818,32 +2242,40 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
u32 value;
u32 pwm_mode;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
+
if (is_support_sw_smu(adev))
pwm_mode = smu_get_fan_control_mode(&adev->smu);
else
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
- if (pwm_mode != AMD_FAN_CTRL_MANUAL)
+ if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -ENODATA;
-
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ }
err = kstrtou32(buf, 10, &value);
- if (err)
+ if (err) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return err;
+ }
- if (is_support_sw_smu(adev)) {
+ if (is_support_sw_smu(adev))
err = smu_set_fan_speed_rpm(&adev->smu, value);
- if (err)
- return err;
- } else if (adev->powerplay.pp_funcs->set_fan_speed_rpm) {
+ else if (adev->powerplay.pp_funcs->set_fan_speed_rpm)
err = amdgpu_dpm_set_fan_speed_rpm(adev, value);
- if (err)
- return err;
- }
+ else
+ err = -EINVAL;
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ if (err)
+ return err;
return count;
}
@@ -1854,15 +2286,27 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
u32 pwm_mode = 0;
+ int ret;
+
+ ret = pm_runtime_get_sync(adev->ddev->dev);
+ if (ret < 0)
+ return ret;
if (is_support_sw_smu(adev)) {
pwm_mode = smu_get_fan_control_mode(&adev->smu);
} else {
- if (!adev->powerplay.pp_funcs->get_fan_control_mode)
+ if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
+ }
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
}
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1);
}
@@ -1876,12 +2320,6 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
int value;
u32 pwm_mode;
- /* Can't adjust fan when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
-
-
err = kstrtoint(buf, 10, &value);
if (err)
return err;
@@ -1893,14 +2331,24 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
else
return -EINVAL;
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
+
if (is_support_sw_smu(adev)) {
smu_set_fan_control_mode(&adev->smu, pwm_mode);
} else {
- if (!adev->powerplay.pp_funcs->set_fan_control_mode)
+ if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
return -EINVAL;
+ }
amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
}
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
return count;
}
@@ -1909,18 +2357,20 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
u32 vddgfx;
int r, size = sizeof(vddgfx);
- /* Can't get voltage when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
(void *)&vddgfx, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -1939,7 +2389,6 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
u32 vddnb;
int r, size = sizeof(vddnb);
@@ -1947,14 +2396,17 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
if (!(adev->flags & AMD_IS_APU))
return -EINVAL;
- /* Can't get voltage when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
(void *)&vddnb, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -1973,19 +2425,21 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
u32 query = 0;
int r, size = sizeof(u32);
unsigned uw;
- /* Can't get power when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
/* get the voltage */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
(void *)&query, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -2008,16 +2462,27 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
uint32_t limit = 0;
+ ssize_t size;
+ int r;
+
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
if (is_support_sw_smu(adev)) {
- smu_get_power_limit(&adev->smu, &limit, true);
- return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+ smu_get_power_limit(&adev->smu, &limit, true, true);
+ size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true);
- return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+ size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else {
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
}
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ return size;
}
static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
@@ -2026,16 +2491,27 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
uint32_t limit = 0;
+ ssize_t size;
+ int r;
+
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
if (is_support_sw_smu(adev)) {
- smu_get_power_limit(&adev->smu, &limit, false);
- return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+ smu_get_power_limit(&adev->smu, &limit, false, true);
+ size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false);
- return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+ size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
} else {
- return snprintf(buf, PAGE_SIZE, "\n");
+ size = snprintf(buf, PAGE_SIZE, "\n");
}
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
+ return size;
}
@@ -2048,19 +2524,29 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
int err;
u32 value;
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
err = kstrtou32(buf, 10, &value);
if (err)
return err;
value = value / 1000000; /* convert to Watt */
- if (is_support_sw_smu(adev)) {
+
+ err = pm_runtime_get_sync(adev->ddev->dev);
+ if (err < 0)
+ return err;
+
+ if (is_support_sw_smu(adev))
err = smu_set_power_limit(&adev->smu, value);
- } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) {
+ else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit)
err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value);
- } else {
+ else
err = -EINVAL;
- }
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
if (err)
return err;
@@ -2073,18 +2559,20 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
uint32_t sclk;
int r, size = sizeof(sclk);
- /* Can't get voltage when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
/* get the sclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
(void *)&sclk, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -2103,18 +2591,20 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
- struct drm_device *ddev = adev->ddev;
uint32_t mclk;
int r, size = sizeof(mclk);
- /* Can't get voltage when the card is off */
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
- return -EINVAL;
+ r = pm_runtime_get_sync(adev->ddev->dev);
+ if (r < 0)
+ return r;
/* get the sclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
(void *)&mclk, &size);
+
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+
if (r)
return r;
@@ -2196,9 +2686,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
* - fan1_input: fan speed in RPM
*
- * - fan[1-*]_target: Desired fan speed Unit: revolution/min (RPM)
+ * - fan[1-\*]_target: Desired fan speed Unit: revolution/min (RPM)
*
- * - fan[1-*]_enable: Enable or disable the sensors.1: Enable 0: Disable
+ * - fan[1-\*]_enable: Enable or disable the sensors.1: Enable 0: Disable
*
* hwmon interfaces for GPU clocks:
*
@@ -2294,6 +2784,23 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
struct amdgpu_device *adev = dev_get_drvdata(dev);
umode_t effective_mode = attr->mode;
+ /* under multi-vf mode, the hwmon attributes are all not supported */
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ return 0;
+
+ /* there is no fan under pp one vf mode */
+ if (amdgpu_sriov_is_pp_one_vf(adev) &&
+ (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
+ return 0;
+
/* Skip fan attributes if fan is not present */
if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
@@ -2661,17 +3168,12 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
- if (is_support_sw_smu(adev)) {
- ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_UVD, enable);
- if (ret)
- DRM_ERROR("[SW SMU]: dpm enable uvd failed, state = %s, ret = %d. \n",
- enable ? "true" : "false", ret);
- } else if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
- /* enable/disable UVD */
- mutex_lock(&adev->pm.mutex);
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
- mutex_unlock(&adev->pm.mutex);
- }
+
+ ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
+ if (ret)
+ DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
+ enable ? "enable" : "disable", ret);
+
/* enable/disable Low Memory PState for UVD (4k videos) */
if (adev->asic_type == CHIP_STONEY &&
adev->uvd.decode_image_width >= WIDTH_4K) {
@@ -2688,17 +3190,11 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
- if (is_support_sw_smu(adev)) {
- ret = smu_dpm_set_power_gate(&adev->smu, AMD_IP_BLOCK_TYPE_VCE, enable);
- if (ret)
- DRM_ERROR("[SW SMU]: dpm enable vce failed, state = %s, ret = %d. \n",
- enable ? "true" : "false", ret);
- } else if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
- /* enable/disable VCE */
- mutex_lock(&adev->pm.mutex);
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
- mutex_unlock(&adev->pm.mutex);
- }
+
+ ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
+ if (ret)
+ DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
+ enable ? "enable" : "disable", ret);
}
void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
@@ -2713,42 +3209,14 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
}
-int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev)
+void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
- if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)))
- return ret;
-
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_sclk\n");
- return ret;
- }
-
- ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
- if (ret) {
- DRM_ERROR("failed to create device file pp_dpm_mclk\n");
- return ret;
- }
-
- ret = device_create_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
- if (ret) {
- DRM_ERROR("failed to create device file for dpm state\n");
- return ret;
- }
-
- return ret;
-}
-
-void amdgpu_pm_virt_sysfs_fini(struct amdgpu_device *adev)
-{
- if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)))
- return;
-
- device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
- device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
- device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
+ ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_JPEG, !enable);
+ if (ret)
+ DRM_ERROR("Dpm %s jpeg failed, ret = %d. \n",
+ enable ? "enable" : "disable", ret);
}
int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version)
@@ -2825,6 +3293,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
DRM_ERROR("failed to create device file pp_dpm_sclk\n");
return ret;
}
+
+ /* Arcturus does not support standalone mclk/socclk/fclk level setting */
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ dev_attr_pp_dpm_mclk.attr.mode &= ~S_IWUGO;
+ dev_attr_pp_dpm_mclk.store = NULL;
+
+ dev_attr_pp_dpm_socclk.attr.mode &= ~S_IWUGO;
+ dev_attr_pp_dpm_socclk.store = NULL;
+
+ dev_attr_pp_dpm_fclk.attr.mode &= ~S_IWUGO;
+ dev_attr_pp_dpm_fclk.store = NULL;
+ }
+
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_mclk\n");
@@ -3008,7 +3489,8 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
struct smu_dpm_context *smu_dpm = &adev->smu.smu_dpm;
smu_handle_task(&adev->smu,
smu_dpm->dpm_level,
- AMD_PP_TASK_DISPLAY_CONFIG_CHANGE);
+ AMD_PP_TASK_DISPLAY_CONFIG_CHANGE,
+ true);
} else {
if (adev->powerplay.pp_funcs->dispatch_tasks) {
if (!amdgpu_device_has_dc_support(adev)) {
@@ -3144,8 +3626,12 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
- struct drm_device *ddev = adev->ddev;
u32 flags = 0;
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ return r;
amdgpu_device_ip_get_clockgating_state(adev, &flags);
seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags);
@@ -3154,23 +3640,28 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
if (!adev->pm.dpm_enabled) {
seq_printf(m, "dpm not enabled\n");
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
return 0;
}
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
- seq_printf(m, "PX asic powered off\n");
- } else if (!is_support_sw_smu(adev) && adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
+
+ if (!is_support_sw_smu(adev) &&
+ adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
mutex_lock(&adev->pm.mutex);
if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level)
adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m);
else
seq_printf(m, "Debugfs support not implemented for this asic\n");
mutex_unlock(&adev->pm.mutex);
+ r = 0;
} else {
- return amdgpu_debugfs_pm_info_pp(m, adev);
+ r = amdgpu_debugfs_pm_info_pp(m, adev);
}
- return 0;
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return r;
}
static const struct drm_info_list amdgpu_pm_info_list[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
index ef31448ee8d8..3da1da277805 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
@@ -41,5 +41,6 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev);
void amdgpu_dpm_thermal_work_handler(struct work_struct *work);
void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable);
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable);
+void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
index 0e6dba9f60f0..07914e34bc25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
@@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
if (!(flags & PERF_EF_RELOAD))
- pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1);
+ pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
- pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0);
+ pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 0);
break;
default:
break;
@@ -101,13 +101,13 @@ static void amdgpu_perf_read(struct perf_event *event)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf,
+ pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->conf,
&count);
break;
default:
count = 0;
break;
- };
+ }
} while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev);
local64_add(count - prev, &event->count);
@@ -126,11 +126,11 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0);
+ pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 0);
break;
default:
break;
- };
+ }
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
@@ -156,11 +156,11 @@ static int amdgpu_perf_add(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1);
+ retval = pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1);
break;
default:
return 0;
- };
+ }
if (retval)
return retval;
@@ -184,11 +184,11 @@ static void amdgpu_perf_del(struct perf_event *event, int flags)
switch (pe->pmu_perf_type) {
case PERF_TYPE_AMDGPU_DF:
- pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1);
+ pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 1);
break;
default:
break;
- };
+ }
perf_event_update_userpage(event);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 4d71537a960d..3a1570dafe34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -34,6 +34,8 @@
#include "psp_v11_0.h"
#include "psp_v12_0.h"
+#include "amdgpu_ras.h"
+
static void psp_set_funcs(struct amdgpu_device *adev);
static int psp_early_init(void *handle)
@@ -88,6 +90,17 @@ static int psp_sw_init(void *handle)
return ret;
}
+ ret = psp_mem_training_init(psp);
+ if (ret) {
+ DRM_ERROR("Failed to initialize memory training!\n");
+ return ret;
+ }
+ ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT);
+ if (ret) {
+ DRM_ERROR("Failed to process memory training!\n");
+ return ret;
+ }
+
return 0;
}
@@ -95,6 +108,7 @@ static int psp_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ psp_mem_training_fini(&adev->psp);
release_firmware(adev->psp.sos_fw);
adev->psp.sos_fw = NULL;
release_firmware(adev->psp.asd_fw);
@@ -144,17 +158,26 @@ psp_cmd_submit_buf(struct psp_context *psp,
memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
index = atomic_inc_return(&psp->fence_value);
- ret = psp_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index);
+ ret = psp_ring_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index);
if (ret) {
atomic_dec(&psp->fence_value);
mutex_unlock(&psp->mutex);
return ret;
}
+ amdgpu_asic_invalidate_hdp(psp->adev, NULL);
while (*((unsigned int *)psp->fence_buf) != index) {
if (--timeout == 0)
break;
+ /*
+ * Shouldn't wait for timeout when err_event_athub occurs,
+ * because gpu reset thread triggered and lock resource should
+ * be released for psp resume sequence.
+ */
+ if (amdgpu_ras_intr_triggered())
+ break;
msleep(1);
+ amdgpu_asic_invalidate_hdp(psp->adev, NULL);
}
/* In some cases, psp response status is not 0 even there is no
@@ -168,8 +191,9 @@ psp_cmd_submit_buf(struct psp_context *psp,
if (ucode)
DRM_WARN("failed to load ucode id (%d) ",
ucode->ucode_id);
- DRM_WARN("psp command failed and response status is (0x%X)\n",
- psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK);
+ DRM_WARN("psp command (0x%X) failed and response status is (0x%X)\n",
+ psp->cmd_buf_mem->cmd_id,
+ psp->cmd_buf_mem->resp.status);
if (!timeout) {
mutex_unlock(&psp->mutex);
return -EINVAL;
@@ -253,7 +277,8 @@ static int psp_tmr_init(struct psp_context *psp)
/* For ASICs support RLC autoload, psp will parse the toc
* and calculate the total size of TMR needed */
- if (psp->toc_start_addr &&
+ if (!amdgpu_sriov_vf(psp->adev) &&
+ psp->toc_start_addr &&
psp->toc_bin_size &&
psp->fw_pri_buf) {
ret = psp_load_toc(psp, &tmr_size);
@@ -287,47 +312,23 @@ static int psp_tmr_load(struct psp_context *psp)
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
- if (ret)
- goto failed;
kfree(cmd);
- return 0;
-
-failed:
- kfree(cmd);
return ret;
}
-static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint64_t asd_mc, uint64_t asd_mc_shared,
- uint32_t size, uint32_t shared_size)
+static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint64_t asd_mc, uint32_t size)
{
cmd->cmd_id = GFX_CMD_ID_LOAD_ASD;
cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(asd_mc);
cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(asd_mc);
cmd->cmd.cmd_load_ta.app_len = size;
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(asd_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(asd_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
-}
-
-static int psp_asd_init(struct psp_context *psp)
-{
- int ret;
-
- /*
- * Allocate 16k memory aligned to 4k from Frame Buffer (local
- * physical) for shared ASD <-> Driver
- */
- ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &psp->asd_shared_bo,
- &psp->asd_shared_mc_addr,
- &psp->asd_shared_buf);
-
- return ret;
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = 0;
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = 0;
+ cmd->cmd.cmd_load_ta.cmd_buf_len = 0;
}
static int psp_asd_load(struct psp_context *psp)
@@ -349,11 +350,49 @@ static int psp_asd_load(struct psp_context *psp)
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
memcpy(psp->fw_pri_buf, psp->asd_start_addr, psp->asd_ucode_size);
- psp_prep_asd_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->asd_shared_mc_addr,
- psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
+ psp_prep_asd_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
+ psp->asd_ucode_size);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+ if (!ret) {
+ psp->asd_context.asd_initialized = true;
+ psp->asd_context.session_id = cmd->resp.session_id;
+ }
+
+ kfree(cmd);
+
+ return ret;
+}
+
+static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t session_id)
+{
+ cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
+ cmd->cmd.cmd_unload_ta.session_id = session_id;
+}
+
+static int psp_asd_unload(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->asd_context.asd_initialized)
+ return 0;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ psp_prep_ta_unload_cmd_buf(cmd, psp->asd_context.session_id);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
+ if (!ret)
+ psp->asd_context.asd_initialized = false;
kfree(cmd);
@@ -388,18 +427,20 @@ int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
return ret;
}
-static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
- uint32_t xgmi_ta_size, uint32_t shared_size)
+static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint64_t ta_bin_mc,
+ uint32_t ta_bin_size,
+ uint64_t ta_shared_mc,
+ uint32_t ta_shared_size)
{
- cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
- cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc);
- cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc);
- cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size;
-
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
+ cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
+ cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
+ cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc);
+ cmd->cmd.cmd_load_ta.app_len = ta_bin_size;
+
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ta_shared_mc);
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ta_shared_mc);
+ cmd->cmd.cmd_load_ta.cmd_buf_len = ta_shared_size;
}
static int psp_xgmi_init_shared_buf(struct psp_context *psp)
@@ -419,6 +460,36 @@ static int psp_xgmi_init_shared_buf(struct psp_context *psp)
return ret;
}
+static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t ta_cmd_id,
+ uint32_t session_id)
+{
+ cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
+ cmd->cmd.cmd_invoke_cmd.session_id = session_id;
+ cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
+}
+
+int psp_ta_invoke(struct psp_context *psp,
+ uint32_t ta_cmd_id,
+ uint32_t session_id)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ psp_prep_ta_invoke_cmd_buf(cmd, ta_cmd_id, session_id);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ kfree(cmd);
+
+ return ret;
+}
+
static int psp_xgmi_load(struct psp_context *psp)
{
int ret;
@@ -427,8 +498,6 @@ static int psp_xgmi_load(struct psp_context *psp)
/*
* TODO: bypass the loading in sriov for now
*/
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
@@ -437,9 +506,11 @@ static int psp_xgmi_load(struct psp_context *psp)
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size);
- psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
- psp->xgmi_context.xgmi_shared_mc_addr,
- psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE);
+ psp_prep_ta_load_cmd_buf(cmd,
+ psp->fw_pri_mc_addr,
+ psp->ta_xgmi_ucode_size,
+ psp->xgmi_context.xgmi_shared_mc_addr,
+ PSP_XGMI_SHARED_MEM_SIZE);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
@@ -454,29 +525,25 @@ static int psp_xgmi_load(struct psp_context *psp)
return ret;
}
-static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint32_t xgmi_session_id)
-{
- cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
- cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id;
-}
-
static int psp_xgmi_unload(struct psp_context *psp)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* XGMI TA unload currently is not supported on Arcturus */
+ if (adev->asic_type == CHIP_ARCTURUS)
+ return 0;
/*
* TODO: bypass the unloading in sriov for now
*/
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
- psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
+ psp_prep_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
@@ -486,40 +553,9 @@ static int psp_xgmi_unload(struct psp_context *psp)
return ret;
}
-static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint32_t ta_cmd_id,
- uint32_t xgmi_session_id)
-{
- cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
- cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id;
- cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
- /* Note: cmd_invoke_cmd.buf is not used for now */
-}
-
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- /*
- * TODO: bypass the loading in sriov for now
- */
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id,
- psp->xgmi_context.session_id);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd,
- psp->fence_buf_mc_addr);
-
- kfree(cmd);
-
- return ret;
+ return psp_ta_invoke(psp, ta_cmd_id, psp->xgmi_context.session_id);
}
static int psp_xgmi_terminate(struct psp_context *psp)
@@ -548,7 +584,9 @@ static int psp_xgmi_initialize(struct psp_context *psp)
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
- if (!psp->adev->psp.ta_fw)
+ if (!psp->adev->psp.ta_fw ||
+ !psp->adev->psp.ta_xgmi_ucode_size ||
+ !psp->adev->psp.ta_xgmi_start_addr)
return -ENOENT;
if (!psp->xgmi_context.initialized) {
@@ -573,20 +611,6 @@ static int psp_xgmi_initialize(struct psp_context *psp)
}
// ras begin
-static void psp_prep_ras_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint64_t ras_ta_mc, uint64_t ras_mc_shared,
- uint32_t ras_ta_size, uint32_t shared_size)
-{
- cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
- cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ras_ta_mc);
- cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ras_ta_mc);
- cmd->cmd.cmd_load_ta.app_len = ras_ta_size;
-
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ras_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ras_mc_shared);
- cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
-}
-
static int psp_ras_init_shared_buf(struct psp_context *psp)
{
int ret;
@@ -622,15 +646,17 @@ static int psp_ras_load(struct psp_context *psp)
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size);
- psp_prep_ras_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
- psp->ras.ras_shared_mc_addr,
- psp->ta_ras_ucode_size, PSP_RAS_SHARED_MEM_SIZE);
+ psp_prep_ta_load_cmd_buf(cmd,
+ psp->fw_pri_mc_addr,
+ psp->ta_ras_ucode_size,
+ psp->ras.ras_shared_mc_addr,
+ PSP_RAS_SHARED_MEM_SIZE);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
if (!ret) {
- psp->ras.ras_initialized = 1;
+ psp->ras.ras_initialized = true;
psp->ras.session_id = cmd->resp.session_id;
}
@@ -639,13 +665,6 @@ static int psp_ras_load(struct psp_context *psp)
return ret;
}
-static void psp_prep_ras_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint32_t ras_session_id)
-{
- cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
- cmd->cmd.cmd_unload_ta.session_id = ras_session_id;
-}
-
static int psp_ras_unload(struct psp_context *psp)
{
int ret;
@@ -661,7 +680,7 @@ static int psp_ras_unload(struct psp_context *psp)
if (!cmd)
return -ENOMEM;
- psp_prep_ras_ta_unload_cmd_buf(cmd, psp->ras.session_id);
+ psp_prep_ta_unload_cmd_buf(cmd, psp->ras.session_id);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
@@ -671,40 +690,15 @@ static int psp_ras_unload(struct psp_context *psp)
return ret;
}
-static void psp_prep_ras_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
- uint32_t ta_cmd_id,
- uint32_t ras_session_id)
-{
- cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
- cmd->cmd.cmd_invoke_cmd.session_id = ras_session_id;
- cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
- /* Note: cmd_invoke_cmd.buf is not used for now */
-}
-
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
/*
* TODO: bypass the loading in sriov for now
*/
if (amdgpu_sriov_vf(psp->adev))
return 0;
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- psp_prep_ras_ta_invoke_cmd_buf(cmd, ta_cmd_id,
- psp->ras.session_id);
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd,
- psp->fence_buf_mc_addr);
-
- kfree(cmd);
-
- return ret;
+ return psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id);
}
int psp_ras_enable_features(struct psp_context *psp,
@@ -737,6 +731,12 @@ static int psp_ras_terminate(struct psp_context *psp)
{
int ret;
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
if (!psp->ras.ras_initialized)
return 0;
@@ -744,7 +744,7 @@ static int psp_ras_terminate(struct psp_context *psp)
if (ret)
return ret;
- psp->ras.ras_initialized = 0;
+ psp->ras.ras_initialized = false;
/* free ras shared memory */
amdgpu_bo_free_kernel(&psp->ras.ras_shared_bo,
@@ -758,6 +758,18 @@ static int psp_ras_initialize(struct psp_context *psp)
{
int ret;
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->adev->psp.ta_ras_ucode_size ||
+ !psp->adev->psp.ta_ras_start_addr) {
+ dev_warn(psp->adev->dev, "RAS: ras ta ucode is not available\n");
+ return 0;
+ }
+
if (!psp->ras.ras_initialized) {
ret = psp_ras_init_shared_buf(psp);
if (ret)
@@ -772,6 +784,274 @@ static int psp_ras_initialize(struct psp_context *psp)
}
// ras end
+// HDCP start
+static int psp_hdcp_init_shared_buf(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * Allocate 16k memory aligned to 4k from Frame Buffer (local
+ * physical) for hdcp ta <-> Driver
+ */
+ ret = amdgpu_bo_create_kernel(psp->adev, PSP_HDCP_SHARED_MEM_SIZE,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->hdcp_context.hdcp_shared_bo,
+ &psp->hdcp_context.hdcp_shared_mc_addr,
+ &psp->hdcp_context.hdcp_shared_buf);
+
+ return ret;
+}
+
+static int psp_hdcp_load(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+ memcpy(psp->fw_pri_buf, psp->ta_hdcp_start_addr,
+ psp->ta_hdcp_ucode_size);
+
+ psp_prep_ta_load_cmd_buf(cmd,
+ psp->fw_pri_mc_addr,
+ psp->ta_hdcp_ucode_size,
+ psp->hdcp_context.hdcp_shared_mc_addr,
+ PSP_HDCP_SHARED_MEM_SIZE);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ if (!ret) {
+ psp->hdcp_context.hdcp_initialized = true;
+ psp->hdcp_context.session_id = cmd->resp.session_id;
+ }
+
+ kfree(cmd);
+
+ return ret;
+}
+static int psp_hdcp_initialize(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->adev->psp.ta_hdcp_ucode_size ||
+ !psp->adev->psp.ta_hdcp_start_addr) {
+ dev_warn(psp->adev->dev, "HDCP: hdcp ta ucode is not available\n");
+ return 0;
+ }
+
+ if (!psp->hdcp_context.hdcp_initialized) {
+ ret = psp_hdcp_init_shared_buf(psp);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_hdcp_load(psp);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int psp_hdcp_unload(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ /*
+ * TODO: bypass the unloading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ psp_prep_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ kfree(cmd);
+
+ return ret;
+}
+
+int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ return psp_ta_invoke(psp, ta_cmd_id, psp->hdcp_context.session_id);
+}
+
+static int psp_hdcp_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->hdcp_context.hdcp_initialized)
+ return 0;
+
+ ret = psp_hdcp_unload(psp);
+ if (ret)
+ return ret;
+
+ psp->hdcp_context.hdcp_initialized = false;
+
+ /* free hdcp shared memory */
+ amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo,
+ &psp->hdcp_context.hdcp_shared_mc_addr,
+ &psp->hdcp_context.hdcp_shared_buf);
+
+ return 0;
+}
+// HDCP end
+
+// DTM start
+static int psp_dtm_init_shared_buf(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * Allocate 16k memory aligned to 4k from Frame Buffer (local
+ * physical) for dtm ta <-> Driver
+ */
+ ret = amdgpu_bo_create_kernel(psp->adev, PSP_DTM_SHARED_MEM_SIZE,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->dtm_context.dtm_shared_bo,
+ &psp->dtm_context.dtm_shared_mc_addr,
+ &psp->dtm_context.dtm_shared_buf);
+
+ return ret;
+}
+
+static int psp_dtm_load(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+ memcpy(psp->fw_pri_buf, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size);
+
+ psp_prep_ta_load_cmd_buf(cmd,
+ psp->fw_pri_mc_addr,
+ psp->ta_dtm_ucode_size,
+ psp->dtm_context.dtm_shared_mc_addr,
+ PSP_DTM_SHARED_MEM_SIZE);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ if (!ret) {
+ psp->dtm_context.dtm_initialized = true;
+ psp->dtm_context.session_id = cmd->resp.session_id;
+ }
+
+ kfree(cmd);
+
+ return ret;
+}
+
+static int psp_dtm_initialize(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->adev->psp.ta_dtm_ucode_size ||
+ !psp->adev->psp.ta_dtm_start_addr) {
+ dev_warn(psp->adev->dev, "DTM: dtm ta ucode is not available\n");
+ return 0;
+ }
+
+ if (!psp->dtm_context.dtm_initialized) {
+ ret = psp_dtm_init_shared_buf(psp);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_dtm_load(psp);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ return psp_ta_invoke(psp, ta_cmd_id, psp->dtm_context.session_id);
+}
+
+static int psp_dtm_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->dtm_context.dtm_initialized)
+ return 0;
+
+ ret = psp_hdcp_unload(psp);
+ if (ret)
+ return ret;
+
+ psp->dtm_context.dtm_initialized = false;
+
+ /* free hdcp shared memory */
+ amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo,
+ &psp->dtm_context.dtm_shared_mc_addr,
+ &psp->dtm_context.dtm_shared_buf);
+
+ return 0;
+}
+// DTM end
+
static int psp_hw_start(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -818,35 +1098,6 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
- ret = psp_asd_init(psp);
- if (ret) {
- DRM_ERROR("PSP asd init failed!\n");
- return ret;
- }
-
- ret = psp_asd_load(psp);
- if (ret) {
- DRM_ERROR("PSP load asd failed!\n");
- return ret;
- }
-
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- ret = psp_xgmi_initialize(psp);
- /* Warning the XGMI seesion initialize failure
- * Instead of stop driver initialization
- */
- if (ret)
- dev_err(psp->adev->dev,
- "XGMI: Failed to initialize XGMI session\n");
- }
-
- if (psp->adev->psp.ta_fw) {
- ret = psp_ras_initialize(psp);
- if (ret)
- dev_err(psp->adev->dev,
- "RAS: Failed to initialize RAS\n");
- }
-
return 0;
}
@@ -926,6 +1177,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_VCN:
*type = GFX_FW_TYPE_VCN;
break;
+ case AMDGPU_UCODE_ID_VCN1:
+ *type = GFX_FW_TYPE_VCN1;
+ break;
case AMDGPU_UCODE_ID_DMCU_ERAM:
*type = GFX_FW_TYPE_DMCU_ERAM;
break;
@@ -938,6 +1192,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_VCN1_RAM:
*type = GFX_FW_TYPE_VCN1_RAM;
break;
+ case AMDGPU_UCODE_ID_DMCUB:
+ *type = GFX_FW_TYPE_DMUB;
+ break;
case AMDGPU_UCODE_ID_MAXIMUM:
default:
return -EINVAL;
@@ -950,21 +1207,7 @@ static void psp_print_fw_hdr(struct psp_context *psp,
struct amdgpu_firmware_info *ucode)
{
struct amdgpu_device *adev = psp->adev;
- const struct sdma_firmware_header_v1_0 *sdma_hdr =
- (const struct sdma_firmware_header_v1_0 *)
- adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
- const struct gfx_firmware_header_v1_0 *ce_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
- const struct gfx_firmware_header_v1_0 *pfp_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- const struct gfx_firmware_header_v1_0 *me_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- const struct gfx_firmware_header_v1_0 *mec_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- const struct rlc_firmware_header_v2_0 *rlc_hdr =
- (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
- const struct smc_firmware_header_v1_0 *smc_hdr =
- (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
+ struct common_firmware_header *hdr;
switch (ucode->ucode_id) {
case AMDGPU_UCODE_ID_SDMA0:
@@ -975,25 +1218,33 @@ static void psp_print_fw_hdr(struct psp_context *psp,
case AMDGPU_UCODE_ID_SDMA5:
case AMDGPU_UCODE_ID_SDMA6:
case AMDGPU_UCODE_ID_SDMA7:
- amdgpu_ucode_print_sdma_hdr(&sdma_hdr->header);
+ hdr = (struct common_firmware_header *)
+ adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_CE:
- amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_PFP:
- amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_ME:
- amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.me_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_MEC1:
- amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_RLC_G:
- amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(hdr);
break;
case AMDGPU_UCODE_ID_SMC:
- amdgpu_ucode_print_smc_hdr(&smc_hdr->header);
+ hdr = (struct common_firmware_header *)adev->pm.fw->data;
+ amdgpu_ucode_print_smc_hdr(hdr);
break;
default:
break;
@@ -1070,7 +1321,11 @@ out:
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G))
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
+ || ucode->ucode_id == AMDGPU_UCODE_ID_SMC))
/*skip ucode loading in SRIOV VF */
continue;
@@ -1079,10 +1334,6 @@ out:
ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT))
/* skip mec JT when autoload is enabled */
continue;
- /* Renoir only needs to load mec jump table one time */
- if (adev->asic_type == CHIP_RENOIR &&
- ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT)
- continue;
psp_print_fw_hdr(psp, ucode);
@@ -1091,7 +1342,8 @@ out:
return ret;
/* Start rlc autoload after psp recieved all the gfx firmware */
- if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+ if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
+ AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) {
ret = psp_rlc_autoload(psp);
if (ret) {
DRM_ERROR("Failed to start rlc autoload\n");
@@ -1122,16 +1374,13 @@ static int psp_load_fw(struct amdgpu_device *adev)
if (!psp->cmd)
return -ENOMEM;
- /* this fw pri bo is not used under SRIOV */
- if (!amdgpu_sriov_vf(psp->adev)) {
- ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
- AMDGPU_GEM_DOMAIN_GTT,
- &psp->fw_pri_bo,
- &psp->fw_pri_mc_addr,
- &psp->fw_pri_buf);
- if (ret)
- goto failed;
- }
+ ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr,
+ &psp->fw_pri_buf);
+ if (ret)
+ goto failed;
ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
@@ -1165,6 +1414,39 @@ skip_memalloc:
if (ret)
goto failed;
+ ret = psp_asd_load(psp);
+ if (ret) {
+ DRM_ERROR("PSP load asd failed!\n");
+ return ret;
+ }
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ ret = psp_xgmi_initialize(psp);
+ /* Warning the XGMI seesion initialize failure
+ * Instead of stop driver initialization
+ */
+ if (ret)
+ dev_err(psp->adev->dev,
+ "XGMI: Failed to initialize XGMI session\n");
+ }
+
+ if (psp->adev->psp.ta_fw) {
+ ret = psp_ras_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAS: Failed to initialize RAS\n");
+
+ ret = psp_hdcp_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "HDCP: Failed to initialize HDCP\n");
+
+ ret = psp_dtm_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "DTM: Failed to initialize DTM\n");
+ }
+
return 0;
failed:
@@ -1216,8 +1498,13 @@ static int psp_hw_fini(void *handle)
psp->xgmi_context.initialized == 1)
psp_xgmi_terminate(psp);
- if (psp->adev->psp.ta_fw)
+ if (psp->adev->psp.ta_fw) {
psp_ras_terminate(psp);
+ psp_dtm_terminate(psp);
+ psp_hdcp_terminate(psp);
+ }
+
+ psp_asd_unload(psp);
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
@@ -1227,8 +1514,6 @@ static int psp_hw_fini(void *handle)
&psp->fw_pri_mc_addr, &psp->fw_pri_buf);
amdgpu_bo_free_kernel(&psp->fence_buf_bo,
&psp->fence_buf_mc_addr, &psp->fence_buf);
- amdgpu_bo_free_kernel(&psp->asd_shared_bo, &psp->asd_shared_mc_addr,
- &psp->asd_shared_buf);
amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
(void **)&psp->cmd_buf_mem);
@@ -1259,6 +1544,16 @@ static int psp_suspend(void *handle)
DRM_ERROR("Failed to terminate ras ta\n");
return ret;
}
+ ret = psp_hdcp_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate hdcp ta\n");
+ return ret;
+ }
+ ret = psp_dtm_terminate(psp);
+ if (ret) {
+ DRM_ERROR("Failed to terminate dtm ta\n");
+ return ret;
+ }
}
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
@@ -1278,6 +1573,12 @@ static int psp_resume(void *handle)
DRM_INFO("PSP is resuming...\n");
+ ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME);
+ if (ret) {
+ DRM_ERROR("Failed to process memory training!\n");
+ return ret;
+ }
+
mutex_lock(&adev->firmware.mutex);
ret = psp_hw_start(psp);
@@ -1288,6 +1589,39 @@ static int psp_resume(void *handle)
if (ret)
goto failed;
+ ret = psp_asd_load(psp);
+ if (ret) {
+ DRM_ERROR("PSP load asd failed!\n");
+ goto failed;
+ }
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ ret = psp_xgmi_initialize(psp);
+ /* Warning the XGMI seesion initialize failure
+ * Instead of stop driver initialization
+ */
+ if (ret)
+ dev_err(psp->adev->dev,
+ "XGMI: Failed to initialize XGMI session\n");
+ }
+
+ if (psp->adev->psp.ta_fw) {
+ ret = psp_ras_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAS: Failed to initialize RAS\n");
+
+ ret = psp_hdcp_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "HDCP: Failed to initialize HDCP\n");
+
+ ret = psp_dtm_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "DTM: Failed to initialize DTM\n");
+ }
+
mutex_unlock(&adev->firmware.mutex);
return 0;
@@ -1317,9 +1651,6 @@ int psp_rlc_autoload_start(struct psp_context *psp)
int ret;
struct psp_gfx_cmd_resp *cmd;
- if (amdgpu_sriov_vf(psp->adev))
- return 0;
-
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
@@ -1345,6 +1676,56 @@ int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
return psp_execute_np_fw_load(&adev->psp, &ucode);
}
+int psp_ring_cmd_submit(struct psp_context *psp,
+ uint64_t cmd_buf_mc_addr,
+ uint64_t fence_mc_addr,
+ int index)
+{
+ unsigned int psp_write_ptr_reg = 0;
+ struct psp_gfx_rb_frame *write_frame;
+ struct psp_ring *ring = &psp->km_ring;
+ struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
+ struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
+ ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t ring_size_dw = ring->ring_size / 4;
+ uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
+
+ /* KM (GPCOM) prepare write pointer */
+ psp_write_ptr_reg = psp_ring_get_wptr(psp);
+
+ /* Update KM RB frame pointer to new frame */
+ /* write_frame ptr increments by size of rb_frame in bytes */
+ /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
+ if ((psp_write_ptr_reg % ring_size_dw) == 0)
+ write_frame = ring_buffer_start;
+ else
+ write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
+ /* Check invalid write_frame ptr address */
+ if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
+ DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
+ ring_buffer_start, ring_buffer_end, write_frame);
+ DRM_ERROR("write_frame is pointing to address out of bounds\n");
+ return -EINVAL;
+ }
+
+ /* Initialize KM RB frame */
+ memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
+
+ /* Update KM RB frame */
+ write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
+ write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
+ write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
+ write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
+ write_frame->fence_value = index;
+ amdgpu_asic_flush_hdp(adev, NULL);
+
+ /* Update the write Pointer in DWORDs */
+ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
+ psp_ring_set_wptr(psp, psp_write_ptr_reg);
+ return 0;
+}
+
static bool psp_check_fw_loading_status(struct amdgpu_device *adev,
enum AMDGPU_UCODE_ID ucode_type)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index bc0947f6bc8a..611021514c52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -32,11 +32,13 @@
#define PSP_FENCE_BUFFER_SIZE 0x1000
#define PSP_CMD_BUFFER_SIZE 0x1000
-#define PSP_ASD_SHARED_MEM_SIZE 0x4000
#define PSP_XGMI_SHARED_MEM_SIZE 0x4000
#define PSP_RAS_SHARED_MEM_SIZE 0x4000
#define PSP_1_MEG 0x100000
#define PSP_TMR_SIZE 0x400000
+#define PSP_HDCP_SHARED_MEM_SIZE 0x4000
+#define PSP_DTM_SHARED_MEM_SIZE 0x4000
+#define PSP_SHARED_MEM_SIZE 0x4000
struct psp_context;
struct psp_xgmi_node_info;
@@ -46,6 +48,8 @@ enum psp_bootloader_cmd {
PSP_BL__LOAD_SYSDRV = 0x10000,
PSP_BL__LOAD_SOSDRV = 0x20000,
PSP_BL__LOAD_KEY_DATABASE = 0x80000,
+ PSP_BL__DRAM_LONG_TRAIN = 0x100000,
+ PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
};
enum psp_ring_type
@@ -89,9 +93,6 @@ struct psp_funcs
enum psp_ring_type ring_type);
int (*ring_destroy)(struct psp_context *psp,
enum psp_ring_type ring_type);
- int (*cmd_submit)(struct psp_context *psp,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index);
bool (*compare_sram_data)(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
enum AMDGPU_UCODE_ID ucode_type);
@@ -108,6 +109,11 @@ struct psp_funcs
struct ta_ras_trigger_error_input *info);
int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
int (*rlc_autoload_start)(struct psp_context *psp);
+ int (*mem_training_init)(struct psp_context *psp);
+ void (*mem_training_fini)(struct psp_context *psp);
+ int (*mem_training)(struct psp_context *psp, uint32_t ops);
+ uint32_t (*ring_get_wptr)(struct psp_context *psp);
+ void (*ring_set_wptr)(struct psp_context *psp, uint32_t value);
};
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
@@ -123,6 +129,11 @@ struct psp_xgmi_topology_info {
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
};
+struct psp_asd_context {
+ bool asd_initialized;
+ uint32_t session_id;
+};
+
struct psp_xgmi_context {
uint8_t initialized;
uint32_t session_id;
@@ -142,6 +153,66 @@ struct psp_ras_context {
struct amdgpu_ras *ras;
};
+struct psp_hdcp_context {
+ bool hdcp_initialized;
+ uint32_t session_id;
+ struct amdgpu_bo *hdcp_shared_bo;
+ uint64_t hdcp_shared_mc_addr;
+ void *hdcp_shared_buf;
+};
+
+struct psp_dtm_context {
+ bool dtm_initialized;
+ uint32_t session_id;
+ struct amdgpu_bo *dtm_shared_bo;
+ uint64_t dtm_shared_mc_addr;
+ void *dtm_shared_buf;
+};
+
+#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942
+#define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000
+#define GDDR6_MEM_TRAINING_OFFSET 0x8000
+/*Define the VRAM size that will be encroached by BIST training.*/
+#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000
+
+enum psp_memory_training_init_flag {
+ PSP_MEM_TRAIN_NOT_SUPPORT = 0x0,
+ PSP_MEM_TRAIN_SUPPORT = 0x1,
+ PSP_MEM_TRAIN_INIT_FAILED = 0x2,
+ PSP_MEM_TRAIN_RESERVE_SUCCESS = 0x4,
+ PSP_MEM_TRAIN_INIT_SUCCESS = 0x8,
+};
+
+enum psp_memory_training_ops {
+ PSP_MEM_TRAIN_SEND_LONG_MSG = 0x1,
+ PSP_MEM_TRAIN_SAVE = 0x2,
+ PSP_MEM_TRAIN_RESTORE = 0x4,
+ PSP_MEM_TRAIN_SEND_SHORT_MSG = 0x8,
+ PSP_MEM_TRAIN_COLD_BOOT = PSP_MEM_TRAIN_SEND_LONG_MSG,
+ PSP_MEM_TRAIN_RESUME = PSP_MEM_TRAIN_SEND_SHORT_MSG,
+};
+
+struct psp_memory_training_context {
+ /*training data size*/
+ u64 train_data_size;
+ /*
+ * sys_cache
+ * cpu virtual address
+ * system memory buffer that used to store the training data.
+ */
+ void *sys_cache;
+
+ /*vram offset of the p2c training data*/
+ u64 p2c_train_data_offset;
+
+ /*vram offset of the c2p training data*/
+ u64 c2p_train_data_offset;
+ struct amdgpu_bo *c2p_bo;
+
+ enum psp_memory_training_init_flag init;
+ u32 training_cnt;
+};
+
struct psp_context
{
struct amdgpu_device *adev;
@@ -172,15 +243,12 @@ struct psp_context
struct amdgpu_bo *tmr_bo;
uint64_t tmr_mc_addr;
- /* asd firmware and buffer */
+ /* asd firmware */
const struct firmware *asd_fw;
uint32_t asd_fw_version;
uint32_t asd_feature_version;
uint32_t asd_ucode_size;
uint8_t *asd_start_addr;
- struct amdgpu_bo *asd_shared_bo;
- uint64_t asd_shared_mc_addr;
- void *asd_shared_buf;
/* fence buffer */
struct amdgpu_bo *fence_buf_bo;
@@ -206,9 +274,22 @@ struct psp_context
uint32_t ta_ras_ucode_version;
uint32_t ta_ras_ucode_size;
uint8_t *ta_ras_start_addr;
+
+ uint32_t ta_hdcp_ucode_version;
+ uint32_t ta_hdcp_ucode_size;
+ uint8_t *ta_hdcp_start_addr;
+
+ uint32_t ta_dtm_ucode_version;
+ uint32_t ta_dtm_ucode_size;
+ uint8_t *ta_dtm_start_addr;
+
+ struct psp_asd_context asd_context;
struct psp_xgmi_context xgmi_context;
struct psp_ras_context ras;
+ struct psp_hdcp_context hdcp_context;
+ struct psp_dtm_context dtm_context;
struct mutex mutex;
+ struct psp_memory_training_context mem_train_ctx;
};
struct amdgpu_psp_funcs {
@@ -221,8 +302,6 @@ struct amdgpu_psp_funcs {
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
-#define psp_cmd_submit(psp, cmd_mc, fence_mc, index) \
- (psp)->funcs->cmd_submit((psp), (cmd_mc), (fence_mc), (index))
#define psp_compare_sram_data(psp, ucode, type) \
(psp)->funcs->compare_sram_data((psp), (ucode), (type))
#define psp_init_microcode(psp) \
@@ -251,6 +330,12 @@ struct amdgpu_psp_funcs {
(psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL)
#define psp_rlc_autoload(psp) \
((psp)->funcs->rlc_autoload_start ? (psp)->funcs->rlc_autoload_start((psp)) : 0)
+#define psp_mem_training_init(psp) \
+ ((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0)
+#define psp_mem_training_fini(psp) \
+ ((psp)->funcs->mem_training_fini ? (psp)->funcs->mem_training_fini((psp)) : 0)
+#define psp_mem_training(psp, ops) \
+ ((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0)
#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
@@ -261,6 +346,9 @@ struct amdgpu_psp_funcs {
((psp)->funcs->ras_cure_posion ? \
(psp)->funcs->ras_cure_posion(psp, (addr)) : -EINVAL)
+#define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp))
+#define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value))
+
extern const struct amd_ip_funcs psp_ip_funcs;
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
@@ -279,10 +367,16 @@ int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
+int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_rlc_autoload_start(struct psp_context *psp);
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
uint32_t value);
+int psp_ring_cmd_submit(struct psp_context *psp,
+ uint64_t cmd_buf_mc_addr,
+ uint64_t fence_mc_addr,
+ int index);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 016ea274b955..cef94e2169fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -25,10 +25,13 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/uaccess.h>
+#include <linux/reboot.h>
+#include <linux/syscalls.h>
#include "amdgpu.h"
#include "amdgpu_ras.h"
#include "amdgpu_atomfirmware.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
const char *ras_error_string[] = {
"none",
@@ -65,11 +68,16 @@ const char *ras_block_string[] = {
/* inject address is 52 bits */
#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52)
-static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size,
- struct amdgpu_bo **bo_ptr);
-static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
- struct amdgpu_bo **bo_ptr);
+enum amdgpu_ras_retire_page_reservation {
+ AMDGPU_RAS_RETIRE_PAGE_RESERVED,
+ AMDGPU_RAS_RETIRE_PAGE_PENDING,
+ AMDGPU_RAS_RETIRE_PAGE_FAULT,
+};
+
+atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
+
+static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+ uint64_t addr);
static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
@@ -189,6 +197,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return 0;
}
+
/**
* DOC: AMDGPU RAS debugfs control interface
*
@@ -208,31 +217,44 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* As their names indicate, inject operation will write the
* value to the address.
*
- * Second member: struct ras_debug_if::op.
+ * The second member: struct ras_debug_if::op.
* It has three kinds of operations.
- * 0: disable RAS on the block. Take ::head as its data.
- * 1: enable RAS on the block. Take ::head as its data.
- * 2: inject errors on the block. Take ::inject as its data.
+ *
+ * - 0: disable RAS on the block. Take ::head as its data.
+ * - 1: enable RAS on the block. Take ::head as its data.
+ * - 2: inject errors on the block. Take ::inject as its data.
*
* How to use the interface?
- * programs:
- * copy the struct ras_debug_if in your codes and initialize it.
- * write the struct to the control node.
*
- * bash:
- * echo op block [error [sub_blcok address value]] > .../ras/ras_ctrl
- * op: disable, enable, inject
- * disable: only block is needed
- * enable: block and error are needed
- * inject: error, address, value are needed
- * block: umc, smda, gfx, .........
- * see ras_block_string[] for details
- * error: ue, ce
- * ue: multi_uncorrectable
- * ce: single_correctable
- * sub_block: sub block index, pass 0 if there is no sub block
+ * Programs
+ *
+ * Copy the struct ras_debug_if in your codes and initialize it.
+ * Write the struct to the control node.
+ *
+ * Shells
+ *
+ * .. code-block:: bash
+ *
+ * echo op block [error [sub_block address value]] > .../ras/ras_ctrl
+ *
+ * Parameters:
+ *
+ * op: disable, enable, inject
+ * disable: only block is needed
+ * enable: block and error are needed
+ * inject: error, address, value are needed
+ * block: umc, sdma, gfx, .........
+ * see ras_block_string[] for details
+ * error: ue, ce
+ * ue: multi_uncorrectable
+ * ce: single_correctable
+ * sub_block:
+ * sub block index, pass 0 if there is no sub block
+ *
+ * here are some examples for bash commands:
+ *
+ * .. code-block:: bash
*
- * here are some examples for bash commands,
* echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
* echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
* echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
@@ -245,8 +267,11 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* For inject, please check corresponding err count at
* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
*
- * NOTE: operation is only allowed on blocks which are supported.
- * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ * .. note::
+ * Operations are only allowed on blocks which are supported.
+ * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ * to see which blocks support RAS on a particular asic.
+ *
*/
static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
@@ -276,13 +301,21 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
break;
}
+ /* umc ce/ue error injection for a bad page is not allowed */
+ if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
+ amdgpu_ras_check_bad_page(adev, data.inject.address)) {
+ DRM_WARN("RAS WARN: 0x%llx has been marked as bad before error injection!\n",
+ data.inject.address);
+ break;
+ }
+
/* data.inject.address is offset instead of absolute gpu address */
ret = amdgpu_ras_error_inject(adev, &data.inject);
break;
default:
ret = -EINVAL;
break;
- };
+ }
if (ret)
return -EINVAL;
@@ -290,6 +323,33 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
return size;
}
+/**
+ * DOC: AMDGPU RAS debugfs EEPROM table reset interface
+ *
+ * Some boards contain an EEPROM which is used to persistently store a list of
+ * bad pages which experiences ECC errors in vram. This interface provides
+ * a way to reset the EEPROM, e.g., after testing error injection.
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo 1 > ../ras/ras_eeprom_reset
+ *
+ * will reset EEPROM table to 0 entries.
+ *
+ */
+static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ int ret;
+
+ ret = amdgpu_ras_eeprom_reset_table(&adev->psp.ras.ras->eeprom_control);
+
+ return ret == 1 ? size : -EIO;
+}
+
static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
.owner = THIS_MODULE,
.read = NULL,
@@ -297,6 +357,34 @@ static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
.llseek = default_llseek
};
+static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = amdgpu_ras_debugfs_eeprom_write,
+ .llseek = default_llseek
+};
+
+/**
+ * DOC: AMDGPU RAS sysfs Error Count Interface
+ *
+ * It allows the user to read the error count for each IP block on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
+ *
+ * It outputs the multiple lines which report the uncorrected (ue) and corrected
+ * (ce) error counts.
+ *
+ * The format of one line is below,
+ *
+ * [ce|ue]: count
+ *
+ * Example:
+ *
+ * .. code-block:: bash
+ *
+ * ue: 0
+ * ce: 1
+ *
+ */
static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -354,7 +442,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
}
/* return an obj equal to head, or the first when head is NULL */
-static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
struct ras_common_if *head)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -475,15 +563,17 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
return 0;
- ret = psp_ras_enable_features(&adev->psp, &info, enable);
- if (ret) {
- DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
- enable ? "enable":"disable",
- ras_block_str(head->block),
- ret);
- if (ret == TA_RAS_STATUS__RESET_NEEDED)
- return -EAGAIN;
- return -EINVAL;
+ if (!amdgpu_ras_intr_triggered()) {
+ ret = psp_ras_enable_features(&adev->psp, &info, enable);
+ if (ret) {
+ DRM_ERROR("RAS ERROR: %s %s feature failed ret %d\n",
+ enable ? "enable":"disable",
+ ras_block_str(head->block),
+ ret);
+ if (ret == TA_RAS_STATUS__RESET_NEEDED)
+ return -EAGAIN;
+ return -EINVAL;
+ }
}
/* setup the obj */
@@ -596,6 +686,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
struct ras_err_data err_data = {0, 0, 0, NULL};
+ int i;
if (!obj)
return -EINVAL;
@@ -610,13 +701,24 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
if (adev->umc.funcs->query_ras_error_address)
adev->umc.funcs->query_ras_error_address(adev, &err_data);
break;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ if (adev->sdma.funcs->query_ras_error_count) {
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ adev->sdma.funcs->query_ras_error_count(adev, i,
+ &err_data);
+ }
+ break;
case AMDGPU_RAS_BLOCK__GFX:
if (adev->gfx.funcs->query_ras_error_count)
adev->gfx.funcs->query_ras_error_count(adev, &err_data);
break;
case AMDGPU_RAS_BLOCK__MMHUB:
- if (adev->mmhub_funcs->query_ras_error_count)
- adev->mmhub_funcs->query_ras_error_count(adev, &err_data);
+ if (adev->mmhub.funcs->query_ras_error_count)
+ adev->mmhub.funcs->query_ras_error_count(adev, &err_data);
+ break;
+ case AMDGPU_RAS_BLOCK__PCIE_BIF:
+ if (adev->nbio.funcs->query_ras_error_count)
+ adev->nbio.funcs->query_ras_error_count(adev, &err_data);
break;
default:
break;
@@ -628,16 +730,32 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
info->ue_count = obj->err_data.ue_count;
info->ce_count = obj->err_data.ce_count;
- if (err_data.ce_count)
+ if (err_data.ce_count) {
dev_info(adev->dev, "%ld correctable errors detected in %s block\n",
obj->err_data.ce_count, ras_block_str(info->head.block));
- if (err_data.ue_count)
+ }
+ if (err_data.ue_count) {
dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n",
obj->err_data.ue_count, ras_block_str(info->head.block));
+ }
return 0;
}
+uint64_t get_xgmi_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t df_inst_id;
+
+ if ((!adev->df.funcs) ||
+ (!adev->df.funcs->get_df_inst_id) ||
+ (!adev->df.funcs->get_dram_base_addr))
+ return addr;
+
+ df_inst_id = adev->df.funcs->get_df_inst_id(adev);
+
+ return addr + adev->df.funcs->get_dram_base_addr(adev, df_inst_id);
+}
+
/* wrapper of psp_ras_trigger_error */
int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info)
@@ -655,6 +773,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
if (!obj)
return -EINVAL;
+ /* Calculate XGMI relative offset */
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ block_info.address = get_xgmi_relative_phy_addr(adev,
+ block_info.address);
+ }
+
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__GFX:
if (adev->gfx.funcs->ras_error_inject)
@@ -664,6 +788,8 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
break;
case AMDGPU_RAS_BLOCK__UMC:
case AMDGPU_RAS_BLOCK__MMHUB:
+ case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+ case AMDGPU_RAS_BLOCK__PCIE_BIF:
ret = psp_ras_trigger_error(&adev->psp, &block_info);
break;
default:
@@ -723,18 +849,18 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
{
switch (flags) {
- case 0:
+ case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
return "R";
- case 1:
+ case AMDGPU_RAS_RETIRE_PAGE_PENDING:
return "P";
- case 2:
+ case AMDGPU_RAS_RETIRE_PAGE_FAULT:
default:
return "F";
};
}
-/*
- * DOC: ras sysfs gpu_vram_bad_pages interface
+/**
+ * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
*
* It allows user to read the bad pages of vram on the gpu through
* /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
@@ -746,14 +872,21 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
*
* gpu pfn and gpu page size are printed in hex format.
* flags can be one of below character,
+ *
* R: reserved, this gpu page is reserved and not able to use.
+ *
* P: pending for reserve, this gpu page is marked as bad, will be reserved
- * in next window of page_reserve.
+ * in next window of page_reserve.
+ *
* F: unable to reserve. this gpu page can't be reserved due to some reasons.
*
- * examples:
- * 0x00000001 : 0x00001000 : R
- * 0x00000002 : 0x00001000 : P
+ * Examples:
+ *
+ * .. code-block:: bash
+ *
+ * 0x00000001 : 0x00001000 : R
+ * 0x00000002 : 0x00001000 : P
+ *
*/
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
@@ -927,6 +1060,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
}
/* sysfs end */
+/**
+ * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
+ *
+ * Normally when there is an uncorrectable error, the driver will reset
+ * the GPU to recover. However, in the event of an unrecoverable error,
+ * the driver provides an interface to reboot the system automatically
+ * in that event.
+ *
+ * The following file in debugfs provides that interface:
+ * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo true > .../ras/auto_reboot
+ *
+ */
/* debugfs begin */
static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
@@ -934,8 +1085,21 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
struct drm_minor *minor = adev->ddev->primary;
con->dir = debugfs_create_dir("ras", minor->debugfs_root);
- con->ent = debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
- adev, &amdgpu_ras_debugfs_ctrl_ops);
+ debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
+ adev, &amdgpu_ras_debugfs_ctrl_ops);
+ debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir,
+ adev, &amdgpu_ras_debugfs_eeprom_ops);
+
+ /*
+ * After one uncorrectable error happens, usually GPU recovery will
+ * be scheduled. But due to the known problem in GPU recovery failing
+ * to bring GPU back, below interface provides one direct way to
+ * user to reboot system automatically in such case within
+ * ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine
+ * will never be called.
+ */
+ debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir,
+ &con->reboot);
}
void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
@@ -980,10 +1144,8 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
amdgpu_ras_debugfs_remove(adev, &obj->head);
}
- debugfs_remove(con->ent);
- debugfs_remove(con->dir);
+ debugfs_remove_recursive(con->dir);
con->dir = NULL;
- con->ent = NULL;
}
/* debugfs end */
@@ -1177,6 +1339,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
data = con->eh_data;
if (!data || data->count == 0) {
*bps = NULL;
+ ret = -EINVAL;
goto out;
}
@@ -1188,15 +1351,15 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
for (; i < data->count; i++) {
(*bps)[i] = (struct ras_badpage){
- .bp = data->bps[i].bp,
+ .bp = data->bps[i].retired_page,
.size = AMDGPU_GPU_PAGE_SIZE,
- .flags = 0,
+ .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
};
if (data->last_reserved <= i)
- (*bps)[i].flags = 1;
- else if (data->bps[i].bo == NULL)
- (*bps)[i].flags = 2;
+ (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
+ else if (data->bps_bo[i] == NULL)
+ (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
}
*count = data->count;
@@ -1210,109 +1373,51 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
struct amdgpu_ras *ras =
container_of(work, struct amdgpu_ras, recovery_work);
- amdgpu_device_gpu_recover(ras->adev, 0);
+ if (amdgpu_device_should_recover_gpu(ras->adev))
+ amdgpu_device_gpu_recover(ras->adev, 0);
atomic_set(&ras->in_recovery, 0);
}
-static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
- struct amdgpu_bo **bo_ptr)
-{
- /* no need to free it actually. */
- amdgpu_bo_free_kernel(bo_ptr, NULL, NULL);
- return 0;
-}
-
-/* reserve vram with size@offset */
-static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
- uint64_t offset, uint64_t size,
- struct amdgpu_bo **bo_ptr)
-{
- struct ttm_operation_ctx ctx = { false, false };
- struct amdgpu_bo_param bp;
- int r = 0;
- int i;
- struct amdgpu_bo *bo;
-
- if (bo_ptr)
- *bo_ptr = NULL;
- memset(&bp, 0, sizeof(bp));
- bp.size = size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
-
- r = amdgpu_bo_create(adev, &bp, &bo);
- if (r)
- return -EINVAL;
-
- r = amdgpu_bo_reserve(bo, false);
- if (r)
- goto error_reserve;
-
- offset = ALIGN(offset, PAGE_SIZE);
- for (i = 0; i < bo->placement.num_placement; ++i) {
- bo->placements[i].fpfn = offset >> PAGE_SHIFT;
- bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
- }
-
- ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
- r = ttm_bo_mem_space(&bo->tbo, &bo->placement, &bo->tbo.mem, &ctx);
- if (r)
- goto error_pin;
-
- r = amdgpu_bo_pin_restricted(bo,
- AMDGPU_GEM_DOMAIN_VRAM,
- offset,
- offset + size);
- if (r)
- goto error_pin;
-
- if (bo_ptr)
- *bo_ptr = bo;
-
- amdgpu_bo_unreserve(bo);
- return r;
-
-error_pin:
- amdgpu_bo_unreserve(bo);
-error_reserve:
- amdgpu_bo_unref(&bo);
- return r;
-}
-
/* alloc/realloc bps array */
static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
struct ras_err_handler_data *data, int pages)
{
unsigned int old_space = data->count + data->space_left;
unsigned int new_space = old_space + pages;
- unsigned int align_space = ALIGN(new_space, 1024);
- void *tmp = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
-
- if (!tmp)
+ unsigned int align_space = ALIGN(new_space, 512);
+ void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
+ struct amdgpu_bo **bps_bo =
+ kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL);
+
+ if (!bps || !bps_bo) {
+ kfree(bps);
+ kfree(bps_bo);
return -ENOMEM;
+ }
if (data->bps) {
- memcpy(tmp, data->bps,
+ memcpy(bps, data->bps,
data->count * sizeof(*data->bps));
kfree(data->bps);
}
+ if (data->bps_bo) {
+ memcpy(bps_bo, data->bps_bo,
+ data->count * sizeof(*data->bps_bo));
+ kfree(data->bps_bo);
+ }
- data->bps = tmp;
+ data->bps = bps;
+ data->bps_bo = bps_bo;
data->space_left += align_space - old_space;
return 0;
}
/* it deal with vram only. */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- unsigned long *bps, int pages)
+ struct eeprom_table_record *bps, int pages)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
- int i = pages;
int ret = 0;
if (!con || !con->eh_data || !bps || pages <= 0)
@@ -1329,24 +1434,120 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
goto out;
}
- while (i--)
- data->bps[data->count++].bp = bps[i];
-
+ memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
+ data->count += pages;
data->space_left -= pages;
+
out:
mutex_unlock(&con->recovery_lock);
return ret;
}
+/*
+ * write error record array to eeprom, the function should be
+ * protected by recovery_lock
+ */
+static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+ struct amdgpu_ras_eeprom_control *control;
+ int save_count;
+
+ if (!con || !con->eh_data)
+ return 0;
+
+ control = &con->eeprom_control;
+ data = con->eh_data;
+ save_count = data->count - control->num_recs;
+ /* only new entries are saved */
+ if (save_count > 0)
+ if (amdgpu_ras_eeprom_process_recods(control,
+ &data->bps[control->num_recs],
+ true,
+ save_count)) {
+ DRM_ERROR("Failed to save EEPROM table data!");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/*
+ * read error record array in eeprom and reserve enough space for
+ * storing new bad pages
+ */
+static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras_eeprom_control *control =
+ &adev->psp.ras.ras->eeprom_control;
+ struct eeprom_table_record *bps = NULL;
+ int ret = 0;
+
+ /* no bad page record, skip eeprom access */
+ if (!control->num_recs)
+ return ret;
+
+ bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL);
+ if (!bps)
+ return -ENOMEM;
+
+ if (amdgpu_ras_eeprom_process_recods(control, bps, false,
+ control->num_recs)) {
+ DRM_ERROR("Failed to load EEPROM table records!");
+ ret = -EIO;
+ goto out;
+ }
+
+ ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs);
+
+out:
+ kfree(bps);
+ return ret;
+}
+
+/*
+ * check if an address belongs to bad page
+ *
+ * Note: this check is only for umc block
+ */
+static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+ uint64_t addr)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+ int i;
+ bool ret = false;
+
+ if (!con || !con->eh_data)
+ return ret;
+
+ mutex_lock(&con->recovery_lock);
+ data = con->eh_data;
+ if (!data)
+ goto out;
+
+ addr >>= AMDGPU_GPU_PAGE_SHIFT;
+ for (i = 0; i < data->count; i++)
+ if (addr == data->bps[i].retired_page) {
+ ret = true;
+ goto out;
+ }
+
+out:
+ mutex_unlock(&con->recovery_lock);
+ return ret;
+}
+
/* called in gpu recovery/init */
int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
uint64_t bp;
- struct amdgpu_bo *bo;
- int i;
+ struct amdgpu_bo *bo = NULL;
+ int i, ret = 0;
if (!con || !con->eh_data)
return 0;
@@ -1357,18 +1558,29 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
goto out;
/* reserve vram at driver post stage. */
for (i = data->last_reserved; i < data->count; i++) {
- bp = data->bps[i].bp;
+ bp = data->bps[i].retired_page;
- if (amdgpu_ras_reserve_vram(adev, bp << PAGE_SHIFT,
- PAGE_SIZE, &bo))
- DRM_ERROR("RAS ERROR: reserve vram %llx fail\n", bp);
+ /* There are two cases of reserve error should be ignored:
+ * 1) a ras bad page has been allocated (used by someone);
+ * 2) a ras bad page has been reserved (duplicate error injection
+ * for one page);
+ */
+ if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL))
+ DRM_WARN("RAS WARN: reserve vram for retired page %llx fail\n", bp);
- data->bps[i].bo = bo;
+ data->bps_bo[i] = bo;
data->last_reserved = i + 1;
+ bo = NULL;
}
+
+ /* continue to save bad pages to eeprom even reesrve_vram fails */
+ ret = amdgpu_ras_save_bad_pages(adev);
out:
mutex_unlock(&con->recovery_lock);
- return 0;
+ return ret;
}
/* called when driver unload */
@@ -1388,11 +1600,11 @@ static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev)
goto out;
for (i = data->last_reserved - 1; i >= 0; i--) {
- bo = data->bps[i].bo;
+ bo = data->bps_bo[i];
- amdgpu_ras_release_vram(adev, &bo);
+ amdgpu_bo_free_kernel(&bo, NULL, NULL);
- data->bps[i].bo = bo;
+ data->bps_bo[i] = bo;
data->last_reserved = i;
}
out:
@@ -1400,41 +1612,54 @@ out:
return 0;
}
-static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
-{
- /* TODO
- * write the array to eeprom when SMU disabled.
- */
- return 0;
-}
-
-static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
-{
- /* TODO
- * read the array to eeprom when SMU disabled.
- */
- return 0;
-}
-
-static int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct ras_err_handler_data **data = &con->eh_data;
+ struct ras_err_handler_data **data;
+ int ret;
- *data = kmalloc(sizeof(**data),
- GFP_KERNEL|__GFP_ZERO);
- if (!*data)
- return -ENOMEM;
+ if (con)
+ data = &con->eh_data;
+ else
+ return 0;
+
+ *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
+ if (!*data) {
+ ret = -ENOMEM;
+ goto out;
+ }
mutex_init(&con->recovery_lock);
INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
atomic_set(&con->in_recovery, 0);
con->adev = adev;
- amdgpu_ras_load_bad_pages(adev);
- amdgpu_ras_reserve_bad_pages(adev);
+ ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
+ if (ret)
+ goto free;
+
+ if (con->eeprom_control.num_recs) {
+ ret = amdgpu_ras_load_bad_pages(adev);
+ if (ret)
+ goto free;
+ ret = amdgpu_ras_reserve_bad_pages(adev);
+ if (ret)
+ goto release;
+ }
return 0;
+
+release:
+ amdgpu_ras_release_bad_pages(adev);
+free:
+ kfree((*data)->bps);
+ kfree((*data)->bps_bo);
+ kfree(*data);
+ con->eh_data = NULL;
+out:
+ DRM_WARN("Failed to initialize ras recovery!\n");
+
+ return ret;
}
static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
@@ -1442,13 +1667,17 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data = con->eh_data;
+ /* recovery_init failed to init it, fini is useless */
+ if (!data)
+ return 0;
+
cancel_work_sync(&con->recovery_work);
- amdgpu_ras_save_bad_pages(adev);
amdgpu_ras_release_bad_pages(adev);
mutex_lock(&con->recovery_lock);
con->eh_data = NULL;
kfree(data->bps);
+ kfree(data->bps_bo);
kfree(data);
mutex_unlock(&con->recovery_lock);
@@ -1485,7 +1714,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
*supported = 0;
if (amdgpu_sriov_vf(adev) ||
- adev->asic_type != CHIP_VEGA20)
+ (adev->asic_type != CHIP_VEGA20 &&
+ adev->asic_type != CHIP_ARCTURUS))
return;
if (adev->is_atom_fw &&
@@ -1500,6 +1730,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
int amdgpu_ras_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int r;
if (con)
return 0;
@@ -1527,31 +1758,106 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
/* Might need get this flag from vbios. */
con->flags = RAS_DEFAULT_FLAGS;
- if (amdgpu_ras_recovery_init(adev))
- goto recovery_out;
+ if (adev->nbio.funcs->init_ras_controller_interrupt) {
+ r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
+ r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
+ if (r)
+ return r;
+ }
amdgpu_ras_mask &= AMDGPU_RAS_BLOCK_MASK;
if (amdgpu_ras_fs_init(adev))
goto fs_out;
- /* ras init for each ras block */
- if (adev->umc.funcs->ras_init)
- adev->umc.funcs->ras_init(adev);
-
DRM_INFO("RAS INFO: ras initialized successfully, "
"hardware ability[%x] ras_mask[%x]\n",
con->hw_supported, con->supported);
return 0;
fs_out:
- amdgpu_ras_recovery_fini(adev);
-recovery_out:
amdgpu_ras_set_context(adev, NULL);
kfree(con);
return -EINVAL;
}
+/* helper function to handle common stuff in ip late init phase */
+int amdgpu_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block,
+ struct ras_fs_if *fs_info,
+ struct ras_ih_if *ih_info)
+{
+ int r;
+
+ /* disable RAS feature per IP block if it is not supported */
+ if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
+ amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
+ return 0;
+ }
+
+ r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
+ if (r) {
+ if (r == -EAGAIN) {
+ /* request gpu reset. will run again */
+ amdgpu_ras_request_reset_on_boot(adev,
+ ras_block->block);
+ return 0;
+ } else if (adev->in_suspend || adev->in_gpu_reset) {
+ /* in resume phase, if fail to enable ras,
+ * clean up all ras fs nodes, and disable ras */
+ goto cleanup;
+ } else
+ return r;
+ }
+
+ /* in resume phase, no need to create ras fs node */
+ if (adev->in_suspend || adev->in_gpu_reset)
+ return 0;
+
+ if (ih_info->cb) {
+ r = amdgpu_ras_interrupt_add_handler(adev, ih_info);
+ if (r)
+ goto interrupt;
+ }
+
+ amdgpu_ras_debugfs_create(adev, fs_info);
+
+ r = amdgpu_ras_sysfs_create(adev, fs_info);
+ if (r)
+ goto sysfs;
+
+ return 0;
+cleanup:
+ amdgpu_ras_sysfs_remove(adev, ras_block);
+sysfs:
+ amdgpu_ras_debugfs_remove(adev, ras_block);
+ if (ih_info->cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ih_info);
+interrupt:
+ amdgpu_ras_feature_enable(adev, ras_block, 0);
+ return r;
+}
+
+/* helper function to remove ras fs node and interrupt handler */
+void amdgpu_ras_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block,
+ struct ras_ih_if *ih_info)
+{
+ if (!ras_block || !ih_info)
+ return;
+
+ amdgpu_ras_sysfs_remove(adev, ras_block);
+ amdgpu_ras_debugfs_remove(adev, ras_block);
+ if (ih_info->cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ih_info);
+ amdgpu_ras_feature_enable(adev, ras_block, 0);
+}
+
/* do some init work after IP late init as dependence.
* and it runs in resume/gpu reset/booting up cases.
*/
@@ -1594,7 +1900,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
* See feature_enable_on_boot
*/
amdgpu_ras_disable_all_features(adev, 1);
- amdgpu_ras_reset_gpu(adev, 0);
+ amdgpu_ras_reset_gpu(adev);
}
}
@@ -1645,3 +1951,18 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
return 0;
}
+
+void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+{
+ uint32_t hw_supported, supported;
+
+ amdgpu_ras_check_supported(adev, &hw_supported, &supported);
+ if (!hw_supported)
+ return;
+
+ if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
+ DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n");
+
+ amdgpu_ras_reset_gpu(adev);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 6c76bb2a6843..a5fe29a9373e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -317,8 +317,6 @@ struct amdgpu_ras {
struct list_head head;
/* debugfs */
struct dentry *dir;
- /* debugfs ctrl */
- struct dentry *ent;
/* sysfs */
struct device_attribute features_attr;
struct bin_attribute badpages_attr;
@@ -334,7 +332,7 @@ struct amdgpu_ras {
struct mutex recovery_lock;
uint32_t flags;
-
+ bool reboot;
struct amdgpu_ras_eeprom_control eeprom_control;
};
@@ -347,15 +345,14 @@ struct ras_err_data {
unsigned long ue_count;
unsigned long ce_count;
unsigned long err_addr_cnt;
- uint64_t *err_addr;
+ struct eeprom_table_record *err_addr;
};
struct ras_err_handler_data {
- /* point to bad pages array */
- struct {
- unsigned long bp;
- struct amdgpu_bo *bo;
- } *bps;
+ /* point to bad page records array */
+ struct eeprom_table_record *bps;
+ /* point to reserved bo array */
+ struct amdgpu_bo **bps_bo;
/* the count of entries */
int count;
/* the space can place new entries */
@@ -365,7 +362,7 @@ struct ras_err_handler_data {
};
typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
+ void *err_data,
struct amdgpu_iv_entry *entry);
struct ras_ih_data {
@@ -481,6 +478,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
return ras && (ras->supported & (1 << block));
}
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
unsigned int block);
@@ -492,15 +490,20 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- unsigned long *bps, int pages);
+ struct eeprom_table_record *bps, int pages);
int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
-static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
- bool is_baco)
+static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
{
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ /* save bad page to eeprom before gpu reset,
+ * i2c may be unstable in gpu reset
+ */
+ if (in_task())
+ amdgpu_ras_reserve_bad_pages(adev);
+
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
schedule_work(&ras->recovery_work);
return 0;
@@ -566,6 +569,13 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
int amdgpu_ras_init(struct amdgpu_device *adev);
int amdgpu_ras_fini(struct amdgpu_device *adev);
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
+int amdgpu_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block,
+ struct ras_fs_if *fs_info,
+ struct ras_ih_if *ih_info);
+void amdgpu_ras_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block,
+ struct ras_ih_if *ih_info);
int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
struct ras_common_if *head, bool enable);
@@ -599,4 +609,22 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_dispatch_if *info);
+
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+extern atomic_t amdgpu_ras_in_intr;
+
+static inline bool amdgpu_ras_intr_triggered(void)
+{
+ return !!atomic_read(&amdgpu_ras_in_intr);
+}
+
+static inline void amdgpu_ras_intr_cleared(void)
+{
+ atomic_set(&amdgpu_ras_in_intr, 0);
+}
+
+void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 8a32b5c93778..2a8e04895595 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -27,7 +27,8 @@
#include <linux/bits.h>
#include "smu_v11_0_i2c.h"
-#define EEPROM_I2C_TARGET_ADDR 0xA0
+#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8
+#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0
/*
* The 2 macros bellow represent the actual size in bytes that
@@ -83,7 +84,7 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
{
int ret = 0;
struct i2c_msg msg = {
- .addr = EEPROM_I2C_TARGET_ADDR,
+ .addr = 0,
.flags = 0,
.len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
.buf = buff,
@@ -93,6 +94,8 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
*(uint16_t *)buff = EEPROM_HDR_START;
__encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE);
+ msg.addr = control->i2c_address;
+
ret = i2c_transfer(&control->eeprom_accessor, &msg, 1);
if (ret < 1)
DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret);
@@ -100,7 +103,101 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
return ret;
}
-static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control);
+
+
+static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
+{
+ int i;
+ uint32_t tbl_sum = 0;
+
+ /* Header checksum, skip checksum field in the calculation */
+ for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
+ tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
+
+ return tbl_sum;
+}
+
+static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records,
+ int num)
+{
+ int i, j;
+ uint32_t tbl_sum = 0;
+
+ /* Records checksum */
+ for (i = 0; i < num; i++) {
+ struct eeprom_table_record *record = &records[i];
+
+ for (j = 0; j < sizeof(*record); j++) {
+ tbl_sum += *(((unsigned char *)record) + j);
+ }
+ }
+
+ return tbl_sum;
+}
+
+static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, int num)
+{
+ return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
+}
+
+/* Checksum = 256 -((sum of all table entries) mod 256) */
+static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, int num,
+ uint32_t old_hdr_byte_sum)
+{
+ /*
+ * This will update the table sum with new records.
+ *
+ * TODO: What happens when the EEPROM table is to be wrapped around
+ * and old records from start will get overridden.
+ */
+
+ /* need to recalculate updated header byte sum */
+ control->tbl_byte_sum -= old_hdr_byte_sum;
+ control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
+
+ control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
+}
+
+/* table sum mod 256 + checksum must equals 256 */
+static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, int num)
+{
+ control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
+
+ if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
+ DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
+ return false;
+ }
+
+ return true;
+}
+
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
+{
+ unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ int ret = 0;
+
+ mutex_lock(&control->tbl_mutex);
+
+ hdr->header = EEPROM_TABLE_HDR_VAL;
+ hdr->version = EEPROM_TABLE_VER;
+ hdr->first_rec_offset = EEPROM_RECORD_START;
+ hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
+
+ control->tbl_byte_sum = 0;
+ __update_tbl_checksum(control, NULL, 0, 0);
+ control->next_addr = EEPROM_RECORD_START;
+
+ ret = __update_table_header(control, buff);
+
+ mutex_unlock(&control->tbl_mutex);
+
+ return ret;
+
+}
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
{
@@ -109,7 +206,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
struct i2c_msg msg = {
- .addr = EEPROM_I2C_TARGET_ADDR,
+ .addr = 0,
.flags = I2C_M_RD,
.len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
.buf = buff,
@@ -119,9 +216,15 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
switch (adev->asic_type) {
case CHIP_VEGA20:
+ control->i2c_address = EEPROM_I2C_TARGET_ADDR_VEGA20;
ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor);
break;
+ case CHIP_ARCTURUS:
+ control->i2c_address = EEPROM_I2C_TARGET_ADDR_ARCTURUS;
+ ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor);
+ break;
+
default:
return 0;
}
@@ -131,6 +234,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
return ret;
}
+ msg.addr = control->i2c_address;
+
/* Read/Create table header from EEPROM address 0 */
ret = i2c_transfer(&control->eeprom_accessor, &msg, 1);
if (ret < 1) {
@@ -143,25 +248,18 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
if (hdr->header == EEPROM_TABLE_HDR_VAL) {
control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) /
EEPROM_TABLE_RECORD_SIZE;
+ control->tbl_byte_sum = __calc_hdr_byte_sum(control);
+ control->next_addr = EEPROM_RECORD_START;
+
DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
control->num_recs);
} else {
DRM_INFO("Creating new EEPROM table");
- hdr->header = EEPROM_TABLE_HDR_VAL;
- hdr->version = EEPROM_TABLE_VER;
- hdr->first_rec_offset = EEPROM_RECORD_START;
- hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
-
- adev->psp.ras.ras->eeprom_control.tbl_byte_sum =
- __calc_hdr_byte_sum(&adev->psp.ras.ras->eeprom_control);
- ret = __update_table_header(control, buff);
+ ret = amdgpu_ras_eeprom_reset_table(control);
}
- /* Start inserting records from here */
- adev->psp.ras.ras->eeprom_control.next_addr = EEPROM_RECORD_START;
-
return ret == 1 ? 0 : -EIO;
}
@@ -173,6 +271,9 @@ void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control)
case CHIP_VEGA20:
smu_v11_0_i2c_eeprom_control_fini(&control->eeprom_accessor);
break;
+ case CHIP_ARCTURUS:
+ smu_i2c_eeprom_fini(&adev->smu, &control->eeprom_accessor);
+ break;
default:
return;
@@ -226,8 +327,8 @@ static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *co
record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
i += 6;
- buff[i++] = record->mem_channel;
- buff[i++] = record->mcumc_id;
+ record->mem_channel = buff[i++];
+ record->mcumc_id = buff[i++];
memcpy(&tmp, buff + i, 6);
record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
@@ -266,87 +367,18 @@ static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
return curr_address;
}
-
-static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
-{
- int i;
- uint32_t tbl_sum = 0;
-
- /* Header checksum, skip checksum field in the calculation */
- for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
- tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
-
- return tbl_sum;
-}
-
-static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records,
- int num)
-{
- int i, j;
- uint32_t tbl_sum = 0;
-
- /* Records checksum */
- for (i = 0; i < num; i++) {
- struct eeprom_table_record *record = &records[i];
-
- for (j = 0; j < sizeof(*record); j++) {
- tbl_sum += *(((unsigned char *)record) + j);
- }
- }
-
- return tbl_sum;
-}
-
-static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *records, int num)
-{
- return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
-}
-
-/* Checksum = 256 -((sum of all table entries) mod 256) */
-static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *records, int num,
- uint32_t old_hdr_byte_sum)
-{
- /*
- * This will update the table sum with new records.
- *
- * TODO: What happens when the EEPROM table is to be wrapped around
- * and old records from start will get overridden.
- */
-
- /* need to recalculate updated header byte sum */
- control->tbl_byte_sum -= old_hdr_byte_sum;
- control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
-
- control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
-}
-
-/* table sum mod 256 + checksum must equals 256 */
-static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
- struct eeprom_table_record *records, int num)
-{
- control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
-
- if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
- DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
- return false;
- }
-
- return true;
-}
-
int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *records,
bool write,
int num)
{
int i, ret = 0;
- struct i2c_msg *msgs;
- unsigned char *buffs;
+ struct i2c_msg *msgs, *msg;
+ unsigned char *buffs, *buff;
+ struct eeprom_table_record *record;
struct amdgpu_device *adev = to_amdgpu_device(control);
- if (adev->asic_type != CHIP_VEGA20)
+ if (adev->asic_type != CHIP_VEGA20 && adev->asic_type != CHIP_ARCTURUS)
return 0;
buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE,
@@ -373,9 +405,9 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
* 256b
*/
for (i = 0; i < num; i++) {
- unsigned char *buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
- struct eeprom_table_record *record = &records[i];
- struct i2c_msg *msg = &msgs[i];
+ buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+ record = &records[i];
+ msg = &msgs[i];
control->next_addr = __correct_eeprom_dest_address(control->next_addr);
@@ -383,8 +415,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
* Update bits 16,17 of EEPROM address in I2C address by setting them
* to bits 1,2 of Device address byte
*/
- msg->addr = EEPROM_I2C_TARGET_ADDR |
- ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
+ msg->addr = control->i2c_address |
+ ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
msg->flags = write ? 0 : I2C_M_RD;
msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE;
msg->buf = buff;
@@ -415,8 +447,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
if (!write) {
for (i = 0; i < num; i++) {
- unsigned char *buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
- struct eeprom_table_record *record = &records[i];
+ buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+ record = &records[i];
__decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index 41f3fcb9a29b..ca78f812d436 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -50,6 +50,7 @@ struct amdgpu_ras_eeprom_control {
struct mutex tbl_mutex;
bool bus_locked;
uint32_t tbl_byte_sum;
+ uint16_t i2c_address; // 8-bit represented address
};
/*
@@ -79,6 +80,7 @@ struct eeprom_table_record {
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control);
void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control);
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *records,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index c8793e6cc3c5..6373bfb47d55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -124,13 +124,12 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
*/
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
{
- volatile u32 *dst_ptr;
u32 dws;
int r;
/* allocate clear state block */
adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
- r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
+ r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
@@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
return r;
}
- /* set up the cs buffer */
- dst_ptr = adev->gfx.rlc.cs_ptr;
- adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
- amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 5c13c503e61f..a2ee30b16212 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_sdma.h"
+#include "amdgpu_ras.h"
#define AMDGPU_CSA_SDMA_SIZE 64
/* SDMA CSA reside in the 3rd page of CSA */
@@ -83,3 +84,101 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
return csa_mc_addr;
}
+
+int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
+ void *ras_ih_info)
+{
+ int r, i;
+ struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info;
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "sdma_err_count",
+ .debugfs_name = "sdma_err_inject",
+ };
+
+ if (!ih_info)
+ return -EINVAL;
+
+ if (!adev->sdma.ras_if) {
+ adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->sdma.ras_if)
+ return -ENOMEM;
+ adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA;
+ adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->sdma.ras_if->sub_block_index = 0;
+ strcpy(adev->sdma.ras_if->name, "sdma");
+ }
+ fs_info.head = ih_info->head = *adev->sdma.ras_if;
+
+ r = amdgpu_ras_late_init(adev, adev->sdma.ras_if,
+ &fs_info, ih_info);
+ if (r)
+ goto free;
+
+ if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ if (r)
+ goto late_fini;
+ }
+ } else {
+ r = 0;
+ goto free;
+ }
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info);
+free:
+ kfree(adev->sdma.ras_if);
+ adev->sdma.ras_if = NULL;
+ return r;
+}
+
+void amdgpu_sdma_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
+ adev->sdma.ras_if) {
+ struct ras_common_if *ras_if = adev->sdma.ras_if;
+ struct ras_ih_if ih_info = {
+ .head = *ras_if,
+ /* the cb member will not be used by
+ * amdgpu_ras_interrupt_remove_handler, init it only
+ * to cheat the check in ras_late_fini
+ */
+ .cb = amdgpu_sdma_process_ras_data_cb,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
+
+int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_ras_reset_gpu(adev);
+
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->sdma.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index a9ae0d8a0589..485335267d78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -50,8 +50,18 @@ struct amdgpu_sdma_instance {
bool burst_nop;
};
+struct amdgpu_sdma_ras_funcs {
+ int (*ras_late_init)(struct amdgpu_device *adev,
+ void *ras_ih_info);
+ void (*ras_fini)(struct amdgpu_device *adev);
+ int (*query_ras_error_count)(struct amdgpu_device *adev,
+ uint32_t instance, void *ras_error_status);
+};
+
struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+ struct drm_gpu_scheduler *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES];
+ uint32_t num_sdma_sched;
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
struct amdgpu_irq_src ecc_irq;
@@ -59,6 +69,7 @@ struct amdgpu_sdma {
uint32_t srbm_soft_reset;
bool has_page_queue;
struct ras_common_if *ras_if;
+ const struct amdgpu_sdma_ras_funcs *funcs;
};
/*
@@ -104,4 +115,13 @@ struct amdgpu_sdma_instance *
amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
+int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
+ void *ras_ih_info);
+void amdgpu_sdma_ras_fini(struct amdgpu_device *adev);
+int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 95e5e93edd18..a09b6b9c27d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -129,7 +129,8 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep,
* Tries to add the fence to an existing hash entry. Returns true when an entry
* was found, false otherwise.
*/
-static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, bool explicit)
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
+ bool explicit)
{
struct amdgpu_sync_entry *e;
@@ -151,19 +152,18 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
* amdgpu_sync_fence - remember to sync to this fence
*
* @sync: sync object to add fence to
- * @fence: fence to sync to
+ * @f: fence to sync to
+ * @explicit: if this is an explicit dependency
*
+ * Add the fence to the sync object.
*/
-int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
- struct dma_fence *f, bool explicit)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ bool explicit)
{
struct amdgpu_sync_entry *e;
if (!f)
return 0;
- if (amdgpu_sync_same_dev(adev, f) &&
- amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
- amdgpu_sync_keep_later(&sync->last_vm_update, f);
if (amdgpu_sync_add_later(sync, f, explicit))
return 0;
@@ -180,6 +180,24 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
}
/**
+ * amdgpu_sync_vm_fence - remember to sync to this VM fence
+ *
+ * @adev: amdgpu device
+ * @sync: sync object to add fence to
+ * @fence: the VM fence to add
+ *
+ * Add the fence to the sync object and remember it as VM update.
+ */
+int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
+{
+ if (!fence)
+ return 0;
+
+ amdgpu_sync_keep_later(&sync->last_vm_update, fence);
+ return amdgpu_sync_fence(sync, fence, false);
+}
+
+/**
* amdgpu_sync_resv - sync to a reservation object
*
* @sync: sync object to add fences from reservation object to
@@ -204,7 +222,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
/* always sync to the exclusive fence */
f = dma_resv_get_excl(resv);
- r = amdgpu_sync_fence(adev, sync, f, false);
+ r = amdgpu_sync_fence(sync, f, false);
flist = dma_resv_get_list(resv);
if (!flist || r)
@@ -222,13 +240,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
continue;
if (amdgpu_sync_same_dev(adev, f)) {
- /* VM updates are only interesting
- * for other VM updates and moves.
+ /* VM updates only sync with moves but not with user
+ * command submissions or KFD evictions fences
*/
- if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
- (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
- ((owner == AMDGPU_FENCE_OWNER_VM) !=
- (fence_owner == AMDGPU_FENCE_OWNER_VM)))
+ if (owner == AMDGPU_FENCE_OWNER_VM &&
+ fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED)
continue;
/* Ignore fence from the same owner and explicit one as
@@ -239,7 +255,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
continue;
}
- r = amdgpu_sync_fence(adev, sync, f, false);
+ r = amdgpu_sync_fence(sync, f, false);
if (r)
break;
}
@@ -340,7 +356,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
- r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
+ r = amdgpu_sync_fence(clone, f, e->explicit);
if (r)
return r;
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index b5f1778a2319..d62c2b81d92b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -40,8 +40,9 @@ struct amdgpu_sync {
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
- struct dma_fence *f, bool explicit);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ bool explicit);
+int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
int amdgpu_sync_resv(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
struct dma_resv *resv,
@@ -49,7 +50,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
bool explicit_sync);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
-struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync,
+ bool *explicit);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index b66d29d5ffa2..b158230af8db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -138,6 +138,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
}
dma_fence_put(fence);
+ fence = NULL;
r = amdgpu_bo_kmap(vram_obj, &vram_map);
if (r) {
@@ -183,6 +184,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
}
dma_fence_put(fence);
+ fence = NULL;
r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 77674a7b9616..63e734a125fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -170,7 +170,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
__field(unsigned int, context)
__field(unsigned int, seqno)
__field(struct dma_fence *, fence)
- __field(char *, ring_name)
+ __string(ring, to_amdgpu_ring(job->base.sched)->name)
__field(u32, num_ibs)
),
@@ -179,12 +179,12 @@ TRACE_EVENT(amdgpu_cs_ioctl,
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
+ __assign_str(ring, to_amdgpu_ring(job->base.sched)->name)
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
__entry->sched_job_id, __get_str(timeline), __entry->context,
- __entry->seqno, __entry->ring_name, __entry->num_ibs)
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
);
TRACE_EVENT(amdgpu_sched_run_job,
@@ -195,7 +195,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
__string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__field(unsigned int, context)
__field(unsigned int, seqno)
- __field(char *, ring_name)
+ __string(ring, to_amdgpu_ring(job->base.sched)->name)
__field(u32, num_ibs)
),
@@ -204,12 +204,12 @@ TRACE_EVENT(amdgpu_sched_run_job,
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
+ __assign_str(ring, to_amdgpu_ring(job->base.sched)->name)
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
__entry->sched_job_id, __get_str(timeline), __entry->context,
- __entry->seqno, __entry->ring_name, __entry->num_ibs)
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
);
@@ -323,14 +323,15 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
TRACE_EVENT(amdgpu_vm_set_ptes,
TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
- uint32_t incr, uint64_t flags),
- TP_ARGS(pe, addr, count, incr, flags),
+ uint32_t incr, uint64_t flags, bool direct),
+ TP_ARGS(pe, addr, count, incr, flags, direct),
TP_STRUCT__entry(
__field(u64, pe)
__field(u64, addr)
__field(u32, count)
__field(u32, incr)
__field(u64, flags)
+ __field(bool, direct)
),
TP_fast_assign(
@@ -339,28 +340,32 @@ TRACE_EVENT(amdgpu_vm_set_ptes,
__entry->count = count;
__entry->incr = incr;
__entry->flags = flags;
+ __entry->direct = direct;
),
- TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u",
- __entry->pe, __entry->addr, __entry->incr,
- __entry->flags, __entry->count)
+ TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, "
+ "direct=%d", __entry->pe, __entry->addr, __entry->incr,
+ __entry->flags, __entry->count, __entry->direct)
);
TRACE_EVENT(amdgpu_vm_copy_ptes,
- TP_PROTO(uint64_t pe, uint64_t src, unsigned count),
- TP_ARGS(pe, src, count),
+ TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct),
+ TP_ARGS(pe, src, count, direct),
TP_STRUCT__entry(
__field(u64, pe)
__field(u64, src)
__field(u32, count)
+ __field(bool, direct)
),
TP_fast_assign(
__entry->pe = pe;
__entry->src = src;
__entry->count = count;
+ __entry->direct = direct;
),
- TP_printk("pe=%010Lx, src=%010Lx, count=%u",
- __entry->pe, __entry->src, __entry->count)
+ TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d",
+ __entry->pe, __entry->src, __entry->count,
+ __entry->direct)
);
TRACE_EVENT(amdgpu_vm_flush,
@@ -468,7 +473,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
- __field(const char *,name)
+ __string(ring, sched_job->base.sched->name)
__field(uint64_t, id)
__field(struct dma_fence *, fence)
__field(uint64_t, ctx)
@@ -476,14 +481,14 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
),
TP_fast_assign(
- __entry->name = sched_job->base.sched->name;
+ __assign_str(ring, sched_job->base.sched->name)
__entry->id = sched_job->base.id;
__entry->fence = fence;
__entry->ctx = fence->context;
__entry->seqno = fence->seqno;
),
TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
- __entry->name, __entry->id,
+ __get_str(ring), __entry->id,
__entry->fence, __entry->ctx,
__entry->seqno)
);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8e867b8b432f..dee446278417 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -35,10 +35,13 @@
#include <linux/hmm.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
+#include <linux/sched/mm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swiotlb.h>
+#include <linux/dma-buf.h>
+#include <linux/sizes.h>
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
@@ -54,6 +57,7 @@
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
+#include "amdgpu_ras.h"
#include "bif/bif_4_1_d.h"
static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
@@ -484,15 +488,12 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
{
- struct amdgpu_device *adev;
struct ttm_mem_reg *old_mem = &bo->mem;
struct ttm_mem_reg tmp_mem;
struct ttm_place placements;
struct ttm_placement placement;
int r;
- adev = amdgpu_ttm_adev(bo->bdev);
-
/* create space/pages for new_mem in GTT space */
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
@@ -543,15 +544,12 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
struct ttm_operation_ctx *ctx,
struct ttm_mem_reg *new_mem)
{
- struct amdgpu_device *adev;
struct ttm_mem_reg *old_mem = &bo->mem;
struct ttm_mem_reg tmp_mem;
struct ttm_placement placement;
struct ttm_place placements;
int r;
- adev = amdgpu_ttm_adev(bo->bdev);
-
/* make space in GTT for old_mem buffer */
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
@@ -763,6 +761,7 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
*/
struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
+ struct drm_gem_object *gobj;
u64 offset;
uint64_t userptr;
struct task_struct *usertask;
@@ -772,6 +771,20 @@ struct amdgpu_ttm_tt {
#endif
};
+#ifdef CONFIG_DRM_AMDGPU_USERPTR
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+ (1 << 0), /* HMM_PFN_VALID */
+ (1 << 1), /* HMM_PFN_WRITE */
+ 0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+ 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+ 0, /* HMM_PFN_NONE */
+ 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
/**
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
* memory and start HMM tracking CPU page table update
@@ -779,85 +792,89 @@ struct amdgpu_ttm_tt {
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
* once afterwards to stop HMM tracking
*/
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-
-#define MAX_RETRY_HMM_RANGE_FAULT 16
-
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
{
- struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
struct ttm_tt *ttm = bo->tbo.ttm;
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- struct mm_struct *mm = gtt->usertask->mm;
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
struct hmm_range *range;
+ unsigned long timeout;
+ struct mm_struct *mm;
unsigned long i;
- uint64_t *pfns;
int r = 0;
- if (!mm) /* Happens during process shutdown */
- return -ESRCH;
-
- if (unlikely(!mirror)) {
- DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
- r = -EFAULT;
- goto out;
+ mm = bo->notifier.mm;
+ if (unlikely(!mm)) {
+ DRM_DEBUG_DRIVER("BO is not registered?\n");
+ return -EFAULT;
}
- vma = find_vma(mm, start);
- if (unlikely(!vma || start < vma->vm_start)) {
- r = -EFAULT;
- goto out;
- }
- if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
- vma->vm_file)) {
- r = -EPERM;
- goto out;
- }
+ /* Another get_user_pages is running at the same time?? */
+ if (WARN_ON(gtt->range))
+ return -EFAULT;
+
+ if (!mmget_not_zero(mm)) /* Happens during process shutdown */
+ return -ESRCH;
range = kzalloc(sizeof(*range), GFP_KERNEL);
if (unlikely(!range)) {
r = -ENOMEM;
goto out;
}
+ range->notifier = &bo->notifier;
+ range->flags = hmm_range_flags;
+ range->values = hmm_range_values;
+ range->pfn_shift = PAGE_SHIFT;
+ range->start = bo->notifier.interval_tree.start;
+ range->end = bo->notifier.interval_tree.last + 1;
+ range->default_flags = hmm_range_flags[HMM_PFN_VALID];
+ if (!amdgpu_ttm_tt_is_readonly(ttm))
+ range->default_flags |= range->flags[HMM_PFN_WRITE];
- pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
- if (unlikely(!pfns)) {
+ range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns),
+ GFP_KERNEL);
+ if (unlikely(!range->pfns)) {
r = -ENOMEM;
goto out_free_ranges;
}
- amdgpu_hmm_init_range(range);
- range->default_flags = range->flags[HMM_PFN_VALID];
- range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
- 0 : range->flags[HMM_PFN_WRITE];
- range->pfn_flags_mask = 0;
- range->pfns = pfns;
- range->start = start;
- range->end = start + ttm->num_pages * PAGE_SIZE;
-
- hmm_range_register(range, mirror);
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ if (unlikely(!vma || start < vma->vm_start)) {
+ r = -EFAULT;
+ goto out_unlock;
+ }
+ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ vma->vm_file)) {
+ r = -EPERM;
+ goto out_unlock;
+ }
+ up_read(&mm->mmap_sem);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
- /*
- * Just wait for range to be valid, safe to ignore return value as we
- * will use the return value of hmm_range_fault() below under the
- * mmap_sem to ascertain the validity of the range.
- */
- hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
+retry:
+ range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
down_read(&mm->mmap_sem);
r = hmm_range_fault(range, 0);
up_read(&mm->mmap_sem);
-
- if (unlikely(r < 0))
+ if (unlikely(r <= 0)) {
+ /*
+ * FIXME: This timeout should encompass the retry from
+ * mmu_interval_read_retry() as well.
+ */
+ if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout))
+ goto retry;
goto out_free_pfns;
+ }
for (i = 0; i < ttm->num_pages; i++) {
- pages[i] = hmm_device_entry_to_page(range, pfns[i]);
+ /* FIXME: The pages cannot be touched outside the notifier_lock */
+ pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
if (unlikely(!pages[i])) {
pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
- i, pfns[i]);
+ i, range->pfns[i]);
r = -ENOMEM;
goto out_free_pfns;
@@ -865,15 +882,18 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
}
gtt->range = range;
+ mmput(mm);
return 0;
+out_unlock:
+ up_read(&mm->mmap_sem);
out_free_pfns:
- hmm_range_unregister(range);
- kvfree(pfns);
+ kvfree(range->pfns);
out_free_ranges:
kfree(range);
out:
+ mmput(mm);
return r;
}
@@ -898,15 +918,18 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
"No user pages to check\n");
if (gtt->range) {
- r = hmm_range_valid(gtt->range);
- hmm_range_unregister(gtt->range);
-
+ /*
+ * FIXME: Must always hold notifier_lock for this, and must
+ * not ignore the return code.
+ */
+ r = mmu_interval_read_retry(gtt->range->notifier,
+ gtt->range->notifier_seq);
kvfree(gtt->range->pfns);
kfree(gtt->range);
gtt->range = NULL;
}
- return r;
+ return !r;
}
#endif
@@ -987,10 +1010,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
sg_free_table(ttm->sg);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
- if (gtt->range &&
- ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
- gtt->range->pfns[0]))
- WARN_ONCE(1, "Missing get_user_page_done\n");
+ if (gtt->range) {
+ unsigned long i;
+
+ for (i = 0; i < ttm->num_pages; i++) {
+ if (ttm->pages[i] !=
+ hmm_device_entry_to_page(gtt->range,
+ gtt->range->pfns[i]))
+ break;
+ }
+
+ WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
+ }
#endif
}
@@ -1217,16 +1248,14 @@ static struct ttm_backend_func amdgpu_backend_func = {
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
- struct amdgpu_device *adev;
struct amdgpu_ttm_tt *gtt;
- adev = amdgpu_ttm_adev(bo->bdev);
-
gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
if (gtt == NULL) {
return NULL;
}
gtt->ttm.ttm.func = &amdgpu_backend_func;
+ gtt->gobj = &bo->base;
/* allocate space for the uninitialized page entries */
if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
@@ -1247,7 +1276,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
if (gtt && gtt->userptr) {
@@ -1260,7 +1288,19 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
return 0;
}
- if (slave && ttm->sg) {
+ if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
+ if (!ttm->sg) {
+ struct dma_buf_attachment *attach;
+ struct sg_table *sgt;
+
+ attach = gtt->gobj->import_attach;
+ sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ ttm->sg = sgt;
+ }
+
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
gtt->ttm.dma_address,
ttm->num_pages);
@@ -1287,9 +1327,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
*/
static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
- struct amdgpu_device *adev;
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+ struct amdgpu_device *adev;
if (gtt && gtt->userptr) {
amdgpu_ttm_tt_set_user_pages(ttm, NULL);
@@ -1298,7 +1337,16 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
return;
}
- if (slave)
+ if (ttm->sg && gtt->gobj->import_attach) {
+ struct dma_buf_attachment *attach;
+
+ attach = gtt->gobj->import_attach;
+ dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
+ ttm->sg = NULL;
+ return;
+ }
+
+ if (ttm->page_flags & TTM_PAGE_FLAG_SG)
return;
adev = amdgpu_ttm_adev(ttm->bdev);
@@ -1475,11 +1523,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
struct dma_fence *f;
int i;
- /* Don't evict VM page tables while they are busy, otherwise we can't
- * cleanly handle page faults.
- */
if (bo->type == ttm_bo_type_kernel &&
- !dma_resv_test_signaled_rcu(bo->base.resv, true))
+ !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
return false;
/* If bo is a KFD BO, check if the bo belongs to the current process.
@@ -1634,81 +1679,96 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
*/
static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
{
- struct ttm_operation_ctx ctx = { false, false };
- struct amdgpu_bo_param bp;
- int r = 0;
- int i;
- u64 vram_size = adev->gmc.visible_vram_size;
- u64 offset = adev->fw_vram_usage.start_offset;
- u64 size = adev->fw_vram_usage.size;
- struct amdgpu_bo *bo;
-
- memset(&bp, 0, sizeof(bp));
- bp.size = adev->fw_vram_usage.size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
+ uint64_t vram_size = adev->gmc.visible_vram_size;
+
adev->fw_vram_usage.va = NULL;
adev->fw_vram_usage.reserved_bo = NULL;
- if (adev->fw_vram_usage.size > 0 &&
- adev->fw_vram_usage.size <= vram_size) {
+ if (adev->fw_vram_usage.size == 0 ||
+ adev->fw_vram_usage.size > vram_size)
+ return 0;
- r = amdgpu_bo_create(adev, &bp,
- &adev->fw_vram_usage.reserved_bo);
- if (r)
- goto error_create;
+ return amdgpu_bo_create_kernel_at(adev,
+ adev->fw_vram_usage.start_offset,
+ adev->fw_vram_usage.size,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->fw_vram_usage.reserved_bo,
+ &adev->fw_vram_usage.va);
+}
- r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
- if (r)
- goto error_reserve;
+/*
+ * Memoy training reservation functions
+ */
- /* remove the original mem node and create a new one at the
- * request position
- */
- bo = adev->fw_vram_usage.reserved_bo;
- offset = ALIGN(offset, PAGE_SIZE);
- for (i = 0; i < bo->placement.num_placement; ++i) {
- bo->placements[i].fpfn = offset >> PAGE_SHIFT;
- bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
- }
+/**
+ * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free memory training reserved vram if it has been reserved.
+ */
+static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
+{
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
- ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
- r = ttm_bo_mem_space(&bo->tbo, &bo->placement,
- &bo->tbo.mem, &ctx);
- if (r)
- goto error_pin;
+ ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+ amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
+ ctx->c2p_bo = NULL;
- r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
- AMDGPU_GEM_DOMAIN_VRAM,
- adev->fw_vram_usage.start_offset,
- (adev->fw_vram_usage.start_offset +
- adev->fw_vram_usage.size));
- if (r)
- goto error_pin;
- r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
- &adev->fw_vram_usage.va);
- if (r)
- goto error_kmap;
+ return 0;
+}
+
+static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size)
+{
+ if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1) )
+ vram_size -= SZ_1M;
- amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
+ return ALIGN(vram_size, SZ_1M);
+}
+
+/**
+ * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from memory training.
+ */
+static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev)
+{
+ int ret;
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+
+ memset(ctx, 0, sizeof(*ctx));
+ if (!adev->fw_vram_usage.mem_train_support) {
+ DRM_DEBUG("memory training does not support!\n");
+ return 0;
}
- return r;
-error_kmap:
- amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
-error_pin:
- amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
-error_reserve:
- amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo);
-error_create:
- adev->fw_vram_usage.va = NULL;
- adev->fw_vram_usage.reserved_bo = NULL;
- return r;
+ ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size);
+ ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
+ ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
+
+ DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
+
+ ret = amdgpu_bo_create_kernel_at(adev,
+ ctx->c2p_train_data_offset,
+ ctx->train_data_size,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &ctx->c2p_bo,
+ NULL);
+ if (ret) {
+ DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
+ amdgpu_ttm_training_reserve_vram_fini(adev);
+ return ret;
+ }
+
+ ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
+ return 0;
}
+
/**
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
@@ -1772,6 +1832,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}
+ /*
+ *The reserved vram for memory training must be pinned to the specified
+ *place on the VRAM, so reserve it early.
+ */
+ r = amdgpu_ttm_training_reserve_vram_init(adev);
+ if (r)
+ return r;
+
/* allocate memory as required for VGA
* This is used for VGA emulation and pre-OS scanout buffers to
* avoid display artifacts while transitioning between pre-OS
@@ -1782,6 +1850,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
NULL, &stolen_vga_buf);
if (r)
return r;
+
+ /*
+ * reserve one TMR (64K) memory at the top of VRAM which holds
+ * IP Discovery data and is protected by PSP.
+ */
+ r = amdgpu_bo_create_kernel_at(adev,
+ adev->gmc.real_vram_size - DISCOVERY_TMR_SIZE,
+ DISCOVERY_TMR_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->discovery_memory,
+ NULL);
+ if (r)
+ return r;
+
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
@@ -1857,7 +1939,11 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
return;
amdgpu_ttm_debugfs_fini(adev);
+ amdgpu_ttm_training_reserve_vram_fini(adev);
+ /* return the IP Discovery TMR memory back to VRAM */
+ amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
amdgpu_ttm_fw_reserve_vram_fini(adev);
+
if (adev->mman.aper_base_kaddr)
iounmap(adev->mman.aper_base_kaddr);
adev->mman.aper_base_kaddr = NULL;
@@ -1893,11 +1979,13 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
if (enable) {
struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
+ struct drm_gpu_scheduler *sched;
ring = adev->mman.buffer_funcs_ring;
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
- r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL);
+ sched = &ring->sched;
+ r = drm_sched_entity_init(&adev->mman.entity,
+ DRM_SCHED_PRIORITY_KERNEL, &sched,
+ 1, NULL);
if (r) {
DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
r);
@@ -1953,10 +2041,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
AMDGPU_GPU_PAGE_SIZE;
- num_dw = adev->mman.buffer_funcs->copy_num_dw;
- while (num_dw & 0x7)
- num_dw++;
-
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8;
r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
@@ -2016,11 +2101,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
- num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
-
- /* for IB padding */
- while (num_dw & 0x7)
- num_dw++;
+ num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 3a6115ad0196..9ef312428231 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -360,6 +360,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_RAVEN:
case CHIP_VEGA12:
case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
case CHIP_RENOIR:
case CHIP_NAVI10:
case CHIP_NAVI14:
@@ -368,8 +369,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
- case CHIP_ARCTURUS:
- return AMDGPU_FW_LOAD_DIRECT;
default:
DRM_ERROR("Unknown firmware load type\n");
@@ -448,6 +447,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
const struct common_firmware_header *header = NULL;
const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
+ const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
if (NULL == ucode->fw)
return 0;
@@ -461,6 +461,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
header = (const struct common_firmware_header *)ucode->fw->data;
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
+ dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
(ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
@@ -471,7 +472,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM &&
ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM &&
- ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV)) {
+ ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_DMCUB)) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@@ -507,6 +509,12 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
le32_to_cpu(header->ucode_array_offset_bytes) +
le32_to_cpu(dmcu_hdr->intv_offset_bytes)),
ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCUB) {
+ ucode->ucode_size = le32_to_cpu(dmcub_hdr->inst_const_bytes);
+ memcpy(ucode->kaddr,
+ (void *)((uint8_t *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes)),
+ ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index b34f00d42049..b0e656409c03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -108,6 +108,12 @@ struct ta_firmware_header_v1_0 {
uint32_t ta_ras_ucode_version;
uint32_t ta_ras_offset_bytes;
uint32_t ta_ras_size_bytes;
+ uint32_t ta_hdcp_ucode_version;
+ uint32_t ta_hdcp_offset_bytes;
+ uint32_t ta_hdcp_size_bytes;
+ uint32_t ta_dtm_ucode_version;
+ uint32_t ta_dtm_offset_bytes;
+ uint32_t ta_dtm_size_bytes;
};
/* version_major=1, version_minor=0 */
@@ -245,6 +251,13 @@ struct dmcu_firmware_header_v1_0 {
uint32_t intv_size_bytes; /* size of interrupt vectors, in bytes */
};
+/* version_major=1, version_minor=0 */
+struct dmcub_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t inst_const_bytes; /* size of instruction region, in bytes */
+ uint32_t bss_data_bytes; /* size of bss/data region, in bytes */
+};
+
/* header is fixed size */
union amdgpu_firmware_header {
struct common_firmware_header common;
@@ -262,6 +275,7 @@ union amdgpu_firmware_header {
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct gpu_info_firmware_header_v1_0 gpu_info;
struct dmcu_firmware_header_v1_0 dmcu;
+ struct dmcub_firmware_header_v1_0 dmcub;
uint8_t raw[0x100];
};
@@ -286,10 +300,10 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_MEC2_JT,
AMDGPU_UCODE_ID_CP_MES,
AMDGPU_UCODE_ID_CP_MES_DATA,
- AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
+ AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
AMDGPU_UCODE_ID_UVD,
@@ -301,6 +315,7 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_DMCU_INTV,
AMDGPU_UCODE_ID_VCN0_RAM,
AMDGPU_UCODE_ID_VCN1_RAM,
+ AMDGPU_UCODE_ID_DMCUB,
AMDGPU_UCODE_ID_MAXIMUM,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
new file mode 100644
index 000000000000..f4d40855147b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_ras.h"
+
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "umc_err_count",
+ .debugfs_name = "umc_err_inject",
+ };
+ struct ras_ih_if ih_info = {
+ .cb = amdgpu_umc_process_ras_data_cb,
+ };
+
+ if (!adev->umc.ras_if) {
+ adev->umc.ras_if =
+ kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->umc.ras_if)
+ return -ENOMEM;
+ adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC;
+ adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->umc.ras_if->sub_block_index = 0;
+ strcpy(adev->umc.ras_if->name, "umc");
+ }
+ ih_info.head = fs_info.head = *adev->umc.ras_if;
+
+ r = amdgpu_ras_late_init(adev, adev->umc.ras_if,
+ &fs_info, &ih_info);
+ if (r)
+ goto free;
+
+ if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) {
+ r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
+ if (r)
+ goto late_fini;
+ } else {
+ r = 0;
+ goto free;
+ }
+
+ /* ras init of specific umc version */
+ if (adev->umc.funcs && adev->umc.funcs->err_cnt_init)
+ adev->umc.funcs->err_cnt_init(adev);
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info);
+free:
+ kfree(adev->umc.ras_if);
+ adev->umc.ras_if = NULL;
+ return r;
+}
+
+void amdgpu_umc_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
+ adev->umc.ras_if) {
+ struct ras_common_if *ras_if = adev->umc.ras_if;
+ struct ras_ih_if ih_info = {
+ .head = *ras_if,
+ .cb = amdgpu_umc_process_ras_data_cb,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
+
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ if (adev->umc.funcs &&
+ adev->umc.funcs->query_ras_error_count)
+ adev->umc.funcs->query_ras_error_count(adev, ras_error_status);
+
+ if (adev->umc.funcs &&
+ adev->umc.funcs->query_ras_error_address &&
+ adev->umc.max_ras_err_cnt_per_query) {
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear error status
+ * even NOMEM error is encountered
+ */
+ if(!err_data->err_addr)
+ DRM_WARN("Failed to alloc memory for umc error address record!\n");
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ adev->umc.funcs->query_ras_error_address(adev, ras_error_status);
+ }
+
+ /* only uncorrectable error needs gpu reset */
+ if (err_data->ue_count) {
+ if (err_data->err_addr_cnt &&
+ amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
+ err_data->err_addr_cnt))
+ DRM_WARN("Failed to add ras bad page!\n");
+
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ kfree(err_data->err_addr);
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->umc.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 975afa04df09..a615a1eb750b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -21,47 +21,14 @@
#ifndef __AMDGPU_UMC_H__
#define __AMDGPU_UMC_H__
-/* implement 64 bits REG operations via 32 bits interface */
-#define RREG64_UMC(reg) (RREG32(reg) | \
- ((uint64_t)RREG32((reg) + 1) << 32))
-#define WREG64_UMC(reg, v) \
- do { \
- WREG32((reg), lower_32_bits(v)); \
- WREG32((reg) + 1, upper_32_bits(v)); \
- } while (0)
-
-/*
- * void (*func)(struct amdgpu_device *adev, struct ras_err_data *err_data,
- * uint32_t umc_reg_offset, uint32_t channel_index)
- */
-#define amdgpu_umc_for_each_channel(func) \
- struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; \
- uint32_t umc_inst, channel_inst, umc_reg_offset, channel_index; \
- for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) { \
- /* enable the index mode to query eror count per channel */ \
- adev->umc.funcs->enable_umc_index_mode(adev, umc_inst); \
- for (channel_inst = 0; \
- channel_inst < adev->umc.channel_inst_num; \
- channel_inst++) { \
- /* calc the register offset according to channel instance */ \
- umc_reg_offset = adev->umc.channel_offs * channel_inst; \
- /* get channel index of interleaved memory */ \
- channel_index = adev->umc.channel_idx_tbl[ \
- umc_inst * adev->umc.channel_inst_num + channel_inst]; \
- (func)(adev, err_data, umc_reg_offset, channel_index); \
- } \
- } \
- adev->umc.funcs->disable_umc_index_mode(adev);
-
struct amdgpu_umc_funcs {
- void (*ras_init)(struct amdgpu_device *adev);
+ void (*err_cnt_init)(struct amdgpu_device *adev);
+ int (*ras_late_init)(struct amdgpu_device *adev);
void (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
void (*query_ras_error_address)(struct amdgpu_device *adev,
void *ras_error_status);
- void (*enable_umc_index_mode)(struct amdgpu_device *adev,
- uint32_t umc_instance);
- void (*disable_umc_index_mode)(struct amdgpu_device *adev);
+ void (*init_registers)(struct amdgpu_device *adev);
};
struct amdgpu_umc {
@@ -75,8 +42,17 @@ struct amdgpu_umc {
uint32_t channel_offs;
/* channel index table of interleaved memory */
const uint32_t *channel_idx_tbl;
+ struct ras_common_if *ras_if;
const struct amdgpu_umc_funcs *funcs;
};
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_umc_ras_fini(struct amdgpu_device *adev);
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index b2c364b8695f..a92f3b18e657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -39,6 +39,8 @@
#include "cikd.h"
#include "uvd/uvd_4_2_d.h"
+#include "amdgpu_ras.h"
+
/* 1 second timeout */
#define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000)
@@ -297,6 +299,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
int i, j;
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
drm_sched_entity_destroy(&adev->uvd.entity);
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
@@ -327,12 +330,13 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
+ struct drm_gpu_scheduler *sched;
int r;
ring = &adev->uvd.inst[0].ring;
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL);
+ sched = &ring->sched;
+ r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
if (r) {
DRM_ERROR("Failed setting up UVD kernel entity.\n");
return r;
@@ -346,6 +350,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
unsigned size;
void *ptr;
int i, j;
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -372,8 +377,16 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
if (!adev->uvd.inst[j].saved_bo)
return -ENOMEM;
- memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+ /* re-write 0 since err_event_athub will corrupt VCPU buffer */
+ if (in_ras_intr)
+ memset(adev->uvd.inst[j].saved_bo, 0, size);
+ else
+ memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
}
+
+ if (in_ras_intr)
+ DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index b70b3c45bb29..59ddba137946 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -80,6 +80,11 @@ MODULE_FIRMWARE(FIRMWARE_VEGA12);
MODULE_FIRMWARE(FIRMWARE_VEGA20);
static void amdgpu_vce_idle_work_handler(struct work_struct *work);
+static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence);
+static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence);
/**
* amdgpu_vce_init - allocate memory, load vce firmware
@@ -211,6 +216,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
if (adev->vce.vcpu_bo == NULL)
return 0;
+ cancel_delayed_work_sync(&adev->vce.idle_work);
drm_sched_entity_destroy(&adev->vce.entity);
amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
@@ -234,12 +240,13 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
int amdgpu_vce_entity_init(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
+ struct drm_gpu_scheduler *sched;
int r;
ring = &adev->vce.ring[0];
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL);
+ sched = &ring->sched;
+ r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up VCE run queue.\n");
return r;
@@ -428,14 +435,15 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
*
* Open up a stream for HW test
*/
-int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
+static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 1024;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -444,7 +452,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
/* stitch together an VCE create msg */
ib->length_dw = 0;
@@ -476,8 +484,8 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[ib->length_dw++] = 0x00000014; /* len */
ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000001;
for (i = ib->length_dw; i < ib_size_dw; ++i)
@@ -507,8 +515,8 @@ err:
*
* Close up a stream for HW test or if userspace failed to do so
*/
-int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- bool direct, struct dma_fence **fence)
+static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence)
{
const unsigned ib_size_dw = 1024;
struct amdgpu_job *job;
@@ -644,7 +652,7 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
if ((addr + (uint64_t)size) >
(mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n",
+ DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
addr, lo, hi);
return -EINVAL;
}
@@ -1110,13 +1118,20 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
/* skip vce ring1/2 ib test for now, since it's not reliable */
if (ring != &ring->adev->vce.ring[0])
return 0;
- r = amdgpu_vce_get_create_msg(ring, 1, NULL);
+ r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
@@ -1132,5 +1147,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 30ea54dd9117..d6d83a3ec803 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -58,10 +58,6 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
int amdgpu_vce_entity_init(struct amdgpu_device *adev);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
-int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence);
-int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- bool direct, struct dma_fence **fence);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 7a6beb2e7c4e..f96464e2c157 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -28,19 +28,10 @@
#include <linux/module.h>
#include <linux/pci.h>
-#include <drm/drm.h>
-
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"
-#include "soc15_common.h"
-
-#include "vcn/vcn_1_0_offset.h"
-#include "vcn/vcn_1_0_sh_mask.h"
-
-/* 1 second timeout */
-#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
/* Firmware Names */
#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
@@ -84,6 +75,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
break;
case CHIP_ARCTURUS:
fw_name = FIRMWARE_ARCTURUS;
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = true;
break;
case CHIP_RENOIR:
fw_name = FIRMWARE_RENOIR;
@@ -174,15 +168,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
return r;
}
- }
- if (adev->vcn.indirect_sram) {
- r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo,
- &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r);
- return r;
+ if (adev->vcn.indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
+ return r;
+ }
}
}
@@ -193,15 +187,16 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
int i, j;
- if (adev->vcn.indirect_sram) {
- amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo,
- &adev->vcn.dpg_sram_gpu_addr,
- (void **)&adev->vcn.dpg_sram_cpu_addr);
- }
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
continue;
+ if (adev->vcn.indirect_sram) {
+ amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
+ &adev->vcn.inst[j].dpg_sram_gpu_addr,
+ (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
+ }
kvfree(adev->vcn.inst[j].saved_bo);
amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
@@ -212,8 +207,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
-
- amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg);
}
release_firmware(adev->vcn.fw);
@@ -294,6 +287,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
continue;
+
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
}
@@ -306,26 +300,17 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg))
- new_state.jpeg = VCN_DPG_STATE__PAUSE;
- else
- new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
-
- adev->vcn.pause_dpg_mode(adev, &new_state);
+ adev->vcn.pause_dpg_mode(adev, j, &new_state);
}
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg);
fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
fences += fence[j];
}
if (fences == 0) {
amdgpu_gfx_off_ctrl(adev, true);
- if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
- else
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
} else {
schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
@@ -338,11 +323,8 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
- if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
- else
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_UNGATE);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
}
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
@@ -358,17 +340,10 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg))
- new_state.jpeg = VCN_DPG_STATE__PAUSE;
- else
- new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
-
if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
- new_state.jpeg = VCN_DPG_STATE__PAUSE;
- adev->vcn.pause_dpg_mode(adev, &new_state);
+ adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
}
}
@@ -518,9 +493,14 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
+ struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence;
long r;
+ /* temporarily disable ib test for sriov */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
if (r)
goto error;
@@ -569,13 +549,14 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
}
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -583,14 +564,14 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x0000000b;
ib->ptr[ib->length_dw++] = 0x00000014;
@@ -621,13 +602,14 @@ err:
}
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -635,14 +617,14 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = handle;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x0000000b;
ib->ptr[ib->length_dw++] = 0x00000014;
@@ -674,129 +656,38 @@ err:
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
+ struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
- r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
- if (r)
- goto error;
-
- r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
- if (r)
- goto error;
-
- r = dma_fence_wait_timeout(fence, false, timeout);
- if (r == 0)
- r = -ETIMEDOUT;
- else if (r > 0)
- r = 0;
-
-error:
- dma_fence_put(fence);
- return r;
-}
-
-int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t tmp = 0;
- unsigned i;
- int r;
-
- WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
- r = amdgpu_ring_alloc(ring, 3);
- if (r)
- return r;
-
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.jpeg_pitch, 0));
- amdgpu_ring_write(ring, 0xDEADBEEF);
- amdgpu_ring_commit(ring);
-
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);
- if (tmp == 0xDEADBEEF)
- break;
- udelay(1);
- }
-
- if (i >= adev->usec_timeout)
- r = -ETIMEDOUT;
-
- return r;
-}
-
-static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
-{
- struct amdgpu_device *adev = ring->adev;
- struct amdgpu_job *job;
- struct amdgpu_ib *ib;
- struct dma_fence *f = NULL;
- const unsigned ib_size_dw = 16;
- int i, r;
+ /* temporarily disable ib test for sriov */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
if (r)
return r;
- ib = &job->ibs[0];
-
- ib->ptr[0] = PACKETJ(adev->vcn.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0);
- ib->ptr[1] = 0xDEADBEEF;
- for (i = 2; i < 16; i += 2) {
- ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
- ib->ptr[i+1] = 0;
- }
- ib->length_dw = 16;
-
- r = amdgpu_job_submit_direct(job, ring, &f);
+ r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
- goto err;
-
- if (fence)
- *fence = dma_fence_get(f);
- dma_fence_put(f);
-
- return 0;
-
-err:
- amdgpu_job_free(job);
- return r;
-}
-
-int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t tmp = 0;
- unsigned i;
- struct dma_fence *fence = NULL;
- long r = 0;
+ goto error;
- r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
+ r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
if (r)
goto error;
r = dma_fence_wait_timeout(fence, false, timeout);
- if (r == 0) {
+ if (r == 0)
r = -ETIMEDOUT;
- goto error;
- } else if (r < 0) {
- goto error;
- } else {
+ else if (r > 0)
r = 0;
- }
-
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);
- if (tmp == 0xDEADBEEF)
- break;
- udelay(1);
- }
- if (i >= adev->usec_timeout)
- r = -ETIMEDOUT;
-
- dma_fence_put(fence);
error:
+ dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index dface275c81a..d6deb0eb1e15 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -31,6 +31,7 @@
#define AMDGPU_VCN_MAX_ENC_RINGS 3
#define AMDGPU_MAX_VCN_INSTANCES 2
+#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES
#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1)
@@ -56,33 +57,41 @@
#define VCN_VID_IP_ADDRESS_2_0 0x0
#define VCN_AON_IP_ADDRESS_2_0 0x30000
-#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
- ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
+#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b
+#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1
+#define mmUVD_REG_XX_MASK 0x026c
+#define mmUVD_REG_XX_MASK_BASE_IDX 1
+
+/* 1 second timeout */
+#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+#define RREG32_SOC15_DPG_MODE(ip, inst_idx, reg, mask, sram_sel) \
+ ({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
UVD_DPG_LMA_CTL__MASK_EN_MASK | \
- ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
+ ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
- RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \
+ RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); \
})
-#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
+#define WREG32_SOC15_DPG_MODE(ip, inst_idx, reg, value, mask, sram_sel) \
do { \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
- ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
+ ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
} while (0)
-#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst, reg) \
+#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst_idx, reg) \
({ \
uint32_t internal_reg_offset, addr; \
bool video_range, aon_range; \
\
- addr = (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
+ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
addr <<= 2; \
video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \
((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \
@@ -100,27 +109,27 @@
internal_reg_offset >>= 2; \
})
-#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) \
- ({ \
- WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \
- (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
- mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
- offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
- RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); \
+#define RREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, mask_en) \
+ ({ \
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
+ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \
})
-#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect) \
- do { \
- if (!indirect) { \
- WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); \
- WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \
- (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
- mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
- offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
- } else { \
- *adev->vcn.dpg_sram_curr_addr++ = offset; \
- *adev->vcn.dpg_sram_curr_addr++ = value; \
- } \
+#define WREG32_SOC15_DPG_MODE_2_0(inst_idx, offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \
+ } \
} while (0)
enum engine_status_constants {
@@ -158,7 +167,6 @@ struct amdgpu_vcn_reg{
unsigned ib_size;
unsigned gp_scratch8;
unsigned scratch9;
- unsigned jpeg_pitch;
};
struct amdgpu_vcn_inst {
@@ -168,9 +176,12 @@ struct amdgpu_vcn_inst {
void *saved_bo;
struct amdgpu_ring ring_dec;
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
- struct amdgpu_ring ring_jpeg;
struct amdgpu_irq_src irq;
struct amdgpu_vcn_reg external;
+ struct amdgpu_bo *dpg_sram_bo;
+ void *dpg_sram_cpu_addr;
+ uint64_t dpg_sram_gpu_addr;
+ uint32_t *dpg_sram_curr_addr;
};
struct amdgpu_vcn {
@@ -182,18 +193,18 @@ struct amdgpu_vcn {
struct dpg_pause_state pause_state;
bool indirect_sram;
- struct amdgpu_bo *dpg_sram_bo;
- void *dpg_sram_cpu_addr;
- uint64_t dpg_sram_gpu_addr;
- uint32_t *dpg_sram_curr_addr;
uint8_t num_vcn_inst;
- struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
- struct amdgpu_vcn_reg internal;
+ struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
+ struct amdgpu_vcn_reg internal;
+ struct drm_gpu_scheduler *vcn_enc_sched[AMDGPU_MAX_VCN_ENC_RINGS];
+ struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
+ uint32_t num_vcn_enc_sched;
+ uint32_t num_vcn_dec_sched;
unsigned harvest_config;
int (*pause_dpg_mode)(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state);
+ int inst_idx, struct dpg_pause_state *new_state);
};
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
@@ -209,7 +220,4 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
-int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring);
-int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout);
-
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index e32ae906d797..adc813cde8e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -45,98 +45,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
adev->pg_flags = 0;
}
-uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
-{
- signed long r, cnt = 0;
- unsigned long flags;
- uint32_t seq;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *ring = &kiq->ring;
-
- BUG_ON(!ring->funcs->emit_rreg);
-
- spin_lock_irqsave(&kiq->ring_lock, flags);
- amdgpu_ring_alloc(ring, 32);
- amdgpu_ring_emit_rreg(ring, reg);
- amdgpu_fence_emit_polling(ring, &seq);
- amdgpu_ring_commit(ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-
- /* don't wait anymore for gpu reset case because this way may
- * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
- * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
- * never return if we keep waiting in virt_kiq_rreg, which cause
- * gpu_recover() hang there.
- *
- * also don't wait anymore for IRQ context
- * */
- if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
- goto failed_kiq_read;
-
- might_sleep();
- while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
- msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
- }
-
- if (cnt > MAX_KIQ_REG_TRY)
- goto failed_kiq_read;
-
- return adev->wb.wb[adev->virt.reg_val_offs];
-
-failed_kiq_read:
- pr_err("failed to read reg:%x\n", reg);
- return ~0;
-}
-
-void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
-{
- signed long r, cnt = 0;
- unsigned long flags;
- uint32_t seq;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
- struct amdgpu_ring *ring = &kiq->ring;
-
- BUG_ON(!ring->funcs->emit_wreg);
-
- spin_lock_irqsave(&kiq->ring_lock, flags);
- amdgpu_ring_alloc(ring, 32);
- amdgpu_ring_emit_wreg(ring, reg, v);
- amdgpu_fence_emit_polling(ring, &seq);
- amdgpu_ring_commit(ring);
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
-
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-
- /* don't wait anymore for gpu reset case because this way may
- * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
- * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
- * never return if we keep waiting in virt_kiq_rreg, which cause
- * gpu_recover() hang there.
- *
- * also don't wait anymore for IRQ context
- * */
- if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
- goto failed_kiq_write;
-
- might_sleep();
- while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
-
- msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
- r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
- }
-
- if (cnt > MAX_KIQ_REG_TRY)
- goto failed_kiq_write;
-
- return;
-
-failed_kiq_write:
- pr_err("failed to write reg:%x\n", reg);
-}
-
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask)
@@ -379,54 +287,3 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
}
}
}
-
-static uint32_t parse_clk(char *buf, bool min)
-{
- char *ptr = buf;
- uint32_t clk = 0;
-
- do {
- ptr = strchr(ptr, ':');
- if (!ptr)
- break;
- ptr+=2;
- if (kstrtou32(ptr, 10, &clk))
- return 0;
- } while (!min);
-
- return clk * 100;
-}
-
-uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest)
-{
- char *buf = NULL;
- uint32_t clk = 0;
-
- buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf);
- clk = parse_clk(buf, lowest);
-
- kfree(buf);
-
- return clk;
-}
-
-uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
-{
- char *buf = NULL;
- uint32_t clk = 0;
-
- buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf);
- clk = parse_clk(buf, lowest);
-
- kfree(buf);
-
- return clk;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b0b2bdc750df..daaf909d009a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -57,8 +57,6 @@ struct amdgpu_virt_ops {
int (*reset_gpu)(struct amdgpu_device *adev);
int (*wait_reset)(struct amdgpu_device *adev);
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
- int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
- int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
};
/*
@@ -85,8 +83,8 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2,
/* VRAM LOST by GIM */
AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4,
- /* HW PERF SIM in GIM */
- AMDGIM_FEATURE_HW_PERF_SIMULATION = (1 << 3),
+ /* PP ONE VF MODE in GIM */
+ AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
};
struct amd_sriov_msg_pf2vf_info_header {
@@ -257,8 +255,6 @@ struct amdgpu_virt {
struct amdgpu_vf_error_buffer vf_errors;
struct amdgpu_virt_fw_reserve fw_reserve;
uint32_t gim_feature;
- /* protect DPM events to GIM */
- struct mutex dpm_mutex;
uint32_t reg_access_mode;
};
@@ -286,13 +282,11 @@ static inline bool is_virtual_machine(void)
#endif
}
-#define amdgim_is_hwperf(adev) \
- ((adev)->virt.gim_feature & AMDGIM_FEATURE_HW_PERF_SIMULATION)
+#define amdgpu_sriov_is_pp_one_vf(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
-uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
-void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t rreg1,
uint32_t ref, uint32_t mask);
@@ -306,6 +300,4 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
unsigned int key,
unsigned int chksum);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
-uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
-uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5251352f5922..d16231d6a790 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -83,6 +83,32 @@ struct amdgpu_prt_cb {
};
/**
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+ mutex_lock(&vm->eviction_lock);
+ vm->saved_flags = memalloc_nofs_save();
+}
+
+static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+ if (mutex_trylock(&vm->eviction_lock)) {
+ vm->saved_flags = memalloc_nofs_save();
+ return 1;
+ }
+ return 0;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+ memalloc_nofs_restore(vm->saved_flags);
+ mutex_unlock(&vm->eviction_lock);
+}
+
+/**
* amdgpu_vm_level_shift - return the addr shift for each level
*
* @adev: amdgpu_device pointer
@@ -130,7 +156,8 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
if (level == adev->vm_manager.root_level)
/* For the root directory */
- return round_up(adev->vm_manager.max_pfn, 1ULL << shift) >> shift;
+ return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
+ >> shift;
else if (level != AMDGPU_VM_PTB)
/* Everything in between */
return 512;
@@ -341,7 +368,7 @@ static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
}
-/**
+/*
* amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
*/
struct amdgpu_vm_pt_cursor {
@@ -482,6 +509,7 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
*
* @adev: amdgpu_device structure
* @vm: amdgpu_vm structure
+ * @start: optional cursor to start with
* @cursor: state to initialize
*
* Starts a deep first traversal of the PD/PT tree.
@@ -535,7 +563,7 @@ static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
amdgpu_vm_pt_ancestor(cursor);
}
-/**
+/*
* for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs
*/
#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \
@@ -560,12 +588,20 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
{
entry->priority = 0;
entry->tv.bo = &vm->root.base.bo->tbo;
- /* One for the VM updates, one for TTM and one for the CS job */
- entry->tv.num_shared = 3;
+ /* One for TTM and one for the CS job */
+ entry->tv.num_shared = 2;
entry->user_pages = NULL;
list_add(&entry->tv.head, validated);
}
+/**
+ * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag
+ *
+ * @bo: BO which was removed from the LRU
+ *
+ * Make sure the bulk_moveable flag is updated when a BO is removed from the
+ * LRU.
+ */
void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
{
struct amdgpu_bo *abo;
@@ -600,19 +636,18 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
- struct ttm_bo_global *glob = adev->mman.bdev.glob;
struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) {
- spin_lock(&glob->lru_lock);
+ spin_lock(&ttm_bo_glob.lru_lock);
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&ttm_bo_glob.lru_lock);
return;
}
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&glob->lru_lock);
+ spin_lock(&ttm_bo_glob.lru_lock);
list_for_each_entry(bo_base, &vm->idle, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
@@ -624,7 +659,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
&vm->lru_bulk_move);
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&ttm_bo_glob.lru_lock);
vm->bulk_moveable = true;
}
@@ -647,7 +682,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *param)
{
struct amdgpu_vm_bo_base *bo_base, *tmp;
- int r = 0;
+ int r;
vm->bulk_moveable &= list_empty(&vm->evicted);
@@ -656,7 +691,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
r = validate(param, bo);
if (r)
- break;
+ return r;
if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
@@ -669,7 +704,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
}
- return r;
+ amdgpu_vm_eviction_lock(vm);
+ vm->evicting = false;
+ amdgpu_vm_eviction_unlock(vm);
+
+ return 0;
}
/**
@@ -693,6 +732,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
* @adev: amdgpu_device pointer
* @vm: VM to clear BO from
* @bo: BO to clear
+ * @direct: use a direct update
*
* Root PD needs to be reserved when calling this.
*
@@ -701,7 +741,8 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
*/
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
- struct amdgpu_bo *bo)
+ struct amdgpu_bo *bo,
+ bool direct)
{
struct ttm_operation_ctx ctx = { true, false };
unsigned level = adev->vm_manager.root_level;
@@ -760,6 +801,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
+ params.direct = direct;
r = vm->update_funcs->prepare(&params, AMDGPU_FENCE_OWNER_KFD, NULL);
if (r)
@@ -813,10 +855,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requesting vm
+ * @level: the page table level
+ * @direct: use a direct update
* @bp: resulting BO allocation parameters
*/
static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int level, struct amdgpu_bo_param *bp)
+ int level, bool direct,
+ struct amdgpu_bo_param *bp)
{
memset(bp, 0, sizeof(*bp));
@@ -831,6 +876,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
else if (!vm->root.base.bo || vm->root.base.bo->shadow)
bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
bp->type = ttm_bo_type_kernel;
+ bp->no_wait_gpu = direct;
if (vm->root.base.bo)
bp->resv = vm->root.base.bo->tbo.base.resv;
}
@@ -841,6 +887,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
* @adev: amdgpu_device pointer
* @vm: VM to allocate page tables for
* @cursor: Which page table to allocate
+ * @direct: use a direct update
*
* Make sure a specific page table or directory is allocated.
*
@@ -850,7 +897,8 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
*/
static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *cursor)
+ struct amdgpu_vm_pt_cursor *cursor,
+ bool direct)
{
struct amdgpu_vm_pt *entry = cursor->entry;
struct amdgpu_bo_param bp;
@@ -871,7 +919,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
if (entry->base.bo)
return 0;
- amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
+ amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp);
r = amdgpu_bo_create(adev, &bp, &pt);
if (r)
@@ -883,7 +931,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
amdgpu_vm_bo_base_init(&entry->base, vm, pt);
- r = amdgpu_vm_clear_bo(adev, vm, pt);
+ r = amdgpu_vm_clear_bo(adev, vm, pt, direct);
if (r)
goto error_free_pt;
@@ -1020,7 +1068,8 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
* Returns:
* 0 on success, errno otherwise.
*/
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ bool need_pipe_sync)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
@@ -1034,10 +1083,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
id->oa_base != job->oa_base ||
id->oa_size != job->oa_size);
bool vm_flush_needed = job->vm_needs_flush;
- bool pasid_mapping_needed = id->pasid != job->pasid ||
- !id->pasid_mapping ||
- !dma_fence_is_signaled(id->pasid_mapping);
struct dma_fence *fence = NULL;
+ bool pasid_mapping_needed = false;
unsigned patch_offset = 0;
int r;
@@ -1047,6 +1094,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
pasid_mapping_needed = true;
}
+ mutex_lock(&id_mgr->lock);
+ if (id->pasid != job->pasid || !id->pasid_mapping ||
+ !dma_fence_is_signaled(id->pasid_mapping))
+ pasid_mapping_needed = true;
+ mutex_unlock(&id_mgr->lock);
+
gds_switch_needed &= !!ring->funcs->emit_gds_switch;
vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
@@ -1086,9 +1139,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
}
if (pasid_mapping_needed) {
+ mutex_lock(&id_mgr->lock);
id->pasid = job->pasid;
dma_fence_put(id->pasid_mapping);
id->pasid_mapping = dma_fence_get(fence);
+ mutex_unlock(&id_mgr->lock);
}
dma_fence_put(fence);
@@ -1172,10 +1227,10 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
return result;
}
-/*
+/**
* amdgpu_vm_update_pde - update a single level in the hierarchy
*
- * @param: parameters for the update
+ * @params: parameters for the update
* @vm: requested vm
* @entry: entry to update
*
@@ -1199,7 +1254,7 @@ static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params,
return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags);
}
-/*
+/**
* amdgpu_vm_invalidate_pds - mark all PDs as invalid
*
* @adev: amdgpu_device pointer
@@ -1218,19 +1273,20 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
amdgpu_vm_bo_relocated(&entry->base);
}
-/*
- * amdgpu_vm_update_directories - make sure that all directories are valid
+/**
+ * amdgpu_vm_update_pdes - make sure that all directories are valid
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @direct: submit directly to the paging queue
*
* Makes sure all directories are up to date.
*
* Returns:
* 0 for success, error for failure.
*/
-int amdgpu_vm_update_directories(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, bool direct)
{
struct amdgpu_vm_update_params params;
int r;
@@ -1241,6 +1297,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
+ params.direct = direct;
r = vm->update_funcs->prepare(&params, AMDGPU_FENCE_OWNER_VM, NULL);
if (r)
@@ -1268,7 +1325,7 @@ error:
return r;
}
-/**
+/*
* amdgpu_vm_update_flags - figure out flags for PTE updates
*
* Make sure to set the right flags for the PTEs at the desired level.
@@ -1391,7 +1448,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
uint64_t incr, entry_end, pe_start;
struct amdgpu_bo *pt;
- r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
+ /* make sure that the page tables covering the address range are
+ * actually allocated
+ */
+ r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor,
+ params->direct);
if (r)
return r;
@@ -1463,7 +1524,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
} while (frag_start < entry_end);
if (amdgpu_vm_pt_descendant(adev, &cursor)) {
- /* Free all child entries */
+ /* Free all child entries.
+ * Update the tables with the flags and addresses and free up subsequent
+ * tables in the case of huge pages or freed up areas.
+ * This is the maximum you can free, because all other page tables are not
+ * completely covered by the range and so potentially still in use.
+ */
while (cursor.pfn < frag_start) {
amdgpu_vm_free_pts(adev, params->vm, &cursor);
amdgpu_vm_pt_next(adev, &cursor);
@@ -1482,13 +1548,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
* amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
*
* @adev: amdgpu_device pointer
- * @exclusive: fence we need to sync to
- * @pages_addr: DMA addresses to use for mapping
* @vm: requested vm
+ * @direct: direct submission in a page fault
+ * @exclusive: fence we need to sync to
* @start: start of mapped range
* @last: last mapped entry
* @flags: flags for the entries
* @addr: addr to set the area to
+ * @pages_addr: DMA addresses to use for mapping
* @fence: optional resulting fence
*
* Fill in the page table entries between @start and @last.
@@ -1497,11 +1564,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
* 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, bool direct,
struct dma_fence *exclusive,
- dma_addr_t *pages_addr,
- struct amdgpu_vm *vm,
uint64_t start, uint64_t last,
uint64_t flags, uint64_t addr,
+ dma_addr_t *pages_addr,
struct dma_fence **fence)
{
struct amdgpu_vm_update_params params;
@@ -1511,21 +1578,32 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
+ params.direct = direct;
params.pages_addr = pages_addr;
/* sync to everything except eviction fences on unmapping */
if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_KFD;
+ amdgpu_vm_eviction_lock(vm);
+ if (vm->evicting) {
+ r = -EBUSY;
+ goto error_unlock;
+ }
+
r = vm->update_funcs->prepare(&params, owner, exclusive);
if (r)
- return r;
+ goto error_unlock;
r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
if (r)
- return r;
+ goto error_unlock;
- return vm->update_funcs->commit(&params, fence);
+ r = vm->update_funcs->commit(&params, fence);
+
+error_unlock:
+ amdgpu_vm_eviction_unlock(vm);
+ return r;
}
/**
@@ -1569,27 +1647,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
flags &= ~AMDGPU_PTE_WRITEABLE;
- flags &= ~AMDGPU_PTE_EXECUTABLE;
- flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
-
- if (adev->asic_type >= CHIP_NAVI10) {
- flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
- flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
- } else {
- flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
- flags |= (mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK);
- }
-
- if ((mapping->flags & AMDGPU_PTE_PRT) &&
- (adev->asic_type >= CHIP_VEGA10)) {
- flags |= AMDGPU_PTE_PRT;
- if (adev->asic_type >= CHIP_NAVI10) {
- flags |= AMDGPU_PTE_SNOOPED;
- flags |= AMDGPU_PTE_LOG;
- flags |= AMDGPU_PTE_SYSTEM;
- }
- flags &= ~AMDGPU_PTE_VALID;
- }
+ /* Apply ASIC specific mapping flags */
+ amdgpu_gmc_get_vm_pte(adev, mapping, &flags);
trace_amdgpu_vm_bo_update(mapping);
@@ -1633,7 +1692,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
dma_addr = pages_addr;
} else {
addr = pages_addr[pfn];
- max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+ max_entries = count *
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE;
}
} else if (flags & AMDGPU_PTE_VALID) {
@@ -1642,9 +1702,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
}
last = min((uint64_t)mapping->last, start + max_entries - 1);
- r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
+ r = amdgpu_vm_bo_update_mapping(adev, vm, false, exclusive,
start, last, flags, addr,
- fence);
+ dma_addr, fence);
if (r)
return r;
@@ -1672,8 +1732,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
* Returns:
* 0 for success, -EINVAL for failure.
*/
-int amdgpu_vm_bo_update(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va,
+int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
bool clear)
{
struct amdgpu_bo *bo = bo_va->base.bo;
@@ -1700,7 +1759,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
pages_addr = ttm->dma_address;
}
- exclusive = dma_resv_get_excl(bo->tbo.base.resv);
+ exclusive = bo->tbo.moving;
}
if (bo) {
@@ -1731,12 +1790,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
return r;
}
- if (vm->use_cpu_for_update) {
- /* Flush HDP */
- mb();
- amdgpu_asic_flush_hdp(adev, NULL);
- }
-
/* If the BO is not in its preferred location add it back to
* the evicted list so that it gets validated again on the
* next command submission.
@@ -1744,7 +1797,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) {
uint32_t mem_type = bo->tbo.mem.mem_type;
- if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
+ if (!(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(mem_type)))
amdgpu_vm_bo_evicted(&bo_va->base);
else
amdgpu_vm_bo_idle(&bo_va->base);
@@ -1938,9 +1992,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
mapping->start < AMDGPU_GMC_HOLE_START)
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
- r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
+ r = amdgpu_vm_bo_update_mapping(adev, vm, false, NULL,
mapping->start, mapping->last,
- init_pte_value, 0, &f);
+ init_pte_value, 0, NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
@@ -2486,6 +2540,41 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
}
/**
+ * amdgpu_vm_evictable - check if we can evict a VM
+ *
+ * @bo: A page table of the VM.
+ *
+ * Check if it is possible to evict a VM.
+ */
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *bo_base = bo->vm_bo;
+
+ /* Page tables of a destroyed VM can go away immediately */
+ if (!bo_base || !bo_base->vm)
+ return true;
+
+ /* Don't evict VM page tables while they are busy */
+ if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
+ return false;
+
+ /* Try to block ongoing updates */
+ if (!amdgpu_vm_eviction_trylock(bo_base->vm))
+ return false;
+
+ /* Don't evict VM page tables while they are updated */
+ if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
+ !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
+ amdgpu_vm_eviction_unlock(bo_base->vm);
+ return false;
+ }
+
+ bo_base->vm->evicting = true;
+ amdgpu_vm_eviction_unlock(bo_base->vm);
+ return true;
+}
+
+/**
* amdgpu_vm_bo_invalidate - mark the bo as invalid
*
* @adev: amdgpu_device pointer
@@ -2647,8 +2736,16 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
*/
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
{
- return dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv,
- true, true, timeout);
+ timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv,
+ true, true, timeout);
+ if (timeout <= 0)
+ return timeout;
+
+ timeout = dma_fence_wait_timeout(vm->last_direct, true, timeout);
+ if (timeout <= 0)
+ return timeout;
+
+ return dma_fence_wait_timeout(vm->last_delayed, true, timeout);
}
/**
@@ -2682,13 +2779,22 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
spin_lock_init(&vm->invalidated_lock);
INIT_LIST_HEAD(&vm->freed);
- /* create scheduler entity for page table updates */
- r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs,
- adev->vm_manager.vm_pte_num_rqs, NULL);
+
+ /* create scheduler entities for page table updates */
+ r = drm_sched_entity_init(&vm->direct, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
if (r)
return r;
+ r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
+ if (r)
+ goto error_free_direct;
+
vm->pte_support_ats = false;
+ vm->is_compute_context = false;
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@@ -2702,7 +2808,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+ WARN_ONCE((vm->use_cpu_for_update &&
+ !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
if (vm->use_cpu_for_update)
@@ -2710,13 +2817,18 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
else
vm->update_funcs = &amdgpu_vm_sdma_funcs;
vm->last_update = NULL;
+ vm->last_direct = dma_fence_get_stub();
+ vm->last_delayed = dma_fence_get_stub();
+
+ mutex_init(&vm->eviction_lock);
+ vm->evicting = false;
- amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp);
+ amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
r = amdgpu_bo_create(adev, &bp, &root);
if (r)
- goto error_free_sched_entity;
+ goto error_free_delayed;
r = amdgpu_bo_reserve(root, true);
if (r)
@@ -2728,7 +2840,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
- r = amdgpu_vm_clear_bo(adev, vm, root);
+ r = amdgpu_vm_clear_bo(adev, vm, root, false);
if (r)
goto error_unreserve;
@@ -2759,8 +2871,13 @@ error_free_root:
amdgpu_bo_unref(&vm->root.base.bo);
vm->root.base.bo = NULL;
-error_free_sched_entity:
- drm_sched_entity_destroy(&vm->entity);
+error_free_delayed:
+ dma_fence_put(vm->last_direct);
+ dma_fence_put(vm->last_delayed);
+ drm_sched_entity_destroy(&vm->delayed);
+
+error_free_direct:
+ drm_sched_entity_destroy(&vm->direct);
return r;
}
@@ -2801,6 +2918,7 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @pasid: pasid to use
*
* This only works on GFX VMs that don't have any BOs added and no
* page tables allocated yet.
@@ -2816,7 +2934,8 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
* Returns:
* 0 for success, -errno for errors.
*/
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid)
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned int pasid)
{
bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
int r;
@@ -2848,7 +2967,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
*/
if (pte_support_ats != vm->pte_support_ats) {
vm->pte_support_ats = pte_support_ats;
- r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo);
+ r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false);
if (r)
goto free_idr;
}
@@ -2858,7 +2977,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
AMDGPU_VM_USE_CPU_FOR_COMPUTE);
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+ WARN_ONCE((vm->use_cpu_for_update &&
+ !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
if (vm->use_cpu_for_update)
@@ -2867,6 +2987,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
vm->update_funcs = &amdgpu_vm_sdma_funcs;
dma_fence_put(vm->last_update);
vm->last_update = NULL;
+ vm->is_compute_context = true;
if (vm->pasid) {
unsigned long flags;
@@ -2921,6 +3042,7 @@ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
}
vm->pasid = 0;
+ vm->is_compute_context = false;
}
/**
@@ -2937,31 +3059,26 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
struct amdgpu_bo_va_mapping *mapping, *tmp;
bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
struct amdgpu_bo *root;
- int i, r;
+ int i;
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+ root = amdgpu_bo_ref(vm->root.base.bo);
+ amdgpu_bo_reserve(root, true);
if (vm->pasid) {
unsigned long flags;
spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+ vm->pasid = 0;
}
- drm_sched_entity_destroy(&vm->entity);
+ dma_fence_wait(vm->last_direct, false);
+ dma_fence_put(vm->last_direct);
+ dma_fence_wait(vm->last_delayed, false);
+ dma_fence_put(vm->last_delayed);
- if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
- dev_err(adev->dev, "still active bo inside vm\n");
- }
- rbtree_postorder_for_each_entry_safe(mapping, tmp,
- &vm->va.rb_root, rb) {
- /* Don't remove the mapping here, we don't want to trigger a
- * rebalance and the tree is about to be destroyed anyway.
- */
- list_del(&mapping->list);
- kfree(mapping);
- }
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
amdgpu_vm_prt_fini(adev, vm);
@@ -2972,16 +3089,26 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
}
- root = amdgpu_bo_ref(vm->root.base.bo);
- r = amdgpu_bo_reserve(root, true);
- if (r) {
- dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
- } else {
- amdgpu_vm_free_pts(adev, vm, NULL);
- amdgpu_bo_unreserve(root);
- }
+ amdgpu_vm_free_pts(adev, vm, NULL);
+ amdgpu_bo_unreserve(root);
amdgpu_bo_unref(&root);
WARN_ON(vm->root.base.bo);
+
+ drm_sched_entity_destroy(&vm->direct);
+ drm_sched_entity_destroy(&vm->delayed);
+
+ if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
+ dev_err(adev->dev, "still active bo inside vm\n");
+ }
+ rbtree_postorder_for_each_entry_safe(mapping, tmp,
+ &vm->va.rb_root, rb) {
+ /* Don't remove the mapping here, we don't want to trigger a
+ * rebalance and the tree is about to be destroyed anyway.
+ */
+ list_del(&mapping->list);
+ kfree(mapping);
+ }
+
dma_fence_put(vm->last_update);
for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
amdgpu_vmid_free_reserved(adev, vm, i);
@@ -3065,8 +3192,9 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
switch (args->in.op) {
case AMDGPU_VM_OP_RESERVE_VMID:
- /* current, we only have requirement to reserve vmid from gfxhub */
- r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
+ /* We only have requirement to reserve vmid from gfxhub */
+ r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
+ AMDGPU_GFXHUB_0);
if (r)
return r;
break;
@@ -3109,13 +3237,97 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
*/
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
{
- if (!vm->task_info.pid) {
- vm->task_info.pid = current->pid;
- get_task_comm(vm->task_info.task_name, current);
+ if (vm->task_info.pid)
+ return;
- if (current->group_leader->mm == current->mm) {
- vm->task_info.tgid = current->group_leader->pid;
- get_task_comm(vm->task_info.process_name, current->group_leader);
- }
+ vm->task_info.pid = current->pid;
+ get_task_comm(vm->task_info.task_name, current);
+
+ if (current->group_leader->mm != current->mm)
+ return;
+
+ vm->task_info.tgid = current->group_leader->pid;
+ get_task_comm(vm->task_info.process_name, current->group_leader);
+}
+
+/**
+ * amdgpu_vm_handle_fault - graceful handling of VM faults.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ *
+ * Try to gracefully handle a VM fault. Return true if the fault was handled and
+ * shouldn't be reported any more.
+ */
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+ uint64_t addr)
+{
+ struct amdgpu_bo *root;
+ uint64_t value, flags;
+ struct amdgpu_vm *vm;
+ long r;
+
+ spin_lock(&adev->vm_manager.pasid_lock);
+ vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+ if (vm)
+ root = amdgpu_bo_ref(vm->root.base.bo);
+ else
+ root = NULL;
+ spin_unlock(&adev->vm_manager.pasid_lock);
+
+ if (!root)
+ return false;
+
+ r = amdgpu_bo_reserve(root, true);
+ if (r)
+ goto error_unref;
+
+ /* Double check that the VM still exists */
+ spin_lock(&adev->vm_manager.pasid_lock);
+ vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+ if (vm && vm->root.base.bo != root)
+ vm = NULL;
+ spin_unlock(&adev->vm_manager.pasid_lock);
+ if (!vm)
+ goto error_unlock;
+
+ addr /= AMDGPU_GPU_PAGE_SIZE;
+ flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
+ AMDGPU_PTE_SYSTEM;
+
+ if (vm->is_compute_context) {
+ /* Intentionally setting invalid PTE flag
+ * combination to force a no-retry-fault
+ */
+ flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
+ AMDGPU_PTE_TF;
+ value = 0;
+
+ } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+ /* Redirect the access to the dummy page */
+ value = adev->dummy_page_addr;
+ flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
+ AMDGPU_PTE_WRITEABLE;
+
+ } else {
+ /* Let the hw retry silently on the PTE */
+ value = 0;
}
+
+ r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
+ flags, value, NULL, NULL);
+ if (r)
+ goto error_unlock;
+
+ r = amdgpu_vm_update_pdes(adev, vm, true);
+
+error_unlock:
+ amdgpu_bo_unreserve(root);
+ if (r < 0)
+ DRM_ERROR("Can't handle page fault (%ld)\n", r);
+
+error_unref:
+ amdgpu_bo_unref(&root);
+
+ return false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 2eda3a8c330d..b4640ab38c95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -30,6 +30,7 @@
#include <drm/gpu_scheduler.h>
#include <drm/drm_file.h>
#include <drm/ttm/ttm_bo_driver.h>
+#include <linux/sched/mm.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
@@ -99,6 +100,9 @@ struct amdgpu_bo_list_entry;
#define AMDGPU_VM_FAULT_STOP_FIRST 1
#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
+/* Reserve 4MB VRAM for page tables */
+#define AMDGPU_VM_RESERVED_VRAM (4ULL << 20)
+
/* max number of VMHUB */
#define AMDGPU_MAX_VMHUBS 3
#define AMDGPU_GFXHUB_0 0
@@ -199,6 +203,11 @@ struct amdgpu_vm_update_params {
struct amdgpu_vm *vm;
/**
+ * @direct: if changes should be made directly
+ */
+ bool direct;
+
+ /**
* @pages_addr:
*
* DMA addresses to use for mapping
@@ -231,6 +240,13 @@ struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
+ /* Lock to prevent eviction while we are updating page tables
+ * use vm_eviction_lock/unlock(vm)
+ */
+ struct mutex eviction_lock;
+ bool evicting;
+ unsigned int saved_flags;
+
/* BOs who needs a validation */
struct list_head evicted;
@@ -254,8 +270,13 @@ struct amdgpu_vm {
struct amdgpu_vm_pt root;
struct dma_fence *last_update;
- /* Scheduler entity for page table updates */
- struct drm_sched_entity entity;
+ /* Scheduler entities for page table updates */
+ struct drm_sched_entity direct;
+ struct drm_sched_entity delayed;
+
+ /* Last submission to the scheduler entities */
+ struct dma_fence *last_direct;
+ struct dma_fence *last_delayed;
unsigned int pasid;
/* dedicated to vm */
@@ -289,6 +310,8 @@ struct amdgpu_vm {
struct ttm_lru_bulk_move lru_bulk_move;
/* mark whether can do the bulk move */
bool bulk_moveable;
+ /* Flag to indicate if VM is used for compute */
+ bool is_compute_context;
};
struct amdgpu_vm_manager {
@@ -308,8 +331,8 @@ struct amdgpu_vm_manager {
u64 vram_base_offset;
/* vm pte handling */
const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
- struct drm_sched_rq *vm_pte_rqs[AMDGPU_MAX_RINGS];
- unsigned vm_pte_num_rqs;
+ struct drm_gpu_scheduler *vm_pte_scheds[AMDGPU_MAX_RINGS];
+ unsigned vm_pte_num_scheds;
struct amdgpu_ring *page_fault;
/* partial resident texture handling */
@@ -357,8 +380,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*callback)(void *p, struct amdgpu_bo *bo),
void *param);
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
-int amdgpu_vm_update_directories(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, bool direct);
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence);
@@ -367,6 +390,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo *bo, bool evicted);
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
@@ -404,6 +428,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
struct amdgpu_task_info *task_info);
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+ uint64_t addr);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 5222d165abfc..73fec7a0ced5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -49,13 +49,6 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner,
{
int r;
- /* Wait for PT BOs to be idle. PTs share the same resv. object
- * as the root PD BO
- */
- r = amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true);
- if (unlikely(r))
- return r;
-
/* Wait for any BO move to be completed */
if (exclusive) {
r = dma_fence_wait(exclusive, true);
@@ -63,7 +56,14 @@ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner,
return r;
}
- return 0;
+ /* Don't wait for submissions during page fault */
+ if (p->direct)
+ return 0;
+
+ /* Wait for PT BOs to be idle. PTs share the same resv. object
+ * as the root PD BO
+ */
+ return amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true);
}
/**
@@ -89,7 +89,7 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
pe += (unsigned long)amdgpu_bo_kptr(bo);
- trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
+ trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
for (i = 0; i < count; i++) {
value = p->pages_addr ?
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 61fc584cbb1a..19b7f80758f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -68,17 +68,19 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
if (r)
return r;
- r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
- if (r)
- return r;
+ p->num_dw_left = ndw;
- r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv,
- owner, false);
+ /* Wait for moves to be completed */
+ r = amdgpu_sync_fence(&p->job->sync, exclusive, false);
if (r)
return r;
- p->num_dw_left = ndw;
- return 0;
+ /* Don't wait for any submissions during page fault handling */
+ if (p->direct)
+ return 0;
+
+ return amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv,
+ owner, false);
}
/**
@@ -93,24 +95,30 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
struct dma_fence **fence)
{
- struct amdgpu_bo *root = p->vm->root.base.bo;
struct amdgpu_ib *ib = p->job->ibs;
+ struct drm_sched_entity *entity;
+ struct dma_fence *f, *tmp;
struct amdgpu_ring *ring;
- struct dma_fence *f;
int r;
- ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
+ entity = p->direct ? &p->vm->direct : &p->vm->delayed;
+ ring = container_of(entity->rq->sched, struct amdgpu_ring, sched);
WARN_ON(ib->length_dw == 0);
amdgpu_ring_pad_ib(ring, ib);
WARN_ON(ib->length_dw > p->num_dw_left);
- r = amdgpu_job_submit(p->job, &p->vm->entity,
- AMDGPU_FENCE_OWNER_VM, &f);
+ r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f);
if (r)
goto error;
- amdgpu_bo_fence(root, f, true);
- if (fence)
+ tmp = dma_fence_get(f);
+ if (p->direct)
+ swap(p->vm->last_direct, tmp);
+ else
+ swap(p->vm->last_delayed, tmp);
+ dma_fence_put(tmp);
+
+ if (fence && !p->direct)
swap(*fence, f);
dma_fence_put(f);
return 0;
@@ -120,7 +128,6 @@ error:
return r;
}
-
/**
* amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
*
@@ -141,7 +148,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
src += p->num_dw_left * 4;
pe += amdgpu_bo_gpu_offset(bo);
- trace_amdgpu_vm_copy_ptes(pe, src, count);
+ trace_amdgpu_vm_copy_ptes(pe, src, count, p->direct);
amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
}
@@ -168,7 +175,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
struct amdgpu_ib *ib = p->job->ibs;
pe += amdgpu_bo_gpu_offset(bo);
- trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
+ trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct);
if (count < 3) {
amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
count, incr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 3a9d8c15fe9f..82a3299e53c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -23,6 +23,9 @@
*/
#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_atomfirmware.h"
+#include "atom.h"
struct amdgpu_vram_mgr {
struct drm_mm mm;
@@ -101,6 +104,39 @@ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]));
}
+static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+
+ switch (adev->gmc.vram_vendor) {
+ case SAMSUNG:
+ return snprintf(buf, PAGE_SIZE, "samsung\n");
+ case INFINEON:
+ return snprintf(buf, PAGE_SIZE, "infineon\n");
+ case ELPIDA:
+ return snprintf(buf, PAGE_SIZE, "elpida\n");
+ case ETRON:
+ return snprintf(buf, PAGE_SIZE, "etron\n");
+ case NANYA:
+ return snprintf(buf, PAGE_SIZE, "nanya\n");
+ case HYNIX:
+ return snprintf(buf, PAGE_SIZE, "hynix\n");
+ case MOSEL:
+ return snprintf(buf, PAGE_SIZE, "mosel\n");
+ case WINBOND:
+ return snprintf(buf, PAGE_SIZE, "winbond\n");
+ case ESMT:
+ return snprintf(buf, PAGE_SIZE, "esmt\n");
+ case MICRON:
+ return snprintf(buf, PAGE_SIZE, "micron\n");
+ default:
+ return snprintf(buf, PAGE_SIZE, "unknown\n");
+ }
+}
+
static DEVICE_ATTR(mem_info_vram_total, S_IRUGO,
amdgpu_mem_info_vram_total_show, NULL);
static DEVICE_ATTR(mem_info_vis_vram_total, S_IRUGO,
@@ -109,6 +145,8 @@ static DEVICE_ATTR(mem_info_vram_used, S_IRUGO,
amdgpu_mem_info_vram_used_show, NULL);
static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO,
amdgpu_mem_info_vis_vram_used_show, NULL);
+static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO,
+ amdgpu_mem_info_vram_vendor, NULL);
/**
* amdgpu_vram_mgr_init - init VRAM manager and DRM MM
@@ -154,6 +192,11 @@ static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man,
DRM_ERROR("Failed to create device file mem_info_vis_vram_used\n");
return ret;
}
+ ret = device_create_file(adev->dev, &dev_attr_mem_info_vram_vendor);
+ if (ret) {
+ DRM_ERROR("Failed to create device file mem_info_vram_vendor\n");
+ return ret;
+ }
return 0;
}
@@ -180,6 +223,7 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man)
device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_total);
device_remove_file(adev->dev, &dev_attr_mem_info_vram_used);
device_remove_file(adev->dev, &dev_attr_mem_info_vis_vram_used);
+ device_remove_file(adev->dev, &dev_attr_mem_info_vram_vendor);
return 0;
}
@@ -275,7 +319,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
struct drm_mm_node *nodes;
enum drm_mm_insert_mode mode;
unsigned long lpfn, num_nodes, pages_per_node, pages_left;
- uint64_t vis_usage = 0, mem_bytes;
+ uint64_t vis_usage = 0, mem_bytes, max_bytes;
unsigned i;
int r;
@@ -283,9 +327,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
if (!lpfn)
lpfn = man->size;
+ max_bytes = adev->gmc.mc_vram_size;
+ if (tbo->type != ttm_bo_type_kernel)
+ max_bytes -= AMDGPU_VM_RESERVED_VRAM;
+
/* bail out quickly if there's likely not enough VRAM for this BO */
mem_bytes = (u64)mem->num_pages << PAGE_SHIFT;
- if (atomic64_add_return(mem_bytes, &mgr->usage) > adev->gmc.mc_vram_size) {
+ if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) {
atomic64_sub(mem_bytes, &mgr->usage);
mem->mm_node = NULL;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 65aae75f80fd..a97af422575a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_smu.h"
+#include "amdgpu_ras.h"
#include "df/df_3_6_offset.h"
static DEFINE_MUTEX(xgmi_mutex);
@@ -145,16 +146,16 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200);
ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208);
- fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in);
+ fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in);
if (fica_out != 0x1f)
pr_err("xGMI error counters not enabled!\n");
- fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in);
+ fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in);
if ((fica_out & 0xffff) == 2)
error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63);
- adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
+ adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
return snprintf(buf, PAGE_SIZE, "%d\n", error_count);
}
@@ -260,6 +261,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
INIT_LIST_HEAD(&tmp->device_list);
mutex_init(&tmp->hive_lock);
mutex_init(&tmp->reset_lock);
+ task_barrier_init(&tmp->tb);
if (lock)
mutex_lock(&tmp->hive_lock);
@@ -273,22 +275,49 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
{
int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+ struct amdgpu_device *tmp_adev;
+ bool update_hive_pstate = true;
+ bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20;
if (!hive)
return 0;
- if (hive->pstate == pstate)
- return 0;
+ mutex_lock(&hive->hive_lock);
+
+ if (hive->pstate == pstate) {
+ adev->pstate = is_high_pstate ? pstate : adev->pstate;
+ goto out;
+ }
dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
- if (is_support_sw_smu_xgmi(adev))
- ret = smu_set_xgmi_pstate(&adev->smu, pstate);
- if (ret)
+ ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate);
+ if (ret) {
dev_err(adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret);
+ goto out;
+ }
+
+ /* Update device pstate */
+ adev->pstate = pstate;
+
+ /*
+ * Update the hive pstate only all devices of the hive
+ * are in the same pstate
+ */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ if (tmp_adev->pstate != adev->pstate) {
+ update_hive_pstate = false;
+ break;
+ }
+ }
+ if (update_hive_pstate || is_high_pstate)
+ hive->pstate = pstate;
+
+out:
+ mutex_unlock(&hive->hive_lock);
return ret;
}
@@ -363,6 +392,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
+ /* Set default device pstate */
+ adev->pstate = -1;
+
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
@@ -371,6 +403,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
top_info->num_nodes = count;
hive->number_devices = count;
+ task_barrier_add_task(&hive->tb);
+
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
/* update node list for other device in the hive */
@@ -433,7 +467,57 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
mutex_destroy(&hive->hive_lock);
mutex_destroy(&hive->reset_lock);
} else {
+ task_barrier_rem_task(&hive->tb);
amdgpu_xgmi_sysfs_rem_dev_info(adev, hive);
mutex_unlock(&hive->hive_lock);
}
}
+
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "xgmi_wafl_err_count",
+ .debugfs_name = "xgmi_wafl_err_inject",
+ };
+
+ if (!adev->gmc.xgmi.supported ||
+ adev->gmc.xgmi.num_physical_nodes == 0)
+ return 0;
+
+ if (!adev->gmc.xgmi.ras_if) {
+ adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->gmc.xgmi.ras_if)
+ return -ENOMEM;
+ adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
+ adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gmc.xgmi.ras_if->sub_block_index = 0;
+ strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl");
+ }
+ ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
+ &fs_info, &ih_info);
+ if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
+ kfree(adev->gmc.xgmi.ras_if);
+ adev->gmc.xgmi.ras_if = NULL;
+ }
+
+ return r;
+}
+
+void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
+ adev->gmc.xgmi.ras_if) {
+ struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index fbcee31788c4..74011fbc2251 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -22,6 +22,7 @@
#ifndef __AMDGPU_XGMI_H__
#define __AMDGPU_XGMI_H__
+#include <drm/task_barrier.h>
#include "amdgpu_psp.h"
struct amdgpu_hive_info {
@@ -33,6 +34,7 @@ struct amdgpu_hive_info {
struct device_attribute dev_attr;
struct amdgpu_device *adev;
int pstate; /*0 -- low , 1 -- high , -1 unknown*/
+ struct task_barrier tb;
};
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock);
@@ -42,6 +44,8 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev);
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
index 4853899b1824..fda99c958c3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
@@ -24,7 +24,6 @@
#include "soc15.h"
#include "soc15_common.h"
-#include "soc15_hw_ip.h"
#include "arct_ip_offset.h"
int arct_reg_base_init(struct amdgpu_device *adev)
@@ -52,6 +51,8 @@ int arct_reg_base_init(struct amdgpu_device *adev)
adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i]));
adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
+ adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i]));
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
index d9cc746af5e6..847ca9b3ce4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
@@ -74,9 +74,9 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
case CHIP_VEGA20:
case CHIP_RAVEN:
athub_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
athub_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
index ceb9aa4df0e7..921a69abda55 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
@@ -77,9 +77,9 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
case CHIP_NAVI14:
case CHIP_NAVI12:
athub_v2_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
athub_v2_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 6858cde9fc5d..ea702a64f807 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -361,7 +361,6 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
struct drm_connector *connector)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- struct amdgpu_connector_atom_dig *dig_connector;
int panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
u16 dp_bridge = amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector);
u8 tmp;
@@ -369,8 +368,6 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
if (!amdgpu_connector->con_priv)
return panel_mode;
- dig_connector = amdgpu_connector->con_priv;
-
if (dp_bridge != ENCODER_OBJECT_ID_NONE) {
/* DP bridge chips */
if (drm_dp_dpcd_readb(&amdgpu_connector->ddc_bus->aux,
@@ -713,7 +710,6 @@ void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder,
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
- struct amdgpu_encoder_atom_dig *dig;
struct amdgpu_connector *amdgpu_connector;
struct amdgpu_connector_atom_dig *dig_connector;
struct amdgpu_atombios_dp_link_train_info dp_info;
@@ -721,7 +717,6 @@ void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder,
if (!amdgpu_encoder->enc_priv)
return;
- dig = amdgpu_encoder->enc_priv;
amdgpu_connector = to_amdgpu_connector(connector);
if (!amdgpu_connector->con_priv)
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index 980c363b1a0a..b4cc7c55fa16 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -76,11 +76,6 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan,
}
args.lpI2CDataOut = cpu_to_le16(out);
} else {
- if (num > ATOM_MAX_HW_I2C_READ) {
- DRM_ERROR("hw i2c: tried to read too many bytes (%d vs 255)\n", num);
- r = -EINVAL;
- goto done;
- }
args.ucRegIndex = 0;
args.lpI2CDataOut = 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index b81bb414fcb3..006f21ef7ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -966,6 +966,25 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
{mmGRBM_STATUS},
+ {mmGRBM_STATUS2},
+ {mmGRBM_STATUS_SE0},
+ {mmGRBM_STATUS_SE1},
+ {mmGRBM_STATUS_SE2},
+ {mmGRBM_STATUS_SE3},
+ {mmSRBM_STATUS},
+ {mmSRBM_STATUS2},
+ {mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET},
+ {mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET},
+ {mmCP_STAT},
+ {mmCP_STALLED_STAT1},
+ {mmCP_STALLED_STAT2},
+ {mmCP_STALLED_STAT3},
+ {mmCP_CPF_BUSY_STAT},
+ {mmCP_CPF_STALLED_STAT1},
+ {mmCP_CPF_STATUS},
+ {mmCP_CPC_BUSY_STAT},
+ {mmCP_CPC_STALLED_STAT1},
+ {mmCP_CPC_STATUS},
{mmGB_ADDR_CONFIG},
{mmMC_ARB_RAMCFG},
{mmGB_TILE_MODE0},
@@ -1270,15 +1289,15 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
}
/**
- * cik_asic_reset - soft reset GPU
+ * cik_asic_pci_config_reset - soft reset GPU
*
* @adev: amdgpu_device pointer
*
- * Look up which blocks are hung and attempt
- * to reset them.
+ * Use PCI Config method to reset the GPU.
+ *
* Returns 0 for success.
*/
-static int cik_asic_reset(struct amdgpu_device *adev)
+static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
{
int r;
@@ -1291,10 +1310,62 @@ static int cik_asic_reset(struct amdgpu_device *adev)
return r;
}
+static bool cik_asic_supports_baco(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ return amdgpu_dpm_is_baco_supported(adev);
+ default:
+ return false;
+ }
+}
+
static enum amd_reset_method
cik_asic_reset_method(struct amdgpu_device *adev)
{
- return AMD_RESET_METHOD_LEGACY;
+ bool baco_reset;
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ /* disable baco reset until it works */
+ /* smu7_asic_get_baco_capability(adev, &baco_reset); */
+ baco_reset = false;
+ break;
+ default:
+ baco_reset = false;
+ break;
+ }
+
+ if (baco_reset)
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_LEGACY;
+}
+
+/**
+ * cik_asic_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up which blocks are hung and attempt
+ * to reset them.
+ * Returns 0 for success.
+ */
+static int cik_asic_reset(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ if (!adev->in_suspend)
+ amdgpu_inc_vram_lost(adev);
+ r = amdgpu_dpm_baco_reset(adev);
+ } else {
+ r = cik_asic_pci_config_reset(adev);
+ }
+
+ return r;
}
static u32 cik_get_config_memsize(struct amdgpu_device *adev)
@@ -1384,7 +1455,6 @@ static int cik_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -1419,12 +1489,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(adev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
return;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1434,14 +1499,17 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
@@ -1465,15 +1533,23 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
for (i = 0; i < 10; i++) {
/* check status */
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1486,26 +1562,45 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
msleep(100);
/* linkctl */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
/* linkctl2 */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1520,15 +1615,16 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
- tmp16 |= 3; /* gen3 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
- tmp16 |= 2; /* gen2 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1; /* gen1 */
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
@@ -1842,6 +1938,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.get_pcie_usage = &cik_get_pcie_usage,
.need_reset_on_init = &cik_need_reset_on_init,
.get_pcie_replay_count = &cik_get_pcie_replay_count,
+ .supports_baco = &cik_asic_supports_baco,
};
static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h
index 54c625a2e570..f91ab4c246b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik.h
@@ -31,4 +31,5 @@ void cik_srbm_select(struct amdgpu_device *adev,
int cik_set_ip_blocks(struct amdgpu_device *adev);
void legacy_doorbell_index_init(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c45304f1047c..580d3f93d670 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
u32 extra_bits = vmid & 0xf;
/* IB packet must end on a 8 DW boundary */
- cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
@@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
@@ -1372,16 +1372,14 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
- struct drm_gpu_scheduler *sched;
unsigned i;
adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version cik_sdma_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 645550e7caf5..40d2ac723dd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -330,9 +330,11 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -368,6 +370,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
amdgpu_irq_get(adev, &adev->hpd_irq,
amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
/**
@@ -382,9 +385,11 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -397,6 +402,7 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->hpd_irq,
amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
static u32 dce_v10_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1219,10 +1225,12 @@ static void dce_v10_0_afmt_audio_select_pin(struct drm_encoder *encoder)
static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder,
struct drm_display_mode *mode)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 tmp;
int interlace = 0;
@@ -1230,12 +1238,14 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder,
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1261,10 +1271,12 @@ static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder,
static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 tmp;
u8 *sadb = NULL;
@@ -1273,12 +1285,14 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1313,10 +1327,12 @@ static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder
static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
struct cea_sad *sads;
int i, sad_count;
@@ -1339,12 +1355,14 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1352,10 +1370,10 @@ static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
- if (sad_count <= 0) {
+ if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
return;
- }
BUG_ON(!sads);
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index d9f470632b2c..898ef72d423c 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -348,9 +348,11 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -385,6 +387,7 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
/**
@@ -399,9 +402,11 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -413,6 +418,7 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1245,10 +1251,12 @@ static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder)
static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
struct drm_display_mode *mode)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 tmp;
int interlace = 0;
@@ -1256,12 +1264,14 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1287,10 +1297,12 @@ static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder,
static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 tmp;
u8 *sadb = NULL;
@@ -1299,12 +1311,14 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1339,10 +1353,12 @@ static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder
static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
struct cea_sad *sads;
int i, sad_count;
@@ -1365,12 +1381,14 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
if (!dig || !dig->afmt || !dig->afmt->pin)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1378,10 +1396,10 @@ static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
- if (sad_count <= 0) {
+ if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
return;
- }
BUG_ON(!sads);
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 3eb2e7429269..db15a112becc 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -281,9 +281,11 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -309,7 +311,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
-
+ drm_connector_list_iter_end(&iter);
}
/**
@@ -324,9 +326,11 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -338,6 +342,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1124,20 +1129,24 @@ static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder)
static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder,
struct drm_display_mode *mode)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
int interlace = 0;
u32 tmp;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1164,21 +1173,25 @@ static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder,
static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u8 *sadb = NULL;
int sad_count;
u32 tmp;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1221,10 +1234,12 @@ static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
struct cea_sad *sads;
int i, sad_count;
@@ -1244,12 +1259,14 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
};
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1257,10 +1274,10 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
- if (sad_count <= 0) {
+ if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
return;
- }
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
u32 tmp = 0;
@@ -1632,6 +1649,7 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder,
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
int bpc = 8;
@@ -1639,12 +1657,14 @@ static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder,
if (!dig || !dig->afmt)
return;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index a16c5e9e610e..f06c9022c1fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -275,9 +275,11 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -303,6 +305,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
/**
@@ -317,9 +320,11 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
{
struct drm_device *dev = adev->ddev;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
u32 tmp;
- list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
@@ -331,6 +336,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
+ drm_connector_list_iter_end(&iter);
}
static u32 dce_v8_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
@@ -1157,10 +1163,12 @@ static void dce_v8_0_afmt_audio_select_pin(struct drm_encoder *encoder)
static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder,
struct drm_display_mode *mode)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 tmp = 0, offset;
@@ -1169,12 +1177,14 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder,
offset = dig->afmt->pin->offset;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1214,10 +1224,12 @@ static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder,
static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
u32 offset, tmp;
u8 *sadb = NULL;
@@ -1228,12 +1240,14 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
offset = dig->afmt->pin->offset;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1263,11 +1277,13 @@ static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
{
- struct amdgpu_device *adev = encoder->dev->dev_private;
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
u32 offset;
struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
struct cea_sad *sads;
int i, sad_count;
@@ -1292,12 +1308,14 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
offset = dig->afmt->pin->offset;
- list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
amdgpu_connector = to_amdgpu_connector(connector);
break;
}
}
+ drm_connector_list_iter_end(&iter);
if (!amdgpu_connector) {
DRM_ERROR("Couldn't find encoder's connector\n");
@@ -1305,10 +1323,10 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
- if (sad_count <= 0) {
+ if (sad_count < 0)
DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
return;
- }
BUG_ON(!sads);
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
index 844c03868248..d6aca1c08068 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -31,6 +31,13 @@ static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
static void df_v1_7_sw_init(struct amdgpu_device *adev)
{
+ adev->df.hash_status.hash_64k = false;
+ adev->df.hash_status.hash_2m = false;
+ adev->df.hash_status.hash_1g = false;
+}
+
+static void df_v1_7_sw_fini(struct amdgpu_device *adev)
+{
}
static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev,
@@ -62,7 +69,7 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
{
int fb_channel_number;
- fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+ fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
return df_v1_7_channel_number[fb_channel_number];
}
@@ -73,7 +80,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
u32 tmp;
/* Put DF on broadcast mode */
- adev->df_funcs->enable_broadcast_mode(adev, true);
+ adev->df.funcs->enable_broadcast_mode(adev, true);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
@@ -88,7 +95,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
}
/* Exit boradcast mode */
- adev->df_funcs->enable_broadcast_mode(adev, false);
+ adev->df.funcs->enable_broadcast_mode(adev, false);
}
static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
@@ -111,6 +118,7 @@ static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
const struct amdgpu_df_funcs df_v1_7_funcs = {
.sw_init = df_v1_7_sw_init,
+ .sw_fini = df_v1_7_sw_fini,
.enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
.get_fb_channel_number = df_v1_7_get_fb_channel_number,
.get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 5850c8e34caa..5a1bd8ed1a6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -27,6 +27,9 @@
#include "df/df_3_6_offset.h"
#include "df/df_3_6_sh_mask.h"
+#define DF_3_6_SMN_REG_INST_DIST 0x8
+#define DF_3_6_INST_CNT 8
+
static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
16, 32, 0, 0, 0, 2, 4, 8};
@@ -99,8 +102,8 @@ static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
unsigned long flags, address, data;
uint32_t ficadl_val, ficadh_val;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
@@ -122,8 +125,8 @@ static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
{
unsigned long flags, address, data;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
@@ -150,8 +153,8 @@ static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
{
unsigned long flags, address, data;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, lo_addr);
@@ -172,15 +175,70 @@ static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
{
unsigned long flags, address, data;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, lo_addr);
+ WREG32(data, lo_val);
+ WREG32(address, hi_addr);
+ WREG32(data, hi_val);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/* same as perfmon_wreg but return status on write value check */
+static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
+ uint32_t lo_addr, uint32_t lo_val,
+ uint32_t hi_addr, uint32_t hi_val)
+{
+ unsigned long flags, address, data;
+ uint32_t lo_val_rb, hi_val_rb;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, lo_addr);
WREG32(data, lo_val);
WREG32(address, hi_addr);
WREG32(data, hi_val);
+
+ WREG32(address, lo_addr);
+ lo_val_rb = RREG32(data);
+ WREG32(address, hi_addr);
+ hi_val_rb = RREG32(data);
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ if (!(lo_val == lo_val_rb && hi_val == hi_val_rb))
+ return -EBUSY;
+
+ return 0;
+}
+
+
+/*
+ * retry arming counters every 100 usecs within 1 millisecond interval.
+ * if retry fails after time out, return error.
+ */
+#define ARM_RETRY_USEC_TIMEOUT 1000
+#define ARM_RETRY_USEC_INTERVAL 100
+static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev,
+ uint32_t lo_addr, uint32_t lo_val,
+ uint32_t hi_addr, uint32_t hi_val)
+{
+ int countdown = ARM_RETRY_USEC_TIMEOUT;
+
+ while (countdown) {
+
+ if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val,
+ hi_addr, hi_val))
+ break;
+
+ countdown -= ARM_RETRY_USEC_INTERVAL;
+ udelay(ARM_RETRY_USEC_INTERVAL);
+ }
+
+ return countdown > 0 ? 0 : -ETIME;
}
/* get the number of df counters available */
@@ -207,6 +265,32 @@ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
/* device attr for available perfmon counters */
static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);
+static void df_v3_6_query_hashes(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ adev->df.hash_status.hash_64k = false;
+ adev->df.hash_status.hash_2m = false;
+ adev->df.hash_status.hash_1g = false;
+
+ if (adev->asic_type != CHIP_ARCTURUS)
+ return;
+
+ /* encoding for hash-enabled on Arcturus */
+ if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
+ adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl64K);
+ adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl2M);
+ adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl1G);
+ }
+}
+
/* init perfmons */
static void df_v3_6_sw_init(struct amdgpu_device *adev)
{
@@ -218,6 +302,15 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
adev->df_perfmon_config_assign_mask[i] = 0;
+
+ df_v3_6_query_hashes(adev);
+}
+
+static void df_v3_6_sw_fini(struct amdgpu_device *adev)
+{
+
+ device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
+
}
static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
@@ -249,7 +342,7 @@ static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
{
int fb_channel_number;
- fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+ fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
fb_channel_number = 0;
@@ -261,23 +354,29 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
{
u32 tmp;
- /* Put DF on broadcast mode */
- adev->df_funcs->enable_broadcast_mode(adev, true);
-
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
- tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
- tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
- tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
- WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
- } else {
- tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
- tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
- tmp |= DF_V3_6_MGCG_DISABLE;
- WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
- }
+ if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
+ /* Put DF on broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, true);
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
+ WREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ } else {
+ tmp = RREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_DISABLE;
+ WREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ }
- /* Exit broadcast mode */
- adev->df_funcs->enable_broadcast_mode(adev, false);
+ /* Exit broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, false);
+ }
}
static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
@@ -321,20 +420,20 @@ static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
switch (target_cntr) {
case 0:
- *lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0;
- *hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0;
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4;
break;
case 1:
- *lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1;
- *hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1;
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5;
break;
case 2:
- *lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2;
- *hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2;
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6;
break;
case 3:
- *lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3;
- *hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3;
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7;
break;
}
@@ -409,6 +508,44 @@ static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
return -ENOSPC;
}
+#define DEFERRED_ARM_MASK (1 << 31)
+static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
+ uint64_t config, bool is_deferred)
+{
+ int target_cntr;
+
+ target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
+
+ if (target_cntr < 0)
+ return -EINVAL;
+
+ if (is_deferred)
+ adev->df_perfmon_config_assign_mask[target_cntr] |=
+ DEFERRED_ARM_MASK;
+ else
+ adev->df_perfmon_config_assign_mask[target_cntr] &=
+ ~DEFERRED_ARM_MASK;
+
+ return 0;
+}
+
+static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
+ uint64_t config)
+{
+ int target_cntr;
+
+ target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
+
+ /*
+ * we never get target_cntr < 0 since this funciton is only called in
+ * pmc_count for now but we should check anyways.
+ */
+ return (target_cntr >= 0 &&
+ (adev->df_perfmon_config_assign_mask[target_cntr]
+ & DEFERRED_ARM_MASK));
+
+}
+
/* release performance counter */
static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
uint64_t config)
@@ -438,29 +575,33 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
int is_enable)
{
uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
- int ret = 0;
+ int err = 0, ret = 0;
switch (adev->asic_type) {
case CHIP_VEGA20:
+ if (is_enable)
+ return df_v3_6_pmc_add_cntr(adev, config);
df_v3_6_reset_perfmon_cntr(adev, config);
- if (is_enable) {
- ret = df_v3_6_pmc_add_cntr(adev, config);
- } else {
- ret = df_v3_6_pmc_get_ctrl_settings(adev,
+ ret = df_v3_6_pmc_get_ctrl_settings(adev,
config,
&lo_base_addr,
&hi_base_addr,
&lo_val,
&hi_val);
- if (ret)
- return ret;
+ if (ret)
+ return ret;
- df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val,
- hi_base_addr, hi_val);
- }
+ err = df_v3_6_perfmon_arm_with_retry(adev,
+ lo_base_addr,
+ lo_val,
+ hi_base_addr,
+ hi_val);
+
+ if (err)
+ ret = df_v3_6_pmc_set_deferred(adev, config, true);
break;
default:
@@ -488,7 +629,7 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
if (ret)
return ret;
- df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
+ df_v3_6_reset_perfmon_cntr(adev, config);
if (is_disable)
df_v3_6_pmc_release_cntr(adev, config);
@@ -505,18 +646,29 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
uint64_t config,
uint64_t *count)
{
- uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ uint32_t lo_base_addr, hi_base_addr, lo_val = 0, hi_val = 0;
*count = 0;
switch (adev->asic_type) {
case CHIP_VEGA20:
-
df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
&hi_base_addr);
if ((lo_base_addr == 0) || (hi_base_addr == 0))
return;
+ /* rearm the counter or throw away count value on failure */
+ if (df_v3_6_pmc_is_deferred(adev, config)) {
+ int rearm_err = df_v3_6_perfmon_arm_with_status(adev,
+ lo_base_addr, lo_val,
+ hi_base_addr, hi_val);
+
+ if (rearm_err)
+ return;
+
+ df_v3_6_pmc_set_deferred(adev, config, false);
+ }
+
df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
hi_base_addr, &hi_val);
@@ -529,14 +681,66 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
config, lo_base_addr, hi_base_addr, lo_val, hi_val);
break;
-
default:
break;
}
}
+static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev,
+ uint32_t df_inst)
+{
+ uint32_t base_addr_reg_val = 0;
+ uint64_t base_addr = 0;
+
+ base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 +
+ df_inst * DF_3_6_SMN_REG_INST_DIST);
+
+ if (REG_GET_FIELD(base_addr_reg_val,
+ DF_CS_UMC_AON0_DramBaseAddress0,
+ AddrRngVal) == 0) {
+ DRM_WARN("address range not valid");
+ return 0;
+ }
+
+ base_addr = REG_GET_FIELD(base_addr_reg_val,
+ DF_CS_UMC_AON0_DramBaseAddress0,
+ DramBaseAddr);
+
+ return base_addr << 28;
+}
+
+static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev)
+{
+ uint32_t xgmi_node_id = 0;
+ uint32_t df_inst_id = 0;
+
+ /* Walk through DF dst nodes to find current XGMI node */
+ for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) {
+
+ xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 +
+ df_inst_id * DF_3_6_SMN_REG_INST_DIST);
+ xgmi_node_id = REG_GET_FIELD(xgmi_node_id,
+ DF_CS_UMC_AON0_DramLimitAddress0,
+ DstFabricID);
+
+ /* TODO: establish reason dest fabric id is offset by 7 */
+ xgmi_node_id = xgmi_node_id >> 7;
+
+ if (adev->gmc.xgmi.physical_node_id == xgmi_node_id)
+ break;
+ }
+
+ if (df_inst_id == DF_3_6_INST_CNT) {
+ DRM_WARN("cant match df dst id with gpu node");
+ return 0;
+ }
+
+ return df_inst_id;
+}
+
const struct amdgpu_df_funcs df_v3_6_funcs = {
.sw_init = df_v3_6_sw_init,
+ .sw_fini = df_v3_6_sw_fini,
.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
.get_fb_channel_number = df_v3_6_get_fb_channel_number,
.get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
@@ -547,5 +751,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
.pmc_stop = df_v3_6_pmc_stop,
.pmc_get_count = df_v3_6_pmc_get_count,
.get_fica = df_v3_6_get_fica,
- .set_fica = df_v3_6_set_fica
+ .set_fica = df_v3_6_set_fica,
+ .get_dram_base_addr = df_v3_6_get_dram_base_addr,
+ .get_df_inst_id = df_v3_6_get_df_inst_id
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 957811b73672..1785fdad6ecb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,6 +40,7 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
@@ -50,9 +51,6 @@
* Navi10 has two graphic rings to share each graphic pipe.
* 1. Primary ring
* 2. Async ring
- *
- * In bring-up phase, it just used primary ring so set gfx ring number as 1 at
- * first.
*/
#define GFX10_NUM_GFX_RINGS 2
#define GFX10_MEC_HPD_SIZE 2048
@@ -93,7 +91,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -117,17 +115,20 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000)
};
static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
@@ -140,7 +141,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -162,16 +163,19 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000),
};
static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
@@ -179,7 +183,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -342,15 +346,29 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
+static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx10_kiq_set_resources,
.kiq_map_queues = gfx10_kiq_map_queues,
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
.query_status_size = 7,
+ .invalidate_tlbs_size = 2,
};
static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
@@ -468,18 +486,10 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
else
udelay(1);
}
- if (i < adev->usec_timeout) {
- if (amdgpu_emu_mode == 1)
- DRM_INFO("ring test on %d succeeded in %d msecs\n",
- ring->idx, i);
- else
- DRM_INFO("ring test on %d succeeded in %d usecs\n",
- ring->idx, i);
- } else {
- DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
- ring->idx, scratch, tmp);
- r = -EINVAL;
- }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
amdgpu_gfx_scratch_free(adev, scratch);
return r;
@@ -529,14 +539,10 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF) {
- DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ if (tmp == 0xDEADBEEF)
r = 0;
- } else {
- DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
- scratch, tmp);
+ else
r = -EINVAL;
- }
err2:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
@@ -564,6 +570,31 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
kfree(adev->gfx.rlc.register_list_format);
}
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+ adev->gfx.cp_fw_write_wait = false;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_NAVI14:
+ if ((adev->gfx.me_fw_version >= 0x00000046) &&
+ (adev->gfx.me_feature_version >= 27) &&
+ (adev->gfx.pfp_fw_version >= 0x00000068) &&
+ (adev->gfx.pfp_feature_version >= 27) &&
+ (adev->gfx.mec_fw_version >= 0x0000005b) &&
+ (adev->gfx.mec_feature_version >= 27))
+ adev->gfx.cp_fw_write_wait = true;
+ break;
+ default:
+ break;
+ }
+
+ if (adev->gfx.cp_fw_write_wait == false)
+ DRM_WARN_ONCE("CP firmware version too old, please update!");
+}
+
+
static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -585,11 +616,29 @@ static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
+static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
+{
+ bool ret = false;
+
+ switch (adev->pdev->revision) {
+ case 0xc2:
+ case 0xc3:
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+
+ return ret ;
+}
+
static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_NAVI10:
- adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
break;
default:
break;
@@ -664,59 +713,61 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
- rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
- version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
- version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
- if (version_major == 2 && version_minor == 1)
- adev->gfx.rlc.is_rlc_v2_1 = true;
-
- adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
- adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
- adev->gfx.rlc.save_and_restore_offset =
+ if (!amdgpu_sriov_vf(adev)) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2 && version_minor == 1)
+ adev->gfx.rlc.is_rlc_v2_1 = true;
+
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+ adev->gfx.rlc.save_and_restore_offset =
le32_to_cpu(rlc_hdr->save_and_restore_offset);
- adev->gfx.rlc.clear_state_descriptor_offset =
+ adev->gfx.rlc.clear_state_descriptor_offset =
le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
- adev->gfx.rlc.avail_scratch_ram_locations =
+ adev->gfx.rlc.avail_scratch_ram_locations =
le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
- adev->gfx.rlc.reg_restore_list_size =
+ adev->gfx.rlc.reg_restore_list_size =
le32_to_cpu(rlc_hdr->reg_restore_list_size);
- adev->gfx.rlc.reg_list_format_start =
+ adev->gfx.rlc.reg_list_format_start =
le32_to_cpu(rlc_hdr->reg_list_format_start);
- adev->gfx.rlc.reg_list_format_separate_start =
+ adev->gfx.rlc.reg_list_format_separate_start =
le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
- adev->gfx.rlc.starting_offsets_start =
+ adev->gfx.rlc.starting_offsets_start =
le32_to_cpu(rlc_hdr->starting_offsets_start);
- adev->gfx.rlc.reg_list_format_size_bytes =
+ adev->gfx.rlc.reg_list_format_size_bytes =
le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
- adev->gfx.rlc.reg_list_size_bytes =
+ adev->gfx.rlc.reg_list_size_bytes =
le32_to_cpu(rlc_hdr->reg_list_size_bytes);
- adev->gfx.rlc.register_list_format =
+ adev->gfx.rlc.register_list_format =
kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
- adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
- if (!adev->gfx.rlc.register_list_format) {
- err = -ENOMEM;
- goto out;
- }
+ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+ if (!adev->gfx.rlc.register_list_format) {
+ err = -ENOMEM;
+ goto out;
+ }
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
- adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
- adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
- adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
- if (adev->gfx.rlc.is_rlc_v2_1)
- gfx_v10_0_init_rlc_ext_microcode(adev);
+ if (adev->gfx.rlc.is_rlc_v2_1)
+ gfx_v10_0_init_rlc_ext_microcode(adev);
+ }
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
@@ -771,10 +822,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
info->fw = adev->gfx.rlc_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
+ if (info->fw) {
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
if (adev->gfx.rlc.is_rlc_v2_1 &&
adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
@@ -832,6 +884,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
}
}
+ gfx_v10_0_check_fw_write_wait(adev);
out:
if (err) {
dev_err(adev->dev,
@@ -966,39 +1019,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
- AMDGPU_GEM_DOMAIN_VRAM);
- if (!r)
- adev->gfx.rlc.clear_state_gpu_addr =
- amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
- return r;
-}
-
-static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
- int r;
-
- if (!adev->gfx.rlc.clear_state_obj)
- return;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
- }
-}
-
static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1442,7 +1462,7 @@ static int gfx_v10_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
gfx_v10_0_pfp_fini(adev);
@@ -1758,27 +1778,18 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
}
-static void gfx_v10_0_init_csb(struct amdgpu_device *adev)
+static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
/* csib */
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
-}
-
-static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
-{
- int i;
-
- gfx_v10_0_init_csb(adev);
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-
- /* TODO: init power gating */
- return;
+ return 0;
}
void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
@@ -1873,18 +1884,16 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
{
int r;
- if (amdgpu_sriov_vf(adev))
- return 0;
-
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+
r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
if (r)
return r;
- gfx_v10_0_init_pg(adev);
- /* enable RLC SRM */
- gfx_v10_0_rlc_enable_srm(adev);
+ gfx_v10_0_init_csb(adev);
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v10_0_rlc_enable_srm(adev);
} else {
adev->gfx.rlc.funcs->stop(adev);
@@ -1906,7 +1915,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
- gfx_v10_0_init_pg(adev);
+ gfx_v10_0_init_csb(adev);
+
adev->gfx.rlc.funcs->start(adev);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
@@ -1956,7 +1966,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4;
rlc_toc++;
- };
+ }
return 0;
}
@@ -2373,7 +2383,7 @@ static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
int i;
u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
@@ -2386,7 +2396,17 @@ static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
adev->gfx.gfx_ring[i].sched.ready = false;
}
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
- udelay(50);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+ return 0;
}
static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
@@ -2443,7 +2463,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -2513,7 +2533,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
@@ -2582,7 +2602,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -2757,7 +2777,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
/* Init gfx ring 0 for pipe 0 */
mutex_lock(&adev->srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
- mutex_unlock(&adev->srbm_mutex);
+
/* Set ring buffer size */
ring = &adev->gfx.gfx_ring[0];
rb_bufsz = order_base_2(ring->ring_size / 8);
@@ -2795,11 +2815,11 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1);
gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
/* Init gfx ring 1 for pipe 1 */
mutex_lock(&adev->srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
- mutex_unlock(&adev->srbm_mutex);
ring = &adev->gfx.gfx_ring[1];
rb_bufsz = order_base_2(ring->ring_size / 8);
tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
@@ -2829,6 +2849,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
/* Switch to pipe 0 */
mutex_lock(&adev->srbm_mutex);
@@ -2903,7 +2924,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -3087,6 +3108,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.gfx_ring[0];
if (!adev->in_gpu_reset && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
@@ -3098,14 +3120,15 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
#endif
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
- memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd));
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else if (adev->in_gpu_reset) {
/* reset mqd with the backup copy */
- if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
- memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd));
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
+ adev->wb.wb[ring->wptr_offs] = 0;
amdgpu_ring_clear_ring(ring);
#ifdef BRING_UP_DEBUG
mutex_lock(&adev->srbm_mutex);
@@ -3314,8 +3337,11 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
mqd->cp_hqd_ib_control = tmp;
- /* activate the queue */
- mqd->cp_hqd_active = 1;
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
return 0;
}
@@ -3586,23 +3612,16 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
- DRM_INFO("gfx %d ring me %d pipe %d q %d\n",
- i, ring->me, ring->pipe, ring->queue);
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
return r;
- }
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
- ring->sched.ready = true;
- DRM_INFO("compute ring %d mec %d pipe %d q %d\n",
- i, ring->me, ring->pipe, ring->queue);
- r = amdgpu_ring_test_ring(ring);
+ r = amdgpu_ring_test_helper(ring);
if (r)
- ring->sched.ready = false;
+ return r;
}
return 0;
@@ -3705,10 +3724,6 @@ static int gfx_v10_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gfx_v10_0_csb_vram_pin(adev);
- if (r)
- return r;
-
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
@@ -3791,12 +3806,11 @@ static int gfx_v10_0_hw_fini(void *handle)
if (amdgpu_gfx_disable_kcq(adev))
DRM_ERROR("KCQ disable failed\n");
if (amdgpu_sriov_vf(adev)) {
- pr_debug("For SRIOV client, shouldn't do anything.\n");
+ gfx_v10_0_cp_gfx_enable(adev, false);
return 0;
}
gfx_v10_0_cp_enable(adev, false);
gfx_v10_0_enable_gui_idle_interrupt(adev, false);
- gfx_v10_0_csb_vram_unpin(adev);
return 0;
}
@@ -4215,7 +4229,7 @@ static int gfx_v10_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_PG_STATE_GATE);
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
@@ -4241,7 +4255,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
case CHIP_NAVI14:
case CHIP_NAVI12:
gfx_v10_0_update_gfx_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -4357,7 +4371,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask, reg_mem_engine;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
switch (ring->me) {
@@ -4377,8 +4391,8 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
- adev->nbio_funcs->get_hdp_flush_req_offset(adev),
- adev->nbio_funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
ref_and_mask, ref_and_mask, 0x20);
}
@@ -4723,6 +4737,7 @@ static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
@@ -4731,9 +4746,9 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
}
static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -4765,6 +4780,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ struct amdgpu_device *adev = ring->adev;
+ bool fw_version_ok = false;
+
+ fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+ if (fw_version_ok)
+ gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+ else
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -5155,6 +5188,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_tmz = gfx_v10_0_ring_emit_tmz,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5188,6 +5222,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -5218,6 +5253,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.emit_rreg = gfx_v10_0_ring_emit_rreg,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -5283,15 +5319,12 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
{
- /* init asic gds info */
- switch (adev->asic_type) {
- case CHIP_NAVI10:
- default:
- adev->gds.gds_size = 0x10000;
- adev->gds.gds_compute_max_wave_id = 0x4ff;
- break;
- }
+ unsigned total_cu = adev->gfx.config.max_cu_per_sh *
+ adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines;
+ adev->gds.gds_size = 0x10000;
+ adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
adev->gds.gws_size = 64;
adev->gds.oa_size = 16;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 7f0a63628c43..31f44d05e606 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1576,7 +1576,7 @@ static void gfx_v6_0_config_init(struct amdgpu_device *adev)
static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
{
u32 gb_addr_config = 0;
- u32 mc_shared_chmap, mc_arb_ramcfg;
+ u32 mc_arb_ramcfg;
u32 sx_debug_1;
u32 hdp_host_path_cntl;
u32 tmp;
@@ -1678,7 +1678,6 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
- mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 791ba398f007..8f20a5dd44fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4258,7 +4258,7 @@ static int gfx_v7_0_late_init(void *handle)
static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
- u32 mc_shared_chmap, mc_arb_ramcfg;
+ u32 mc_arb_ramcfg;
u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
u32 tmp;
@@ -4335,7 +4335,6 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
break;
}
- mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
@@ -4554,6 +4553,8 @@ static int gfx_v7_0_hw_init(void *handle)
gfx_v7_0_constants_init(adev);
+ /* init CSB */
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* init rlc */
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 87dd55e9d72b..fa245973de12 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1321,39 +1321,6 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
- AMDGPU_GEM_DOMAIN_VRAM);
- if (!r)
- adev->gfx.rlc.clear_state_gpu_addr =
- amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
- return r;
-}
-
-static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
- int r;
-
- if (!adev->gfx.rlc.clear_state_obj)
- return;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
- }
-}
-
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1710,7 +1677,7 @@ fail:
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
- u32 mc_shared_chmap, mc_arb_ramcfg;
+ u32 mc_arb_ramcfg;
u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
u32 tmp;
int ret;
@@ -1850,7 +1817,6 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
break;
}
- mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
@@ -2103,7 +2069,7 @@ static int gfx_v8_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
gfx_v8_0_mec_fini(adev);
@@ -3917,6 +3883,7 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
WREG32(mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
@@ -4591,8 +4558,11 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
- /* activate the queue */
- mqd->cp_hqd_active = 1;
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
return 0;
}
@@ -4837,10 +4807,6 @@ static int gfx_v8_0_hw_init(void *handle)
gfx_v8_0_init_golden_registers(adev);
gfx_v8_0_constants_init(adev);
- r = gfx_v8_0_csb_vram_pin(adev);
- if (r)
- return r;
-
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@@ -4958,8 +4924,6 @@ static int gfx_v8_0_hw_fini(void *handle)
pr_err("rlc is busy, skip halt rlc\n");
amdgpu_gfx_rlc_exit_safe_mode(adev);
- gfx_v8_0_csb_vram_unpin(adev);
-
return 0;
}
@@ -6184,7 +6148,23 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
- /* EVENT_WRITE_EOP - flush caches, send int */
+ /* Workaround for cache flush problems. First send a dummy EOP
+ * event down the pipe with seq one below.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+ amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+ DATA_SEL(1) | INT_SEL(0));
+ amdgpu_ring_write(ring, lower_32_bits(seq - 1));
+ amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+
+ /* Then send the real EOP event down the pipe:
+ * EVENT_WRITE_EOP - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
EOP_TC_ACTION_EN |
@@ -6469,6 +6449,7 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
@@ -6477,9 +6458,9 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
}
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -6926,7 +6907,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5 + /* COND_EXEC */
7 + /* PIPELINE_SYNC */
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
- 8 + /* FENCE for VM_FLUSH */
+ 12 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
the first COND_EXEC jump to the place just
@@ -6938,7 +6919,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
31 + /* DE_META */
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
- 8 + 8 + /* FENCE x2 */
+ 12 + 12 + /* FENCE x2 */
2, /* SWITCH_BUFFER */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index dcadc73bffd2..90f64b8bc358 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -48,6 +48,8 @@
#include "amdgpu_ras.h"
+#include "gfx_v9_4.h"
+
#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -517,9 +519,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};
static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
@@ -582,9 +584,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};
static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
@@ -676,9 +678,9 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};
static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
@@ -691,6 +693,8 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
@@ -734,9 +738,138 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
+static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
void *inject_if);
+static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ /* vmid_mask:0* queue_type:0 (KIQ) */
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+ amdgpu_ring_write(kiq_ring,
+ lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring,
+ upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+ /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+ /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ /* num_queues: must be 1 */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v9_0_kiq_set_resources,
+ .kiq_map_queues = gfx_v9_0_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
+ .kiq_query_status = gfx_v9_0_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
+}
+
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
@@ -973,6 +1106,12 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
adev->gfx.me_fw_write_wait = false;
adev->gfx.mec_fw_write_wait = false;
+ if ((adev->gfx.mec_fw_version < 0x000001a5) ||
+ (adev->gfx.mec_feature_version < 46) ||
+ (adev->gfx.pfp_fw_version < 0x000000b7) ||
+ (adev->gfx.pfp_feature_version < 46))
+ DRM_WARN_ONCE("CP firmware version too old, please update!");
+
switch (adev->asic_type) {
case CHIP_VEGA10:
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
@@ -1023,20 +1162,54 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
}
}
+struct amdgpu_gfxoff_quirk {
+ u16 chip_vendor;
+ u16 chip_device;
+ u16 subsys_vendor;
+ u16 subsys_device;
+ u8 revision;
+};
+
+static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
+ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
+ { 0, 0, 0, 0, 0 },
+};
+
+static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
+{
+ const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
+
+ while (p && p->chip_device != 0) {
+ if (pdev->vendor == p->chip_vendor &&
+ pdev->device == p->chip_device &&
+ pdev->subsystem_vendor == p->subsys_vendor &&
+ pdev->subsystem_device == p->subsys_device &&
+ pdev->revision == p->revision) {
+ return true;
+ }
+ ++p;
+ }
+ return false;
+}
+
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
+ if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
break;
case CHIP_RAVEN:
- if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
- &&((adev->gfx.rlc_fw_version != 106 &&
- adev->gfx.rlc_fw_version < 531) ||
- (adev->gfx.rlc_fw_version == 53815) ||
- (adev->gfx.rlc_feature_version < 1) ||
- !adev->gfx.rlc.is_rlc_v2_1))
+ if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
+ ((adev->gfx.rlc_fw_version != 106 &&
+ adev->gfx.rlc_fw_version < 531) ||
+ (adev->gfx.rlc_fw_version == 53815) ||
+ (adev->gfx.rlc_feature_version < 1) ||
+ !adev->gfx.rlc.is_rlc_v2_1))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
if (adev->pm.pp_feature & PP_GFXOFF_MASK)
@@ -1044,6 +1217,12 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_RLC_SMU_HS;
break;
+ case CHIP_RENOIR:
+ if (adev->pm.pp_feature & PP_GFXOFF_MASK)
+ adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_RLC_SMU_HS;
+ break;
default:
break;
}
@@ -1324,7 +1503,8 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
/* TODO: Determine if MEC2 JT FW loading can be removed
for all GFX V9 asic and above */
- if (adev->asic_type != CHIP_ARCTURUS) {
+ if (adev->asic_type != CHIP_ARCTURUS &&
+ adev->asic_type != CHIP_RENOIR) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
info->fw = adev->gfx.mec2_fw;
@@ -1662,39 +1842,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
- AMDGPU_GEM_DOMAIN_VRAM);
- if (!r)
- adev->gfx.rlc.clear_state_gpu_addr =
- amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
- return r;
-}
-
-static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
- int r;
-
- if (!adev->gfx.rlc.clear_state_obj)
- return;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
- }
-}
-
static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1841,6 +1988,17 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
.query_ras_error_count = &gfx_v9_0_query_ras_error_count
};
+static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v9_0_select_se_sh,
+ .read_wave_data = &gfx_v9_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
+ .ras_error_inject = &gfx_v9_4_ras_error_inject,
+ .query_ras_error_count = &gfx_v9_4_query_ras_error_count
+};
+
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
@@ -1892,6 +2050,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_ARCTURUS:
+ adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -1956,190 +2115,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
- struct amdgpu_ngg_buf *ngg_buf,
- int size_se,
- int default_size_se)
-{
- int r;
-
- if (size_se < 0) {
- dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
- return -EINVAL;
- }
- size_se = size_se ? size_se : default_size_se;
-
- ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
- r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
- &ngg_buf->bo,
- &ngg_buf->gpu_addr,
- NULL);
- if (r) {
- dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
- return r;
- }
- ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
-
- return r;
-}
-
-static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < NGG_BUF_MAX; i++)
- amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
- &adev->gfx.ngg.buf[i].gpu_addr,
- NULL);
-
- memset(&adev->gfx.ngg.buf[0], 0,
- sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
-
- adev->gfx.ngg.init = false;
-
- return 0;
-}
-
-static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
-{
- int r;
-
- if (!amdgpu_ngg || adev->gfx.ngg.init == true)
- return 0;
-
- /* GDS reserve memory: 64 bytes alignment */
- adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
- adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
- adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
- adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
-
- /* Primitive Buffer */
- r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
- amdgpu_prim_buf_per_se,
- 64 * 1024);
- if (r) {
- dev_err(adev->dev, "Failed to create Primitive Buffer\n");
- goto err;
- }
-
- /* Position Buffer */
- r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
- amdgpu_pos_buf_per_se,
- 256 * 1024);
- if (r) {
- dev_err(adev->dev, "Failed to create Position Buffer\n");
- goto err;
- }
-
- /* Control Sideband */
- r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
- amdgpu_cntl_sb_buf_per_se,
- 256);
- if (r) {
- dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
- goto err;
- }
-
- /* Parameter Cache, not created by default */
- if (amdgpu_param_buf_per_se <= 0)
- goto out;
-
- r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
- amdgpu_param_buf_per_se,
- 512 * 1024);
- if (r) {
- dev_err(adev->dev, "Failed to create Parameter Cache\n");
- goto err;
- }
-
-out:
- adev->gfx.ngg.init = true;
- return 0;
-err:
- gfx_v9_0_ngg_fini(adev);
- return r;
-}
-
-static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
- int r;
- u32 data, base;
-
- if (!amdgpu_ngg)
- return 0;
-
- /* Program buffer size */
- data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
- adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
- data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
- adev->gfx.ngg.buf[NGG_POS].size >> 8);
- WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
-
- data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
- adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
- data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
- adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
- WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
-
- /* Program buffer base address */
- base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
- data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
- WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
-
- base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
- data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
- WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
-
- base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
- data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
- WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
-
- base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
- data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
- WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
-
- base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
- data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
- WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
-
- base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
- data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
- WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
-
- /* Clear GDS reserved memory */
- r = amdgpu_ring_alloc(ring, 17);
- if (r) {
- DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
- ring->name, r);
- return r;
- }
-
- gfx_v9_0_write_data_to_reg(ring, 0, false,
- SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
- (adev->gds.gds_size +
- adev->gfx.ngg.gds_reserve_size));
-
- amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
- amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
- PACKET3_DMA_DATA_DST_SEL(1) |
- PACKET3_DMA_DATA_SRC_SEL(2)));
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
- adev->gfx.ngg.gds_reserve_size);
-
- gfx_v9_0_write_data_to_reg(ring, 0, false,
- SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
-
- amdgpu_ring_commit(ring);
-
- return 0;
-}
-
static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
int mec, int pipe, int queue)
{
@@ -2307,10 +2282,6 @@ static int gfx_v9_0_sw_init(void *handle)
if (r)
return r;
- r = gfx_v9_0_ngg_init(adev);
- if (r)
- return r;
-
return 0;
}
@@ -2320,19 +2291,7 @@ static int gfx_v9_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
- adev->gfx.ras_if) {
- struct ras_common_if *ras_if = adev->gfx.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- };
-
- amdgpu_ras_debugfs_remove(adev, ras_if);
- amdgpu_ras_sysfs_remove(adev, ras_if);
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
- amdgpu_ras_feature_enable(adev, ras_if, 0);
- kfree(ras_if);
- }
+ amdgpu_gfx_ras_fini(adev);
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -2340,11 +2299,10 @@ static int gfx_v9_0_sw_fini(void *handle)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
amdgpu_gfx_mqd_sw_fini(adev);
- amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
amdgpu_gfx_kiq_fini(adev);
gfx_v9_0_mec_fini(adev);
- gfx_v9_0_ngg_fini(adev);
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
@@ -2483,6 +2441,22 @@ static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
}
}
+static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
+ tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
+ DISABLE_BARRIER_WAITCNT, 1);
+ WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
+ break;
+ default:
+ break;
+ };
+}
+
static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
{
u32 tmp;
@@ -2528,6 +2502,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
gfx_v9_0_init_compute_vmid(adev);
gfx_v9_0_init_gds_vmid(adev);
+ gfx_v9_0_init_sq_config(adev);
}
static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
@@ -2583,6 +2558,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
adev->gfx.rlc.clear_state_gpu_addr >> 32);
@@ -2912,7 +2888,10 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
* And it's needed by gfxoff feature.
*/
if (adev->gfx.rlc.is_rlc_v2_1) {
- gfx_v9_1_init_rlc_save_restore_list(adev);
+ if (adev->asic_type == CHIP_VEGA12 ||
+ (adev->asic_type == CHIP_RAVEN &&
+ adev->rev_id >= 8))
+ gfx_v9_1_init_rlc_save_restore_list(adev);
gfx_v9_0_enable_save_restore_machine(adev);
}
@@ -3323,74 +3302,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
}
-static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- uint64_t queue_mask = 0;
- int r, i;
-
- for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- if (!test_bit(i, adev->gfx.mec.queue_bitmap))
- continue;
-
- /* This situation may be hit in the future if a new HW
- * generation exposes more than 64 queues. If so, the
- * definition of queue_mask needs updating */
- if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
- DRM_ERROR("Invalid KCQ enabled: %d\n", i);
- break;
- }
-
- queue_mask |= (1ull << i);
- }
-
- r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
- if (r) {
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- return r;
- }
-
- /* set resources */
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
- amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
- PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
- amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
- amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* oac mask */
- amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
- uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
- uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
- /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
- amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
- PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
- PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
- PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
- PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
- PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
- PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
- PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
- PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
- PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
- amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
- amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
- amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
- }
-
- r = amdgpu_ring_test_helper(kiq_ring);
- if (r)
- DRM_ERROR("KCQ enable failed\n");
-
- return r;
-}
-
static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -3527,8 +3438,11 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
mqd->cp_hqd_ib_control = tmp;
- /* activate the queue */
- mqd->cp_hqd_active = 1;
+ /* map_queues packet doesn't need activate the queue,
+ * so only kiq need set this field.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd->cp_hqd_active = 1;
return 0;
}
@@ -3797,7 +3711,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
goto done;
}
- r = gfx_v9_0_kiq_kcq_enable(adev);
+ r = amdgpu_gfx_enable_kcq(adev);
done:
return r;
}
@@ -3854,6 +3768,23 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return 0;
}
+static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ if (adev->asic_type != CHIP_ARCTURUS)
+ return;
+
+ tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
+ tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
+ adev->df.hash_status.hash_64k);
+ tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
+ adev->df.hash_status.hash_2m);
+ tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
+ adev->df.hash_status.hash_1g);
+ WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
+}
+
static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
if (adev->asic_type != CHIP_ARCTURUS)
@@ -3871,9 +3802,7 @@ static int gfx_v9_0_hw_init(void *handle)
gfx_v9_0_constants_init(adev);
- r = gfx_v9_0_csb_vram_pin(adev);
- if (r)
- return r;
+ gfx_v9_0_init_tcp_config(adev);
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
@@ -3883,42 +3812,6 @@ static int gfx_v9_0_hw_init(void *handle)
if (r)
return r;
- if (adev->asic_type != CHIP_ARCTURUS) {
- r = gfx_v9_0_ngg_en(adev);
- if (r)
- return r;
- }
-
- return r;
-}
-
-static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
-{
- int r, i;
- struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
-
- r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
- if (r)
- DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
- amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
- PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
- PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
- PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
- PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
- amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
- amdgpu_ring_write(kiq_ring, 0);
- amdgpu_ring_write(kiq_ring, 0);
- amdgpu_ring_write(kiq_ring, 0);
- }
- r = amdgpu_ring_test_helper(kiq_ring);
- if (r)
- DRM_ERROR("KCQ disable failed\n");
-
return r;
}
@@ -3930,8 +3823,10 @@ static int gfx_v9_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
- /* disable KCQ to avoid CPC touch memory not valid anymore */
- gfx_v9_0_kcq_disable(adev);
+ /* DF freeze and kcq disable will fail */
+ if (!amdgpu_ras_intr_triggered())
+ /* disable KCQ to avoid CPC touch memory not valid anymore */
+ amdgpu_gfx_disable_kcq(adev);
if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_cp_gfx_enable(adev, false);
@@ -3960,8 +3855,6 @@ static int gfx_v9_0_hw_fini(void *handle)
gfx_v9_0_cp_enable(adev, false);
adev->gfx.rlc.funcs->stop(adev);
- gfx_v9_0_csb_vram_unpin(adev);
-
return 0;
}
@@ -4067,9 +3960,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
uint64_t clock;
mutex_lock(&adev->gfx.gpu_clock_mutex);
- WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
- clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
- ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
+ uint32_t tmp, lsb, msb, i = 0;
+ do {
+ if (i != 0)
+ udelay(1);
+ tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+ lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
+ msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
+ i++;
+ } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
+ clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
+ } else {
+ WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ }
mutex_unlock(&adev->gfx.gpu_clock_mutex);
return clock;
}
@@ -4137,33 +4043,61 @@ static const u32 sgpr_init_compute_shader[] =
0xbe800080, 0xbf810000,
};
+/* When below register arrays changed, please update gpr_reg_size,
+ and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds,
+ to cover all gfx9 ASICs */
static const struct soc15_reg_entry vgpr_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static const struct soc15_reg_entry sgpr1_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
};
-static const struct soc15_reg_entry sgpr_init_regs[] = {
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+static const struct soc15_reg_entry sgpr2_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
- { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
};
-static const struct soc15_reg_entry sec_ded_counter_registers[] = {
+static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
{ SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
{ SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
@@ -4184,6 +4118,7 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
{ SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
{ SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
{ SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
{ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
@@ -4196,6 +4131,7 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
{ SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
{ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
+ { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
};
static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
@@ -4203,6 +4139,10 @@ static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
int i, r;
+ /* only support when RAS is enabled */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return 0;
+
r = amdgpu_ring_alloc(ring, 7);
if (r) {
DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
@@ -4246,10 +4186,16 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- int r, i, j, k;
+ int r, i;
unsigned total_size, vgpr_offset, sgpr_offset;
u64 gpu_addr;
+ int compute_dim_x = adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_cu_per_sh *
+ adev->gfx.config.max_sh_per_se;
+ int sgpr_work_group_size = 5;
+ int gpr_reg_size = compute_dim_x / 16 + 6;
+
/* only support when RAS is enabled */
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
return 0;
@@ -4259,9 +4205,11 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
return 0;
total_size =
- ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+ (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
+ total_size +=
+ (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
total_size +=
- ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+ (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
total_size = ALIGN(total_size, 256);
vgpr_offset = total_size;
total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
@@ -4288,7 +4236,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* VGPR */
/* write the register state for the compute dispatch */
- for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
+ for (i = 0; i < gpr_reg_size; i++) {
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
- PACKET3_SET_SH_REG_START;
@@ -4304,7 +4252,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* write dispatch packet */
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
- ib.ptr[ib.length_dw++] = 128; /* x */
+ ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
ib.ptr[ib.length_dw++] = 1; /* y */
ib.ptr[ib.length_dw++] = 1; /* z */
ib.ptr[ib.length_dw++] =
@@ -4314,13 +4262,13 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
- /* SGPR */
+ /* SGPR1 */
/* write the register state for the compute dispatch */
- for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
+ for (i = 0; i < gpr_reg_size; i++) {
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
- ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
+ ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
- PACKET3_SET_SH_REG_START;
- ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
+ ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
}
/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
@@ -4332,7 +4280,35 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* write dispatch packet */
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
- ib.ptr[ib.length_dw++] = 128; /* x */
+ ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* SGPR2 */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < gpr_reg_size; i++) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
ib.ptr[ib.length_dw++] = 1; /* y */
ib.ptr[ib.length_dw++] = 1; /* z */
ib.ptr[ib.length_dw++] =
@@ -4356,18 +4332,17 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
goto fail;
}
- /* read back registers to clear the counters */
- mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
- for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
- for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
- gfx_v9_0_select_se_sh(adev, j, 0x0, k);
- RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
- }
- }
+ switch (adev->asic_type)
+ {
+ case CHIP_VEGA20:
+ gfx_v9_0_clear_ras_edc_counter(adev);
+ break;
+ case CHIP_ARCTURUS:
+ gfx_v9_4_clear_ras_edc_counter(adev);
+ break;
+ default:
+ break;
}
- WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
- mutex_unlock(&adev->grbm_idx_mutex);
fail:
amdgpu_ib_free(adev, &ib, NULL);
@@ -4385,6 +4360,7 @@ static int gfx_v9_0_early_init(void *handle)
else
adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ gfx_v9_0_set_kiq_pm4_funcs(adev);
gfx_v9_0_set_ring_funcs(adev);
gfx_v9_0_set_irq_funcs(adev);
gfx_v9_0_set_gds_init(adev);
@@ -4393,34 +4369,11 @@ static int gfx_v9_0_early_init(void *handle)
return 0;
}
-static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
- struct amdgpu_iv_entry *entry);
-
static int gfx_v9_0_ecc_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ras_common_if **ras_if = &adev->gfx.ras_if;
- struct ras_ih_if ih_info = {
- .cb = gfx_v9_0_process_ras_data_cb,
- };
- struct ras_fs_if fs_info = {
- .sysfs_name = "gfx_err_count",
- .debugfs_name = "gfx_err_inject",
- };
- struct ras_common_if ras_block = {
- .block = AMDGPU_RAS_BLOCK__GFX,
- .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
- .sub_block_index = 0,
- .name = "gfx",
- };
int r;
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
- amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
- return 0;
- }
-
r = gfx_v9_0_do_edc_gds_workarounds(adev);
if (r)
return r;
@@ -4430,72 +4383,11 @@ static int gfx_v9_0_ecc_late_init(void *handle)
if (r)
return r;
- /* handle resume path. */
- if (*ras_if) {
- /* resend ras TA enable cmd during resume.
- * prepare to handle failure.
- */
- ih_info.head = **ras_if;
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- /* request a gpu reset. will run again. */
- amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__GFX);
- return 0;
- }
- /* fail to enable ras, cleanup all. */
- goto irq;
- }
- /* enable successfully. continue. */
- goto resume;
- }
-
- *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
- if (!*ras_if)
- return -ENOMEM;
-
- **ras_if = ras_block;
-
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__GFX);
- r = 0;
- }
- goto feature;
- }
-
- ih_info.head = **ras_if;
- fs_info.head = **ras_if;
-
- r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
- if (r)
- goto interrupt;
-
- amdgpu_ras_debugfs_create(adev, &fs_info);
-
- r = amdgpu_ras_sysfs_create(adev, &fs_info);
- if (r)
- goto sysfs;
-resume:
- r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ r = amdgpu_gfx_ras_late_init(adev);
if (r)
- goto irq;
+ return r;
return 0;
-irq:
- amdgpu_ras_sysfs_remove(adev, *ras_if);
-sysfs:
- amdgpu_ras_debugfs_remove(adev, *ras_if);
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
-interrupt:
- amdgpu_ras_feature_enable(adev, *ras_if, 0);
-feature:
- kfree(*ras_if);
- *ras_if = NULL;
- return r;
}
static int gfx_v9_0_late_init(void *handle)
@@ -4560,16 +4452,14 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
{
amdgpu_gfx_rlc_enter_safe_mode(adev);
- if (is_support_sw_smu(adev) && !enable)
- smu_set_gfx_cgpg(&adev->smu, enable);
-
if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
} else {
gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
- gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+ gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
}
amdgpu_gfx_rlc_exit_safe_mode(adev);
@@ -4815,7 +4705,7 @@ static int gfx_v9_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_PG_STATE_GATE);
switch (adev->asic_type) {
case CHIP_RAVEN:
@@ -4838,8 +4728,6 @@ static int gfx_v9_0_set_powergating_state(void *handle,
gfx_v9_0_enable_cp_power_gating(adev, false);
/* update gfx cgpg state */
- if (is_support_sw_smu(adev) && enable)
- smu_set_gfx_cgpg(&adev->smu, enable);
gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
/* update mgcg state */
@@ -4879,7 +4767,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
case CHIP_ARCTURUS:
case CHIP_RENOIR:
gfx_v9_0_update_gfx_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -4896,12 +4784,12 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
*flags = 0;
/* AMD_CG_SUPPORT_GFX_MGCG */
- data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
*flags |= AMD_CG_SUPPORT_GFX_MGCG;
/* AMD_CG_SUPPORT_GFX_CGCG */
- data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_CGCG;
@@ -4910,18 +4798,18 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
*flags |= AMD_CG_SUPPORT_GFX_CGLS;
/* AMD_CG_SUPPORT_GFX_RLC_LS */
- data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
/* AMD_CG_SUPPORT_GFX_CP_LS */
- data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
if (adev->asic_type != CHIP_ARCTURUS) {
/* AMD_CG_SUPPORT_GFX_3D_CGCG */
- data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
@@ -4970,7 +4858,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask, reg_mem_engine;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
switch (ring->me) {
@@ -4990,8 +4878,8 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
- adev->nbio_funcs->get_hdp_flush_req_offset(adev),
- adev->nbio_funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
ref_and_mask, ref_and_mask, 0x20);
}
@@ -5392,6 +5280,7 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
@@ -5400,9 +5289,9 @@ static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- adev->virt.reg_val_offs * 4));
+ kiq->reg_val_offs * 4));
}
static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -5723,313 +5612,446 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
- struct amdgpu_iv_entry *entry)
-{
- /* TODO ue will trigger an interrupt. */
- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- if (adev->gfx.funcs->query_ras_error_count)
- adev->gfx.funcs->query_ras_error_count(adev, err_data);
- amdgpu_ras_reset_gpu(adev, 0);
- return AMDGPU_RAS_SUCCESS;
-}
-
-static const struct {
- const char *name;
- uint32_t ip;
- uint32_t inst;
- uint32_t seg;
- uint32_t reg_offset;
- uint32_t per_se_instance;
- int32_t num_instance;
- uint32_t sec_count_mask;
- uint32_t ded_count_mask;
-} gfx_ras_edc_regs[] = {
- { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
- REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
- REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
- { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
- REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
- REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
- { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
- REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
- { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
- REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
- { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
- REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
- REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
- { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
- REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
- { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
- REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
- REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
- { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
- REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
- REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
- { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
- REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
- { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
- REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
- { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
- REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
- { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
- REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
- REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
- { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
- REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
+
+static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
+ { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
+ },
+ { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
+ },
+ { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
+ 0, 0
+ },
+ { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
+ },
+ { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
+ SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
+ 0, 0
+ },
+ { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
+ SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
+ SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
+ },
+ { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
+ SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
+ },
+ { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
+ 0, 0
+ },
+ { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
+ },
+ { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
+ 0, 0
+ },
{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
- 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
- REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
+ },
{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
- SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
- REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
- REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
+ },
{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
- SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
- REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
+ 0, 0
+ },
{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
- SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
- REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
- REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
+ },
{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
- SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
- REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
- REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
+ },
{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
- SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
- REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
- REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
+ },
{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
- SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
- REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
- REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
- { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
- REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
- { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
- REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
- REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
- { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
- REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
- { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
- REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
- { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
- REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
- { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
- REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
- { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
- REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
- { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
- REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
- { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
- REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
- { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
- REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
- { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
- REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
- { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
- REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
- { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
- REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
- { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
- { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
- { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
- { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
- { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
- { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
- { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
- { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
- REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
- { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
- 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
+ },
+ { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
+ },
+ { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
+ },
+ { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
+ },
+ { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
+ },
+ { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
+ },
+ { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
+ },
+ { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
+ 0, 0
+ },
{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
- 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
- 0 },
- { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
- 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
+ SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
+ 0, 0
+ },
{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
- 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
- 0 },
- { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
- 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
- { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
- REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
- { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
- REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
- REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
- { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
- REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
- REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
- { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
- REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
- { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
- REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
- { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
- REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
- { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
- REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
- REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
- { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
- REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
- REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
- { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
- REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
- REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
- { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
- REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
- REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
- { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
- REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
- { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
- REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
- REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
- { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
- REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
- REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
- { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
- REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
- REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
- { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
- REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
- REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
- { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
- REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
- REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
- { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
- REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
- REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
- { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
- REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
- REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
+ 0, 0
+ },
+ { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
+ 0, 0
+ },
+ { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
+ },
+ { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
+ },
+ { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
+ 0, 0
+ },
+ { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
+ },
+ { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
+ },
+ { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
+ },
+ { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
+ },
+ { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
+ },
+ { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
+ },
+ { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
+ },
+ { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
+ },
+ { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
+ },
+ { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
+ },
+ { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
+ },
{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
- 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
- { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
+ },
+ { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
+ },
{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
- 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
- { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
+ },
+ { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
+ },
{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
- 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
- { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
- { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
- { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
- { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
- { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
- { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
- SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
- REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
- 0 },
- { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
- { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
- { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
- { "SQC_DATA_BANKA_DIRTY_BIT_RAM",
- SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
- REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
- { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
- REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
- { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
- { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
- { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
- { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
- REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
- { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
- SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
- REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
- 0 },
- { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
- { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
- { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
- 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
- { "SQC_DATA_BANKB_DIRTY_BIT_RAM",
- SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
- REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
- { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
- REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
- { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
- REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
- { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
- REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
- { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
- REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
- { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
- REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
- { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
- { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
- { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
- { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
- { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
- { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
- REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
- { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
- REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
- { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
- REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
- { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
- { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
- { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
- { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
- { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
- { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
- REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
+ },
+ { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
+ },
+ { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
+ },
+ { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
+ },
+ { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
+ },
+ { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
+ 0, 0
+ },
+ { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
+ },
+ { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
+ },
+ { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
+ },
+ { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
+ },
+ { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
+ },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0
+ },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0
+ },
+ { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
+ },
+ { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
+ },
+ { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
+ },
+ { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ 0, 0
+ },
+ { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ 0, 0
+ }
};
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
@@ -6039,7 +6061,7 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
int ret;
struct ta_ras_trigger_error_input block_info = { 0 };
- if (adev->asic_type != CHIP_VEGA20)
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
return -EINVAL;
if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
@@ -6078,87 +6100,300 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
return ret;
}
+static const char *vml2_mems[] = {
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM1",
+};
+
+static const char *vml2_walker_mems[] = {
+ "UTC_VML2_CACHE_PDE0_MEM0",
+ "UTC_VML2_CACHE_PDE0_MEM1",
+ "UTC_VML2_CACHE_PDE1_MEM0",
+ "UTC_VML2_CACHE_PDE1_MEM1",
+ "UTC_VML2_CACHE_PDE2_MEM0",
+ "UTC_VML2_CACHE_PDE2_MEM1",
+ "UTC_VML2_RDIF_LOG_FIFO",
+};
+
+static const char *atc_l2_cache_2m_mems[] = {
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
+};
+
+static const char *atc_l2_cache_4k_mems[] = {
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
+};
+
+static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
+ struct ras_err_data *err_data)
+{
+ uint32_t i, data;
+ uint32_t sec_count, ded_count;
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
+
+ sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ vml2_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ vml2_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
+
+ sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
+ SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ vml2_walker_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
+ DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ vml2_walker_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
+
+ sec_count = (data & 0x00006000L) >> 0xd;
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ atc_l2_cache_2m_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
+
+ sec_count = (data & 0x00006000L) >> 0xd;
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ atc_l2_cache_4k_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = (data & 0x00018000L) >> 0xf;
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ atc_l2_cache_4k_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+
+ return 0;
+}
+
+static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
+ uint32_t se_id, uint32_t inst_id, uint32_t value,
+ uint32_t *sec_count, uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
+ if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
+ gfx_v9_0_ras_fields[i].seg != reg->seg ||
+ gfx_v9_0_ras_fields[i].inst != reg->inst)
+ continue;
+
+ sec_cnt = (value &
+ gfx_v9_0_ras_fields[i].sec_count_mask) >>
+ gfx_v9_0_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
+ gfx_v9_0_ras_fields[i].name,
+ se_id, inst_id,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value &
+ gfx_v9_0_ras_fields[i].ded_count_mask) >>
+ gfx_v9_0_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
+ gfx_v9_0_ras_fields[i].name,
+ se_id, inst_id,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev)
+{
+ int i, j, k;
+
+ /* read back registers to clear the counters */
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
+ gfx_v9_0_select_se_sh(adev, j, 0x0, k);
+ RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
+ }
+ }
+ }
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
+ }
+
+ WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
+}
+
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
- uint32_t sec_count, ded_count;
- uint32_t i;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i, j, k;
uint32_t reg_value;
- uint32_t se_id, instance_id;
- if (adev->asic_type != CHIP_VEGA20)
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
return -EINVAL;
err_data->ue_count = 0;
err_data->ce_count = 0;
mutex_lock(&adev->grbm_idx_mutex);
- for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
- for (instance_id = 0; instance_id < 256; instance_id++) {
- for (i = 0;
- i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]);
- i++) {
- if (se_id != 0 &&
- !gfx_ras_edc_regs[i].per_se_instance)
- continue;
- if (instance_id >= gfx_ras_edc_regs[i].num_instance)
- continue;
- gfx_v9_0_select_se_sh(adev, se_id, 0,
- instance_id);
-
- reg_value = RREG32(
- adev->reg_offset[gfx_ras_edc_regs[i].ip]
- [gfx_ras_edc_regs[i].inst]
- [gfx_ras_edc_regs[i].seg] +
- gfx_ras_edc_regs[i].reg_offset);
- sec_count = reg_value &
- gfx_ras_edc_regs[i].sec_count_mask;
- ded_count = reg_value &
- gfx_ras_edc_regs[i].ded_count_mask;
- if (sec_count) {
- DRM_INFO(
- "Instance[%d][%d]: SubBlock %s, SEC %d\n",
- se_id, instance_id,
- gfx_ras_edc_regs[i].name,
- sec_count);
- err_data->ce_count++;
- }
-
- if (ded_count) {
- DRM_INFO(
- "Instance[%d][%d]: SubBlock %s, DED %d\n",
- se_id, instance_id,
- gfx_ras_edc_regs[i].name,
- ded_count);
- err_data->ue_count++;
- }
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
+ gfx_v9_0_select_se_sh(adev, j, 0, k);
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
+ if (reg_value)
+ gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
+ j, k, reg_value,
+ &sec_count, &ded_count);
}
}
}
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- struct ras_common_if *ras_if = adev->gfx.ras_if;
- struct ras_dispatch_if ih_data = {
- .entry = entry,
- };
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
- if (!ras_if)
- return 0;
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
- ih_data.head = *ras_if;
+ gfx_v9_0_query_utc_edc_status(adev, err_data);
- DRM_ERROR("CP ECC ERROR IRQ\n");
- amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
@@ -6325,7 +6560,7 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
.set = gfx_v9_0_set_cp_ecc_error_state,
- .process = gfx_v9_0_cp_ecc_error_irq,
+ .process = amdgpu_gfx_cp_ecc_error_irq,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
new file mode 100644
index 000000000000..f099f13d7f1e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -0,0 +1,978 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "amdgpu_atomfirmware.h"
+#include "amdgpu_pm.h"
+
+#include "gc/gc_9_4_1_offset.h"
+#include "gc/gc_9_4_1_sh_mask.h"
+#include "soc15_common.h"
+
+#include "gfx_v9_4.h"
+#include "amdgpu_ras.h"
+
+static const struct soc15_reg_entry gfx_v9_4_edc_counter_regs[] = {
+ /* CPC */
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1 },
+ /* DC */
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1 },
+ /* CPF */
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1 },
+ /* GDS */
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1 },
+ /* SPI */
+ { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1 },
+ /* SQ */
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16 },
+ /* SQC */
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6 },
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3), 0, 4, 6 },
+ /* TA */
+ { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16 },
+ /* TCA */
+ { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2 },
+ /* TCC */
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16 },
+ /* TCI */
+ { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72 },
+ /* TCP */
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16 },
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16 },
+ /* TD */
+ { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16 },
+ /* GCEA */
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32 },
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32 },
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 1, 32 },
+ /* RLC */
+ { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT), 0, 1, 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2), 0, 1, 1 },
+};
+
+static void gfx_v9_4_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
+
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
+}
+
+static const struct soc15_ras_field_entry gfx_v9_4_ras_fields[] = {
+ /* CPC */
+ { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
+ { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) },
+ { "CPC_DC_STATE_RAM_ME1", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_STATE_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_CSINVOC_RAM_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_RESTORE_RAM_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT_ME1) },
+ { "CPC_DC_CSINVOC_RAM1_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, SEC_COUNT1_ME1),
+ SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, DED_COUNT1_ME1) },
+ { "CPC_DC_RESTORE_RAM1_ME1",
+ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, SEC_COUNT1_ME1),
+ SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, DED_COUNT1_ME1) },
+
+ /* CPF */
+ { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME2),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME2) },
+ { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, SEC_COUNT_ME1),
+ SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, DED_COUNT_ME1) },
+ { "CPF_TCIU_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
+ SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) },
+
+ /* GDS */
+ { "GDS_GRBM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT),
+ SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, SEC),
+ SOC15_REG_FIELD(GDS_EDC_GRBM_CNT, DED) },
+ { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) },
+ { "GDS_PHY_CMD_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
+ { "GDS_PHY_DATA_RAM_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_DED) },
+ { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE0_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE1_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE2_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
+ { "GDS_ME1_PIPE3_PIPE_MEM",
+ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
+ SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
+
+ /* SPI */
+ { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_DED_COUNT) },
+ { "SPI_GDS_EXPREQ", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_GDS_EXPREQ_DED_COUNT) },
+ { "SPI_WB_GRANT_30", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_30_DED_COUNT) },
+ { "SPI_WB_GRANT_61", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_WB_GRANT_61_DED_COUNT) },
+ { "SPI_LIFE_CNT", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_SEC_COUNT),
+ SOC15_REG_FIELD(SPI_EDC_CNT, SPI_LIFE_CNT_DED_COUNT) },
+
+ /* SQ */
+ { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) },
+ { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) },
+ { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) },
+ { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) },
+ { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) },
+ { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) },
+ { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
+ SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) },
+
+ /* SQC */
+ { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
+ { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKA_UTCL1_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKA_UTCL1_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKA_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKA_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKA_HIT_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_HIT_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKA_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ DATA_BANKA_MISS_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
+ { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKB_UTCL1_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKB_UTCL1_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKB_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, INST_BANKB_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ INST_BANKB_MISS_FIFO_DED_COUNT) },
+ { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
+ { "SQC_DATA_BANKB_HIT_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_HIT_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKB_MISS_FIFO",
+ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_PARITY_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3, DATA_BANKB_MISS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_PARITY_CNT3,
+ DATA_BANKB_MISS_FIFO_DED_COUNT) },
+ { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
+
+ /* TA */
+ { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
+ { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_DED_COUNT) },
+ { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_DED_COUNT) },
+ { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_DED_COUNT) },
+ { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_DED_COUNT) },
+
+ /* TCA */
+ { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_DED_COUNT) },
+ { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_DED_COUNT) },
+
+ /* TCC */
+ { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
+ { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
+ { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
+ { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
+ { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_DEC_DED_COUNT) },
+ { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, IN_USE_TRANSFER_DED_COUNT) },
+ { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_DATA_DED_COUNT) },
+ { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, RETURN_CONTROL_DED_COUNT) },
+ { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, UC_ATOMIC_FIFO_DED_COUNT) },
+ { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_DED_COUNT) },
+ { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_DED_COUNT) },
+ { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
+ { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_DED_COUNT) },
+ { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_DED_COUNT) },
+ { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_NEXT_RAM_DED_COUNT) },
+
+ /* TCI */
+ { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_DED_COUNT) },
+
+ /* TCP */
+ { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
+ { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
+ { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_DED_COUNT) },
+ { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_DED_COUNT) },
+ { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0, 0 },
+ { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
+ { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
+ SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
+
+ /* TD */
+ { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
+ { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
+ { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SEC_COUNT),
+ SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_DED_COUNT) },
+
+ /* EA */
+ { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
+ { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
+ { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
+ { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
+ { "EA_GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
+ { "EA_GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
+ { "EA_GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT) },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT) },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0, 0 },
+ { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IORD_CMDMEM_DED_COUNT) },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0, 0 },
+ { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_CMDMEM_DED_COUNT) },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0, 0 },
+ { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, IOWR_DATAMEM_DED_COUNT) },
+ { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT) },
+ { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0, 0 },
+ { "EA_GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT) },
+ { "EA_MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_DED_COUNT) },
+ { "EA_MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_DED_COUNT) },
+ { "EA_MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_DED_COUNT) },
+ { "EA_MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_DED_COUNT) },
+ { "EA_MAM_A0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A0MEM_DED_COUNT) },
+ { "EA_MAM_A1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A1MEM_DED_COUNT) },
+ { "EA_MAM_A2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A2MEM_DED_COUNT) },
+ { "EA_MAM_A3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_SEC_COUNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_A3MEM_DED_COUNT) },
+ { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
+ SOC15_REG_FIELD(GCEA_EDC_CNT, MAM_AFMEM_SEC_COUNT), 0, 0 },
+ { "EA_MAM_AFMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT3), 0, 0,
+ SOC15_REG_FIELD(GCEA_EDC_CNT3, MAM_AFMEM_DED_COUNT) },
+
+ /* RLC */
+ { "RLCG_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_INSTR_RAM_DED_COUNT) },
+ { "RLCG_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCG_SCRATCH_RAM_DED_COUNT) },
+ { "RLCV_INSTR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_INSTR_RAM_DED_COUNT) },
+ { "RLCV_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLCV_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_TCTAG_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_TCTAG_RAM_DED_COUNT) },
+ { "RLC_SPM_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SPM_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SRM_DATA_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_DATA_RAM_DED_COUNT) },
+ { "RLC_SRM_ADDR_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT, RLC_SRM_ADDR_RAM_DED_COUNT) },
+ { "RLC_SPM_SE0_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE0_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE1_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE1_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE2_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE2_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE3_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE3_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE4_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE4_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE5_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE5_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE6_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE6_SCRATCH_RAM_DED_COUNT) },
+ { "RLC_SPM_SE7_SCRATCH_RAM", SOC15_REG_ENTRY(GC, 0, mmRLC_EDC_CNT2),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_SEC_COUNT),
+ SOC15_REG_FIELD(RLC_EDC_CNT2, RLC_SPM_SE7_SCRATCH_RAM_DED_COUNT) },
+};
+
+static const char * const vml2_mems[] = {
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_0_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_1_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_2_4K_MEM1",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
+ "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM0",
+ "UTC_VML2_BANK_CACHE_3_4K_MEM1",
+ "UTC_VML2_IFIFO_GROUP0",
+ "UTC_VML2_IFIFO_GROUP1",
+ "UTC_VML2_IFIFO_GROUP2",
+ "UTC_VML2_IFIFO_GROUP3",
+ "UTC_VML2_IFIFO_GROUP4",
+ "UTC_VML2_IFIFO_GROUP5",
+ "UTC_VML2_IFIFO_GROUP6",
+ "UTC_VML2_IFIFO_GROUP7",
+ "UTC_VML2_IFIFO_GROUP8",
+ "UTC_VML2_IFIFO_GROUP9",
+ "UTC_VML2_IFIFO_GROUP10",
+ "UTC_VML2_IFIFO_GROUP11",
+ "UTC_VML2_IFIFO_GROUP12",
+ "UTC_VML2_IFIFO_GROUP13",
+ "UTC_VML2_IFIFO_GROUP14",
+ "UTC_VML2_IFIFO_GROUP15",
+ "UTC_VML2_IFIFO_GROUP16",
+ "UTC_VML2_IFIFO_GROUP17",
+ "UTC_VML2_IFIFO_GROUP18",
+ "UTC_VML2_IFIFO_GROUP19",
+ "UTC_VML2_IFIFO_GROUP20",
+ "UTC_VML2_IFIFO_GROUP21",
+ "UTC_VML2_IFIFO_GROUP22",
+ "UTC_VML2_IFIFO_GROUP23",
+ "UTC_VML2_IFIFO_GROUP24",
+};
+
+static const char * const vml2_walker_mems[] = {
+ "UTC_VML2_CACHE_PDE0_MEM0",
+ "UTC_VML2_CACHE_PDE0_MEM1",
+ "UTC_VML2_CACHE_PDE1_MEM0",
+ "UTC_VML2_CACHE_PDE1_MEM1",
+ "UTC_VML2_CACHE_PDE2_MEM0",
+ "UTC_VML2_CACHE_PDE2_MEM1",
+ "UTC_VML2_RDIF_ARADDRS",
+ "UTC_VML2_RDIF_LOG_FIFO",
+ "UTC_VML2_QUEUE_REQ",
+ "UTC_VML2_QUEUE_RET",
+};
+
+static const char * const utcl2_router_mems[] = {
+ "UTCL2_ROUTER_GROUP0_VML2_REQ_FIFO0",
+ "UTCL2_ROUTER_GROUP1_VML2_REQ_FIFO1",
+ "UTCL2_ROUTER_GROUP2_VML2_REQ_FIFO2",
+ "UTCL2_ROUTER_GROUP3_VML2_REQ_FIFO3",
+ "UTCL2_ROUTER_GROUP4_VML2_REQ_FIFO4",
+ "UTCL2_ROUTER_GROUP5_VML2_REQ_FIFO5",
+ "UTCL2_ROUTER_GROUP6_VML2_REQ_FIFO6",
+ "UTCL2_ROUTER_GROUP7_VML2_REQ_FIFO7",
+ "UTCL2_ROUTER_GROUP8_VML2_REQ_FIFO8",
+ "UTCL2_ROUTER_GROUP9_VML2_REQ_FIFO9",
+ "UTCL2_ROUTER_GROUP10_VML2_REQ_FIFO10",
+ "UTCL2_ROUTER_GROUP11_VML2_REQ_FIFO11",
+ "UTCL2_ROUTER_GROUP12_VML2_REQ_FIFO12",
+ "UTCL2_ROUTER_GROUP13_VML2_REQ_FIFO13",
+ "UTCL2_ROUTER_GROUP14_VML2_REQ_FIFO14",
+ "UTCL2_ROUTER_GROUP15_VML2_REQ_FIFO15",
+ "UTCL2_ROUTER_GROUP16_VML2_REQ_FIFO16",
+ "UTCL2_ROUTER_GROUP17_VML2_REQ_FIFO17",
+ "UTCL2_ROUTER_GROUP18_VML2_REQ_FIFO18",
+ "UTCL2_ROUTER_GROUP19_VML2_REQ_FIFO19",
+ "UTCL2_ROUTER_GROUP20_VML2_REQ_FIFO20",
+ "UTCL2_ROUTER_GROUP21_VML2_REQ_FIFO21",
+ "UTCL2_ROUTER_GROUP22_VML2_REQ_FIFO22",
+ "UTCL2_ROUTER_GROUP23_VML2_REQ_FIFO23",
+ "UTCL2_ROUTER_GROUP24_VML2_REQ_FIFO24",
+};
+
+static const char * const atc_l2_cache_2m_mems[] = {
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
+ "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
+};
+
+static const char * const atc_l2_cache_4k_mems[] = {
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
+ "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
+};
+
+static int gfx_v9_4_query_utc_edc_status(struct amdgpu_device *adev,
+ struct ras_err_data *err_data)
+{
+ uint32_t i, data;
+ uint32_t sec_count, ded_count;
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0);
+
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL);
+
+ sec_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ vml2_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VML2_MEM_ECC_CNTL, DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ vml2_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL);
+
+ sec_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL,
+ SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ vml2_walker_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, VML2_WALKER_MEM_ECC_CNTL,
+ DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ vml2_walker_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) {
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL);
+
+ sec_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ utcl2_router_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, UTCL2_MEM_ECC_CNTL, DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ utcl2_router_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL);
+
+ sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL,
+ SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ atc_l2_cache_2m_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_2M_DSM_CNTL,
+ DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ atc_l2_cache_2m_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, i);
+ data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL);
+
+ sec_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL,
+ SEC_COUNT);
+ if (sec_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
+ atc_l2_cache_4k_mems[i], sec_count);
+ err_data->ce_count += sec_count;
+ }
+
+ ded_count = REG_GET_FIELD(data, ATC_L2_CACHE_4K_DSM_CNTL,
+ DED_COUNT);
+ if (ded_count) {
+ DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
+ atc_l2_cache_4k_mems[i], ded_count);
+ err_data->ue_count += ded_count;
+ }
+ }
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+
+ return 0;
+}
+
+static int gfx_v9_4_ras_error_count(const struct soc15_reg_entry *reg,
+ uint32_t se_id, uint32_t inst_id,
+ uint32_t value, uint32_t *sec_count,
+ uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_ras_fields); i++) {
+ if (gfx_v9_4_ras_fields[i].reg_offset != reg->reg_offset ||
+ gfx_v9_4_ras_fields[i].seg != reg->seg ||
+ gfx_v9_4_ras_fields[i].inst != reg->inst)
+ continue;
+
+ sec_cnt = (value & gfx_v9_4_ras_fields[i].sec_count_mask) >>
+ gfx_v9_4_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
+ gfx_v9_4_ras_fields[i].name, se_id, inst_id,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value & gfx_v9_4_ras_fields[i].ded_count_mask) >>
+ gfx_v9_4_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
+ gfx_v9_4_ras_fields[i].name, se_id, inst_id,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i, j, k;
+ uint32_t reg_value;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return -EINVAL;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance;
+ k++) {
+ gfx_v9_4_select_se_sh(adev, j, 0, k);
+ reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_edc_counter_regs[i]));
+ if (reg_value)
+ gfx_v9_4_ras_error_count(
+ &gfx_v9_4_edc_counter_regs[i],
+ j, k, reg_value, &sec_count,
+ &ded_count);
+ }
+ }
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+
+ gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ gfx_v9_4_query_utc_edc_status(adev, err_data);
+
+ return 0;
+}
+
+void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev)
+{
+ int i, j, k;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < ARRAY_SIZE(gfx_v9_4_edc_counter_regs); i++) {
+ for (j = 0; j < gfx_v9_4_edc_counter_regs[i].se_num; j++) {
+ for (k = 0; k < gfx_v9_4_edc_counter_regs[i].instance;
+ k++) {
+ gfx_v9_4_select_se_sh(adev, j, 0x0, k);
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gfx_v9_4_edc_counter_regs[i]));
+ }
+ }
+ }
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL, 0);
+
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL, 0);
+
+ for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVML2_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(utcl2_router_mems); i++) {
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, i);
+ RREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_CNTL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, i);
+ RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_CNTL);
+ }
+
+ WREG32_SOC15(GC, 0, mmVML2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmVML2_WALKER_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmUTCL2_MEM_ECC_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_DSM_INDEX, 255);
+ WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255);
+}
+
+int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+ struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
+ int ret;
+ struct ta_ras_trigger_error_input block_info = { 0 };
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return -EINVAL;
+
+ block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
+ block_info.sub_block_index = info->head.sub_block_index;
+ block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
+ block_info.address = info->address;
+ block_info.value = info->value;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ ret = psp_ras_trigger_error(&adev->psp, &block_info);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
new file mode 100644
index 000000000000..2e3f6f755ad4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V9_4_H__
+#define __GFX_V9_4_H__
+
+void gfx_v9_4_clear_ras_edc_counter(struct amdgpu_device *adev);
+
+int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status);
+
+int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev,
+ void *inject_if);
+
+#endif /* __GFX_V9_4_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 6ce37ce77d14..1a2f18b908fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -75,40 +75,45 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
- /* Program the system aperture low logical page number. */
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
-
- if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
- /*
- * Raven2 has a HW issue that it is unable to use the vram which
- * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the
- * workaround that increase system aperture high address (add 1)
- * to get rid of the VM fault and hardware hang.
- */
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- max((adev->gmc.fb_end >> 18) + 0x1,
- adev->gmc.agp_end >> 18));
- else
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
-
- /* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
- + adev->vm_manager.vram_base_offset;
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
- (u32)(value >> 12));
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
- (u32)(value >> 44));
-
- /* Program "protection fault". */
- WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
- (u32)(adev->dummy_page_addr >> 12));
- WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
- (u32)((u64)adev->dummy_page_addr >> 44));
-
- WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
- ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+
+ if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
+ /*
+ * Raven2 has a HW issue that it is unable to use the
+ * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR.
+ * So here is the workaround that increase system
+ * aperture high address (add 1) to get rid of the VM
+ * fault and hardware hang.
+ */
+ WREG32_SOC15_RLC(GC, 0,
+ mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max((adev->gmc.fb_end >> 18) + 0x1,
+ adev->gmc.agp_end >> 18));
+ else
+ WREG32_SOC15_RLC(
+ GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+
+ /* Set default page address. */
+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ }
}
static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
@@ -178,6 +183,8 @@ static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp);
}
@@ -262,7 +269,7 @@ static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
{
- if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) {
/*
* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
* VF copy registers so vbios post doesn't program them, for
@@ -278,10 +285,12 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
gfxhub_v1_0_init_gart_aperture_regs(adev);
gfxhub_v1_0_init_system_aperture_regs(adev);
gfxhub_v1_0_init_tlb_regs(adev);
- gfxhub_v1_0_init_cache_regs(adev);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_0_init_cache_regs(adev);
gfxhub_v1_0_enable_system_domain(adev);
- gfxhub_v1_0_disable_identity_aperture(adev);
+ if (!amdgpu_sriov_vf(adev))
+ gfxhub_v1_0_disable_identity_aperture(adev);
gfxhub_v1_0_setup_vmid_config(adev);
gfxhub_v1_0_program_invalidation(adev);
@@ -365,6 +374,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
index 5e9ab8eb214a..c0ab71df0d90 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
@@ -33,16 +33,31 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);
u32 max_region =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+ u32 max_num_physical_nodes = 0;
+ u32 max_physical_node_id = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ max_num_physical_nodes = 4;
+ max_physical_node_id = 3;
+ break;
+ case CHIP_ARCTURUS:
+ max_num_physical_nodes = 8;
+ max_physical_node_id = 7;
+ break;
+ default:
+ return -EINVAL;
+ }
/* PF_MAX_REGION=0 means xgmi is disabled */
if (max_region) {
adev->gmc.xgmi.num_physical_nodes = max_region + 1;
- if (adev->gmc.xgmi.num_physical_nodes > 4)
+ if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
return -EINVAL;
adev->gmc.xgmi.physical_node_id =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
- if (adev->gmc.xgmi.physical_node_id > 3)
+ if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
return -EINVAL;
adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index 8b789f750b72..b70c7b483c24 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -46,21 +46,25 @@ u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev)
return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24;
}
-static void gfxhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev)
+void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
{
- uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
+ /* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */
+ int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
+ - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ offset * vmid, lower_32_bits(page_table_base));
- WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
- lower_32_bits(value));
-
- WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
- upper_32_bits(value));
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ offset * vmid, upper_32_bits(page_table_base));
}
static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
- gfxhub_v2_0_init_gart_pt_regs(adev);
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ gfxhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
@@ -151,6 +155,15 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp);
tmp = mmGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp);
tmp = mmGCVM_L2_CNTL4_DEFAULT;
@@ -166,6 +179,8 @@ static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp);
}
@@ -341,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
index 06807940748b..392b8cd94fc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
@@ -31,5 +31,7 @@ void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value);
void gfxhub_v2_0_init(struct amdgpu_device *adev);
u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev);
+void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 241a4e57cf4a..9775eca6fe43 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
#include "hdp/hdp_5_0_0_sh_mask.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
#include "navi10_enum.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "nbio_v2_3.h"
@@ -219,6 +222,34 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
return req;
}
+/**
+ * gmc_v10_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1) &&
+ (!amdgpu_sriov_vf(adev)));
+}
+
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
+ struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -229,13 +260,37 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
unsigned int vmhub, uint32_t flush_type)
{
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
- u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+ u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+ u32 tmp;
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned int i;
- WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+ spin_lock(&adev->gmc.invalidate_lock);
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* a read return value of 1 means semaphore acuqire */
+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
/*
* Issue a dummy read to wait for the ACK register to be cleared
@@ -254,6 +309,16 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
udelay(1);
}
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
if (i < adev->usec_timeout)
return;
@@ -278,7 +343,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
int r;
/* flush hdp cache */
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
mutex_lock(&adev->mman.gtt_window_lock);
@@ -292,7 +357,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
if (!adev->mman.buffer_funcs_enabled ||
!adev->ib_pool_ready ||
- adev->in_gpu_reset) {
+ adev->in_gpu_reset ||
+ ring->sched.ready == false) {
gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
mutex_unlock(&adev->mman.gtt_window_lock);
return;
@@ -309,6 +375,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
job->vm_needs_flush = true;
+ job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
@@ -330,24 +397,102 @@ error_alloc:
DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
}
+/**
+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid, i;
+ signed long r;
+ uint32_t seq;
+ uint16_t queried_pasid;
+ bool ret;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+ if (amdgpu_emu_mode == 0 && ring->sched.ready) {
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ /* 2 dwords flush + 8 dwords fence */
+ amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
+ kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, flush_type, all_hub);
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+ }
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried_pasid);
+ if (ret && queried_pasid == pasid) {
+ if (all_hub) {
+ for (i = 0; i < adev->num_vmhubs; i++)
+ gmc_v10_0_flush_gpu_tlb(adev, vmid,
+ i, flush_type);
+ } else {
+ gmc_v10_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB_0, flush_type);
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means semaphore acuqire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
+
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
lower_32_bits(pd_addr));
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
upper_32_bits(pd_addr));
- amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+ hub->vm_inv_eng0_ack + eng,
+ req, 1 << vmid);
- /* wait for the invalidate to complete */
- amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
- 1 << vmid, 1 << vmid);
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
return pd_addr;
}
@@ -397,43 +542,23 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
* 1 system
* 0 valid
*/
-static uint64_t gmc_v10_0_get_vm_pte_flags(struct amdgpu_device *adev,
- uint32_t flags)
-{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
- pte_flag |= AMDGPU_PTE_EXECUTABLE;
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
- switch (flags & AMDGPU_VM_MTYPE_MASK) {
+static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
+{
+ switch (flags) {
case AMDGPU_VM_MTYPE_DEFAULT:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
case AMDGPU_VM_MTYPE_NC:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
case AMDGPU_VM_MTYPE_WC:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
case AMDGPU_VM_MTYPE_CC:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
case AMDGPU_VM_MTYPE_UC:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
default:
- pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
}
-
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- return pte_flag;
}
static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
@@ -460,12 +585,33 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
}
+static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+
+ *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
+ *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+
+ if (mapping->flags & AMDGPU_PTE_PRT) {
+ *flags |= AMDGPU_PTE_PRT;
+ *flags |= AMDGPU_PTE_SNOOPED;
+ *flags |= AMDGPU_PTE_LOG;
+ *flags |= AMDGPU_PTE_SYSTEM;
+ *flags &= ~AMDGPU_PTE_VALID;
+ }
+}
+
static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
- .get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags,
- .get_vm_pde = gmc_v10_0_get_vm_pde
+ .map_mtype = gmc_v10_0_map_mtype,
+ .get_vm_pde = gmc_v10_0_get_vm_pde,
+ .get_vm_pte = gmc_v10_0_get_vm_pte
};
static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -494,22 +640,13 @@ static int gmc_v10_0_early_init(void *handle)
static int gmc_v10_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
- unsigned i;
-
- for(i = 0; i < adev->num_rings; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
- unsigned vmhub = ring->funcs->vmhub;
+ int r;
- ring->vm_inv_eng = vm_inv_eng[vmhub]++;
- dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
- ring->idx, ring->name, ring->vm_inv_eng,
- ring->funcs->vmhub);
- }
+ amdgpu_bo_late_init(adev);
- /* Engine 17 is used for GART flushes */
- for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
- BUG_ON(vm_inv_eng[i] > 17);
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
+ if (r)
+ return r;
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}
@@ -519,8 +656,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
{
u64 base = 0;
- if (!amdgpu_sriov_vf(adev))
- base = gfxhub_v2_0_get_fb_location(adev);
+ base = gfxhub_v2_0_get_fb_location(adev);
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_gmc_gart_location(adev, mc);
@@ -540,24 +676,13 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
{
- int chansize, numchan;
-
- if (!amdgpu_emu_mode)
- adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
- else {
- /* hard code vram_width for emulation */
- chansize = 128;
- numchan = 1;
- adev->gmc.vram_width = numchan * chansize;
- }
-
/* Could aper size report 0 ? */
adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
/* size in MB on si */
adev->gmc.mc_vram_size =
- adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
adev->gmc.visible_vram_size = adev->gmc.aper_size;
@@ -636,7 +761,7 @@ static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
static int gmc_v10_0_sw_init(void *handle)
{
- int r;
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gfxhub_v2_0_init(adev);
@@ -644,7 +769,15 @@ static int gmc_v10_0_sw_init(void *handle)
spin_lock_init(&adev->gmc.invalidate_lock);
- adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ if (!amdgpu_emu_mode)
+ adev->gmc.vram_width = vram_width;
+ else
+ adev->gmc.vram_width = 1 * 128; /* numchan * chansize */
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
switch (adev->asic_type) {
case CHIP_NAVI10:
case CHIP_NAVI14:
@@ -665,6 +798,10 @@ static int gmc_v10_0_sw_init(void *handle)
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault);
+
+ if (r)
+ return r;
+
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
UTCL2_1_0__SRCID__FAULT,
&adev->gmc.vm_fault);
@@ -677,15 +814,6 @@ static int gmc_v10_0_sw_init(void *handle)
*/
adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
- /*
- * Reserve 8M stolen memory for navi10 like vega10
- * TODO: will check if it's really needed on asic.
- */
- if (amdgpu_emu_mode == 1)
- adev->gmc.stolen_size = 0;
- else
- adev->gmc.stolen_size = 9 * 1024 *1024;
-
r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
if (r) {
printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
@@ -794,7 +922,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
/* Flush HDP after it is initialized */
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 9fb1765e92d1..b205039350b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -386,27 +386,20 @@ static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
return pd_addr;
}
-static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
- uint32_t flags)
-{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- return pte_flag;
-}
-
static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
}
+static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags &= ~AMDGPU_PTE_PRT;
+}
+
static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
@@ -1153,7 +1146,7 @@ static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = {
.emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb,
.set_prt = gmc_v6_0_set_prt,
.get_vm_pde = gmc_v6_0_get_vm_pde,
- .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
+ .get_vm_pte = gmc_v6_0_get_vm_pte,
};
static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 0c3d9bc3a641..9da9596a3638 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -381,7 +381,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
- if (adev->flags & AMD_IS_APU) {
+ if (adev->flags & AMD_IS_APU &&
+ adev->gmc.real_vram_size > adev->gmc.aper_size) {
adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
adev->gmc.aper_size = adev->gmc.real_vram_size;
}
@@ -418,6 +419,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid;
+ unsigned int tmp;
+
+ if (adev->in_gpu_reset)
+ return -EIO;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -463,27 +496,20 @@ static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
}
-static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
- uint32_t flags)
-{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- return pte_flag;
-}
-
static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
}
+static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags &= ~AMDGPU_PTE_PRT;
+}
+
/**
* gmc_v8_0_set_fault_enable_default - update VM fault handling
*
@@ -1340,11 +1366,12 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
.set_prt = gmc_v7_0_set_prt,
- .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
- .get_vm_pde = gmc_v7_0_get_vm_pde
+ .get_vm_pde = gmc_v7_0_get_vm_pde,
+ .get_vm_pte = gmc_v7_0_get_vm_pte
};
static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index ea764dd9245d..27d83204fa2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid;
+ unsigned int tmp;
+
+ if (adev->in_gpu_reset)
+ return -EIO;
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -686,29 +719,21 @@ static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
* 0 valid
*/
-static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
- uint32_t flags)
-{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
- pte_flag |= AMDGPU_PTE_EXECUTABLE;
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- return pte_flag;
-}
-
static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
}
+static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+ *flags &= ~AMDGPU_PTE_PRT;
+}
+
/**
* gmc_v8_0_set_fault_enable_default - update VM fault handling
*
@@ -1708,11 +1733,12 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
.set_prt = gmc_v8_0_set_prt,
- .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
- .get_vm_pde = gmc_v8_0_get_vm_pde
+ .get_vm_pde = gmc_v8_0_get_vm_pde,
+ .get_vm_pte = gmc_v8_0_get_vm_pte
};
static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f91337030dc0..90216abf14a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -38,10 +38,12 @@
#include "dce/dce_12_0_sh_mask.h"
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
#include "athub/athub_1_0_offset.h"
#include "oss/osssys_4_0_offset.h"
#include "soc15.h"
+#include "soc15d.h"
#include "soc15_common.h"
#include "umc/umc_6_0_sh_mask.h"
@@ -51,10 +53,12 @@
#include "gfxhub_v1_1.h"
#include "mmhub_v9_4.h"
#include "umc_v6_1.h"
+#include "umc_v6_0.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
@@ -205,6 +209,11 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
{
u32 bits, i, tmp, reg;
+ /* Devices newer then VEGA10/12 shall have these programming
+ sequences performed by PSP BL */
+ if (adev->asic_type >= CHIP_VEGA20)
+ return 0;
+
bits = 0x7f;
switch (state) {
@@ -243,44 +252,6 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
return 0;
}
-static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
- struct amdgpu_iv_entry *entry)
-{
- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- if (adev->umc.funcs->query_ras_error_count)
- adev->umc.funcs->query_ras_error_count(adev, err_data);
- /* umc query_ras_error_address is also responsible for clearing
- * error status
- */
- if (adev->umc.funcs->query_ras_error_address)
- adev->umc.funcs->query_ras_error_address(adev, err_data);
-
- /* only uncorrectable error needs gpu reset */
- if (err_data->ue_count)
- amdgpu_ras_reset_gpu(adev, 0);
-
- return AMDGPU_RAS_SUCCESS;
-}
-
-static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
- struct ras_dispatch_if ih_data = {
- .entry = entry,
- };
-
- if (!ras_if)
- return 0;
-
- ih_data.head = *ras_if;
-
- amdgpu_ras_interrupt_dispatch(adev, &ih_data);
- return 0;
-}
-
static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
@@ -355,6 +326,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
}
/* If it's the first fault for this address, process it normally */
+ if (retry_fault && !in_interrupt() &&
+ amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+ return 1; /* This also prevents sending it to KFD */
+
if (!amdgpu_sriov_vf(adev)) {
/*
* Issue a dummy read to wait for the status register to
@@ -417,7 +392,7 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
.set = gmc_v9_0_ecc_interrupt_state,
- .process = gmc_v9_0_process_ecc_irq,
+ .process = amdgpu_umc_process_ecc_irq,
};
static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
@@ -425,8 +400,10 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gmc.vm_fault.num_types = 1;
adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
- adev->gmc.ecc_irq.num_types = 1;
- adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->gmc.ecc_irq.num_types = 1;
+ adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
+ }
}
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
@@ -448,6 +425,36 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
return req;
}
+/**
+ * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1) &&
+ (!amdgpu_sriov_vf(adev)) &&
+ (!(adev->asic_type == CHIP_RAVEN &&
+ adev->rev_id < 0x8 &&
+ adev->pdev->device == 0x15d8)));
+}
+
+static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -467,14 +474,15 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
const unsigned eng = 17;
- u32 j, tmp;
+ u32 j, inv_req, tmp;
struct amdgpu_vmhub *hub;
BUG_ON(vmhub >= adev->num_vmhubs);
hub = &adev->vmhub[vmhub];
- tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+ inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
/* This is necessary for a HW workaround under SRIOV as well
* as GFXOFF under bare metal
@@ -485,13 +493,35 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t req = hub->vm_inv_eng0_req + eng;
uint32_t ack = hub->vm_inv_eng0_ack + eng;
- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
+ amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid);
return;
}
spin_lock(&adev->gmc.invalidate_lock);
- WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore) {
+ for (j = 0; j < adev->usec_timeout; j++) {
+ /* a read return value of 1 means semaphore acuqire */
+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (j >= adev->usec_timeout)
+ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+ }
+
+ WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
/*
* Issue a dummy read to wait for the ACK register to be cleared
@@ -506,21 +536,107 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
break;
udelay(1);
}
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+
spin_unlock(&adev->gmc.invalidate_lock);
+
if (j < adev->usec_timeout)
return;
DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
+/**
+ * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */
+static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ int vmid, i;
+ signed long r;
+ uint32_t seq;
+ uint16_t queried_pasid;
+ bool ret;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+
+ if (adev->in_gpu_reset)
+ return -EIO;
+
+ if (ring->sched.ready) {
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ /* 2 dwords flush + 8 dwords fence */
+ amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
+ kiq->pmf->kiq_invalidate_tlbs(ring,
+ pasid, flush_type, all_hub);
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+ }
+
+ for (vmid = 1; vmid < 16; vmid++) {
+
+ ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried_pasid);
+ if (ret && queried_pasid == pasid) {
+ if (all_hub) {
+ for (i = 0; i < adev->num_vmhubs; i++)
+ gmc_v9_0_flush_gpu_tlb(adev, vmid,
+ i, flush_type);
+ } else {
+ gmc_v9_0_flush_gpu_tlb(adev, vmid,
+ AMDGPU_GFXHUB_0, flush_type);
+ }
+ break;
+ }
+ }
+
+ return 0;
+
+}
+
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
struct amdgpu_device *adev = ring->adev;
struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /* a read return value of 1 means semaphore acuqire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
+
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
lower_32_bits(pd_addr));
@@ -531,6 +647,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
hub->vm_inv_eng0_ack + eng,
req, 1 << vmid);
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (use_semaphore)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
+
return pd_addr;
}
@@ -584,44 +708,25 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
* 0 valid
*/
-static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
- uint32_t flags)
+static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
- uint64_t pte_flag = 0;
-
- if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
- pte_flag |= AMDGPU_PTE_EXECUTABLE;
- if (flags & AMDGPU_VM_PAGE_READABLE)
- pte_flag |= AMDGPU_PTE_READABLE;
- if (flags & AMDGPU_VM_PAGE_WRITEABLE)
- pte_flag |= AMDGPU_PTE_WRITEABLE;
-
- switch (flags & AMDGPU_VM_MTYPE_MASK) {
+ switch (flags) {
case AMDGPU_VM_MTYPE_DEFAULT:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
case AMDGPU_VM_MTYPE_NC:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
case AMDGPU_VM_MTYPE_WC:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
+ case AMDGPU_VM_MTYPE_RW:
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
case AMDGPU_VM_MTYPE_CC:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
case AMDGPU_VM_MTYPE_UC:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
default:
- pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
- break;
+ return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
}
-
- if (flags & AMDGPU_VM_PAGE_PRT)
- pte_flag |= AMDGPU_PTE_PRT;
-
- return pte_flag;
}
static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
@@ -648,12 +753,35 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
}
+static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t *flags)
+{
+ *flags &= ~AMDGPU_PTE_EXECUTABLE;
+ *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+
+ *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
+ *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
+
+ if (mapping->flags & AMDGPU_PTE_PRT) {
+ *flags |= AMDGPU_PTE_PRT;
+ *flags &= ~AMDGPU_PTE_VALID;
+ }
+
+ if (adev->asic_type == CHIP_ARCTURUS &&
+ !(*flags & AMDGPU_PTE_SYSTEM) &&
+ mapping->bo_va->is_xgmi)
+ *flags |= AMDGPU_PTE_SNOOPED;
+}
+
static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
+ .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
- .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
- .get_vm_pde = gmc_v9_0_get_vm_pde
+ .map_mtype = gmc_v9_0_map_mtype,
+ .get_vm_pde = gmc_v9_0_get_vm_pde,
+ .get_vm_pte = gmc_v9_0_get_vm_pte
};
static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -664,11 +792,22 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ adev->umc.funcs = &umc_v6_0_funcs;
+ break;
case CHIP_VEGA20:
adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
- adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET;
+ adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+ adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
+ adev->umc.funcs = &umc_v6_1_funcs;
+ break;
+ case CHIP_ARCTURUS:
+ adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
+ adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
+ adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
adev->umc.funcs = &umc_v6_1_funcs;
break;
@@ -681,7 +820,10 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_VEGA20:
- adev->mmhub_funcs = &mmhub_v1_0_funcs;
+ adev->mmhub.funcs = &mmhub_v1_0_funcs;
+ break;
+ case CHIP_ARCTURUS:
+ adev->mmhub.funcs = &mmhub_v9_4_funcs;
break;
default:
break;
@@ -732,175 +874,15 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
}
}
-static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring;
- unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
- {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
- GFXHUB_FREE_VM_INV_ENGS_BITMAP};
- unsigned i;
- unsigned vmhub, inv_eng;
-
- for (i = 0; i < adev->num_rings; ++i) {
- ring = adev->rings[i];
- vmhub = ring->funcs->vmhub;
-
- inv_eng = ffs(vm_inv_engs[vmhub]);
- if (!inv_eng) {
- dev_err(adev->dev, "no VM inv eng for ring %s\n",
- ring->name);
- return -EINVAL;
- }
-
- ring->vm_inv_eng = inv_eng - 1;
- vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
-
- dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
- ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
- }
-
- return 0;
-}
-
-static int gmc_v9_0_ecc_ras_block_late_init(void *handle,
- struct ras_fs_if *fs_info, struct ras_common_if *ras_block)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ras_common_if **ras_if = NULL;
- struct ras_ih_if ih_info = {
- .cb = gmc_v9_0_process_ras_data_cb,
- };
- int r;
-
- if (ras_block->block == AMDGPU_RAS_BLOCK__UMC)
- ras_if = &adev->gmc.umc_ras_if;
- else if (ras_block->block == AMDGPU_RAS_BLOCK__MMHUB)
- ras_if = &adev->gmc.mmhub_ras_if;
- else
- BUG();
-
- if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
- amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
- return 0;
- }
-
- /* handle resume path. */
- if (*ras_if) {
- /* resend ras TA enable cmd during resume.
- * prepare to handle failure.
- */
- ih_info.head = **ras_if;
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- /* request a gpu reset. will run again. */
- amdgpu_ras_request_reset_on_boot(adev,
- ras_block->block);
- return 0;
- }
- /* fail to enable ras, cleanup all. */
- goto irq;
- }
- /* enable successfully. continue. */
- goto resume;
- }
-
- *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
- if (!*ras_if)
- return -ENOMEM;
-
- **ras_if = *ras_block;
-
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- amdgpu_ras_request_reset_on_boot(adev,
- ras_block->block);
- r = 0;
- }
- goto feature;
- }
-
- ih_info.head = **ras_if;
- fs_info->head = **ras_if;
-
- if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) {
- r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
- if (r)
- goto interrupt;
- }
-
- amdgpu_ras_debugfs_create(adev, fs_info);
-
- r = amdgpu_ras_sysfs_create(adev, fs_info);
- if (r)
- goto sysfs;
-resume:
- if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) {
- r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
- if (r)
- goto irq;
- }
-
- return 0;
-irq:
- amdgpu_ras_sysfs_remove(adev, *ras_if);
-sysfs:
- amdgpu_ras_debugfs_remove(adev, *ras_if);
- if (ras_block->block == AMDGPU_RAS_BLOCK__UMC)
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
-interrupt:
- amdgpu_ras_feature_enable(adev, *ras_if, 0);
-feature:
- kfree(*ras_if);
- *ras_if = NULL;
- return r;
-}
-
-static int gmc_v9_0_ecc_late_init(void *handle)
-{
- int r;
-
- struct ras_fs_if umc_fs_info = {
- .sysfs_name = "umc_err_count",
- .debugfs_name = "umc_err_inject",
- };
- struct ras_common_if umc_ras_block = {
- .block = AMDGPU_RAS_BLOCK__UMC,
- .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
- .sub_block_index = 0,
- .name = "umc",
- };
- struct ras_fs_if mmhub_fs_info = {
- .sysfs_name = "mmhub_err_count",
- .debugfs_name = "mmhub_err_inject",
- };
- struct ras_common_if mmhub_ras_block = {
- .block = AMDGPU_RAS_BLOCK__MMHUB,
- .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
- .sub_block_index = 0,
- .name = "mmhub",
- };
-
- r = gmc_v9_0_ecc_ras_block_late_init(handle,
- &umc_fs_info, &umc_ras_block);
- if (r)
- return r;
-
- r = gmc_v9_0_ecc_ras_block_late_init(handle,
- &mmhub_fs_info, &mmhub_ras_block);
- return r;
-}
-
static int gmc_v9_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool r;
+ int r;
if (!gmc_v9_0_keep_stolen_memory(adev))
amdgpu_bo_late_init(adev);
- r = gmc_v9_0_allocate_vm_inv_eng(adev);
+ r = amdgpu_gmc_allocate_vm_inv_eng(adev);
if (r)
return r;
/* Check if ecc is available */
@@ -908,11 +890,12 @@ static int gmc_v9_0_late_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
r = amdgpu_atomfirmware_mem_ecc_supported(adev);
if (!r) {
DRM_INFO("ECC is not present.\n");
- if (adev->df_funcs->enable_ecc_force_par_wr_rmw)
- adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false);
+ if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
+ adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
} else {
DRM_INFO("ECC is active.\n");
}
@@ -929,7 +912,7 @@ static int gmc_v9_0_late_init(void *handle)
}
}
- r = gmc_v9_0_ecc_late_init(handle);
+ r = amdgpu_gmc_ras_late_init(adev);
if (r)
return r;
@@ -970,33 +953,11 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
{
- int chansize, numchan;
int r;
- if (amdgpu_sriov_vf(adev)) {
- /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
- * and DF related registers is not readable, seems hardcord is the
- * only way to set the correct vram_width
- */
- adev->gmc.vram_width = 2048;
- } else if (amdgpu_emu_mode != 1) {
- adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
- }
-
- if (!adev->gmc.vram_width) {
- /* hbm memory channel size */
- if (adev->flags & AMD_IS_APU)
- chansize = 64;
- else
- chansize = 128;
-
- numchan = adev->df_funcs->get_hbm_channel_number(adev);
- adev->gmc.vram_width = numchan * chansize;
- }
-
/* size in MB on si */
adev->gmc.mc_vram_size =
- adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
if (!(adev->flags & AMD_IS_APU)) {
@@ -1108,7 +1069,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
static int gmc_v9_0_sw_init(void *handle)
{
- int r;
+ int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gfxhub_v1_0_init(adev);
@@ -1119,7 +1080,32 @@ static int gmc_v9_0_sw_init(void *handle)
spin_lock_init(&adev->gmc.invalidate_lock);
- adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
+ r = amdgpu_atomfirmware_get_vram_info(adev,
+ &vram_width, &vram_type, &vram_vendor);
+ if (amdgpu_sriov_vf(adev))
+ /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
+ * and DF related registers is not readable, seems hardcord is the
+ * only way to set the correct vram_width
+ */
+ adev->gmc.vram_width = 2048;
+ else if (amdgpu_emu_mode != 1)
+ adev->gmc.vram_width = vram_width;
+
+ if (!adev->gmc.vram_width) {
+ int chansize, numchan;
+
+ /* hbm memory channel size */
+ if (adev->flags & AMD_IS_APU)
+ chansize = 64;
+ else
+ chansize = 128;
+
+ numchan = adev->df.funcs->get_hbm_channel_number(adev);
+ adev->gmc.vram_width = numchan * chansize;
+ }
+
+ adev->gmc.vram_type = vram_type;
+ adev->gmc.vram_vendor = vram_vendor;
switch (adev->asic_type) {
case CHIP_RAVEN:
adev->num_vmhubs = 2;
@@ -1180,11 +1166,13 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
- /* interrupt sent to DF. */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
- &adev->gmc.ecc_irq);
- if (r)
- return r;
+ if (!amdgpu_sriov_vf(adev)) {
+ /* interrupt sent to DF. */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
+ &adev->gmc.ecc_irq);
+ if (r)
+ return r;
+ }
/* Set the internal MC address mask
* This is the max address of the GPU's
@@ -1240,33 +1228,7 @@ static int gmc_v9_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
void *stolen_vga_buf;
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
- adev->gmc.umc_ras_if) {
- struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- };
-
- /* remove fs first */
- amdgpu_ras_debugfs_remove(adev, ras_if);
- amdgpu_ras_sysfs_remove(adev, ras_if);
- /* remove the IH */
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
- amdgpu_ras_feature_enable(adev, ras_if, 0);
- kfree(ras_if);
- }
-
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
- adev->gmc.mmhub_ras_if) {
- struct ras_common_if *ras_if = adev->gmc.mmhub_ras_if;
-
- /* remove fs and disable ras feature */
- amdgpu_ras_debugfs_remove(adev, ras_if);
- amdgpu_ras_sysfs_remove(adev, ras_if);
- amdgpu_ras_feature_enable(adev, ras_if, 0);
- kfree(ras_if);
- }
-
+ amdgpu_gmc_ras_fini(adev);
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
@@ -1316,13 +1278,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
*/
static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
- int r, i;
- bool value;
- u32 tmp;
-
- amdgpu_device_program_register_sequence(adev,
- golden_settings_vega10_hdp,
- ARRAY_SIZE(golden_settings_vega10_hdp));
+ int r;
if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
@@ -1332,15 +1288,6 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
if (r)
return r;
- switch (adev->asic_type) {
- case CHIP_RAVEN:
- /* TODO for renoir */
- mmhub_v1_0_update_power_gating(adev, true);
- break;
- default:
- break;
- }
-
r = gfxhub_v1_0_gart_enable(adev);
if (r)
return r;
@@ -1352,6 +1299,49 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
if (r)
return r;
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+ adev->gart.ready = true;
+ return 0;
+}
+
+static int gmc_v9_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool value;
+ int r, i;
+ u32 tmp;
+
+ /* The sequence of these two function calls matters.*/
+ gmc_v9_0_init_golden_registers(adev);
+
+ if (adev->mode_info.num_crtc) {
+ if (adev->asic_type != CHIP_ARCTURUS) {
+ /* Lockout access through VGA aperture*/
+ WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+
+ /* disable VGA render */
+ WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+ }
+ }
+
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_vega10_hdp,
+ ARRAY_SIZE(golden_settings_vega10_hdp));
+
+ switch (adev->asic_type) {
+ case CHIP_RAVEN:
+ /* TODO for renoir */
+ mmhub_v1_0_update_power_gating(adev, true);
+ break;
+ case CHIP_ARCTURUS:
+ WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1);
+ break;
+ default:
+ break;
+ }
+
WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
@@ -1361,44 +1351,25 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
/* After HDP is initialized, flush HDP.*/
- adev->nbio_funcs->hdp_flush(adev, NULL);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
value = false;
else
value = true;
- gfxhub_v1_0_set_fault_enable_default(adev, value);
- if (adev->asic_type == CHIP_ARCTURUS)
- mmhub_v9_4_set_fault_enable_default(adev, value);
- else
- mmhub_v1_0_set_fault_enable_default(adev, value);
-
+ if (!amdgpu_sriov_vf(adev)) {
+ gfxhub_v1_0_set_fault_enable_default(adev, value);
+ if (adev->asic_type == CHIP_ARCTURUS)
+ mmhub_v9_4_set_fault_enable_default(adev, value);
+ else
+ mmhub_v1_0_set_fault_enable_default(adev, value);
+ }
for (i = 0; i < adev->num_vmhubs; ++i)
gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
- DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->gmc.gart_size >> 20),
- (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
- adev->gart.ready = true;
- return 0;
-}
-
-static int gmc_v9_0_hw_init(void *handle)
-{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- /* The sequence of these two function calls matters.*/
- gmc_v9_0_init_golden_registers(adev);
-
- if (adev->mode_info.num_crtc) {
- /* Lockout access through VGA aperture*/
- WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
-
- /* disable VGA render */
- WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
- }
+ if (adev->umc.funcs && adev->umc.funcs->init_registers)
+ adev->umc.funcs->init_registers(adev);
r = gmc_v9_0_gart_enable(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
index 971c0840358f..e0585e8c6c1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
@@ -24,24 +24,6 @@
#ifndef __GMC_V9_0_H__
#define __GMC_V9_0_H__
- /*
- * The latest engine allocation on gfx9 is:
- * Engine 2, 3: firmware
- * Engine 0, 1, 4~16: amdgpu ring,
- * subject to change when ring number changes
- * Engine 17: Gart flushes
- */
-#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
-#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3
-
extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;
-
-/* amdgpu_amdkfd*.c */
-void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
- uint64_t value);
-void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
- uint64_t value);
-void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid,
- uint32_t vmid, uint64_t value);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
new file mode 100644
index 000000000000..0debfd9f428c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -0,0 +1,586 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "vcn_v1_0.h"
+
+#include "vcn/vcn_1_0_offset.h"
+#include "vcn/vcn_1_0_sh_mask.h"
+
+static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+
+static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+ ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ ring->ring[(*ptr)++] = 0;
+ ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0);
+ } else {
+ ring->ring[(*ptr)++] = reg_offset;
+ ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
+ }
+ ring->ring[(*ptr)++] = val;
+}
+
+static void jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ uint32_t reg, reg_offset, val, mask, i;
+
+ // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
+ reg_offset = (reg << 2);
+ val = lower_32_bits(ring->gpu_addr);
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
+ reg_offset = (reg << 2);
+ val = upper_32_bits(ring->gpu_addr);
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 3rd to 5th: issue MEM_READ commands
+ for (i = 0; i <= 2; i++) {
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
+ ring->ring[ptr++] = 0;
+ }
+
+ // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x13;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 7th: program mmUVD_JRBC_RB_REF_DATA
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA);
+ reg_offset = (reg << 2);
+ val = 0x1;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x1;
+ mask = 0x1;
+
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
+ ring->ring[ptr++] = 0x01400200;
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
+ ring->ring[ptr++] = val;
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ ring->ring[ptr++] = 0;
+ ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3);
+ } else {
+ ring->ring[ptr++] = reg_offset;
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
+ }
+ ring->ring[ptr++] = mask;
+
+ //9th to 21st: insert no-op
+ for (i = 0; i <= 12; i++) {
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+ ring->ring[ptr++] = 0;
+ }
+
+ //22nd: reset mmUVD_JRBC_RB_RPTR
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+ reg_offset = (reg << 2);
+ val = 0;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
+ reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x12;
+ jpeg_v1_0_decode_ring_patch_wreg(ring, &ptr, reg_offset, val);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * jpeg_v1_0_decode_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80010000);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00010000);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0xffffffff);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x3fbc);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x1);
+
+ /* emit trap */
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void jpeg_v1_0_decode_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+static void jpeg_v1_0_decode_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static int jpeg_v1_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: JPEG decode TRAP\n");
+
+ switch (entry->src_id) {
+ case 126:
+ amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v1_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+int jpeg_v1_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->jpeg.num_jpeg_inst = 1;
+
+ jpeg_v1_0_set_dec_ring_funcs(adev);
+ jpeg_v1_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v1_0_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+int jpeg_v1_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ ring = &adev->jpeg.inst->ring_dec;
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch = adev->jpeg.inst->external.jpeg_pitch =
+ SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+
+ return 0;
+}
+
+/**
+ * jpeg_v1_0_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG free up sw allocation
+ */
+void jpeg_v1_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_ring_fini(&adev->jpeg.inst[0].ring_dec);
+}
+
+/**
+ * jpeg_v1_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+void jpeg_v1_0_start(struct amdgpu_device *adev, int mode)
+{
+ struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+
+ if (mode == 0) {
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+ }
+
+ /* initialize wptr */
+ ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+
+ /* copy patch commands to the jpeg ring */
+ jpeg_v1_0_decode_ring_set_patch_ring(ring,
+ (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+}
+
+static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .nop = PACKET0(0x81ff, 0),
+ .support_64bit_ptrs = false,
+ .no_user_fence = true,
+ .vmhub = AMDGPU_MMHUB_0,
+ .extra_dw = 64,
+ .get_rptr = jpeg_v1_0_decode_ring_get_rptr,
+ .get_wptr = jpeg_v1_0_decode_ring_get_wptr,
+ .set_wptr = jpeg_v1_0_decode_ring_set_wptr,
+ .emit_frame_size =
+ 6 + 6 + /* hdp invalidate / flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v1_0_decode_ring_emit_vm_flush */
+ 26 + 26 + /* jpeg_v1_0_decode_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 22, /* jpeg_v1_0_decode_ring_emit_ib */
+ .emit_ib = jpeg_v1_0_decode_ring_emit_ib,
+ .emit_fence = jpeg_v1_0_decode_ring_emit_fence,
+ .emit_vm_flush = jpeg_v1_0_decode_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v1_0_decode_ring_nop,
+ .insert_start = jpeg_v1_0_decode_ring_insert_start,
+ .insert_end = jpeg_v1_0_decode_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = vcn_v1_0_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = jpeg_v1_0_decode_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec.funcs = &jpeg_v1_0_decode_ring_vm_funcs;
+ DRM_INFO("JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = {
+ .set = jpeg_v1_0_set_interrupt_state,
+ .process = jpeg_v1_0_process_interrupt,
+};
+
+static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
new file mode 100644
index 000000000000..bbf33a6a3972
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V1_0_H__
+#define __JPEG_V1_0_H__
+
+int jpeg_v1_0_early_init(void *handle);
+int jpeg_v1_0_sw_init(void *handle);
+void jpeg_v1_0_sw_fini(void *handle);
+void jpeg_v1_0_start(struct amdgpu_device *adev, int mode);
+
+#endif /*__JPEG_V1_0_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
new file mode 100644
index 000000000000..ff2e6e1ccde7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -0,0 +1,827 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+#include "vcn/vcn_2_0_0_offset.h"
+#include "vcn/vcn_2_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
+#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029
+#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a
+#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb
+#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf
+#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9
+#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed
+#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
+#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
+#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+
+static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v2_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+
+/**
+ * jpeg_v2_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v2_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->jpeg.num_jpeg_inst = 1;
+
+ jpeg_v2_0_set_dec_ring_funcs(adev);
+ jpeg_v2_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v2_0_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int r;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ ring = &adev->jpeg.inst->ring_dec;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ sprintf(ring->name, "jpeg_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v2_0_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_0_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v2_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ int r;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (!r)
+ DRM_INFO("JPEG decode initialized successfully.\n");
+
+ return r;
+}
+
+/**
+ * jpeg_v2_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v2_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
+ jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ ring->sched.ready = false;
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v2_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = jpeg_v2_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_0_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v2_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v2_0_hw_init(adev);
+
+ return r;
+}
+
+static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data;
+ int r = 0;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ data = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+ SOC15_WAIT_ON_RREG(JPEG, 0,
+ mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
+ return r;
+ }
+ }
+
+ /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */
+ data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data);
+
+ return 0;
+}
+
+static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev)
+{
+ if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
+ uint32_t data;
+ int r = 0;
+
+ data = RREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS));
+ data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK;
+ data |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), data);
+
+ data = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(JPEG, 0, mmUVD_PGFSM_CONFIG), data);
+
+ SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_PGFSM_STATUS,
+ (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device* adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JPEG_ENC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+}
+
+static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device* adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JPEG_ENC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);
+}
+
+/**
+ * jpeg_v2_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v2_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
+ int r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, true);
+
+ /* disable power gating */
+ r = jpeg_v2_0_disable_power_gating(adev);
+ if (r)
+ return r;
+
+ /* JPEG disable CGC */
+ jpeg_v2_0_disable_clock_gating(adev);
+
+ WREG32_SOC15(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v2_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable JPEG CGC */
+ jpeg_v2_0_enable_clock_gating(adev);
+
+ /* enable power gating */
+ r = jpeg_v2_0_enable_power_gating(adev);
+ if (r)
+ return r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_jpeg(adev, false);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v2_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+/**
+ * jpeg_v2_0_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80010000);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00010000);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x3fbc);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x1);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * jpeg_v2_0_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ jpeg_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static bool jpeg_v2_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return ((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+}
+
+static int jpeg_v2_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+
+ SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret);
+
+ return ret;
+}
+
+static int jpeg_v2_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+
+ if (enable) {
+ if (jpeg_v2_0_is_idle(handle))
+ return -EBUSY;
+ jpeg_v2_0_enable_clock_gating(adev);
+ } else {
+ jpeg_v2_0_disable_clock_gating(adev);
+ }
+
+ return 0;
+}
+
+static int jpeg_v2_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v2_0_stop(adev);
+ else
+ ret = jpeg_v2_0_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v2_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
+ .name = "jpeg_v2_0",
+ .early_init = jpeg_v2_0_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v2_0_sw_init,
+ .sw_fini = jpeg_v2_0_sw_fini,
+ .hw_init = jpeg_v2_0_hw_init,
+ .hw_fini = jpeg_v2_0_hw_fini,
+ .suspend = jpeg_v2_0_suspend,
+ .resume = jpeg_v2_0_resume,
+ .is_idle = jpeg_v2_0_is_idle,
+ .wait_for_idle = jpeg_v2_0_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v2_0_set_clockgating_state,
+ .set_powergating_state = jpeg_v2_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .vmhub = AMDGPU_MMHUB_0,
+ .get_rptr = jpeg_v2_0_dec_ring_get_rptr,
+ .get_wptr = jpeg_v2_0_dec_ring_get_wptr,
+ .set_wptr = jpeg_v2_0_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->ring_dec.funcs = &jpeg_v2_0_dec_ring_vm_funcs;
+ DRM_INFO("JPEG decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v2_0_irq_funcs = {
+ .set = jpeg_v2_0_set_interrupt_state,
+ .process = jpeg_v2_0_process_interrupt,
+};
+
+static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.inst->irq.num_types = 1;
+ adev->jpeg.inst->irq.funcs = &jpeg_v2_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v2_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 2,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &jpeg_v2_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
new file mode 100644
index 000000000000..15a344ed340f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V2_0_H__
+#define __JPEG_V2_0_H__
+
+void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
+void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
+void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags);
+void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+void jpeg_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr);
+void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
+
+extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block;
+
+#endif /* __JPEG_V2_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
new file mode 100644
index 000000000000..c6d046df4b70
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -0,0 +1,641 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v2_0.h"
+
+#include "vcn/vcn_2_5_offset.h"
+#include "vcn/vcn_2_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+#define JPEG25_MAX_HW_INSTANCES_ARCTURUS 2
+
+static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v2_5_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+
+static int amdgpu_ih_clientid_jpeg[] = {
+ SOC15_IH_CLIENTID_VCN,
+ SOC15_IH_CLIENTID_VCN1
+};
+
+/**
+ * jpeg_v2_5_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v2_5_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ u32 harvest;
+ int i;
+
+ adev->jpeg.num_jpeg_inst = JPEG25_MAX_HW_INSTANCES_ARCTURUS;
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ harvest = RREG32_SOC15(JPEG, i, mmCC_UVD_HARVESTING);
+ if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+ adev->jpeg.harvest_config |= 1 << i;
+ }
+
+ if (adev->jpeg.harvest_config == (AMDGPU_JPEG_HARVEST_JPEG0 |
+ AMDGPU_JPEG_HARVEST_JPEG1))
+ return -ENOENT;
+ } else
+ adev->jpeg.num_jpeg_inst = 1;
+
+ jpeg_v2_5_set_dec_ring_funcs(adev);
+ jpeg_v2_5_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_sw_init - sw init for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v2_5_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int i, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
+ VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->jpeg.inst[i].ring_dec;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i;
+ sprintf(ring->name, "jpeg_dec_%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, 0);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_sw_fini - sw fini for JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v2_5_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_5_hw_init - start and test JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ */
+static int jpeg_v2_5_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->jpeg.inst[i].ring_dec;
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ DRM_INFO("JPEG decode initialized successfully.\n");
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v2_5_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->jpeg.inst[i].ring_dec;
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
+ jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ ring->sched.ready = false;
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_suspend - suspend JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v2_5_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = jpeg_v2_5_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v2_5_resume - resume JPEG block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v2_5_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v2_5_hw_init(adev);
+
+ return r;
+}
+
+static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device* adev, int inst)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG)
+ data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+
+ data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data);
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE);
+ data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JPEG_ENC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL);
+ data &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
+ | JPEG_CGC_CTRL__JMCIF_MODE_MASK
+ | JPEG_CGC_CTRL__JRBBM_MODE_MASK);
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data);
+}
+
+static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device* adev, int inst)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE);
+ data |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JPEG_ENC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
+}
+
+/**
+ * jpeg_v2_5_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v2_5_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ring = &adev->jpeg.inst[i].ring_dec;
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v2_5_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v2_5_stop(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v2_5_enable_clock_gating(adev, i);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_5_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * jpeg_v2_5_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * jpeg_v2_5_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(JPEG, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v2_5_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ ret &= (((RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
+ }
+
+ return ret;
+}
+
+static int jpeg_v2_5_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ SOC15_WAIT_ON_RREG(JPEG, i, mmUVD_JRBC_STATUS,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK, ret);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int jpeg_v2_5_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE);
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (enable) {
+ if (jpeg_v2_5_is_idle(handle))
+ return -EBUSY;
+ jpeg_v2_5_enable_clock_gating(adev, i);
+ } else {
+ jpeg_v2_5_disable_clock_gating(adev, i);
+ }
+ }
+
+ return 0;
+}
+
+static int jpeg_v2_5_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret;
+
+ if(state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v2_5_stop(adev);
+ else
+ ret = jpeg_v2_5_start(adev);
+
+ if(!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v2_5_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t ip_instance;
+
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_VCN:
+ ip_instance = 0;
+ break;
+ case SOC15_IH_CLIENTID_VCN1:
+ ip_instance = 1;
+ break;
+ default:
+ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+ return 0;
+ }
+
+ DRM_DEBUG("IH: JPEG TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
+ .name = "jpeg_v2_5",
+ .early_init = jpeg_v2_5_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v2_5_sw_init,
+ .sw_fini = jpeg_v2_5_sw_fini,
+ .hw_init = jpeg_v2_5_hw_init,
+ .hw_fini = jpeg_v2_5_hw_fini,
+ .suspend = jpeg_v2_5_suspend,
+ .resume = jpeg_v2_5_resume,
+ .is_idle = jpeg_v2_5_is_idle,
+ .wait_for_idle = jpeg_v2_5_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v2_5_set_clockgating_state,
+ .set_powergating_state = jpeg_v2_5_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .vmhub = AMDGPU_MMHUB_1,
+ .get_rptr = jpeg_v2_5_dec_ring_get_rptr,
+ .get_wptr = jpeg_v2_5_dec_ring_get_wptr,
+ .set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v2_5_dec_ring_emit_vm_flush */
+ 18 + 18 + /* jpeg_v2_5_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v2_5_dec_ring_emit_ib */
+ .emit_ib = jpeg_v2_0_dec_ring_emit_ib,
+ .emit_fence = jpeg_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v2_0_dec_ring_nop,
+ .insert_start = jpeg_v2_0_dec_ring_insert_start,
+ .insert_end = jpeg_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_5_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec.me = i;
+ DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i);
+ }
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v2_5_irq_funcs = {
+ .set = jpeg_v2_5_set_interrupt_state,
+ .process = jpeg_v2_5_process_interrupt,
+};
+
+static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ adev->jpeg.inst[i].irq.num_types = 1;
+ adev->jpeg.inst[i].irq.funcs = &jpeg_v2_5_irq_funcs;
+ }
+}
+
+const struct amdgpu_ip_block_version jpeg_v2_5_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 2,
+ .minor = 5,
+ .rev = 0,
+ .funcs = &jpeg_v2_5_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
new file mode 100644
index 000000000000..2b4087c02620
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V2_5_H__
+#define __JPEG_V2_5_H__
+
+extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block;
+
+#endif /* __JPEG_V2_5_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 04cd4b6f95d4..49a3a56ec017 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -27,17 +27,13 @@
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"
#include "mmhub/mmhub_1_0_default.h"
-#include "mmhub/mmhub_9_4_0_offset.h"
#include "vega10_enum.h"
-
+#include "soc15.h"
#include "soc15_common.h"
#define mmDAGB0_CNTL_MISC2_RV 0x008f
#define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0
-#define EA_EDC_CNT_MASK 0x3
-#define EA_EDC_CNT_SHIFT 0x2
-
u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
{
u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE);
@@ -206,6 +202,8 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp);
}
@@ -418,6 +416,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
@@ -523,9 +523,9 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
case CHIP_RAVEN:
case CHIP_RENOIR:
mmhub_v1_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
mmhub_v1_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -560,61 +560,194 @@ void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
*flags |= AMD_CG_SUPPORT_MC_LS;
}
+static const struct soc15_ras_field_entry mmhub_v1_0_ras_fields[] = {
+ { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT_VG20, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2_VG20, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ }
+};
+
+static const struct soc15_reg_entry mmhub_v1_0_edc_cnt_regs[] = {
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT_VG20), 0, 0, 0},
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20), 0, 0, 0},
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT_VG20), 0, 0, 0},
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20), 0, 0, 0},
+};
+
+static int mmhub_v1_0_get_ras_error_count(const struct soc15_reg_entry *reg,
+ uint32_t value, uint32_t *sec_count, uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_ras_fields); i++) {
+ if(mmhub_v1_0_ras_fields[i].reg_offset != reg->reg_offset)
+ continue;
+
+ sec_cnt = (value &
+ mmhub_v1_0_ras_fields[i].sec_count_mask) >>
+ mmhub_v1_0_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("MMHUB SubBlock %s, SEC %d\n",
+ mmhub_v1_0_ras_fields[i].name,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value &
+ mmhub_v1_0_ras_fields[i].ded_count_mask) >>
+ mmhub_v1_0_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ DRM_INFO("MMHUB SubBlock %s, DED %d\n",
+ mmhub_v1_0_ras_fields[i].name,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
- int i;
- uint32_t ea0_edc_cnt, ea0_edc_cnt2;
- uint32_t ea1_edc_cnt, ea1_edc_cnt2;
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-
- /* EDC CNT will be cleared automatically after read */
- ea0_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT_VG20);
- ea0_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20);
- ea1_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT_VG20);
- ea1_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20);
-
- /* error count of each error type is recorded by 2 bits,
- * ce and ue count in EDC_CNT
- */
- for (i = 0; i < 5; i++) {
- err_data->ce_count += (ea0_edc_cnt & EA_EDC_CNT_MASK);
- err_data->ce_count += (ea1_edc_cnt & EA_EDC_CNT_MASK);
- ea0_edc_cnt >>= EA_EDC_CNT_SHIFT;
- ea1_edc_cnt >>= EA_EDC_CNT_SHIFT;
- err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK);
- err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK);
- ea0_edc_cnt >>= EA_EDC_CNT_SHIFT;
- ea1_edc_cnt >>= EA_EDC_CNT_SHIFT;
- }
- /* successive ue count in EDC_CNT */
- for (i = 0; i < 5; i++) {
- err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK);
- err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK);
- ea0_edc_cnt >>= EA_EDC_CNT_SHIFT;
- ea1_edc_cnt >>= EA_EDC_CNT_SHIFT;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i;
+ uint32_t reg_value;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_edc_cnt_regs); i++) {
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_0_edc_cnt_regs[i]));
+ if (reg_value)
+ mmhub_v1_0_get_ras_error_count(&mmhub_v1_0_edc_cnt_regs[i],
+ reg_value, &sec_count, &ded_count);
}
- /* ce and ue count in EDC_CNT2 */
- for (i = 0; i < 3; i++) {
- err_data->ce_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK);
- err_data->ce_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK);
- ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
- ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
- err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK);
- err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK);
- ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
- ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
- }
- /* successive ue count in EDC_CNT2 */
- for (i = 0; i < 6; i++) {
- err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK);
- err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK);
- ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
- ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT;
- }
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
}
const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
+ .ras_late_init = amdgpu_mmhub_ras_late_init,
.query_ras_error_count = mmhub_v1_0_query_ras_error_count,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 3542c203c3c8..bde189680521 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -31,20 +31,25 @@
#include "soc15_common.h"
-static void mmhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev)
+void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
{
- uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
+ /* two registers distance between mmMMVM_CONTEXT0_* to mmMMVM_CONTEXT1_* */
+ int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
+ - mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
- WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
- lower_32_bits(value));
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ offset * vmid, lower_32_bits(page_table_base));
- WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
- upper_32_bits(value));
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ offset * vmid, upper_32_bits(page_table_base));
}
static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
- mmhub_v2_0_init_gart_pt_regs(adev);
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v2_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
@@ -137,6 +142,15 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp);
tmp = mmMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp);
tmp = mmMMVM_L2_CNTL4_DEFAULT;
@@ -152,6 +166,8 @@ static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp);
}
@@ -332,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
@@ -409,9 +427,9 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
case CHIP_NAVI14:
case CHIP_NAVI12:
mmhub_v2_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
mmhub_v2_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
index db16f3ece218..3ea4344f0315 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
@@ -31,5 +31,7 @@ void mmhub_v2_0_init(struct amdgpu_device *adev);
int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 0cf7ef44b4b5..a5281df8d84f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -21,6 +21,7 @@
*
*/
#include "amdgpu.h"
+#include "amdgpu_ras.h"
#include "mmhub_v9_4.h"
#include "mmhub/mmhub_9_4_1_offset.h"
@@ -29,7 +30,7 @@
#include "athub/athub_1_0_offset.h"
#include "athub/athub_1_0_sh_mask.h"
#include "vega10_enum.h"
-
+#include "soc15.h"
#include "soc15_common.h"
#define MMHUB_NUM_INSTANCES 2
@@ -53,7 +54,7 @@ u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
return base;
}
-void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid,
+static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid,
uint32_t vmid, uint64_t value)
{
/* two registers distance between mmVML2VC0_VM_CONTEXT0_* to
@@ -79,7 +80,7 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev,
{
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
- mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base);
+ mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base);
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
@@ -100,6 +101,16 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev,
(u32)(adev->gmc.gart_end >> 44));
}
+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ int i;
+
+ for (i = 0; i < MMHUB_NUM_INSTANCES; i++)
+ mmhub_v9_4_setup_hubid_vm_pt_regs(adev, i, vmid,
+ page_table_base);
+}
+
static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
int hubid)
{
@@ -117,45 +128,53 @@ static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
adev->gmc.agp_start >> 24);
- /* Program the system aperture low logical page number. */
- WREG32_SOC15_OFFSET(MMHUB, 0,
- mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR,
- hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
- min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
- WREG32_SOC15_OFFSET(MMHUB, 0,
- mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
- max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+ if (!amdgpu_sriov_vf(adev)) {
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0, mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
- /* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
- adev->vm_manager.vram_base_offset;
- WREG32_SOC15_OFFSET(MMHUB, 0,
+ /* Set default page address. */
+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
(u32)(value >> 12));
- WREG32_SOC15_OFFSET(MMHUB, 0,
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
(u32)(value >> 44));
- /* Program "protection fault". */
- WREG32_SOC15_OFFSET(MMHUB, 0,
- mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
- hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
- (u32)(adev->dummy_page_addr >> 12));
- WREG32_SOC15_OFFSET(MMHUB, 0,
- mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
- hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
- (u32)((u64)adev->dummy_page_addr >> 44));
+ /* Program "protection fault". */
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15_OFFSET(
+ MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ (u32)((u64)adev->dummy_page_addr >> 44));
- tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
- mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
- hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
- tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
- ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
- WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
- hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+ tmp = RREG32_SOC15_OFFSET(
+ MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2,
+ hubid * MMHUB_INSTANCE_REGISTER_OFFSET,
+ tmp);
+ }
}
static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid)
@@ -219,6 +238,15 @@ static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid)
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
@@ -240,6 +268,8 @@ static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
}
@@ -302,7 +332,8 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
adev->vm_manager.block_size - 9);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
- RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i,
tmp);
@@ -345,30 +376,16 @@ int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
int i;
for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
- if (amdgpu_sriov_vf(adev)) {
- /*
- * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase
- * they are VF copy registers so vbios post doesn't
- * program them, for SRIOV driver need to program them
- */
- WREG32_SOC15_OFFSET(MMHUB, 0,
- mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE,
- i * MMHUB_INSTANCE_REGISTER_OFFSET,
- adev->gmc.vram_start >> 24);
- WREG32_SOC15_OFFSET(MMHUB, 0,
- mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP,
- i * MMHUB_INSTANCE_REGISTER_OFFSET,
- adev->gmc.vram_end >> 24);
- }
-
/* GART Enable. */
mmhub_v9_4_init_gart_aperture_regs(adev, i);
mmhub_v9_4_init_system_aperture_regs(adev, i);
mmhub_v9_4_init_tlb_regs(adev, i);
- mmhub_v9_4_init_cache_regs(adev, i);
+ if (!amdgpu_sriov_vf(adev))
+ mmhub_v9_4_init_cache_regs(adev, i);
mmhub_v9_4_enable_system_domain(adev, i);
- mmhub_v9_4_disable_identity_aperture(adev, i);
+ if (!amdgpu_sriov_vf(adev))
+ mmhub_v9_4_disable_identity_aperture(adev, i);
mmhub_v9_4_setup_vmid_config(adev, i);
mmhub_v9_4_program_invalidation(adev, i);
}
@@ -493,6 +510,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev)
SOC15_REG_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) +
i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_SEM) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
hub[i]->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0,
mmVML2VC0_VM_INVALIDATE_ENG0_REQ) +
@@ -604,9 +625,9 @@ int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
switch (adev->asic_type) {
case CHIP_ARCTURUS:
mmhub_v9_4_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
mmhub_v9_4_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -640,3 +661,942 @@ void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags)
if (data & ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_MC_LS;
}
+
+static const struct soc15_ras_field_entry mmhub_v9_4_ras_fields[] = {
+ /* MMHUB Range 0 */
+ { "MMEA0_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA0_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA0_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA0_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA0_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA0_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 1 */
+ { "MMEA1_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA1_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA1_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA1_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA1_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA1_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHAB Range 2*/
+ { "MMEA2_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA2_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA2_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA2_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA2_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA2_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA2_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Rang 3 */
+ { "MMEA3_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA3_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA3_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA3_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA3_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA3_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA3_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 4 */
+ { "MMEA4_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA4_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA4_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA4_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA4_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA4_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA4_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUAB Range 5 */
+ { "MMEA5_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA5_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA5_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA5_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA5_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA5_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA5_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 6 */
+ { "MMEA6_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA6_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA6_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA6_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA6_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA6_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA6_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA6_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ },
+
+ /* MMHUB Range 7*/
+ { "MMEA7_DRAMRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA7_RRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, RRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA7_WRET_TAGMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, WRET_TAGMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, IORD_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_GMIRD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
+ 0, 0,
+ },
+ { "MMEA7_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, DRAMWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_IORD_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, IORD_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_IOWR_CMDMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_CMDMEM_DED_COUNT),
+ },
+ { "MMEA7_IOWR_DATAMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, IOWR_DATAMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIRD_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIRD_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_GMIWR_PAGEMEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3),
+ 0, 0,
+ SOC15_REG_FIELD(MMEA7_EDC_CNT3, GMIWR_PAGEMEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D0MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D0MEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D1MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D1MEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D2MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D2MEM_DED_COUNT),
+ },
+ { "MMEA7_MAM_D3MEM", SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_SED_COUNT),
+ SOC15_REG_FIELD(MMEA7_EDC_CNT2, MAM_D3MEM_DED_COUNT),
+ }
+};
+
+static const struct soc15_reg_entry mmhub_v9_4_edc_cnt_regs[] = {
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_EDC_CNT3), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT2), 0, 0, 0 },
+ { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_EDC_CNT3), 0, 0, 0 },
+};
+
+static int mmhub_v9_4_get_ras_error_count(const struct soc15_reg_entry *reg,
+ uint32_t value, uint32_t *sec_count, uint32_t *ded_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt, ded_cnt;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_ras_fields); i++) {
+ if(mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset)
+ continue;
+
+ sec_cnt = (value &
+ mmhub_v9_4_ras_fields[i].sec_count_mask) >>
+ mmhub_v9_4_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("MMHUB SubBlock %s, SEC %d\n",
+ mmhub_v9_4_ras_fields[i].name,
+ sec_cnt);
+ *sec_count += sec_cnt;
+ }
+
+ ded_cnt = (value &
+ mmhub_v9_4_ras_fields[i].ded_count_mask) >>
+ mmhub_v9_4_ras_fields[i].ded_count_shift;
+ if (ded_cnt) {
+ DRM_INFO("MMHUB SubBlock %s, DED %d\n",
+ mmhub_v9_4_ras_fields[i].name,
+ ded_cnt);
+ *ded_count += ded_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void mmhub_v9_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0, ded_count = 0;
+ uint32_t i;
+ uint32_t reg_value;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_edc_cnt_regs); i++) {
+ reg_value =
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[i]));
+ if (reg_value)
+ mmhub_v9_4_get_ras_error_count(&mmhub_v9_4_edc_cnt_regs[i],
+ reg_value, &sec_count, &ded_count);
+ }
+
+ err_data->ce_count += sec_count;
+ err_data->ue_count += ded_count;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
+ .ras_late_init = amdgpu_mmhub_ras_late_init,
+ .query_ras_error_count = mmhub_v9_4_query_ras_error_count,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
index d435cfcec1a8..1b979773776c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
@@ -23,6 +23,8 @@
#ifndef __MMHUB_V9_4_H__
#define __MMHUB_V9_4_H__
+extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs;
+
u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev);
int mmhub_v9_4_gart_enable(struct amdgpu_device *adev);
void mmhub_v9_4_gart_disable(struct amdgpu_device *adev);
@@ -32,5 +34,7 @@ void mmhub_v9_4_init(struct amdgpu_device *adev);
int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
index 8af0bddf85e4..20958639b601 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h
@@ -47,6 +47,18 @@ struct mmsch_v1_0_init_header {
uint32_t uvd_table_size;
};
+struct mmsch_vf_eng_init_header {
+ uint32_t init_status;
+ uint32_t table_offset;
+ uint32_t table_size;
+};
+
+struct mmsch_v1_1_init_header {
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_vf_eng_init_header eng[2];
+};
+
struct mmsch_v1_0_cmd_direct_reg_header {
uint32_t reg_offset : 28;
uint32_t command_type : 4;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index cc5bf595f9b1..5fd67e1cc2a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -158,82 +158,6 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
xgpu_ai_mailbox_set_valid(adev, false);
}
-static int xgpu_ai_get_pp_clk(struct amdgpu_device *adev, u32 type, char *buf)
-{
- int r = 0;
- u32 req, val, size;
-
- if (!amdgim_is_hwperf(adev) || buf == NULL)
- return -EBADRQC;
-
- switch(type) {
- case PP_SCLK:
- req = IDH_IRQ_GET_PP_SCLK;
- break;
- case PP_MCLK:
- req = IDH_IRQ_GET_PP_MCLK;
- break;
- default:
- return -EBADRQC;
- }
-
- mutex_lock(&adev->virt.dpm_mutex);
-
- xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0);
-
- r = xgpu_ai_poll_msg(adev, IDH_SUCCESS);
- if (!r && adev->fw_vram_usage.va != NULL) {
- val = RREG32_NO_KIQ(
- SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW1));
- size = strnlen((((char *)adev->virt.fw_reserve.p_pf2vf) +
- val), PAGE_SIZE);
-
- if (size < PAGE_SIZE)
- strcpy(buf,((char *)adev->virt.fw_reserve.p_pf2vf + val));
- else
- size = 0;
-
- r = size;
- goto out;
- }
-
- r = xgpu_ai_poll_msg(adev, IDH_FAIL);
- if(r)
- pr_info("%s DPM request failed",
- (type == PP_SCLK)? "SCLK" : "MCLK");
-
-out:
- mutex_unlock(&adev->virt.dpm_mutex);
- return r;
-}
-
-static int xgpu_ai_force_dpm_level(struct amdgpu_device *adev, u32 level)
-{
- int r = 0;
- u32 req = IDH_IRQ_FORCE_DPM_LEVEL;
-
- if (!amdgim_is_hwperf(adev))
- return -EBADRQC;
-
- mutex_lock(&adev->virt.dpm_mutex);
- xgpu_ai_mailbox_trans_msg(adev, req, level, 0, 0);
-
- r = xgpu_ai_poll_msg(adev, IDH_SUCCESS);
- if (!r)
- goto out;
-
- r = xgpu_ai_poll_msg(adev, IDH_FAIL);
- if (!r)
- pr_info("DPM request failed");
- else
- pr_info("Mailbox is broken");
-
-out:
- mutex_unlock(&adev->virt.dpm_mutex);
- return r;
-}
-
static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
enum idh_request req)
{
@@ -326,7 +250,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
*/
locked = mutex_trylock(&adev->lock_reset);
if (locked)
- adev->in_gpu_reset = 1;
+ adev->in_gpu_reset = true;
do {
if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
@@ -338,7 +262,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
flr_done:
if (locked) {
- adev->in_gpu_reset = 0;
+ adev->in_gpu_reset = false;
mutex_unlock(&adev->lock_reset);
}
@@ -455,6 +379,4 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.reset_gpu = xgpu_ai_request_reset,
.wait_reset = NULL,
.trans_msg = xgpu_ai_mailbox_trans_msg,
- .get_pp_clk = xgpu_ai_get_pp_clk,
- .force_dpm_level = xgpu_ai_force_dpm_level,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 077e91a33d62..37dbe0f2142f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -35,10 +35,6 @@ enum idh_request {
IDH_REL_GPU_FINI_ACCESS,
IDH_REQ_GPU_RESET_ACCESS,
- IDH_IRQ_FORCE_DPM_LEVEL = 10,
- IDH_IRQ_GET_PP_SCLK,
- IDH_IRQ_GET_PP_MCLK,
-
IDH_LOG_VF_ERROR = 200,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
new file mode 100644
index 000000000000..237fa5e16b7c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "nbio/nbio_2_3_offset.h"
+#include "nbio/nbio_2_3_sh_mask.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "soc15.h"
+#include "navi10_ih.h"
+#include "soc15_common.h"
+#include "mxgpu_nv.h"
+#include "mxgpu_ai.h"
+
+static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev)
+{
+ WREG8(NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
+}
+
+static void xgpu_nv_mailbox_set_valid(struct amdgpu_device *adev, bool val)
+{
+ WREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0);
+}
+
+/*
+ * this peek_msg could *only* be called in IRQ routine becuase in IRQ routine
+ * RCV_MSG_VALID filed of BIF_BX_PF_MAILBOX_CONTROL must already be set to 1
+ * by host.
+ *
+ * if called no in IRQ routine, this peek_msg cannot guaranteed to return the
+ * correct value since it doesn't return the RCV_DW0 under the case that
+ * RCV_MSG_VALID is set by host.
+ */
+static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
+{
+ return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
+}
+
+
+static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
+ enum idh_event event)
+{
+ u32 reg;
+
+ reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW0));
+ if (reg != event)
+ return -ENOENT;
+
+ xgpu_nv_mailbox_send_ack(adev);
+
+ return 0;
+}
+
+static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev)
+{
+ return RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2;
+}
+
+static int xgpu_nv_poll_ack(struct amdgpu_device *adev)
+{
+ int timeout = NV_MAILBOX_POLL_ACK_TIMEDOUT;
+ u8 reg;
+
+ do {
+ reg = RREG8(NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE);
+ if (reg & 2)
+ return 0;
+
+ mdelay(5);
+ timeout -= 5;
+ } while (timeout > 1);
+
+ pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT);
+
+ return -ETIME;
+}
+
+static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
+{
+ int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT;
+
+ do {
+ r = xgpu_nv_mailbox_rcv_msg(adev, event);
+ if (!r)
+ return 0;
+
+ msleep(10);
+ timeout -= 10;
+ } while (timeout > 1);
+
+ pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+
+ return -ETIME;
+}
+
+static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,
+ enum idh_request req, u32 data1, u32 data2, u32 data3)
+{
+ u32 reg;
+ int r;
+ uint8_t trn;
+
+ /* IMPORTANT:
+ * clear TRN_MSG_VALID valid to clear host's RCV_MSG_ACK
+ * and with host's RCV_MSG_ACK cleared hw automatically clear host's RCV_MSG_ACK
+ * which lead to VF's TRN_MSG_ACK cleared, otherwise below xgpu_nv_poll_ack()
+ * will return immediatly
+ */
+ do {
+ xgpu_nv_mailbox_set_valid(adev, false);
+ trn = xgpu_nv_peek_ack(adev);
+ if (trn) {
+ pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+ msleep(1);
+ }
+ } while (trn);
+
+ reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0));
+ reg = REG_SET_FIELD(reg, BIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0,
+ MSGBUF_DATA, req);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW0),
+ reg);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW1),
+ data1);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW2),
+ data2);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_MSGBUF_TRN_DW3),
+ data3);
+
+ xgpu_nv_mailbox_set_valid(adev, true);
+
+ /* start to poll ack */
+ r = xgpu_nv_poll_ack(adev);
+ if (r)
+ pr_err("Doesn't get ack from pf, continue\n");
+
+ xgpu_nv_mailbox_set_valid(adev, false);
+}
+
+static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
+ enum idh_request req)
+{
+ int r;
+
+ xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0);
+
+ /* start to check msg if request is idh_req_gpu_init_access */
+ if (req == IDH_REQ_GPU_INIT_ACCESS ||
+ req == IDH_REQ_GPU_FINI_ACCESS ||
+ req == IDH_REQ_GPU_RESET_ACCESS) {
+ r = xgpu_nv_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
+ if (r) {
+ pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
+ return r;
+ }
+ /* Retrieve checksum from mailbox2 */
+ if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) {
+ adev->virt.fw_reserve.checksum_key =
+ RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF_MAILBOX_MSGBUF_RCV_DW2));
+ }
+ }
+
+ return 0;
+}
+
+static int xgpu_nv_request_reset(struct amdgpu_device *adev)
+{
+ return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+}
+
+static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev,
+ bool init)
+{
+ enum idh_request req;
+
+ req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS;
+ return xgpu_nv_send_access_requests(adev, req);
+}
+
+static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
+ bool init)
+{
+ enum idh_request req;
+ int r = 0;
+
+ req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS;
+ r = xgpu_nv_send_access_requests(adev, req);
+
+ return r;
+}
+
+static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("get ack intr and do nothing.\n");
+ return 0;
+}
+
+static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL));
+
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, ACK_INT_EN,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp);
+
+ return 0;
+}
+
+static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
+ int locked;
+
+ /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
+ * otherwise the mailbox msg will be ruined/reseted by
+ * the VF FLR.
+ *
+ * we can unlock the lock_reset to allow "amdgpu_job_timedout"
+ * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
+ * which means host side had finished this VF's FLR.
+ */
+ locked = mutex_trylock(&adev->lock_reset);
+ if (locked)
+ adev->in_gpu_reset = true;
+
+ do {
+ if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
+ goto flr_done;
+
+ msleep(10);
+ timeout -= 10;
+ } while (timeout > 1);
+
+flr_done:
+ if (locked) {
+ adev->in_gpu_reset = false;
+ mutex_unlock(&adev->lock_reset);
+ }
+
+ /* Trigger recovery for world switch failure if no TDR */
+ if (amdgpu_device_should_recover_gpu(adev)
+ && (adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+ adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
+ amdgpu_device_gpu_recover(adev, NULL);
+}
+
+static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL));
+
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF_MAILBOX_INT_CNTL, VALID_INT_EN,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+ WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_INT_CNTL), tmp);
+
+ return 0;
+}
+
+static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ enum idh_event event = xgpu_nv_mailbox_peek_msg(adev);
+
+ switch (event) {
+ case IDH_FLR_NOTIFICATION:
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.flr_work);
+ break;
+ /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
+ * it byfar since that polling thread will handle it,
+ * other msg like flr complete is not handled here.
+ */
+ case IDH_CLR_MSG_BUF:
+ case IDH_FLR_NOTIFICATION_CMPL:
+ case IDH_READY_TO_ACCESS_GPU:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_ack_irq_funcs = {
+ .set = xgpu_nv_set_mailbox_ack_irq,
+ .process = xgpu_nv_mailbox_ack_irq,
+};
+
+static const struct amdgpu_irq_src_funcs xgpu_nv_mailbox_rcv_irq_funcs = {
+ .set = xgpu_nv_set_mailbox_rcv_irq,
+ .process = xgpu_nv_mailbox_rcv_irq,
+};
+
+void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->virt.ack_irq.num_types = 1;
+ adev->virt.ack_irq.funcs = &xgpu_nv_mailbox_ack_irq_funcs;
+ adev->virt.rcv_irq.num_types = 1;
+ adev->virt.rcv_irq.funcs = &xgpu_nv_mailbox_rcv_irq_funcs;
+}
+
+int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
+ if (r) {
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+ return r;
+ }
+
+ return 0;
+}
+
+int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0);
+ if (r)
+ return r;
+ r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0);
+ if (r) {
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+ return r;
+ }
+
+ INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
+
+ return 0;
+}
+
+void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
+{
+ amdgpu_irq_put(adev, &adev->virt.ack_irq, 0);
+ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+}
+
+const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
+ .req_full_gpu = xgpu_nv_request_full_gpu_access,
+ .rel_full_gpu = xgpu_nv_release_full_gpu_access,
+ .reset_gpu = xgpu_nv_request_reset,
+ .wait_reset = NULL,
+ .trans_msg = xgpu_nv_mailbox_trans_msg,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
new file mode 100644
index 000000000000..99b15f6865cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MXGPU_NV_H__
+#define __MXGPU_NV_H__
+
+#define NV_MAILBOX_POLL_ACK_TIMEDOUT 500
+#define NV_MAILBOX_POLL_MSG_TIMEDOUT 12000
+#define NV_MAILBOX_POLL_FLR_TIMEDOUT 500
+
+extern const struct amdgpu_virt_ops xgpu_nv_virt_ops;
+
+void xgpu_nv_mailbox_set_irq_funcs(struct amdgpu_device *adev);
+int xgpu_nv_mailbox_add_irq_id(struct amdgpu_device *adev);
+int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev);
+void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev);
+
+#define NV_MAIBOX_CONTROL_TRN_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4)
+#define NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_MAILBOX_CONTROL) * 4 + 1)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 9fe08408db58..cf557a428298 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -110,14 +110,13 @@ static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl
static int navi10_ih_irq_init(struct amdgpu_device *adev)
{
struct amdgpu_ih_ring *ih = &adev->irq.ih;
- int ret = 0;
u32 ih_rb_cntl, ih_doorbell_rtpr, ih_chicken;
u32 tmp;
/* disable irqs */
navi10_ih_disable_interrupts(adev);
- adev->nbio_funcs->ih_control(adev);
+ adev->nbio.funcs->ih_control(adev);
/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8);
@@ -162,7 +161,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
}
WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr);
- adev->nbio_funcs->ih_doorbell_range(adev, ih->use_doorbell,
+ adev->nbio.funcs->ih_doorbell_range(adev, ih->use_doorbell,
ih->doorbell_index);
tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL);
@@ -179,7 +178,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
/* enable interrupts */
navi10_ih_enable_interrupts(adev);
- return ret;
+ return 0;
}
/**
@@ -427,7 +426,7 @@ static int navi10_ih_set_clockgating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
navi10_ih_update_clockgating_state(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
index a56c93620e78..88efaecf9f70 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
@@ -24,7 +24,6 @@
#include "nv.h"
#include "soc15_common.h"
-#include "soc15_hw_ip.h"
#include "navi10_ip_offset.h"
int navi10_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
index cadc7603ca41..a786d159e5e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
@@ -24,7 +24,6 @@
#include "nv.h"
#include "soc15_common.h"
-#include "soc15_hw_ip.h"
#include "navi12_ip_offset.h"
int navi12_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
index 3b5f0f65e096..4ea1e8fbb601 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
@@ -24,7 +24,6 @@
#include "nv.h"
#include "soc15_common.h"
-#include "soc15_hw_ip.h"
#include "navi14_ip_offset.h"
int navi14_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index c05d78d4efc6..f3a3fe746222 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -27,11 +27,21 @@
#include "nbio/nbio_2_3_default.h"
#include "nbio/nbio_2_3_offset.h"
#include "nbio/nbio_2_3_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
#define smnPCIE_CONFIG_CNTL 0x11180044
#define smnCPM_CONTROL 0x11180460
#define smnPCIE_CNTL2 0x11180070
+
+static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -56,10 +66,9 @@ static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
- WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
else
- amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
- NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
}
static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev)
@@ -311,7 +320,6 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
}
const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
- .hdp_flush_reg = &nbio_v2_3_hdp_flush_reg,
.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v2_3_get_pcie_index_offset,
@@ -331,4 +339,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.ih_control = nbio_v2_3_ih_control,
.init_registers = nbio_v2_3_init_registers,
.detect_hw_virt = nbio_v2_3_detect_hw_virt,
+ .remap_hdp_registers = nbio_v2_3_remap_hdp_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
index 5ae52085f6b7..a43b60acf7f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
@@ -26,6 +26,7 @@
#include "soc15_common.h"
+extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 6590143c3f75..635d9e1fc0a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -226,7 +226,7 @@ static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev)
return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
}
-static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
+const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK,
.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK,
.ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK,
@@ -277,7 +277,6 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
}
const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
- .hdp_flush_reg = &nbio_v6_1_hdp_flush_reg,
.get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
index 0743a6f016f3..6dc743b73218 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h
@@ -26,6 +26,7 @@
#include "soc15_common.h"
+extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v6_1_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 74eecb768a82..d6cbf26074bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -292,7 +292,6 @@ static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
}
const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
- .hdp_flush_reg = &nbio_v7_0_hdp_flush_reg,
.get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
index 508d549c5029..e7aefb252550 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
@@ -26,6 +26,7 @@
#include "soc15_common.h"
+extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v7_0_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 910fffced43b..65eb378fa035 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -23,10 +23,12 @@
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "nbio_v7_4.h"
+#include "amdgpu_ras.h"
#include "nbio/nbio_7_4_offset.h"
#include "nbio/nbio_7_4_sh_mask.h"
#include "nbio/nbio_7_4_0_smn.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include <uapi/linux/kfd_ioctl.h>
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
@@ -50,6 +52,9 @@
#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK 0x00000FFCL
#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK 0x001F0000L
+static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status);
+
static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@@ -266,7 +271,7 @@ static u32 nbio_v7_4_get_pcie_data_offset(struct amdgpu_device *adev)
return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
}
-static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
+const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
@@ -306,17 +311,224 @@ static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev)
static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
{
- uint32_t def, data;
- def = data = RREG32_PCIE(smnPCIE_CI_CNTL);
- data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1);
+}
- if (def != data)
- WREG32_PCIE(smnPCIE_CI_CNTL, data);
+static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
+
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_CNTLR_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ /*
+ * clear error status after ras_controller_intr according to
+ * hw team and count ue number for query
+ */
+ nbio_v7_4_query_ras_error_count(adev, &obj->err_data);
+
+ DRM_WARN("RAS controller interrupt triggered by NBIF error\n");
+
+ /* ras_controller_int is dedicated for nbif ras error,
+ * not the global interrupt for sync flood
+ */
+ amdgpu_ras_reset_gpu(adev);
+ }
+}
+
+static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_intr_cntl;
+
+ bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL);
+ if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+ BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);
+
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+
+
+static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in psp bl when it
+ * tries to enable ras feature. Driver only need to set the correct interrupt
+ * vector for bare-metal and sriov use case respectively
+ */
+ uint32_t bif_intr_cntl;
+
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+ if (state == AMDGPU_IRQ_STATE_ENABLE) {
+ /* set interrupt vector select bit to 0 to select
+ * vetcor 1 for bare metal case */
+ bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
+ BIF_INTR_CNTL,
+ RAS_INTR_VEC_SEL, 0);
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+ }
+
+ return 0;
+}
+
+static int nbio_v7_4_process_ras_controller_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for ras_controller_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be involked. Just left it as a dummy one.
+ */
+ return 0;
+}
+
+static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in psp bl when it
+ * tries to enable ras feature. Driver only need to set the correct interrupt
+ * vector for bare-metal and sriov use case respectively
+ */
+ uint32_t bif_intr_cntl;
+
+ bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+ if (state == AMDGPU_IRQ_STATE_ENABLE) {
+ /* set interrupt vector select bit to 0 to select
+ * vetcor 1 for bare metal case */
+ bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
+ BIF_INTR_CNTL,
+ RAS_INTR_VEC_SEL, 0);
+ WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+ }
+
+ return 0;
+}
+
+static int nbio_v7_4_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to BIFring instead of general iv ring. However, due to known bif ring
+ * hw bug, it has to be disabled. There is no chance the process function
+ * will be involked. Just left it as a dummy one.
+ */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_controller_irq_funcs = {
+ .set = nbio_v7_4_set_ras_controller_irq_state,
+ .process = nbio_v7_4_process_ras_controller_irq,
+};
+
+static const struct amdgpu_irq_src_funcs nbio_v7_4_ras_err_event_athub_irq_funcs = {
+ .set = nbio_v7_4_set_ras_err_event_athub_irq_state,
+ .process = nbio_v7_4_process_err_event_athub_irq,
+};
+
+static int nbio_v7_4_init_ras_controller_interrupt (struct amdgpu_device *adev)
+{
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_controller_irq.funcs =
+ &nbio_v7_4_ras_controller_irq_funcs;
+ adev->nbio.ras_controller_irq.num_types = 1;
+
+ /* register ras controller interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT,
+ &adev->nbio.ras_controller_irq);
+
+ return r;
+}
+
+static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev)
+{
+
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbio_v7_4_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+
+#define smnPARITY_ERROR_STATUS_UNCORR_GRP2 0x13a20030
+
+static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ uint32_t global_sts, central_sts, int_eoi, parity_sts;
+ uint32_t corr, fatal, non_fatal;
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ global_sts = RREG32_PCIE(smnRAS_GLOBAL_STATUS_LO);
+ corr = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrCorr);
+ fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal);
+ non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO,
+ ParityErrNonFatal);
+ parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2);
+
+ if (corr)
+ err_data->ce_count++;
+ if (fatal)
+ err_data->ue_count++;
+
+ if (corr || fatal || non_fatal) {
+ central_sts = RREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS);
+ /* clear error status register */
+ WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts);
+
+ if (fatal)
+ /* clear parity fatal error indication field */
+ WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2,
+ parity_sts);
+
+ if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS,
+ BIFL_RasContller_Intr_Recv)) {
+ /* clear interrupt status register */
+ WREG32_PCIE(smnBIFL_RAS_CENTRAL_STATUS, central_sts);
+ int_eoi = RREG32_PCIE(smnIOHC_INTERRUPT_EOI);
+ int_eoi = REG_SET_FIELD(int_eoi,
+ IOHC_INTERRUPT_EOI, SMI_EOI, 1);
+ WREG32_PCIE(smnIOHC_INTERRUPT_EOI, int_eoi);
+ }
+ }
+}
+
+static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL,
+ DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1);
}
const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
- .hdp_flush_reg = &nbio_v7_4_hdp_flush_reg,
.get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
.get_pcie_index_offset = nbio_v7_4_get_pcie_index_offset,
@@ -330,6 +542,7 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture,
.enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture,
.ih_doorbell_range = nbio_v7_4_ih_doorbell_range,
+ .enable_doorbell_interrupt = nbio_v7_4_enable_doorbell_interrupt,
.update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating,
.update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep,
.get_clockgating_state = nbio_v7_4_get_clockgating_state,
@@ -337,4 +550,10 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.init_registers = nbio_v7_4_init_registers,
.detect_hw_virt = nbio_v7_4_detect_hw_virt,
.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
+ .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
+ .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt,
+ .init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt,
+ .query_ras_error_count = nbio_v7_4_query_ras_error_count,
+ .ras_late_init = amdgpu_nbio_ras_late_init,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
index c442865bac4f..b1ac82872752 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
@@ -26,6 +26,7 @@
#include "soc15_common.h"
+extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index de9b995b65b1..2d1bebdf1603 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -40,19 +40,23 @@
#include "gc/gc_10_1_0_sh_mask.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
+#include "smuio/smuio_11_0_0_offset.h"
#include "soc15.h"
#include "soc15_common.h"
#include "gmc_v10_0.h"
#include "gfxhub_v2_0.h"
#include "mmhub_v2_0.h"
+#include "nbio_v2_3.h"
#include "nv.h"
#include "navi10_ih.h"
#include "gfx_v10_0.h"
#include "sdma_v5_0.h"
#include "vcn_v2_0.h"
+#include "jpeg_v2_0.h"
#include "dce_virtual.h"
#include "mes_v10_1.h"
+#include "mxgpu_nv.h"
static const struct amd_ip_funcs nv_common_ip_funcs;
@@ -63,8 +67,8 @@ static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags, address, data;
u32 r;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, reg);
@@ -78,8 +82,8 @@ static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags, address, data;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, reg);
@@ -119,7 +123,7 @@ static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
static u32 nv_get_config_memsize(struct amdgpu_device *adev)
{
- return adev->nbio_funcs->get_memsize(adev);
+ return adev->nbio.funcs->get_memsize(adev);
}
static u32 nv_get_xclk(struct amdgpu_device *adev)
@@ -154,8 +158,27 @@ static bool nv_read_disabled_bios(struct amdgpu_device *adev)
static bool nv_read_bios_from_rom(struct amdgpu_device *adev,
u8 *bios, u32 length_bytes)
{
- /* TODO: will implement it when SMU header is available */
- return false;
+ u32 *dw_ptr;
+ u32 i, length_dw;
+
+ if (bios == NULL)
+ return false;
+ if (length_bytes == 0)
+ return false;
+ /* APU vbios image is part of sbios image */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ dw_ptr = (u32 *)bios;
+ length_dw = ALIGN(length_bytes, 4) / 4;
+
+ /* set rom index to 0 */
+ WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0);
+ /* read out the rom data */
+ for (i = 0; i < length_dw; i++)
+ dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA));
+
+ return true;
}
static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
@@ -176,6 +199,7 @@ static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
@@ -279,7 +303,7 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev)
/* wait for asic to come out of reset */
for (i = 0; i < adev->usec_timeout; i++) {
- u32 memsize = adev->nbio_funcs->get_memsize(adev);
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
if (memsize != 0xffffffff)
break;
@@ -291,12 +315,22 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev)
return ret;
}
+static bool nv_asic_supports_baco(struct amdgpu_device *adev)
+{
+ struct smu_context *smu = &adev->smu;
+
+ if (smu_baco_is_support(smu))
+ return true;
+ else
+ return false;
+}
+
static enum amd_reset_method
nv_asic_reset_method(struct amdgpu_device *adev)
{
struct smu_context *smu = &adev->smu;
- if (smu_baco_is_support(smu))
+ if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu))
return AMD_RESET_METHOD_BACO;
else
return AMD_RESET_METHOD_MODE1;
@@ -319,7 +353,12 @@ static int nv_asic_reset(struct amdgpu_device *adev)
if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
if (!adev->in_suspend)
amdgpu_inc_vram_lost(adev);
- ret = smu_baco_reset(smu);
+ ret = smu_baco_enter(smu);
+ if (ret)
+ return ret;
+ ret = smu_baco_exit(smu);
+ if (ret)
+ return ret;
} else {
if (!adev->in_suspend)
amdgpu_inc_vram_lost(adev);
@@ -368,8 +407,8 @@ static void nv_program_aspm(struct amdgpu_device *adev)
static void nv_enable_doorbell_aperture(struct amdgpu_device *adev,
bool enable)
{
- adev->nbio_funcs->enable_doorbell_aperture(adev, enable);
- adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable);
+ adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
}
static const struct amdgpu_ip_block_version nv_common_ip_block =
@@ -423,9 +462,13 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
if (r)
return r;
- adev->nbio_funcs = &nbio_v2_3_funcs;
+ adev->nbio.funcs = &nbio_v2_3_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
- adev->nbio_funcs->detect_hw_virt(adev);
+ adev->nbio.funcs->detect_hw_virt(adev);
+
+ if (amdgpu_sriov_vf(adev))
+ adev->virt.ops = &xgpu_nv_virt_ops;
switch (adev->asic_type) {
case CHIP_NAVI10:
@@ -435,7 +478,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
- is_support_sw_smu(adev))
+ !amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -446,9 +489,10 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
- is_support_sw_smu(adev))
+ !amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
if (adev->enable_mes)
amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
break;
@@ -458,7 +502,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
- is_support_sw_smu(adev))
+ !amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -469,9 +513,10 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
- is_support_sw_smu(adev))
+ !amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
break;
default:
return -EINVAL;
@@ -482,12 +527,12 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
static uint32_t nv_get_rev_id(struct amdgpu_device *adev)
{
- return adev->nbio_funcs->get_rev_id(adev);
+ return adev->nbio.funcs->get_rev_id(adev);
}
static void nv_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
- adev->nbio_funcs->hdp_flush(adev, ring);
+ adev->nbio.funcs->hdp_flush(adev, ring);
}
static void nv_invalidate_hdp(struct amdgpu_device *adev,
@@ -532,6 +577,16 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
+static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+
+ /* TODO
+ * dummy implement for pcie_replay_count sysfs interface
+ * */
+
+ return 0;
+}
+
static void nv_init_doorbell_index(struct amdgpu_device *adev)
{
adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
@@ -579,12 +634,17 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
.need_full_reset = &nv_need_full_reset,
.get_pcie_usage = &nv_get_pcie_usage,
.need_reset_on_init = &nv_need_reset_on_init,
+ .get_pcie_replay_count = &nv_get_pcie_replay_count,
+ .supports_baco = &nv_asic_supports_baco,
};
static int nv_common_early_init(void *handle)
{
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &nv_pcie_rreg;
@@ -615,10 +675,12 @@ static int nv_common_early_init(void *handle)
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_BIF_MGCG |
AMD_CG_SUPPORT_BIF_LS;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_ATHUB;
adev->external_rev_id = adev->rev_id + 0x1;
break;
@@ -635,9 +697,11 @@ static int nv_common_early_init(void *handle)
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_BIF_MGCG |
AMD_CG_SUPPORT_BIF_LS;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_VCN_DPG;
adev->external_rev_id = adev->rev_id + 20;
break;
@@ -656,10 +720,18 @@ static int nv_common_early_init(void *handle)
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS |
- AMD_CG_SUPPORT_VCN_MGCG;
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_ATHUB;
+ /* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0,
+ * as a consequence, the rev_id and external_rev_id are wrong.
+ * workaround it by hardcoding rev_id to 0 (default value).
+ */
+ if (amdgpu_sriov_vf(adev))
+ adev->rev_id = 0;
adev->external_rev_id = adev->rev_id + 0xa;
break;
default:
@@ -667,16 +739,31 @@ static int nv_common_early_init(void *handle)
return -EINVAL;
}
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_setting(adev);
+ xgpu_nv_mailbox_set_irq_funcs(adev);
+ }
+
return 0;
}
static int nv_common_late_init(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_get_irq(adev);
+
return 0;
}
static int nv_common_sw_init(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ xgpu_nv_mailbox_add_irq_id(adev);
+
return 0;
}
@@ -694,7 +781,13 @@ static int nv_common_hw_init(void *handle)
/* enable aspm */
nv_program_aspm(adev);
/* setup nbio registers */
- adev->nbio_funcs->init_registers(adev);
+ adev->nbio.funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio.funcs->remap_hdp_registers)
+ adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
nv_enable_doorbell_aperture(adev, true);
@@ -856,14 +949,14 @@ static int nv_common_set_clockgating_state(void *handle,
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
- adev->nbio_funcs->update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
- adev->nbio_funcs->update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
nv_update_hdp_mem_power_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
nv_update_hdp_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -886,7 +979,7 @@ static void nv_common_get_clockgating_state(void *handle, u32 *flags)
if (amdgpu_sriov_vf(adev))
*flags = 0;
- adev->nbio_funcs->get_clockgating_state(adev, flags);
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
/* AMD_CG_SUPPORT_HDP_MGCG */
tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL);
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 74a9fe8e0cfb..36b65797434e 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -242,6 +242,7 @@ enum psp_gfx_fw_type {
GFX_FW_TYPE_SDMA5 = 55, /* SDMA5 MI */
GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */
GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */
+ GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */
GFX_FW_TYPE_MAX
};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 5d95e614369a..7539104175e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -40,6 +40,9 @@
MODULE_FIRMWARE("amdgpu/raven_asd.bin");
MODULE_FIRMWARE("amdgpu/picasso_asd.bin");
MODULE_FIRMWARE("amdgpu/raven2_asd.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ta.bin");
+MODULE_FIRMWARE("amdgpu/raven_ta.bin");
static int psp_v10_0_init_microcode(struct psp_context *psp)
{
@@ -48,7 +51,7 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
char fw_name[30];
int err = 0;
const struct psp_firmware_header_v1_0 *hdr;
-
+ const struct ta_firmware_header_v1_0 *ta_hdr;
DRM_DEBUG("\n");
switch (adev->asic_type) {
@@ -79,7 +82,45 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
adev->psp.asd_start_addr = (uint8_t *)hdr +
le32_to_cpu(hdr->header.ucode_array_offset_bytes);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
+ err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
+ if (err) {
+ release_firmware(adev->psp.ta_fw);
+ adev->psp.ta_fw = NULL;
+ dev_info(adev->dev,
+ "psp v10.0: Failed to load firmware \"%s\"\n",
+ fw_name);
+ } else {
+ err = amdgpu_ucode_validate(adev->psp.ta_fw);
+ if (err)
+ goto out2;
+
+ ta_hdr = (const struct ta_firmware_header_v1_0 *)
+ adev->psp.ta_fw->data;
+ adev->psp.ta_hdcp_ucode_version =
+ le32_to_cpu(ta_hdr->ta_hdcp_ucode_version);
+ adev->psp.ta_hdcp_ucode_size =
+ le32_to_cpu(ta_hdr->ta_hdcp_size_bytes);
+ adev->psp.ta_hdcp_start_addr =
+ (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+
+ adev->psp.ta_dtm_ucode_version =
+ le32_to_cpu(ta_hdr->ta_dtm_ucode_version);
+ adev->psp.ta_dtm_ucode_size =
+ le32_to_cpu(ta_hdr->ta_dtm_size_bytes);
+ adev->psp.ta_dtm_start_addr =
+ (uint8_t *)adev->psp.ta_hdcp_start_addr +
+ le32_to_cpu(ta_hdr->ta_dtm_offset_bytes);
+ }
+
return 0;
+
+out2:
+ release_firmware(adev->psp.ta_fw);
+ adev->psp.ta_fw = NULL;
out:
if (err) {
dev_err(adev->dev,
@@ -189,53 +230,6 @@ static int psp_v10_0_ring_destroy(struct psp_context *psp,
return ret;
}
-static int psp_v10_0_cmd_submit(struct psp_context *psp,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index)
-{
- unsigned int psp_write_ptr_reg = 0;
- struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
- struct psp_ring *ring = &psp->km_ring;
- struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
- struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
- ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
- struct amdgpu_device *adev = psp->adev;
- uint32_t ring_size_dw = ring->ring_size / 4;
- uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
-
- /* KM (GPCOM) prepare write pointer */
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
-
- /* Update KM RB frame pointer to new frame */
- if ((psp_write_ptr_reg % ring_size_dw) == 0)
- write_frame = ring_buffer_start;
- else
- write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
- /* Check invalid write_frame ptr address */
- if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
- DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
- ring_buffer_start, ring_buffer_end, write_frame);
- DRM_ERROR("write_frame is pointing to address out of bounds\n");
- return -EINVAL;
- }
-
- /* Initialize KM RB frame */
- memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-
- /* Update KM RB frame */
- write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
- write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
- write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
- write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
- write_frame->fence_value = index;
-
- /* Update the write Pointer in DWORDs */
- psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
-
- return 0;
-}
-
static int
psp_v10_0_sram_map(struct amdgpu_device *adev,
unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
@@ -365,15 +359,30 @@ static int psp_v10_0_mode1_reset(struct psp_context *psp)
return -EINVAL;
}
+static uint32_t psp_v10_0_ring_get_wptr(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ return RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+}
+
+static void psp_v10_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
static const struct psp_funcs psp_v10_0_funcs = {
.init_microcode = psp_v10_0_init_microcode,
.ring_init = psp_v10_0_ring_init,
.ring_create = psp_v10_0_ring_create,
.ring_stop = psp_v10_0_ring_stop,
.ring_destroy = psp_v10_0_ring_destroy,
- .cmd_submit = psp_v10_0_cmd_submit,
.compare_sram_data = psp_v10_0_compare_sram_data,
.mode1_reset = psp_v10_0_mode1_reset,
+ .ring_get_wptr = psp_v10_0_ring_get_wptr,
+ .ring_set_wptr = psp_v10_0_ring_set_wptr,
};
void psp_v10_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 10166104b8a3..0829188c1a5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -22,6 +22,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
+#include <linux/vmalloc.h>
#include "amdgpu.h"
#include "amdgpu_psp.h"
@@ -43,12 +44,16 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
MODULE_FIRMWARE("amdgpu/navi10_sos.bin");
MODULE_FIRMWARE("amdgpu/navi10_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi10_ta.bin");
MODULE_FIRMWARE("amdgpu/navi14_sos.bin");
MODULE_FIRMWARE("amdgpu/navi14_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi14_ta.bin");
MODULE_FIRMWARE("amdgpu/navi12_sos.bin");
MODULE_FIRMWARE("amdgpu/navi12_asd.bin");
+MODULE_FIRMWARE("amdgpu/navi12_ta.bin");
MODULE_FIRMWARE("amdgpu/arcturus_sos.bin");
MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_ta.bin");
/* address block */
#define smnMP1_FIRMWARE_FLAGS 0x3010024
@@ -57,6 +62,8 @@ MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");
#define mmRLC_GPM_UCODE_DATA_NV10 0x5b62
#define mmSDMA0_UCODE_ADDR_NV10 0x5880
#define mmSDMA0_UCODE_DATA_NV10 0x5881
+/* memory training timeout define */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
static int psp_v11_0_init_microcode(struct psp_context *psp)
{
@@ -155,6 +162,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
switch (adev->asic_type) {
case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
if (err) {
@@ -182,7 +190,31 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
- case CHIP_ARCTURUS:
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
+ err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
+ if (err) {
+ release_firmware(adev->psp.ta_fw);
+ adev->psp.ta_fw = NULL;
+ dev_info(adev->dev,
+ "psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
+ } else {
+ err = amdgpu_ucode_validate(adev->psp.ta_fw);
+ if (err)
+ goto out2;
+
+ ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
+ adev->psp.ta_hdcp_ucode_version = le32_to_cpu(ta_hdr->ta_hdcp_ucode_version);
+ adev->psp.ta_hdcp_ucode_size = le32_to_cpu(ta_hdr->ta_hdcp_size_bytes);
+ adev->psp.ta_hdcp_start_addr = (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+
+ adev->psp.ta_dtm_ucode_version = le32_to_cpu(ta_hdr->ta_dtm_ucode_version);
+ adev->psp.ta_dtm_ucode_size = le32_to_cpu(ta_hdr->ta_dtm_size_bytes);
+ adev->psp.ta_dtm_start_addr = (uint8_t *)adev->psp.ta_hdcp_start_addr +
+ le32_to_cpu(ta_hdr->ta_dtm_offset_bytes);
+ }
break;
default:
BUG();
@@ -205,26 +237,55 @@ out:
return err;
}
+int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ int ret;
+ int retry_loop;
+
+ for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ /* Wait for bootloader to signify that is
+ ready having bit 31 of C2PMSG_35 set to 1 */
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000,
+ 0x80000000,
+ false);
+
+ if (ret == 0)
+ return 0;
+ }
+
+ return ret;
+}
+
+static bool psp_v11_0_is_sos_alive(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+
+ return sol_reg != 0x0;
+}
+
static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
{
int ret;
uint32_t psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
- uint32_t sol_reg;
/* Check tOS sign of life register to confirm sys driver and sOS
* are already been loaded.
*/
- sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
- if (sol_reg) {
+ if (psp_v11_0_is_sos_alive(psp)) {
psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version);
return 0;
}
- /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_v11_0_wait_for_bootloader(psp);
if (ret)
return ret;
@@ -233,16 +294,14 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp)
/* Copy PSP KDB binary to memory */
memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size);
- /* Provide the sys driver to bootloader */
+ /* Provide the PSP KDB to bootloader */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE;
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
- /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1*/
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_v11_0_wait_for_bootloader(psp);
return ret;
}
@@ -252,21 +311,17 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
int ret;
uint32_t psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
- uint32_t sol_reg;
/* Check sOS sign of life register to confirm sys driver and sOS
* are already been loaded.
*/
- sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
- if (sol_reg) {
+ if (psp_v11_0_is_sos_alive(psp)) {
psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version);
return 0;
}
- /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_v11_0_wait_for_bootloader(psp);
if (ret)
return ret;
@@ -285,8 +340,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_v11_0_wait_for_bootloader(psp);
return ret;
}
@@ -296,18 +350,14 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
int ret;
unsigned int psp_gfxdrv_command_reg = 0;
struct amdgpu_device *adev = psp->adev;
- uint32_t sol_reg;
/* Check sOS sign of life register to confirm sys driver and sOS
* are already been loaded.
*/
- sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
- if (sol_reg)
+ if (psp_v11_0_is_sos_alive(psp))
return 0;
- /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, false);
+ ret = psp_v11_0_wait_for_bootloader(psp);
if (ret)
return ret;
@@ -398,6 +448,34 @@ static bool psp_v11_0_support_vmr_ring(struct psp_context *psp)
return false;
}
+static int psp_v11_0_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Write the ring destroy command*/
+ if (psp_v11_0_support_vmr_ring(psp))
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+
+ /* there might be handshake issue with hardware which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) */
+ if (psp_v11_0_support_vmr_ring(psp))
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ else
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
static int psp_v11_0_ring_create(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -407,6 +485,12 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
struct amdgpu_device *adev = psp->adev;
if (psp_v11_0_support_vmr_ring(psp)) {
+ ret = psp_v11_0_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v11_0_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
/* Write low address of the ring to C2PMSG_102 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
@@ -426,6 +510,14 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
0x80000000, 0x8000FFFF, false);
} else {
+ /* Wait for sOS ready for ring creation */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ if (ret) {
+ DRM_ERROR("Failed to wait for sOS ready for ring creation\n");
+ return ret;
+ }
+
/* Write low address of the ring to C2PMSG_69 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
@@ -451,33 +543,6 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
return ret;
}
-static int psp_v11_0_ring_stop(struct psp_context *psp,
- enum psp_ring_type ring_type)
-{
- int ret = 0;
- struct amdgpu_device *adev = psp->adev;
-
- /* Write the ring destroy command*/
- if (psp_v11_0_support_vmr_ring(psp))
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
- GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
- else
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
- GFX_CTRL_CMD_ID_DESTROY_RINGS);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) */
- if (psp_v11_0_support_vmr_ring(psp))
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x80000000, false);
- else
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
-
- return ret;
-}
static int psp_v11_0_ring_destroy(struct psp_context *psp,
enum psp_ring_type ring_type)
@@ -497,62 +562,6 @@ static int psp_v11_0_ring_destroy(struct psp_context *psp,
return ret;
}
-static int psp_v11_0_cmd_submit(struct psp_context *psp,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index)
-{
- unsigned int psp_write_ptr_reg = 0;
- struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem;
- struct psp_ring *ring = &psp->km_ring;
- struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
- struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
- ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
- struct amdgpu_device *adev = psp->adev;
- uint32_t ring_size_dw = ring->ring_size / 4;
- uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
-
- /* KM (GPCOM) prepare write pointer */
- if (psp_v11_0_support_vmr_ring(psp))
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
- else
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
-
- /* Update KM RB frame pointer to new frame */
- /* write_frame ptr increments by size of rb_frame in bytes */
- /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
- if ((psp_write_ptr_reg % ring_size_dw) == 0)
- write_frame = ring_buffer_start;
- else
- write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
- /* Check invalid write_frame ptr address */
- if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
- DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
- ring_buffer_start, ring_buffer_end, write_frame);
- DRM_ERROR("write_frame is pointing to address out of bounds\n");
- return -EINVAL;
- }
-
- /* Initialize KM RB frame */
- memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-
- /* Update KM RB frame */
- write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
- write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
- write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
- write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
- write_frame->fence_value = index;
-
- /* Update the write Pointer in DWORDs */
- psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
- if (psp_v11_0_support_vmr_ring(psp)) {
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
- } else
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
-
- return 0;
-}
-
static int
psp_v11_0_sram_map(struct amdgpu_device *adev,
unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
@@ -889,6 +898,216 @@ static int psp_v11_0_rlc_autoload_start(struct psp_context *psp)
return psp_rlc_autoload_start(psp);
}
+static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+ int ret;
+ int i;
+ uint32_t data_32;
+ int max_wait;
+ struct amdgpu_device *adev = psp->adev;
+
+ data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, data_32);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, msg);
+
+ max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+ for (i = 0; i < max_wait; i++) {
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret == 0)
+ break;
+ }
+ if (i < max_wait)
+ ret = 0;
+ else
+ ret = -ETIME;
+
+ DRM_DEBUG("training %s %s, cost %d @ %d ms\n",
+ (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+ (ret == 0) ? "succeed" : "failed",
+ i, adev->usec_timeout/1000);
+ return ret;
+}
+
+static void psp_v11_0_memory_training_fini(struct psp_context *psp)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+ ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+ kfree(ctx->sys_cache);
+ ctx->sys_cache = NULL;
+}
+
+static int psp_v11_0_memory_training_init(struct psp_context *psp)
+{
+ int ret;
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+ if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
+ DRM_DEBUG("memory training is not supported!\n");
+ return 0;
+ }
+
+ ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
+ if (ctx->sys_cache == NULL) {
+ DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n");
+ ret = -ENOMEM;
+ goto Err_out;
+ }
+
+ DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
+ ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
+ return 0;
+
+Err_out:
+ psp_v11_0_memory_training_fini(psp);
+ return ret;
+}
+
+/*
+ * save and restore proces
+ */
+static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+ uint32_t *pcache = (uint32_t*)ctx->sys_cache;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t p2c_header[4];
+ uint32_t sz;
+ void *buf;
+ int ret;
+
+ if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
+ DRM_DEBUG("Memory training is not supported.\n");
+ return 0;
+ } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
+ DRM_ERROR("Memory training initialization failure.\n");
+ return -EINVAL;
+ }
+
+ if (psp_v11_0_is_sos_alive(psp)) {
+ DRM_DEBUG("SOS is alive, skip memory training.\n");
+ return 0;
+ }
+
+ amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+ DRM_DEBUG("sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
+ pcache[0], pcache[1], pcache[2], pcache[3],
+ p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ DRM_DEBUG("Short training depends on restore.\n");
+ ops |= PSP_MEM_TRAIN_RESTORE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_RESTORE) &&
+ pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ DRM_DEBUG("sys_cache[0] is invalid, restore depends on save.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ pcache[3] == p2c_header[3])) {
+ DRM_DEBUG("sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_SAVE) &&
+ p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ DRM_DEBUG("p2c_header[0] is invalid, save depends on long training.\n");
+ ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ DRM_DEBUG("Memory training ops:%x.\n", ops);
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ /*
+ * Long traing will encroach certain mount of bottom VRAM,
+ * saving the content of this bottom VRAM to system memory
+ * before training, and restoring it after training to avoid
+ * VRAM corruption.
+ */
+ sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+
+ if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
+ DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
+ adev->gmc.visible_vram_size,
+ adev->mman.aper_base_kaddr);
+ return -EINVAL;
+ }
+
+ buf = vmalloc(sz);
+ if (!buf) {
+ DRM_ERROR("failed to allocate system memory.\n");
+ return -ENOMEM;
+ }
+
+ memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
+ ret = psp_v11_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+ if (ret) {
+ DRM_ERROR("Send long training msg failed.\n");
+ vfree(buf);
+ return ret;
+ }
+
+ memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+ adev->nbio.funcs->hdp_flush(adev, NULL);
+ vfree(buf);
+ }
+
+ if (ops & PSP_MEM_TRAIN_SAVE) {
+ amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
+ }
+
+ if (ops & PSP_MEM_TRAIN_RESTORE) {
+ amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ ret = psp_v11_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?
+ PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);
+ if (ret) {
+ DRM_ERROR("send training msg failed.\n");
+ return ret;
+ }
+ }
+ ctx->training_cnt++;
+ return 0;
+}
+
+static uint32_t psp_v11_0_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v11_0_support_vmr_ring(psp))
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v11_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v11_0_support_vmr_ring(psp)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
static const struct psp_funcs psp_v11_0_funcs = {
.init_microcode = psp_v11_0_init_microcode,
.bootloader_load_kdb = psp_v11_0_bootloader_load_kdb,
@@ -898,7 +1117,6 @@ static const struct psp_funcs psp_v11_0_funcs = {
.ring_create = psp_v11_0_ring_create,
.ring_stop = psp_v11_0_ring_stop,
.ring_destroy = psp_v11_0_ring_destroy,
- .cmd_submit = psp_v11_0_cmd_submit,
.compare_sram_data = psp_v11_0_compare_sram_data,
.mode1_reset = psp_v11_0_mode1_reset,
.xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info,
@@ -909,6 +1127,11 @@ static const struct psp_funcs psp_v11_0_funcs = {
.ras_trigger_error = psp_v11_0_ras_trigger_error,
.ras_cure_posion = psp_v11_0_ras_cure_posion,
.rlc_autoload_start = psp_v11_0_rlc_autoload_start,
+ .mem_training_init = psp_v11_0_memory_training_init,
+ .mem_training_fini = psp_v11_0_memory_training_fini,
+ .mem_training = psp_v11_0_memory_training,
+ .ring_get_wptr = psp_v11_0_ring_get_wptr,
+ .ring_set_wptr = psp_v11_0_ring_set_wptr,
};
void psp_v11_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index c72e43f8e0be..58d8b6d732e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -334,62 +334,6 @@ static int psp_v12_0_ring_destroy(struct psp_context *psp,
return ret;
}
-static int psp_v12_0_cmd_submit(struct psp_context *psp,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index)
-{
- unsigned int psp_write_ptr_reg = 0;
- struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem;
- struct psp_ring *ring = &psp->km_ring;
- struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
- struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
- ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
- struct amdgpu_device *adev = psp->adev;
- uint32_t ring_size_dw = ring->ring_size / 4;
- uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
-
- /* KM (GPCOM) prepare write pointer */
- if (psp_v12_0_support_vmr_ring(psp))
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
- else
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
-
- /* Update KM RB frame pointer to new frame */
- /* write_frame ptr increments by size of rb_frame in bytes */
- /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
- if ((psp_write_ptr_reg % ring_size_dw) == 0)
- write_frame = ring_buffer_start;
- else
- write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
- /* Check invalid write_frame ptr address */
- if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
- DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
- ring_buffer_start, ring_buffer_end, write_frame);
- DRM_ERROR("write_frame is pointing to address out of bounds\n");
- return -EINVAL;
- }
-
- /* Initialize KM RB frame */
- memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-
- /* Update KM RB frame */
- write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
- write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
- write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
- write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
- write_frame->fence_value = index;
-
- /* Update the write Pointer in DWORDs */
- psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
- if (psp_v12_0_support_vmr_ring(psp)) {
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
- } else
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
-
- return 0;
-}
-
static int
psp_v12_0_sram_map(struct amdgpu_device *adev,
unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
@@ -546,6 +490,30 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp)
return 0;
}
+static uint32_t psp_v12_0_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v12_0_support_vmr_ring(psp))
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v12_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v12_0_support_vmr_ring(psp)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
static const struct psp_funcs psp_v12_0_funcs = {
.init_microcode = psp_v12_0_init_microcode,
.bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv,
@@ -554,9 +522,10 @@ static const struct psp_funcs psp_v12_0_funcs = {
.ring_create = psp_v12_0_ring_create,
.ring_stop = psp_v12_0_ring_stop,
.ring_destroy = psp_v12_0_ring_destroy,
- .cmd_submit = psp_v12_0_cmd_submit,
.compare_sram_data = psp_v12_0_compare_sram_data,
.mode1_reset = psp_v12_0_mode1_reset,
+ .ring_get_wptr = psp_v12_0_ring_get_wptr,
+ .ring_set_wptr = psp_v12_0_ring_set_wptr,
};
void psp_v12_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index d2c727f6a8bd..735c43c7daab 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -179,7 +179,7 @@ static bool psp_v3_1_match_version(struct amdgpu_device *adev, uint32_t ver)
* Double check if the latest four legacy versions.
* If yes, it is still the right version.
*/
- for (i = 0; i < sizeof(sos_old_versions) / sizeof(uint32_t); i++) {
+ for (i = 0; i < ARRAY_SIZE(sos_old_versions); i++) {
if (sos_old_versions[i] == adev->psp.sos_fw_version)
return true;
}
@@ -410,64 +410,6 @@ static int psp_v3_1_ring_destroy(struct psp_context *psp,
return ret;
}
-static int psp_v3_1_cmd_submit(struct psp_context *psp,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index)
-{
- unsigned int psp_write_ptr_reg = 0;
- struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
- struct psp_ring *ring = &psp->km_ring;
- struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
- struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
- ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
- struct amdgpu_device *adev = psp->adev;
- uint32_t ring_size_dw = ring->ring_size / 4;
- uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
-
- /* KM (GPCOM) prepare write pointer */
- if (psp_v3_1_support_vmr_ring(psp))
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
- else
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
-
- /* Update KM RB frame pointer to new frame */
- /* write_frame ptr increments by size of rb_frame in bytes */
- /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
- if ((psp_write_ptr_reg % ring_size_dw) == 0)
- write_frame = ring_buffer_start;
- else
- write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
- /* Check invalid write_frame ptr address */
- if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
- DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
- ring_buffer_start, ring_buffer_end, write_frame);
- DRM_ERROR("write_frame is pointing to address out of bounds\n");
- return -EINVAL;
- }
-
- /* Initialize KM RB frame */
- memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
-
- /* Update KM RB frame */
- write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
- write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
- write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
- write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
- write_frame->fence_value = index;
-
- /* Update the write Pointer in DWORDs */
- psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
- if (psp_v3_1_support_vmr_ring(psp)) {
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
- /* send interrupt to PSP for SRIOV ring write pointer update */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
- GFX_CTRL_CMD_ID_CONSUME_CMD);
- } else
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
-
- return 0;
-}
-
static int
psp_v3_1_sram_map(struct amdgpu_device *adev,
unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
@@ -641,6 +583,31 @@ static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
return false;
}
+static uint32_t psp_v3_1_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v3_1_support_vmr_ring(psp))
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+ return data;
+}
+
+static void psp_v3_1_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp_v3_1_support_vmr_ring(psp)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
+ /* send interrupt to PSP for SRIOV ring write pointer update */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
+}
+
static const struct psp_funcs psp_v3_1_funcs = {
.init_microcode = psp_v3_1_init_microcode,
.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
@@ -649,11 +616,12 @@ static const struct psp_funcs psp_v3_1_funcs = {
.ring_create = psp_v3_1_ring_create,
.ring_stop = psp_v3_1_ring_stop,
.ring_destroy = psp_v3_1_ring_destroy,
- .cmd_submit = psp_v3_1_cmd_submit,
.compare_sram_data = psp_v3_1_compare_sram_data,
.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
.mode1_reset = psp_v3_1_mode1_reset,
.support_vmr_ring = psp_v3_1_support_vmr_ring,
+ .ring_get_wptr = psp_v3_1_ring_get_wptr,
+ .ring_set_wptr = psp_v3_1_ring_set_wptr,
};
void psp_v3_1_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index a10175838013..7d509a40076f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
- sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
@@ -1260,16 +1260,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
{
- struct drm_gpu_scheduler *sched;
unsigned i;
adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 5f4e2c616241..b6109a99fc43 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
- sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
@@ -1698,16 +1698,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
- struct drm_gpu_scheduler *sched;
unsigned i;
adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version sdma_v3_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 78452cf0115d..e55884d204bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -82,6 +82,7 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev);
static const struct soc15_reg_golden golden_settings_sdma_4[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
@@ -254,6 +255,106 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
+};
+
+static const struct soc15_ras_field_entry sdma_v4_0_ras_fields[] = {
+ { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UCODE_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_RB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_IB_CMD_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RD_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_UTCL1_RDBST_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_DATA_LUT_FIFO_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
+ 0, 0,
+ },
+ { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
+ 0, 0,
+ },
+ { "SDMA_SPLIT_DAT_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_SPLIT_DAT_BUF_SED),
+ 0, 0,
+ },
+ { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER),
+ SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MC_WR_ADDR_FIFO_SED),
+ 0, 0,
+ },
};
static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
@@ -697,7 +798,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
- sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -746,13 +847,13 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask = 0;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
sdma_v4_0_wait_reg_mem(ring, 0, 1,
- adev->nbio_funcs->get_hdp_flush_done_offset(adev),
- adev->nbio_funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
ref_and_mask, ref_and_mask, 10);
}
@@ -1578,7 +1679,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
@@ -1685,107 +1786,33 @@ static int sdma_v4_0_early_init(void *handle)
sdma_v4_0_set_buffer_funcs(adev);
sdma_v4_0_set_vm_pte_funcs(adev);
sdma_v4_0_set_irq_funcs(adev);
+ sdma_v4_0_set_ras_funcs(adev);
return 0;
}
static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
+ void *err_data,
struct amdgpu_iv_entry *entry);
static int sdma_v4_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ras_common_if **ras_if = &adev->sdma.ras_if;
struct ras_ih_if ih_info = {
.cb = sdma_v4_0_process_ras_data_cb,
};
- struct ras_fs_if fs_info = {
- .sysfs_name = "sdma_err_count",
- .debugfs_name = "sdma_err_inject",
- };
- struct ras_common_if ras_block = {
- .block = AMDGPU_RAS_BLOCK__SDMA,
- .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
- .sub_block_index = 0,
- .name = "sdma",
- };
- int r, i;
-
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
- amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
- return 0;
- }
-
- /* handle resume path. */
- if (*ras_if) {
- /* resend ras TA enable cmd during resume.
- * prepare to handle failure.
- */
- ih_info.head = **ras_if;
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- /* request a gpu reset. will run again. */
- amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__SDMA);
- return 0;
- }
- /* fail to enable ras, cleanup all. */
- goto irq;
- }
- /* enable successfully. continue. */
- goto resume;
- }
-
- *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
- if (!*ras_if)
- return -ENOMEM;
-
- **ras_if = ras_block;
-
- r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r) {
- if (r == -EAGAIN) {
- amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__SDMA);
- r = 0;
- }
- goto feature;
- }
-
- ih_info.head = **ras_if;
- fs_info.head = **ras_if;
-
- r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
- if (r)
- goto interrupt;
-
- amdgpu_ras_debugfs_create(adev, &fs_info);
+ int i;
- r = amdgpu_ras_sysfs_create(adev, &fs_info);
- if (r)
- goto sysfs;
-resume:
- for (i = 0; i < adev->sdma.num_instances; i++) {
- r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq,
- AMDGPU_SDMA_IRQ_INSTANCE0 + i);
- if (r)
- goto irq;
+ /* read back edc counter registers to clear the counters */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
}
- return 0;
-irq:
- amdgpu_ras_sysfs_remove(adev, *ras_if);
-sysfs:
- amdgpu_ras_debugfs_remove(adev, *ras_if);
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
-interrupt:
- amdgpu_ras_feature_enable(adev, *ras_if, 0);
-feature:
- kfree(*ras_if);
- *ras_if = NULL;
- return r;
+ if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
+ return adev->sdma.funcs->ras_late_init(adev, &ih_info);
+ else
+ return 0;
}
static int sdma_v4_0_sw_init(void *handle)
@@ -1857,21 +1884,8 @@ static int sdma_v4_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i;
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
- adev->sdma.ras_if) {
- struct ras_common_if *ras_if = adev->sdma.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- };
-
- /*remove fs first*/
- amdgpu_ras_debugfs_remove(adev, ras_if);
- amdgpu_ras_sysfs_remove(adev, ras_if);
- /*remove the IH*/
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
- amdgpu_ras_feature_enable(adev, ras_if, 0);
- kfree(ras_if);
- }
+ if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
+ adev->sdma.funcs->ras_fini(adev);
for (i = 0; i < adev->sdma.num_instances; i++) {
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
@@ -1891,7 +1905,7 @@ static int sdma_v4_0_hw_init(void *handle)
if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->set_powergating_by_smu) ||
- adev->asic_type == CHIP_RENOIR)
+ (adev->asic_type == CHIP_RENOIR && !adev->in_gpu_reset))
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
if (!amdgpu_sriov_vf(adev))
@@ -2024,52 +2038,28 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
}
static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
+ void *err_data,
struct amdgpu_iv_entry *entry)
{
- uint32_t err_source;
int instance;
+ /* When “Full RAS” is enabled, the per-IP interrupt sources should
+ * be disabled and the driver should only look for the aggregated
+ * interrupt via sync flood
+ */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ goto out;
+
instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
if (instance < 0)
- return 0;
-
- switch (entry->src_id) {
- case SDMA0_4_0__SRCID__SDMA_SRAM_ECC:
- err_source = 0;
- break;
- case SDMA0_4_0__SRCID__SDMA_ECC:
- err_source = 1;
- break;
- default:
- return 0;
- }
-
- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ goto out;
- amdgpu_ras_reset_gpu(adev, 0);
+ amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);
+out:
return AMDGPU_RAS_SUCCESS;
}
-static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- struct ras_common_if *ras_if = adev->sdma.ras_if;
- struct ras_dispatch_if ih_data = {
- .entry = entry,
- };
-
- if (!ras_if)
- return 0;
-
- ih_data.head = *ras_if;
-
- amdgpu_ras_interrupt_dispatch(adev, &ih_data);
- return 0;
-}
-
static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -2186,9 +2176,9 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
case CHIP_ARCTURUS:
case CHIP_RENOIR:
sdma_v4_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
sdma_v4_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -2417,7 +2407,7 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = {
static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
.set = sdma_v4_0_set_ecc_irq_state,
- .process = sdma_v4_0_process_ecc_irq,
+ .process = amdgpu_sdma_process_ecc_irq,
};
@@ -2531,10 +2521,73 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
sched = &adev->sdma.instance[i].page.sched;
else
sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] = sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+static void sdma_v4_0_get_ras_error_count(uint32_t value,
+ uint32_t instance,
+ uint32_t *sec_count)
+{
+ uint32_t i;
+ uint32_t sec_cnt;
+
+ /* double bits error (multiple bits) error detection is not supported */
+ for (i = 0; i < ARRAY_SIZE(sdma_v4_0_ras_fields); i++) {
+ /* the SDMA_EDC_COUNTER register in each sdma instance
+ * shares the same sed shift_mask
+ * */
+ sec_cnt = (value &
+ sdma_v4_0_ras_fields[i].sec_count_mask) >>
+ sdma_v4_0_ras_fields[i].sec_count_shift;
+ if (sec_cnt) {
+ DRM_INFO("Detected %s in SDMA%d, SED %d\n",
+ sdma_v4_0_ras_fields[i].name,
+ instance, sec_cnt);
+ *sec_count += sec_cnt;
+ }
+ }
+}
+
+static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t instance, void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t sec_count = 0;
+ uint32_t reg_value = 0;
+
+ reg_value = RREG32_SDMA(instance, mmSDMA0_EDC_COUNTER);
+ /* double bit error is not supported */
+ if (reg_value)
+ sdma_v4_0_get_ras_error_count(reg_value,
+ instance, &sec_count);
+ /* err_data->ce_count should be initialized to 0
+ * before calling into this function */
+ err_data->ce_count += sec_count;
+ /* double bit error is not supported
+ * set ue count to 0 */
+ err_data->ue_count = 0;
+
+ return 0;
+};
+
+static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
+ .ras_late_init = amdgpu_sdma_ras_late_init,
+ .ras_fini = amdgpu_sdma_ras_fini,
+ .query_ras_error_count = sdma_v4_0_query_ras_error_count,
+};
+
+static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ adev->sdma.funcs = &sdma_v4_0_ras_funcs;
+ break;
+ default:
+ break;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index fa2f70ce2e2b..67b9830b7c7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -382,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
- /* IB packet must end on a 8 DW boundary */
- sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ /* An IB packet must end on a 8 DW boundary--the next dword
+ * must be on a 8-dword boundary. Our IB packet below is 6
+ * dwords long, thus add x number of NOPs, such that, in
+ * modular arithmetic,
+ * wptr + 6 + x = 8k, k >= 0, which in C is,
+ * (wptr + 6 + x) % 8 = 0.
+ * The expression below, is a solution of x.
+ */
+ sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -406,7 +413,7 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 ref_and_mask = 0;
- const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
if (ring->me == 0)
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
@@ -416,8 +423,8 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
- amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2);
- amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
amdgpu_ring_write(ring, ref_and_mask); /* reference */
amdgpu_ring_write(ring, ref_and_mask); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
@@ -683,7 +690,7 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
- adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
ring->doorbell_index, 20);
if (amdgpu_sriov_vf(adev))
@@ -907,16 +914,9 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
udelay(1);
}
- if (i < adev->usec_timeout) {
- if (amdgpu_emu_mode == 1)
- DRM_INFO("ring test on %d succeeded in %d msecs\n", ring->idx, i);
- else
- DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
- } else {
- DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
- ring->idx, tmp);
- r = -EINVAL;
- }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
amdgpu_device_wb_free(adev, index);
return r;
@@ -981,13 +981,10 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err1;
}
tmp = le32_to_cpu(adev->wb.wb[index]);
- if (tmp == 0xDEADBEEF) {
- DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ if (tmp == 0xDEADBEEF)
r = 0;
- } else {
- DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+ else
r = -EINVAL;
- }
err1:
amdgpu_ib_free(adev, &ib, NULL);
@@ -1086,10 +1083,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
}
/**
- * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
- *
+ * sdma_v5_0_ring_pad_ib - pad the IB
* @ib: indirect buffer to fill with padding
*
+ * Pad the IB with NOPs to a boundary multiple of 8.
*/
static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
@@ -1097,7 +1094,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 0x7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
@@ -1129,7 +1126,7 @@ static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
amdgpu_ring_write(ring, seq); /* reference */
- amdgpu_ring_write(ring, 0xfffffff); /* mask */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}
@@ -1173,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
static int sdma_v5_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1518,9 +1525,9 @@ static int sdma_v5_0_set_clockgating_state(void *handle,
case CHIP_NAVI14:
case CHIP_NAVI12:
sdma_v5_0_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
sdma_v5_0_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -1588,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
6 + /* sdma_v5_0_ring_emit_pipeline_sync */
/* sdma_v5_0_ring_emit_vm_flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
.emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1602,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
.pad_ib = sdma_v5_0_ring_pad_ib,
.emit_wreg = sdma_v5_0_ring_emit_wreg,
.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
.preempt_ib = sdma_v5_0_ring_preempt_ib,
@@ -1710,17 +1718,15 @@ static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = {
static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
- struct drm_gpu_scheduler *sched;
unsigned i;
if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 493af42152f2..4d415bfdb42f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -975,6 +975,17 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
{GRBM_STATUS},
+ {mmGRBM_STATUS2},
+ {mmGRBM_STATUS_SE0},
+ {mmGRBM_STATUS_SE1},
+ {mmSRBM_STATUS},
+ {mmSRBM_STATUS2},
+ {DMA_STATUS_REG + DMA0_REGISTER_OFFSET},
+ {DMA_STATUS_REG + DMA1_REGISTER_OFFSET},
+ {mmCP_STAT},
+ {mmCP_STALLED_STAT1},
+ {mmCP_STALLED_STAT2},
+ {mmCP_STALLED_STAT3},
{GB_ADDR_CONFIG},
{MC_ARB_RAMCFG},
{GB_TILE_MODE0},
@@ -1186,6 +1197,11 @@ static int si_asic_reset(struct amdgpu_device *adev)
return 0;
}
+static bool si_asic_supports_baco(struct amdgpu_device *adev)
+{
+ return false;
+}
+
static enum amd_reset_method
si_asic_reset_method(struct amdgpu_device *adev)
{
@@ -1414,6 +1430,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.get_pcie_usage = &si_get_pcie_usage,
.need_reset_on_init = &si_need_reset_on_init,
.get_pcie_replay_count = &si_get_pcie_replay_count,
+ .supports_baco = &si_asic_supports_baco,
};
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
@@ -1633,7 +1650,6 @@ static void si_init_golden_registers(struct amdgpu_device *adev)
static void si_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -1668,12 +1684,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(adev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
return;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1682,14 +1693,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(PCIE_LC_STATUS1);
max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
@@ -1706,15 +1720,23 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
}
for (i = 0; i < 10; i++) {
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp |= LC_SET_QUIESCE;
@@ -1726,25 +1748,44 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
mdelay(100);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
-
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp &= ~LC_SET_QUIESCE;
@@ -1757,15 +1798,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
- tmp16 |= 3;
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
- tmp16 |= 2;
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index bdda8b4e03f0..42d5601b6bf3 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -648,7 +648,7 @@ static int si_dma_set_clockgating_state(void *handle,
bool enable;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ enable = (state == AMD_CG_STATE_GATE);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -834,16 +834,14 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
- struct drm_gpu_scheduler *sched;
unsigned i;
adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
for (i = 0; i < adev->sdma.num_instances; i++) {
- sched = &adev->sdma.instance[i].ring.sched;
- adev->vm_manager.vm_pte_rqs[i] =
- &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
}
- adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version si_dma_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 57bb5f9e08b2..88ae27a5a03d 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -64,7 +64,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev)
u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
si_ih_disable_interrupts(adev);
- WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8);
+ /* set dummy read address to dummy page address */
+ WREG32(INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32(INTERRUPT_CNTL);
interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
index c44723c267c9..c902f26cf50d 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c
@@ -234,7 +234,7 @@ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control,
DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ",
(uint16_t)address, numbytes);
- if (drm_debug & DRM_UT_DRIVER) {
+ if (drm_debug_enabled(DRM_UT_DRIVER)) {
print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
16, 1, data, numbytes, false);
}
@@ -388,7 +388,7 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control,
DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data :",
(uint16_t)address, bytes_received);
- if (drm_debug & DRM_UT_DRIVER) {
+ if (drm_debug_enabled(DRM_UT_DRIVER)) {
print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
16, 1, data, bytes_received, false);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index f8ab80c8801b..15f3424a1ff7 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -58,13 +58,18 @@
#include "mmhub_v1_0.h"
#include "df_v1_7.h"
#include "df_v3_6.h"
+#include "nbio_v6_1.h"
+#include "nbio_v7_0.h"
+#include "nbio_v7_4.h"
#include "vega10_ih.h"
#include "sdma_v4_0.h"
#include "uvd_v7_0.h"
#include "vce_v4_0.h"
#include "vcn_v1_0.h"
#include "vcn_v2_0.h"
+#include "jpeg_v2_0.h"
#include "vcn_v2_5.h"
+#include "jpeg_v2_5.h"
#include "dce_virtual.h"
#include "mxgpu_ai.h"
#include "amdgpu_smu.h"
@@ -91,8 +96,8 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags, address, data;
u32 r;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, reg);
@@ -106,8 +111,8 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
unsigned long flags, address, data;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
WREG32(address, reg);
@@ -121,8 +126,8 @@ static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
{
unsigned long flags, address, data;
u64 r;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
/* read low 32 bit */
@@ -142,8 +147,8 @@ static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)
{
unsigned long flags, address, data;
- address = adev->nbio_funcs->get_pcie_index_offset(adev);
- data = adev->nbio_funcs->get_pcie_data_offset(adev);
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
spin_lock_irqsave(&adev->pcie_idx_lock, flags);
/* write low 32 bit */
@@ -262,7 +267,7 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
static u32 soc15_get_config_memsize(struct amdgpu_device *adev)
{
- return adev->nbio_funcs->get_memsize(adev);
+ return adev->nbio.funcs->get_memsize(adev);
}
static u32 soc15_get_xclk(struct amdgpu_device *adev)
@@ -336,6 +341,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_BUSY_STAT)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
@@ -461,7 +467,7 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev)
/* wait for asic to come out of reset */
for (i = 0; i < adev->usec_timeout; i++) {
- u32 memsize = adev->nbio_funcs->get_memsize(adev);
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
if (memsize != 0xffffffff)
break;
@@ -473,78 +479,53 @@ static int soc15_asic_mode1_reset(struct amdgpu_device *adev)
return ret;
}
-static int soc15_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap)
-{
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
- if (!pp_funcs || !pp_funcs->get_asic_baco_capability) {
- *cap = false;
- return -ENOENT;
- }
-
- return pp_funcs->get_asic_baco_capability(pp_handle, cap);
-}
-
static int soc15_asic_baco_reset(struct amdgpu_device *adev)
{
- void *pp_handle = adev->powerplay.pp_handle;
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
-
- if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state)
- return -ENOENT;
-
- /* enter BACO state */
- if (pp_funcs->set_asic_baco_state(pp_handle, 1))
- return -EIO;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
- /* exit BACO state */
- if (pp_funcs->set_asic_baco_state(pp_handle, 0))
- return -EIO;
+ /* avoid NBIF got stuck when do RAS recovery in BACO reset */
+ if (ras && ras->supported)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
- dev_info(adev->dev, "GPU BACO reset\n");
+ ret = amdgpu_dpm_baco_reset(adev);
+ if (ret)
+ return ret;
- adev->in_baco_reset = 1;
+ /* re-enable doorbell interrupt after BACO exit */
+ if (ras && ras->supported)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
return 0;
}
-static int soc15_mode2_reset(struct amdgpu_device *adev)
-{
- if (!adev->powerplay.pp_funcs ||
- !adev->powerplay.pp_funcs->asic_reset_mode_2)
- return -ENOENT;
-
- return adev->powerplay.pp_funcs->asic_reset_mode_2(adev->powerplay.pp_handle);
-}
-
static enum amd_reset_method
soc15_asic_reset_method(struct amdgpu_device *adev)
{
- bool baco_reset;
+ bool baco_reset = false;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (adev->asic_type) {
case CHIP_RAVEN:
+ case CHIP_RENOIR:
return AMD_RESET_METHOD_MODE2;
case CHIP_VEGA10:
case CHIP_VEGA12:
- soc15_asic_get_baco_capability(adev, &baco_reset);
+ case CHIP_ARCTURUS:
+ baco_reset = amdgpu_dpm_is_baco_supported(adev);
break;
case CHIP_VEGA20:
if (adev->psp.sos_fw_version >= 0x80067)
- soc15_asic_get_baco_capability(adev, &baco_reset);
- else
- baco_reset = false;
- if (baco_reset) {
- struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ baco_reset = amdgpu_dpm_is_baco_supported(adev);
- if (hive || (ras && ras->supported))
- baco_reset = false;
- }
+ /*
+ * 1. PMFW version > 0x284300: all cases use baco
+ * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
+ */
+ if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
+ baco_reset = false;
break;
default:
- baco_reset = false;
break;
}
@@ -556,13 +537,17 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
static int soc15_asic_reset(struct amdgpu_device *adev)
{
+ /* original raven doesn't have full asic reset */
+ if (adev->pdev->device == 0x15dd && adev->rev_id < 0x8)
+ return 0;
+
switch (soc15_asic_reset_method(adev)) {
case AMD_RESET_METHOD_BACO:
if (!adev->in_suspend)
amdgpu_inc_vram_lost(adev);
return soc15_asic_baco_reset(adev);
case AMD_RESET_METHOD_MODE2:
- return soc15_mode2_reset(adev);
+ return amdgpu_dpm_mode2_reset(adev);
default:
if (!adev->in_suspend)
amdgpu_inc_vram_lost(adev);
@@ -570,6 +555,22 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
}
}
+static bool soc15_supports_baco(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_ARCTURUS:
+ return amdgpu_dpm_is_baco_supported(adev);
+ case CHIP_VEGA20:
+ if (adev->psp.sos_fw_version >= 0x80067)
+ return amdgpu_dpm_is_baco_supported(adev);
+ return false;
+ default:
+ return false;
+ }
+}
+
/*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
u32 cntl_reg, u32 status_reg)
{
@@ -626,8 +627,8 @@ static void soc15_program_aspm(struct amdgpu_device *adev)
static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev,
bool enable)
{
- adev->nbio_funcs->enable_doorbell_aperture(adev, enable);
- adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable);
+ adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
}
static const struct amdgpu_ip_block_version vega10_common_ip_block =
@@ -641,7 +642,7 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block =
static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
{
- return adev->nbio_funcs->get_rev_id(adev);
+ return adev->nbio.funcs->get_rev_id(adev);
}
int soc15_set_ip_blocks(struct amdgpu_device *adev)
@@ -667,21 +668,25 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
adev->gmc.xgmi.supported = true;
- if (adev->flags & AMD_IS_APU)
- adev->nbio_funcs = &nbio_v7_0_funcs;
- else if (adev->asic_type == CHIP_VEGA20 ||
- adev->asic_type == CHIP_ARCTURUS)
- adev->nbio_funcs = &nbio_v7_4_funcs;
- else
- adev->nbio_funcs = &nbio_v6_1_funcs;
+ if (adev->flags & AMD_IS_APU) {
+ adev->nbio.funcs = &nbio_v7_0_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg;
+ } else if (adev->asic_type == CHIP_VEGA20 ||
+ adev->asic_type == CHIP_ARCTURUS) {
+ adev->nbio.funcs = &nbio_v7_4_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
+ } else {
+ adev->nbio.funcs = &nbio_v6_1_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg;
+ }
if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)
- adev->df_funcs = &df_v3_6_funcs;
+ adev->df.funcs = &df_v3_6_funcs;
else
- adev->df_funcs = &df_v1_7_funcs;
+ adev->df.funcs = &df_v1_7_funcs;
adev->rev_id = soc15_get_rev_id(adev);
- adev->nbio_funcs->detect_hw_virt(adev);
+ adev->nbio.funcs->detect_hw_virt(adev);
if (amdgpu_sriov_vf(adev))
adev->virt.ops = &xgpu_ai_virt_ops;
@@ -713,11 +718,11 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
}
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
- if (!amdgpu_sriov_vf(adev)) {
- if (is_support_sw_smu(adev))
+ if (is_support_sw_smu(adev)) {
+ if (!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- else
- amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ } else {
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
}
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -750,13 +755,31 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_ARCTURUS:
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
- amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ } else {
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ }
+
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
+ amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+ } else {
+ amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+ }
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block);
break;
case CHIP_RENOIR:
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
@@ -764,8 +787,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))
amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block);
- if (is_support_sw_smu(adev))
- amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
@@ -775,6 +797,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &dm_ip_block);
#endif
amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
break;
default:
return -EINVAL;
@@ -785,7 +808,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
- adev->nbio_funcs->hdp_flush(adev, ring);
+ adev->nbio.funcs->hdp_flush(adev, ring);
}
static void soc15_invalidate_hdp(struct amdgpu_device *adev,
@@ -953,6 +976,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.get_pcie_usage = &soc15_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
+ .supports_baco = &soc15_supports_baco,
};
static const struct amdgpu_asic_funcs vega20_asic_funcs =
@@ -961,6 +985,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.read_bios_from_rom = &soc15_read_bios_from_rom,
.read_register = &soc15_read_register,
.reset = &soc15_asic_reset,
+ .reset_method = &soc15_asic_reset_method,
.set_vga_state = &soc15_vga_set_state,
.get_xclk = &soc15_get_xclk,
.set_uvd_clocks = &soc15_set_uvd_clocks,
@@ -973,7 +998,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.get_pcie_usage = &vega20_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
- .reset_method = &soc15_asic_reset_method
+ .supports_baco = &soc15_supports_baco,
};
static int soc15_common_early_init(void *handle)
@@ -1157,7 +1182,10 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_SDMA_MGCG |
AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_MC_MGCG |
- AMD_CG_SUPPORT_MC_LS;
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x32;
break;
@@ -1178,19 +1206,16 @@ static int soc15_common_early_init(void *handle)
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_ROM_MGCG |
AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_IH_CG |
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_DF_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_SDMA |
AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_VCN_DPG;
adev->external_rev_id = adev->rev_id + 0x91;
-
- if (adev->pm.pp_feature & PP_GFXOFF_MASK)
- adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
- AMD_PG_SUPPORT_CP |
- AMD_PG_SUPPORT_RLC_SMU_HS;
break;
default:
/* FIXME: not supported yet */
@@ -1208,11 +1233,15 @@ static int soc15_common_early_init(void *handle)
static int soc15_common_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r = 0;
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_get_irq(adev);
- return 0;
+ if (adev->nbio.funcs->ras_late_init)
+ r = adev->nbio.funcs->ras_late_init(adev);
+
+ return r;
}
static int soc15_common_sw_init(void *handle)
@@ -1222,13 +1251,17 @@ static int soc15_common_sw_init(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_add_irq_id(adev);
- adev->df_funcs->sw_init(adev);
+ adev->df.funcs->sw_init(adev);
return 0;
}
static int soc15_common_sw_fini(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_nbio_ras_fini(adev);
+ adev->df.funcs->sw_fini(adev);
return 0;
}
@@ -1241,12 +1274,12 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
- adev->nbio_funcs->sdma_doorbell_range(adev, i,
+ adev->nbio.funcs->sdma_doorbell_range(adev, i,
ring->use_doorbell, ring->doorbell_index,
adev->doorbell_index.sdma_doorbell_range);
}
- adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
+ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
adev->irq.ih.doorbell_index);
}
}
@@ -1260,13 +1293,13 @@ static int soc15_common_hw_init(void *handle)
/* enable aspm */
soc15_program_aspm(adev);
/* setup nbio registers */
- adev->nbio_funcs->init_registers(adev);
+ adev->nbio.funcs->init_registers(adev);
/* remap HDP registers to a hole in mmio space,
* for the purpose of expose those registers
* to process space
*/
- if (adev->nbio_funcs->remap_hdp_registers)
- adev->nbio_funcs->remap_hdp_registers(adev);
+ if (adev->nbio.funcs->remap_hdp_registers)
+ adev->nbio.funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
soc15_enable_doorbell_aperture(adev, true);
@@ -1289,6 +1322,14 @@ static int soc15_common_hw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_put_irq(adev);
+ if (adev->nbio.ras_if &&
+ amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+ if (adev->nbio.funcs->init_ras_controller_interrupt)
+ amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0);
+ if (adev->nbio.funcs->init_ras_err_event_athub_interrupt)
+ amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
+
return 0;
}
@@ -1429,39 +1470,39 @@ static int soc15_common_set_clockgating_state(void *handle,
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
- adev->nbio_funcs->update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
- adev->nbio_funcs->update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
soc15_update_hdp_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_drm_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_drm_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_rom_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
- adev->df_funcs->update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
+ adev->df.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
break;
case CHIP_RAVEN:
case CHIP_RENOIR:
- adev->nbio_funcs->update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
- adev->nbio_funcs->update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
soc15_update_hdp_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_drm_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_drm_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
soc15_update_rom_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
case CHIP_ARCTURUS:
soc15_update_hdp_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
break;
default:
break;
@@ -1477,7 +1518,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
if (amdgpu_sriov_vf(adev))
*flags = 0;
- adev->nbio_funcs->get_clockgating_state(adev, flags);
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
/* AMD_CG_SUPPORT_HDP_LS */
data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
@@ -1499,7 +1540,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
*flags |= AMD_CG_SUPPORT_ROM_MGCG;
- adev->df_funcs->get_clockgating_state(adev, flags);
+ adev->df.funcs->get_clockgating_state(adev, flags);
}
static int soc15_common_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index a3dde0c31f57..d0fb7a67c1a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -28,8 +28,8 @@
#include "nbio_v7_0.h"
#include "nbio_v7_4.h"
-#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4
-#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1
+#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6
+#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3
extern const struct amd_ip_funcs soc15_common_ip_funcs;
@@ -60,6 +60,18 @@ struct soc15_allowed_register_entry {
bool grbm_indexed;
};
+struct soc15_ras_field_entry {
+ const char *name;
+ uint32_t hwip;
+ uint32_t inst;
+ uint32_t seg;
+ uint32_t reg_offset;
+ uint32_t sec_count_mask;
+ uint32_t sec_count_shift;
+ uint32_t ded_count_mask;
+ uint32_t ded_count_shift;
+};
+
#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
@@ -67,6 +79,8 @@ struct soc15_allowed_register_entry {
#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
+#define SOC15_REG_FIELD(reg, field) reg##__##field##_MASK, reg##__##field##__SHIFT
+
void soc15_grbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
int soc15_set_ip_blocks(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 839f186e1182..19e870c79896 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -52,6 +52,7 @@
uint32_t old_ = 0; \
uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
uint32_t loop = adev->usec_timeout; \
+ ret = 0; \
while ((tmp_ & (mask)) != (expected_value)) { \
if (old_ != tmp_) { \
loop = adev->usec_timeout; \
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
new file mode 100644
index 000000000000..0d6b50528d76
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v6_0.h"
+#include "amdgpu.h"
+
+static void umc_v6_0_init_registers(struct amdgpu_device *adev)
+{
+ unsigned i,j;
+
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 4; j++)
+ WREG32((i*0x100000 + 0x5010c + j*0x2000)/4, 0x1002);
+}
+
+const struct amdgpu_umc_funcs umc_v6_0_funcs = {
+ .init_registers = umc_v6_0_init_registers,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h
new file mode 100644
index 000000000000..109f1a57a46e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V6_0_H__
+#define __UMC_V6_0_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+extern const struct amdgpu_umc_funcs umc_v6_0_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 8502e736f721..793bf70e64b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -28,19 +28,24 @@
#include "rsmu/rsmu_0_0_2_sh_mask.h"
#include "umc/umc_6_1_1_offset.h"
#include "umc/umc_6_1_1_sh_mask.h"
+#include "umc/umc_6_1_2_offset.h"
-#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10
+#define UMC_6_INST_DIST 0x40000
/*
* (addr / 256) * 8192, the higher 26 bits in ErrorAddr
* is the index of 8KB block
*/
-#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
+#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
/* channel index is the index of 256B block */
#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
/* offset in 256B block */
#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL)
+#define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++)
+#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
+#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
+
const uint32_t
umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
{2, 18, 11, 27}, {4, 20, 13, 29},
@@ -49,30 +54,35 @@ const uint32_t
{9, 25, 0, 16}, {15, 31, 6, 22}
};
-static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev,
- uint32_t umc_instance)
+static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN, 1);
+}
+
+static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN, 0);
+}
+
+static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
{
uint32_t rsmu_umc_index;
rsmu_umc_index = RREG32_SOC15(RSMU, 0,
mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
- rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
- RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
- RSMU_UMC_INDEX_MODE_EN, 1);
- rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
- RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
- RSMU_UMC_INDEX_INSTANCE, umc_instance);
- rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
+
+ return REG_GET_FIELD(rsmu_umc_index,
RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
- RSMU_UMC_INDEX_WREN, 1 << umc_instance);
- WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
- rsmu_umc_index);
+ RSMU_UMC_INDEX_MODE_EN);
}
-static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
+static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
{
- WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
- RSMU_UMC_INDEX_MODE_EN, 0);
+ return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst;
}
static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
@@ -84,39 +94,50 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
uint64_t mc_umc_status;
uint32_t mc_umc_status_addr;
- ecc_err_cnt_sel_addr =
- SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
- ecc_err_cnt_addr =
- SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
- mc_umc_status_addr =
- SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ }
/* select the lower chip and check the error count */
- ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 0);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
- ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
*error_count +=
(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
UMC_V6_1_CE_CNT_INIT);
/* clear the lower chip err count */
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
/* select the higher chip and check the err counter */
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 1);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
- ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
*error_count +=
(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
UMC_V6_1_CE_CNT_INIT);
/* clear the higher chip err count */
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
/* check for SRAM correctable error
MCUMC_STATUS is a 64 bit register */
- mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
@@ -130,11 +151,18 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
uint64_t mc_umc_status;
uint32_t mc_umc_status_addr;
- mc_umc_status_addr =
- SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ }
/* check the MCUMC_STATUS */
- mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
@@ -144,112 +172,202 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
*error_count += 1;
}
-static void umc_v6_1_query_error_count(struct amdgpu_device *adev,
- struct ras_err_data *err_data, uint32_t umc_reg_offset,
- uint32_t channel_index)
-{
- umc_v6_1_query_correctable_error_count(adev, umc_reg_offset,
- &(err_data->ce_count));
- umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset,
- &(err_data->ue_count));
-}
-
static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
- amdgpu_umc_for_each_channel(umc_v6_1_query_error_count);
+ struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v6_1_querry_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+ }
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
}
static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data,
- uint32_t umc_reg_offset, uint32_t channel_index)
+ uint32_t umc_reg_offset,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
{
uint32_t lsb, mc_umc_status_addr;
- uint64_t mc_umc_status, err_addr;
-
- mc_umc_status_addr =
- SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
+ struct eeprom_table_record *err_rec;
+ uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
+ }
/* skip error address process if -ENOMEM */
if (!err_data->err_addr) {
/* clear umc status */
- WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
return;
}
- mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
+ err_rec = &err_data->err_addr[err_data->err_addr_cnt];
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
/* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
- err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4);
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
/* the lowest lsb bits should be ignored */
lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
err_addr &= ~((0x1ULL << lsb) - 1);
/* translate umc channel address to soc pa, 3 parts are included */
- err_data->err_addr[err_data->err_addr_cnt] =
- ADDR_OF_8KB_BLOCK(err_addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(err_addr);
-
- err_data->err_addr_cnt++;
+ retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* we only save ue error information currently, ce is skipped */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
+ == 1) {
+ err_rec->address = err_addr;
+ /* page frame address is saved */
+ err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+ err_rec->ts = (uint64_t)ktime_get_real_seconds();
+ err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+ err_rec->cu = 0;
+ err_rec->mem_channel = channel_index;
+ err_rec->mcumc_id = umc_inst;
+
+ err_data->err_addr_cnt++;
+ }
}
/* clear umc status */
- WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
}
static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
void *ras_error_status)
{
- amdgpu_umc_for_each_channel(umc_v6_1_query_error_address);
+ struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_query_error_address(adev,
+ err_data,
+ umc_reg_offset,
+ ch_inst,
+ umc_inst);
+ }
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
}
-static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
- uint32_t umc_reg_offset, uint32_t channel_index)
+static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
{
uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
uint32_t ecc_err_cnt_addr;
- ecc_err_cnt_sel_addr =
- SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
- ecc_err_cnt_addr =
- SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+ }
/* select the lower chip and check the error count */
- ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 0);
/* set ce error interrupt type to APIC based interrupt */
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrInt, 0x1);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
/* set error count to initial value */
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
/* select the higher chip and check the err counter */
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 1);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
}
-static void umc_v6_1_ras_init(struct amdgpu_device *adev)
+static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
{
- void *ras_error_status = NULL;
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset);
+ }
- amdgpu_umc_for_each_channel(umc_v6_1_ras_init_per_channel);
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
}
const struct amdgpu_umc_funcs umc_v6_1_funcs = {
- .ras_init = umc_v6_1_ras_init,
+ .err_cnt_init = umc_v6_1_err_cnt_init,
+ .ras_late_init = amdgpu_umc_ras_late_init,
.query_ras_error_count = umc_v6_1_query_ras_error_count,
.query_ras_error_address = umc_v6_1_query_ras_error_address,
- .enable_umc_index_mode = umc_v6_1_enable_umc_index_mode,
- .disable_umc_index_mode = umc_v6_1_disable_umc_index_mode,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
index dab9cbd292c5..0ce1d323cfdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
@@ -35,7 +35,8 @@
/* total channel instances in one umc block */
#define UMC_V6_1_TOTAL_CHANNEL_NUM (UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM)
/* UMC regiser per channel offset */
-#define UMC_V6_1_PER_CHANNEL_OFFSET 0x800
+#define UMC_V6_1_PER_CHANNEL_OFFSET_VG20 0x800
+#define UMC_V6_1_PER_CHANNEL_OFFSET_ARCT 0x400
/* EccErrCnt max value */
#define UMC_V6_1_CE_CNT_MAX 0xffff
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 01e62fb8e6e0..0fa8aae2d78e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -763,7 +763,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 670784a78512..e0aadcaf6c8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -206,13 +206,14 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
* Open up a stream for HW test
*/
static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -220,15 +221,15 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00010000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -268,13 +269,14 @@ err:
*/
static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
uint32_t handle,
+ struct amdgpu_bo *bo,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -282,15 +284,15 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00010000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -327,13 +329,20 @@ err:
static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
- r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
+ r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = uvd_v6_0_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
- r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence);
+ r = uvd_v6_0_enc_get_destroy_msg(ring, 1, bo, &fence);
if (r)
goto error;
@@ -345,6 +354,8 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
@@ -1410,7 +1421,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 01f658fa72c6..0995378d8263 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -214,13 +214,14 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
* Open up a stream for HW test
*/
static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -228,15 +229,15 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00000000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -275,13 +276,14 @@ err:
* Close up a stream for HW test or if userspace failed to do so
*/
static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -289,15 +291,15 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00000000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002;
@@ -334,13 +336,20 @@ err:
static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
- r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
+ r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = uvd_v7_0_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
- r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence);
+ r = uvd_v7_0_enc_get_destroy_msg(ring, 1, bo, &fence);
if (r)
goto error;
@@ -352,6 +361,8 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 475ae68f38f5..217db187207c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -739,7 +739,7 @@ static int vce_v3_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
int i;
if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 683701cf7270..3fd102efb7af 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -887,7 +887,7 @@ static int vce_v4_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
int i;
if ((adev->asic_type == CHIP_POLARIS10) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 93b3500e522b..1a24fadd30e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
@@ -36,21 +37,22 @@
#include "mmhub/mmhub_9_1_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_1_0.h"
+#include "jpeg_v1_0.h"
-#define mmUVD_RBC_XX_IB_REG_CHECK 0x05ab
-#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1
-#define mmUVD_REG_XX_MASK 0x05ac
-#define mmUVD_REG_XX_MASK_BASE_IDX 1
+#define mmUVD_RBC_XX_IB_REG_CHECK_1_0 0x05ab
+#define mmUVD_RBC_XX_IB_REG_CHECK_1_0_BASE_IDX 1
+#define mmUVD_REG_XX_MASK_1_0 0x05ac
+#define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1
static int vcn_v1_0_stop(struct amdgpu_device *adev);
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
-static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
-static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state);
+ int inst_idx, struct dpg_pause_state *new_state);
+
+static void vcn_v1_0_idle_work_handler(struct work_struct *work);
/**
* vcn_v1_0_early_init - set function pointers
@@ -68,9 +70,10 @@ static int vcn_v1_0_early_init(void *handle)
vcn_v1_0_set_dec_ring_funcs(adev);
vcn_v1_0_set_enc_ring_funcs(adev);
- vcn_v1_0_set_jpeg_ring_funcs(adev);
vcn_v1_0_set_irq_funcs(adev);
+ jpeg_v1_0_early_init(handle);
+
return 0;
}
@@ -101,15 +104,13 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
}
- /* VCN JPEG TRAP */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.inst->irq);
- if (r)
- return r;
-
r = amdgpu_vcn_sw_init(adev);
if (r)
return r;
+ /* Override the work func */
+ adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler;
+
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
const struct common_firmware_header *hdr;
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
@@ -149,17 +150,11 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
}
- ring = &adev->vcn.inst->ring_jpeg;
- sprintf(ring->name, "vcn_jpeg");
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
- if (r)
- return r;
-
adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
- adev->vcn.internal.jpeg_pitch = adev->vcn.inst->external.jpeg_pitch =
- SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
- return 0;
+ r = jpeg_v1_0_sw_init(handle);
+
+ return r;
}
/**
@@ -178,6 +173,8 @@ static int vcn_v1_0_sw_fini(void *handle)
if (r)
return r;
+ jpeg_v1_0_sw_fini(handle);
+
r = amdgpu_vcn_sw_fini(adev);
return r;
@@ -202,13 +199,12 @@ static int vcn_v1_0_hw_init(void *handle)
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
- ring->sched.ready = true;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
}
- ring = &adev->vcn.inst->ring_jpeg;
+ ring = &adev->jpeg.inst->ring_dec;
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
@@ -839,9 +835,9 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
vcn_v1_0_mc_resume_spg_mode(adev);
- WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK, 0x10);
- WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK,
- RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK) | 0x3);
+ WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0,
+ RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0) | 0x3);
/* enable VCPU clock */
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK);
@@ -948,22 +944,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
- ring = &adev->vcn.inst->ring_jpeg;
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
- UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
-
- /* initialize wptr */
- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-
- /* copy patch commands to the jpeg ring */
- vcn_v1_0_jpeg_ring_set_patch_ring(ring,
- (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+ jpeg_v1_0_start(adev, 0);
return 0;
}
@@ -1107,13 +1088,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
- /* initialize JPEG wptr */
- ring = &adev->vcn.inst->ring_jpeg;
- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-
- /* copy patch commands to the jpeg ring */
- vcn_v1_0_jpeg_ring_set_patch_ring(ring,
- (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+ jpeg_v1_0_start(adev, 1);
return 0;
}
@@ -1224,7 +1199,7 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
}
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state)
+ int inst_idx, struct dpg_pause_state *new_state)
{
int ret_code;
uint32_t reg_data = 0;
@@ -1317,7 +1292,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
/* Restore */
- ring = &adev->vcn.inst->ring_jpeg;
+ ring = &adev->jpeg.inst->ring_dec;
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
@@ -1371,7 +1346,7 @@ static int vcn_v1_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
@@ -1717,389 +1692,6 @@ static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-
-/**
- * vcn_v1_0_jpeg_ring_get_rptr - get read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware read pointer
- */
-static uint64_t vcn_v1_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_get_wptr - get write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware write pointer
- */
-static uint64_t vcn_v1_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_set_wptr - set write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the write pointer to the hardware
- */
-static void vcn_v1_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
-}
-
-/**
- * vcn_v1_0_jpeg_ring_insert_start - insert a start command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a start command to the ring.
- */
-static void vcn_v1_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x68e04);
-
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x80010000);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_insert_end - insert a end command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a end command to the ring.
- */
-static void vcn_v1_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x68e04);
-
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x00010000);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_emit_fence - emit an fence & trap command
- *
- * @ring: amdgpu_ring pointer
- * @fence: fence to emit
- *
- * Write a fence and a trap command to the ring.
- */
-static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
- unsigned flags)
-{
- struct amdgpu_device *adev = ring->adev;
-
- WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, seq);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, seq);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x8);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
- amdgpu_ring_write(ring, 0);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x01400200);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, seq);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2));
- amdgpu_ring_write(ring, 0xffffffff);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x3fbc);
-
- amdgpu_ring_write(ring,
- PACKETJ(0, 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x1);
-
- /* emit trap */
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
- amdgpu_ring_write(ring, 0);
-}
-
-/**
- * vcn_v1_0_jpeg_ring_emit_ib - execute indirect buffer
- *
- * @ring: amdgpu_ring pointer
- * @ib: indirect buffer to execute
- *
- * Write ring commands to execute the indirect buffer.
- */
-static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib,
- uint32_t flags)
-{
- struct amdgpu_device *adev = ring->adev;
- unsigned vmid = AMDGPU_JOB_GET_VMID(job);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, ib->length_dw);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
-
- amdgpu_ring_write(ring,
- PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
- amdgpu_ring_write(ring, 0);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x01400200);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x2);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
- amdgpu_ring_write(ring, 0x2);
-}
-
-static void vcn_v1_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring,
- uint32_t reg, uint32_t val,
- uint32_t mask)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t reg_offset = (reg << 2);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x01400200);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, val);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
- if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
- ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring,
- PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
- } else {
- amdgpu_ring_write(ring, reg_offset);
- amdgpu_ring_write(ring,
- PACKETJ(0, 0, 0, PACKETJ_TYPE3));
- }
- amdgpu_ring_write(ring, mask);
-}
-
-static void vcn_v1_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
-{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- uint32_t data0, data1, mask;
-
- pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
-
- /* wait for register write */
- data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
- data1 = lower_32_bits(pd_addr);
- mask = 0xffffffff;
- vcn_v1_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask);
-}
-
-static void vcn_v1_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring,
- uint32_t reg, uint32_t val)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t reg_offset = (reg << 2);
-
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
- if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
- ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring,
- PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
- } else {
- amdgpu_ring_write(ring, reg_offset);
- amdgpu_ring_write(ring,
- PACKETJ(0, 0, 0, PACKETJ_TYPE0));
- }
- amdgpu_ring_write(ring, val);
-}
-
-static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)
-{
- int i;
-
- WARN_ON(ring->wptr % 2 || count % 2);
-
- for (i = 0; i < count / 2; i++) {
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
- amdgpu_ring_write(ring, 0);
- }
-}
-
-static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
-{
- struct amdgpu_device *adev = ring->adev;
- ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
- if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
- ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
- ring->ring[(*ptr)++] = 0;
- ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0);
- } else {
- ring->ring[(*ptr)++] = reg_offset;
- ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
- }
- ring->ring[(*ptr)++] = val;
-}
-
-static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr)
-{
- struct amdgpu_device *adev = ring->adev;
-
- uint32_t reg, reg_offset, val, mask, i;
-
- // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
- reg_offset = (reg << 2);
- val = lower_32_bits(ring->gpu_addr);
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
- // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
- reg_offset = (reg << 2);
- val = upper_32_bits(ring->gpu_addr);
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
- // 3rd to 5th: issue MEM_READ commands
- for (i = 0; i <= 2; i++) {
- ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
- ring->ring[ptr++] = 0;
- }
-
- // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
- reg_offset = (reg << 2);
- val = 0x13;
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
- // 7th: program mmUVD_JRBC_RB_REF_DATA
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA);
- reg_offset = (reg << 2);
- val = 0x1;
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
- // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
- reg_offset = (reg << 2);
- val = 0x1;
- mask = 0x1;
-
- ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
- ring->ring[ptr++] = 0x01400200;
- ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
- ring->ring[ptr++] = val;
- ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
- if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
- ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
- ring->ring[ptr++] = 0;
- ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3);
- } else {
- ring->ring[ptr++] = reg_offset;
- ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
- }
- ring->ring[ptr++] = mask;
-
- //9th to 21st: insert no-op
- for (i = 0; i <= 12; i++) {
- ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
- ring->ring[ptr++] = 0;
- }
-
- //22nd: reset mmUVD_JRBC_RB_RPTR
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR);
- reg_offset = (reg << 2);
- val = 0;
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-
- //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
- reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
- reg_offset = (reg << 2);
- val = 0x12;
- vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
-}
-
static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -2124,9 +1716,6 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
case 120:
amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);
break;
- case 126:
- amdgpu_fence_process(&adev->vcn.inst->ring_jpeg);
- break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
@@ -2175,6 +1764,86 @@ static int vcn_v1_0_set_powergating_state(void *handle,
return ret;
}
+static void vcn_v1_0_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ unsigned int fences = 0, i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ struct dpg_pause_state new_state;
+
+ if (fences)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+ adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ }
+
+ fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec);
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_dec);
+
+ if (fences == 0) {
+ amdgpu_gfx_off_ctrl(adev, true);
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+ else
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
+ } else {
+ schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ }
+}
+
+void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ if (set_clocks) {
+ amdgpu_gfx_off_ctrl(adev, false);
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+ else
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
+ }
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ struct dpg_pause_state new_state;
+ unsigned int fences = 0, i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
+
+ if (fences)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec))
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+
+ adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ }
+}
+
static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.name = "vcn_v1_0",
.early_init = vcn_v1_0_early_init,
@@ -2221,7 +1890,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.insert_start = vcn_v1_0_dec_ring_insert_start,
.insert_end = vcn_v1_0_dec_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
+ .begin_use = vcn_v1_0_ring_begin_use,
.end_use = amdgpu_vcn_ring_end_use,
.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
@@ -2253,48 +1922,13 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.insert_nop = amdgpu_ring_insert_nop,
.insert_end = vcn_v1_0_enc_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
+ .begin_use = vcn_v1_0_ring_begin_use,
.end_use = amdgpu_vcn_ring_end_use,
.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
-static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
- .type = AMDGPU_RING_TYPE_VCN_JPEG,
- .align_mask = 0xf,
- .nop = PACKET0(0x81ff, 0),
- .support_64bit_ptrs = false,
- .no_user_fence = true,
- .vmhub = AMDGPU_MMHUB_0,
- .extra_dw = 64,
- .get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
- .get_wptr = vcn_v1_0_jpeg_ring_get_wptr,
- .set_wptr = vcn_v1_0_jpeg_ring_set_wptr,
- .emit_frame_size =
- 6 + 6 + /* hdp invalidate / flush */
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
- 8 + /* vcn_v1_0_jpeg_ring_emit_vm_flush */
- 26 + 26 + /* vcn_v1_0_jpeg_ring_emit_fence x2 vm fence */
- 6,
- .emit_ib_size = 22, /* vcn_v1_0_jpeg_ring_emit_ib */
- .emit_ib = vcn_v1_0_jpeg_ring_emit_ib,
- .emit_fence = vcn_v1_0_jpeg_ring_emit_fence,
- .emit_vm_flush = vcn_v1_0_jpeg_ring_emit_vm_flush,
- .test_ring = amdgpu_vcn_jpeg_ring_test_ring,
- .test_ib = amdgpu_vcn_jpeg_ring_test_ib,
- .insert_nop = vcn_v1_0_jpeg_ring_nop,
- .insert_start = vcn_v1_0_jpeg_ring_insert_start,
- .insert_end = vcn_v1_0_jpeg_ring_insert_end,
- .pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
- .emit_wreg = vcn_v1_0_jpeg_ring_emit_wreg,
- .emit_reg_wait = vcn_v1_0_jpeg_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
@@ -2311,12 +1945,6 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
DRM_INFO("VCN encode is enabled in VM mode\n");
}
-static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)
-{
- adev->vcn.inst->ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs;
- DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
-}
-
static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
.set = vcn_v1_0_set_interrupt_state,
.process = vcn_v1_0_process_interrupt,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
index 2a497a7a4840..f67d7391fc21 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
@@ -24,6 +24,8 @@
#ifndef __VCN_V1_0_H__
#define __VCN_V1_0_H__
+void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
+
extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 36ad0c0e8efb..4f7216788f11 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -47,39 +47,13 @@
#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7
#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2
-#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
-#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029
-#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a
-#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b
-#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea
-#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb
-#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf
-#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1
-#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8
-#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9
-#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
-#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec
-#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed
-#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
-#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
-#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
-#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
-
-#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
-
-#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b
-#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1
-#define mmUVD_REG_XX_MASK 0x026c
-#define mmUVD_REG_XX_MASK_BASE_IDX 1
-
static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
-static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v2_0_set_powergating_state(void *handle,
enum amd_powergating_state state);
static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state);
+ int inst_idx, struct dpg_pause_state *new_state);
/**
* vcn_v2_0_early_init - set function pointers
@@ -97,7 +71,6 @@ static int vcn_v2_0_early_init(void *handle)
vcn_v2_0_set_dec_ring_funcs(adev);
vcn_v2_0_set_enc_ring_funcs(adev);
- vcn_v2_0_set_jpeg_ring_funcs(adev);
vcn_v2_0_set_irq_funcs(adev);
return 0;
@@ -132,12 +105,6 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
}
- /* VCN JPEG TRAP */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
- VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst->irq);
- if (r)
- return r;
-
r = amdgpu_vcn_sw_init(adev);
if (r)
return r;
@@ -194,19 +161,8 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
}
- ring = &adev->vcn.inst->ring_jpeg;
- ring->use_doorbell = true;
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
- sprintf(ring->name, "vcn_jpeg");
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);
- if (r)
- return r;
-
adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
- adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->vcn.inst->external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
-
return 0;
}
@@ -244,32 +200,18 @@ static int vcn_v2_0_hw_init(void *handle)
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i, r;
- adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
ring->doorbell_index, 0);
- ring->sched.ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
goto done;
- }
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
- ring->sched.ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
goto done;
- }
- }
-
- ring = &adev->vcn.inst->ring_jpeg;
- ring->sched.ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
- goto done;
}
done:
@@ -305,9 +247,6 @@ static int vcn_v2_0_hw_fini(void *handle)
ring->sched.ready = false;
}
- ring = &adev->vcn.inst->ring_jpeg;
- ring->sched.ready = false;
-
return 0;
}
@@ -402,7 +341,6 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
- WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
}
static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect)
@@ -413,88 +351,88 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
if (!indirect) {
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
} else {
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
}
offset = 0;
} else {
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);
offset = size;
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
}
if (!indirect)
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
else
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
/* cache window 1: stack */
if (!indirect) {
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
} else {
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
}
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
/* cache window 2: context */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
/* non-cache window */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
/* VCN global tiling registers */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}
@@ -640,146 +578,23 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
UVD_CGC_CTRL__WCB_MODE_MASK |
UVD_CGC_CTRL__VCPU_MODE_MASK |
UVD_CGC_CTRL__SCPU_MODE_MASK);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
/* turn off clock gating */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
/* turn on SUVD clock gating */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
/* turn on sw mode in UVD_SUVD_CGC_CTRL */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}
/**
- * jpeg_v2_0_start - start JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * Setup and start the JPEG block
- */
-static int jpeg_v2_0_start(struct amdgpu_device *adev)
-{
- struct amdgpu_ring *ring = &adev->vcn.inst->ring_jpeg;
- uint32_t tmp;
- int r = 0;
-
- /* disable power gating */
- tmp = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp);
-
- SOC15_WAIT_ON_RREG(VCN, 0,
- mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
- UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
-
- if (r) {
- DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
- return r;
- }
-
- /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */
- tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1;
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp);
-
- /* JPEG disable CGC */
- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
- tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp);
-
- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
- tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
- | JPEG_CGC_GATE__JPEG2_DEC_MASK
- | JPEG_CGC_GATE__JPEG_ENC_MASK
- | JPEG_CGC_GATE__JMCIF_MASK
- | JPEG_CGC_GATE__JRBBM_MASK);
- WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp);
-
- /* enable JMI channel */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), 0,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- /* enable System Interrupt for JRBC */
- WREG32_P(SOC15_REG_OFFSET(VCN, 0, mmJPEG_SYS_INT_EN),
- JPEG_SYS_INT_EN__DJRBC_MASK,
- ~JPEG_SYS_INT_EN__DJRBC_MASK);
-
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-
- return 0;
-}
-
-/**
- * jpeg_v2_0_stop - stop JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * stop the JPEG block
- */
-static int jpeg_v2_0_stop(struct amdgpu_device *adev)
-{
- uint32_t tmp;
- int r = 0;
-
- /* reset JMI */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL),
- UVD_JMI_CNTL__SOFT_RESET_MASK,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- /* enable JPEG CGC */
- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
- tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp);
-
-
- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
- tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK
- |JPEG_CGC_GATE__JPEG2_DEC_MASK
- |JPEG_CGC_GATE__JPEG_ENC_MASK
- |JPEG_CGC_GATE__JMCIF_MASK
- |JPEG_CGC_GATE__JRBBM_MASK);
- WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp);
-
- /* enable power gating */
- tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS));
- tmp &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK;
- tmp |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF;
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp);
-
- tmp = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp);
-
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS,
- (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
- UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
-
- if (r) {
- DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
- return r;
- }
-
- return r;
-}
-
-/**
* vcn_v2_0_enable_clock_gating - enable VCN clock gating
*
* @adev: amdgpu_device pointer
@@ -939,7 +754,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
if (indirect)
- adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr;
+ adev->vcn.inst->dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst->dpg_sram_cpu_addr;
/* enable clock gating */
vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);
@@ -948,11 +763,11 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
/* disable master interupt */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
/* setup mmUVD_LMI_CTRL */
@@ -964,28 +779,28 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
0x00100000L);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MPC_CNTL),
0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MPC_SET_MUXA0),
((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MPC_SET_MUXB0),
((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MPC_SET_MUX),
((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
@@ -993,29 +808,29 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
/* release VCPU reset to boot */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect);
/* enable LMI MC and UMC channels */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_LMI_CTRL2),
0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect);
/* enable master interrupt */
- WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ WREG32_SOC15_DPG_MODE_2_0(0, SOC15_DPG_MODE_OFFSET_2_0(
UVD, 0, mmUVD_MASTINT_EN),
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
if (indirect)
- psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr,
- (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr -
- (uintptr_t)adev->vcn.dpg_sram_cpu_addr));
+ psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr,
+ (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr -
+ (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr));
/* force RBC into idle state */
rb_bufsz = order_base_2(ring->ring_size);
@@ -1061,12 +876,8 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, true);
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
- if (r)
- return r;
- goto jpeg;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
vcn_v2_0_disable_static_power_gating(adev);
@@ -1218,10 +1029,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
-jpeg:
- r = jpeg_v2_0_start(adev);
-
- return r;
+ return 0;
}
static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
@@ -1240,9 +1048,6 @@ static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
- tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
-
tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
@@ -1261,10 +1066,6 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
uint32_t tmp;
int r;
- r = jpeg_v2_0_stop(adev);
- if (r)
- return r;
-
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
r = vcn_v2_0_stop_dpg_mode(adev);
if (r)
@@ -1329,7 +1130,7 @@ power_off:
}
static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state)
+ int inst_idx, struct dpg_pause_state *new_state)
{
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
@@ -1412,7 +1213,7 @@ static int vcn_v2_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
@@ -1790,272 +1591,6 @@ void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_
amdgpu_ring_write(ring, val);
}
-/**
- * vcn_v2_0_jpeg_ring_get_rptr - get read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware read pointer
- */
-static uint64_t vcn_v2_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_get_wptr - get write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware write pointer
- */
-static uint64_t vcn_v2_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
- else
- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_set_wptr - set write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the write pointer to the hardware
- */
-static void vcn_v2_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
- WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
- } else {
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
- }
-}
-
-/**
- * vcn_v2_0_jpeg_ring_insert_start - insert a start command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a start command to the ring.
- */
-void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x68e04);
-
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x80010000);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_insert_end - insert a end command
- *
- * @ring: amdgpu_ring pointer
- *
- * Write a end command to the ring.
- */
-void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x68e04);
-
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x00010000);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_emit_fence - emit an fence & trap command
- *
- * @ring: amdgpu_ring pointer
- * @fence: fence to emit
- *
- * Write a fence and a trap command to the ring.
- */
-void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
- unsigned flags)
-{
- WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, seq);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, seq);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(addr));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(addr));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x8);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
- 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
- amdgpu_ring_write(ring, 0);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x3fbc);
-
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x1);
-
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
- amdgpu_ring_write(ring, 0);
-}
-
-/**
- * vcn_v2_0_jpeg_ring_emit_ib - execute indirect buffer
- *
- * @ring: amdgpu_ring pointer
- * @ib: indirect buffer to execute
- *
- * Write ring commands to execute the indirect buffer.
- */
-void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib,
- uint32_t flags)
-{
- unsigned vmid = AMDGPU_JOB_GET_VMID(job);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, ib->length_dw);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
-
- amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
- amdgpu_ring_write(ring, 0);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x01400200);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x2);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET,
- 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
- amdgpu_ring_write(ring, 0x2);
-}
-
-void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
- uint32_t val, uint32_t mask)
-{
- uint32_t reg_offset = (reg << 2);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x01400200);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, val);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring,
- PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
- } else {
- amdgpu_ring_write(ring, reg_offset);
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE3));
- }
- amdgpu_ring_write(ring, mask);
-}
-
-void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
-{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- uint32_t data0, data1, mask;
-
- pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
-
- /* wait for register write */
- data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
- data1 = lower_32_bits(pd_addr);
- mask = 0xffffffff;
- vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask);
-}
-
-void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
-{
- uint32_t reg_offset = (reg << 2);
-
- amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring,
- PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
- } else {
- amdgpu_ring_write(ring, reg_offset);
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- }
- amdgpu_ring_write(ring, val);
-}
-
-void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)
-{
- int i;
-
- WARN_ON(ring->wptr % 2 || count % 2);
-
- for (i = 0; i < count / 2; i++) {
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
- amdgpu_ring_write(ring, 0);
- }
-}
-
static int vcn_v2_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -2080,9 +1615,6 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,
case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);
break;
- case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->vcn.inst->ring_jpeg);
- break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
@@ -2092,7 +1624,7 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
+int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t tmp = 0;
@@ -2228,36 +1760,6 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
-static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = {
- .type = AMDGPU_RING_TYPE_VCN_JPEG,
- .align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_0,
- .get_rptr = vcn_v2_0_jpeg_ring_get_rptr,
- .get_wptr = vcn_v2_0_jpeg_ring_get_wptr,
- .set_wptr = vcn_v2_0_jpeg_ring_set_wptr,
- .emit_frame_size =
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
- 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */
- 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */
- 8 + 16,
- .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */
- .emit_ib = vcn_v2_0_jpeg_ring_emit_ib,
- .emit_fence = vcn_v2_0_jpeg_ring_emit_fence,
- .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush,
- .test_ring = amdgpu_vcn_jpeg_ring_test_ring,
- .test_ib = amdgpu_vcn_jpeg_ring_test_ib,
- .insert_nop = vcn_v2_0_jpeg_ring_nop,
- .insert_start = vcn_v2_0_jpeg_ring_insert_start,
- .insert_end = vcn_v2_0_jpeg_ring_insert_end,
- .pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
- .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg,
- .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;
@@ -2274,12 +1776,6 @@ static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev)
DRM_INFO("VCN encode is enabled in VM mode\n");
}
-static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)
-{
- adev->vcn.inst->ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs;
- DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
-}
-
static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
.set = vcn_v2_0_set_interrupt_state,
.process = vcn_v2_0_process_interrupt,
@@ -2287,7 +1783,7 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2;
+ adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 1;
adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
index 8467292f32e5..6c9de1882428 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
@@ -37,6 +37,7 @@ extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr);
extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
uint32_t reg, uint32_t val);
+extern int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring);
extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring);
extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
@@ -49,19 +50,6 @@ extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr);
extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
-extern void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring);
-extern void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring);
-extern void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
- unsigned flags);
-extern void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
- struct amdgpu_ib *ib, uint32_t flags);
-extern void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
- uint32_t val, uint32_t mask);
-extern void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr);
-extern void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
-extern void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count);
-
extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block;
#endif /* __VCN_V2_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 395c2259f979..70fae7977f8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -25,9 +25,11 @@
#include "amdgpu.h"
#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v2_0.h"
+#include "mmsch_v1_0.h"
#include "vcn/vcn_2_5_offset.h"
#include "vcn/vcn_2_5_sh_mask.h"
@@ -46,16 +48,16 @@
#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5
#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c
-#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
-
-#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2
+#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
-static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v2_5_set_powergating_state(void *handle,
enum amd_powergating_state state);
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
@@ -90,11 +92,16 @@ static int vcn_v2_5_early_init(void *handle)
} else
adev->vcn.num_vcn_inst = 1;
- adev->vcn.num_enc_rings = 2;
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.num_vcn_inst = 2;
+ adev->vcn.harvest_config = 0;
+ adev->vcn.num_enc_rings = 1;
+ } else {
+ adev->vcn.num_enc_rings = 2;
+ }
vcn_v2_5_set_dec_ring_funcs(adev);
vcn_v2_5_set_enc_ring_funcs(adev);
- vcn_v2_5_set_jpeg_ring_funcs(adev);
vcn_v2_5_set_irq_funcs(adev);
return 0;
@@ -129,12 +136,6 @@ static int vcn_v2_5_sw_init(void *handle)
if (r)
return r;
}
-
- /* VCN JPEG TRAP */
- r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
- VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq);
- if (r)
- return r;
}
r = amdgpu_vcn_sw_init(adev);
@@ -183,12 +184,11 @@ static int vcn_v2_5_sw_init(void *handle)
adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP);
- adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH);
-
ring = &adev->vcn.inst[j].ring_dec;
ring->use_doorbell = true;
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j;
+
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ (amdgpu_sriov_vf(adev) ? 2*j : 8*j);
sprintf(ring->name, "vcn_dec_%d", j);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
if (r)
@@ -197,22 +197,26 @@ static int vcn_v2_5_sw_init(void *handle)
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
ring = &adev->vcn.inst[j].ring_enc[i];
ring->use_doorbell = true;
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j;
+
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ (amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));
+
sprintf(ring->name, "vcn_enc_%d.%d", j, i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
if (r)
return r;
}
+ }
- ring = &adev->vcn.inst[j].ring_jpeg;
- ring->use_doorbell = true;
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j;
- sprintf(ring->name, "vcn_jpeg_%d", j);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
+
return 0;
}
@@ -228,6 +232,9 @@ static int vcn_v2_5_sw_fini(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
@@ -248,43 +255,44 @@ static int vcn_v2_5_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
- int i, j, r;
+ int i, j, r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ r = vcn_v2_5_sriov_start(adev);
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
continue;
- ring = &adev->vcn.inst[j].ring_dec;
- adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell,
- ring->doorbell_index, j);
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.inst[j].ring_enc[0].sched.ready = true;
+ adev->vcn.inst[j].ring_enc[1].sched.ready = false;
+ adev->vcn.inst[j].ring_enc[2].sched.ready = false;
+ adev->vcn.inst[j].ring_dec.sched.ready = true;
+ } else {
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
- goto done;
- }
+ ring = &adev->vcn.inst[j].ring_dec;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- ring = &adev->vcn.inst[j].ring_enc[i];
- ring->sched.ready = false;
- continue;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ring->doorbell_index, j);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
goto done;
- }
- }
- ring = &adev->vcn.inst[j].ring_jpeg;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->sched.ready = false;
- goto done;
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ ring = &adev->vcn.inst[j].ring_enc[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
}
}
+
done:
if (!r)
- DRM_INFO("VCN decode and encode initialized successfully.\n");
+ DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
return r;
}
@@ -300,25 +308,24 @@ static int vcn_v2_5_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
- int i;
+ int i, j;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
ring = &adev->vcn.inst[i].ring_dec;
- if (RREG32_SOC15(VCN, i, mmUVD_STATUS))
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, mmUVD_STATUS)))
vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
ring->sched.ready = false;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- ring = &adev->vcn.inst[i].ring_enc[i];
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ ring = &adev->vcn.inst[i].ring_enc[j];
ring->sched.ready = false;
}
-
- ring = &adev->vcn.inst[i].ring_jpeg;
- ring->sched.ready = false;
}
return 0;
@@ -385,9 +392,9 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
offset = 0;
} else {
@@ -419,11 +426,103 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
}
}
+static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
/**
* vcn_v2_5_disable_clock_gating - disable VCN clock gating
*
* @adev: amdgpu_device pointer
- * @sw: enable SW clock gating
*
* Disable clock gating for VCN block
*/
@@ -538,11 +637,58 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
}
}
+static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
+ uint8_t sram_sel, int inst_idx, uint8_t indirect)
+{
+ uint32_t reg_data = 0;
+
+ /* enable sw clock gating control */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
/**
* vcn_v2_5_enable_clock_gating - enable VCN clock gating
*
* @adev: amdgpu_device pointer
- * @sw: enable SW clock gating
*
* Enable clock gating for VCN block
*/
@@ -601,111 +747,134 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
}
}
-/**
- * jpeg_v2_5_start - start JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * Setup and start the JPEG block
- */
-static int jpeg_v2_5_start(struct amdgpu_device *adev)
+static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
struct amdgpu_ring *ring;
- uint32_t tmp;
- int i;
-
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- ring = &adev->vcn.inst[i].ring_jpeg;
- /* disable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
- /* JPEG disable CGC */
- tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL);
- tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp);
-
- tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE);
- tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
- | JPEG_CGC_GATE__JPEG2_DEC_MASK
- | JPEG_CGC_GATE__JMCIF_MASK
- | JPEG_CGC_GATE__JRBBM_MASK);
- WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp);
-
- tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL);
- tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
- | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
- | JPEG_CGC_CTRL__JMCIF_MODE_MASK
- | JPEG_CGC_CTRL__JRBBM_MODE_MASK);
- WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp);
-
- /* MJPEG global tiling registers */
- WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* enable JMI channel */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- /* enable System Interrupt for JRBC */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN),
- JPEG_SYS_INT_EN__DJRBC_MASK,
- ~JPEG_SYS_INT_EN__DJRBC_MASK);
+ uint32_t rb_bufsz, tmp;
- WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
- WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0);
- WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0);
- WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
- WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
- ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR);
- }
-
- return 0;
-}
-
-/**
- * jpeg_v2_5_stop - stop JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * stop the JPEG block
- */
-static int jpeg_v2_5_stop(struct amdgpu_device *adev)
-{
- uint32_t tmp;
- int i;
-
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* reset JMI */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL),
- UVD_JMI_CNTL__SOFT_RESET_MASK,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE);
- tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK
- |JPEG_CGC_GATE__JPEG2_DEC_MASK
- |JPEG_CGC_GATE__JMCIF_MASK
- |JPEG_CGC_GATE__JRBBM_MASK);
- WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp);
-
- /* enable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS),
- UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
- }
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp);
+
+ if (indirect)
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+ /* enable clock gating */
+ vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interupt */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
+
+ /* unblock VCPU register access */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect)
+ psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+ (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+ (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
+
+ ring = &adev->vcn.inst[inst_idx].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* programm the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
return 0;
}
@@ -716,9 +885,17 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
uint32_t rb_bufsz, tmp;
int i, j, k, r;
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+ continue;
+ }
+
/* disable register anti-hang mechanism */
WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -728,6 +905,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp);
}
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return 0;
+
/*SW clock gating */
vcn_v2_5_disable_clock_gating(adev);
@@ -880,23 +1060,250 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
}
- r = jpeg_v2_5_start(adev);
- return r;
+ return 0;
}
-static int vcn_v2_5_stop(struct amdgpu_device *adev)
+static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev,
+ struct amdgpu_mm_table *table)
{
+ uint32_t data = 0, loop = 0, size = 0;
+ uint64_t addr = table->gpu_addr;
+ struct mmsch_v1_1_init_header *header = NULL;;
+
+ header = (struct mmsch_v1_1_init_header *)table->cpu_addr;
+ size = header->total_size;
+
+ /*
+ * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of
+ * memory descriptor location
+ */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
+
+ /* 2, update vmid of descriptor */
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID);
+ data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID, data);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
+
+ /*
+ * 5, kick off the initialization and wait until
+ * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
+
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+ loop = 10;
+ while ((data & 0x10000002) != 0x10000002) {
+ udelay(100);
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+ loop--;
+ if (!loop)
+ break;
+ }
+
+ if (!loop) {
+ dev_err(adev->dev,
+ "failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n",
+ data);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint32_t offset, size, tmp, i, rb_bufsz;
+ uint32_t table_size = 0;
+ struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
+ struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
+ struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
+ struct mmsch_v1_0_cmd_end end = { { 0 } };
+ uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+ struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;
+
+ direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ header->version = MMSCH_VERSION;
+ header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2;
+ init_table += header->total_size;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ header->eng[i].table_offset = header->total_size;
+ header->eng[i].init_status = 0;
+ header->eng[i].table_size = 0;
+
+ table_size = 0;
+
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ /* mc resume*/
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+ offset = 0;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = size;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0),
+ size);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1),
+ 0);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1),
+ AMDGPU_VCN_STACK_SIZE);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2),
+ 0);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->wptr = 0;
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI),
+ upper_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE),
+ ring->ring_size / 4);
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ ring->wptr = 0;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
+ upper_32_bits(ring->gpu_addr));
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+
+ /* add end packet */
+ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
+ table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+ init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+
+ /* refine header */
+ header->eng[i].table_size = table_size;
+ header->total_size += table_size;
+ }
+
+ return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
+}
+
+static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ int ret_code = 0;
uint32_t tmp;
- int i, r;
- r = jpeg_v2_5_stop(adev);
- if (r)
- return r;
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2);
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int vcn_v2_5_stop(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int i, r = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_5_stop_dpg_mode(adev, i);
+ continue;
+ }
+
/* wait for vcn idle */
SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
if (r)
@@ -946,6 +1353,70 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+
+ return 0;
+}
+
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state)
+{
+ struct amdgpu_ring *ring;
+ uint32_t reg_data = 0;
+ int ret_code;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d -> %d",
+ adev->vcn.pause_state.fw_based, new_state->fw_based);
+ reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ if (!ret_code) {
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+ /* Restore */
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS,
+ 0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ }
+ } else {
+ /* unpause dpg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.pause_state.fw_based = new_state->fw_based;
+ }
+
return 0;
}
@@ -991,6 +1462,10 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2,
+ lower_32_bits(ring->wptr) | 0x80000000);
+
if (ring->use_doorbell) {
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
@@ -1016,7 +1491,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
.emit_ib = vcn_v2_0_dec_ring_emit_ib,
.emit_fence = vcn_v2_0_dec_ring_emit_fence,
.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
- .test_ring = amdgpu_vcn_dec_ring_test_ring,
+ .test_ring = vcn_v2_0_dec_ring_test_ring,
.test_ib = amdgpu_vcn_dec_ring_test_ib,
.insert_nop = vcn_v2_0_dec_ring_insert_nop,
.insert_start = vcn_v2_0_dec_ring_insert_start,
@@ -1128,86 +1603,6 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
-/**
- * vcn_v2_5_jpeg_ring_get_rptr - get read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware read pointer
- */
-static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR);
-}
-
-/**
- * vcn_v2_5_jpeg_ring_get_wptr - get write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware write pointer
- */
-static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
- else
- return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR);
-}
-
-/**
- * vcn_v2_5_jpeg_ring_set_wptr - set write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the write pointer to the hardware
- */
-static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
- WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
- } else {
- WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
- }
-}
-
-static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = {
- .type = AMDGPU_RING_TYPE_VCN_JPEG,
- .align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_1,
- .get_rptr = vcn_v2_5_jpeg_ring_get_rptr,
- .get_wptr = vcn_v2_5_jpeg_ring_get_wptr,
- .set_wptr = vcn_v2_5_jpeg_ring_set_wptr,
- .emit_frame_size =
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
- 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */
- 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */
- 8 + 16,
- .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */
- .emit_ib = vcn_v2_0_jpeg_ring_emit_ib,
- .emit_fence = vcn_v2_0_jpeg_ring_emit_fence,
- .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush,
- .test_ring = amdgpu_vcn_jpeg_ring_test_ring,
- .test_ib = amdgpu_vcn_jpeg_ring_test_ib,
- .insert_nop = vcn_v2_0_jpeg_ring_nop,
- .insert_start = vcn_v2_0_jpeg_ring_insert_start,
- .insert_end = vcn_v2_0_jpeg_ring_insert_end,
- .pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
- .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg,
- .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
{
int i;
@@ -1236,19 +1631,6 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
}
}
-static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs;
- adev->vcn.inst[i].ring_jpeg.me = i;
- DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i);
- }
-}
-
static bool vcn_v2_5_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1284,7 +1666,10 @@ static int vcn_v2_5_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ bool enable = (state == AMD_CG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
if (enable) {
if (vcn_v2_5_is_idle(handle))
@@ -1303,6 +1688,9 @@ static int vcn_v2_5_set_powergating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
if(state == adev->vcn.cur_state)
return 0;
@@ -1355,9 +1743,6 @@ static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
break;
- case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg);
- break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
@@ -1379,7 +1764,7 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 9eae3536ddad..407c6093c2ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -226,7 +226,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
/* disable irqs */
vega10_ih_disable_interrupts(adev);
- adev->nbio_funcs->ih_control(adev);
+ adev->nbio.funcs->ih_control(adev);
ih = &adev->irq.ih;
/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
@@ -234,16 +234,9 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff);
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL);
- ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
- if (adev->irq.ih.use_bus_addr) {
- ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
- } else {
- ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_FBPA_ENABLE, 1);
- }
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
!!adev->irq.msi_enabled);
-
if (amdgpu_sriov_vf(adev)) {
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
@@ -253,10 +246,19 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
}
- if ((adev->asic_type == CHIP_ARCTURUS
- && adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
- || adev->asic_type == CHIP_RENOIR)
+ if ((adev->asic_type == CHIP_ARCTURUS &&
+ adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ adev->asic_type == CHIP_RENOIR) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+ if (adev->irq.ih.use_bus_addr) {
+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+ MC_SPACE_GPA_ENABLE, 1);
+ } else {
+ ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
+ MC_SPACE_FBPA_ENABLE, 1);
+ }
WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
+ }
/* set the writeback address whether it's enabled or not */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
@@ -675,10 +677,49 @@ static int vega10_ih_soft_reset(void *handle)
return 0;
}
+static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ /**
+ * Vega10 does not have IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE
+ * and IH_BUFFER_MEM_CLK_SOFT_OVERRIDE field.
+ */
+ if (adev->asic_type > CHIP_VEGA10) {
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ IH_BUFFER_MEM_CLK_SOFT_OVERRIDE, field_val);
+ }
+
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data);
+ }
+}
+
static int vega10_ih_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ vega10_ih_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE);
return 0;
+
}
static int vega10_ih_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index bd0580334f83..6b52a539d51b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -24,7 +24,6 @@
#include "soc15.h"
#include "soc15_common.h"
-#include "soc15_hw_ip.h"
#include "vega10_ip_offset.h"
int vega10_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 587e33f5dcce..556f854e3551 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -24,7 +24,6 @@
#include "soc15.h"
#include "soc15_common.h"
-#include "soc15_hw_ip.h"
#include "vega20_ip_offset.h"
int vega20_reg_base_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 5f8c8786cac5..78b35901643b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -690,15 +690,15 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
}
/**
- * vi_asic_reset - soft reset GPU
+ * vi_asic_pci_config_reset - soft reset GPU
*
* @adev: amdgpu_device pointer
*
- * Look up which blocks are hung and attempt
- * to reset them.
+ * Use PCI Config method to reset the GPU.
+ *
* Returns 0 for success.
*/
-static int vi_asic_reset(struct amdgpu_device *adev)
+static int vi_asic_pci_config_reset(struct amdgpu_device *adev)
{
int r;
@@ -711,10 +711,68 @@ static int vi_asic_reset(struct amdgpu_device *adev)
return r;
}
+static bool vi_asic_supports_baco(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_TOPAZ:
+ return amdgpu_dpm_is_baco_supported(adev);
+ default:
+ return false;
+ }
+}
+
static enum amd_reset_method
vi_asic_reset_method(struct amdgpu_device *adev)
{
- return AMD_RESET_METHOD_LEGACY;
+ bool baco_reset;
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_TOPAZ:
+ baco_reset = amdgpu_dpm_is_baco_supported(adev);
+ break;
+ default:
+ baco_reset = false;
+ break;
+ }
+
+ if (baco_reset)
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_LEGACY;
+}
+
+/**
+ * vi_asic_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up which blocks are hung and attempt
+ * to reset them.
+ * Returns 0 for success.
+ */
+static int vi_asic_reset(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ if (!adev->in_suspend)
+ amdgpu_inc_vram_lost(adev);
+ r = amdgpu_dpm_baco_reset(adev);
+ } else {
+ r = vi_asic_pci_config_reset(adev);
+ }
+
+ return r;
}
static u32 vi_get_config_memsize(struct amdgpu_device *adev)
@@ -1042,6 +1100,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.get_pcie_usage = &vi_get_pcie_usage,
.need_reset_on_init = &vi_need_reset_on_init,
.get_pcie_replay_count = &vi_get_pcie_replay_count,
+ .supports_baco = &vi_asic_supports_baco,
};
#define CZ_REV_BRISTOL(rev) \
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h
index 8de0772f986c..defb4aaf929a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/vi.h
@@ -31,4 +31,5 @@ void vi_srbm_select(struct amdgpu_device *adev,
int vi_set_ip_blocks(struct amdgpu_device *adev);
void legacy_doorbell_index_init(struct amdgpu_device *adev);
+
#endif