aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c234
1 files changed, 215 insertions, 19 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a11db2b1a63f..2f18c64d531f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -28,6 +28,7 @@
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
+#include "amdgpu_pm.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
@@ -40,6 +41,8 @@
#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
+#include "amdgpu_ras.h"
+
#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -94,6 +97,7 @@ MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
@@ -576,6 +580,28 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
}
}
+static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_VEGA20:
+ break;
+ case CHIP_RAVEN:
+ if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
+ break;
+ if ((adev->gfx.rlc_fw_version != 106 &&
+ adev->gfx.rlc_fw_version < 531) ||
+ (adev->gfx.rlc_fw_version == 53815) ||
+ (adev->gfx.rlc_feature_version < 1) ||
+ !adev->gfx.rlc.is_rlc_v2_1)
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ break;
+ default:
+ break;
+ }
+}
+
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
@@ -589,6 +615,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
unsigned int i = 0;
uint16_t version_major;
uint16_t version_minor;
+ uint32_t smu_version;
DRM_DEBUG("\n");
@@ -659,6 +686,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
+ else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
+ (smu_version >= 0x41e2b))
+ /**
+ *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
+ */
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
else
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
@@ -828,6 +861,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
}
out:
+ gfx_v9_0_check_if_need_gfxoff(adev);
gfx_v9_0_check_fw_write_wait(adev);
if (err) {
dev_err(adev->dev,
@@ -1639,6 +1673,18 @@ static int gfx_v9_0_sw_init(void *handle)
if (r)
return r;
+ /* ECC error */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
+ &adev->gfx.cp_ecc_error_irq);
+ if (r)
+ return r;
+
+ /* FUE error */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
+ &adev->gfx.cp_ecc_error_irq);
+ if (r)
+ return r;
+
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
gfx_v9_0_scratch_init(adev);
@@ -1731,6 +1777,20 @@ static int gfx_v9_0_sw_fini(void *handle)
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
+ adev->gfx.ras_if) {
+ struct ras_common_if *ras_if = adev->gfx.ras_if;
+ struct ras_ih_if ih_info = {
+ .head = *ras_if,
+ };
+
+ amdgpu_ras_debugfs_remove(adev, ras_if);
+ amdgpu_ras_sysfs_remove(adev, ras_if);
+ amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
+ amdgpu_ras_feature_enable(adev, ras_if, 0);
+ kfree(ras_if);
+ }
+
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
@@ -1899,25 +1959,6 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
gfx_v9_0_init_compute_vmid(adev);
-
- mutex_lock(&adev->grbm_idx_mutex);
- /*
- * making sure that the following register writes will be broadcasted
- * to all the shaders
- */
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-
- WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
- (adev->gfx.config.sc_prim_fifo_size_frontend <<
- PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_prim_fifo_size_backend <<
- PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_hiz_tile_fifo_size <<
- PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_earlyz_tile_fifo_size <<
- PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
- mutex_unlock(&adev->grbm_idx_mutex);
-
}
static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
@@ -3303,6 +3344,7 @@ static int gfx_v9_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -3492,6 +3534,80 @@ static int gfx_v9_0_early_init(void *handle)
return 0;
}
+static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry);
+
+static int gfx_v9_0_ecc_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct ras_common_if **ras_if = &adev->gfx.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = gfx_v9_0_process_ras_data_cb,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "gfx_err_count",
+ .debugfs_name = "gfx_err_inject",
+ };
+ struct ras_common_if ras_block = {
+ .block = AMDGPU_RAS_BLOCK__GFX,
+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ .sub_block_index = 0,
+ .name = "gfx",
+ };
+ int r;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
+ amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
+ return 0;
+ }
+
+ if (*ras_if)
+ goto resume;
+
+ *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
+ if (!*ras_if)
+ return -ENOMEM;
+
+ **ras_if = ras_block;
+
+ r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
+ if (r)
+ goto feature;
+
+ ih_info.head = **ras_if;
+ fs_info.head = **ras_if;
+
+ r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
+ if (r)
+ goto interrupt;
+
+ r = amdgpu_ras_debugfs_create(adev, &fs_info);
+ if (r)
+ goto debugfs;
+
+ r = amdgpu_ras_sysfs_create(adev, &fs_info);
+ if (r)
+ goto sysfs;
+resume:
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r)
+ goto irq;
+
+ return 0;
+irq:
+ amdgpu_ras_sysfs_remove(adev, *ras_if);
+sysfs:
+ amdgpu_ras_debugfs_remove(adev, *ras_if);
+debugfs:
+ amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
+interrupt:
+ amdgpu_ras_feature_enable(adev, *ras_if, 0);
+feature:
+ kfree(*ras_if);
+ *ras_if = NULL;
+ return -EINVAL;
+}
+
static int gfx_v9_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -3505,6 +3621,10 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
+ r = gfx_v9_0_ecc_late_init(handle);
+ if (r)
+ return r;
+
return 0;
}
@@ -4541,6 +4661,45 @@ static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
return 0;
}
+#define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
+ WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
+ CP_ECC_ERROR_INT_ENABLE, 1)
+
+#define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
+ WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
+ CP_ECC_ERROR_INT_ENABLE, 0)
+
+static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ CP_ECC_ERROR_INT_ENABLE, 0);
+ DISABLE_ECC_ON_ME_PIPE(1, 0);
+ DISABLE_ECC_ON_ME_PIPE(1, 1);
+ DISABLE_ECC_ON_ME_PIPE(1, 2);
+ DISABLE_ECC_ON_ME_PIPE(1, 3);
+ break;
+
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ CP_ECC_ERROR_INT_ENABLE, 1);
+ ENABLE_ECC_ON_ME_PIPE(1, 0);
+ ENABLE_ECC_ON_ME_PIPE(1, 1);
+ ENABLE_ECC_ON_ME_PIPE(1, 2);
+ ENABLE_ECC_ON_ME_PIPE(1, 3);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
@@ -4657,6 +4816,34 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ /* TODO ue will trigger an interrupt. */
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_ras_reset_gpu(adev, 0);
+ return AMDGPU_RAS_UE;
+}
+
+static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->gfx.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ DRM_ERROR("CP ECC ERROR IRQ\n");
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
+
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.name = "gfx_v9_0",
.early_init = gfx_v9_0_early_init,
@@ -4818,6 +5005,12 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
.process = gfx_v9_0_priv_inst_irq,
};
+static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
+ .set = gfx_v9_0_set_cp_ecc_error_state,
+ .process = gfx_v9_0_cp_ecc_error_irq,
+};
+
+
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
@@ -4828,6 +5021,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
+
+ adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
+ adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)