aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v6_1.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_1.c244
1 files changed, 168 insertions, 76 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 47c4b96b14d1..793bf70e64b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -28,19 +28,24 @@
#include "rsmu/rsmu_0_0_2_sh_mask.h"
#include "umc/umc_6_1_1_offset.h"
#include "umc/umc_6_1_1_sh_mask.h"
+#include "umc/umc_6_1_2_offset.h"
-#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10
+#define UMC_6_INST_DIST 0x40000
/*
* (addr / 256) * 8192, the higher 26 bits in ErrorAddr
* is the index of 8KB block
*/
-#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
+#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
/* channel index is the index of 256B block */
#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
/* offset in 256B block */
#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL)
+#define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++)
+#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
+#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
+
const uint32_t
umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
{2, 18, 11, 27}, {4, 20, 13, 29},
@@ -49,24 +54,10 @@ const uint32_t
{9, 25, 0, 16}, {15, 31, 6, 22}
};
-static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev,
- uint32_t umc_instance)
+static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev)
{
- uint32_t rsmu_umc_index;
-
- rsmu_umc_index = RREG32_SOC15(RSMU, 0,
- mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
- rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
- RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
RSMU_UMC_INDEX_MODE_EN, 1);
- rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
- RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
- RSMU_UMC_INDEX_INSTANCE, umc_instance);
- rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index,
- RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
- RSMU_UMC_INDEX_WREN, 1 << umc_instance);
- WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
- rsmu_umc_index);
}
static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
@@ -75,15 +66,23 @@ static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev)
RSMU_UMC_INDEX_MODE_EN, 0);
}
-static uint32_t umc_v6_1_get_umc_inst(struct amdgpu_device *adev)
+static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev)
{
uint32_t rsmu_umc_index;
rsmu_umc_index = RREG32_SOC15(RSMU, 0,
- mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+ mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU);
+
return REG_GET_FIELD(rsmu_umc_index,
- RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
- RSMU_UMC_INDEX_INSTANCE);
+ RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU,
+ RSMU_UMC_INDEX_MODE_EN);
+}
+
+static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst;
}
static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
@@ -95,39 +94,50 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
uint64_t mc_umc_status;
uint32_t mc_umc_status_addr;
- ecc_err_cnt_sel_addr =
- SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
- ecc_err_cnt_addr =
- SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
- mc_umc_status_addr =
- SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ }
/* select the lower chip and check the error count */
- ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 0);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
- ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
*error_count +=
(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
UMC_V6_1_CE_CNT_INIT);
/* clear the lower chip err count */
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
/* select the higher chip and check the err counter */
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 1);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
- ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
*error_count +=
(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
UMC_V6_1_CE_CNT_INIT);
/* clear the higher chip err count */
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
/* check for SRAM correctable error
MCUMC_STATUS is a 64 bit register */
- mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
@@ -141,11 +151,18 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
uint64_t mc_umc_status;
uint32_t mc_umc_status_addr;
- mc_umc_status_addr =
- SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ }
/* check the MCUMC_STATUS */
- mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
@@ -155,49 +172,78 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
*error_count += 1;
}
-static void umc_v6_1_query_error_count(struct amdgpu_device *adev,
- struct ras_err_data *err_data, uint32_t umc_reg_offset,
- uint32_t channel_index)
-{
- umc_v6_1_query_correctable_error_count(adev, umc_reg_offset,
- &(err_data->ce_count));
- umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset,
- &(err_data->ue_count));
-}
-
static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
- amdgpu_umc_for_each_channel(umc_v6_1_query_error_count);
+ struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v6_1_querry_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+ }
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
}
static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data,
- uint32_t umc_reg_offset, uint32_t channel_index)
+ uint32_t umc_reg_offset,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
{
uint32_t lsb, mc_umc_status_addr;
- uint64_t mc_umc_status, err_addr, retired_page;
+ uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
struct eeprom_table_record *err_rec;
-
- mc_umc_status_addr =
- SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
+ }
/* skip error address process if -ENOMEM */
if (!err_data->err_addr) {
/* clear umc status */
- WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
return;
}
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
- mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
/* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
- err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4);
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
/* the lowest lsb bits should be ignored */
lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
@@ -218,57 +264,105 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
- err_rec->mcumc_id = umc_v6_1_get_umc_inst(adev);
+ err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
}
/* clear umc status */
- WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
}
static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
void *ras_error_status)
{
- amdgpu_umc_for_each_channel(umc_v6_1_query_error_address);
+ struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_query_error_address(adev,
+ err_data,
+ umc_reg_offset,
+ ch_inst,
+ umc_inst);
+ }
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
}
static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
- uint32_t umc_reg_offset, uint32_t channel_index)
+ uint32_t umc_reg_offset)
{
uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
uint32_t ecc_err_cnt_addr;
- ecc_err_cnt_sel_addr =
- SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
- ecc_err_cnt_addr =
- SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ /* UMC 6_1_2 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+ } else {
+ /* UMC 6_1_1 registers */
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+ }
/* select the lower chip and check the error count */
- ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 0);
/* set ce error interrupt type to APIC based interrupt */
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrInt, 0x1);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
/* set error count to initial value */
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
/* select the higher chip and check the err counter */
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
EccErrCntCsSel, 1);
- WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel);
- WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
}
static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev)
{
- void *ras_error_status = NULL;
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);
+
+ if (rsmu_umc_index_state)
+ umc_v6_1_disable_umc_index_mode(adev);
+
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_6_reg_offset(adev,
+ umc_inst,
+ ch_inst);
+
+ umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset);
+ }
- amdgpu_umc_for_each_channel(umc_v6_1_err_cnt_init_per_channel);
+ if (rsmu_umc_index_state)
+ umc_v6_1_enable_umc_index_mode(adev);
}
const struct amdgpu_umc_funcs umc_v6_1_funcs = {
@@ -276,6 +370,4 @@ const struct amdgpu_umc_funcs umc_v6_1_funcs = {
.ras_late_init = amdgpu_umc_ras_late_init,
.query_ras_error_count = umc_v6_1_query_ras_error_count,
.query_ras_error_address = umc_v6_1_query_ras_error_address,
- .enable_umc_index_mode = umc_v6_1_enable_umc_index_mode,
- .disable_umc_index_mode = umc_v6_1_disable_umc_index_mode,
};