From abc3b5d21d34d21914b8e3caa75690f72baa2f36 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 31 Jan 2024 17:48:34 +0800 Subject: drm/amdgpu: add new aca_smu_type support Add new types to distinguish between ACA error type and smu mca type. e.g.: the ACA_ERROR_TYPE_DEFERRED is not matched any smu mca valid bank channel, so add new type 'aca_smu_type' to distinguish aca error type and smu mca type. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c') diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index c0fc44cdd658..bf83e2c08f04 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -707,24 +707,27 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { }; static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_error_type type, + struct aca_bank *bank, enum aca_smu_type type, struct aca_bank_report *report, void *data) { - u64 status, misc0; + u64 misc0; int ret; - status = bank->regs[ACA_REG_IDX_STATUS]; - if ((type == ACA_ERROR_TYPE_UE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || - (type == ACA_ERROR_TYPE_CE && - ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { - - ret = aca_bank_info_decode(bank, &report->info); - if (ret) - return ret; - - misc0 = bank->regs[ACA_REG_IDX_MISC0]; - report->count[type] = ACA_REG__MISC0__ERRCNT(misc0); + ret = aca_bank_info_decode(bank, &report->info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + + switch (type) { + case ACA_SMU_TYPE_UE: + report->count[ACA_ERROR_TYPE_UE] = 1ULL; + break; + case ACA_SMU_TYPE_CE: + report->count[ACA_ERROR_TYPE_CE] = ACA_REG__MISC0__ERRCNT(misc0); + break; + default: + return -EINVAL; } return 0; @@ -741,7 +744,7 @@ static int mmhub_v1_8_err_codes[] = { }; static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, void *data) + enum aca_smu_type type, void *data) { u32 instlo; -- cgit v1.2.3-59-g8ed1b From 5275114a7043425b690275566e80d450ddc7525d Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 22 Feb 2024 10:11:07 +0800 Subject: drm/amdgpu: refine aca error cache for mmhub v1.8 refine aca error cache for mmhub v1.8 Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c') diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index bf83e2c08f04..c41f600a5e10 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -710,27 +710,29 @@ static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, struct aca_bank_report *report, void *data) { + struct aca_bank_info info; u64 misc0; int ret; - ret = aca_bank_info_decode(bank, &report->info); + ret = aca_bank_info_decode(bank, &info); if (ret) return ret; misc0 = bank->regs[ACA_REG_IDX_MISC0]; - switch (type) { case ACA_SMU_TYPE_UE: - report->count[ACA_ERROR_TYPE_UE] = 1ULL; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); break; case ACA_SMU_TYPE_CE: - report->count[ACA_ERROR_TYPE_CE] = ACA_REG__MISC0__ERRCNT(misc0); + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); break; default: return -EINVAL; } - return 0; + return ret; } /* reference to smu driver if header file */ -- cgit v1.2.3-59-g8ed1b From 176c3e89567fdc037c318f672bbe3c2271004560 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 11 Mar 2024 16:51:49 +0800 Subject: drm/amdgpu: add utcl2 RAS poison query for mmhub Add it for mmhub v1.8. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 2 ++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 1ca9d4ed8063..95d676ee207f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -63,6 +63,8 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, bool enable); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, + int hub_inst); }; struct amdgpu_mmhub { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index c41f600a5e10..50113cadaa0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -559,6 +559,20 @@ static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags) } +static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev, + int hub_inst) +{ + u32 fed, status; + + status = RREG32_SOC15(MMHUB, hub_inst, regVM_L2_PROTECTION_FAULT_STATUS); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + /* reset page fault status */ + WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst, + regVM_L2_PROTECTION_FAULT_STATUS), 1, ~1); + + return fed; +} + const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .get_fb_location = mmhub_v1_8_get_fb_location, .init = mmhub_v1_8_init, @@ -568,6 +582,7 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs, .set_clockgating = mmhub_v1_8_set_clockgating, .get_clockgating = mmhub_v1_8_get_clockgating, + .query_utcl2_poison_status = mmhub_v1_8_query_utcl2_poison_status, }; static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = { -- cgit v1.2.3-59-g8ed1b From e3d4de8d8b24933a9588019bf53891bca520b226 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 22 Feb 2024 14:03:14 +0800 Subject: drm/amdgpu: retire unused aca_bank_report data structure retire unused aca_bank_report data structure. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 17 +++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 8 +------- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 6 +++--- 7 files changed, 24 insertions(+), 35 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 7cbff74c0f2f..a0b7f0d0c513 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -297,29 +297,26 @@ int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_in return 0; } -static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank, - enum aca_smu_type type, struct aca_bank_report *report) +static int aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type) { const struct aca_bank_ops *bank_ops = handle->bank_ops; - if (!bank || !report) + if (!bank) return -EINVAL; - if (!bank_ops->aca_bank_generate_report) + if (!bank_ops->aca_bank_parser) return -EOPNOTSUPP; - memset(report, 0, sizeof(*report)); - return bank_ops->aca_bank_generate_report(handle, bank, type, - report, handle->data); + return bank_ops->aca_bank_parser(handle, bank, type, + handle->data); } static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank, - enum aca_smu_type smu_type, void *data) + enum aca_smu_type type, void *data) { - struct aca_bank_report report; int ret; - ret = aca_generate_bank_report(handle, bank, smu_type, &report); + ret = aca_bank_parser(handle, bank, type); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 470efefcde10..6eb4c0341278 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -122,11 +122,6 @@ struct aca_bank_info { int mcatype; }; -struct aca_bank_report { - struct aca_bank_info info; - u64 count[ACA_ERROR_TYPE_COUNT]; -}; - struct aca_bank_error { struct list_head node; struct aca_bank_info info; @@ -164,8 +159,7 @@ struct aca_handle { }; struct aca_bank_ops { - int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data); + int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 3e7f0199926e..523827e8e7a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1035,8 +1035,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return 0; } -static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct amdgpu_device *adev = handle->adev; struct aca_bank_info info; @@ -1075,7 +1075,7 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc } static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = { - .aca_bank_generate_report = xgmi_v6_4_0_aca_bank_generate_report, + .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser, }; static const struct aca_info xgmi_v6_4_0_aca_info = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index cc7cccf0b2b2..fc33354f1d3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -680,9 +680,9 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, }; -static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, + struct aca_bank *bank, enum aca_smu_type type, + void *data) { struct aca_bank_info info; u64 misc0; @@ -736,7 +736,7 @@ static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_b } static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = { - .aca_bank_generate_report = gfx_v9_4_3_aca_bank_generate_report, + .aca_bank_parser = gfx_v9_4_3_aca_bank_parser, .aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 50113cadaa0d..7a1ff298417a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -721,9 +721,8 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, }; -static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct aca_bank_info info; u64 misc0; @@ -780,7 +779,7 @@ static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_b } static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = { - .aca_bank_generate_report = mmhub_v1_8_aca_bank_generate_report, + .aca_bank_parser = mmhub_v1_8_aca_bank_parser, .aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 02fc11b98b20..71c2f50530cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2189,9 +2189,8 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = { .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count, }; -static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct aca_bank_info info; u64 misc0; @@ -2241,7 +2240,7 @@ static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_ } static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = { - .aca_bank_generate_report = sdma_v4_4_2_aca_bank_generate_report, + .aca_bank_parser = sdma_v4_4_2_aca_bank_parser, .aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index b512a4b38067..e2e18ccbbc62 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -504,8 +504,8 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = { .query_ras_error_address = umc_v12_0_query_ras_error_address, }; -static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct amdgpu_device *adev = handle->adev; struct aca_bank_info info; @@ -542,7 +542,7 @@ static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct } static const struct aca_bank_ops umc_v12_0_aca_bank_ops = { - .aca_bank_generate_report = umc_v12_0_aca_bank_generate_report, + .aca_bank_parser = umc_v12_0_aca_bank_parser, }; const struct aca_info umc_v12_0_aca_info = { -- cgit v1.2.3-59-g8ed1b