From abc3b5d21d34d21914b8e3caa75690f72baa2f36 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 31 Jan 2024 17:48:34 +0800 Subject: drm/amdgpu: add new aca_smu_type support Add new types to distinguish between ACA error type and smu mca type. e.g.: the ACA_ERROR_TYPE_DEFERRED is not matched any smu mca valid bank channel, so add new type 'aca_smu_type' to distinguish aca error type and smu mca type. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 69 +++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 493982f94649..2ad2d874f42f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -28,7 +28,7 @@ #define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype} -typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, void *data); +typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); struct aca_banks { int nr_banks; @@ -86,7 +86,7 @@ static void aca_banks_release(struct aca_banks *banks) } } -static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_error_type type, u32 *count) +static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count) { struct amdgpu_aca *aca = &adev->aca; const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; @@ -127,7 +127,7 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); } -static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_error_type type, +static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, int start, int count, struct aca_banks *banks) { @@ -143,13 +143,12 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro return -EOPNOTSUPP; switch (type) { - case ACA_ERROR_TYPE_UE: + case ACA_SMU_TYPE_UE: max_count = smu_funcs->max_ue_bank_count; break; - case ACA_ERROR_TYPE_CE: + case ACA_SMU_TYPE_CE: max_count = smu_funcs->max_ce_bank_count; break; - case ACA_ERROR_TYPE_DEFERRED: default: return -EINVAL; } @@ -164,6 +163,8 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro if (ret) return ret; + bank.type = type; + aca_smu_bank_dump(adev, i, count, &bank); ret = aca_banks_add_bank(banks, &bank); @@ -195,7 +196,7 @@ static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type t return hwip->hwid == hwid && hwip->mcatype == mcatype; } -static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type) +static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type) { const struct aca_bank_ops *bank_ops = handle->bank_ops; @@ -297,7 +298,7 @@ static int aca_log_errors(struct aca_handle *handle, enum aca_error_type type, } static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, struct aca_bank_report *report) + enum aca_smu_type type, struct aca_bank_report *report) { const struct aca_bank_ops *bank_ops = handle->bank_ops; @@ -313,12 +314,24 @@ static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank * } static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, void *data) + enum aca_smu_type smu_type, void *data) { struct aca_bank_report report; + enum aca_error_type type; int ret; - ret = aca_generate_bank_report(handle, bank, type, &report); + switch (smu_type) { + case ACA_SMU_TYPE_UE: + type = ACA_ERROR_TYPE_UE; + break; + case ACA_SMU_TYPE_CE: + type = ACA_ERROR_TYPE_CE; + break; + default: + return -EINVAL; + } + + ret = aca_generate_bank_report(handle, bank, smu_type, &report); if (ret) return ret; @@ -333,7 +346,7 @@ static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank } static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank, - enum aca_error_type type, bank_handler_t handler, void *data) + enum aca_smu_type type, bank_handler_t handler, void *data) { struct aca_handle *handle; int ret; @@ -354,7 +367,7 @@ static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *ba } static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks, - enum aca_error_type type, bank_handler_t handler, void *data) + enum aca_smu_type type, bank_handler_t handler, void *data) { struct aca_bank_node *node; struct aca_bank *bank; @@ -378,7 +391,7 @@ static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks * return 0; } -static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type, +static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, bank_handler_t handler, void *data) { struct amdgpu_aca *aca = &adev->aca; @@ -389,10 +402,6 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type if (list_empty(&aca->mgr.list)) return 0; - /* NOTE: pmfw is only support UE and CE */ - if (type == ACA_ERROR_TYPE_DEFERRED) - type = ACA_ERROR_TYPE_CE; - ret = aca_smu_get_valid_aca_count(adev, type, &count); if (ret) return ret; @@ -479,10 +488,22 @@ out_unlock: static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type, struct ras_err_data *err_data) { + enum aca_smu_type smu_type; int ret; + switch (type) { + case ACA_ERROR_TYPE_UE: + smu_type = ACA_SMU_TYPE_UE; + break; + case ACA_ERROR_TYPE_CE: + smu_type = ACA_SMU_TYPE_CE; + break; + default: + return -EINVAL; + } + /* udpate aca bank to aca source error_cache first */ - ret = aca_banks_update(adev, type, handler_aca_log_bank_error, NULL); + ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, NULL); if (ret) return ret; @@ -784,7 +805,7 @@ static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val) return 0; } -static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_error_type type, int idx) +static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_smu_type type, int idx) { struct aca_bank_info info; int i, ret; @@ -793,7 +814,7 @@ static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_e if (ret) return; - seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_ERROR_TYPE_UE ? "UE" : "CE"); + seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_SMU_TYPE_UE ? "UE" : "CE"); seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n", idx, info.socket_id, info.die_id, info.hwid, info.mcatype); @@ -807,7 +828,7 @@ struct aca_dump_context { }; static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank, - enum aca_error_type type, void *data) + enum aca_smu_type type, void *data) { struct aca_dump_context *ctx = (struct aca_dump_context *)data; @@ -816,7 +837,7 @@ static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *ban return handler_aca_log_bank_error(handle, bank, type, NULL); } -static int aca_dump_show(struct seq_file *m, enum aca_error_type type) +static int aca_dump_show(struct seq_file *m, enum aca_smu_type type) { struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct aca_dump_context context = { @@ -829,7 +850,7 @@ static int aca_dump_show(struct seq_file *m, enum aca_error_type type) static int aca_dump_ce_show(struct seq_file *m, void *unused) { - return aca_dump_show(m, ACA_ERROR_TYPE_CE); + return aca_dump_show(m, ACA_SMU_TYPE_CE); } static int aca_dump_ce_open(struct inode *inode, struct file *file) @@ -847,7 +868,7 @@ static const struct file_operations aca_ce_dump_debug_fops = { static int aca_dump_ue_show(struct seq_file *m, void *unused) { - return aca_dump_show(m, ACA_ERROR_TYPE_UE); + return aca_dump_show(m, ACA_SMU_TYPE_UE); } static int aca_dump_ue_open(struct inode *inode, struct file *file) -- cgit v1.2.3-59-g8ed1b From 949899cbacf54f1611a7c343093069462bbb6625 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 6 Feb 2024 13:24:16 +0800 Subject: drm/amdgpu: add new api to save error count into aca cache add new api to save error count into aca cache. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 33 +++++++-------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 4 +++- 2 files changed, 10 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 2ad2d874f42f..7cbff74c0f2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -274,25 +274,25 @@ static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_ return new_bank_error(aerr, info); } -static int aca_log_errors(struct aca_handle *handle, enum aca_error_type type, - struct aca_bank_report *report) +int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, + enum aca_error_type type, u64 count) { struct aca_error_cache *error_cache = &handle->error_cache; struct aca_bank_error *bank_error; struct aca_error *aerr; - if (!handle || !report) + if (!handle || !info || type >= ACA_ERROR_TYPE_COUNT) return -EINVAL; - if (!report->count[type]) + if (!count) return 0; aerr = &error_cache->errors[type]; - bank_error = get_bank_error(aerr, &report->info); + bank_error = get_bank_error(aerr, info); if (!bank_error) return -ENOMEM; - bank_error->count[type] += report->count[type]; + bank_error->count += count; return 0; } @@ -317,31 +317,12 @@ static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank enum aca_smu_type smu_type, void *data) { struct aca_bank_report report; - enum aca_error_type type; int ret; - switch (smu_type) { - case ACA_SMU_TYPE_UE: - type = ACA_ERROR_TYPE_UE; - break; - case ACA_SMU_TYPE_CE: - type = ACA_ERROR_TYPE_CE; - break; - default: - return -EINVAL; - } - ret = aca_generate_bank_report(handle, bank, smu_type, &report); if (ret) return ret; - if (!report.count[type]) - return 0; - - ret = aca_log_errors(handle, type, &report); - if (ret) - return ret; - return 0; } @@ -440,7 +421,7 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er if (type >= ACA_ERROR_TYPE_COUNT) return -EINVAL; - count = bank_error->count[type]; + count = bank_error->count; if (!count) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index a48f4abc345c..470efefcde10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -130,7 +130,7 @@ struct aca_bank_report { struct aca_bank_error { struct list_head node; struct aca_bank_info info; - u64 count[ACA_ERROR_TYPE_COUNT]; + u64 count; }; struct aca_error { @@ -206,4 +206,6 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han enum aca_error_type type, void *data); int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en); void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); +int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, + enum aca_error_type type, u64 count); #endif -- cgit v1.2.3-59-g8ed1b From e3d4de8d8b24933a9588019bf53891bca520b226 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 22 Feb 2024 14:03:14 +0800 Subject: drm/amdgpu: retire unused aca_bank_report data structure retire unused aca_bank_report data structure. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 17 +++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 8 +------- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 6 +++--- 7 files changed, 24 insertions(+), 35 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 7cbff74c0f2f..a0b7f0d0c513 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -297,29 +297,26 @@ int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_in return 0; } -static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank, - enum aca_smu_type type, struct aca_bank_report *report) +static int aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type) { const struct aca_bank_ops *bank_ops = handle->bank_ops; - if (!bank || !report) + if (!bank) return -EINVAL; - if (!bank_ops->aca_bank_generate_report) + if (!bank_ops->aca_bank_parser) return -EOPNOTSUPP; - memset(report, 0, sizeof(*report)); - return bank_ops->aca_bank_generate_report(handle, bank, type, - report, handle->data); + return bank_ops->aca_bank_parser(handle, bank, type, + handle->data); } static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank, - enum aca_smu_type smu_type, void *data) + enum aca_smu_type type, void *data) { - struct aca_bank_report report; int ret; - ret = aca_generate_bank_report(handle, bank, smu_type, &report); + ret = aca_bank_parser(handle, bank, type); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 470efefcde10..6eb4c0341278 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -122,11 +122,6 @@ struct aca_bank_info { int mcatype; }; -struct aca_bank_report { - struct aca_bank_info info; - u64 count[ACA_ERROR_TYPE_COUNT]; -}; - struct aca_bank_error { struct list_head node; struct aca_bank_info info; @@ -164,8 +159,7 @@ struct aca_handle { }; struct aca_bank_ops { - int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data); + int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 3e7f0199926e..523827e8e7a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1035,8 +1035,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return 0; } -static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct amdgpu_device *adev = handle->adev; struct aca_bank_info info; @@ -1075,7 +1075,7 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc } static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = { - .aca_bank_generate_report = xgmi_v6_4_0_aca_bank_generate_report, + .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser, }; static const struct aca_info xgmi_v6_4_0_aca_info = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index cc7cccf0b2b2..fc33354f1d3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -680,9 +680,9 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, }; -static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, + struct aca_bank *bank, enum aca_smu_type type, + void *data) { struct aca_bank_info info; u64 misc0; @@ -736,7 +736,7 @@ static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_b } static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = { - .aca_bank_generate_report = gfx_v9_4_3_aca_bank_generate_report, + .aca_bank_parser = gfx_v9_4_3_aca_bank_parser, .aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 50113cadaa0d..7a1ff298417a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -721,9 +721,8 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, }; -static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct aca_bank_info info; u64 misc0; @@ -780,7 +779,7 @@ static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_b } static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = { - .aca_bank_generate_report = mmhub_v1_8_aca_bank_generate_report, + .aca_bank_parser = mmhub_v1_8_aca_bank_parser, .aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 02fc11b98b20..71c2f50530cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2189,9 +2189,8 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = { .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count, }; -static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle, - struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct aca_bank_info info; u64 misc0; @@ -2241,7 +2240,7 @@ static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_ } static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = { - .aca_bank_generate_report = sdma_v4_4_2_aca_bank_generate_report, + .aca_bank_parser = sdma_v4_4_2_aca_bank_parser, .aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index b512a4b38067..e2e18ccbbc62 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -504,8 +504,8 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = { .query_ras_error_address = umc_v12_0_query_ras_error_address, }; -static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, - struct aca_bank_report *report, void *data) +static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { struct amdgpu_device *adev = handle->adev; struct aca_bank_info info; @@ -542,7 +542,7 @@ static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct } static const struct aca_bank_ops umc_v12_0_aca_bank_ops = { - .aca_bank_generate_report = umc_v12_0_aca_bank_generate_report, + .aca_bank_parser = umc_v12_0_aca_bank_parser, }; const struct aca_info umc_v12_0_aca_info = { -- cgit v1.2.3-59-g8ed1b From 865d3397630b806f2df156e2143cdfa416db1e01 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 21 Feb 2024 15:07:30 +0800 Subject: drm/amdgpu: add aca deferred error type support add aca deferred error type support Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index a0b7f0d0c513..611c1751577a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -434,6 +434,8 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count); break; case ACA_ERROR_TYPE_DEFERRED: + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count); + break; default: break; } @@ -474,6 +476,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h smu_type = ACA_SMU_TYPE_UE; break; case ACA_ERROR_TYPE_CE: + case ACA_ERROR_TYPE_DEFERRED: smu_type = ACA_SMU_TYPE_CE; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3c6d532824f6..2bc02b809246 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1291,8 +1291,8 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a if (amdgpu_ras_query_error_status(obj->adev, &info)) return -EINVAL; - return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, - "ce", info.ce_count); + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count, "de", info.ue_count); } static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, @@ -1341,6 +1341,10 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data); if (ret) return ret; + + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data); + if (ret) + return ret; } else { /* FIXME: add code to check return value later */ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx); -- cgit v1.2.3-59-g8ed1b From bd15bf742f6d869998d331f01496e8ed54bcf237 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Sun, 3 Mar 2024 19:01:23 +0800 Subject: drm/amdgpu: avoid update aca bank multi times during ras isr Because the UE Valid MCA count will only be cleared after reset, in order to avoid repeated counting of the error count, the aca bank is only updated once during ras isr. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 1 + 2 files changed, 28 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 611c1751577a..53ad76f590a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -369,6 +369,26 @@ static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks * return 0; } +static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type type) +{ + struct amdgpu_aca *aca = &adev->aca; + bool ret = true; + + /* + * Because the UE Valid MCA count will only be cleared after reset, + * in order to avoid repeated counting of the error count, + * the aca bank is only updated once during the gpu recovery stage. + */ + if (type == ACA_SMU_TYPE_UE) { + if (amdgpu_ras_intr_triggered()) + ret = atomic_cmpxchg(&aca->ue_update_flag, 0, 1) == 0; + else + atomic_set(&aca->ue_update_flag, 0); + } + + return ret; +} + static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, bank_handler_t handler, void *data) { @@ -380,6 +400,9 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, if (list_empty(&aca->mgr.list)) return 0; + if (!aca_bank_should_update(adev, type)) + return 0; + ret = aca_smu_get_valid_aca_count(adev, type, &count); if (ret) return ret; @@ -670,6 +693,8 @@ int amdgpu_aca_init(struct amdgpu_device *adev) struct amdgpu_aca *aca = &adev->aca; int ret; + atomic_set(&aca->ue_update_flag, 0); + ret = aca_manager_init(&aca->mgr); if (ret) return ret; @@ -682,6 +707,8 @@ void amdgpu_aca_fini(struct amdgpu_device *adev) struct amdgpu_aca *aca = &adev->aca; aca_manager_fini(&aca->mgr); + + atomic_set(&aca->ue_update_flag, 0); } int amdgpu_aca_reset(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 6eb4c0341278..674a5a9da862 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -175,6 +175,7 @@ struct aca_smu_funcs { struct amdgpu_aca { struct aca_handle_manager mgr; const struct aca_smu_funcs *smu_funcs; + atomic_t ue_update_flag; bool is_enabled; }; -- cgit v1.2.3-59-g8ed1b From 31fd330b97ba334e00ea6102dfb68565eeb7a23f Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 18 Mar 2024 09:32:36 +0800 Subject: drm/amdgpu: add ras event id support for ACA add ras event id support for ACA. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 29 ++++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 11 ++++++----- 3 files changed, 23 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 53ad76f590a1..c3242dbf8355 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -116,20 +116,22 @@ static struct aca_regs_dump { {"CONTROL_MASK", ACA_REG_IDX_CTL_MASK}, }; -static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank) +static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank, + struct ras_query_context *qctx) { + u64 event_id = qctx ? qctx->event_id : 0ULL; int i; - dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n"); + RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); /* plus 1 for output format, e.g: ACA[08/08]: xxxx */ for (i = 0; i < ARRAY_SIZE(aca_regs); i++) - dev_info(adev->dev, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", - idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", + idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); } static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, int start, int count, - struct aca_banks *banks) + struct aca_banks *banks, struct ras_query_context *qctx) { struct amdgpu_aca *aca = &adev->aca; const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; @@ -165,7 +167,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_ bank.type = type; - aca_smu_bank_dump(adev, i, count, &bank); + aca_smu_bank_dump(adev, i, count, &bank, qctx); ret = aca_banks_add_bank(banks, &bank); if (ret) @@ -390,7 +392,7 @@ static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type } static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, - bank_handler_t handler, void *data) + bank_handler_t handler, struct ras_query_context *qctx, void *data) { struct amdgpu_aca *aca = &adev->aca; struct aca_banks banks; @@ -412,7 +414,7 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, aca_banks_init(&banks); - ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks); + ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx); if (ret) goto err_release_banks; @@ -489,7 +491,7 @@ out_unlock: } static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type, - struct ras_err_data *err_data) + struct ras_err_data *err_data, struct ras_query_context *qctx) { enum aca_smu_type smu_type; int ret; @@ -507,7 +509,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h } /* udpate aca bank to aca source error_cache first */ - ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, NULL); + ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL); if (ret) return ret; @@ -523,7 +525,7 @@ static bool aca_handle_is_valid(struct aca_handle *handle) } int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, - enum aca_error_type type, void *data) + enum aca_error_type type, void *data, void *qctx) { struct ras_err_data *err_data = (struct ras_err_data *)data; @@ -536,7 +538,8 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han if (!(BIT(type) & handle->mask)) return 0; - return __aca_get_error_data(adev, handle, type, err_data); + return __aca_get_error_data(adev, handle, type, err_data, + (struct ras_query_context *)qctx); } static void aca_error_init(struct aca_error *aerr, enum aca_error_type type) @@ -853,7 +856,7 @@ static int aca_dump_show(struct seq_file *m, enum aca_smu_type type) .idx = 0, }; - return aca_banks_update(adev, type, handler_aca_bank_dump, (void *)&context); + return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void *)&context); } static int aca_dump_ce_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 674a5a9da862..247968d6a925 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -198,7 +198,7 @@ int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle, const char *name, const struct aca_info *aca_info, void *data); void amdgpu_aca_remove_handle(struct aca_handle *handle); int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, - enum aca_error_type type, void *data); + enum aca_error_type type, void *data, void *qctx); int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en); void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 2bc02b809246..b8c7d0bf8fb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1269,7 +1269,8 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) } static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum aca_error_type type, struct ras_err_data *err_data) + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx) { struct ras_manager *obj; @@ -1277,7 +1278,7 @@ static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu if (!obj) return -EINVAL; - return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data); + return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx); } ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, @@ -1334,15 +1335,15 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, } } else { if (amdgpu_aca_is_enabled(adev)) { - ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data); + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx); if (ret) return ret; - ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data); + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx); if (ret) return ret; - ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data); + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx); if (ret) return ret; } else { -- cgit v1.2.3-59-g8ed1b From 81d96e8b5a85e3ebb85342525aab02e89bec72dc Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 28 Mar 2024 13:46:14 +0800 Subject: drm/amdgpu: refine function signature of amdgpu_aca_get_error_data() refine function signature of amdgpu_aca_get_error_data(); Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 8 +++----- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 6 +++++- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index c3242dbf8355..c02634a124dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -525,10 +525,9 @@ static bool aca_handle_is_valid(struct aca_handle *handle) } int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, - enum aca_error_type type, void *data, void *qctx) + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx) { - struct ras_err_data *err_data = (struct ras_err_data *)data; - if (!handle || !err_data) return -EINVAL; @@ -538,8 +537,7 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han if (!(BIT(type) & handle->mask)) return 0; - return __aca_get_error_data(adev, handle, type, err_data, - (struct ras_query_context *)qctx); + return __aca_get_error_data(adev, handle, type, err_data, qctx); } static void aca_error_init(struct aca_error *aerr, enum aca_error_type type) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 247968d6a925..3765843ea648 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -26,6 +26,9 @@ #include +struct ras_err_data; +struct ras_query_context; + #define ACA_MAX_REGS_COUNT (16) #define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l) @@ -198,7 +201,8 @@ int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle, const char *name, const struct aca_info *aca_info, void *data); void amdgpu_aca_remove_handle(struct aca_handle *handle); int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, - enum aca_error_type type, void *data, void *qctx); + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx); int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en); void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, -- cgit v1.2.3-59-g8ed1b From f23558627f2bb28df3b7f3d1d53b1e7f4bd1e250 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 12 Apr 2024 09:53:15 +0800 Subject: drm/amdgpu: add new aca smu callback func parse_error_code() add new aca smu callback parse_error_code{} to avoid specific asic check in amdgpu_aca.c file Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 23 ++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 1 + .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 13 ++++++++++++ 3 files changed, 22 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index c02634a124dd..c50202215f6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -753,23 +753,13 @@ int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info) static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank) { - int error_code; - - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 6): - if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) { - error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]); - return error_code & 0xff; - } - break; - default: - break; - } + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; - /* NOTE: the true error code is encoded in status.errorcode[0:7] */ - error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]); + if (!smu_funcs || !smu_funcs->parse_error_code) + return -EOPNOTSUPP; - return error_code & 0xff; + return smu_funcs->parse_error_code(adev, bank); } int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size) @@ -780,6 +770,9 @@ int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank return -EINVAL; error_code = aca_bank_get_error_code(adev, bank); + if (error_code < 0) + return error_code; + for (i = 0; i < size; i++) { if (err_codes[i] == error_code) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 3765843ea648..5ef6b745f222 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -173,6 +173,7 @@ struct aca_smu_funcs { int (*set_debug_mode)(struct amdgpu_device *adev, bool enable); int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count); int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank); + int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank); }; struct amdgpu_aca { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 16179e170874..bc1f3fe4d82a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -3118,12 +3118,25 @@ static int aca_smu_get_valid_aca_bank(struct amdgpu_device *adev, return 0; } +static int aca_smu_parse_error_code(struct amdgpu_device *adev, struct aca_bank *bank) +{ + int error_code; + + if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) + error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]); + else + error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]); + + return error_code & 0xff; +} + static const struct aca_smu_funcs smu_v13_0_6_aca_smu_funcs = { .max_ue_bank_count = 12, .max_ce_bank_count = 12, .set_debug_mode = aca_smu_set_debug_mode, .get_valid_aca_count = aca_smu_get_valid_aca_count, .get_valid_aca_bank = aca_smu_get_valid_aca_bank, + .parse_error_code = aca_smu_parse_error_code, }; static int smu_v13_0_6_select_xgmi_plpd_policy(struct smu_context *smu, -- cgit v1.2.3-59-g8ed1b