aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
authorYazen Ghannam <yazen.ghannam@amd.com>2021-12-16 16:29:04 +0000
committerBorislav Petkov <bp@suse.de>2021-12-22 17:19:18 +0100
commit5176a93ab27aef1b9f4496fc68e6c303a011d7cc (patch)
tree0cc2456dac9ca9525d1a1d1343e0a08eae9e6db8 /drivers/edac
parentx86/mce: Check regs before accessing it (diff)
downloadlinux-dev-5176a93ab27aef1b9f4496fc68e6c303a011d7cc.tar.xz
linux-dev-5176a93ab27aef1b9f4496fc68e6c303a011d7cc.zip
x86/MCE/AMD, EDAC/mce_amd: Add new SMCA bank types
Add HWID and McaType values for new SMCA bank types, and add their error descriptions to edac_mce_amd. The "PHY" bank types all have the same error descriptions, and the NBIF and SHUB bank types have the same error descriptions. So reuse the same arrays where appropriate. [ bp: Remove useless comments over hwid types. ] Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com> Signed-off-by: Borislav Petkov <bp@suse.de> Link: https://lore.kernel.org/r/20211216162905.4132657-2-yazen.ghannam@amd.com
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/mce_amd.c135
1 files changed, 128 insertions, 7 deletions
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 67dbf4c31271..cfd3f7ae9251 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -399,6 +399,63 @@ static const char * const smca_mp5_mce_desc[] = {
"Instruction Tag Cache Bank B ECC or parity error",
};
+static const char * const smca_mpdma_mce_desc[] = {
+ "Main SRAM [31:0] bank ECC or parity error",
+ "Main SRAM [63:32] bank ECC or parity error",
+ "Main SRAM [95:64] bank ECC or parity error",
+ "Main SRAM [127:96] bank ECC or parity error",
+ "Data Cache Bank A ECC or parity error",
+ "Data Cache Bank B ECC or parity error",
+ "Data Tag Cache Bank A ECC or parity error",
+ "Data Tag Cache Bank B ECC or parity error",
+ "Instruction Cache Bank A ECC or parity error",
+ "Instruction Cache Bank B ECC or parity error",
+ "Instruction Tag Cache Bank A ECC or parity error",
+ "Instruction Tag Cache Bank B ECC or parity error",
+ "Data Cache Bank A ECC or parity error",
+ "Data Cache Bank B ECC or parity error",
+ "Data Tag Cache Bank A ECC or parity error",
+ "Data Tag Cache Bank B ECC or parity error",
+ "Instruction Cache Bank A ECC or parity error",
+ "Instruction Cache Bank B ECC or parity error",
+ "Instruction Tag Cache Bank A ECC or parity error",
+ "Instruction Tag Cache Bank B ECC or parity error",
+ "Data Cache Bank A ECC or parity error",
+ "Data Cache Bank B ECC or parity error",
+ "Data Tag Cache Bank A ECC or parity error",
+ "Data Tag Cache Bank B ECC or parity error",
+ "Instruction Cache Bank A ECC or parity error",
+ "Instruction Cache Bank B ECC or parity error",
+ "Instruction Tag Cache Bank A ECC or parity error",
+ "Instruction Tag Cache Bank B ECC or parity error",
+ "System Hub Read Buffer ECC or parity error",
+ "MPDMA TVF DVSEC Memory ECC or parity error",
+ "MPDMA TVF MMIO Mailbox0 ECC or parity error",
+ "MPDMA TVF MMIO Mailbox1 ECC or parity error",
+ "MPDMA TVF Doorbell Memory ECC or parity error",
+ "MPDMA TVF SDP Slave Memory 0 ECC or parity error",
+ "MPDMA TVF SDP Slave Memory 1 ECC or parity error",
+ "MPDMA TVF SDP Slave Memory 2 ECC or parity error",
+ "MPDMA TVF SDP Master Memory 0 ECC or parity error",
+ "MPDMA TVF SDP Master Memory 1 ECC or parity error",
+ "MPDMA TVF SDP Master Memory 2 ECC or parity error",
+ "MPDMA TVF SDP Master Memory 3 ECC or parity error",
+ "MPDMA TVF SDP Master Memory 4 ECC or parity error",
+ "MPDMA TVF SDP Master Memory 5 ECC or parity error",
+ "MPDMA TVF SDP Master Memory 6 ECC or parity error",
+ "MPDMA PTE Command FIFO ECC or parity error",
+ "MPDMA PTE Hub Data FIFO ECC or parity error",
+ "MPDMA PTE Internal Data FIFO ECC or parity error",
+ "MPDMA PTE Command Memory DMA ECC or parity error",
+ "MPDMA PTE Command Memory Internal ECC or parity error",
+ "MPDMA PTE DMA Completion FIFO ECC or parity error",
+ "MPDMA PTE Tablewalk Completion FIFO ECC or parity error",
+ "MPDMA PTE Descriptor Completion FIFO ECC or parity error",
+ "MPDMA PTE ReadOnly Completion FIFO ECC or parity error",
+ "MPDMA PTE DirectWrite Completion FIFO ECC or parity error",
+ "SDP Watchdog Timer expired",
+};
+
static const char * const smca_nbio_mce_desc[] = {
"ECC or Parity error",
"PCIE error",
@@ -448,7 +505,7 @@ static const char * const smca_xgmipcs_mce_desc[] = {
"Rx Replay Timeout Error",
"LinkSub Tx Timeout Error",
"LinkSub Rx Timeout Error",
- "Rx CMD Pocket Error",
+ "Rx CMD Packet Error",
};
static const char * const smca_xgmiphy_mce_desc[] = {
@@ -458,11 +515,66 @@ static const char * const smca_xgmiphy_mce_desc[] = {
"PHY APB error",
};
-static const char * const smca_waflphy_mce_desc[] = {
- "RAM ECC Error",
- "ARC instruction buffer parity error",
- "ARC data buffer parity error",
- "PHY APB error",
+static const char * const smca_nbif_mce_desc[] = {
+ "Timeout error from GMI",
+ "SRAM ECC error",
+ "NTB Error Event",
+ "SDP Parity error",
+};
+
+static const char * const smca_sata_mce_desc[] = {
+ "Parity error for port 0",
+ "Parity error for port 1",
+ "Parity error for port 2",
+ "Parity error for port 3",
+ "Parity error for port 4",
+ "Parity error for port 5",
+ "Parity error for port 6",
+ "Parity error for port 7",
+};
+
+static const char * const smca_usb_mce_desc[] = {
+ "Parity error or ECC error for S0 RAM0",
+ "Parity error or ECC error for S0 RAM1",
+ "Parity error or ECC error for S0 RAM2",
+ "Parity error for PHY RAM0",
+ "Parity error for PHY RAM1",
+ "AXI Slave Response error",
+};
+
+static const char * const smca_gmipcs_mce_desc[] = {
+ "Data Loss Error",
+ "Training Error",
+ "Replay Parity Error",
+ "Rx Fifo Underflow Error",
+ "Rx Fifo Overflow Error",
+ "CRC Error",
+ "BER Exceeded Error",
+ "Tx Fifo Underflow Error",
+ "Replay Buffer Parity Error",
+ "Tx Overflow Error",
+ "Replay Fifo Overflow Error",
+ "Replay Fifo Underflow Error",
+ "Elastic Fifo Overflow Error",
+ "Deskew Error",
+ "Offline Error",
+ "Data Startup Limit Error",
+ "FC Init Timeout Error",
+ "Recovery Timeout Error",
+ "Ready Serial Timeout Error",
+ "Ready Serial Attempt Error",
+ "Recovery Attempt Error",
+ "Recovery Relock Attempt Error",
+ "Deskew Abort Error",
+ "Rx Buffer Error",
+ "Rx LFDS Fifo Overflow Error",
+ "Rx LFDS Fifo Underflow Error",
+ "LinkSub Tx Timeout Error",
+ "LinkSub Rx Timeout Error",
+ "Rx CMD Packet Error",
+ "LFDS Training Timeout Error",
+ "LFDS FC Init Timeout Error",
+ "Data Loss Error",
};
struct smca_mce_desc {
@@ -490,12 +602,21 @@ static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
[SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) },
[SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
+ [SMCA_MPDMA] = { smca_mpdma_mce_desc, ARRAY_SIZE(smca_mpdma_mce_desc) },
[SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
[SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
[SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) },
[SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) },
+ /* NBIF and SHUB have the same error descriptions, for now. */
+ [SMCA_NBIF] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) },
+ [SMCA_SHUB] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) },
+ [SMCA_SATA] = { smca_sata_mce_desc, ARRAY_SIZE(smca_sata_mce_desc) },
+ [SMCA_USB] = { smca_usb_mce_desc, ARRAY_SIZE(smca_usb_mce_desc) },
+ [SMCA_GMI_PCS] = { smca_gmipcs_mce_desc, ARRAY_SIZE(smca_gmipcs_mce_desc) },
+ /* All the PHY bank types have the same error descriptions, for now. */
[SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
- [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) },
+ [SMCA_WAFL_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
+ [SMCA_GMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) },
};
static bool f12h_mc0_mce(u16 ec, u8 xec)