aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac/i10nm_base.c
diff options
context:
space:
mode:
authorYouquan Song <youquan.song@intel.com>2022-09-01 12:43:10 -0700
committerTony Luck <tony.luck@intel.com>2022-09-08 11:40:01 -0700
commit2738c69a8813453b35549465867ae591f8598eb0 (patch)
tree572fe6bb7040d4a553048e794b3c55ec98c16597 /drivers/edac/i10nm_base.c
parentEDAC/skx_common: Make output format similar (diff)
downloadlinux-dev-2738c69a8813453b35549465867ae591f8598eb0.tar.xz
linux-dev-2738c69a8813453b35549465867ae591f8598eb0.zip
EDAC/i10nm: Add driver decoder for Ice Lake and Tremont CPUs
Current i10nm_edac only supports firmware decoder (ACPI DSM methods). MCA bank registers of Ice Lake or Tremont CPUs contain the information to decode DDR memory errors. To get better decoding performance, add the driver decoder (decoding DDR memory errors via extracting error information from MCA bank registers) for Ice Lake and Tremont CPUs. Co-developed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com> Signed-off-by: Youquan Song <youquan.song@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com> Link: https://lore.kernel.org/all/20220901194310.115427-1-tony.luck@intel.com/
Diffstat (limited to 'drivers/edac/i10nm_base.c')
-rw-r--r--drivers/edac/i10nm_base.c134
1 files changed, 132 insertions, 2 deletions
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 6cf50ee0b77c..817f618fcff0 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -74,6 +74,8 @@ static struct list_head *i10nm_edac_list;
static struct res_config *res_cfg;
static int retry_rd_err_log;
+static int decoding_via_mca;
+static bool mem_cfg_2lm;
static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
@@ -231,6 +233,103 @@ static bool i10nm_check_2lm(struct res_config *cfg)
return false;
}
+/*
+ * Check whether the error comes from DDRT by ICX/Tremont model specific error code.
+ * Refer to SDM vol3B 16.11.3 Intel IMC MC error codes for IA32_MCi_STATUS.
+ */
+static bool i10nm_mscod_is_ddrt(u32 mscod)
+{
+ switch (mscod) {
+ case 0x0106: case 0x0107:
+ case 0x0800: case 0x0804:
+ case 0x0806 ... 0x0808:
+ case 0x080a ... 0x080e:
+ case 0x0810: case 0x0811:
+ case 0x0816: case 0x081e:
+ case 0x081f:
+ return true;
+ }
+
+ return false;
+}
+
+static bool i10nm_mc_decode_available(struct mce *mce)
+{
+ u8 bank;
+
+ if (!decoding_via_mca || mem_cfg_2lm)
+ return false;
+
+ if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
+ != (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
+ return false;
+
+ bank = mce->bank;
+
+ switch (res_cfg->type) {
+ case I10NM:
+ if (bank < 13 || bank > 26)
+ return false;
+
+ /* DDRT errors can't be decoded from MCA bank registers */
+ if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT)
+ return false;
+
+ if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status)))
+ return false;
+
+ /* Check whether one of {13,14,17,18,21,22,25,26} */
+ return ((bank - 13) & BIT(1)) == 0;
+ default:
+ return false;
+ }
+}
+
+static bool i10nm_mc_decode(struct decoded_addr *res)
+{
+ struct mce *m = res->mce;
+ struct skx_dev *d;
+ u8 bank;
+
+ if (!i10nm_mc_decode_available(m))
+ return false;
+
+ list_for_each_entry(d, i10nm_edac_list, list) {
+ if (d->imc[0].src_id == m->socketid) {
+ res->socket = m->socketid;
+ res->dev = d;
+ break;
+ }
+ }
+
+ switch (res_cfg->type) {
+ case I10NM:
+ bank = m->bank - 13;
+ res->imc = bank / 4;
+ res->channel = bank % 2;
+ break;
+ default:
+ return false;
+ }
+
+ if (!res->dev) {
+ skx_printk(KERN_ERR, "No device for src_id %d imc %d\n",
+ m->socketid, res->imc);
+ return false;
+ }
+
+ res->column = GET_BITFIELD(m->misc, 9, 18) << 2;
+ res->row = GET_BITFIELD(m->misc, 19, 39);
+ res->bank_group = GET_BITFIELD(m->misc, 40, 41);
+ res->bank_address = GET_BITFIELD(m->misc, 42, 43);
+ res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2;
+ res->rank = GET_BITFIELD(m->misc, 56, 58);
+ res->dimm = res->rank >> 2;
+ res->rank = res->rank % 4;
+
+ return true;
+}
+
static int i10nm_get_ddr_munits(void)
{
struct pci_dev *mdev;
@@ -574,7 +673,8 @@ static int __init i10nm_init(void)
return -ENODEV;
}
- skx_set_mem_cfg(i10nm_check_2lm(cfg));
+ mem_cfg_2lm = i10nm_check_2lm(cfg);
+ skx_set_mem_cfg(mem_cfg_2lm);
rc = i10nm_get_ddr_munits();
@@ -626,9 +726,11 @@ static int __init i10nm_init(void)
setup_i10nm_debug();
if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
- skx_set_decode(NULL, show_retry_rd_err_log);
+ skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
if (retry_rd_err_log == 2)
enable_retry_rd_err_log(true);
+ } else {
+ skx_set_decode(i10nm_mc_decode, NULL);
}
i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
@@ -658,6 +760,34 @@ static void __exit i10nm_exit(void)
module_init(i10nm_init);
module_exit(i10nm_exit);
+static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp)
+{
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul(buf, 0, &val);
+
+ if (ret || val > 1)
+ return -EINVAL;
+
+ if (val && mem_cfg_2lm) {
+ i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n");
+ return -EIO;
+ }
+
+ ret = param_set_int(buf, kp);
+
+ return ret;
+}
+
+static const struct kernel_param_ops decoding_via_mca_param_ops = {
+ .set = set_decoding_via_mca,
+ .get = param_get_int,
+};
+
+module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644);
+MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable");
+
module_param(retry_rd_err_log, int, 0444);
MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");