diff options
Diffstat (limited to 'drivers/edac')
32 files changed, 872 insertions, 620 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 58ab63642e72..456602d373b7 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -55,6 +55,7 @@ config EDAC_DECODE_MCE config EDAC_GHES bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" depends on ACPI_APEI_GHES && (EDAC=y) + select UEFI_CPER help Not all machines support hardware-driven error report. Some of those provide a BIOS-driven error report mechanism via ACPI, using the @@ -262,6 +263,7 @@ config EDAC_I10NM config EDAC_PND2 tristate "Intel Pondicherry2" depends on PCI && X86_64 && X86_MCE_INTEL + select P2SB if X86 help Support for error detection and correction on the Intel Pondicherry2 Integrated Memory Controller. This SoC IP is @@ -471,7 +473,7 @@ config EDAC_ALTERA_SDMMC config EDAC_SIFIVE bool "Sifive platform EDAC driver" - depends on EDAC=y && SIFIVE_L2 + depends on EDAC=y && SIFIVE_CCACHE help Support for error detection and correction on the SiFive SoCs. @@ -484,7 +486,7 @@ config EDAC_ARMADA_XP config EDAC_SYNOPSYS tristate "Synopsys DDR Memory Controller" - depends on ARCH_ZYNQ || ARCH_ZYNQMP || ARCH_INTEL_SOCFPGA + depends on ARCH_ZYNQ || ARCH_ZYNQMP || ARCH_INTEL_SOCFPGA || ARCH_MXC help Support for error detection and correction on the Synopsys DDR memory controller. diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 3a6d2416cb0f..e7e8e624a436 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -350,7 +350,7 @@ static int altr_sdram_probe(struct platform_device *pdev) if (irq < 0) { edac_printk(KERN_ERR, EDAC_MC, "No irq %d in DT\n", irq); - return -ENODEV; + return irq; } /* Arria10 has a 2nd IRQ */ @@ -1083,8 +1083,46 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) #ifdef CONFIG_EDAC_ALTERA_SDRAM +/* + * A legacy U-Boot bug only enabled memory mapped access to the ECC Enable + * register if ECC is enabled. Linux checks the ECC Enable register to + * determine ECC status. + * Use an SMC call (which always works) to determine ECC enablement. + */ +static int altr_s10_sdram_check_ecc_deps(struct altr_edac_device_dev *device) +{ + const struct edac_device_prv_data *prv = device->data; + unsigned long sdram_ecc_addr; + struct arm_smccc_res result; + struct device_node *np; + phys_addr_t sdram_addr; + u32 read_reg; + int ret; + + np = of_find_compatible_node(NULL, NULL, "altr,sdr-ctl"); + if (!np) + goto sdram_err; + + sdram_addr = of_translate_address(np, of_get_address(np, 0, + NULL, NULL)); + of_node_put(np); + sdram_ecc_addr = (unsigned long)sdram_addr + prv->ecc_en_ofst; + arm_smccc_smc(INTEL_SIP_SMC_REG_READ, sdram_ecc_addr, + 0, 0, 0, 0, 0, 0, &result); + read_reg = (unsigned int)result.a1; + ret = (int)result.a0; + if (!ret && (read_reg & prv->ecc_enable_mask)) + return 0; + +sdram_err: + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: No ECC present or ECC disabled.\n", + device->edac_dev_name); + return -ENODEV; +} + static const struct edac_device_prv_data s10_sdramecc_data = { - .setup = altr_check_ecc_deps, + .setup = altr_s10_sdram_check_ecc_deps, .ce_clear_mask = ALTR_S10_ECC_SERRPENA, .ue_clear_mask = ALTR_S10_ECC_DERRPENA, .ecc_enable_mask = ALTR_S10_ECC_EN, diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index fba609ada0e6..2f854feeeb23 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -15,6 +15,21 @@ static struct msr __percpu *msrs; static struct amd64_family_type *fam_type; +static inline u32 get_umc_reg(u32 reg) +{ + if (!fam_type->flags.zn_regs_v2) + return reg; + + switch (reg) { + case UMCCH_ADDR_CFG: return UMCCH_ADDR_CFG_DDR5; + case UMCCH_ADDR_MASK_SEC: return UMCCH_ADDR_MASK_SEC_DDR5; + case UMCCH_DIMM_CFG: return UMCCH_DIMM_CFG_DDR5; + } + + WARN_ONCE(1, "%s: unknown register 0x%x", __func__, reg); + return 0; +} + /* Per-node stuff */ static struct ecc_settings **ecc_stngs; @@ -1429,8 +1444,10 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no"); - if (pvt->dram_type == MEM_LRDDR4) { - amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp); + if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) { + amd_smn_read(pvt->mc_node_id, + umc_base + get_umc_reg(UMCCH_ADDR_CFG), + &tmp); edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n", i, 1 << ((tmp >> 4) & 0x3)); } @@ -1505,7 +1522,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt) for_each_umc(umc) { pvt->csels[umc].b_cnt = 4; - pvt->csels[umc].m_cnt = 2; + pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2; } } else { @@ -1545,7 +1562,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) } umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; - umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC; + umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC); for_each_chip_select_mask(cs, umc, pvt) { mask = &pvt->csels[umc].csmasks[cs]; @@ -1616,19 +1633,49 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) } } +static void determine_memory_type_df(struct amd64_pvt *pvt) +{ + struct amd64_umc *umc; + u32 i; + + for_each_umc(i) { + umc = &pvt->umc[i]; + + if (!(umc->sdp_ctrl & UMC_SDP_INIT)) { + umc->dram_type = MEM_EMPTY; + continue; + } + + /* + * Check if the system supports the "DDR Type" field in UMC Config + * and has DDR5 DIMMs in use. + */ + if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR5; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR5; + else + umc->dram_type = MEM_DDR5; + } else { + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR4; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR4; + else + umc->dram_type = MEM_DDR4; + } + + edac_dbg(1, " UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]); + } +} + static void determine_memory_type(struct amd64_pvt *pvt) { u32 dram_ctrl, dcsm; - if (pvt->umc) { - if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) - pvt->dram_type = MEM_LRDDR4; - else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) - pvt->dram_type = MEM_RDDR4; - else - pvt->dram_type = MEM_DDR4; - return; - } + if (pvt->umc) + return determine_memory_type_df(pvt); switch (pvt->fam) { case 0xf: @@ -2149,6 +2196,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, { u32 addr_mask_orig, addr_mask_deinterleaved; u32 msb, weight, num_zero_bits; + int cs_mask_nr = csrow_nr; int dimm, size = 0; /* No Chip Selects are enabled. */ @@ -2164,17 +2212,33 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, return size; /* - * There is one mask per DIMM, and two Chip Selects per DIMM. - * CS0 and CS1 -> DIMM0 - * CS2 and CS3 -> DIMM1 + * Family 17h introduced systems with one mask per DIMM, + * and two Chip Selects per DIMM. + * + * CS0 and CS1 -> MASK0 / DIMM0 + * CS2 and CS3 -> MASK1 / DIMM1 + * + * Family 19h Model 10h introduced systems with one mask per Chip Select, + * and two Chip Selects per DIMM. + * + * CS0 -> MASK0 -> DIMM0 + * CS1 -> MASK1 -> DIMM0 + * CS2 -> MASK2 -> DIMM1 + * CS3 -> MASK3 -> DIMM1 + * + * Keep the mask number equal to the Chip Select number for newer systems, + * and shift the mask number for older systems. */ dimm = csrow_nr >> 1; + if (!fam_type->flags.zn_regs_v2) + cs_mask_nr >>= 1; + /* Asymmetric dual-rank DIMM support. */ if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) - addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm]; + addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr]; else - addr_mask_orig = pvt->csels[umc].csmasks[dimm]; + addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr]; /* * The number of zero bits in the mask is equal to the number of bits @@ -2930,6 +2994,7 @@ static struct amd64_family_type family_types[] = { .f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6, .max_mcs = 12, + .flags.zn_regs_v2 = 1, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -3368,7 +3433,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) umc_base = get_umc_base(i); umc = &pvt->umc[i]; - amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg); + amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg); amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg); amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl); amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl); @@ -3452,7 +3517,9 @@ skip: read_dct_base_mask(pvt); determine_memory_type(pvt); - edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); + + if (!pvt->umc) + edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); determine_ecc_sym_sz(pvt); } @@ -3548,7 +3615,7 @@ static int init_csrows_df(struct mem_ctl_info *mci) pvt->mc_node_id, cs); dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); - dimm->mtype = pvt->dram_type; + dimm->mtype = pvt->umc[umc].dram_type; dimm->edac_mode = edac_mode; dimm->dtype = dev_type; dimm->grain = 64; @@ -4269,7 +4336,7 @@ static int __init amd64_edac_init(void) if (!x86_match_cpu(amd64_cpuids)) return -ENODEV; - if (amd_cache_northbridges() < 0) + if (!amd_nb_num()) return -ENODEV; opstate_init(); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 352bda9803f6..38e5ad95d010 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -273,8 +273,11 @@ #define UMCCH_BASE_ADDR_SEC 0x10 #define UMCCH_ADDR_MASK 0x20 #define UMCCH_ADDR_MASK_SEC 0x28 +#define UMCCH_ADDR_MASK_SEC_DDR5 0x30 #define UMCCH_ADDR_CFG 0x30 +#define UMCCH_ADDR_CFG_DDR5 0x40 #define UMCCH_DIMM_CFG 0x80 +#define UMCCH_DIMM_CFG_DDR5 0x90 #define UMCCH_UMC_CFG 0x100 #define UMCCH_SDP_CTRL 0x104 #define UMCCH_ECC_CTRL 0x14C @@ -344,6 +347,9 @@ struct amd64_umc { u32 sdp_ctrl; /* SDP Control reg */ u32 ecc_ctrl; /* DRAM ECC Control reg */ u32 umc_cap_hi; /* Capabilities High reg */ + + /* cache the dram_type */ + enum mem_type dram_type; }; struct amd64_pvt { @@ -391,7 +397,12 @@ struct amd64_pvt { /* place to store error injection parameters prior to issue */ struct error_injection injection; - /* cache the dram_type */ + /* + * cache the dram_type + * + * NOTE: Don't use this for Family 17h and later. + * Use dram_type in struct amd64_umc instead. + */ enum mem_type dram_type; struct amd64_umc *umc; /* UMC registers */ @@ -480,11 +491,22 @@ struct low_ops { unsigned cs_mode, int cs_mask_nr); }; +struct amd64_family_flags { + /* + * Indicates that the system supports the new register offsets, etc. + * first introduced with Family 19h Model 10h. + */ + __u64 zn_regs_v2 : 1, + + __reserved : 63; +}; + struct amd64_family_type { const char *ctl_name; u16 f0_id, f1_id, f2_id, f6_id; /* Maximum number of memory controllers per die/node. */ u8 max_mcs; + struct amd64_family_flags flags; struct low_ops ops; }; diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c index b1f46a974b9e..038abbb83f4b 100644 --- a/drivers/edac/armada_xp_edac.c +++ b/drivers/edac/armada_xp_edac.c @@ -286,17 +286,10 @@ static int axp_mc_probe(struct platform_device *pdev) struct edac_mc_layer layers[1]; const struct of_device_id *id; struct mem_ctl_info *mci; - struct resource *r; void __iomem *base; uint32_t config; - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) { - dev_err(&pdev->dev, "Unable to get mem resource\n"); - return -ENODEV; - } - - base = devm_ioremap_resource(&pdev->dev, r); + base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) { dev_err(&pdev->dev, "Unable to map regs\n"); return PTR_ERR(base); @@ -516,15 +509,8 @@ static int aurora_l2_probe(struct platform_device *pdev) const struct of_device_id *id; uint32_t l2x0_aux_ctrl; void __iomem *base; - struct resource *r; - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) { - dev_err(&pdev->dev, "Unable to get mem resource\n"); - return -ENODEV; - } - base = devm_ioremap_resource(&pdev->dev, r); + base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) { dev_err(&pdev->dev, "Unable to map regs\n"); return PTR_ERR(base); diff --git a/drivers/edac/dmc520_edac.c b/drivers/edac/dmc520_edac.c index b8a7d9594afd..1fa5ca57e9ec 100644 --- a/drivers/edac/dmc520_edac.c +++ b/drivers/edac/dmc520_edac.c @@ -489,7 +489,7 @@ static int dmc520_edac_probe(struct platform_device *pdev) dev = &pdev->dev; for (idx = 0; idx < NUMBER_OF_IRQS; idx++) { - irq = platform_get_irq_byname(pdev, dmc520_irq_configs[idx].name); + irq = platform_get_irq_byname_optional(pdev, dmc520_irq_configs[idx].name); irqs[idx] = irq; masks[idx] = dmc520_irq_configs[idx].mask; if (irq >= 0) { diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 8c4d947fb848..19522c568aa5 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -47,99 +47,67 @@ static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) } #endif /* CONFIG_EDAC_DEBUG */ -struct edac_device_ctl_info *edac_device_alloc_ctl_info( - unsigned sz_private, - char *edac_device_name, unsigned nr_instances, - char *edac_block_name, unsigned nr_blocks, - unsigned offset_value, /* zero, 1, or other based offset */ - struct edac_dev_sysfs_block_attribute *attrib_spec, unsigned nr_attrib, - int device_index) +/* + * @off_val: zero, 1, or other based offset + */ +struct edac_device_ctl_info * +edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instances, + char *blk_name, unsigned nr_blocks, unsigned off_val, + struct edac_dev_sysfs_block_attribute *attrib_spec, + unsigned nr_attrib, int device_index) { - struct edac_device_ctl_info *dev_ctl; - struct edac_device_instance *dev_inst, *inst; - struct edac_device_block *dev_blk, *blk_p, *blk; struct edac_dev_sysfs_block_attribute *dev_attrib, *attrib_p, *attrib; - unsigned total_size; - unsigned count; + struct edac_device_block *dev_blk, *blk_p, *blk; + struct edac_device_instance *dev_inst, *inst; + struct edac_device_ctl_info *dev_ctl; unsigned instance, block, attr; - void *pvt, *p; + void *pvt; int err; edac_dbg(4, "instances=%d blocks=%d\n", nr_instances, nr_blocks); - /* Calculate the size of memory we need to allocate AND - * determine the offsets of the various item arrays - * (instance,block,attrib) from the start of an allocated structure. - * We want the alignment of each item (instance,block,attrib) - * to be at least as stringent as what the compiler would - * provide if we could simply hardcode everything into a single struct. - */ - p = NULL; - dev_ctl = edac_align_ptr(&p, sizeof(*dev_ctl), 1); + dev_ctl = kzalloc(sizeof(struct edac_device_ctl_info), GFP_KERNEL); + if (!dev_ctl) + return NULL; - /* Calc the 'end' offset past end of ONE ctl_info structure - * which will become the start of the 'instance' array - */ - dev_inst = edac_align_ptr(&p, sizeof(*dev_inst), nr_instances); + dev_inst = kcalloc(nr_instances, sizeof(struct edac_device_instance), GFP_KERNEL); + if (!dev_inst) + goto free; - /* Calc the 'end' offset past the instance array within the ctl_info - * which will become the start of the block array - */ - count = nr_instances * nr_blocks; - dev_blk = edac_align_ptr(&p, sizeof(*dev_blk), count); + dev_ctl->instances = dev_inst; - /* Calc the 'end' offset past the dev_blk array - * which will become the start of the attrib array, if any. - */ - /* calc how many nr_attrib we need */ - if (nr_attrib > 0) - count *= nr_attrib; - dev_attrib = edac_align_ptr(&p, sizeof(*dev_attrib), count); + dev_blk = kcalloc(nr_instances * nr_blocks, sizeof(struct edac_device_block), GFP_KERNEL); + if (!dev_blk) + goto free; - /* Calc the 'end' offset past the attributes array */ - pvt = edac_align_ptr(&p, sz_private, 1); + dev_ctl->blocks = dev_blk; - /* 'pvt' now points to where the private data area is. - * At this point 'pvt' (like dev_inst,dev_blk and dev_attrib) - * is baselined at ZERO - */ - total_size = ((unsigned long)pvt) + sz_private; + if (nr_attrib) { + dev_attrib = kcalloc(nr_attrib, sizeof(struct edac_dev_sysfs_block_attribute), + GFP_KERNEL); + if (!dev_attrib) + goto free; - /* Allocate the amount of memory for the set of control structures */ - dev_ctl = kzalloc(total_size, GFP_KERNEL); - if (dev_ctl == NULL) - return NULL; + dev_ctl->attribs = dev_attrib; + } - /* Adjust pointers so they point within the actual memory we - * just allocated rather than an imaginary chunk of memory - * located at address 0. - * 'dev_ctl' points to REAL memory, while the others are - * ZERO based and thus need to be adjusted to point within - * the allocated memory. - */ - dev_inst = (struct edac_device_instance *) - (((char *)dev_ctl) + ((unsigned long)dev_inst)); - dev_blk = (struct edac_device_block *) - (((char *)dev_ctl) + ((unsigned long)dev_blk)); - dev_attrib = (struct edac_dev_sysfs_block_attribute *) - (((char *)dev_ctl) + ((unsigned long)dev_attrib)); - pvt = sz_private ? (((char *)dev_ctl) + ((unsigned long)pvt)) : NULL; - - /* Begin storing the information into the control info structure */ - dev_ctl->dev_idx = device_index; - dev_ctl->nr_instances = nr_instances; - dev_ctl->instances = dev_inst; - dev_ctl->pvt_info = pvt; + if (pvt_sz) { + pvt = kzalloc(pvt_sz, GFP_KERNEL); + if (!pvt) + goto free; + + dev_ctl->pvt_info = pvt; + } + + dev_ctl->dev_idx = device_index; + dev_ctl->nr_instances = nr_instances; /* Default logging of CEs and UEs */ dev_ctl->log_ce = 1; dev_ctl->log_ue = 1; /* Name of this edac device */ - snprintf(dev_ctl->name,sizeof(dev_ctl->name),"%s",edac_device_name); - - edac_dbg(4, "edac_dev=%p next after end=%p\n", - dev_ctl, pvt + sz_private); + snprintf(dev_ctl->name, sizeof(dev_ctl->name),"%s", dev_name); /* Initialize every Instance */ for (instance = 0; instance < nr_instances; instance++) { @@ -150,15 +118,14 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( inst->blocks = blk_p; /* name of this instance */ - snprintf(inst->name, sizeof(inst->name), - "%s%u", edac_device_name, instance); + snprintf(inst->name, sizeof(inst->name), "%s%u", dev_name, instance); /* Initialize every block in each instance */ for (block = 0; block < nr_blocks; block++) { blk = &blk_p[block]; blk->instance = inst; snprintf(blk->name, sizeof(blk->name), - "%s%d", edac_block_name, block+offset_value); + "%s%d", blk_name, block + off_val); edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n", instance, inst, block, blk, blk->name); @@ -210,10 +177,8 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( * Initialize the 'root' kobj for the edac_device controller */ err = edac_device_register_sysfs_main_kobj(dev_ctl); - if (err) { - kfree(dev_ctl); - return NULL; - } + if (err) + goto free; /* at this point, the root kobj is valid, and in order to * 'free' the object, then the function: @@ -223,6 +188,11 @@ struct edac_device_ctl_info *edac_device_alloc_ctl_info( */ return dev_ctl; + +free: + __edac_device_free_ctl_info(dev_ctl); + + return NULL; } EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info); diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h index fc2d2c218064..3f44e6b9d387 100644 --- a/drivers/edac/edac_device.h +++ b/drivers/edac/edac_device.h @@ -216,6 +216,8 @@ struct edac_device_ctl_info { */ u32 nr_instances; struct edac_device_instance *instances; + struct edac_device_block *blocks; + struct edac_dev_sysfs_block_attribute *attribs; /* Event counters for the this whole EDAC Device */ struct edac_device_counter counters; @@ -348,4 +350,16 @@ edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr, */ extern int edac_device_alloc_index(void); extern const char *edac_layer_name[]; + +/* Free the actual struct */ +static inline void __edac_device_free_ctl_info(struct edac_device_ctl_info *ci) +{ + if (ci) { + kfree(ci->pvt_info); + kfree(ci->attribs); + kfree(ci->blocks); + kfree(ci->instances); + kfree(ci); + } +} #endif diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index 5e7593753799..ac678b4a21fc 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c @@ -163,13 +163,14 @@ CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR, edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store); /* Base Attributes of the EDAC_DEVICE ECC object */ -static struct ctl_info_attribute *device_ctrl_attr[] = { - &attr_ctl_info_panic_on_ue, - &attr_ctl_info_log_ue, - &attr_ctl_info_log_ce, - &attr_ctl_info_poll_msec, +static struct attribute *device_ctrl_attrs[] = { + &attr_ctl_info_panic_on_ue.attr, + &attr_ctl_info_log_ue.attr, + &attr_ctl_info_log_ce.attr, + &attr_ctl_info_poll_msec.attr, NULL, }; +ATTRIBUTE_GROUPS(device_ctrl); /* * edac_device_ctrl_master_release @@ -207,17 +208,14 @@ static void edac_device_ctrl_master_release(struct kobject *kobj) /* decrement the EDAC CORE module ref count */ module_put(edac_dev->owner); - /* free the control struct containing the 'main' kobj - * passed in to this routine - */ - kfree(edac_dev); + __edac_device_free_ctl_info(edac_dev); } /* ktype for the main (master) kobject */ static struct kobj_type ktype_device_ctrl = { .release = edac_device_ctrl_master_release, .sysfs_ops = &device_ctl_info_ops, - .default_attrs = (struct attribute **)device_ctrl_attr, + .default_groups = device_ctrl_groups, }; /* @@ -389,17 +387,18 @@ INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL); INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL); /* list of edac_dev 'instance' attributes */ -static struct instance_attribute *device_instance_attr[] = { - &attr_instance_ce_count, - &attr_instance_ue_count, +static struct attribute *device_instance_attrs[] = { + &attr_instance_ce_count.attr, + &attr_instance_ue_count.attr, NULL, }; +ATTRIBUTE_GROUPS(device_instance); /* The 'ktype' for each edac_dev 'instance' */ static struct kobj_type ktype_instance_ctrl = { .release = edac_device_ctrl_instance_release, .sysfs_ops = &device_instance_ops, - .default_attrs = (struct attribute **)device_instance_attr, + .default_groups = device_instance_groups, }; /* edac_dev -> instance -> block information */ @@ -487,17 +486,18 @@ BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL); BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL); /* list of edac_dev 'block' attributes */ -static struct edac_dev_sysfs_block_attribute *device_block_attr[] = { - &attr_block_ce_count, - &attr_block_ue_count, +static struct attribute *device_block_attrs[] = { + &attr_block_ce_count.attr, + &attr_block_ue_count.attr, NULL, }; +ATTRIBUTE_GROUPS(device_block); /* The 'ktype' for each edac_dev 'block' */ static struct kobj_type ktype_block_ctrl = { .release = edac_device_ctrl_block_release, .sysfs_ops = &device_block_ops, - .default_attrs = (struct attribute **)device_block_attr, + .default_groups = device_block_groups, }; /* block ctor/dtor code */ diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 9d9aabdec96b..6faeb2ab3960 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -103,7 +103,6 @@ static void edac_mc_dump_dimm(struct dimm_info *dimm) edac_dbg(4, " dimm->label = '%s'\n", dimm->label); edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); edac_dbg(4, " dimm->grain = %d\n", dimm->grain); - edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages); } static void edac_mc_dump_csrow(struct csrow_info *csrow) @@ -170,61 +169,6 @@ const char * const edac_mem_types[] = { }; EXPORT_SYMBOL_GPL(edac_mem_types); -/** - * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation - * @p: pointer to a pointer with the memory offset to be used. At - * return, this will be incremented to point to the next offset - * @size: Size of the data structure to be reserved - * @n_elems: Number of elements that should be reserved - * - * If 'size' is a constant, the compiler will optimize this whole function - * down to either a no-op or the addition of a constant to the value of '*p'. - * - * The 'p' pointer is absolutely needed to keep the proper advancing - * further in memory to the proper offsets when allocating the struct along - * with its embedded structs, as edac_device_alloc_ctl_info() does it - * above, for example. - * - * At return, the pointer 'p' will be incremented to be used on a next call - * to this function. - */ -void *edac_align_ptr(void **p, unsigned int size, int n_elems) -{ - unsigned int align, r; - void *ptr = *p; - - *p += size * n_elems; - - /* - * 'p' can possibly be an unaligned item X such that sizeof(X) is - * 'size'. Adjust 'p' so that its alignment is at least as - * stringent as what the compiler would provide for X and return - * the aligned result. - * Here we assume that the alignment of a "long long" is the most - * stringent alignment that the compiler will ever provide by default. - * As far as I know, this is a reasonable assumption. - */ - if (size > sizeof(long)) - align = sizeof(long long); - else if (size > sizeof(int)) - align = sizeof(long); - else if (size > sizeof(short)) - align = sizeof(int); - else if (size > sizeof(char)) - align = sizeof(short); - else - return (char *)ptr; - - r = (unsigned long)p % align; - - if (r == 0) - return (char *)ptr; - - *p += align - r; - - return (void *)(((unsigned long)ptr) + align - r); -} - static void _edac_mc_free(struct mem_ctl_info *mci) { put_device(&mci->dev); @@ -257,6 +201,8 @@ static void mci_release(struct device *dev) } kfree(mci->csrows); } + kfree(mci->pvt_info); + kfree(mci->layers); kfree(mci); } @@ -392,9 +338,8 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, { struct mem_ctl_info *mci; struct edac_mc_layer *layer; - unsigned int idx, size, tot_dimms = 1; + unsigned int idx, tot_dimms = 1; unsigned int tot_csrows = 1, tot_channels = 1; - void *pvt, *ptr = NULL; bool per_rank = false; if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0)) @@ -416,41 +361,25 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, per_rank = true; } - /* Figure out the offsets of the various items from the start of an mc - * structure. We want the alignment of each item to be at least as - * stringent as what the compiler would provide if we could simply - * hardcode everything into a single struct. - */ - mci = edac_align_ptr(&ptr, sizeof(*mci), 1); - layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers); - pvt = edac_align_ptr(&ptr, sz_pvt, 1); - size = ((unsigned long)pvt) + sz_pvt; - - edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n", - size, - tot_dimms, - per_rank ? "ranks" : "dimms", - tot_csrows * tot_channels); - - mci = kzalloc(size, GFP_KERNEL); - if (mci == NULL) + mci = kzalloc(sizeof(struct mem_ctl_info), GFP_KERNEL); + if (!mci) return NULL; + mci->layers = kcalloc(n_layers, sizeof(struct edac_mc_layer), GFP_KERNEL); + if (!mci->layers) + goto error; + + mci->pvt_info = kzalloc(sz_pvt, GFP_KERNEL); + if (!mci->pvt_info) + goto error; + mci->dev.release = mci_release; device_initialize(&mci->dev); - /* Adjust pointers so they point within the memory we just allocated - * rather than an imaginary chunk of memory located at address 0. - */ - layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer)); - pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; - /* setup index and various internal pointers */ mci->mc_idx = mc_num; mci->tot_dimms = tot_dimms; - mci->pvt_info = pvt; mci->n_layers = n_layers; - mci->layers = layer; memcpy(mci->layers, layers, sizeof(*layer) * n_layers); mci->nr_csrows = tot_csrows; mci->num_cschannel = tot_channels; diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index aa1f91688eb8..50ed9f2425bb 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -28,13 +28,9 @@ void edac_mc_sysfs_exit(void); extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, const struct attribute_group **groups); extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci); -extern int edac_get_log_ue(void); -extern int edac_get_log_ce(void); -extern int edac_get_panic_on_ue(void); extern int edac_mc_get_log_ue(void); extern int edac_mc_get_log_ce(void); extern int edac_mc_get_panic_on_ue(void); -extern int edac_get_poll_msec(void); extern unsigned int edac_mc_get_poll_msec(void); unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, @@ -59,8 +55,6 @@ extern void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, unsigned long value); extern void edac_mc_reset_delay_period(unsigned long value); -extern void *edac_align_ptr(void **p, unsigned size, int n_elems); - /* * EDAC debugfs functions */ diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 48c844a72a27..64c142aecca7 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * EDAC PCI component * * Author: Dave Jiang <djiang@mvista.com> * - * 2007 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - * + * 2007 (c) MontaVista Software, Inc. */ #include <asm/page.h> #include <linux/uaccess.h> @@ -29,32 +26,31 @@ static LIST_HEAD(edac_pci_list); static atomic_t pci_indexes = ATOMIC_INIT(0); struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt, - const char *edac_pci_name) + const char *edac_pci_name) { struct edac_pci_ctl_info *pci; - void *p = NULL, *pvt; - unsigned int size; edac_dbg(1, "\n"); - pci = edac_align_ptr(&p, sizeof(*pci), 1); - pvt = edac_align_ptr(&p, 1, sz_pvt); - size = ((unsigned long)pvt) + sz_pvt; - - /* Alloc the needed control struct memory */ - pci = kzalloc(size, GFP_KERNEL); - if (pci == NULL) + pci = kzalloc(sizeof(struct edac_pci_ctl_info), GFP_KERNEL); + if (!pci) return NULL; - /* Now much private space */ - pvt = sz_pvt ? ((char *)pci) + ((unsigned long)pvt) : NULL; + if (sz_pvt) { + pci->pvt_info = kzalloc(sz_pvt, GFP_KERNEL); + if (!pci->pvt_info) + goto free; + } - pci->pvt_info = pvt; pci->op_state = OP_ALLOC; snprintf(pci->name, strlen(edac_pci_name) + 1, "%s", edac_pci_name); return pci; + +free: + kfree(pci); + return NULL; } EXPORT_SYMBOL_GPL(edac_pci_alloc_ctl_info); diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 53042af7262e..888d5728ecef 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -135,17 +135,18 @@ INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL); INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL); /* pci instance attributes */ -static struct instance_attribute *pci_instance_attr[] = { - &attr_instance_pe_count, - &attr_instance_npe_count, +static struct attribute *pci_instance_attrs[] = { + &attr_instance_pe_count.attr, + &attr_instance_npe_count.attr, NULL }; +ATTRIBUTE_GROUPS(pci_instance); /* the ktype for a pci instance */ static struct kobj_type ktype_pci_instance = { .release = edac_pci_instance_release, .sysfs_ops = &pci_instance_ops, - .default_attrs = (struct attribute **)pci_instance_attr, + .default_groups = pci_instance_groups, }; /* @@ -292,15 +293,16 @@ EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL); /* Base Attributes of the memory ECC object */ -static struct edac_pci_dev_attribute *edac_pci_attr[] = { - &edac_pci_attr_check_pci_errors, - &edac_pci_attr_edac_pci_log_pe, - &edac_pci_attr_edac_pci_log_npe, - &edac_pci_attr_edac_pci_panic_on_pe, - &edac_pci_attr_pci_parity_count, - &edac_pci_attr_pci_nonparity_count, +static struct attribute *edac_pci_attrs[] = { + &edac_pci_attr_check_pci_errors.attr, + &edac_pci_attr_edac_pci_log_pe.attr, + &edac_pci_attr_edac_pci_log_npe.attr, + &edac_pci_attr_edac_pci_panic_on_pe.attr, + &edac_pci_attr_pci_parity_count.attr, + &edac_pci_attr_pci_nonparity_count.attr, NULL, }; +ATTRIBUTE_GROUPS(edac_pci); /* * edac_pci_release_main_kobj @@ -327,7 +329,7 @@ static void edac_pci_release_main_kobj(struct kobject *kobj) static struct kobj_type ktype_edac_pci_main_kobj = { .release = edac_pci_release_main_kobj, .sysfs_ops = &edac_pci_sysfs_ops, - .default_attrs = (struct attribute **)edac_pci_attr, + .default_groups = edac_pci_groups, }; /** diff --git a/drivers/edac/fsl_ddr_edac.c b/drivers/edac/fsl_ddr_edac.c index 6d8ea226010d..ac2102b25706 100644 --- a/drivers/edac/fsl_ddr_edac.c +++ b/drivers/edac/fsl_ddr_edac.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Freescale Memory Controller kernel module * @@ -9,10 +10,7 @@ * * Author: Dave Jiang <djiang@mvista.com> * - * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. + * 2006-2007 (c) MontaVista Software, Inc. */ #include <linux/module.h> #include <linux/init.h> diff --git a/drivers/edac/fsl_ddr_edac.h b/drivers/edac/fsl_ddr_edac.h index 589b9b4a5e8a..332439d7b2d9 100644 --- a/drivers/edac/fsl_ddr_edac.h +++ b/drivers/edac/fsl_ddr_edac.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Freescale Memory Controller kernel module * @@ -7,11 +8,7 @@ * * Author: Dave Jiang <djiang@mvista.com> * - * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - * + * 2006-2007 (c) MontaVista Software, Inc. */ #ifndef _FSL_DDR_EDAC_H_ #define _FSL_DDR_EDAC_H_ diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 6d1ddecbf0da..c8fa7dcfdbd0 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -15,11 +15,13 @@ #include "edac_module.h" #include <ras/ras_event.h> +#define OTHER_DETAIL_LEN 400 + struct ghes_pvt { struct mem_ctl_info *mci; /* Buffers for the error handling routine */ - char other_detail[400]; + char other_detail[OTHER_DETAIL_LEN]; char msg[80]; }; @@ -36,7 +38,7 @@ static struct ghes_pvt *ghes_pvt; * This driver's representation of the system hardware, as collected * from DMI. */ -struct ghes_hw_desc { +static struct ghes_hw_desc { int num_dimms; struct dimm_info *dimms; } ghes_hw; @@ -101,9 +103,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle) dmi_memdev_name(handle, &bank, &device); - /* both strings must be non-zero */ - if (bank && *bank && device && *device) - snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device); + /* + * Set to a NULL string when both bank and device are zero. In this case, + * the label assigned by default will be preserved. + */ + snprintf(dimm->label, sizeof(dimm->label), "%s%s%s", + (bank && *bank) ? bank : "", + (bank && *bank && device && *device) ? " " : "", + (device && *device) ? device : ""); } static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry) @@ -235,8 +242,34 @@ static void ghes_scan_system(void) system_scanned = true; } +static int print_mem_error_other_detail(const struct cper_sec_mem_err *mem, char *msg, + const char *location, unsigned int len) +{ + u32 n; + + if (!msg) + return 0; + + n = 0; + len -= 1; + + n += scnprintf(msg + n, len - n, "APEI location: %s ", location); + + if (!(mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)) + goto out; + + n += scnprintf(msg + n, len - n, "status(0x%016llx): ", mem->error_status); + n += scnprintf(msg + n, len - n, "%s ", cper_mem_err_status_str(mem->error_status)); + +out: + msg[n] = '\0'; + + return n; +} + void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) { + struct cper_mem_err_compact cmem; struct edac_raw_error_desc *e; struct mem_ctl_info *mci; struct ghes_pvt *pvt; @@ -292,60 +325,10 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) /* Error type, mapped on e->msg */ if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { + u8 etype = mem_err->error_type; + p = pvt->msg; - switch (mem_err->error_type) { - case 0: - p += sprintf(p, "Unknown"); - break; - case 1: - p += sprintf(p, "No error"); - break; - case 2: - p += sprintf(p, "Single-bit ECC"); - break; - case 3: - p += sprintf(p, "Multi-bit ECC"); - break; - case 4: - p += sprintf(p, "Single-symbol ChipKill ECC"); - break; - case 5: - p += sprintf(p, "Multi-symbol ChipKill ECC"); - break; - case 6: - p += sprintf(p, "Master abort"); - break; - case 7: - p += sprintf(p, "Target abort"); - break; - case 8: - p += sprintf(p, "Parity Error"); - break; - case 9: - p += sprintf(p, "Watchdog timeout"); - break; - case 10: - p += sprintf(p, "Invalid address"); - break; - case 11: - p += sprintf(p, "Mirror Broken"); - break; - case 12: - p += sprintf(p, "Memory Sparing"); - break; - case 13: - p += sprintf(p, "Scrub corrected error"); - break; - case 14: - p += sprintf(p, "Scrub uncorrected error"); - break; - case 15: - p += sprintf(p, "Physical Memory Map-out event"); - break; - default: - p += sprintf(p, "reserved error (%d)", - mem_err->error_type); - } + p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype)); } else { strcpy(pvt->msg, "unknown error"); } @@ -362,52 +345,19 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) /* Memory error location, mapped on e->location */ p = e->location; - if (mem_err->validation_bits & CPER_MEM_VALID_NODE) - p += sprintf(p, "node:%d ", mem_err->node); - if (mem_err->validation_bits & CPER_MEM_VALID_CARD) - p += sprintf(p, "card:%d ", mem_err->card); - if (mem_err->validation_bits & CPER_MEM_VALID_MODULE) - p += sprintf(p, "module:%d ", mem_err->module); - if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER) - p += sprintf(p, "rank:%d ", mem_err->rank); - if (mem_err->validation_bits & CPER_MEM_VALID_BANK) - p += sprintf(p, "bank:%d ", mem_err->bank); - if (mem_err->validation_bits & CPER_MEM_VALID_BANK_GROUP) - p += sprintf(p, "bank_group:%d ", - mem_err->bank >> CPER_MEM_BANK_GROUP_SHIFT); - if (mem_err->validation_bits & CPER_MEM_VALID_BANK_ADDRESS) - p += sprintf(p, "bank_address:%d ", - mem_err->bank & CPER_MEM_BANK_ADDRESS_MASK); - if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) { - u32 row = mem_err->row; - - row |= cper_get_mem_extension(mem_err->validation_bits, mem_err->extended); - p += sprintf(p, "row:%d ", row); - } - if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN) - p += sprintf(p, "col:%d ", mem_err->column); - if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION) - p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); + cper_mem_err_pack(mem_err, &cmem); + p += cper_mem_err_location(&cmem, p); + if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { - const char *bank = NULL, *device = NULL; struct dimm_info *dimm; - dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device); - if (bank != NULL && device != NULL) - p += sprintf(p, "DIMM location:%s %s ", bank, device); - else - p += sprintf(p, "DIMM DMI handle: 0x%.4x ", - mem_err->mem_dev_handle); - + p += cper_dimm_err_location(&cmem, p); dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle); if (dimm) { e->top_layer = dimm->idx; strcpy(e->label, dimm->label); } } - if (mem_err->validation_bits & CPER_MEM_VALID_CHIP_ID) - p += sprintf(p, "chipID: %d ", - mem_err->extended >> CPER_MEM_CHIP_ID_SHIFT); if (p > e->location) *(p - 1) = '\0'; @@ -416,78 +366,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) /* All other fields are mapped on e->other_detail */ p = pvt->other_detail; - p += snprintf(p, sizeof(pvt->other_detail), - "APEI location: %s ", e->location); - if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { - u64 status = mem_err->error_status; - - p += sprintf(p, "status(0x%016llx): ", (long long)status); - switch ((status >> 8) & 0xff) { - case 1: - p += sprintf(p, "Error detected internal to the component "); - break; - case 16: - p += sprintf(p, "Error detected in the bus "); - break; - case 4: - p += sprintf(p, "Storage error in DRAM memory "); - break; - case 5: - p += sprintf(p, "Storage error in TLB "); - break; - case 6: - p += sprintf(p, "Storage error in cache "); - break; - case 7: - p += sprintf(p, "Error in one or more functional units "); - break; - case 8: - p += sprintf(p, "component failed self test "); - break; - case 9: - p += sprintf(p, "Overflow or undervalue of internal queue "); - break; - case 17: - p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR "); - break; - case 18: - p += sprintf(p, "Improper access error "); - break; - case 19: - p += sprintf(p, "Access to a memory address which is not mapped to any component "); - break; - case 20: - p += sprintf(p, "Loss of Lockstep "); - break; - case 21: - p += sprintf(p, "Response not associated with a request "); - break; - case 22: - p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits "); - break; - case 23: - p += sprintf(p, "Detection of a PATH_ERROR "); - break; - case 25: - p += sprintf(p, "Bus operation timeout "); - break; - case 26: - p += sprintf(p, "A read was issued to data that has been poisoned "); - break; - default: - p += sprintf(p, "reserved "); - break; - } - } - if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) - p += sprintf(p, "requestorID: 0x%016llx ", - (long long)mem_err->requestor_id); - if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID) - p += sprintf(p, "responderID: 0x%016llx ", - (long long)mem_err->responder_id); - if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID) - p += sprintf(p, "targetID: 0x%016llx ", - (long long)mem_err->responder_id); + p += print_mem_error_other_detail(mem_err, p, e->location, OTHER_DETAIL_LEN); if (p > pvt->other_detail) *(p - 1) = '\0'; diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 6cf50ee0b77c..a22ea053f8e1 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -74,31 +74,47 @@ static struct list_head *i10nm_edac_list; static struct res_config *res_cfg; static int retry_rd_err_log; +static int decoding_via_mca; +static bool mem_cfg_2lm; static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; +static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}; +static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}; static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}; static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}; +static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}; +static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; +static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; -static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable) +static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, + u32 *offsets_scrub, u32 *offsets_demand, + u32 *offsets_demand2) { - u32 s, d; + u32 s, d, d2; - if (!imc->mbase) - return; - - s = I10NM_GET_REG32(imc, chan, res_cfg->offsets_scrub[0]); - d = I10NM_GET_REG32(imc, chan, res_cfg->offsets_demand[0]); + s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]); + d = I10NM_GET_REG32(imc, chan, offsets_demand[0]); + if (offsets_demand2) + d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]); if (enable) { /* Save default configurations */ imc->chan[chan].retry_rd_err_log_s = s; imc->chan[chan].retry_rd_err_log_d = d; + if (offsets_demand2) + imc->chan[chan].retry_rd_err_log_d2 = d2; s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; s |= RETRY_RD_ERR_LOG_EN; d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; d |= RETRY_RD_ERR_LOG_EN; + + if (offsets_demand2) { + d2 &= ~RETRY_RD_ERR_LOG_UC; + d2 |= RETRY_RD_ERR_LOG_NOOVER; + d2 |= RETRY_RD_ERR_LOG_EN; + } } else { /* Restore default configurations */ if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC) @@ -113,23 +129,55 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable d |= RETRY_RD_ERR_LOG_NOOVER; if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN)) d &= ~RETRY_RD_ERR_LOG_EN; + + if (offsets_demand2) { + if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC) + d2 |= RETRY_RD_ERR_LOG_UC; + if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER)) + d2 &= ~RETRY_RD_ERR_LOG_NOOVER; + if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN)) + d2 &= ~RETRY_RD_ERR_LOG_EN; + } } - I10NM_SET_REG32(imc, chan, res_cfg->offsets_scrub[0], s); - I10NM_SET_REG32(imc, chan, res_cfg->offsets_demand[0], d); + I10NM_SET_REG32(imc, chan, offsets_scrub[0], s); + I10NM_SET_REG32(imc, chan, offsets_demand[0], d); + if (offsets_demand2) + I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2); } static void enable_retry_rd_err_log(bool enable) { + struct skx_imc *imc; struct skx_dev *d; int i, j; edac_dbg(2, "\n"); list_for_each_entry(d, i10nm_edac_list, list) - for (i = 0; i < I10NM_NUM_IMC; i++) - for (j = 0; j < I10NM_NUM_CHANNELS; j++) - __enable_retry_rd_err_log(&d->imc[i], j, enable); + for (i = 0; i < I10NM_NUM_IMC; i++) { + imc = &d->imc[i]; + if (!imc->mbase) + continue; + + for (j = 0; j < I10NM_NUM_CHANNELS; j++) { + if (imc->hbm_mc) { + __enable_retry_rd_err_log(imc, j, enable, + res_cfg->offsets_scrub_hbm0, + res_cfg->offsets_demand_hbm0, + NULL); + __enable_retry_rd_err_log(imc, j, enable, + res_cfg->offsets_scrub_hbm1, + res_cfg->offsets_demand_hbm1, + NULL); + } else { + __enable_retry_rd_err_log(imc, j, enable, + res_cfg->offsets_scrub, + res_cfg->offsets_demand, + res_cfg->offsets_demand2); + } + } + } } static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, @@ -138,14 +186,33 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, struct skx_imc *imc = &res->dev->imc[res->imc]; u32 log0, log1, log2, log3, log4; u32 corr0, corr1, corr2, corr3; + u32 lxg0, lxg1, lxg3, lxg4; + u32 *xffsets = NULL; u64 log2a, log5; + u64 lxg2a, lxg5; u32 *offsets; - int n; + int n, pch; if (!imc->mbase) return; - offsets = scrub_err ? res_cfg->offsets_scrub : res_cfg->offsets_demand; + if (imc->hbm_mc) { + pch = res->cs & 1; + + if (pch) + offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 : + res_cfg->offsets_demand_hbm1; + else + offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 : + res_cfg->offsets_demand_hbm0; + } else { + if (scrub_err) { + offsets = res_cfg->offsets_scrub; + } else { + offsets = res_cfg->offsets_demand; + xffsets = res_cfg->offsets_demand2; + } + } log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]); log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]); @@ -153,20 +220,52 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]); log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]); + if (xffsets) { + lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]); + lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]); + lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]); + lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]); + lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]); + } + if (res_cfg->type == SPR) { log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]); - n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx]", + n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx", log0, log1, log2a, log3, log4, log5); + + if (len - n > 0) { + if (xffsets) { + lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]); + n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]", + lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5); + } else { + n += snprintf(msg + n, len - n, "]"); + } + } } else { log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]); n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]", log0, log1, log2, log3, log4, log5); } - corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); + if (imc->hbm_mc) { + if (pch) { + corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18); + corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c); + corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20); + corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24); + } else { + corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818); + corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c); + corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820); + corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824); + } + } else { + corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); + corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); + corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); + corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); + } if (len - n > 0) snprintf(msg + n, len - n, @@ -177,9 +276,16 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, corr3 & 0xffff, corr3 >> 16); /* Clear status bits */ - if (retry_rd_err_log == 2 && (log0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { - log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; - I10NM_SET_REG32(imc, res->channel, offsets[0], log0); + if (retry_rd_err_log == 2) { + if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) { + log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; + I10NM_SET_REG32(imc, res->channel, offsets[0], log0); + } + + if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { + lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; + I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0); + } } } @@ -231,6 +337,103 @@ static bool i10nm_check_2lm(struct res_config *cfg) return false; } +/* + * Check whether the error comes from DDRT by ICX/Tremont model specific error code. + * Refer to SDM vol3B 16.11.3 Intel IMC MC error codes for IA32_MCi_STATUS. + */ +static bool i10nm_mscod_is_ddrt(u32 mscod) +{ + switch (mscod) { + case 0x0106: case 0x0107: + case 0x0800: case 0x0804: + case 0x0806 ... 0x0808: + case 0x080a ... 0x080e: + case 0x0810: case 0x0811: + case 0x0816: case 0x081e: + case 0x081f: + return true; + } + + return false; +} + +static bool i10nm_mc_decode_available(struct mce *mce) +{ + u8 bank; + + if (!decoding_via_mca || mem_cfg_2lm) + return false; + + if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) + != (MCI_STATUS_MISCV | MCI_STATUS_ADDRV)) + return false; + + bank = mce->bank; + + switch (res_cfg->type) { + case I10NM: + if (bank < 13 || bank > 26) + return false; + + /* DDRT errors can't be decoded from MCA bank registers */ + if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) + return false; + + if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) + return false; + + /* Check whether one of {13,14,17,18,21,22,25,26} */ + return ((bank - 13) & BIT(1)) == 0; + default: + return false; + } +} + +static bool i10nm_mc_decode(struct decoded_addr *res) +{ + struct mce *m = res->mce; + struct skx_dev *d; + u8 bank; + + if (!i10nm_mc_decode_available(m)) + return false; + + list_for_each_entry(d, i10nm_edac_list, list) { + if (d->imc[0].src_id == m->socketid) { + res->socket = m->socketid; + res->dev = d; + break; + } + } + + switch (res_cfg->type) { + case I10NM: + bank = m->bank - 13; + res->imc = bank / 4; + res->channel = bank % 2; + break; + default: + return false; + } + + if (!res->dev) { + skx_printk(KERN_ERR, "No device for src_id %d imc %d\n", + m->socketid, res->imc); + return false; + } + + res->column = GET_BITFIELD(m->misc, 9, 18) << 2; + res->row = GET_BITFIELD(m->misc, 19, 39); + res->bank_group = GET_BITFIELD(m->misc, 40, 41); + res->bank_address = GET_BITFIELD(m->misc, 42, 43); + res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; + res->rank = GET_BITFIELD(m->misc, 56, 58); + res->dimm = res->rank >> 2; + res->rank = res->rank % 4; + + return true; +} + static int i10nm_get_ddr_munits(void) { struct pci_dev *mdev; @@ -420,7 +623,12 @@ static struct res_config spr_cfg = { .sad_all_devfn = PCI_DEVFN(10, 0), .sad_all_offset = 0x300, .offsets_scrub = offsets_scrub_spr, + .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0, + .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1, .offsets_demand = offsets_demand_spr, + .offsets_demand2 = offsets_demand2_spr, + .offsets_demand_hbm0 = offsets_demand_spr_hbm0, + .offsets_demand_hbm1 = offsets_demand_spr_hbm1, }; static const struct x86_cpu_id i10nm_cpuids[] = { @@ -574,7 +782,8 @@ static int __init i10nm_init(void) return -ENODEV; } - skx_set_mem_cfg(i10nm_check_2lm(cfg)); + mem_cfg_2lm = i10nm_check_2lm(cfg); + skx_set_mem_cfg(mem_cfg_2lm); rc = i10nm_get_ddr_munits(); @@ -626,9 +835,11 @@ static int __init i10nm_init(void) setup_i10nm_debug(); if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { - skx_set_decode(NULL, show_retry_rd_err_log); + skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log); if (retry_rd_err_log == 2) enable_retry_rd_err_log(true); + } else { + skx_set_decode(i10nm_mc_decode, NULL); } i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION); @@ -658,6 +869,34 @@ static void __exit i10nm_exit(void) module_init(i10nm_init); module_exit(i10nm_exit); +static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp) +{ + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, &val); + + if (ret || val > 1) + return -EINVAL; + + if (val && mem_cfg_2lm) { + i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n"); + return -EIO; + } + + ret = param_set_int(buf, kp); + + return ret; +} + +static const struct kernel_param_ops decoding_via_mca_param_ops = { + .set = set_decoding_via_mca, + .get = param_get_int, +}; + +module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644); +MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable"); + module_param(retry_rd_err_log, int, 0444); MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)"); diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index 324a46b8479b..f5d82518c15e 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -244,11 +244,6 @@ static inline u32 i5100_nrecmema_rank(u32 a) return a >> 8 & ((1 << 3) - 1); } -static inline u32 i5100_nrecmema_dm_buf_id(u32 a) -{ - return a & ((1 << 8) - 1); -} - static inline u32 i5100_nrecmemb_cas(u32 a) { return a >> 16 & ((1 << 13) - 1); diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 4f28b8c8d378..61adaa872ba7 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -1193,7 +1193,7 @@ static int __init i7300_init(void) } /** - * i7300_init() - Unregisters the driver + * i7300_exit() - Unregisters the driver */ static void __exit i7300_exit(void) { diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 9a9ff5ad611a..9ef13570f2e5 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -20,11 +20,15 @@ * 0c08: Xeon E3-1200 v3 Processor DRAM Controller * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers * 5918: Xeon E3-1200 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers + * 190f: 6th Gen Core Dual-Core Processor Host Bridge/DRAM Registers + * 191f: 6th Gen Core Quad-Core Processor Host Bridge/DRAM Registers * 3e..: 8th/9th Gen Core Processor Host Bridge/DRAM Registers * * Based on Intel specification: * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200-family-vol-2-datasheet.html + * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/desktop-6th-gen-core-family-datasheet-vol-2.pdf + * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v6-vol-2-datasheet.pdf * https://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-h-processor-lines-datasheet-vol-2.html * https://www.intel.com/content/www/us/en/products/docs/processors/core/8th-gen-core-family-datasheet-vol-2.html * @@ -53,15 +57,17 @@ #define ie31200_printk(level, fmt, arg...) \ edac_printk(level, "ie31200", fmt, ##arg) -#define PCI_DEVICE_ID_INTEL_IE31200_HB_1 0x0108 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_2 0x010c -#define PCI_DEVICE_ID_INTEL_IE31200_HB_3 0x0150 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_4 0x0158 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c -#define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918 -#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x5918 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_1 0x0108 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_2 0x010c +#define PCI_DEVICE_ID_INTEL_IE31200_HB_3 0x0150 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_4 0x0158 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c +#define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x190F +#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x1918 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_10 0x191F +#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x5918 /* Coffee Lake-S */ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK 0x3e00 @@ -80,6 +86,8 @@ #define DEVICE_ID_SKYLAKE_OR_LATER(did) \ (((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \ ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \ + ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \ + ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \ (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \ PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK)) @@ -577,6 +585,8 @@ static const struct pci_device_id ie31200_pci_tbl[] = { { PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, + { PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, + { PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 67f7bc3fe5b3..e50d7928bf8f 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -24,6 +24,8 @@ #include <linux/of_platform.h> #include <linux/of_device.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include "edac_module.h" #include "mpc85xx_edac.h" #include "fsl_ddr_edac.h" @@ -609,13 +611,6 @@ static int mpc85xx_l2_err_remove(struct platform_device *op) } static const struct of_device_id mpc85xx_l2_err_of_match[] = { -/* deprecate the fsl,85.. forms in the future, 2.6.30? */ - { .compatible = "fsl,8540-l2-cache-controller", }, - { .compatible = "fsl,8541-l2-cache-controller", }, - { .compatible = "fsl,8544-l2-cache-controller", }, - { .compatible = "fsl,8548-l2-cache-controller", }, - { .compatible = "fsl,8555-l2-cache-controller", }, - { .compatible = "fsl,8568-l2-cache-controller", }, { .compatible = "fsl,mpc8536-l2-cache-controller", }, { .compatible = "fsl,mpc8540-l2-cache-controller", }, { .compatible = "fsl,mpc8541-l2-cache-controller", }, @@ -644,13 +639,6 @@ static struct platform_driver mpc85xx_l2_err_driver = { }; static const struct of_device_id mpc85xx_mc_err_of_match[] = { -/* deprecate the fsl,85.. forms in the future, 2.6.30? */ - { .compatible = "fsl,8540-memory-controller", }, - { .compatible = "fsl,8541-memory-controller", }, - { .compatible = "fsl,8544-memory-controller", }, - { .compatible = "fsl,8548-memory-controller", }, - { .compatible = "fsl,8555-memory-controller", }, - { .compatible = "fsl,8568-memory-controller", }, { .compatible = "fsl,mpc8536-memory-controller", }, { .compatible = "fsl,mpc8540-memory-controller", }, { .compatible = "fsl,mpc8541-memory-controller", }, diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h index 3f6fb16ad34f..66a046ae33ee 100644 --- a/drivers/edac/mpc85xx_edac.h +++ b/drivers/edac/mpc85xx_edac.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Freescale MPC85xx Memory Controller kernel module * Author: Dave Jiang <djiang@mvista.com> * - * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - * + * 2006-2007 (c) MontaVista Software, Inc. */ #ifndef _MPC85XX_EDAC_H_ #define _MPC85XX_EDAC_H_ diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index c94ca1f790c4..a20b299f1202 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -28,6 +28,8 @@ #include <linux/bitmap.h> #include <linux/math64.h> #include <linux/mod_devicetable.h> +#include <linux/platform_data/x86/p2sb.h> + #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include <asm/processor.h> @@ -232,42 +234,14 @@ static u64 get_mem_ctrl_hub_base_addr(void) return U64_LSHIFT(hi.base, 32) | U64_LSHIFT(lo.base, 15); } -static u64 get_sideband_reg_base_addr(void) -{ - struct pci_dev *pdev; - u32 hi, lo; - u8 hidden; - - pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x19dd, NULL); - if (pdev) { - /* Unhide the P2SB device, if it's hidden */ - pci_read_config_byte(pdev, 0xe1, &hidden); - if (hidden) - pci_write_config_byte(pdev, 0xe1, 0); - - pci_read_config_dword(pdev, 0x10, &lo); - pci_read_config_dword(pdev, 0x14, &hi); - lo &= 0xfffffff0; - - /* Hide the P2SB device, if it was hidden before */ - if (hidden) - pci_write_config_byte(pdev, 0xe1, hidden); - - pci_dev_put(pdev); - return (U64_LSHIFT(hi, 32) | U64_LSHIFT(lo, 0)); - } else { - return 0xfd000000; - } -} - #define DNV_MCHBAR_SIZE 0x8000 #define DNV_SB_PORT_SIZE 0x10000 static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *name) { struct pci_dev *pdev; - char *base; - u64 addr; - unsigned long size; + void __iomem *base; + struct resource r; + int ret; if (op == 4) { pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL); @@ -279,26 +253,30 @@ static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *na } else { /* MMIO via memory controller hub base address */ if (op == 0 && port == 0x4c) { - addr = get_mem_ctrl_hub_base_addr(); - if (!addr) + memset(&r, 0, sizeof(r)); + + r.start = get_mem_ctrl_hub_base_addr(); + if (!r.start) return -ENODEV; - size = DNV_MCHBAR_SIZE; + r.end = r.start + DNV_MCHBAR_SIZE - 1; } else { /* MMIO via sideband register base address */ - addr = get_sideband_reg_base_addr(); - if (!addr) - return -ENODEV; - addr += (port << 16); - size = DNV_SB_PORT_SIZE; + ret = p2sb_bar(NULL, 0, &r); + if (ret) + return ret; + + r.start += (port << 16); + r.end = r.start + DNV_SB_PORT_SIZE - 1; } - base = ioremap((resource_size_t)addr, size); + base = ioremap(r.start, resource_size(&r)); if (!base) return -ENODEV; if (sz == 8) - *(u32 *)(data + 4) = *(u32 *)(base + off + 4); - *(u32 *)data = *(u32 *)(base + off); + *(u64 *)data = readq(base + off); + else + *(u32 *)data = readl(base + off); iounmap(base); } diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index 6793f6d799e7..046969b4e82e 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -11,6 +11,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/of_device.h> +#include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/types.h> @@ -177,11 +178,6 @@ struct ppc4xx_ecc_status { u32 wmirq; }; -/* Function Prototypes */ - -static int ppc4xx_edac_probe(struct platform_device *device); -static int ppc4xx_edac_remove(struct platform_device *device); - /* Global Variables */ /* @@ -196,15 +192,6 @@ static const struct of_device_id ppc4xx_edac_match[] = { }; MODULE_DEVICE_TABLE(of, ppc4xx_edac_match); -static struct platform_driver ppc4xx_edac_driver = { - .probe = ppc4xx_edac_probe, - .remove = ppc4xx_edac_remove, - .driver = { - .name = PPC4XX_EDAC_MODULE_NAME, - .of_match_table = ppc4xx_edac_match, - }, -}; - /* * TODO: The row and channel parameters likely need to be dynamically * set based on the aforementioned variant controller realizations. @@ -1390,6 +1377,15 @@ ppc4xx_edac_opstate_init(void) EDAC_OPSTATE_UNKNOWN_STR))); } +static struct platform_driver ppc4xx_edac_driver = { + .probe = ppc4xx_edac_probe, + .remove = ppc4xx_edac_remove, + .driver = { + .name = PPC4XX_EDAC_MODULE_NAME, + .of_match_table = ppc4xx_edac_match, + }, +}; + /** * ppc4xx_edac_init - driver/module insertion entry point * diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 9678ab97c7ac..8e39370fdb5c 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -335,6 +335,12 @@ struct sbridge_info { struct sbridge_channel { u32 ranks; u32 dimms; + struct dimm { + u32 rowbits; + u32 colbits; + u32 bank_xor_enable; + u32 amap_fine; + } dimm[MAX_DIMMS]; }; struct pci_id_descr { @@ -1603,7 +1609,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, banks = 8; for (i = 0; i < channels; i++) { - u32 mtr; + u32 mtr, amap = 0; int max_dimms_per_channel; @@ -1615,6 +1621,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, max_dimms_per_channel = ARRAY_SIZE(mtr_regs); if (!pvt->pci_tad[i]) continue; + pci_read_config_dword(pvt->pci_tad[i], 0x8c, &amap); } for (j = 0; j < max_dimms_per_channel; j++) { @@ -1627,6 +1634,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, mtr_regs[j], &mtr); } edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr); + if (IS_DIMM_PRESENT(mtr)) { if (!IS_ECC_ENABLED(pvt->info.mcmtr)) { sbridge_printk(KERN_ERR, "CPU SrcID #%d, Ha #%d, Channel #%d has DIMMs, but ECC is disabled\n", @@ -1661,6 +1669,11 @@ static int __populate_dimms(struct mem_ctl_info *mci, dimm->dtype = pvt->info.get_width(pvt, mtr); dimm->mtype = mtype; dimm->edac_mode = mode; + pvt->channel[i].dimm[j].rowbits = order_base_2(rows); + pvt->channel[i].dimm[j].colbits = order_base_2(cols); + pvt->channel[i].dimm[j].bank_xor_enable = + GET_BITFIELD(pvt->info.mcmtr, 9, 9); + pvt->channel[i].dimm[j].amap_fine = GET_BITFIELD(amap, 0, 0); snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u", pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom, i, j); @@ -1922,6 +1935,99 @@ static struct mem_ctl_info *get_mci_for_node_id(u8 node_id, u8 ha) return NULL; } +static u8 sb_close_row[] = { + 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33 +}; + +static u8 sb_close_column[] = { + 3, 4, 5, 14, 19, 23, 24, 25, 26, 27 +}; + +static u8 sb_open_row[] = { + 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33 +}; + +static u8 sb_open_column[] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 +}; + +static u8 sb_open_fine_column[] = { + 3, 4, 5, 7, 8, 9, 10, 11, 12, 13 +}; + +static int sb_bits(u64 addr, int nbits, u8 *bits) +{ + int i, res = 0; + + for (i = 0; i < nbits; i++) + res |= ((addr >> bits[i]) & 1) << i; + return res; +} + +static int sb_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1) +{ + int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1); + + if (do_xor) + ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1); + + return ret; +} + +static bool sb_decode_ddr4(struct mem_ctl_info *mci, int ch, u8 rank, + u64 rank_addr, char *msg) +{ + int dimmno = 0; + int row, col, bank_address, bank_group; + struct sbridge_pvt *pvt; + u32 bg0 = 0, rowbits = 0, colbits = 0; + u32 amap_fine = 0, bank_xor_enable = 0; + + dimmno = (rank < 12) ? rank / 4 : 2; + pvt = mci->pvt_info; + amap_fine = pvt->channel[ch].dimm[dimmno].amap_fine; + bg0 = amap_fine ? 6 : 13; + rowbits = pvt->channel[ch].dimm[dimmno].rowbits; + colbits = pvt->channel[ch].dimm[dimmno].colbits; + bank_xor_enable = pvt->channel[ch].dimm[dimmno].bank_xor_enable; + + if (pvt->is_lockstep) { + pr_warn_once("LockStep row/column decode is not supported yet!\n"); + msg[0] = '\0'; + return false; + } + + if (pvt->is_close_pg) { + row = sb_bits(rank_addr, rowbits, sb_close_row); + col = sb_bits(rank_addr, colbits, sb_close_column); + col |= 0x400; /* C10 is autoprecharge, always set */ + bank_address = sb_bank_bits(rank_addr, 8, 9, bank_xor_enable, 22, 28); + bank_group = sb_bank_bits(rank_addr, 6, 7, bank_xor_enable, 20, 21); + } else { + row = sb_bits(rank_addr, rowbits, sb_open_row); + if (amap_fine) + col = sb_bits(rank_addr, colbits, sb_open_fine_column); + else + col = sb_bits(rank_addr, colbits, sb_open_column); + bank_address = sb_bank_bits(rank_addr, 18, 19, bank_xor_enable, 22, 23); + bank_group = sb_bank_bits(rank_addr, bg0, 17, bank_xor_enable, 20, 21); + } + + row &= (1u << rowbits) - 1; + + sprintf(msg, "row:0x%x col:0x%x bank_addr:%d bank_group:%d", + row, col, bank_address, bank_group); + return true; +} + +static bool sb_decode_ddr3(struct mem_ctl_info *mci, int ch, u8 rank, + u64 rank_addr, char *msg) +{ + pr_warn_once("DDR3 row/column decode not support yet!\n"); + msg[0] = '\0'; + return false; +} + static int get_memory_error_data(struct mem_ctl_info *mci, u64 addr, u8 *socket, u8 *ha, @@ -1937,12 +2043,13 @@ static int get_memory_error_data(struct mem_ctl_info *mci, int interleave_mode, shiftup = 0; unsigned int sad_interleave[MAX_INTERLEAVE]; u32 reg, dram_rule; - u8 ch_way, sck_way, pkg, sad_ha = 0; + u8 ch_way, sck_way, pkg, sad_ha = 0, rankid = 0; u32 tad_offset; u32 rir_way; u32 mb, gb; u64 ch_addr, offset, limit = 0, prv = 0; - + u64 rank_addr; + enum mem_type mtype; /* * Step 0) Check if the address is at special memory ranges @@ -2226,6 +2333,28 @@ static int get_memory_error_data(struct mem_ctl_info *mci, pci_read_config_dword(pvt->pci_tad[base_ch], rir_offset[n_rir][idx], ®); *rank = RIR_RNK_TGT(pvt->info.type, reg); + if (pvt->info.type == BROADWELL) { + if (pvt->is_close_pg) + shiftup = 6; + else + shiftup = 13; + + rank_addr = ch_addr >> shiftup; + rank_addr /= (1 << rir_way); + rank_addr <<= shiftup; + rank_addr |= ch_addr & GENMASK_ULL(shiftup - 1, 0); + rank_addr -= RIR_OFFSET(pvt->info.type, reg); + + mtype = pvt->info.get_memory_type(pvt); + rankid = *rank; + if (mtype == MEM_DDR4 || mtype == MEM_RDDR4) + sb_decode_ddr4(mci, base_ch, rankid, rank_addr, msg); + else + sb_decode_ddr3(mci, base_ch, rankid, rank_addr, msg); + } else { + msg[0] = '\0'; + } + edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", n_rir, ch_addr, @@ -2950,7 +3079,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; enum hw_event_mc_err_type tp_event; - char *optype, msg[256]; + char *optype, msg[256], msg_full[512]; bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); @@ -3089,18 +3218,17 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, */ if (!pvt->is_lockstep && !pvt->is_cur_addr_mirrored && !pvt->is_close_pg) channel = first_channel; - - snprintf(msg, sizeof(msg), - "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d", + snprintf(msg_full, sizeof(msg_full), + "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", area_type, mscod, errcode, socket, ha, channel_mask, - rank); + rank, msg); - edac_dbg(0, "%s\n", msg); + edac_dbg(0, "%s\n", msg_full); /* FIXME: need support for channel mask */ @@ -3111,7 +3239,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, channel, dimm, -1, - optype, msg); + optype, msg_full); return; err_parsing: edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, diff --git a/drivers/edac/sifive_edac.c b/drivers/edac/sifive_edac.c index ee800aec7d47..b844e2626fd5 100644 --- a/drivers/edac/sifive_edac.c +++ b/drivers/edac/sifive_edac.c @@ -2,7 +2,7 @@ /* * SiFive Platform EDAC Driver * - * Copyright (C) 2018-2019 SiFive, Inc. + * Copyright (C) 2018-2022 SiFive, Inc. * * This driver is partially based on octeon_edac-pc.c * @@ -10,7 +10,7 @@ #include <linux/edac.h> #include <linux/platform_device.h> #include "edac_module.h" -#include <soc/sifive/sifive_l2_cache.h> +#include <soc/sifive/sifive_ccache.h> #define DRVNAME "sifive_edac" @@ -32,9 +32,9 @@ int ecc_err_event(struct notifier_block *this, unsigned long event, void *ptr) p = container_of(this, struct sifive_edac_priv, notifier); - if (event == SIFIVE_L2_ERR_TYPE_UE) + if (event == SIFIVE_CCACHE_ERR_TYPE_UE) edac_device_handle_ue(p->dci, 0, 0, msg); - else if (event == SIFIVE_L2_ERR_TYPE_CE) + else if (event == SIFIVE_CCACHE_ERR_TYPE_CE) edac_device_handle_ce(p->dci, 0, 0, msg); return NOTIFY_OK; @@ -67,7 +67,7 @@ static int ecc_register(struct platform_device *pdev) goto err; } - register_sifive_l2_error_notifier(&p->notifier); + register_sifive_ccache_error_notifier(&p->notifier); return 0; @@ -81,7 +81,7 @@ static int ecc_unregister(struct platform_device *pdev) { struct sifive_edac_priv *p = platform_get_drvdata(pdev); - unregister_sifive_l2_error_notifier(&p->notifier); + unregister_sifive_ccache_error_notifier(&p->notifier); edac_device_del_device(&pdev->dev); edac_device_free_ctl_info(p->dci); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 1abc020d49ab..7e2762f62eec 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -714,8 +714,13 @@ static int __init skx_init(void) skx_set_decode(skx_decode, skx_show_retry_rd_err_log); - if (nvdimm_count && skx_adxl_get() == -ENODEV) - skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n"); + if (nvdimm_count && skx_adxl_get() != -ENODEV) { + skx_set_decode(NULL, skx_show_retry_rd_err_log); + } else { + if (nvdimm_count) + skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n"); + skx_set_decode(skx_decode, skx_show_retry_rd_err_log); + } /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 19c17c5198c5..f0f8e98f6efb 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -27,9 +27,11 @@ static const char * const component_names[] = { [INDEX_MEMCTRL] = "MemoryControllerId", [INDEX_CHANNEL] = "ChannelId", [INDEX_DIMM] = "DimmSlotId", + [INDEX_CS] = "ChipSelect", [INDEX_NM_MEMCTRL] = "NmMemoryControllerId", [INDEX_NM_CHANNEL] = "NmChannelId", [INDEX_NM_DIMM] = "NmDimmSlotId", + [INDEX_NM_CS] = "NmChipSelect", }; static int component_indices[ARRAY_SIZE(component_names)]; @@ -40,7 +42,7 @@ static char *adxl_msg; static unsigned long adxl_nm_bitmap; static char skx_msg[MSG_SIZE]; -static skx_decode_f skx_decode; +static skx_decode_f driver_decode; static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); @@ -139,10 +141,13 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1; res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ? (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1; + res->cs = (adxl_nm_bitmap & BIT_NM_CS) ? + (int)adxl_values[component_indices[INDEX_NM_CS]] : -1; } else { res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]]; res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]]; + res->cs = (int)adxl_values[component_indices[INDEX_CS]]; } if (res->imc > NUM_IMC - 1 || res->imc < 0) { @@ -173,6 +178,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me break; } + res->decoded_by_adxl = true; + return true; } @@ -183,7 +190,7 @@ void skx_set_mem_cfg(bool mem_cfg_2lm) void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) { - skx_decode = decode; + driver_decode = decode; skx_show_retry_rd_err_log = show_retry_log; } @@ -591,19 +598,19 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, break; } } - if (adxl_component_count) { + if (res->decoded_by_adxl) { len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", mscod, errcode, adxl_msg); } else { len = snprintf(skx_msg, MSG_SIZE, - "%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x", + "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", mscod, errcode, res->socket, res->imc, res->rank, - res->bank_group, res->bank_address, res->row, res->column); + res->row, res->column, res->bank_address, res->bank_group); } if (skx_show_retry_rd_err_log) @@ -649,13 +656,14 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, return NOTIFY_DONE; memset(&res, 0, sizeof(res)); + res.mce = mce; res.addr = mce->addr; - if (adxl_component_count) { - if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))) + /* Try driver decoder first */ + if (!(driver_decode && driver_decode(&res))) { + /* Then try firmware decoder (ACPI DSM methods) */ + if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))) return NOTIFY_DONE; - } else if (!skx_decode || !skx_decode(&res)) { - return NOTIFY_DONE; } mci = res.dev->imc[res.imc].mci; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 03ac067a80b9..0cbadd3d2cd3 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -10,6 +10,7 @@ #define _SKX_COMM_EDAC_H #include <linux/bits.h> +#include <asm/mce.h> #define MSG_SIZE 1024 @@ -52,6 +53,9 @@ #define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15) #define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i) +#define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) +#define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ + /* * Each cpu socket contains some pci devices that provide global * information, and also some that are local to each of the two @@ -82,6 +86,7 @@ struct skx_dev { struct pci_dev *edev; u32 retry_rd_err_log_s; u32 retry_rd_err_log_d; + u32 retry_rd_err_log_d2; struct skx_dimm { u8 close_pg; u8 bank_xor_enable; @@ -108,18 +113,22 @@ enum { INDEX_MEMCTRL, INDEX_CHANNEL, INDEX_DIMM, + INDEX_CS, INDEX_NM_FIRST, INDEX_NM_MEMCTRL = INDEX_NM_FIRST, INDEX_NM_CHANNEL, INDEX_NM_DIMM, + INDEX_NM_CS, INDEX_MAX }; #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) +#define BIT_NM_CS BIT_ULL(INDEX_NM_CS) struct decoded_addr { + struct mce *mce; struct skx_dev *dev; u64 addr; int socket; @@ -129,6 +138,7 @@ struct decoded_addr { int sktways; int chanways; int dimm; + int cs; int rank; int channel_rank; u64 rank_address; @@ -136,6 +146,7 @@ struct decoded_addr { int column; int bank_address; int bank_group; + bool decoded_by_adxl; }; struct res_config { @@ -154,7 +165,12 @@ struct res_config { int sad_all_offset; /* Offsets of retry_rd_err_log registers */ u32 *offsets_scrub; + u32 *offsets_scrub_hbm0; + u32 *offsets_scrub_hbm1; u32 *offsets_demand; + u32 *offsets_demand2; + u32 *offsets_demand_hbm0; + u32 *offsets_demand_hbm1; }; typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index f05ff02c0656..f7d37c282819 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -1,22 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Synopsys DDR ECC Driver * This driver is based on ppc4xx_edac.c drivers * * Copyright (C) 2012 - 2014 Xilinx, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details */ #include <linux/edac.h> @@ -164,6 +151,11 @@ #define ECC_STAT_CECNT_SHIFT 8 #define ECC_STAT_BITNUM_MASK 0x7F +/* ECC error count register definitions */ +#define ECC_ERRCNT_UECNT_MASK 0xFFFF0000 +#define ECC_ERRCNT_UECNT_SHIFT 16 +#define ECC_ERRCNT_CECNT_MASK 0xFFFF + /* DDR QOS Interrupt register definitions */ #define DDR_QOS_IRQ_STAT_OFST 0x20200 #define DDR_QOSUE_MASK 0x4 @@ -423,15 +415,16 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv) base = priv->baseaddr; p = &priv->stat; + regval = readl(base + ECC_ERRCNT_OFST); + p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK; + p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT; + if (!p->ce_cnt) + goto ue_err; + regval = readl(base + ECC_STAT_OFST); if (!regval) return 1; - p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT; - p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT; - if (!p->ce_cnt) - goto ue_err; - p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK); regval = readl(base + ECC_CEADDR0_OFST); @@ -521,6 +514,28 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p) memset(p, 0, sizeof(*p)); } +static void enable_intr(struct synps_edac_priv *priv) +{ + /* Enable UE/CE Interrupts */ + if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR) + writel(DDR_UE_MASK | DDR_CE_MASK, + priv->baseaddr + ECC_CLR_OFST); + else + writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, + priv->baseaddr + DDR_QOS_IRQ_EN_OFST); + +} + +static void disable_intr(struct synps_edac_priv *priv) +{ + /* Disable UE/CE Interrupts */ + if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR) + writel(0x0, priv->baseaddr + ECC_CLR_OFST); + else + writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, + priv->baseaddr + DDR_QOS_IRQ_DB_OFST); +} + /** * intr_handler - Interrupt Handler for ECC interrupts. * @irq: IRQ number. @@ -562,6 +577,9 @@ static irqreturn_t intr_handler(int irq, void *dev_id) /* v3.0 of the controller does not have this register */ if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST); + else + enable_intr(priv); + return IRQ_HANDLED; } @@ -844,25 +862,6 @@ static void mc_init(struct mem_ctl_info *mci, struct platform_device *pdev) init_csrows(mci); } -static void enable_intr(struct synps_edac_priv *priv) -{ - /* Enable UE/CE Interrupts */ - if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR) - writel(DDR_UE_MASK | DDR_CE_MASK, - priv->baseaddr + ECC_CLR_OFST); - else - writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, - priv->baseaddr + DDR_QOS_IRQ_EN_OFST); - -} - -static void disable_intr(struct synps_edac_priv *priv) -{ - /* Disable UE/CE Interrupts */ - writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, - priv->baseaddr + DDR_QOS_IRQ_DB_OFST); -} - static int setup_irq(struct mem_ctl_info *mci, struct platform_device *pdev) { diff --git a/drivers/edac/wq.c b/drivers/edac/wq.c index d021d287eaec..ad3f516627c5 100644 --- a/drivers/edac/wq.c +++ b/drivers/edac/wq.c @@ -37,7 +37,6 @@ int edac_workqueue_setup(void) void edac_workqueue_teardown(void) { - flush_workqueue(wq); destroy_workqueue(wq); wq = NULL; } diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index 2ccd1db5e98f..54081403db4f 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -501,7 +501,7 @@ static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu) #define MEMERR_L2C_L2ESRA_PAGE_OFFSET 0x0804 /* - * Processor Module Domain (PMD) context - Context for a pair of processsors. + * Processor Module Domain (PMD) context - Context for a pair of processors. * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of * its own L1 cache. */ @@ -1919,7 +1919,7 @@ static int xgene_edac_probe(struct platform_device *pdev) irq = platform_get_irq_optional(pdev, i); if (irq < 0) { dev_err(&pdev->dev, "No IRQ resource\n"); - rc = -EINVAL; + rc = irq; goto out_err; } rc = devm_request_irq(&pdev->dev, irq, |