From fe783516e3016652b74ac92fb8b3fc2b1c0e9d5b Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 21 Mar 2019 15:13:39 -0700 Subject: EDAC, skx, i10nm: Make skx_common.c a pure library The following Kconfig constellations fail randconfig builds: CONFIG_ACPI_NFIT=y CONFIG_EDAC_DEBUG=y CONFIG_EDAC_SKX=m CONFIG_EDAC_I10NM=y or CONFIG_ACPI_NFIT=y CONFIG_EDAC_DEBUG=y CONFIG_EDAC_SKX=y CONFIG_EDAC_I10NM=m with: ... CC [M] drivers/edac/skx_common.o ... .../skx_common.o:.../skx_common.c:672: undefined reference to `__this_module' That is because if one of the two drivers - skx_edac or i10nm_edac - is built-in and the other one is a module, the shared file skx_common.c gets linked into a module object by kbuild. Therefore, when linking that same file into vmlinux, the '__this_module' symbol used in debugfs isn't defined, leading to the above error. Fix it by moving all debugfs code from skx_common.c to both skx_base.c and i10nm_base.c respectively. Thus, skx_common.c doesn't refer to the '__this_module' symbol anymore. Clarify skx_common.c's purpose at the top of the file for future reference, while at it. [ bp: Make text more readable. ] Fixes: d4dc89d069aa ("EDAC, i10nm: Add a driver for Intel 10nm server processors") Reported-by: Arnd Bergmann Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190321221339.GA32323@agluck-desk --- drivers/edac/i10nm_base.c | 52 ++++++++++++++++++++++++++++++++++++++++-- drivers/edac/skx_base.c | 50 ++++++++++++++++++++++++++++++++++++++++- drivers/edac/skx_common.c | 57 +++++++++-------------------------------------- drivers/edac/skx_common.h | 8 ------- 4 files changed, 109 insertions(+), 58 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index c334fb7c63df..6f06aec4877c 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -181,6 +181,54 @@ static struct notifier_block i10nm_mce_dec = { .priority = MCE_PRIO_EDAC, }; +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. + * Exercise the address decode logic by writing an address to + * /sys/kernel/debug/edac/i10nm_test/addr. + */ +static struct dentry *i10nm_test; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct mce m; + + pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); + + memset(&m, 0, sizeof(m)); + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x90; + /* One corrected error */ + m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); + m.addr = val; + skx_mce_check_error(NULL, 0, &m); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static void setup_i10nm_debug(void) +{ + i10nm_test = edac_debugfs_create_dir("i10nm_test"); + if (!i10nm_test) + return; + + if (!edac_debugfs_create_file("addr", 0200, i10nm_test, + NULL, &fops_u64_wo)) { + debugfs_remove(i10nm_test); + i10nm_test = NULL; + } +} + +static void teardown_i10nm_debug(void) +{ + debugfs_remove_recursive(i10nm_test); +} +#else +static inline void setup_i10nm_debug(void) {} +static inline void teardown_i10nm_debug(void) {} +#endif /*CONFIG_EDAC_DEBUG*/ + static int __init i10nm_init(void) { u8 mc = 0, src_id = 0, node_id = 0; @@ -249,7 +297,7 @@ static int __init i10nm_init(void) opstate_init(); mce_register_decode_chain(&i10nm_mce_dec); - setup_skx_debug("i10nm_test"); + setup_i10nm_debug(); i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION); @@ -262,7 +310,7 @@ fail: static void __exit i10nm_exit(void) { edac_dbg(2, "\n"); - teardown_skx_debug(); + teardown_i10nm_debug(); mce_unregister_decode_chain(&i10nm_mce_dec); skx_adxl_put(); skx_remove(); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index adae4c848ca1..a5c8fa3a249a 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -540,6 +540,54 @@ static struct notifier_block skx_mce_dec = { .priority = MCE_PRIO_EDAC, }; +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. + * Exercise the address decode logic by writing an address to + * /sys/kernel/debug/edac/skx_test/addr. + */ +static struct dentry *skx_test; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct mce m; + + pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); + + memset(&m, 0, sizeof(m)); + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x90; + /* One corrected error */ + m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); + m.addr = val; + skx_mce_check_error(NULL, 0, &m); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static void setup_skx_debug(void) +{ + skx_test = edac_debugfs_create_dir("skx_test"); + if (!skx_test) + return; + + if (!edac_debugfs_create_file("addr", 0200, skx_test, + NULL, &fops_u64_wo)) { + debugfs_remove(skx_test); + skx_test = NULL; + } +} + +static void teardown_skx_debug(void) +{ + debugfs_remove_recursive(skx_test); +} +#else +static inline void setup_skx_debug(void) {} +static inline void teardown_skx_debug(void) {} +#endif /*CONFIG_EDAC_DEBUG*/ + /* * skx_init: * make sure we are running on the correct cpu model @@ -619,7 +667,7 @@ static int __init skx_init(void) /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); - setup_skx_debug("skx_test"); + setup_skx_debug(); mce_register_decode_chain(&skx_mce_dec); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 0e96e7b5b0a7..b0dddcfa9baa 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -1,7 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver. - * Originally split out from the skx_edac driver. + * + * Shared code by both skx_edac and i10nm_edac. Originally split out + * from the skx_edac driver. + * + * This file is linked into both skx_edac and i10nm_edac drivers. In + * order to avoid link errors, this file must be like a pure library + * without including symbols and defines which would otherwise conflict, + * when linked once into a module and into a built-in object, at the + * same time. For example, __this_module symbol references when that + * file is being linked into a built-in object. * * Copyright (c) 2018, Intel Corporation. */ @@ -644,48 +652,3 @@ void skx_remove(void) kfree(d); } } - -#ifdef CONFIG_EDAC_DEBUG -/* - * Debug feature. - * Exercise the address decode logic by writing an address to - * /sys/kernel/debug/edac/dirname/addr. - */ -static struct dentry *skx_test; - -static int debugfs_u64_set(void *data, u64 val) -{ - struct mce m; - - pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); - - memset(&m, 0, sizeof(m)); - /* ADDRV + MemRd + Unknown channel */ - m.status = MCI_STATUS_ADDRV + 0x90; - /* One corrected error */ - m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); - m.addr = val; - skx_mce_check_error(NULL, 0, &m); - - return 0; -} -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); - -void setup_skx_debug(const char *dirname) -{ - skx_test = edac_debugfs_create_dir(dirname); - if (!skx_test) - return; - - if (!edac_debugfs_create_file("addr", 0200, skx_test, - NULL, &fops_u64_wo)) { - debugfs_remove(skx_test); - skx_test = NULL; - } -} - -void teardown_skx_debug(void) -{ - debugfs_remove_recursive(skx_test); -} -#endif /*CONFIG_EDAC_DEBUG*/ diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index d25374e34d4f..d18fa98669af 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -141,12 +141,4 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void skx_remove(void); -#ifdef CONFIG_EDAC_DEBUG -void setup_skx_debug(const char *dirname); -void teardown_skx_debug(void); -#else -static inline void setup_skx_debug(const char *dirname) {} -static inline void teardown_skx_debug(void) {} -#endif /*CONFIG_EDAC_DEBUG*/ - #endif /* _SKX_COMM_EDAC_H */ -- cgit v1.2.3-59-g8ed1b From 1bd76ff448a91e35e451a96671b5594e3fffc4bd Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Wed, 13 Mar 2019 10:27:22 -0500 Subject: EDAC, altera: Fix S10 Double Bit Error Notification Stratix10 Double Bit Error Address was always read from SDRAM Address register instead of each device's Address register. To determine which device had the DBE, cycle through the EDAC devices comparing the DBE value to the db_irq value. Once found, report the DBE Address from the device registers as well as the device name. Finally, notify the system via an SMC call and indicate the panic should result in a system reboot. Change a run-time check to a Stratix10 compile-time check for a clean SMC notification. Fixes: d5fc9125566c ("EDAC, altera: Combine Stratix10 and Arria10 probe functions") Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/1552490842-25440-1-git-send-email-thor.thayer@linux.intel.com --- drivers/edac/altera_edac.c | 69 ++++++++++++++++++++++++++++++++++------------ drivers/edac/altera_edac.h | 20 ++++++++++++++ 2 files changed, 72 insertions(+), 17 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 1bcf9aea0cdf..5ff263850cc7 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1930,6 +1930,15 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, goto err_release_group1; } +#ifdef CONFIG_ARCH_STRATIX10 + /* Use IRQ to determine SError origin instead of assigning IRQ */ + rc = of_property_read_u32_index(np, "interrupts", 0, &altdev->db_irq); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to parse DB IRQ index\n"); + goto err_release_group1; + } +#else altdev->db_irq = irq_of_parse_and_map(np, 1); if (!altdev->db_irq) { edac_printk(KERN_ERR, EDAC_DEVICE, "Error allocating DBIRQ\n"); @@ -1943,6 +1952,7 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n"); goto err_release_group1; } +#endif rc = edac_device_add_device(dci); if (rc) { @@ -2005,6 +2015,10 @@ static const struct irq_domain_ops a10_eccmgr_ic_ops = { /************** Stratix 10 EDAC Double Bit Error Handler ************/ #define to_a10edac(p, m) container_of(p, struct altr_arria10_edac, m) +#ifdef CONFIG_ARCH_STRATIX10 +/* panic routine issues reboot on non-zero panic_timeout */ +extern int panic_timeout; + /* * The double bit error is handled through SError which is fatal. This is * called as a panic notifier to printout ECC error info as part of the panic. @@ -2018,17 +2032,37 @@ static int s10_edac_dberr_handler(struct notifier_block *this, regmap_read(edac->ecc_mgr_map, S10_SYSMGR_ECC_INTSTAT_DERR_OFST, &dberror); regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror); - if (dberror & S10_DDR0_IRQ_MASK) { - regmap_read(edac->ecc_mgr_map, A10_DERRADDR_OFST, &err_addr); - regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST, - err_addr); - edac_printk(KERN_ERR, EDAC_MC, - "EDAC: [Uncorrectable errors @ 0x%08X]\n\n", - err_addr); + if (dberror & S10_DBE_IRQ_MASK) { + struct list_head *position; + struct altr_edac_device_dev *ed; + struct arm_smccc_res result; + + /* Find the matching DBE in the list of devices */ + list_for_each(position, &edac->a10_ecc_devices) { + ed = list_entry(position, struct altr_edac_device_dev, + next); + if (!(BIT(ed->db_irq) & dberror)) + continue; + + writel(ALTR_A10_ECC_DERRPENA, + ed->base + ALTR_A10_ECC_INTSTAT_OFST); + err_addr = readl(ed->base + ALTR_S10_DERR_ADDRA_OFST); + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_ADDR_OFST, err_addr); + edac_printk(KERN_ERR, EDAC_DEVICE, + "EDAC: [Fatal DBE on %s @ 0x%08X]\n", + ed->edac_dev_name, err_addr); + break; + } + /* Notify the System through SMC. Reboot delay = 1 second */ + panic_timeout = 1; + arm_smccc_smc(INTEL_SIP_SMC_ECC_DBE, dberror, 0, 0, 0, 0, + 0, 0, &result); } return NOTIFY_DONE; } +#endif /****************** Arria 10 EDAC Probe Function *********************/ static int altr_edac_a10_probe(struct platform_device *pdev) @@ -2098,16 +2132,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev) altr_edac_a10_irq_handler, edac); - if (socfpga_is_a10()) { - edac->db_irq = platform_get_irq(pdev, 1); - if (edac->db_irq < 0) { - dev_err(&pdev->dev, "No DBERR IRQ resource\n"); - return edac->db_irq; - } - irq_set_chained_handler_and_data(edac->db_irq, - altr_edac_a10_irq_handler, - edac); - } else { +#ifdef CONFIG_ARCH_STRATIX10 + { int dberror, err_addr; edac->panic_notifier.notifier_call = s10_edac_dberr_handler; @@ -2130,6 +2156,15 @@ static int altr_edac_a10_probe(struct platform_device *pdev) S10_SYSMGR_UE_ADDR_OFST, 0); } } +#else + edac->db_irq = platform_get_irq(pdev, 1); + if (edac->db_irq < 0) { + dev_err(&pdev->dev, "No DBERR IRQ resource\n"); + return edac->db_irq; + } + irq_set_chained_handler_and_data(edac->db_irq, + altr_edac_a10_irq_handler, edac); +#endif for_each_child_of_node(pdev->dev.of_node, child) { if (!of_device_is_available(child)) diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index f8664bac9fa8..60513f201ffb 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -289,6 +289,7 @@ struct altr_sdram_mc_data { #define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 /************* Stratix10 Defines **************/ +#define ALTR_S10_DERR_ADDRA_OFST 0x2C /* Stratix10 ECC Manager Defines */ #define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 @@ -299,6 +300,7 @@ struct altr_sdram_mc_data { #define S10_SYSMGR_UE_ADDR_OFST 0x224 #define S10_DDR0_IRQ_MASK BIT(16) +#define S10_DBE_IRQ_MASK 0x3FE /* Define ECC Block Offsets for peripherals */ #define ECC_BLK_ADDRESS_OFST 0x40 @@ -435,4 +437,22 @@ struct altr_arria10_edac { #define INTEL_SIP_SMC_REG_WRITE \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE) +/* + * Request INTEL_SIP_SMC_ECC_DBE + * + * Sync call used by service driver at EL1 alert EL3 that a Double Bit + * ECC error has occurred. + * + * Call register usage: + * a0 INTEL_SIP_SMC_ECC_DBE + * a1 SysManager Double Bit Error value + * a2-7 not used + * + * Return status + * a0 INTEL_SIP_SMC_STATUS_OK + */ +#define INTEL_SIP_SMC_FUNCID_ECC_DBE 13 +#define INTEL_SIP_SMC_ECC_DBE \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_ECC_DBE) + #endif /* #ifndef _ALTERA_EDAC_H */ -- cgit v1.2.3-59-g8ed1b From 76fc276f4a91bdf96940c276398954f291daf034 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Mar 2019 16:29:33 -0700 Subject: MAINTAINERS: Update entry for EDAC-SKYLAKE Code refactoring to share some source code with a new EDAC driver resulted in renaming one file (skx_edac.c became skx_base.c) and adding a new file (skx_common.c). Update the file pattern in MAINTAINERS to take account of this change. Fixes: 98f2fc829e3b ("EDAC, skx_edac: Delete duplicated code") Reported-by: Joe Perches Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: "David S. Miller" Cc: Aristeu Rozanski Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Mauro Carvalho Chehab Cc: Nicolas Ferre Cc: Qiuxu Zhuo Cc: linux-edac Link: https://lkml.kernel.org/r/20190325232932.GA8869@agluck-desk --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e17ebf70b548..a708bf6a02fd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5677,7 +5677,7 @@ EDAC-SKYLAKE M: Tony Luck L: linux-edac@vger.kernel.org S: Maintained -F: drivers/edac/skx_edac.c +F: drivers/edac/skx_*.c EDAC-TI M: Tero Kristo -- cgit v1.2.3-59-g8ed1b From bcc5c1bbf76c40764735a5b3bdb884575e7fd822 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Mar 2019 16:56:27 -0700 Subject: MAINTAINERS: Add entry for EDAC-I10NM Updating the MAINTAINERS file when adding this new driver was forgotten. Fix it. Fixes: d4dc89d069aa ("EDAC, i10nm: Add a driver for Intel 10nm server processors") Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: "David S. Miller" Cc: Aristeu Rozanski Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Mauro Carvalho Chehab Cc: Nicolas Ferre Cc: Qiuxu Zhuo Cc: linux-edac Link: https://lkml.kernel.org/r/20190325235627.GA11938@agluck-desk --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a708bf6a02fd..7d1246b62a99 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5596,6 +5596,12 @@ L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/ghes_edac.c +EDAC-I10NM +M: Tony Luck +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/i10nm_base.c + EDAC-I3000 L: linux-edac@vger.kernel.org S: Orphan -- cgit v1.2.3-59-g8ed1b From 6e846239e5487cbb89ac8192d5f11437d010130e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 2019 15:36:09 +0000 Subject: EDAC/amd64: Add Family 17h Model 30h PCI IDs Add the new Family 17h Model 30h PCI IDs to the AMD64 EDAC module. This also fixes a probe failure that appeared when some other PCI IDs for Family 17h Model 30h were added to the AMD NB code. Fixes: be3518a16ef2 (x86/amd_nb: Add PCI device IDs for family 17h, model 30h) Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-1-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 13 +++++++++++++ drivers/edac/amd64_edac.h | 3 +++ 2 files changed, 16 insertions(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 6ea98575a402..98e8da9d9f5b 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2211,6 +2211,15 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_base_addr_to_cs_size, } }, + [F17_M30H_CPUS] = { + .ctl_name = "F17h_M30h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_base_addr_to_cs_size, + } + }, }; /* @@ -3203,6 +3212,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) fam_type = &family_types[F17_M10H_CPUS]; pvt->ops = &family_types[F17_M10H_CPUS].ops; break; + } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) { + fam_type = &family_types[F17_M30H_CPUS]; + pvt->ops = &family_types[F17_M30H_CPUS].ops; + break; } /* fall through */ case 0x18: diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 4242f8e39c18..de8dbb0b42b5 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -117,6 +117,8 @@ #define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee +#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490 +#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 /* * Function 1 - Address Map @@ -284,6 +286,7 @@ enum amd_families { F16_M30H_CPUS, F17_CPUS, F17_M10H_CPUS, + F17_M30H_CPUS, NUM_FAMILIES, }; -- cgit v1.2.3-59-g8ed1b From 4d30d2bc3c23e63c2608bc5b03b0960490d5b740 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 2019 15:36:10 +0000 Subject: EDAC/amd64: Use a macro for iterating over Unified Memory Controllers Define and use a macro for looping over the number of Unified Memory Controllers. No functional change. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-2-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 98e8da9d9f5b..e4fd459d807a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -449,6 +449,9 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, #define for_each_chip_select_mask(i, dct, pvt) \ for (i = 0; i < pvt->csels[dct].m_cnt; i++) +#define for_each_umc(i) \ + for (i = 0; i < NUM_UMCS; i++) + /* * @input_addr is an InputAddr associated with the node given by mci. Return the * csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr). @@ -722,7 +725,7 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt) if (pvt->umc) { u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT)) continue; @@ -811,7 +814,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) struct amd64_umc *umc; u32 i, tmp, umc_base; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { umc_base = get_umc_base(i); umc = &pvt->umc[i]; @@ -1388,7 +1391,7 @@ static int f17_early_channel_count(struct amd64_pvt *pvt) int i, channels = 0; /* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */ - for (i = 0; i < NUM_UMCS; i++) + for_each_umc(i) channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT); amd64_info("MCT channel count: %d\n", channels); @@ -2612,7 +2615,7 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt) if (pvt->umc) { u8 i; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { /* Check enabled channels only: */ if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) && (pvt->umc[i].ecc_ctrl & BIT(7))) { @@ -2648,7 +2651,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) u32 i, umc_base; /* Read registers from each UMC */ - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { umc_base = get_umc_base(i); umc = &pvt->umc[i]; @@ -3061,7 +3064,7 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid) if (boot_cpu_data.x86 >= 0x17) { u8 umc_en_mask = 0, ecc_en_mask = 0; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { u32 base = get_umc_base(i); /* Only check enabled UMCs. */ @@ -3114,7 +3117,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) { u8 i, ecc_en = 1, cpk_en = 1; - for (i = 0; i < NUM_UMCS; i++) { + for_each_umc(i) { if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED); cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP); -- cgit v1.2.3-59-g8ed1b From bdcee7747f5c490297665af0e1e0fbeb4368804d Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 2019 15:36:10 +0000 Subject: EDAC/amd64: Support more than two Unified Memory Controllers The first few models of Family 17h all had 2 Unified Memory Controllers per Die, so this was treated as a fixed value. However, future systems may have more Unified Memory Controllers per Die. Related to this, the channel number and base address of a Unified Memory Controller were found by matching on fixed, known values. However, current and future systems follow this pattern for the channel number and base address of a Unified Memory Controller: 0xYXXXXX, where Y is the channel number. So matching on hardcoded values is not necessary. Set the number of Unified Memory Controllers at driver init time based on the family/model. Also, update the functions that find the channel number and base address of a Unified Memory Controller to support more than two. [ bp: Move num_umcs into the .c file and simplify comment. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-3-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 47 ++++++++++++++++++++++++++++++----------------- drivers/edac/amd64_edac.h | 6 ++---- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index e4fd459d807a..81dca957fce0 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -18,6 +18,9 @@ static struct msr __percpu *msrs; /* Per-node stuff */ static struct ecc_settings **ecc_stngs; +/* Number of Unified Memory Controllers */ +static u8 num_umcs; + /* * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching- @@ -450,7 +453,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, for (i = 0; i < pvt->csels[dct].m_cnt; i++) #define for_each_umc(i) \ - for (i = 0; i < NUM_UMCS; i++) + for (i = 0; i < num_umcs; i++) /* * @input_addr is an InputAddr associated with the node given by mci. Return the @@ -2476,18 +2479,14 @@ static inline void decode_bus_error(int node_id, struct mce *m) * To find the UMC channel represented by this bank we need to match on its * instance_id. The instance_id of a bank is held in the lower 32 bits of its * IPID. + * + * Currently, we can derive the channel number by looking at the 6th nibble in + * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel + * number. */ -static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m) +static int find_umc_channel(struct mce *m) { - u32 umc_instance_id[] = {0x50f00, 0x150f00}; - u32 instance_id = m->ipid & GENMASK(31, 0); - int i, channel = -1; - - for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++) - if (umc_instance_id[i] == instance_id) - channel = i; - - return channel; + return (m->ipid & GENMASK(31, 0)) >> 20; } static void decode_umc_error(int node_id, struct mce *m) @@ -2509,11 +2508,7 @@ static void decode_umc_error(int node_id, struct mce *m) if (m->status & MCI_STATUS_DEFERRED) ecc_type = 3; - err.channel = find_umc_channel(pvt, m); - if (err.channel < 0) { - err.err_code = ERR_CHANNEL; - goto log_error; - } + err.channel = find_umc_channel(m); if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { err.err_code = ERR_NORM_ADDR; @@ -3252,6 +3247,22 @@ static const struct attribute_group *amd64_edac_attr_groups[] = { NULL }; +/* Set the number of Unified Memory Controllers in the system. */ +static void compute_num_umcs(void) +{ + u8 model = boot_cpu_data.x86_model; + + if (boot_cpu_data.x86 < 0x17) + return; + + if (model >= 0x30 && model <= 0x3f) + num_umcs = 8; + else + num_umcs = 2; + + edac_dbg(1, "Number of UMCs: %x", num_umcs); +} + static int init_one_instance(unsigned int nid) { struct pci_dev *F3 = node_to_amd_nb(nid)->misc; @@ -3276,7 +3287,7 @@ static int init_one_instance(unsigned int nid) goto err_free; if (pvt->fam >= 0x17) { - pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL); + pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL); if (!pvt->umc) { ret = -ENOMEM; goto err_free; @@ -3497,6 +3508,8 @@ static int __init amd64_edac_init(void) if (!msrs) goto err_free; + compute_num_umcs(); + for (i = 0; i < amd_nb_num(); i++) { err = probe_one_instance(i); if (err) { diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index de8dbb0b42b5..88ca6dc2d5df 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -274,8 +274,6 @@ #define UMC_SDP_INIT BIT(31) -#define NUM_UMCS 2 - enum amd_families { K8_CPUS = 0, F10_CPUS, @@ -399,8 +397,8 @@ struct err_info { static inline u32 get_umc_base(u8 channel) { - /* ch0: 0x50000, ch1: 0x150000 */ - return 0x50000 + (!!channel << 20); + /* chY: 0xY50000 */ + return 0x50000 + (channel << 20); } static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i) -- cgit v1.2.3-59-g8ed1b From 869adc4316ea348e3c52af2494d9b1f6bd68abbd Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 25 Mar 2019 20:33:30 +0000 Subject: EDAC/amd64: Set maximum channel layer size depending on family The AMD64 EDAC module currently hardcodes the EDAC channel layer size count to two. Future AMD systems may have more channels than this. Set the EDAC channel layer size equal to the maximum number of channels possible for the system. On Family 17h and later, this is set in the num_umcs variable. Older systems will continue to use two as the default. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190325203319.7603-1-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 81dca957fce0..54acd3a7acdc 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3326,8 +3326,14 @@ static int init_one_instance(unsigned int nid) * Always allocate two channels since we can have setups with DIMMs on * only one channel. Also, this simplifies handling later for the price * of a couple of KBs tops. + * + * On Fam17h+, the number of controllers may be greater than two. So set + * the size equal to the maximum number of UMCs. */ - layers[1].size = 2; + if (pvt->fam >= 0x17) + layers[1].size = num_umcs; + else + layers[1].size = 2; layers[1].is_virt_csrow = false; mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0); -- cgit v1.2.3-59-g8ed1b From 7835961d377b75ab9ae77f715e378fcb72508306 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 2019 15:36:11 +0000 Subject: EDAC/amd64: Recognize x16 symbol size Future AMD systems may support x16 symbol sizes. Recognize if a system is using x16 symbol size. Also, simplify the print statement. Note that a x16 syndrome vector table is not necessary like with x4 or x8 syndromes. This is because systems that support x16 symbol sizes are SMCA systems and in that case, the syndrome can be directly extracted from the MCA_SYND[Syndrome] field. [ bp: massage. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-4-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 21 ++++++++++----------- drivers/edac/amd64_edac.h | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 54acd3a7acdc..1457919cb3b5 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -900,8 +900,7 @@ static void dump_misc_regs(struct amd64_pvt *pvt) edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); - amd64_info("using %s syndromes.\n", - ((pvt->ecc_sym_sz == 8) ? "x8" : "x4")); + amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); } /* @@ -2612,17 +2611,17 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt) for_each_umc(i) { /* Check enabled channels only: */ - if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) && - (pvt->umc[i].ecc_ctrl & BIT(7))) { - pvt->ecc_sym_sz = 8; - break; + if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { + if (pvt->umc[i].ecc_ctrl & BIT(9)) { + pvt->ecc_sym_sz = 16; + return; + } else if (pvt->umc[i].ecc_ctrl & BIT(7)) { + pvt->ecc_sym_sz = 8; + return; + } } } - - return; - } - - if (pvt->fam >= 0x10) { + } else if (pvt->fam >= 0x10) { u32 tmp; amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 88ca6dc2d5df..8f66472f7adc 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -364,7 +364,7 @@ struct amd64_pvt { u32 dct_sel_hi; /* DRAM Controller Select High */ u32 online_spare; /* On-Line spare Reg */ - /* x4 or x8 syndromes in use */ + /* x4, x8, or x16 syndromes in use */ u8 ecc_sym_sz; /* place to store error injection parameters prior to issue */ -- cgit v1.2.3-59-g8ed1b From 0a227af521d6df5286550b62f4b591417170b4ea Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 2019 15:36:11 +0000 Subject: EDAC/amd64: Support more than two controllers for chip select handling The struct chip_select array that's used for saving chip select bases and masks is fixed at length of two. There should be one struct chip_select for each controller, so this array should be increased to support systems that may have more than two controllers. Increase the size of the struct chip_select array to eight, which is the largest number of controllers per die currently supported on AMD systems. Also, carve out the Family 17h+ reading of the bases/masks into a separate function. This effectively reverts the original bases/masks reading code to before Family 17h support was added. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-5-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 113 ++++++++++++++++++++++++---------------------- drivers/edac/amd64_edac.h | 5 +- 2 files changed, 62 insertions(+), 56 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 1457919cb3b5..d89998dcf7f5 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -914,89 +914,94 @@ static void prep_chip_selects(struct amd64_pvt *pvt) } else if (pvt->fam == 0x15 && pvt->model == 0x30) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; + } else if (pvt->fam >= 0x17) { + int umc; + + for_each_umc(umc) { + pvt->csels[umc].b_cnt = 8; + pvt->csels[umc].m_cnt = 4; + } + } else { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; } } +static void read_umc_base_mask(struct amd64_pvt *pvt) +{ + int cs, umc; + + for_each_umc(umc) { + u32 umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR; + u32 umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; + + for_each_chip_select(cs, 0, pvt) { + u32 *base = &pvt->csels[umc].csbases[cs]; + u32 *mask = &pvt->csels[umc].csmasks[cs]; + + u32 base_reg = umc_base_reg + (cs * 4); + u32 mask_reg = umc_mask_reg + ((cs >> 1) * 4); + + if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) + edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base, base_reg); + + if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) + edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask, mask_reg); + } + } +} + /* * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers */ static void read_dct_base_mask(struct amd64_pvt *pvt) { - int base_reg0, base_reg1, mask_reg0, mask_reg1, cs; + int cs; prep_chip_selects(pvt); - if (pvt->umc) { - base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR; - base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR; - mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK; - mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK; - } else { - base_reg0 = DCSB0; - base_reg1 = DCSB1; - mask_reg0 = DCSM0; - mask_reg1 = DCSM1; - } + if (pvt->umc) + return read_umc_base_mask(pvt); for_each_chip_select(cs, 0, pvt) { - int reg0 = base_reg0 + (cs * 4); - int reg1 = base_reg1 + (cs * 4); + int reg0 = DCSB0 + (cs * 4); + int reg1 = DCSB1 + (cs * 4); u32 *base0 = &pvt->csels[0].csbases[cs]; u32 *base1 = &pvt->csels[1].csbases[cs]; - if (pvt->umc) { - if (!amd_smn_read(pvt->mc_node_id, reg0, base0)) - edac_dbg(0, " DCSB0[%d]=0x%08x reg: 0x%x\n", - cs, *base0, reg0); + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) + edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", + cs, *base0, reg0); - if (!amd_smn_read(pvt->mc_node_id, reg1, base1)) - edac_dbg(0, " DCSB1[%d]=0x%08x reg: 0x%x\n", - cs, *base1, reg1); - } else { - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) - edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", - cs, *base0, reg0); - - if (pvt->fam == 0xf) - continue; + if (pvt->fam == 0xf) + continue; - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) - edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", - cs, *base1, (pvt->fam == 0x10) ? reg1 - : reg0); - } + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) + edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", + cs, *base1, (pvt->fam == 0x10) ? reg1 + : reg0); } for_each_chip_select_mask(cs, 0, pvt) { - int reg0 = mask_reg0 + (cs * 4); - int reg1 = mask_reg1 + (cs * 4); + int reg0 = DCSM0 + (cs * 4); + int reg1 = DCSM1 + (cs * 4); u32 *mask0 = &pvt->csels[0].csmasks[cs]; u32 *mask1 = &pvt->csels[1].csmasks[cs]; - if (pvt->umc) { - if (!amd_smn_read(pvt->mc_node_id, reg0, mask0)) - edac_dbg(0, " DCSM0[%d]=0x%08x reg: 0x%x\n", - cs, *mask0, reg0); - - if (!amd_smn_read(pvt->mc_node_id, reg1, mask1)) - edac_dbg(0, " DCSM1[%d]=0x%08x reg: 0x%x\n", - cs, *mask1, reg1); - } else { - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) - edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", - cs, *mask0, reg0); + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) + edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", + cs, *mask0, reg0); - if (pvt->fam == 0xf) - continue; + if (pvt->fam == 0xf) + continue; - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) - edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", - cs, *mask1, (pvt->fam == 0x10) ? reg1 - : reg0); - } + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) + edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", + cs, *mask1, (pvt->fam == 0x10) ? reg1 + : reg0); } } diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8f66472f7adc..4dce6a2ac75f 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -96,6 +96,7 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 +#define NUM_CONTROLLERS 8 #define ON true #define OFF false @@ -351,8 +352,8 @@ struct amd64_pvt { u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */ u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ - /* one for each DCT */ - struct chip_select csels[2]; + /* one for each DCT/UMC */ + struct chip_select csels[NUM_CONTROLLERS]; /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */ struct dram_range ranges[DRAM_RANGES]; -- cgit v1.2.3-59-g8ed1b From fc00c6a416381010c4a721a4142ddd0260d68f20 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Feb 2019 15:36:12 +0000 Subject: EDAC/amd64: Adjust printed chip select sizes when interleaved AMD systems may support chip select interleaving. However, on family 17h+ this was not taken into account when printing the chip select sizes. Add support to detect if chip selects are interleaved on family 17h+, and adjust the sizes accordingly. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Kim Phillips Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/20190228153558.127292-6-Yazen.Ghannam@amd.com --- drivers/edac/amd64_edac.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d89998dcf7f5..4c0239aeff2f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -787,6 +787,22 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) (dclr & BIT(15)) ? "yes" : "no"); } +/* + * The Address Mask should be a contiguous set of bits in the non-interleaved + * case. So to check for CS interleaving, find the most- and least-significant + * bits of the mask, generate a contiguous bitmask, and compare the two. + */ +static bool f17_cs_interleaved(struct amd64_pvt *pvt, u8 ctrl, int cs) +{ + u32 mask = pvt->csels[ctrl].csmasks[cs >> 1]; + u32 msb = fls(mask) - 1, lsb = ffs(mask) - 1; + u32 test_mask = GENMASK(msb, lsb); + + edac_dbg(1, "mask=0x%08x test_mask=0x%08x\n", mask, test_mask); + + return mask ^ test_mask; +} + static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) { int dimm, size0, size1, cs0, cs1; @@ -803,8 +819,19 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) size1 = 0; cs1 = dimm * 2 + 1; - if (csrow_enabled(cs1, ctrl, pvt)) - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1); + if (csrow_enabled(cs1, ctrl, pvt)) { + /* + * CS interleaving is only supported if both CSes have + * the same amount of memory. Because they are + * interleaved, it will look like both CSes have the + * full amount of memory. Save the size for both as + * half the amount we found on CS0, if interleaved. + */ + if (f17_cs_interleaved(pvt, ctrl, cs1)) + size1 = size0 = (size0 >> 1); + else + size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1); + } amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", cs0, size0, -- cgit v1.2.3-59-g8ed1b From 436b0a583af0497b4b84cb73c12f8bd620f868af Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 26 Mar 2019 16:29:30 -0500 Subject: EDAC/altera: Do less intrusive error injection Improve the Arria10 and Stratix10 error injection routine by reading the data and changing just 1 bit before writing back out. Previous routine would overwrite the first bytes to 0 then change 1 bit but this method is less intrusive. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/1553635771-32693-1-git-send-email-thor.thayer@linux.intel.com --- drivers/edac/altera_edac.c | 30 +++++++++++++----------------- drivers/edac/altera_edac.h | 2 +- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 5ff263850cc7..acb3006df631 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1762,28 +1762,24 @@ static ssize_t altr_edac_a10_device_trig2(struct file *file, if (trig_type == ALTR_UE_TRIGGER_CHAR) { writel(priv->ue_set_mask, set_addr); } else { - /* Setup write of 0 to first 4 bytes */ - writel(0x0, drvdata->base + ECC_BLK_WDATA0_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); - /* Setup write of 4 bytes */ + /* Setup read/write of 4 bytes */ writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST); /* Setup Address to 0 */ - writel(0x0, drvdata->base + ECC_BLK_ADDRESS_OFST); - /* Setup accctrl to write & data override */ - writel(ECC_WRITE_DOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); - /* Kick it. */ - writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); - /* Setup accctrl to read & ecc override */ - writel(ECC_READ_EOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + writel(0, drvdata->base + ECC_BLK_ADDRESS_OFST); + /* Setup accctrl to read & ecc & data override */ + writel(ECC_READ_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); /* Kick it. */ writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); /* Setup write for single bit change */ - writel(0x1, drvdata->base + ECC_BLK_WDATA0_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); - writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA0_OFST) ^ 0x1, + drvdata->base + ECC_BLK_WDATA0_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA1_OFST), + drvdata->base + ECC_BLK_WDATA1_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA2_OFST), + drvdata->base + ECC_BLK_WDATA2_OFST); + writel(readl(drvdata->base + ECC_BLK_RDATA3_OFST), + drvdata->base + ECC_BLK_WDATA3_OFST); + /* Copy Read ECC to Write ECC */ writel(readl(drvdata->base + ECC_BLK_RECC0_OFST), drvdata->base + ECC_BLK_WECC0_OFST); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 60513f201ffb..1532ec9c3510 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -321,7 +321,7 @@ struct altr_sdram_mc_data { #define ECC_BLK_STARTACC_OFST 0x7C #define ECC_XACT_KICK 0x10000 -#define ECC_WORD_WRITE 0xF +#define ECC_WORD_WRITE 0xFF #define ECC_WRITE_DOVR 0x101 #define ECC_WRITE_EDOVR 0x103 #define ECC_READ_EOVR 0x2 -- cgit v1.2.3-59-g8ed1b From 788586efd116d6d7d05985881eda503333e702b4 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 26 Mar 2019 16:29:31 -0500 Subject: EDAC/altera: Initialize peripheral FIFOs in probe() The FIFO memory and ECC initialization doesn't need to be done as a separate operation early in the startup. Improve the Arria10 and Stratix10 peripheral FIFO init by initializing memory and enabling ECC as part of the device driver initialization. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Cc: James Morse Cc: Mauro Carvalho Chehab Cc: linux-edac Link: https://lkml.kernel.org/r/1553635771-32693-2-git-send-email-thor.thayer@linux.intel.com --- drivers/edac/altera_edac.c | 162 +++++++++++++++++++++++++-------------------- 1 file changed, 91 insertions(+), 71 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index acb3006df631..761199175c76 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1361,8 +1361,19 @@ static const struct edac_device_prv_data a10_l2ecc_data = { #ifdef CONFIG_EDAC_ALTERA_ETHERNET +static int __init socfpga_init_ethernet_ecc(struct altr_edac_device_dev *dev) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-eth-mac-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(dev); +} + static const struct edac_device_prv_data a10_enetecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_ethernet_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1374,21 +1385,25 @@ static const struct edac_device_prv_data a10_enetecc_data = { .inject_fops = &altr_edac_a10_device_inject2_fops, }; -static int __init socfpga_init_ethernet_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-eth-mac-ecc"); -} - -early_initcall(socfpga_init_ethernet_ecc); - #endif /* CONFIG_EDAC_ALTERA_ETHERNET */ /********************** NAND Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_NAND +static int __init socfpga_init_nand_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-nand-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data a10_nandecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_nand_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1400,21 +1415,25 @@ static const struct edac_device_prv_data a10_nandecc_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_nand_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-nand-ecc"); -} - -early_initcall(socfpga_init_nand_ecc); - #endif /* CONFIG_EDAC_ALTERA_NAND */ /********************** DMA Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_DMA +static int __init socfpga_init_dma_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-dma-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data a10_dmaecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_dma_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1426,21 +1445,25 @@ static const struct edac_device_prv_data a10_dmaecc_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_dma_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-dma-ecc"); -} - -early_initcall(socfpga_init_dma_ecc); - #endif /* CONFIG_EDAC_ALTERA_DMA */ /********************** USB Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_USB +static int __init socfpga_init_usb_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-usb-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data a10_usbecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_usb_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1452,21 +1475,25 @@ static const struct edac_device_prv_data a10_usbecc_data = { .inject_fops = &altr_edac_a10_device_inject2_fops, }; -static int __init socfpga_init_usb_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-usb-ecc"); -} - -early_initcall(socfpga_init_usb_ecc); - #endif /* CONFIG_EDAC_ALTERA_USB */ /********************** QSPI Device Functions **********************/ #ifdef CONFIG_EDAC_ALTERA_QSPI +static int __init socfpga_init_qspi_ecc(struct altr_edac_device_dev *device) +{ + int ret; + + ret = altr_init_a10_ecc_device_type("altr,socfpga-qspi-ecc"); + if (ret) + return ret; + + return altr_check_ecc_deps(device); +} + static const struct edac_device_prv_data a10_qspiecc_data = { - .setup = altr_check_ecc_deps, + .setup = socfpga_init_qspi_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1478,13 +1505,6 @@ static const struct edac_device_prv_data a10_qspiecc_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_qspi_ecc(void) -{ - return altr_init_a10_ecc_device_type("altr,socfpga-qspi-ecc"); -} - -early_initcall(socfpga_init_qspi_ecc); - #endif /* CONFIG_EDAC_ALTERA_QSPI */ /********************* SDMMC Device Functions **********************/ @@ -1593,6 +1613,35 @@ err_release_group_1: return rc; } +static int __init socfpga_init_sdmmc_ecc(struct altr_edac_device_dev *device) +{ + int rc = -ENODEV; + struct device_node *child; + + child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); + if (!child) + return -ENODEV; + + if (!of_device_is_available(child)) + goto exit; + + if (validate_parent_available(child)) + goto exit; + + /* Init portB */ + rc = altr_init_a10_ecc_block(child, ALTR_A10_SDMMC_IRQ_MASK, + a10_sdmmceccb_data.ecc_enable_mask, 1); + if (rc) + goto exit; + + /* Setup portB */ + return altr_portb_setup(device); + +exit: + of_node_put(child); + return rc; +} + static irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id) { struct altr_edac_device_dev *ad = dev_id; @@ -1617,7 +1666,7 @@ static irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id) } static const struct edac_device_prv_data a10_sdmmcecca_data = { - .setup = altr_portb_setup, + .setup = socfpga_init_sdmmc_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1630,7 +1679,7 @@ static const struct edac_device_prv_data a10_sdmmcecca_data = { }; static const struct edac_device_prv_data a10_sdmmceccb_data = { - .setup = altr_portb_setup, + .setup = socfpga_init_sdmmc_ecc, .ce_clear_mask = ALTR_A10_ECC_SERRPENB, .ue_clear_mask = ALTR_A10_ECC_DERRPENB, .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, @@ -1642,35 +1691,6 @@ static const struct edac_device_prv_data a10_sdmmceccb_data = { .inject_fops = &altr_edac_a10_device_inject_fops, }; -static int __init socfpga_init_sdmmc_ecc(void) -{ - int rc = -ENODEV; - struct device_node *child; - - if (!socfpga_is_a10() && !socfpga_is_s10()) - return -ENODEV; - - child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); - if (!child) { - edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n"); - return -ENODEV; - } - - if (!of_device_is_available(child)) - goto exit; - - if (validate_parent_available(child)) - goto exit; - - rc = altr_init_a10_ecc_block(child, ALTR_A10_SDMMC_IRQ_MASK, - a10_sdmmcecca_data.ecc_enable_mask, 1); -exit: - of_node_put(child); - return rc; -} - -early_initcall(socfpga_init_sdmmc_ecc); - #endif /* CONFIG_EDAC_ALTERA_SDMMC */ /********************* Arria10 EDAC Device Functions *************************/ -- cgit v1.2.3-59-g8ed1b From fad9fab975cb9fae651854c811cb07a30bc2b98a Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 2 Apr 2019 17:40:56 +0200 Subject: EDAC/altera, firmware/intel: Add Stratix10 ECC DBE SMC call Reserve ECC Double Bit Error SMC call to alert U-Boot that a DBE has occurred. Move the call from local EDAC header file to a common header. [ bp: Merge the two patches. ] Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Reviewed-by: Richard Gong Reviewed-by: Alan Tull # firmware Cc: Greg KH Cc: James Morse Cc: linux-edac Cc: mchehab@kernel.org Link: https://lkml.kernel.org/r/1553870639-23895-1-git-send-email-thor.thayer@linux.intel.com Signed-off-by: Borislav Petkov --- drivers/edac/altera_edac.c | 1 + drivers/edac/altera_edac.h | 83 ---------------------------- include/linux/firmware/intel/stratix10-smc.h | 19 +++++++ 3 files changed, 20 insertions(+), 83 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 761199175c76..8816f74a22b4 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 1532ec9c3510..55654cc4bcdf 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -372,87 +372,4 @@ struct altr_arria10_edac { struct notifier_block panic_notifier; }; -/* - * Functions specified by ARM SMC Calling convention: - * - * FAST call executes atomic operations, returns when the requested operation - * has completed. - * STD call starts a operation which can be preempted by a non-secure - * interrupt. The call can return before the requested operation has - * completed. - * - * a0..a7 is used as register names in the descriptions below, on arm32 - * that translates to r0..r7 and on arm64 to w0..w7. - */ - -#define INTEL_SIP_SMC_STD_CALL_VAL(func_num) \ - ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, ARM_SMCCC_SMC_64, \ - ARM_SMCCC_OWNER_SIP, (func_num)) - -#define INTEL_SIP_SMC_FAST_CALL_VAL(func_num) \ - ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64, \ - ARM_SMCCC_OWNER_SIP, (func_num)) - -#define INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION 0xFFFFFFFF -#define INTEL_SIP_SMC_STATUS_OK 0x0 -#define INTEL_SIP_SMC_REG_ERROR 0x5 - -/* - * Request INTEL_SIP_SMC_REG_READ - * - * Read a protected register using SMCCC - * - * Call register usage: - * a0: INTEL_SIP_SMC_REG_READ. - * a1: register address. - * a2-7: not used. - * - * Return status: - * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_REG_ERROR, or - * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION - * a1: Value in the register - * a2-3: not used. - */ -#define INTEL_SIP_SMC_FUNCID_REG_READ 7 -#define INTEL_SIP_SMC_REG_READ \ - INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_READ) - -/* - * Request INTEL_SIP_SMC_REG_WRITE - * - * Write a protected register using SMCCC - * - * Call register usage: - * a0: INTEL_SIP_SMC_REG_WRITE. - * a1: register address - * a2: value to program into register. - * a3-7: not used. - * - * Return status: - * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_REG_ERROR, or - * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION - * a1-3: not used. - */ -#define INTEL_SIP_SMC_FUNCID_REG_WRITE 8 -#define INTEL_SIP_SMC_REG_WRITE \ - INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE) - -/* - * Request INTEL_SIP_SMC_ECC_DBE - * - * Sync call used by service driver at EL1 alert EL3 that a Double Bit - * ECC error has occurred. - * - * Call register usage: - * a0 INTEL_SIP_SMC_ECC_DBE - * a1 SysManager Double Bit Error value - * a2-7 not used - * - * Return status - * a0 INTEL_SIP_SMC_STATUS_OK - */ -#define INTEL_SIP_SMC_FUNCID_ECC_DBE 13 -#define INTEL_SIP_SMC_ECC_DBE \ - INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_ECC_DBE) - #endif /* #ifndef _ALTERA_EDAC_H */ diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h index 5be5dab50b13..01684d935580 100644 --- a/include/linux/firmware/intel/stratix10-smc.h +++ b/include/linux/firmware/intel/stratix10-smc.h @@ -309,4 +309,23 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE) #define INTEL_SIP_SMC_FUNCID_RSU_UPDATE 12 #define INTEL_SIP_SMC_RSU_UPDATE \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_UPDATE) + +/* + * Request INTEL_SIP_SMC_ECC_DBE + * + * Sync call used by service driver at EL1 to alert EL3 that a Double + * Bit ECC error has occurred. + * + * Call register usage: + * a0 INTEL_SIP_SMC_ECC_DBE + * a1 SysManager Double Bit Error value + * a2-7 not used + * + * Return status + * a0 INTEL_SIP_SMC_STATUS_OK + */ +#define INTEL_SIP_SMC_FUNCID_ECC_DBE 13 +#define INTEL_SIP_SMC_ECC_DBE \ + INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_ECC_DBE) + #endif -- cgit v1.2.3-59-g8ed1b From b9c8172eedc1a8285c373564174a724311394472 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 4 Apr 2019 09:36:35 -0500 Subject: Documentation: dt: edac: Fix Stratix10 IRQ bindings Fix Stratix10 ECC bindings to specify only the single bit error. On Stratix10 double bit errors are handled as SErrors instead of interrupts. Indicate the differences between the ARM64 and ARM32 EDAC architecture in the bindings. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Acked-by: Rob Herring Cc: James Morse Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: dinguyen@kernel.org Cc: linux-edac Cc: mark.rutland@arm.com Cc: mchehab@kernel.org Link: https://lkml.kernel.org/r/1554388597-28019-2-git-send-email-thor.thayer@linux.intel.com --- .../devicetree/bindings/edac/socfpga-eccmgr.txt | 29 ++++++++++++++-------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt index 5626560a6cfd..acb211c098c0 100644 --- a/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt +++ b/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt @@ -232,37 +232,46 @@ Example: }; }; -Stratix10 SoCFPGA ECC Manager +Stratix10 SoCFPGA ECC Manager (ARM64) The Stratix10 SoC ECC Manager handles the IRQs for each peripheral -in a shared register similar to the Arria10. However, ECC requires -access to registers that can only be read from Secure Monitor with -SMC calls. Therefore the device tree is slightly different. +in a shared register similar to the Arria10. However, Stratix10 ECC +requires access to registers that can only be read from Secure Monitor +with SMC calls. Therefore the device tree is slightly different. Note +that only 1 interrupt is sent in Stratix10 because the double bit errors +are treated as SErrors in ARM64 instead of IRQs in ARM32. Required Properties: - compatible : Should be "altr,socfpga-s10-ecc-manager" -- interrupts : Should be single bit error interrupt, then double bit error - interrupt. +- altr,sysgr-syscon : phandle to Stratix10 System Manager Block + containing the ECC manager registers. +- interrupts : Should be single bit error interrupt. - interrupt-controller : boolean indicator that ECC Manager is an interrupt controller - #interrupt-cells : must be set to 2. +- #address-cells: must be 1 +- #size-cells: must be 1 +- ranges : standard definition, should translate from local addresses Subcomponents: SDRAM ECC Required Properties: - compatible : Should be "altr,sdram-edac-s10" -- interrupts : Should be single bit error interrupt, then double bit error - interrupt, in this order. +- interrupts : Should be single bit error interrupt. Example: eccmgr { compatible = "altr,socfpga-s10-ecc-manager"; - interrupts = <0 15 4>, <0 95 4>; + altr,sysmgr-syscon = <&sysmgr>; + #address-cells = <1>; + #size-cells = <1>; + interrupts = <0 15 4>; interrupt-controller; #interrupt-cells = <2>; + ranges; sdramedac { compatible = "altr,sdram-edac-s10"; - interrupts = <16 4>, <48 4>; + interrupts = <16 IRQ_TYPE_LEVEL_HIGH>; }; }; -- cgit v1.2.3-59-g8ed1b From 71eec083eef10b1529f2300d6c3553f0949733a5 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 4 Apr 2019 09:36:36 -0500 Subject: Documentation: dt: edac: Add Stratix10 Peripheral bindings Add peripheral bindings for Stratix10 EDAC to capture the differences between the ARM64 and ARM32 architecture. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Reviewed-by: Rob Herring Cc: James Morse Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: dinguyen@kernel.org Cc: linux-edac Cc: mark.rutland@arm.com Cc: mchehab@kernel.org Link: https://lkml.kernel.org/r/1554388597-28019-3-git-send-email-thor.thayer@linux.intel.com --- .../devicetree/bindings/edac/socfpga-eccmgr.txt | 106 +++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt index acb211c098c0..8f52206cfd2a 100644 --- a/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt +++ b/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt @@ -258,6 +258,49 @@ Required Properties: - compatible : Should be "altr,sdram-edac-s10" - interrupts : Should be single bit error interrupt. +On-Chip RAM ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-ocram-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent OCRAM node. +- interrupts : Should be single bit error interrupt. + +Ethernet FIFO ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-eth-mac-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent Ethernet node. +- interrupts : Should be single bit error interrupt. + +NAND FIFO ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-nand-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent NAND node. +- interrupts : Should be single bit error interrupt. + +DMA FIFO ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-dma-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent DMA node. +- interrupts : Should be single bit error interrupt. + +USB FIFO ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-usb-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent USB node. +- interrupts : Should be single bit error interrupt. + +SDMMC FIFO ECC +Required Properties: +- compatible : Should be "altr,socfpga-s10-sdmmc-ecc" +- reg : Address and size for ECC block registers. +- altr,ecc-parent : phandle to parent SD/MMC node. +- interrupts : Should be single bit error interrupt for port A + and then single bit error interrupt for port B. + Example: eccmgr { @@ -274,4 +317,67 @@ Example: compatible = "altr,sdram-edac-s10"; interrupts = <16 IRQ_TYPE_LEVEL_HIGH>; }; + + ocram-ecc@ff8cc000 { + compatible = "altr,socfpga-s10-ocram-ecc"; + reg = ; + altr,ecc-parent = <&ocram>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; + }; + + emac0-rx-ecc@ff8c0000 { + compatible = "altr,socfpga-s10-eth-mac-ecc"; + reg = <0xff8c0000 0x100>; + altr,ecc-parent = <&gmac0>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; + }; + + emac0-tx-ecc@ff8c0400 { + compatible = "altr,socfpga-s10-eth-mac-ecc"; + reg = <0xff8c0400 0x100>; + altr,ecc-parent = <&gmac0>; + interrupts = <5 IRQ_TYPE_LEVEL_HIGH>' + }; + + nand-buf-ecc@ff8c8000 { + compatible = "altr,socfpga-s10-nand-ecc"; + reg = <0xff8c8000 0x100>; + altr,ecc-parent = <&nand>; + interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + }; + + nand-rd-ecc@ff8c8400 { + compatible = "altr,socfpga-s10-nand-ecc"; + reg = <0xff8c8400 0x100>; + altr,ecc-parent = <&nand>; + interrupts = <13 IRQ_TYPE_LEVEL_HIGH>; + }; + + nand-wr-ecc@ff8c8800 { + compatible = "altr,socfpga-s10-nand-ecc"; + reg = <0xff8c8800 0x100>; + altr,ecc-parent = <&nand>; + interrupts = <12 IRQ_TYPE_LEVEL_HIGH>; + }; + + dma-ecc@ff8c9000 { + compatible = "altr,socfpga-s10-dma-ecc"; + reg = <0xff8c9000 0x100>; + altr,ecc-parent = <&pdma>; + interrupts = <10 IRQ_TYPE_LEVEL_HIGH>; + + usb0-ecc@ff8c4000 { + compatible = "altr,socfpga-s10-usb-ecc"; + reg = <0xff8c4000 0x100>; + altr,ecc-parent = <&usb0>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + }; + + sdmmc-ecc@ff8c8c00 { + compatible = "altr,socfpga-s10-sdmmc-ecc"; + reg = <0xff8c8c00 0x100>; + altr,ecc-parent = <&mmc>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>, + <15 IRQ_TYPE_LEVEL_HIGH>; + }; }; -- cgit v1.2.3-59-g8ed1b From 74676a8e247a2163a25fb0ef98f17b74fbad9d21 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 4 Apr 2019 09:36:37 -0500 Subject: arm64: dts: stratix10: Use new Stratix10 EDAC bindings Use the new Stratix10 binding format for EDAC nodes. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Acked-by: Dinh Nguyen Cc: James Morse Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: linux-edac Cc: mark.rutland@arm.com Cc: mchehab@kernel.org Link: https://lkml.kernel.org/r/1554388597-28019-4-git-send-email-thor.thayer@linux.intel.com --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 25 ++++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 7c649f6b14cb..e00ad770fa39 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -531,11 +531,12 @@ }; eccmgr { - compatible = "altr,socfpga-a10-ecc-manager"; + compatible = "altr,socfpga-s10-ecc-manager", + "altr,socfpga-a10-ecc-manager"; altr,sysmgr-syscon = <&sysmgr>; #address-cells = <1>; #size-cells = <1>; - interrupts = <0 15 4>, <0 95 4>; + interrupts = <0 15 4>; interrupt-controller; #interrupt-cells = <2>; ranges; @@ -543,31 +544,31 @@ sdramedac { compatible = "altr,sdram-edac-s10"; altr,sdr-syscon = <&sdr>; - interrupts = <16 4>, <48 4>; + interrupts = <16 4>; }; usb0-ecc@ff8c4000 { - compatible = "altr,socfpga-usb-ecc"; + compatible = "altr,socfpga-s10-usb-ecc", + "altr,socfpga-usb-ecc"; reg = <0xff8c4000 0x100>; altr,ecc-parent = <&usb0>; - interrupts = <2 4>, - <34 4>; + interrupts = <2 4>; }; emac0-rx-ecc@ff8c0000 { - compatible = "altr,socfpga-eth-mac-ecc"; + compatible = "altr,socfpga-s10-eth-mac-ecc", + "altr,socfpga-eth-mac-ecc"; reg = <0xff8c0000 0x100>; altr,ecc-parent = <&gmac0>; - interrupts = <4 4>, - <36 4>; + interrupts = <4 4>; }; emac0-tx-ecc@ff8c0400 { - compatible = "altr,socfpga-eth-mac-ecc"; + compatible = "altr,socfpga-s10-eth-mac-ecc", + "altr,socfpga-eth-mac-ecc"; reg = <0xff8c0400 0x100>; altr,ecc-parent = <&gmac0>; - interrupts = <5 4>, - <37 4>; + interrupts = <5 4>; }; }; -- cgit v1.2.3-59-g8ed1b From 8de9930a4618811edfaebc4981a9fafff2af9170 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 25 Apr 2019 16:30:34 +0200 Subject: Revert "EDAC/amd64: Support more than two controllers for chip select handling" This reverts commit 0a227af521d6df5286550b62f4b591417170b4ea. Unfortunately, this commit caused wrong detection of chip select sizes on some F17h client machines: --- 00-rc6+ 2019-02-14 14:28:03.126622904 +0100 +++ 01-rc4+ 2019-04-14 21:06:16.060614790 +0200 EDAC amd64: MC: 0: 0MB 1: 0MB -EDAC amd64: MC: 2: 16383MB 3: 16383MB +EDAC amd64: MC: 2: 0MB 3: 2097151MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0MB EDAC MC: UMC1 chip selects: EDAC amd64: MC: 0: 0MB 1: 0MB -EDAC amd64: MC: 2: 16383MB 3: 16383MB +EDAC amd64: MC: 2: 0MB 3: 2097151MB EDAC amd64: MC: 4: 0MB 5: 0MB EDAC amd64: MC: 6: 0MB 7: 0M Revert it for now until it has been solved properly. Signed-off-by: Borislav Petkov Cc: Yazen Ghannam --- drivers/edac/amd64_edac.c | 113 ++++++++++++++++++++++------------------------ drivers/edac/amd64_edac.h | 5 +- 2 files changed, 56 insertions(+), 62 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 4c0239aeff2f..e2a99466faaa 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -941,94 +941,89 @@ static void prep_chip_selects(struct amd64_pvt *pvt) } else if (pvt->fam == 0x15 && pvt->model == 0x30) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; - } else if (pvt->fam >= 0x17) { - int umc; - - for_each_umc(umc) { - pvt->csels[umc].b_cnt = 8; - pvt->csels[umc].m_cnt = 4; - } - } else { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; } } -static void read_umc_base_mask(struct amd64_pvt *pvt) -{ - int cs, umc; - - for_each_umc(umc) { - u32 umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR; - u32 umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; - - for_each_chip_select(cs, 0, pvt) { - u32 *base = &pvt->csels[umc].csbases[cs]; - u32 *mask = &pvt->csels[umc].csmasks[cs]; - - u32 base_reg = umc_base_reg + (cs * 4); - u32 mask_reg = umc_mask_reg + ((cs >> 1) * 4); - - if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) - edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", - umc, cs, *base, base_reg); - - if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) - edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", - umc, cs, *mask, mask_reg); - } - } -} - /* * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers */ static void read_dct_base_mask(struct amd64_pvt *pvt) { - int cs; + int base_reg0, base_reg1, mask_reg0, mask_reg1, cs; prep_chip_selects(pvt); - if (pvt->umc) - return read_umc_base_mask(pvt); + if (pvt->umc) { + base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR; + base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR; + mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK; + mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK; + } else { + base_reg0 = DCSB0; + base_reg1 = DCSB1; + mask_reg0 = DCSM0; + mask_reg1 = DCSM1; + } for_each_chip_select(cs, 0, pvt) { - int reg0 = DCSB0 + (cs * 4); - int reg1 = DCSB1 + (cs * 4); + int reg0 = base_reg0 + (cs * 4); + int reg1 = base_reg1 + (cs * 4); u32 *base0 = &pvt->csels[0].csbases[cs]; u32 *base1 = &pvt->csels[1].csbases[cs]; - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) - edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", - cs, *base0, reg0); + if (pvt->umc) { + if (!amd_smn_read(pvt->mc_node_id, reg0, base0)) + edac_dbg(0, " DCSB0[%d]=0x%08x reg: 0x%x\n", + cs, *base0, reg0); - if (pvt->fam == 0xf) - continue; + if (!amd_smn_read(pvt->mc_node_id, reg1, base1)) + edac_dbg(0, " DCSB1[%d]=0x%08x reg: 0x%x\n", + cs, *base1, reg1); + } else { + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) + edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", + cs, *base0, reg0); + + if (pvt->fam == 0xf) + continue; - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) - edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", - cs, *base1, (pvt->fam == 0x10) ? reg1 - : reg0); + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) + edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", + cs, *base1, (pvt->fam == 0x10) ? reg1 + : reg0); + } } for_each_chip_select_mask(cs, 0, pvt) { - int reg0 = DCSM0 + (cs * 4); - int reg1 = DCSM1 + (cs * 4); + int reg0 = mask_reg0 + (cs * 4); + int reg1 = mask_reg1 + (cs * 4); u32 *mask0 = &pvt->csels[0].csmasks[cs]; u32 *mask1 = &pvt->csels[1].csmasks[cs]; - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) - edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", - cs, *mask0, reg0); + if (pvt->umc) { + if (!amd_smn_read(pvt->mc_node_id, reg0, mask0)) + edac_dbg(0, " DCSM0[%d]=0x%08x reg: 0x%x\n", + cs, *mask0, reg0); - if (pvt->fam == 0xf) - continue; + if (!amd_smn_read(pvt->mc_node_id, reg1, mask1)) + edac_dbg(0, " DCSM1[%d]=0x%08x reg: 0x%x\n", + cs, *mask1, reg1); + } else { + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) + edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", + cs, *mask0, reg0); - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) - edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", - cs, *mask1, (pvt->fam == 0x10) ? reg1 - : reg0); + if (pvt->fam == 0xf) + continue; + + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) + edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", + cs, *mask1, (pvt->fam == 0x10) ? reg1 + : reg0); + } } } diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 4dce6a2ac75f..8f66472f7adc 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -96,7 +96,6 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 -#define NUM_CONTROLLERS 8 #define ON true #define OFF false @@ -352,8 +351,8 @@ struct amd64_pvt { u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */ u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ - /* one for each DCT/UMC */ - struct chip_select csels[NUM_CONTROLLERS]; + /* one for each DCT */ + struct chip_select csels[2]; /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */ struct dram_range ranges[DRAM_RANGES]; -- cgit v1.2.3-59-g8ed1b