aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac/amd64_edac.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac/amd64_edac.c')
-rw-r--r--drivers/edac/amd64_edac.c135
1 files changed, 98 insertions, 37 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 6ea98575a402..e2a99466faaa 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -18,6 +18,9 @@ static struct msr __percpu *msrs;
/* Per-node stuff */
static struct ecc_settings **ecc_stngs;
+/* Number of Unified Memory Controllers */
+static u8 num_umcs;
+
/*
* Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
* bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
@@ -449,6 +452,9 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
#define for_each_chip_select_mask(i, dct, pvt) \
for (i = 0; i < pvt->csels[dct].m_cnt; i++)
+#define for_each_umc(i) \
+ for (i = 0; i < num_umcs; i++)
+
/*
* @input_addr is an InputAddr associated with the node given by mci. Return the
* csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
@@ -722,7 +728,7 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
if (pvt->umc) {
u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
continue;
@@ -781,6 +787,22 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
(dclr & BIT(15)) ? "yes" : "no");
}
+/*
+ * The Address Mask should be a contiguous set of bits in the non-interleaved
+ * case. So to check for CS interleaving, find the most- and least-significant
+ * bits of the mask, generate a contiguous bitmask, and compare the two.
+ */
+static bool f17_cs_interleaved(struct amd64_pvt *pvt, u8 ctrl, int cs)
+{
+ u32 mask = pvt->csels[ctrl].csmasks[cs >> 1];
+ u32 msb = fls(mask) - 1, lsb = ffs(mask) - 1;
+ u32 test_mask = GENMASK(msb, lsb);
+
+ edac_dbg(1, "mask=0x%08x test_mask=0x%08x\n", mask, test_mask);
+
+ return mask ^ test_mask;
+}
+
static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
{
int dimm, size0, size1, cs0, cs1;
@@ -797,8 +819,19 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
size1 = 0;
cs1 = dimm * 2 + 1;
- if (csrow_enabled(cs1, ctrl, pvt))
- size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
+ if (csrow_enabled(cs1, ctrl, pvt)) {
+ /*
+ * CS interleaving is only supported if both CSes have
+ * the same amount of memory. Because they are
+ * interleaved, it will look like both CSes have the
+ * full amount of memory. Save the size for both as
+ * half the amount we found on CS0, if interleaved.
+ */
+ if (f17_cs_interleaved(pvt, ctrl, cs1))
+ size1 = size0 = (size0 >> 1);
+ else
+ size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
+ }
amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
cs0, size0,
@@ -811,7 +844,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
struct amd64_umc *umc;
u32 i, tmp, umc_base;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
umc_base = get_umc_base(i);
umc = &pvt->umc[i];
@@ -894,8 +927,7 @@ static void dump_misc_regs(struct amd64_pvt *pvt)
edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
- amd64_info("using %s syndromes.\n",
- ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
+ amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz);
}
/*
@@ -1388,7 +1420,7 @@ static int f17_early_channel_count(struct amd64_pvt *pvt)
int i, channels = 0;
/* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
- for (i = 0; i < NUM_UMCS; i++)
+ for_each_umc(i)
channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT);
amd64_info("MCT channel count: %d\n", channels);
@@ -2211,6 +2243,15 @@ static struct amd64_family_type family_types[] = {
.dbam_to_cs = f17_base_addr_to_cs_size,
}
},
+ [F17_M30H_CPUS] = {
+ .ctl_name = "F17h_M30h",
+ .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
+ .ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_base_addr_to_cs_size,
+ }
+ },
};
/*
@@ -2464,18 +2505,14 @@ static inline void decode_bus_error(int node_id, struct mce *m)
* To find the UMC channel represented by this bank we need to match on its
* instance_id. The instance_id of a bank is held in the lower 32 bits of its
* IPID.
+ *
+ * Currently, we can derive the channel number by looking at the 6th nibble in
+ * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
+ * number.
*/
-static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
+static int find_umc_channel(struct mce *m)
{
- u32 umc_instance_id[] = {0x50f00, 0x150f00};
- u32 instance_id = m->ipid & GENMASK(31, 0);
- int i, channel = -1;
-
- for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
- if (umc_instance_id[i] == instance_id)
- channel = i;
-
- return channel;
+ return (m->ipid & GENMASK(31, 0)) >> 20;
}
static void decode_umc_error(int node_id, struct mce *m)
@@ -2497,11 +2534,7 @@ static void decode_umc_error(int node_id, struct mce *m)
if (m->status & MCI_STATUS_DEFERRED)
ecc_type = 3;
- err.channel = find_umc_channel(pvt, m);
- if (err.channel < 0) {
- err.err_code = ERR_CHANNEL;
- goto log_error;
- }
+ err.channel = find_umc_channel(m);
if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
err.err_code = ERR_NORM_ADDR;
@@ -2603,19 +2636,19 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
if (pvt->umc) {
u8 i;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
/* Check enabled channels only: */
- if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
- (pvt->umc[i].ecc_ctrl & BIT(7))) {
- pvt->ecc_sym_sz = 8;
- break;
+ if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
+ if (pvt->umc[i].ecc_ctrl & BIT(9)) {
+ pvt->ecc_sym_sz = 16;
+ return;
+ } else if (pvt->umc[i].ecc_ctrl & BIT(7)) {
+ pvt->ecc_sym_sz = 8;
+ return;
+ }
}
}
-
- return;
- }
-
- if (pvt->fam >= 0x10) {
+ } else if (pvt->fam >= 0x10) {
u32 tmp;
amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
@@ -2639,7 +2672,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
u32 i, umc_base;
/* Read registers from each UMC */
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
umc_base = get_umc_base(i);
umc = &pvt->umc[i];
@@ -3052,7 +3085,7 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
if (boot_cpu_data.x86 >= 0x17) {
u8 umc_en_mask = 0, ecc_en_mask = 0;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
u32 base = get_umc_base(i);
/* Only check enabled UMCs. */
@@ -3105,7 +3138,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
{
u8 i, ecc_en = 1, cpk_en = 1;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
@@ -3203,6 +3236,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
fam_type = &family_types[F17_M10H_CPUS];
pvt->ops = &family_types[F17_M10H_CPUS].ops;
break;
+ } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) {
+ fam_type = &family_types[F17_M30H_CPUS];
+ pvt->ops = &family_types[F17_M30H_CPUS].ops;
+ break;
}
/* fall through */
case 0x18:
@@ -3236,6 +3273,22 @@ static const struct attribute_group *amd64_edac_attr_groups[] = {
NULL
};
+/* Set the number of Unified Memory Controllers in the system. */
+static void compute_num_umcs(void)
+{
+ u8 model = boot_cpu_data.x86_model;
+
+ if (boot_cpu_data.x86 < 0x17)
+ return;
+
+ if (model >= 0x30 && model <= 0x3f)
+ num_umcs = 8;
+ else
+ num_umcs = 2;
+
+ edac_dbg(1, "Number of UMCs: %x", num_umcs);
+}
+
static int init_one_instance(unsigned int nid)
{
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
@@ -3260,7 +3313,7 @@ static int init_one_instance(unsigned int nid)
goto err_free;
if (pvt->fam >= 0x17) {
- pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL);
+ pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
if (!pvt->umc) {
ret = -ENOMEM;
goto err_free;
@@ -3299,8 +3352,14 @@ static int init_one_instance(unsigned int nid)
* Always allocate two channels since we can have setups with DIMMs on
* only one channel. Also, this simplifies handling later for the price
* of a couple of KBs tops.
+ *
+ * On Fam17h+, the number of controllers may be greater than two. So set
+ * the size equal to the maximum number of UMCs.
*/
- layers[1].size = 2;
+ if (pvt->fam >= 0x17)
+ layers[1].size = num_umcs;
+ else
+ layers[1].size = 2;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
@@ -3481,6 +3540,8 @@ static int __init amd64_edac_init(void)
if (!msrs)
goto err_free;
+ compute_num_umcs();
+
for (i = 0; i < amd_nb_num(); i++) {
err = probe_one_instance(i);
if (err) {