aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/mce/amd.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mce/amd.c')
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c633
1 files changed, 240 insertions, 393 deletions
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 52de616a8065..1c87501e0fa3 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -71,33 +71,58 @@ static const char * const smca_umc_block_names[] = {
"misc_umc"
};
+#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
+
+struct smca_hwid {
+ unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
+ u32 hwid_mcatype; /* (hwid,mcatype) tuple */
+};
+
+struct smca_bank {
+ const struct smca_hwid *hwid;
+ u32 id; /* Value of MCA_IPID[InstanceId]. */
+ u8 sysfs_id; /* Value used for sysfs name. */
+};
+
+static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks);
+static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts);
+
struct smca_bank_name {
const char *name; /* Short name for sysfs */
const char *long_name; /* Long name for pretty-printing */
};
static struct smca_bank_name smca_names[] = {
- [SMCA_LS] = { "load_store", "Load Store Unit" },
- [SMCA_LS_V2] = { "load_store", "Load Store Unit" },
- [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
- [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
- [SMCA_DE] = { "decode_unit", "Decode Unit" },
- [SMCA_RESERVED] = { "reserved", "Reserved" },
- [SMCA_EX] = { "execution_unit", "Execution Unit" },
- [SMCA_FP] = { "floating_point", "Floating Point Unit" },
- [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
- [SMCA_CS] = { "coherent_slave", "Coherent Slave" },
- [SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
- [SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
- [SMCA_UMC] = { "umc", "Unified Memory Controller" },
- [SMCA_PB] = { "param_block", "Parameter Block" },
- [SMCA_PSP] = { "psp", "Platform Security Processor" },
- [SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
- [SMCA_SMU] = { "smu", "System Management Unit" },
- [SMCA_SMU_V2] = { "smu", "System Management Unit" },
- [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
- [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
- [SMCA_PCIE] = { "pcie", "PCI Express Unit" },
+ [SMCA_LS ... SMCA_LS_V2] = { "load_store", "Load Store Unit" },
+ [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
+ [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
+ [SMCA_DE] = { "decode_unit", "Decode Unit" },
+ [SMCA_RESERVED] = { "reserved", "Reserved" },
+ [SMCA_EX] = { "execution_unit", "Execution Unit" },
+ [SMCA_FP] = { "floating_point", "Floating Point Unit" },
+ [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
+ [SMCA_CS ... SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
+ [SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
+
+ /* UMC v2 is separate because both of them can exist in a single system. */
+ [SMCA_UMC] = { "umc", "Unified Memory Controller" },
+ [SMCA_UMC_V2] = { "umc_v2", "Unified Memory Controller v2" },
+ [SMCA_PB] = { "param_block", "Parameter Block" },
+ [SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
+ [SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" },
+ [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
+ [SMCA_MPDMA] = { "mpdma", "MPDMA Unit" },
+ [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
+ [SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" },
+ [SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" },
+ [SMCA_NBIF] = { "nbif", "NBIF Unit" },
+ [SMCA_SHUB] = { "shub", "System Hub Unit" },
+ [SMCA_SATA] = { "sata", "SATA Unit" },
+ [SMCA_USB] = { "usb", "USB Unit" },
+ [SMCA_GMI_PCS] = { "gmi_pcs", "Global Memory Interconnect PCS Unit" },
+ [SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" },
+ [SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" },
+ [SMCA_GMI_PHY] = { "gmi_phy", "Global Memory Interconnect PHY Unit" },
};
static const char *smca_get_name(enum smca_bank_types t)
@@ -117,69 +142,82 @@ const char *smca_get_long_name(enum smca_bank_types t)
}
EXPORT_SYMBOL_GPL(smca_get_long_name);
-static enum smca_bank_types smca_get_bank_type(unsigned int bank)
+enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
{
struct smca_bank *b;
if (bank >= MAX_NR_BANKS)
return N_SMCA_BANK_TYPES;
- b = &smca_banks[bank];
+ b = &per_cpu(smca_banks, cpu)[bank];
if (!b->hwid)
return N_SMCA_BANK_TYPES;
return b->hwid->bank_type;
}
+EXPORT_SYMBOL_GPL(smca_get_bank_type);
-static struct smca_hwid smca_hwid_mcatypes[] = {
- /* { bank_type, hwid_mcatype, xec_bitmap } */
+static const struct smca_hwid smca_hwid_mcatypes[] = {
+ /* { bank_type, hwid_mcatype } */
/* Reserved type */
- { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
+ { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) },
/* ZN Core (HWID=0xB0) MCA types */
- { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
- { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF },
- { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
- { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
- { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
+ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0) },
+ { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) },
+ { SMCA_IF, HWID_MCATYPE(0xB0, 0x1) },
+ { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) },
+ { SMCA_DE, HWID_MCATYPE(0xB0, 0x3) },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
- { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
- { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
- { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
+ { SMCA_EX, HWID_MCATYPE(0xB0, 0x5) },
+ { SMCA_FP, HWID_MCATYPE(0xB0, 0x6) },
+ { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7) },
/* Data Fabric MCA types */
- { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
- { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
- { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
+ { SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
+ { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
+ { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
/* Unified Memory Controller MCA type */
- { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
+ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
+ { SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) },
/* Parameter Block MCA type */
- { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
+ { SMCA_PB, HWID_MCATYPE(0x05, 0x0) },
/* Platform Security Processor MCA type */
- { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
- { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
+ { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) },
+ { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) },
/* System Management Unit MCA type */
- { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
- { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
+ { SMCA_SMU, HWID_MCATYPE(0x01, 0x0) },
+ { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) },
/* Microprocessor 5 Unit MCA type */
- { SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
+ { SMCA_MP5, HWID_MCATYPE(0x01, 0x2) },
+
+ /* MPDMA MCA type */
+ { SMCA_MPDMA, HWID_MCATYPE(0x01, 0x3) },
/* Northbridge IO Unit MCA type */
- { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
+ { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) },
/* PCI Express Unit MCA type */
- { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
+ { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
+ { SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) },
+
+ { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) },
+ { SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) },
+ { SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) },
+ { SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) },
+ { SMCA_USB, HWID_MCATYPE(0xAA, 0x0) },
+ { SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) },
+ { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
+ { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) },
+ { SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) },
};
-struct smca_bank smca_banks[MAX_NR_BANKS];
-EXPORT_SYMBOL_GPL(smca_banks);
-
/*
* In SMCA enabled processors, we can have multiple banks for a given IP type.
* So to define a unique name for each bank, we use a temp c-string to append
@@ -192,7 +230,12 @@ EXPORT_SYMBOL_GPL(smca_banks);
static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
-static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
+
+/*
+ * A list of the banks enabled on each logical CPU. Controls which respective
+ * descriptors to initialize later in mce_threshold_create_device().
+ */
+static DEFINE_PER_CPU(unsigned int, bank_map);
/* Map of banks that have more than MCA_MISC0 available. */
static DEFINE_PER_CPU(u32, smca_misc_banks_map);
@@ -230,8 +273,9 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
static void smca_configure(unsigned int bank, unsigned int cpu)
{
+ u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
+ const struct smca_hwid *s_hwid;
unsigned int i, hwid_mcatype;
- struct smca_hwid *s_hwid;
u32 high, low;
u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank);
@@ -267,10 +311,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
smca_set_misc_banks_map(bank, cpu);
- /* Return early if this bank was already initialized. */
- if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0)
- return;
-
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
return;
@@ -281,10 +321,11 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
s_hwid = &smca_hwid_mcatypes[i];
+
if (hwid_mcatype == s_hwid->hwid_mcatype) {
- smca_banks[bank].hwid = s_hwid;
- smca_banks[bank].id = low;
- smca_banks[bank].sysfs_id = s_hwid->count++;
+ this_cpu_ptr(smca_banks)[bank].hwid = s_hwid;
+ this_cpu_ptr(smca_banks)[bank].id = low;
+ this_cpu_ptr(smca_banks)[bank].sysfs_id = bank_counts[s_hwid->bank_type]++;
break;
}
}
@@ -381,6 +422,10 @@ static void threshold_restart_bank(void *_tr)
struct thresh_restart *tr = _tr;
u32 hi, lo;
+ /* sysfs write might race against an offline operation */
+ if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off)
+ return;
+
rdmsr(tr->b->address, lo, hi);
if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
@@ -504,7 +549,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
/* Fall back to method we used for older processors: */
switch (block) {
case 0:
- addr = msr_ops.misc(bank);
+ addr = mca_msr_reg(bank, MCA_MISC);
break;
case 1:
offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -566,16 +611,21 @@ out:
bool amd_filter_mce(struct mce *m)
{
- enum smca_bank_types bank_type = smca_get_bank_type(m->bank);
+ enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
struct cpuinfo_x86 *c = &boot_cpu_data;
- u8 xec = (m->status >> 16) & 0x3F;
/* See Family 17h Models 10h-2Fh Erratum #1114. */
if (c->x86 == 0x17 &&
c->x86_model >= 0x10 && c->x86_model <= 0x2F &&
- bank_type == SMCA_IF && xec == 10)
+ bank_type == SMCA_IF && XEC(m->status, 0x3f) == 10)
return true;
+ /* NB GART TLB error reporting is disabled by default. */
+ if (c->x86 < 0x17) {
+ if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5)
+ return true;
+ }
+
return false;
}
@@ -599,7 +649,7 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
} else if (c->x86 == 0x17 &&
(c->x86_model >= 0x10 && c->x86_model <= 0x2F)) {
- if (smca_get_bank_type(bank) != SMCA_IF)
+ if (smca_get_bank_type(smp_processor_id(), bank) != SMCA_IF)
return;
msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank);
@@ -661,213 +711,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}
-int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
-{
- u64 dram_base_addr, dram_limit_addr, dram_hole_base;
- /* We start from the normalized address */
- u64 ret_addr = norm_addr;
-
- u32 tmp;
-
- u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask;
- u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets;
- u8 intlv_addr_sel, intlv_addr_bit;
- u8 num_intlv_bits, hashed_bit;
- u8 lgcy_mmio_hole_en, base = 0;
- u8 cs_mask, cs_id = 0;
- bool hash_enabled = false;
-
- /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */
- if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp))
- goto out_err;
-
- /* Remove HiAddrOffset from normalized address, if enabled: */
- if (tmp & BIT(0)) {
- u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8;
-
- if (norm_addr >= hi_addr_offset) {
- ret_addr -= hi_addr_offset;
- base = 1;
- }
- }
-
- /* Read D18F0x110 (DramBaseAddress). */
- if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp))
- goto out_err;
-
- /* Check if address range is valid. */
- if (!(tmp & BIT(0))) {
- pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n",
- __func__, tmp);
- goto out_err;
- }
-
- lgcy_mmio_hole_en = tmp & BIT(1);
- intlv_num_chan = (tmp >> 4) & 0xF;
- intlv_addr_sel = (tmp >> 8) & 0x7;
- dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16;
-
- /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
- if (intlv_addr_sel > 3) {
- pr_err("%s: Invalid interleave address select %d.\n",
- __func__, intlv_addr_sel);
- goto out_err;
- }
-
- /* Read D18F0x114 (DramLimitAddress). */
- if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp))
- goto out_err;
-
- intlv_num_sockets = (tmp >> 8) & 0x1;
- intlv_num_dies = (tmp >> 10) & 0x3;
- dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
-
- intlv_addr_bit = intlv_addr_sel + 8;
-
- /* Re-use intlv_num_chan by setting it equal to log2(#channels) */
- switch (intlv_num_chan) {
- case 0: intlv_num_chan = 0; break;
- case 1: intlv_num_chan = 1; break;
- case 3: intlv_num_chan = 2; break;
- case 5: intlv_num_chan = 3; break;
- case 7: intlv_num_chan = 4; break;
-
- case 8: intlv_num_chan = 1;
- hash_enabled = true;
- break;
- default:
- pr_err("%s: Invalid number of interleaved channels %d.\n",
- __func__, intlv_num_chan);
- goto out_err;
- }
-
- num_intlv_bits = intlv_num_chan;
-
- if (intlv_num_dies > 2) {
- pr_err("%s: Invalid number of interleaved nodes/dies %d.\n",
- __func__, intlv_num_dies);
- goto out_err;
- }
-
- num_intlv_bits += intlv_num_dies;
-
- /* Add a bit if sockets are interleaved. */
- num_intlv_bits += intlv_num_sockets;
-
- /* Assert num_intlv_bits <= 4 */
- if (num_intlv_bits > 4) {
- pr_err("%s: Invalid interleave bits %d.\n",
- __func__, num_intlv_bits);
- goto out_err;
- }
-
- if (num_intlv_bits > 0) {
- u64 temp_addr_x, temp_addr_i, temp_addr_y;
- u8 die_id_bit, sock_id_bit, cs_fabric_id;
-
- /*
- * Read FabricBlockInstanceInformation3_CS[BlockFabricID].
- * This is the fabric id for this coherent slave. Use
- * umc/channel# as instance id of the coherent slave
- * for FICAA.
- */
- if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp))
- goto out_err;
-
- cs_fabric_id = (tmp >> 8) & 0xFF;
- die_id_bit = 0;
-
- /* If interleaved over more than 1 channel: */
- if (intlv_num_chan) {
- die_id_bit = intlv_num_chan;
- cs_mask = (1 << die_id_bit) - 1;
- cs_id = cs_fabric_id & cs_mask;
- }
-
- sock_id_bit = die_id_bit;
-
- /* Read D18F1x208 (SystemFabricIdMask). */
- if (intlv_num_dies || intlv_num_sockets)
- if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp))
- goto out_err;
-
- /* If interleaved over more than 1 die. */
- if (intlv_num_dies) {
- sock_id_bit = die_id_bit + intlv_num_dies;
- die_id_shift = (tmp >> 24) & 0xF;
- die_id_mask = (tmp >> 8) & 0xFF;
-
- cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit;
- }
-
- /* If interleaved over more than 1 socket. */
- if (intlv_num_sockets) {
- socket_id_shift = (tmp >> 28) & 0xF;
- socket_id_mask = (tmp >> 16) & 0xFF;
-
- cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit;
- }
-
- /*
- * The pre-interleaved address consists of XXXXXXIIIYYYYY
- * where III is the ID for this CS, and XXXXXXYYYYY are the
- * address bits from the post-interleaved address.
- * "num_intlv_bits" has been calculated to tell us how many "I"
- * bits there are. "intlv_addr_bit" tells us how many "Y" bits
- * there are (where "I" starts).
- */
- temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0);
- temp_addr_i = (cs_id << intlv_addr_bit);
- temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;
- ret_addr = temp_addr_x | temp_addr_i | temp_addr_y;
- }
-
- /* Add dram base address */
- ret_addr += dram_base_addr;
-
- /* If legacy MMIO hole enabled */
- if (lgcy_mmio_hole_en) {
- if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp))
- goto out_err;
-
- dram_hole_base = tmp & GENMASK(31, 24);
- if (ret_addr >= dram_hole_base)
- ret_addr += (BIT_ULL(32) - dram_hole_base);
- }
-
- if (hash_enabled) {
- /* Save some parentheses and grab ls-bit at the end. */
- hashed_bit = (ret_addr >> 12) ^
- (ret_addr >> 18) ^
- (ret_addr >> 21) ^
- (ret_addr >> 30) ^
- cs_id;
-
- hashed_bit &= BIT(0);
-
- if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0)))
- ret_addr ^= BIT(intlv_addr_bit);
- }
-
- /* Is calculated system address is above DRAM limit address? */
- if (ret_addr > dram_limit_addr)
- goto out_err;
-
- *sys_addr = ret_addr;
- return 0;
-
-out_err:
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
-
bool amd_mce_is_memory_error(struct mce *m)
{
/* ErrCodeExt[20:16] */
u8 xec = (m->status >> 16) & 0x1f;
if (mce_flags.smca)
- return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0;
+ return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0;
return m->bank == 4 && xec == 0x8;
}
@@ -907,14 +757,13 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
mce_log(&m);
}
-asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(struct pt_regs *regs)
+DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
{
- entering_irq();
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
inc_irq_stat(irq_deferred_error_count);
deferred_error_int_vector();
trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
- exiting_ack_irq();
+ ack_APIC_irq();
}
/*
@@ -952,8 +801,8 @@ static void log_error_deferred(unsigned int bank)
{
bool defrd;
- defrd = _log_error_bank(bank, msr_ops.status(bank),
- msr_ops.addr(bank), 0);
+ defrd = _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
+ mca_msr_reg(bank, MCA_ADDR), 0);
if (!mce_flags.smca)
return;
@@ -983,7 +832,7 @@ static void amd_deferred_error_interrupt(void)
static void log_error_thresholding(unsigned int bank, u64 misc)
{
- _log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc);
+ _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), mca_msr_reg(bank, MCA_ADDR), misc);
}
static void log_and_reset_block(struct threshold_block *block)
@@ -1016,13 +865,22 @@ static void log_and_reset_block(struct threshold_block *block)
static void amd_threshold_interrupt(void)
{
struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
+ struct threshold_bank **bp = this_cpu_read(threshold_banks);
unsigned int bank, cpu = smp_processor_id();
+ /*
+ * Validate that the threshold bank has been initialized already. The
+ * handler is installed at boot time, but on a hotplug event the
+ * interrupt might fire before the data has been initialized.
+ */
+ if (!bp)
+ return;
+
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
- first_block = per_cpu(threshold_banks, cpu)[bank]->blocks;
+ first_block = bp[bank]->blocks;
if (!first_block)
continue;
@@ -1071,7 +929,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
memset(&tr, 0, sizeof(tr));
tr.b = b;
- smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
+ if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
+ return -ENODEV;
return size;
}
@@ -1095,7 +954,8 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
b->threshold_limit = new;
tr.b = b;
- smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
+ if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
+ return -ENODEV;
return size;
}
@@ -1104,7 +964,9 @@ static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
u32 lo, hi;
- rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);
+ /* CPU might be offline by now */
+ if (rdmsr_on_cpu(b->cpu, b->address, &lo, &hi))
+ return -ENODEV;
return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
(THRESHOLD_MAX - b->threshold_limit)));
@@ -1131,6 +993,7 @@ static struct attribute *default_attrs[] = {
NULL, /* possibly interrupt_enable if supported, see below */
NULL,
};
+ATTRIBUTE_GROUPS(default);
#define to_block(k) container_of(k, struct threshold_block, kobj)
#define to_attr(a) container_of(a, struct threshold_attr, attr)
@@ -1167,11 +1030,11 @@ static void threshold_block_release(struct kobject *kobj);
static struct kobj_type threshold_ktype = {
.sysfs_ops = &threshold_ops,
- .default_attrs = default_attrs,
+ .default_groups = default_groups,
.release = threshold_block_release,
};
-static const char *get_name(unsigned int bank, struct threshold_block *b)
+static const char *get_name(unsigned int cpu, unsigned int bank, struct threshold_block *b)
{
enum smca_bank_types bank_type;
@@ -1182,7 +1045,7 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return th_names[bank];
}
- bank_type = smca_get_bank_type(bank);
+ bank_type = smca_get_bank_type(cpu, bank);
if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;
@@ -1192,12 +1055,12 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return NULL;
}
- if (smca_banks[bank].hwid->count == 1)
+ if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
return smca_get_name(bank_type);
snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
- "%s_%x", smca_get_name(bank_type),
- smca_banks[bank].sysfs_id);
+ "%s_%u", smca_get_name(bank_type),
+ per_cpu(smca_banks, cpu)[bank].sysfs_id);
return buf_mcatype;
}
@@ -1209,10 +1072,10 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
u32 low, high;
int err;
- if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
+ if ((bank >= this_cpu_read(mce_num_banks)) || (block >= NR_BLOCKS))
return 0;
- if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
+ if (rdmsr_safe(address, &low, &high))
return 0;
if (!(high & MASK_VALID_HI)) {
@@ -1239,20 +1102,21 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
b->threshold_limit = THRESHOLD_MAX;
if (b->interrupt_capable) {
- threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
+ default_attrs[2] = &interrupt_enable.attr;
b->interrupt_enable = 1;
} else {
- threshold_ktype.default_attrs[2] = NULL;
+ default_attrs[2] = NULL;
}
INIT_LIST_HEAD(&b->miscj);
+ /* This is safe as @tb is not visible yet */
if (tb->blocks)
list_add(&b->miscj, &tb->blocks->miscj);
else
tb->blocks = b;
- err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b));
+ err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b));
if (err)
goto out_free;
recurse:
@@ -1267,13 +1131,12 @@ recurse:
if (b)
kobject_uevent(&b->kobj, KOBJ_ADD);
- return err;
+ return 0;
out_free:
if (b) {
- kobject_put(&b->kobj);
list_del(&b->miscj);
- kfree(b);
+ kobject_put(&b->kobj);
}
return err;
}
@@ -1302,19 +1165,20 @@ static int __threshold_add_blocks(struct threshold_bank *b)
return err;
}
-static int threshold_create_bank(unsigned int cpu, unsigned int bank)
+static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
+ unsigned int bank)
{
- struct device *dev = per_cpu(mce_device, cpu);
+ struct device *dev = this_cpu_read(mce_device);
struct amd_northbridge *nb = NULL;
struct threshold_bank *b = NULL;
- const char *name = get_name(bank, NULL);
+ const char *name = get_name(cpu, bank, NULL);
int err = 0;
if (!dev)
return -ENODEV;
if (is_shared_bank(bank)) {
- nb = node_to_amd_nb(amd_get_nb_id(cpu));
+ nb = node_to_amd_nb(topology_die_id(cpu));
/* threshold descriptor already initialized on this node? */
if (nb && nb->bank4) {
@@ -1324,7 +1188,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out;
- per_cpu(threshold_banks, cpu)[bank] = b;
+ bp[bank] = b;
refcount_inc(&b->cpus);
err = __threshold_add_blocks(b);
@@ -1339,6 +1203,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out;
}
+ /* Associate the bank with the per-CPU MCE device */
b->kobj = kobject_create_and_add(name, &dev->kobj);
if (!b->kobj) {
err = -EINVAL;
@@ -1346,6 +1211,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
}
if (is_shared_bank(bank)) {
+ b->shared = 1;
refcount_set(&b->cpus, 1);
/* nb is already initialized, see above */
@@ -1355,18 +1221,18 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
}
}
- err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank));
+ err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
if (err)
- goto out_free;
-
- per_cpu(threshold_banks, cpu)[bank] = b;
+ goto out_kobj;
+ bp[bank] = b;
return 0;
- out_free:
+out_kobj:
+ kobject_put(b->kobj);
+out_free:
kfree(b);
-
- out:
+out:
return err;
}
@@ -1375,21 +1241,16 @@ static void threshold_block_release(struct kobject *kobj)
kfree(to_block(kobj));
}
-static void deallocate_threshold_block(unsigned int cpu, unsigned int bank)
+static void deallocate_threshold_blocks(struct threshold_bank *bank)
{
- struct threshold_block *pos = NULL;
- struct threshold_block *tmp = NULL;
- struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];
+ struct threshold_block *pos, *tmp;
- if (!head)
- return;
-
- list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
+ list_for_each_entry_safe(pos, tmp, &bank->blocks->miscj, miscj) {
list_del(&pos->miscj);
kobject_put(&pos->kobj);
}
- kobject_put(&head->blocks->kobj);
+ kobject_put(&bank->blocks->kobj);
}
static void __threshold_remove_blocks(struct threshold_bank *b)
@@ -1403,122 +1264,108 @@ static void __threshold_remove_blocks(struct threshold_bank *b)
kobject_del(&pos->kobj);
}
-static void threshold_remove_bank(unsigned int cpu, int bank)
+static void threshold_remove_bank(struct threshold_bank *bank)
{
struct amd_northbridge *nb;
- struct threshold_bank *b;
- b = per_cpu(threshold_banks, cpu)[bank];
- if (!b)
- return;
+ if (!bank->blocks)
+ goto out_free;
- if (!b->blocks)
- goto free_out;
+ if (!bank->shared)
+ goto out_dealloc;
- if (is_shared_bank(bank)) {
- if (!refcount_dec_and_test(&b->cpus)) {
- __threshold_remove_blocks(b);
- per_cpu(threshold_banks, cpu)[bank] = NULL;
- return;
- } else {
- /*
- * the last CPU on this node using the shared bank is
- * going away, remove that bank now.
- */
- nb = node_to_amd_nb(amd_get_nb_id(cpu));
- nb->bank4 = NULL;
- }
+ if (!refcount_dec_and_test(&bank->cpus)) {
+ __threshold_remove_blocks(bank);
+ return;
+ } else {
+ /*
+ * The last CPU on this node using the shared bank is going
+ * away, remove that bank now.
+ */
+ nb = node_to_amd_nb(topology_die_id(smp_processor_id()));
+ nb->bank4 = NULL;
}
- deallocate_threshold_block(cpu, bank);
+out_dealloc:
+ deallocate_threshold_blocks(bank);
-free_out:
- kobject_del(b->kobj);
- kobject_put(b->kobj);
- kfree(b);
- per_cpu(threshold_banks, cpu)[bank] = NULL;
+out_free:
+ kobject_put(bank->kobj);
+ kfree(bank);
}
-int mce_threshold_remove_device(unsigned int cpu)
+static void __threshold_remove_device(struct threshold_bank **bp)
{
- unsigned int bank;
+ unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
- for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
- if (!(per_cpu(bank_map, cpu) & (1 << bank)))
+ for (bank = 0; bank < numbanks; bank++) {
+ if (!bp[bank])
continue;
- threshold_remove_bank(cpu, bank);
+
+ threshold_remove_bank(bp[bank]);
+ bp[bank] = NULL;
}
- kfree(per_cpu(threshold_banks, cpu));
- per_cpu(threshold_banks, cpu) = NULL;
+ kfree(bp);
+}
+
+int mce_threshold_remove_device(unsigned int cpu)
+{
+ struct threshold_bank **bp = this_cpu_read(threshold_banks);
+
+ if (!bp)
+ return 0;
+
+ /*
+ * Clear the pointer before cleaning up, so that the interrupt won't
+ * touch anything of this.
+ */
+ this_cpu_write(threshold_banks, NULL);
+
+ __threshold_remove_device(bp);
return 0;
}
-/* create dir/files for all valid threshold banks */
+/**
+ * mce_threshold_create_device - Create the per-CPU MCE threshold device
+ * @cpu: The plugged in CPU
+ *
+ * Create directories and files for all valid threshold banks.
+ *
+ * This is invoked from the CPU hotplug callback which was installed in
+ * mcheck_init_device(). The invocation happens in context of the hotplug
+ * thread running on @cpu. The callback is invoked on all CPUs which are
+ * online when the callback is installed or during a real hotplug event.
+ */
int mce_threshold_create_device(unsigned int cpu)
{
- unsigned int bank;
+ unsigned int numbanks, bank;
struct threshold_bank **bp;
- int err = 0;
+ int err;
+
+ if (!mce_flags.amd_threshold)
+ return 0;
- bp = per_cpu(threshold_banks, cpu);
+ bp = this_cpu_read(threshold_banks);
if (bp)
return 0;
- bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *),
- GFP_KERNEL);
+ numbanks = this_cpu_read(mce_num_banks);
+ bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL);
if (!bp)
return -ENOMEM;
- per_cpu(threshold_banks, cpu) = bp;
-
- for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
- if (!(per_cpu(bank_map, cpu) & (1 << bank)))
+ for (bank = 0; bank < numbanks; ++bank) {
+ if (!(this_cpu_read(bank_map) & (1 << bank)))
continue;
- err = threshold_create_bank(cpu, bank);
- if (err)
- goto err;
- }
- return err;
-err:
- mce_threshold_remove_device(cpu);
- return err;
-}
-
-static __init int threshold_init_device(void)
-{
- unsigned lcpu = 0;
-
- /* to hit CPUs online before the notifier is up */
- for_each_online_cpu(lcpu) {
- int err = mce_threshold_create_device(lcpu);
-
- if (err)
+ err = threshold_create_bank(bp, cpu, bank);
+ if (err) {
+ __threshold_remove_device(bp);
return err;
+ }
}
+ this_cpu_write(threshold_banks, bp);
if (thresholding_irq_en)
mce_threshold_vector = amd_threshold_interrupt;
-
return 0;
}
-/*
- * there are 3 funcs which need to be _initcalled in a logic sequence:
- * 1. xen_late_init_mcelog
- * 2. mcheck_init_device
- * 3. threshold_init_device
- *
- * xen_late_init_mcelog must register xen_mce_chrdev_device before
- * native mce_chrdev_device registration if running under xen platform;
- *
- * mcheck_init_device should be inited before threshold_init_device to
- * initialize mce_device, otherwise a NULL ptr dereference will cause panic.
- *
- * so we use following _initcalls
- * 1. device_initcall(xen_late_init_mcelog);
- * 2. device_initcall_sync(mcheck_init_device);
- * 3. late_initcall(threshold_init_device);
- *
- * when running under xen, the initcall order is 1,2,3;
- * on baremetal, we skip 1 and we do only 2 and 3.
- */
-late_initcall(threshold_init_device);