about | summary | refs | log | tree | commit | diff | stats
path: root/arch/x86/kernel/cpu/mce
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mce')
-rw-r--r-- arch/x86/kernel/cpu/mce/amd.c | 6
-rw-r--r-- arch/x86/kernel/cpu/mce/core.c | 72
-rw-r--r-- arch/x86/kernel/cpu/mce/inject.c | 2
-rw-r--r-- arch/x86/kernel/cpu/mce/intel.c | 15
-rw-r--r-- arch/x86/kernel/cpu/mce/internal.h | 2
-rw-r--r-- arch/x86/kernel/cpu/mce/therm_throt.c | 11
6 files changed, 49 insertions, 59 deletions
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 5167bd2bb6b1..b3a50d962851 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -78,6 +78,7 @@ struct smca_bank_name {
static struct smca_bank_name smca_names[] = {
[SMCA_LS] = { "load_store", "Load Store Unit" },
+ [SMCA_LS_V2] = { "load_store", "Load Store Unit" },
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
[SMCA_DE] = { "decode_unit", "Decode Unit" },
@@ -138,6 +139,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
/* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
+ { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
@@ -266,10 +268,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
smca_set_misc_banks_map(bank, cpu);
/* Return early if this bank was already initialized. */
- if (smca_banks[bank].hwid)
+ if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0)
return;
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
return;
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5f42f25bac8f..2c4f949611e4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -53,8 +53,6 @@
#include "internal.h"
-static DEFINE_MUTEX(mce_log_mutex);
-
/* sysfs synchronization */
static DEFINE_MUTEX(mce_sysfs_mutex);
@@ -156,19 +154,10 @@ void mce_log(struct mce *m)
if (!mce_gen_pool_add(m))
irq_work_queue(&mce_irq_work);
}
-
-void mce_inject_log(struct mce *m)
-{
- mutex_lock(&mce_log_mutex);
- mce_log(m);
- mutex_unlock(&mce_log_mutex);
-}
-EXPORT_SYMBOL_GPL(mce_inject_log);
-
-static struct notifier_block mce_srao_nb;
+EXPORT_SYMBOL_GPL(mce_log);
/*
- * We run the default notifier if we have only the SRAO, the first and the
+ * We run the default notifier if we have only the UC, the first and the
* default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS
* notifiers registered on the chain.
*/
@@ -594,26 +583,29 @@ static struct notifier_block first_nb = {
.priority = MCE_PRIO_FIRST,
};
-static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
+static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
{
struct mce *mce = (struct mce *)data;
unsigned long pfn;
- if (!mce)
+ if (!mce || !mce_usable_address(mce))
return NOTIFY_DONE;
- if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
- pfn = mce->addr >> PAGE_SHIFT;
- if (!memory_failure(pfn, 0))
- set_mce_nospec(pfn);
- }
+ if (mce->severity != MCE_AO_SEVERITY &&
+ mce->severity != MCE_DEFERRED_SEVERITY)
+ return NOTIFY_DONE;
+
+ pfn = mce->addr >> PAGE_SHIFT;
+ if (!memory_failure(pfn, 0))
+ set_mce_nospec(pfn);
return NOTIFY_OK;
}
-static struct notifier_block mce_srao_nb = {
- .notifier_call = srao_decode_notifier,
- .priority = MCE_PRIO_SRAO,
+
+static struct notifier_block mce_uc_nb = {
+ .notifier_call = uc_decode_notifier,
+ .priority = MCE_PRIO_UC,
};
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
@@ -763,26 +755,22 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
log_it:
error_seen = true;
- mce_read_aux(&m, i);
+ if (flags & MCP_DONTLOG)
+ goto clear_it;
+ mce_read_aux(&m, i);
m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
-
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
- if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
- mce_log(&m);
- else if (mce_usable_address(&m)) {
- /*
- * Although we skipped logging this, we still want
- * to take action. Add to the pool so the registered
- * notifiers will see it.
- */
- if (!mce_gen_pool_add(&m))
- mce_schedule_work();
- }
+ if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
+ goto clear_it;
+
+ mce_log(&m);
+
+clear_it:
/*
* Clear state for this bank.
*/
@@ -807,7 +795,7 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
struct pt_regs *regs)
{
- char *tmp;
+ char *tmp = *msg;
int i;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
@@ -819,8 +807,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
if (quirk_no_way_out)
quirk_no_way_out(i, m, regs);
+ m->bank = i;
if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
- m->bank = i;
mce_read_aux(m, i);
*msg = tmp;
return 1;
@@ -1232,8 +1220,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
struct mca_config *cfg = &mca_cfg;
int cpu = smp_processor_id();
- char *msg = "Unknown";
struct mce m, *final;
+ char *msg = NULL;
int worst = 0;
/*
@@ -1365,7 +1353,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
ist_end_non_atomic();
} else {
if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
- mce_panic("Failed kernel mode recovery", &m, NULL);
+ mce_panic("Failed kernel mode recovery", &m, msg);
}
out_ist:
@@ -2041,7 +2029,7 @@ int __init mcheck_init(void)
{
mcheck_intel_therm_init();
mce_register_decode_chain(&first_nb);
- mce_register_decode_chain(&mce_srao_nb);
+ mce_register_decode_chain(&mce_uc_nb);
mce_register_decode_chain(&mce_default_nb);
mcheck_vendor_init_severity();
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 1f30117b24ba..3413b41b8d55 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -494,7 +494,7 @@ static void do_inject(void)
i_mce.status |= MCI_STATUS_SYNDV;
if (inj_type == SW_INJ) {
- mce_inject_log(&i_mce);
+ mce_log(&i_mce);
return;
}
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index e270d0770134..5627b1091b85 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -115,15 +115,16 @@ static bool lmce_supported(void)
/*
* BIOS should indicate support for LMCE by setting bit 20 in
- * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
- * generate a #GP fault.
+ * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
+ * fault. The MSR must also be locked for LMCE_ENABLED to take effect.
+ * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
+ * locks the MSR in the event that it wasn't already locked by BIOS.
*/
- rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
- if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
- (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
- return true;
+ rdmsrl(MSR_IA32_FEAT_CTL, tmp);
+ if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
+ return false;
- return false;
+ return tmp & FEAT_CTL_LMCE_ENABLED;
}
bool mce_intel_cmci_poll(void)
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 842b273bce31..b785c0d0b590 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -84,8 +84,6 @@ static inline int apei_clear_mce(u64 record_id)
}
#endif
-void mce_inject_log(struct mce *m);
-
/*
* We consider records to be equivalent if bank+status+addr+misc all match.
* This is only used when the system is going down because of a fatal error
diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c
index b38010b541d6..58b4ee3cda77 100644
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ b/arch/x86/kernel/cpu/mce/therm_throt.c
@@ -235,7 +235,7 @@ static void get_therm_status(int level, bool *proc_hot, u8 *temp)
*temp = (msr_val >> 16) & 0x7F;
}
-static void throttle_active_work(struct work_struct *work)
+static void __maybe_unused throttle_active_work(struct work_struct *work)
{
struct _thermal_state *state = container_of(to_delayed_work(work),
struct _thermal_state, therm_work);
@@ -467,6 +467,7 @@ static int thermal_throttle_online(unsigned int cpu)
{
struct thermal_state *state = &per_cpu(thermal_state, cpu);
struct device *dev = get_cpu_device(cpu);
+ u32 l;
state->package_throttle.level = PACKAGE_LEVEL;
state->core_throttle.level = CORE_LEVEL;
@@ -474,6 +475,10 @@ static int thermal_throttle_online(unsigned int cpu)
INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
+ /* Unmask the thermal vector after the above workqueues are initialized. */
+ l = apic_read(APIC_LVTTHMR);
+ apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+
return thermal_throttle_add_dev(dev, cpu);
}
@@ -722,10 +727,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
- /* Unmask the thermal vector: */
- l = apic_read(APIC_LVTTHMR);
- apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-
pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
tm2 ? "TM2" : "TM1");