From 23ba710a0864108910c7531dc4c73ef65eca5568 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 14 Feb 2020 14:27:17 -0800 Subject: x86/mce: Fix all mce notifiers to update the mce->kflags bitmask If the handler took any action to log or deal with the error, set a bit in mce->kflags so that the default handler on the end of the machine check chain can see what has been done. Get rid of NOTIFY_STOP returns. Make the EDAC and dev-mcelog handlers skip over errors already processed by CEC. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Tested-by: Tony Luck Link: https://lkml.kernel.org/r/20200214222720.13168-5-tony.luck@intel.com --- drivers/edac/i7core_edac.c | 5 +++-- drivers/edac/mce_amd.c | 6 +++++- drivers/edac/pnd2_edac.c | 5 +++-- drivers/edac/sb_edac.c | 5 ++++- drivers/edac/skx_common.c | 4 ++++ 5 files changed, 19 insertions(+), 6 deletions(-) (limited to 'drivers/edac') diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index b3135b208f9a..5860ca41185c 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1815,7 +1815,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, struct mem_ctl_info *mci; i7_dev = get_i7core_dev(mce->socketid); - if (!i7_dev) + if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC)) return NOTIFY_DONE; mci = i7_dev->mci; @@ -1834,7 +1834,8 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, i7core_check_error(mci, mce); /* Advise mcelog that the errors were handled */ - return NOTIFY_STOP; + mce->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_OK; } static struct notifier_block i7_mce_dec = { diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index e58644d9c92b..2b5401db56ad 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1046,6 +1046,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) unsigned int fam = x86_family(m->cpuid); int ecc; + if (m->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + pr_emerg(HW_ERR "%s\n", decode_error_status(m)); pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s", @@ -1146,7 +1149,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) err_code: amd_decode_err_code(m->status & 0xffff); - return NOTIFY_STOP; + m->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_OK; } static struct notifier_block amd_mce_dec_nb = { diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index bc47328eb485..1929a5dc8f94 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1400,7 +1400,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo return NOTIFY_DONE; mci = pnd2_mci; - if (!mci) + if (!mci || (mce->kflags & MCE_HANDLED_CEC)) return NOTIFY_DONE; /* @@ -1429,7 +1429,8 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo pnd2_mce_output_error(mci, mce, &daddr); /* Advice mcelog that the error were handled */ - return NOTIFY_STOP; + mce->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_OK; } static struct notifier_block pnd2_mce_dec = { diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 7d51c82be62b..f790f7d08688 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3136,6 +3136,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; /* * Just let mcelog handle it if the error is @@ -3183,7 +3185,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, sbridge_mce_output_error(mci, mce); /* Advice mcelog that the error were handled */ - return NOTIFY_STOP; + mce->kflags |= MCE_HANDLED_EDAC; + return NOTIFY_OK; } static struct notifier_block sbridge_mce_dec = { diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 99bbaf629b8d..6f08a12f6b11 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -577,6 +577,9 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + /* ignore unless this is memory related with an address */ if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) return NOTIFY_DONE; @@ -616,6 +619,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, skx_mce_output_error(mci, mce, &res); + mce->kflags |= MCE_HANDLED_EDAC; return NOTIFY_DONE; } -- cgit v1.2.3-59-g8ed1b