diff options
Diffstat (limited to 'drivers/ras')
-rw-r--r-- | drivers/ras/cec.c | 82 |
1 files changed, 59 insertions, 23 deletions
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index c09cf55e2d20..321af498ee11 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -38,7 +38,7 @@ * elements entered into the array, during which, we're decaying all elements. * If, after decay, an element gets inserted again, its generation is set to 11b * to make sure it has higher numerical count than other, older elements and - * thus emulate an an LRU-like behavior when deleting elements to free up space + * thus emulate an LRU-like behavior when deleting elements to free up space * in the page. * * When an element reaches it's max count of action_threshold, we try to poison @@ -309,11 +309,20 @@ static bool sanity_check(struct ce_array *ca) return ret; } -int cec_add_elem(u64 pfn) +/** + * cec_add_elem - Add an element to the CEC array. + * @pfn: page frame number to insert + * + * Return values: + * - <0: on error + * - 0: on success + * - >0: when the inserted pfn was offlined + */ +static int cec_add_elem(u64 pfn) { struct ce_array *ca = &ce_arr; + int count, err, ret = 0; unsigned int to = 0; - int count, ret = 0; /* * We can be called very early on the identify_cpu() path where we are @@ -330,8 +339,8 @@ int cec_add_elem(u64 pfn) if (ca->n == MAX_ELEMS) WARN_ON(!del_lru_elem_unlocked(ca)); - ret = find_elem(ca, pfn, &to); - if (ret < 0) { + err = find_elem(ca, pfn, &to); + if (err < 0) { /* * Shift range [to-end] to make room for one more element. */ @@ -435,7 +444,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(action_threshold_ops, u64_get, action_threshold_set, "% static const char * const bins[] = { "00", "01", "10", "11" }; -static int array_dump(struct seq_file *m, void *v) +static int array_show(struct seq_file *m, void *v) { struct ce_array *ca = &ce_arr; int i; @@ -467,18 +476,7 @@ static int array_dump(struct seq_file *m, void *v) return 0; } -static int array_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, array_dump, NULL); -} - -static const struct file_operations array_ops = { - .owner = THIS_MODULE, - .open = array_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(array); static int __init create_debugfs_nodes(void) { @@ -513,7 +511,7 @@ static int __init create_debugfs_nodes(void) goto err; } - array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops); + array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_fops); if (!array) { pr_warn("Error creating array debugfs node!\n"); goto err; @@ -527,27 +525,65 @@ err: return 1; } -void __init cec_init(void) +static int cec_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *m = (struct mce *)data; + + if (!m) + return NOTIFY_DONE; + + /* We eat only correctable DRAM errors with usable addresses. */ + if (mce_is_memory_error(m) && + mce_is_correctable(m) && + mce_usable_address(m)) { + if (!cec_add_elem(m->addr >> PAGE_SHIFT)) { + m->kflags |= MCE_HANDLED_CEC; + return NOTIFY_OK; + } + } + + return NOTIFY_DONE; +} + +static struct notifier_block cec_nb = { + .notifier_call = cec_notifier, + .priority = MCE_PRIO_CEC, +}; + +static int __init cec_init(void) { if (ce_arr.disabled) - return; + return -ENODEV; + + /* + * Intel systems may avoid uncorrectable errors + * if pages with corrected errors are aggressively + * taken offline. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + action_threshold = 2; ce_arr.array = (void *)get_zeroed_page(GFP_KERNEL); if (!ce_arr.array) { pr_err("Error allocating CE array page!\n"); - return; + return -ENOMEM; } if (create_debugfs_nodes()) { free_page((unsigned long)ce_arr.array); - return; + return -ENOMEM; } INIT_DELAYED_WORK(&cec_work, cec_work_fn); schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL); + mce_register_decode_chain(&cec_nb); + pr_info("Correctable Errors collector initialized.\n"); + return 0; } +late_initcall(cec_init); int __init parse_cec_param(char *str) { |