Diffstat (limited to 'arch/x86/kernel')
32 files changed, 284 insertions, 103 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 02d6f5cf4e70..8824d01c0c35 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -61,6 +61,7 @@ obj-y += alternative.o i8253.o hw_breakpoint.o
 obj-y			+= tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
+obj-y			+= irqflags.o
 
 obj-y				+= process.o
 obj-y				+= fpu/
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7553819c74c3..3982f79d2377 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1851,7 +1851,7 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data)
	 * intr-remapping table entry. Hence for the io-apic
	 * EOI we use the pin number.
	 */
-	ack_APIC_irq();
+	apic_ack_irq(irq_data);
	eoi_ioapic_pin(data->entry.vector, data);
 }
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index bb6f7a2148d7..35aaee4fc028 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -235,6 +235,15 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
	if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest))
		return 0;
 
+	/*
+	 * Careful here. @apicd might either have move_in_progress set or
+	 * be enqueued for cleanup. Assigning a new vector would either
+	 * leave a stale vector on some CPU around or in case of a pending
+	 * cleanup corrupt the hlist.
+	 */
+	if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist))
+		return -EBUSY;
+
	vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
	if (vector > 0)
		apic_update_vector(irqd, vector, cpu);
@@ -579,8 +588,7 @@ error:
 static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
				   struct irq_data *irqd, int ind)
 {
-	unsigned int cpu, vector, prev_cpu, prev_vector;
-	struct apic_chip_data *apicd;
+	struct apic_chip_data apicd;
	unsigned long flags;
	int irq;
@@ -596,24 +604,26 @@ static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
		return;
	}
 
-	apicd = irqd->chip_data;
-	if (!apicd) {
+	if (!irqd->chip_data) {
		seq_printf(m, "%*sVector: Not assigned\n", ind, "");
		return;
	}
 
	raw_spin_lock_irqsave(&vector_lock, flags);
-	cpu = apicd->cpu;
-	vector = apicd->vector;
-	prev_cpu = apicd->prev_cpu;
-	prev_vector = apicd->prev_vector;
+	memcpy(&apicd, irqd->chip_data, sizeof(apicd));
	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	seq_printf(m, "%*sVector: %5u\n", ind, "", vector);
-	seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu);
-	if (prev_vector) {
-		seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector);
-		seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu);
+
+	seq_printf(m, "%*sVector: %5u\n", ind, "", apicd.vector);
+	seq_printf(m, "%*sTarget: %5u\n", ind, "", apicd.cpu);
+	if (apicd.prev_vector) {
+		seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", apicd.prev_vector);
+		seq_printf(m, "%*sPrevious target: %5u\n", ind, "", apicd.prev_cpu);
	}
+	seq_printf(m, "%*smove_in_progress: %u\n", ind, "", apicd.move_in_progress ? 1 : 0);
+	seq_printf(m, "%*sis_managed: %u\n", ind, "", apicd.is_managed ? 1 : 0);
+	seq_printf(m, "%*scan_reserve: %u\n", ind, "", apicd.can_reserve ? 1 : 0);
+	seq_printf(m, "%*shas_reserved: %u\n", ind, "", apicd.has_reserved ? 1 : 0);
+	seq_printf(m, "%*scleanup_pending: %u\n", ind, "", !hlist_unhashed(&apicd.clist));
 }
 #endif
@@ -800,13 +810,18 @@ static int apic_retrigger_irq(struct irq_data *irqd)
	return 1;
 }
 
-void apic_ack_edge(struct irq_data *irqd)
+void apic_ack_irq(struct irq_data *irqd)
 {
-	irq_complete_move(irqd_cfg(irqd));
	irq_move_irq(irqd);
	ack_APIC_irq();
 }
 
+void apic_ack_edge(struct irq_data *irqd)
+{
+	irq_complete_move(irqd_cfg(irqd));
+	apic_ack_irq(irqd);
+}
+
 static struct irq_chip lapic_controller = {
	.name			= "APIC",
	.irq_ack		= apic_ack_edge,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index efaf2d4f9c3c..d492752f79e1 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -26,6 +26,7 @@
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
 #include <linux/reboot.h>
+#include <linux/memory.h>
 
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
@@ -392,6 +393,51 @@ extern int uv_hub_info_version(void)
 }
 EXPORT_SYMBOL(uv_hub_info_version);
 
+/* Default UV memory block size is 2GB */
+static unsigned long mem_block_size = (2UL << 30);
+
+/* Kernel parameter to specify UV mem block size */
+static int parse_mem_block_size(char *ptr)
+{
+	unsigned long size = memparse(ptr, NULL);
+
+	/* Size will be rounded down by set_block_size() below */
+	mem_block_size = size;
+	return 0;
+}
+early_param("uv_memblksize", parse_mem_block_size);
+
+static __init int adj_blksize(u32 lgre)
+{
+	unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT;
+	unsigned long size;
+
+	for (size = mem_block_size; size > MIN_MEMORY_BLOCK_SIZE; size >>= 1)
+		if (IS_ALIGNED(base, size))
+			break;
+
+	if (size >= mem_block_size)
+		return 0;
+
+	mem_block_size = size;
+	return 1;
+}
+
+static __init void set_block_size(void)
+{
+	unsigned int order = ffs(mem_block_size);
+
+	if (order) {
+		/* adjust for ffs return of 1..64 */
+		set_memory_block_size_order(order - 1);
+		pr_info("UV: mem_block_size set to 0x%lx\n", mem_block_size);
+	} else {
+		/* bad or zero value, default to 1UL << 31 (2GB) */
+		pr_err("UV: mem_block_size error with 0x%lx\n", mem_block_size);
+		set_memory_block_size_order(31);
+	}
+}
+
 /* Build GAM range lookup table: */
 static __init void build_uv_gr_table(void)
 {
@@ -1180,23 +1226,30 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
					   << UV_GAM_RANGE_SHFT);
		int order = 0;
		char suffix[] = " KMGTPE";
+		int flag = ' ';
 
		while (size > 9999 && order < sizeof(suffix)) {
			size /= 1024;
			order++;
		}
 
+		/* adjust max block size to current range start */
+		if (gre->type == 1 || gre->type == 2)
+			if (adj_blksize(lgre))
+				flag = '*';
+
		if (!index) {
			pr_info("UV: GAM Range Table...\n");
-			pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
+			pr_info("UV: # %20s %14s %6s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
		}
-		pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n",
+		pr_info("UV: %2d: 0x%014lx-0x%014lx%c %5lu%c %3d %04x %02x %02x\n",
			index++,
			(unsigned long)lgre << UV_GAM_RANGE_SHFT,
			(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
-			size, suffix[order],
+			flag, size, suffix[order],
			gre->type, gre->nasid, gre->sockid, gre->pnode);
 
+		/* update to next range start */
		lgre = gre->limit;
		if (sock_min > gre->sockid)
			sock_min = gre->sockid;
@@ -1427,6 +1480,7 @@ static void __init uv_system_init_hub(void)
 
	build_socket_tables();
	build_uv_gr_table();
+	set_block_size();
	uv_init_hub_info(&hub_info);
	uv_possible_blades = num_possible_nodes();
	if (!_node_to_pnode)
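For reference, the adj_blksize()/set_block_size() pair added above rounds the UV memory block size down to the largest power of two that still aligns each GAM range start, then converts the result into an order for set_memory_block_size_order(). The rounding can be exercised in isolation; a minimal userspace sketch, assuming a 128MB MIN_MEMORY_BLOCK_SIZE (the x86-64 section size) and modeling the kernel's ffs() with __builtin_ffsl():

    #include <stdio.h>

    #define MIN_MEMORY_BLOCK_SIZE (1UL << 27)  /* assumed: 128MB x86-64 section */

    static unsigned long mem_block_size = 2UL << 30;  /* default 2GB */

    /* Round mem_block_size down to the largest power of two aligning @base. */
    static int adj_blksize(unsigned long base)
    {
        unsigned long size;

        for (size = mem_block_size; size > MIN_MEMORY_BLOCK_SIZE; size >>= 1)
            if (!(base & (size - 1)))  /* IS_ALIGNED(base, size) */
                break;

        if (size >= mem_block_size)
            return 0;

        mem_block_size = size;
        return 1;
    }

    int main(void)
    {
        /* A range starting at 3.5GB forces the block size down to 512MB. */
        adj_blksize(0xe0000000UL);

        /* ffs() numbers bits from 1, hence "order - 1" in set_block_size(). */
        printf("mem_block_size 0x%lx -> order %d\n",
               mem_block_size, __builtin_ffsl(mem_block_size) - 1);
        return 0;
    }

With the 3.5GB range start this prints order 29 (512MB), which is why ranges that shrink the block size are flagged with '*' in the GAM table printout.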
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 76417a9aab73..dcb008c320fe 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -32,7 +32,7 @@ void common(void) {
	BLANK();
	OFFSET(TASK_threadsp, task_struct, thread.sp);
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
	OFFSET(TASK_stack_canary, task_struct, stack_canary);
 #endif
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index f91ba53e06c8..a4a3be399f4b 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -50,7 +50,7 @@ void foo(void)
	DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
	       offsetofend(struct cpu_entry_area, entry_stack_page.stack));
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
	BLANK();
	OFFSET(stack_canary_offset, stack_canary, canary);
 #endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index bf51e51d808d..b2dcd161f514 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -69,7 +69,7 @@ int main(void)
	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
	BLANK();
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
	DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
	BLANK();
 #endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 082d7875cef8..38915fbfae73 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -543,7 +543,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
		nodes_per_socket = ((value >> 3) & 7) + 1;
	}
 
-	if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+	if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
+	    !boot_cpu_has(X86_FEATURE_VIRT_SSBD) &&
+	    c->x86 >= 0x15 && c->x86 <= 0x17) {
		unsigned int bit;
 
		switch (c->x86) {
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7416fc206b4a..5c0ea39311fe 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -27,6 +27,7 @@
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
 #include <asm/intel-family.h>
+#include <asm/hypervisor.h>
 
 static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
@@ -154,7 +155,8 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
		guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
 
		/* SSBD controlled in MSR_SPEC_CTRL */
-		if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
+		if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+		    static_cpu_has(X86_FEATURE_AMD_SSBD))
			hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
 
		if (hostval != guestval) {
@@ -529,18 +531,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
	if (mode == SPEC_STORE_BYPASS_DISABLE) {
		setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
		/*
-		 * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
-		 * a completely different MSR and bit dependent on family.
+		 * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
+		 * use a completely different MSR and bit dependent on family.
		 */
-		switch (boot_cpu_data.x86_vendor) {
-		case X86_VENDOR_INTEL:
+		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
+		    !static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+			x86_amd_ssb_disable();
+		} else {
			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
			x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
-			break;
-		case X86_VENDOR_AMD:
-			x86_amd_ssb_disable();
-			break;
		}
	}
 
@@ -667,6 +667,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
		if (boot_cpu_has(X86_FEATURE_PTI))
			return sprintf(buf, "Mitigation: PTI\n");
 
+		if (hypervisor_is_type(X86_HYPER_XEN_PV))
+			return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n");
+
		break;
 
	case X86_BUG_SPECTRE_V1:
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 38354c66df81..0c5fcbd998cf 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -671,7 +671,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
		num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
 
		if (num_sharing_cache) {
-			int bits = get_count_order(num_sharing_cache) - 1;
+			int bits = get_count_order(num_sharing_cache);
 
			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
		}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 95c8e507580d..eb4cb3efd20e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,3 +1,6 @@
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
+
 #include <linux/bootmem.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
@@ -803,6 +806,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
		set_cpu_cap(c, X86_FEATURE_STIBP);
		set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
	}
+
+	if (cpu_has(c, X86_FEATURE_AMD_SSBD)) {
+		set_cpu_cap(c, X86_FEATURE_SSBD);
+		set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+		clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD);
+	}
 }
 
 void get_cpu_cap(struct cpuinfo_x86 *c)
@@ -992,7 +1001,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
 
	if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
-	   !(ia32_cap & ARCH_CAP_SSB_NO))
+	   !(ia32_cap & ARCH_CAP_SSB_NO) &&
+	   !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
		setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
 
	if (x86_match_cpu(cpu_no_meltdown))
@@ -1592,7 +1602,7 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
	(unsigned long)&init_thread_union + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 24bfa63e86cf..ec4754f81cbd 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -845,6 +845,8 @@ static __init void rdt_quirks(void)
	case INTEL_FAM6_SKYLAKE_X:
		if (boot_cpu_data.x86_stepping <= 4)
			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
+		else
+			set_rdt_options("!l3cat");
	}
 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 5bbd06f38ff6..f34d89c01edc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -160,6 +160,11 @@ static struct severity {
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		USER
		),
+	MCESEV(
+		PANIC, "Data load in unrecoverable area of kernel",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+		KERNEL
+		),
 #endif
	MCESEV(
		PANIC, "Action required: unknown MCACOD",
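The cacheinfo.c hunk above removes a stray "- 1": get_count_order(n) already returns ceil(log2(n)), i.e. the number of low APIC ID bits that distinguish the threads sharing one L3, so the extra subtraction under-shifted the APIC ID by one bit and produced wrong LLC IDs. A self-contained sketch of the corrected derivation (get_count_order() reimplemented here with a GCC builtin; the APIC ID value is made up):

    #include <stdio.h>

    /* Same result as the kernel's get_count_order() for n > 0. */
    static int get_count_order(unsigned int n)
    {
        int order = 31 - __builtin_clz(n);  /* fls(n) - 1 */

        if (n & (n - 1))  /* round up when n is not a power of two */
            order++;
        return order;
    }

    int main(void)
    {
        unsigned int num_sharing_cache = 8;  /* threads sharing one L3 */
        unsigned int apicid = 0x1a;
        int bits = get_count_order(num_sharing_cache);  /* 3 */

        printf("llc_id(%#x) = %u\n", apicid, apicid >> bits);
        return 0;
    }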
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index cd76380af79f..c102ad51025e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -772,23 +772,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
 static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
			   struct pt_regs *regs)
 {
-	int i, ret = 0;
	char *tmp;
+	int i;
 
	for (i = 0; i < mca_cfg.banks; i++) {
		m->status = mce_rdmsrl(msr_ops.status(i));
-		if (m->status & MCI_STATUS_VAL) {
-			__set_bit(i, validp);
-			if (quirk_no_way_out)
-				quirk_no_way_out(i, m, regs);
-		}
+		if (!(m->status & MCI_STATUS_VAL))
+			continue;
+
+		__set_bit(i, validp);
+		if (quirk_no_way_out)
+			quirk_no_way_out(i, m, regs);
 
		if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+			mce_read_aux(m, i);
			*msg = tmp;
-			ret = 1;
+			return 1;
		}
	}
-	return ret;
+	return 0;
 }
 
 /*
@@ -1205,13 +1207,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
		lmce = m.mcgstatus & MCG_STATUS_LMCES;
 
	/*
+	 * Local machine check may already know that we have to panic.
+	 * Broadcast machine check begins rendezvous in mce_start()
	 * Go through all banks in exclusion of the other CPUs. This way we
	 * don't report duplicated events on shared banks because the first one
-	 * to see it will clear it. If this is a Local MCE, then no need to
-	 * perform rendezvous.
+	 * to see it will clear it.
	 */
-	if (!lmce)
+	if (lmce) {
+		if (no_way_out)
+			mce_panic("Fatal local machine check", &m, msg);
+	} else {
		order = mce_start(&no_way_out);
+	}
 
	for (i = 0; i < cfg->banks; i++) {
		__clear_bit(i, toclear);
@@ -1287,12 +1294,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
			no_way_out = worst >= MCE_PANIC_SEVERITY;
	} else {
		/*
-		 * Local MCE skipped calling mce_reign()
-		 * If we found a fatal error, we need to panic here.
+		 * If there was a fatal machine check we should have
+		 * already called mce_panic earlier in this function.
+		 * Since we re-read the banks, we might have found
+		 * something new. Check again to see if we found a
+		 * fatal error. We call "mce_severity()" again to
+		 * make sure we have the right "msg".
		 */
-		if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
-			mce_panic("Machine check from unknown source",
-				NULL, NULL);
+		if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
+			mce_severity(&m, cfg->tolerant, &msg, true);
+			mce_panic("Local fatal machine check!", &m, msg);
+		}
	}
 
	/*
@@ -1457,7 +1469,7 @@ static int __mcheck_cpu_mce_banks_init(void)
	int i;
	u8 num_banks = mca_cfg.banks;
 
-	mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
+	mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL);
	if (!mce_banks)
		return -ENOMEM;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f591b01930db..dd33c357548f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1384,7 +1384,7 @@ int mce_threshold_create_device(unsigned int cpu)
	if (bp)
		return 0;
 
-	bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
+	bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *),
		     GFP_KERNEL);
	if (!bp)
		return -ENOMEM;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 77e201301528..08286269fd24 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -70,7 +70,7 @@ static DEFINE_MUTEX(microcode_mutex);
 /*
  * Serialize late loading so that CPUs get updated one-by-one.
  */
-static DEFINE_SPINLOCK(update_lock);
+static DEFINE_RAW_SPINLOCK(update_lock);
 
 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
 
@@ -560,9 +560,9 @@ static int __reload_late(void *info)
	if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
		return -1;
 
-	spin_lock(&update_lock);
+	raw_spin_lock(&update_lock);
	apply_microcode_local(&err);
-	spin_unlock(&update_lock);
+	raw_spin_unlock(&update_lock);
 
	/* siblings return UCODE_OK because their engine got updated already */
	if (err > UCODE_NFOUND) {
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 1c2cfa0644aa..97ccf4c3b45b 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -190,8 +190,11 @@ static void save_microcode_patch(void *data, unsigned int size)
			p = memdup_patch(data, size);
			if (!p)
				pr_err("Error allocating buffer %p\n", data);
-			else
+			else {
				list_replace(&iter->plist, &p->plist);
+				kfree(iter->data);
+				kfree(iter);
+			}
		}
	}
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 558444b23923..40eee6cc4124 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -43,7 +43,7 @@ mtrr_file_add(unsigned long base, unsigned long size,
	max = num_var_ranges;
	if (fcount == NULL) {
-		fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL);
+		fcount = kcalloc(max, sizeof(*fcount), GFP_KERNEL);
		if (!fcount)
			return -ENOMEM;
		FILE_FCOUNT(file) = fcount;
@@ -106,17 +106,10 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
 
	memset(line, 0, LINE_SIZE);
 
-	length = len;
-	length--;
-
-	if (length > LINE_SIZE - 1)
-		length = LINE_SIZE - 1;
-
+	len = min_t(size_t, len, LINE_SIZE - 1);
+	length = strncpy_from_user(line, buf, len);
	if (length < 0)
-		return -EINVAL;
-
-	if (copy_from_user(line, buf, length))
-		return -EFAULT;
+		return length;
 
	linelen = strlen(line);
	ptr = line + linelen - 1;
@@ -149,17 +142,16 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
		return -EINVAL;
 
	ptr = skip_spaces(ptr + 5);
-	for (i = 0; i < MTRR_NUM_TYPES; ++i) {
-		if (strcmp(ptr, mtrr_strings[i]))
-			continue;
-		base >>= PAGE_SHIFT;
-		size >>= PAGE_SHIFT;
-		err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true);
-		if (err < 0)
-			return err;
-		return len;
-	}
-	return -EINVAL;
+	i = match_string(mtrr_strings, MTRR_NUM_TYPES, ptr);
+	if (i < 0)
+		return i;
+
+	base >>= PAGE_SHIFT;
+	size >>= PAGE_SHIFT;
+	err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true);
+	if (err < 0)
+		return err;
+	return len;
 }
 
 static long
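The mtrr_write() rewrite above replaces an open-coded strcmp() loop with match_string(), which returns the index of the matching array entry or a negative errno that the caller can hand back directly. A userspace stand-in with the same semantics (the mtrr_strings table abbreviated here to a few entries):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    static const char *const mtrr_strings[] = {
        "uncachable", "write-combining", "write-through",
        "write-protect", "write-back",
    };

    /* Index of @string in @array of @n entries, or -EINVAL, as in the kernel. */
    static int match_string(const char *const *array, size_t n, const char *string)
    {
        size_t i;

        for (i = 0; i < n; i++) {
            if (!array[i])
                break;
            if (!strcmp(array[i], string))
                return i;
        }
        return -EINVAL;
    }

    int main(void)
    {
        int i = match_string(mtrr_strings, 5, "write-back");

        /* 4 with this abbreviated table; the full kernel table keeps
         * placeholders for the reserved types, putting write-back at 6. */
        printf("type index: %d\n", i);
        return 0;
    }

Folding the "not found" case into the return value is what lets the rewritten mtrr_write() simply do "if (i < 0) return i;".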
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d1f25c831447..c88c23c658c1 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1248,6 +1248,7 @@ void __init e820__memblock_setup(void)
 {
	int i;
	u64 end;
+	u64 addr = 0;
 
	/*
	 * The bootstrap memblock region count maximum is 128 entries
@@ -1264,13 +1265,21 @@ void __init e820__memblock_setup(void)
		struct e820_entry *entry = &e820_table->entries[i];
 
		end = entry->addr + entry->size;
+		if (addr < entry->addr)
+			memblock_reserve(addr, entry->addr - addr);
+		addr = end;
		if (end != (resource_size_t)end)
			continue;
 
+		/*
+		 * all !E820_TYPE_RAM ranges (including gap ranges) are put
+		 * into memblock.reserved to make sure that struct pages in
+		 * such regions are not left uninitialized after bootup.
+		 */
		if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
-			continue;
-
-		memblock_add(entry->addr, entry->size);
+			memblock_reserve(entry->addr, entry->size);
+		else
+			memblock_add(entry->addr, entry->size);
	}
 
	/* Throw away partial pages: */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index a21d6ace648e..8047379e575a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -44,7 +44,7 @@ static unsigned int __initdata next_early_pgt;
 pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
 
 #ifdef CONFIG_X86_5LEVEL
-unsigned int __pgtable_l5_enabled __initdata;
+unsigned int __pgtable_l5_enabled __ro_after_init;
 unsigned int pgdir_shift __ro_after_init = 39;
 EXPORT_SYMBOL(pgdir_shift);
 unsigned int ptrs_per_p4d __ro_after_init = 1;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b59e4fb40fd9..abe6df15a8fb 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -375,7 +375,7 @@ ENDPROC(startup_32_smp)
  */
 __INIT
 setup_once:
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
	/*
	 * Configure the stack canary. The linker can't handle this by
	 * relocation. Manually set base address in stack canary
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b6be34ee88e9..346b24883911 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -610,7 +610,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
	if (!hpet_domain)
		return;
 
-	hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
+	hpet_devs = kcalloc(num_timers, sizeof(struct hpet_dev), GFP_KERNEL);
	if (!hpet_devs)
		return;
 
@@ -966,8 +966,8 @@ int __init hpet_enable(void)
 #endif
 
	cfg = hpet_readl(HPET_CFG);
-	hpet_boot_cfg = kmalloc((last + 2) * sizeof(*hpet_boot_cfg),
-				GFP_KERNEL);
+	hpet_boot_cfg = kmalloc_array(last + 2, sizeof(*hpet_boot_cfg),
+				      GFP_KERNEL);
	if (hpet_boot_cfg)
		*hpet_boot_cfg = cfg;
	else
diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c
index 8eeaa81de066..0a3e70fd00d6 100644
--- a/arch/x86/kernel/i8237.c
+++ b/arch/x86/kernel/i8237.c
@@ -9,10 +9,12 @@ * your option) any later version.
  */
 
+#include <linux/dmi.h>
 #include <linux/init.h>
 #include <linux/syscore_ops.h>
 
 #include <asm/dma.h>
+#include <asm/x86_init.h>
 
 /*
  * This module just handles suspend/resume issues with the
@@ -49,6 +51,29 @@ static struct syscore_ops i8237_syscore_ops = {
 
 static int __init i8237A_init_ops(void)
 {
+	/*
+	 * From SKL PCH onwards, the legacy DMA device is removed in which the
+	 * I/O ports (81h-83h, 87h, 89h-8Bh, 8Fh) related to it are removed
+	 * as well. All removed ports must return 0xff for a inb() request.
+	 *
+	 * Note: DMA_PAGE_2 (port 0x81) should not be checked for detecting
+	 * the presence of DMA device since it may be used by BIOS to decode
+	 * LPC traffic for POST codes. Original LPC only decodes one byte of
+	 * port 0x80 but some BIOS may choose to enhance PCH LPC port 0x8x
+	 * decoding.
+	 */
+	if (dma_inb(DMA_PAGE_0) == 0xFF)
+		return -ENODEV;
+
+	/*
+	 * It is not required to load this driver as newer SoC may not
+	 * support 8237 DMA or bus mastering from LPC. Platform firmware
+	 * must announce the support for such legacy devices via
+	 * ACPI_FADT_LEGACY_DEVICES field in FADT table.
+	 */
+	if (x86_pnpbios_disabled() && dmi_get_bios_year() >= 2017)
+		return -ENODEV;
+
	register_syscore_ops(&i8237_syscore_ops);
	return 0;
 }
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 2c3a1b4294eb..74383a3780dc 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -317,15 +317,12 @@ void __init idt_setup_apic_and_irq_gates(void)
		set_intr_gate(i, entry);
	}
 
-	for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
 #ifdef CONFIG_X86_LOCAL_APIC
+	for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
		set_bit(i, system_vectors);
		set_intr_gate(i, spurious_interrupt);
-#else
-		entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
-		set_intr_gate(i, entry);
-#endif
	}
+#endif
 }
 
 /**
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
new file mode 100644
index 000000000000..ddeeaac8adda
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/asm.h>
+#include <asm/export.h>
+#include <linux/linkage.h>
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+	pushf
+	pop %_ASM_AX
+	ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+	push %_ASM_ARG1
+	popf
+	ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 8c1cc08f514f..163ae706a0d4 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -283,7 +283,7 @@ static int __init create_setup_data_nodes(struct kobject *parent)
	if (ret)
		goto out_setup_data_kobj;
 
-	kobjp = kmalloc(sizeof(*kobjp) * nr, GFP_KERNEL);
+	kobjp = kmalloc_array(nr, sizeof(*kobjp), GFP_KERNEL);
	if (!kobjp) {
		ret = -ENOMEM;
		goto out_setup_data_kobj;
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
index 235fe6008ac8..b348a672f71d 100644
--- a/arch/x86/kernel/platform-quirks.c
+++ b/arch/x86/kernel/platform-quirks.c
@@ -33,9 +33,14 @@ void __init x86_early_init_platform_quirks(void)
		x86_platform.set_legacy_features();
 }
 
+bool __init x86_pnpbios_disabled(void)
+{
+	return x86_platform.legacy.devices.pnpbios == 0;
+}
+
 #if defined(CONFIG_PNPBIOS)
 bool __init arch_pnpbios_disabled(void)
 {
-	return x86_platform.legacy.devices.pnpbios == 0;
+	return x86_pnpbios_disabled();
 }
 #endif
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 697a4ce04308..736348ead421 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -645,12 +645,19 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
 /* Skylake */
 static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
 {
-	u32 capid0;
+	u32 capid0, capid5;
 
	pci_read_config_dword(pdev, 0x84, &capid0);
+	pci_read_config_dword(pdev, 0x98, &capid5);
 
-	if ((capid0 & 0xc0) == 0xc0)
+	/*
+	 * CAPID0{7:6} indicate whether this is an advanced RAS SKU
+	 * CAPID5{8:5} indicate that various NVDIMM usage modes are
+	 * enabled, so memory machine check recovery is also enabled.
+	 */
+	if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
		static_branch_inc(&mcsafe_key);
+
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index da270b95fe4d..92a3b312a53c 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -688,6 +688,12 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
	sigset_t *set = sigmask_to_save();
	compat_sigset_t *cset = (compat_sigset_t *) set;
 
+	/*
+	 * Increment event counter and perform fixup for the pre-signal
+	 * frame.
+	 */
+	rseq_signal_deliver(ksig, regs);
+
	/* Set up the stack frame */
	if (is_ia32_frame(ksig)) {
		if (ksig->ka.sa.sa_flags & SA_SIGINFO)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c2f7d1d2a5c3..db9656e13ea0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -221,6 +221,11 @@ static void notrace start_secondary(void *unused)
 #ifdef CONFIG_X86_32
	/* switch away from the initial page table */
	load_cr3(swapper_pg_dir);
+	/*
+	 * Initialize the CR4 shadow before doing anything that could
+	 * try to read it.
+	 */
+	cr4_init_shadow();
	__flush_tlb_all();
 #endif
	load_current_idt();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a535dd64de63..e6db475164ed 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -835,16 +835,18 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
	char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
						"simd exception";
 
-	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
-		return;
	cond_local_irq_enable(regs);
 
	if (!user_mode(regs)) {
-		if (!fixup_exception(regs, trapnr)) {
-			task->thread.error_code = error_code;
-			task->thread.trap_nr = trapnr;
+		if (fixup_exception(regs, trapnr))
+			return;
+
+		task->thread.error_code = error_code;
+		task->thread.trap_nr = trapnr;
+
+		if (notify_die(DIE_TRAP, str, regs, error_code,
+			       trapnr, SIGFPE) != NOTIFY_STOP)
			die(str, regs, error_code);
-		}
		return;
	}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 58d8d800875d..deb576b23b7c 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -293,7 +293,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
	insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64);
	/* has the side-effect of processing the entire instruction */
	insn_get_length(insn);
-	if (WARN_ON_ONCE(!insn_complete(insn)))
+	if (!insn_complete(insn))
		return -ENOEXEC;
 
	if (is_prefix_bad(insn))
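Several hunks above (mce.c, mce_amd.c, mtrr/if.c, hpet.c, ksysfs.c) convert open-coded kzalloc(n * size) / kmalloc(n * size) calls to kcalloc() / kmalloc_array(). The point is that the multiplication can silently wrap and allocate a short buffer; the two-argument forms refuse such requests. A userspace model of the guard, using GCC/Clang's __builtin_mul_overflow() in place of the kernel's internal overflow check:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Rough stand-in for kmalloc_array(): fail instead of wrapping n * size. */
    static void *malloc_array(size_t n, size_t size)
    {
        size_t bytes;

        if (__builtin_mul_overflow(n, size, &bytes))
            return NULL;
        return malloc(bytes);
    }

    int main(void)
    {
        /* (SIZE_MAX / 2 + 2) * 2 wraps to just 2 bytes with a plain
         * multiply; the checked version correctly returns NULL. */
        void *p = malloc_array(SIZE_MAX / 2 + 2, 2);

        printf("overflowing request -> %p\n", p);
        free(p);
        return 0;
    }

With an unchecked multiply the caller would receive a valid-looking 2-byte allocation and overrun it on first use, which is exactly the class of bug the kcalloc()/kmalloc_array() conversions in this commit rule out.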