aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/boot/Makefile2
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/boot/compressed/acpi.c338
-rw-r--r--arch/x86/boot/compressed/cmdline.c4
-rw-r--r--arch/x86/boot/compressed/head_64.S11
-rw-r--r--arch/x86/boot/compressed/kaslr.c75
-rw-r--r--arch/x86/boot/compressed/misc.c3
-rw-r--r--arch/x86/boot/compressed/misc.h23
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c19
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S2
-rw-r--r--arch/x86/boot/setup.ld2
-rw-r--r--arch/x86/boot/string.c141
-rw-r--r--arch/x86/boot/string.h1
-rw-r--r--arch/x86/configs/i386_defconfig3
-rw-r--r--arch/x86/configs/x86_64_defconfig4
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl3
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl3
-rw-r--r--arch/x86/events/core.c13
-rw-r--r--arch/x86/events/intel/core.c156
-rw-r--r--arch/x86/events/intel/lbr.c1
-rw-r--r--arch/x86/events/intel/uncore.c1
-rw-r--r--arch/x86/events/intel/uncore.h12
-rw-r--r--arch/x86/events/intel/uncore_snb.c4
-rw-r--r--arch/x86/events/perf_event.h17
-rw-r--r--arch/x86/hyperv/hv_init.c8
-rw-r--r--arch/x86/include/asm/asm-prototypes.h1
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/fpu/internal.h57
-rw-r--r--arch/x86/include/asm/fpu/types.h7
-rw-r--r--arch/x86/include/asm/mce.h7
-rw-r--r--arch/x86/include/asm/msr-index.h6
-rw-r--r--arch/x86/include/asm/page_64_types.h4
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/uaccess.h3
-rw-r--r--arch/x86/include/asm/unwind.h6
-rw-r--r--arch/x86/include/asm/uv/bios.h5
-rw-r--r--arch/x86/include/asm/xen/hypercall.h13
-rw-r--r--arch/x86/kernel/acpi/boot.c3
-rw-r--r--arch/x86/kernel/acpi/wakeup_32.S2
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S12
-rw-r--r--arch/x86/kernel/apic/io_apic.c7
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c1
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c62
-rw-r--r--arch/x86/kernel/cpu/mce/apei.c10
-rw-r--r--arch/x86/kernel/cpu/mce/core.c30
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c5
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c12
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c3
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h16
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c7
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c185
-rw-r--r--arch/x86/kernel/e820.c19
-rw-r--r--arch/x86/kernel/fpu/xstate.c2
-rw-r--r--arch/x86/kernel/ftrace.c42
-rw-r--r--arch/x86/kernel/hw_breakpoint.c5
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c18
-rw-r--r--arch/x86/kernel/kgdb.c1
-rw-r--r--arch/x86/kernel/machine_kexec_64.c3
-rw-r--r--arch/x86/kernel/setup_percpu.c10
-rw-r--r--arch/x86/kernel/smpboot.c7
-rw-r--r--arch/x86/kernel/traps.c5
-rw-r--r--arch/x86/kernel/unwind_frame.c25
-rw-r--r--arch/x86/kernel/unwind_orc.c17
-rw-r--r--arch/x86/kernel/uprobes.c1
-rw-r--r--arch/x86/kernel/vmlinux.lds.S4
-rw-r--r--arch/x86/lib/insn-eval.c2
-rw-r--r--arch/x86/mm/cpu_entry_area.c2
-rw-r--r--arch/x86/mm/dump_pagetables.c2
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/kasan_init_64.c14
-rw-r--r--arch/x86/mm/numa.c12
-rw-r--r--arch/x86/mm/pageattr.c4
-rw-r--r--arch/x86/mm/tlb.c3
-rw-r--r--arch/x86/pci/fixup.c16
-rw-r--r--arch/x86/platform/olpc/olpc_dt.c3
-rw-r--r--arch/x86/platform/uv/bios_uv.c16
-rw-r--r--arch/x86/platform/uv/tlb_uv.c8
-rw-r--r--arch/x86/realmode/rm/Makefile5
-rw-r--r--arch/x86/realmode/rm/realmode.lds.S2
-rw-r--r--arch/x86/xen/mmu_pv.c13
-rw-r--r--arch/x86/xen/p2m.c11
-rw-r--r--arch/x86/xen/setup.c13
85 files changed, 1210 insertions, 401 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 90b562a34d65..c1f9b3cf437c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -14,7 +14,6 @@ config X86_32
select ARCH_WANT_IPC_PARSE_VERSION
select CLKSRC_I8253
select CLONE_BACKWARDS
- select HAVE_GENERIC_DMA_COHERENT
select MODULES_USE_ELF_REL
select OLD_SIGACTION
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 9c5a67d1b9c1..2d8b9d8ca4f8 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -187,7 +187,6 @@ cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
# does binutils support specific instructions?
-asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
@@ -217,6 +216,11 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
+ # Additionally, avoid generating expensive indirect jumps which
+ # are subject to retpolines for small number of switch cases.
+ # clang turns off jump table generation by default when under
+ # retpoline builds, however, gcc does not for x86.
+ KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20)
endif
archscripts: scripts_basic
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 9b5adae9cc40..e2839b5c246c 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -100,7 +100,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
AFLAGS_header.o += -I$(objtree)/$(obj)
$(obj)/header.o: $(obj)/zoffset.h
-LDFLAGS_setup.elf := -T
+LDFLAGS_setup.elf := -m elf_i386 -T
$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f0515ac895a4..6b84afdd7538 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -84,6 +84,8 @@ ifdef CONFIG_X86_64
vmlinux-objs-y += $(obj)/pgtable_64.o
endif
+vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
+
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
new file mode 100644
index 000000000000..0ef4ad55b29b
--- /dev/null
+++ b/arch/x86/boot/compressed/acpi.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BOOT_CTYPE_H
+#include "misc.h"
+#include "error.h"
+#include "../string.h"
+
+#include <linux/numa.h>
+#include <linux/efi.h>
+#include <asm/efi.h>
+
+/*
+ * Longest parameter of 'acpi=' is 'copy_dsdt', plus an extra '\0'
+ * for termination.
+ */
+#define MAX_ACPI_ARG_LENGTH 10
+
+/*
+ * Immovable memory regions representation. Max amount of memory regions is
+ * MAX_NUMNODES*2.
+ */
+struct mem_vector immovable_mem[MAX_NUMNODES*2];
+
+/*
+ * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex
+ * digits, and '\0' for termination.
+ */
+#define MAX_ADDR_LEN 19
+
+static acpi_physical_address get_acpi_rsdp(void)
+{
+ acpi_physical_address addr = 0;
+
+#ifdef CONFIG_KEXEC
+ char val[MAX_ADDR_LEN] = { };
+ int ret;
+
+ ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN);
+ if (ret < 0)
+ return 0;
+
+ if (kstrtoull(val, 16, &addr))
+ return 0;
+#endif
+ return addr;
+}
+
+/* Search EFI system tables for RSDP. */
+static acpi_physical_address efi_get_rsdp_addr(void)
+{
+ acpi_physical_address rsdp_addr = 0;
+
+#ifdef CONFIG_EFI
+ unsigned long systab, systab_tables, config_tables;
+ unsigned int nr_tables;
+ struct efi_info *ei;
+ bool efi_64;
+ int size, i;
+ char *sig;
+
+ ei = &boot_params->efi_info;
+ sig = (char *)&ei->efi_loader_signature;
+
+ if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) {
+ efi_64 = true;
+ } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) {
+ efi_64 = false;
+ } else {
+ debug_putstr("Wrong EFI loader signature.\n");
+ return 0;
+ }
+
+ /* Get systab from boot params. */
+#ifdef CONFIG_X86_64
+ systab = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32);
+#else
+ if (ei->efi_systab_hi || ei->efi_memmap_hi) {
+ debug_putstr("Error getting RSDP address: EFI system table located above 4GB.\n");
+ return 0;
+ }
+ systab = ei->efi_systab;
+#endif
+ if (!systab)
+ error("EFI system table not found.");
+
+ /* Handle EFI bitness properly */
+ if (efi_64) {
+ efi_system_table_64_t *stbl = (efi_system_table_64_t *)systab;
+
+ config_tables = stbl->tables;
+ nr_tables = stbl->nr_tables;
+ size = sizeof(efi_config_table_64_t);
+ } else {
+ efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab;
+
+ config_tables = stbl->tables;
+ nr_tables = stbl->nr_tables;
+ size = sizeof(efi_config_table_32_t);
+ }
+
+ if (!config_tables)
+ error("EFI config tables not found.");
+
+ /* Get EFI tables from systab. */
+ for (i = 0; i < nr_tables; i++) {
+ acpi_physical_address table;
+ efi_guid_t guid;
+
+ config_tables += size;
+
+ if (efi_64) {
+ efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables;
+
+ guid = tbl->guid;
+ table = tbl->table;
+
+ if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) {
+ debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n");
+ return 0;
+ }
+ } else {
+ efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables;
+
+ guid = tbl->guid;
+ table = tbl->table;
+ }
+
+ if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)))
+ rsdp_addr = table;
+ else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID)))
+ return table;
+ }
+#endif
+ return rsdp_addr;
+}
+
+static u8 compute_checksum(u8 *buffer, u32 length)
+{
+ u8 *end = buffer + length;
+ u8 sum = 0;
+
+ while (buffer < end)
+ sum += *(buffer++);
+
+ return sum;
+}
+
+/* Search a block of memory for the RSDP signature. */
+static u8 *scan_mem_for_rsdp(u8 *start, u32 length)
+{
+ struct acpi_table_rsdp *rsdp;
+ u8 *address, *end;
+
+ end = start + length;
+
+ /* Search from given start address for the requested length */
+ for (address = start; address < end; address += ACPI_RSDP_SCAN_STEP) {
+ /*
+ * Both RSDP signature and checksum must be correct.
+ * Note: Sometimes there exists more than one RSDP in memory;
+ * the valid RSDP has a valid checksum, all others have an
+ * invalid checksum.
+ */
+ rsdp = (struct acpi_table_rsdp *)address;
+
+ /* BAD Signature */
+ if (!ACPI_VALIDATE_RSDP_SIG(rsdp->signature))
+ continue;
+
+ /* Check the standard checksum */
+ if (compute_checksum((u8 *)rsdp, ACPI_RSDP_CHECKSUM_LENGTH))
+ continue;
+
+ /* Check extended checksum if table version >= 2 */
+ if ((rsdp->revision >= 2) &&
+ (compute_checksum((u8 *)rsdp, ACPI_RSDP_XCHECKSUM_LENGTH)))
+ continue;
+
+ /* Signature and checksum valid, we have found a real RSDP */
+ return address;
+ }
+ return NULL;
+}
+
+/* Search RSDP address in EBDA. */
+static acpi_physical_address bios_get_rsdp_addr(void)
+{
+ unsigned long address;
+ u8 *rsdp;
+
+ /* Get the location of the Extended BIOS Data Area (EBDA) */
+ address = *(u16 *)ACPI_EBDA_PTR_LOCATION;
+ address <<= 4;
+
+ /*
+ * Search EBDA paragraphs (EBDA is required to be a minimum of
+ * 1K length)
+ */
+ if (address > 0x400) {
+ rsdp = scan_mem_for_rsdp((u8 *)address, ACPI_EBDA_WINDOW_SIZE);
+ if (rsdp)
+ return (acpi_physical_address)(unsigned long)rsdp;
+ }
+
+ /* Search upper memory: 16-byte boundaries in E0000h-FFFFFh */
+ rsdp = scan_mem_for_rsdp((u8 *) ACPI_HI_RSDP_WINDOW_BASE,
+ ACPI_HI_RSDP_WINDOW_SIZE);
+ if (rsdp)
+ return (acpi_physical_address)(unsigned long)rsdp;
+
+ return 0;
+}
+
+/* Return RSDP address on success, otherwise 0. */
+acpi_physical_address get_rsdp_addr(void)
+{
+ acpi_physical_address pa;
+
+ pa = get_acpi_rsdp();
+
+ if (!pa)
+ pa = boot_params->acpi_rsdp_addr;
+
+ if (!pa)
+ pa = efi_get_rsdp_addr();
+
+ if (!pa)
+ pa = bios_get_rsdp_addr();
+
+ return pa;
+}
+
+#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE)
+/* Compute SRAT address from RSDP. */
+static unsigned long get_acpi_srat_table(void)
+{
+ unsigned long root_table, acpi_table;
+ struct acpi_table_header *header;
+ struct acpi_table_rsdp *rsdp;
+ u32 num_entries, size, len;
+ char arg[10];
+ u8 *entry;
+
+ rsdp = (struct acpi_table_rsdp *)(long)boot_params->acpi_rsdp_addr;
+ if (!rsdp)
+ return 0;
+
+ /* Get ACPI root table from RSDP.*/
+ if (!(cmdline_find_option("acpi", arg, sizeof(arg)) == 4 &&
+ !strncmp(arg, "rsdt", 4)) &&
+ rsdp->xsdt_physical_address &&
+ rsdp->revision > 1) {
+ root_table = rsdp->xsdt_physical_address;
+ size = ACPI_XSDT_ENTRY_SIZE;
+ } else {
+ root_table = rsdp->rsdt_physical_address;
+ size = ACPI_RSDT_ENTRY_SIZE;
+ }
+
+ if (!root_table)
+ return 0;
+
+ header = (struct acpi_table_header *)root_table;
+ len = header->length;
+ if (len < sizeof(struct acpi_table_header) + size)
+ return 0;
+
+ num_entries = (len - sizeof(struct acpi_table_header)) / size;
+ entry = (u8 *)(root_table + sizeof(struct acpi_table_header));
+
+ while (num_entries--) {
+ if (size == ACPI_RSDT_ENTRY_SIZE)
+ acpi_table = *(u32 *)entry;
+ else
+ acpi_table = *(u64 *)entry;
+
+ if (acpi_table) {
+ header = (struct acpi_table_header *)acpi_table;
+
+ if (ACPI_COMPARE_NAME(header->signature, ACPI_SIG_SRAT))
+ return acpi_table;
+ }
+ entry += size;
+ }
+ return 0;
+}
+
+/**
+ * count_immovable_mem_regions - Parse SRAT and cache the immovable
+ * memory regions into the immovable_mem array.
+ *
+ * Return the number of immovable memory regions on success, 0 on failure:
+ *
+ * - Too many immovable memory regions
+ * - ACPI off or no SRAT found
+ * - No immovable memory region found.
+ */
+int count_immovable_mem_regions(void)
+{
+ unsigned long table_addr, table_end, table;
+ struct acpi_subtable_header *sub_table;
+ struct acpi_table_header *table_header;
+ char arg[MAX_ACPI_ARG_LENGTH];
+ int num = 0;
+
+ if (cmdline_find_option("acpi", arg, sizeof(arg)) == 3 &&
+ !strncmp(arg, "off", 3))
+ return 0;
+
+ table_addr = get_acpi_srat_table();
+ if (!table_addr)
+ return 0;
+
+ table_header = (struct acpi_table_header *)table_addr;
+ table_end = table_addr + table_header->length;
+ table = table_addr + sizeof(struct acpi_table_srat);
+
+ while (table + sizeof(struct acpi_subtable_header) < table_end) {
+ sub_table = (struct acpi_subtable_header *)table;
+ if (sub_table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) {
+ struct acpi_srat_mem_affinity *ma;
+
+ ma = (struct acpi_srat_mem_affinity *)sub_table;
+ if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && ma->length) {
+ immovable_mem[num].start = ma->base_address;
+ immovable_mem[num].size = ma->length;
+ num++;
+ }
+
+ if (num >= MAX_NUMNODES*2) {
+ debug_putstr("Too many immovable memory regions, aborting.\n");
+ return 0;
+ }
+ }
+ table += sub_table->length;
+ }
+ return num;
+}
+#endif /* CONFIG_RANDOMIZE_BASE && CONFIG_MEMORY_HOTREMOVE */
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index af6cda0b7900..f1add5d85da9 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,8 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "misc.h"
-#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE || CONFIG_X86_5LEVEL
-
static unsigned long fs;
static inline void set_fs(unsigned long seg)
{
@@ -30,5 +28,3 @@ int cmdline_find_option_bool(const char *option)
{
return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
}
-
-#endif
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f62e347862cc..fafb75c6c592 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -358,8 +358,11 @@ ENTRY(startup_64)
* paging_prepare() sets up the trampoline and checks if we need to
* enable 5-level paging.
*
- * Address of the trampoline is returned in RAX.
- * Non zero RDX on return means we need to enable 5-level paging.
+ * paging_prepare() returns a two-quadword structure which lands
+ * into RDX:RAX:
+ * - Address of the trampoline is returned in RAX.
+ * - Non zero RDX means trampoline needs to enable 5-level
+ * paging.
*
* RSI holds real mode data and needs to be preserved across
* this function call.
@@ -565,7 +568,7 @@ adjust_got:
*
* RDI contains the return address (might be above 4G).
* ECX contains the base address of the trampoline memory.
- * Non zero RDX on return means we need to enable 5-level paging.
+ * Non zero RDX means trampoline needs to enable 5-level paging.
*/
ENTRY(trampoline_32bit_src)
/* Set up data and stack segments */
@@ -655,8 +658,6 @@ no_longmode:
.data
gdt64:
.word gdt_end - gdt
- .long 0
- .word 0
.quad 0
gdt:
.word gdt_end - gdt
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 9ed9709d9947..2e53c056ba20 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -87,10 +87,6 @@ static unsigned long get_boot_seed(void)
#define KASLR_COMPRESSED_BOOT
#include "../../lib/kaslr.c"
-struct mem_vector {
- unsigned long long start;
- unsigned long long size;
-};
/* Only supporting at most 4 unusable memmap regions with kaslr */
#define MAX_MEMMAP_REGIONS 4
@@ -101,6 +97,8 @@ static bool memmap_too_large;
/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
static unsigned long long mem_limit = ULLONG_MAX;
+/* Number of immovable memory regions */
+static int num_immovable_mem;
enum mem_avoid_index {
MEM_AVOID_ZO_RANGE = 0,
@@ -417,6 +415,9 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
/* Mark the memmap regions we need to avoid */
handle_mem_options();
+ /* Enumerate the immovable memory regions */
+ num_immovable_mem = count_immovable_mem_regions();
+
#ifdef CONFIG_X86_VERBOSE_BOOTUP
/* Make sure video RAM can be used. */
add_identity_map(0, PMD_SIZE);
@@ -572,9 +573,9 @@ static unsigned long slots_fetch_random(void)
return 0;
}
-static void process_mem_region(struct mem_vector *entry,
- unsigned long minimum,
- unsigned long image_size)
+static void __process_mem_region(struct mem_vector *entry,
+ unsigned long minimum,
+ unsigned long image_size)
{
struct mem_vector region, overlap;
unsigned long start_orig, end;
@@ -650,6 +651,56 @@ static void process_mem_region(struct mem_vector *entry,
}
}
+static bool process_mem_region(struct mem_vector *region,
+ unsigned long long minimum,
+ unsigned long long image_size)
+{
+ int i;
+ /*
+ * If no immovable memory found, or MEMORY_HOTREMOVE disabled,
+ * use @region directly.
+ */
+ if (!num_immovable_mem) {
+ __process_mem_region(region, minimum, image_size);
+
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n");
+ return 1;
+ }
+ return 0;
+ }
+
+#if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
+ /*
+ * If immovable memory found, filter the intersection between
+ * immovable memory and @region.
+ */
+ for (i = 0; i < num_immovable_mem; i++) {
+ unsigned long long start, end, entry_end, region_end;
+ struct mem_vector entry;
+
+ if (!mem_overlaps(region, &immovable_mem[i]))
+ continue;
+
+ start = immovable_mem[i].start;
+ end = start + immovable_mem[i].size;
+ region_end = region->start + region->size;
+
+ entry.start = clamp(region->start, start, end);
+ entry_end = clamp(region_end, start, end);
+ entry.size = entry_end - entry.start;
+
+ __process_mem_region(&entry, minimum, image_size);
+
+ if (slot_area_index == MAX_SLOT_AREA) {
+ debug_putstr("Aborted e820/efi memmap scan when walking immovable regions(slot_areas full)!\n");
+ return 1;
+ }
+ }
+#endif
+ return 0;
+}
+
#ifdef CONFIG_EFI
/*
* Returns true if mirror region found (and must have been processed
@@ -715,11 +766,8 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
region.start = md->phys_addr;
region.size = md->num_pages << EFI_PAGE_SHIFT;
- process_mem_region(&region, minimum, image_size);
- if (slot_area_index == MAX_SLOT_AREA) {
- debug_putstr("Aborted EFI scan (slot_areas full)!\n");
+ if (process_mem_region(&region, minimum, image_size))
break;
- }
}
return true;
}
@@ -746,11 +794,8 @@ static void process_e820_entries(unsigned long minimum,
continue;
region.start = entry->addr;
region.size = entry->size;
- process_mem_region(&region, minimum, image_size);
- if (slot_area_index == MAX_SLOT_AREA) {
- debug_putstr("Aborted e820 scan (slot_areas full)!\n");
+ if (process_mem_region(&region, minimum, image_size))
break;
- }
}
}
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 8dd1d5ccae58..c0d6c560df69 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -351,6 +351,9 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
/* Clear flags intended for solely in-kernel use. */
boot_params->hdr.loadflags &= ~KASLR_FLAG;
+ /* Save RSDP address for later use. */
+ boot_params->acpi_rsdp_addr = get_rsdp_addr();
+
sanitize_boot_params(boot_params);
if (boot_params->screen_info.orig_video_mode == 7) {
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index a1d5918765f3..fd13655e0f9b 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -25,6 +25,9 @@
#include <asm/bootparam.h>
#include <asm/bootparam_utils.h>
+#define BOOT_CTYPE_H
+#include <linux/acpi.h>
+
#define BOOT_BOOT_H
#include "../ctype.h"
@@ -63,12 +66,14 @@ static inline void debug_puthex(const char *s)
#endif
-#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
/* cmdline.c */
int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
-#endif
+struct mem_vector {
+ unsigned long long start;
+ unsigned long long size;
+};
#if CONFIG_RANDOMIZE_BASE
/* kaslr.c */
@@ -116,3 +121,17 @@ static inline void console_init(void)
void set_sev_encryption_mask(void);
#endif
+
+/* acpi.c */
+#ifdef CONFIG_ACPI
+acpi_physical_address get_rsdp_addr(void);
+#else
+static inline acpi_physical_address get_rsdp_addr(void) { return 0; }
+#endif
+
+#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
+extern struct mem_vector immovable_mem[MAX_NUMNODES*2];
+int count_immovable_mem_regions(void);
+#else
+static inline int count_immovable_mem_regions(void) { return 0; }
+#endif
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 9e2157371491..f8debf7aeb4c 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,5 +1,7 @@
+#include <linux/efi.h>
#include <asm/e820/types.h>
#include <asm/processor.h>
+#include <asm/efi.h>
#include "pgtable.h"
#include "../string.h"
@@ -37,9 +39,10 @@ int cmdline_find_option_bool(const char *option);
static unsigned long find_trampoline_placement(void)
{
- unsigned long bios_start, ebda_start;
+ unsigned long bios_start = 0, ebda_start = 0;
unsigned long trampoline_start;
struct boot_e820_entry *entry;
+ char *signature;
int i;
/*
@@ -47,8 +50,18 @@ static unsigned long find_trampoline_placement(void)
* This code is based on reserve_bios_regions().
*/
- ebda_start = *(unsigned short *)0x40e << 4;
- bios_start = *(unsigned short *)0x413 << 10;
+ /*
+ * EFI systems may not provide legacy ROM. The memory may not be mapped
+ * at all.
+ *
+ * Only look for values in the legacy ROM for non-EFI system.
+ */
+ signature = (char *)&boot_params->efi_info.efi_loader_signature;
+ if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) &&
+ strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) {
+ ebda_start = *(unsigned short *)0x40e << 4;
+ bios_start = *(unsigned short *)0x413 << 10;
+ }
if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
bios_start = BIOS_START_MAX;
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index f491bbde8493..508cfa6828c5 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm-generic/vmlinux.lds.h>
-OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT)
#undef i386
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 96a6c7563538..0149e41d42c2 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -3,7 +3,7 @@
*
* Linker script for the i386 setup code
*/
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_FORMAT("elf32-i386")
OUTPUT_ARCH(i386)
ENTRY(_start)
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index c4428a176973..315a67b8896b 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -13,10 +13,14 @@
*/
#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
#include <asm/asm.h>
#include "ctype.h"
#include "string.h"
+#define KSTRTOX_OVERFLOW (1U << 31)
+
/*
* Undef these macros so that the functions that we provide
* here will have the correct names regardless of how string.h
@@ -187,3 +191,140 @@ char *strchr(const char *s, int c)
return NULL;
return (char *)s;
}
+
+static inline u64 __div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
+{
+ union {
+ u64 v64;
+ u32 v32[2];
+ } d = { dividend };
+ u32 upper;
+
+ upper = d.v32[1];
+ d.v32[1] = 0;
+ if (upper >= divisor) {
+ d.v32[1] = upper / divisor;
+ upper %= divisor;
+ }
+ asm ("divl %2" : "=a" (d.v32[0]), "=d" (*remainder) :
+ "rm" (divisor), "0" (d.v32[0]), "1" (upper));
+ return d.v64;
+}
+
+static inline u64 __div_u64(u64 dividend, u32 divisor)
+{
+ u32 remainder;
+
+ return __div_u64_rem(dividend, divisor, &remainder);
+}
+
+static inline char _tolower(const char c)
+{
+ return c | 0x20;
+}
+
+static const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
+{
+ if (*base == 0) {
+ if (s[0] == '0') {
+ if (_tolower(s[1]) == 'x' && isxdigit(s[2]))
+ *base = 16;
+ else
+ *base = 8;
+ } else
+ *base = 10;
+ }
+ if (*base == 16 && s[0] == '0' && _tolower(s[1]) == 'x')
+ s += 2;
+ return s;
+}
+
+/*
+ * Convert non-negative integer string representation in explicitly given radix
+ * to an integer.
+ * Return number of characters consumed maybe or-ed with overflow bit.
+ * If overflow occurs, result integer (incorrect) is still returned.
+ *
+ * Don't you dare use this function.
+ */
+static unsigned int _parse_integer(const char *s,
+ unsigned int base,
+ unsigned long long *p)
+{
+ unsigned long long res;
+ unsigned int rv;
+
+ res = 0;
+ rv = 0;
+ while (1) {
+ unsigned int c = *s;
+ unsigned int lc = c | 0x20; /* don't tolower() this line */
+ unsigned int val;
+
+ if ('0' <= c && c <= '9')
+ val = c - '0';
+ else if ('a' <= lc && lc <= 'f')
+ val = lc - 'a' + 10;
+ else
+ break;
+
+ if (val >= base)
+ break;
+ /*
+ * Check for overflow only if we are within range of
+ * it in the max base we support (16)
+ */
+ if (unlikely(res & (~0ull << 60))) {
+ if (res > __div_u64(ULLONG_MAX - val, base))
+ rv |= KSTRTOX_OVERFLOW;
+ }
+ res = res * base + val;
+ rv++;
+ s++;
+ }
+ *p = res;
+ return rv;
+}
+
+static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+ unsigned long long _res;
+ unsigned int rv;
+
+ s = _parse_integer_fixup_radix(s, &base);
+ rv = _parse_integer(s, base, &_res);
+ if (rv & KSTRTOX_OVERFLOW)
+ return -ERANGE;
+ if (rv == 0)
+ return -EINVAL;
+ s += rv;
+ if (*s == '\n')
+ s++;
+ if (*s)
+ return -EINVAL;
+ *res = _res;
+ return 0;
+}
+
+/**
+ * kstrtoull - convert a string to an unsigned long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ * include a single newline before its terminating null. The first character
+ * may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ * given as 0, then the base of the string is automatically detected with the
+ * conventional semantics - If it begins with 0x the number will be parsed as a
+ * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ * parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
+int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
+{
+ if (s[0] == '+')
+ s++;
+ return _kstrtoull(s, base, res);
+}
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 3d78e27077f4..38d8f2f5e47e 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -29,4 +29,5 @@ extern unsigned int atou(const char *s);
extern unsigned long long simple_strtoull(const char *cp, char **endp,
unsigned int base);
+int kstrtoull(const char *s, unsigned int base, unsigned long long *res);
#endif /* BOOT_STRING_H */
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 4bb95d7ad947..9f908112bbb9 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -287,7 +287,6 @@ CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_FRAME_WARN=1024
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
@@ -310,3 +309,5 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_CRYPTO_AES_586=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_EFI_STUB=y
+CONFIG_ACPI_BGRT=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 0fed049422a8..1d3badfda09e 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -286,7 +286,6 @@ CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_KERNEL=y
@@ -308,3 +307,6 @@ CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_EFI_STUB=y
+CONFIG_EFI_MIXED=y
+CONFIG_ACPI_BGRT=y
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 955ab6a3b61f..8da78595d69d 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -429,3 +429,6 @@
421 i386 rt_sigtimedwait_time64 sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait_time64
422 i386 futex_time64 sys_futex __ia32_sys_futex
423 i386 sched_rr_get_interval_time64 sys_sched_rr_get_interval __ia32_sys_sched_rr_get_interval
+425 i386 io_uring_setup sys_io_uring_setup __ia32_sys_io_uring_setup
+426 i386 io_uring_enter sys_io_uring_enter __ia32_sys_io_uring_enter
+427 i386 io_uring_register sys_io_uring_register __ia32_sys_io_uring_register
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 2ae92fddb6d5..c768447f97ec 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -345,6 +345,9 @@
334 common rseq __x64_sys_rseq
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
+425 common io_uring_setup __x64_sys_io_uring_setup
+426 common io_uring_enter __x64_sys_io_uring_enter
+427 common io_uring_register __x64_sys_io_uring_register
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index b684f0294f35..e2b1447192a8 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1995,7 +1995,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
*/
static void free_fake_cpuc(struct cpu_hw_events *cpuc)
{
- kfree(cpuc->shared_regs);
+ intel_cpuc_finish(cpuc);
kfree(cpuc);
}
@@ -2007,14 +2007,11 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
if (!cpuc)
return ERR_PTR(-ENOMEM);
-
- /* only needed, if we have extra_regs */
- if (x86_pmu.extra_regs) {
- cpuc->shared_regs = allocate_shared_regs(cpu);
- if (!cpuc->shared_regs)
- goto error;
- }
cpuc->is_fake = 1;
+
+ if (intel_cpuc_prepare(cpuc, cpu))
+ goto error;
+
return cpuc;
error:
free_fake_cpuc(cpuc);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 17096d3cd616..35102ecdfc8d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2000,6 +2000,39 @@ static void intel_pmu_nhm_enable_all(int added)
intel_pmu_enable_all(added);
}
+static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
+{
+ u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0;
+
+ if (cpuc->tfa_shadow != val) {
+ cpuc->tfa_shadow = val;
+ wrmsrl(MSR_TSX_FORCE_ABORT, val);
+ }
+}
+
+static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+ /*
+ * We're going to use PMC3, make sure TFA is set before we touch it.
+ */
+ if (cntr == 3 && !cpuc->is_fake)
+ intel_set_tfa(cpuc, true);
+}
+
+static void intel_tfa_pmu_enable_all(int added)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * If we find PMC3 is no longer used when we enable the PMU, we can
+ * clear TFA.
+ */
+ if (!test_bit(3, cpuc->active_mask))
+ intel_set_tfa(cpuc, false);
+
+ intel_pmu_enable_all(added);
+}
+
static void enable_counter_freeze(void)
{
update_debugctlmsr(get_debugctlmsr() |
@@ -2770,6 +2803,35 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
}
static struct event_constraint *
+dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx)
+{
+ WARN_ON_ONCE(!cpuc->constraint_list);
+
+ if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+ struct event_constraint *cx;
+
+ /*
+ * grab pre-allocated constraint entry
+ */
+ cx = &cpuc->constraint_list[idx];
+
+ /*
+ * initialize dynamic constraint
+ * with static constraint
+ */
+ *cx = *c;
+
+ /*
+ * mark constraint as dynamic
+ */
+ cx->flags |= PERF_X86_EVENT_DYNAMIC;
+ c = cx;
+ }
+
+ return c;
+}
+
+static struct event_constraint *
intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
int idx, struct event_constraint *c)
{
@@ -2799,27 +2861,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* only needed when constraint has not yet
* been cloned (marked dynamic)
*/
- if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
- struct event_constraint *cx;
-
- /*
- * grab pre-allocated constraint entry
- */
- cx = &cpuc->constraint_list[idx];
-
- /*
- * initialize dynamic constraint
- * with static constraint
- */
- *cx = *c;
-
- /*
- * mark constraint as dynamic, so we
- * can free it later on
- */
- cx->flags |= PERF_X86_EVENT_DYNAMIC;
- c = cx;
- }
+ c = dyn_constraint(cpuc, c, idx);
/*
* From here on, the constraint is dynamic.
@@ -3357,6 +3399,26 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return c;
}
+static bool allow_tsx_force_abort = true;
+
+static struct event_constraint *
+tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event);
+
+ /*
+ * Without TFA we must not use PMC3.
+ */
+ if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
+ c = dyn_constraint(cpuc, c, idx);
+ c->idxmsk64 &= ~(1ULL << 3);
+ c->weight--;
+ }
+
+ return c;
+}
+
/*
* Broadwell:
*
@@ -3410,7 +3472,7 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
return x86_event_sysfs_show(page, config, event);
}
-struct intel_shared_regs *allocate_shared_regs(int cpu)
+static struct intel_shared_regs *allocate_shared_regs(int cpu)
{
struct intel_shared_regs *regs;
int i;
@@ -3442,23 +3504,24 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
return c;
}
-static int intel_pmu_cpu_prepare(int cpu)
-{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
+{
if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
cpuc->shared_regs = allocate_shared_regs(cpu);
if (!cpuc->shared_regs)
goto err;
}
- if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+ if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
- cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+ cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
if (!cpuc->constraint_list)
goto err_shared_regs;
+ }
+ if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
if (!cpuc->excl_cntrs)
goto err_constraint_list;
@@ -3480,6 +3543,11 @@ err:
return -ENOMEM;
}
+static int intel_pmu_cpu_prepare(int cpu)
+{
+ return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
+}
+
static void flip_smm_bit(void *data)
{
unsigned long set = *(unsigned long *)data;
@@ -3554,9 +3622,8 @@ static void intel_pmu_cpu_starting(int cpu)
}
}
-static void free_excl_cntrs(int cpu)
+static void free_excl_cntrs(struct cpu_hw_events *cpuc)
{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_excl_cntrs *c;
c = cpuc->excl_cntrs;
@@ -3564,9 +3631,10 @@ static void free_excl_cntrs(int cpu)
if (c->core_id == -1 || --c->refcnt == 0)
kfree(c);
cpuc->excl_cntrs = NULL;
- kfree(cpuc->constraint_list);
- cpuc->constraint_list = NULL;
}
+
+ kfree(cpuc->constraint_list);
+ cpuc->constraint_list = NULL;
}
static void intel_pmu_cpu_dying(int cpu)
@@ -3577,9 +3645,8 @@ static void intel_pmu_cpu_dying(int cpu)
disable_counter_freeze();
}
-static void intel_pmu_cpu_dead(int cpu)
+void intel_cpuc_finish(struct cpu_hw_events *cpuc)
{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *pc;
pc = cpuc->shared_regs;
@@ -3589,7 +3656,12 @@ static void intel_pmu_cpu_dead(int cpu)
cpuc->shared_regs = NULL;
}
- free_excl_cntrs(cpu);
+ free_excl_cntrs(cpuc);
+}
+
+static void intel_pmu_cpu_dead(int cpu)
+{
+ intel_cpuc_finish(&per_cpu(cpu_hw_events, cpu));
}
static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -4107,8 +4179,11 @@ static struct attribute *intel_pmu_caps_attrs[] = {
NULL
};
+DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
+
static struct attribute *intel_pmu_attrs[] = {
&dev_attr_freeze_on_smi.attr,
+ NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
NULL,
};
@@ -4220,6 +4295,8 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
+ /* fall through */
+
case INTEL_FAM6_CORE2_MEROM_L:
case INTEL_FAM6_CORE2_PENRYN:
case INTEL_FAM6_CORE2_DUNNINGTON:
@@ -4605,6 +4682,15 @@ __init int intel_pmu_init(void)
tsx_attr = hsw_tsx_events_attrs;
intel_pmu_pebs_data_source_skl(
boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
+
+ if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
+ x86_pmu.flags |= PMU_FL_TFA;
+ x86_pmu.get_event_constraints = tfa_get_event_constraints;
+ x86_pmu.enable_all = intel_tfa_pmu_enable_all;
+ x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
+ intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
+ }
+
pr_cont("Skylake events, ");
name = "skylake";
break;
@@ -4756,7 +4842,7 @@ static __init int fixup_ht_bug(void)
hardlockup_detector_perf_restart();
for_each_online_cpu(c)
- free_excl_cntrs(c);
+ free_excl_cntrs(&per_cpu(cpu_hw_events, c));
cpus_read_unlock();
pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index c88ed39582a1..580c1b91c454 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -931,6 +931,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
ret = X86_BR_ZERO_CALL;
break;
}
+ /* fall through */
case 0x9a: /* call far absolute */
ret = X86_BR_CALL;
break;
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index d516161c00c4..9fe64c01a2e5 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -732,6 +732,7 @@ static int uncore_pmu_event_init(struct perf_event *event)
/* fixed counters have event field hardcoded to zero */
hwc->config = 0ULL;
} else if (is_freerunning_event(event)) {
+ hwc->config = event->attr.config;
if (!check_valid_freerunning_event(box, event))
return -EINVAL;
event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index cb46d602a6b8..853a49a8ccf6 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -292,8 +292,8 @@ static inline
unsigned int uncore_freerunning_counter(struct intel_uncore_box *box,
struct perf_event *event)
{
- unsigned int type = uncore_freerunning_type(event->attr.config);
- unsigned int idx = uncore_freerunning_idx(event->attr.config);
+ unsigned int type = uncore_freerunning_type(event->hw.config);
+ unsigned int idx = uncore_freerunning_idx(event->hw.config);
struct intel_uncore_pmu *pmu = box->pmu;
return pmu->type->freerunning[type].counter_base +
@@ -377,7 +377,7 @@ static inline
unsigned int uncore_freerunning_bits(struct intel_uncore_box *box,
struct perf_event *event)
{
- unsigned int type = uncore_freerunning_type(event->attr.config);
+ unsigned int type = uncore_freerunning_type(event->hw.config);
return box->pmu->type->freerunning[type].bits;
}
@@ -385,7 +385,7 @@ unsigned int uncore_freerunning_bits(struct intel_uncore_box *box,
static inline int uncore_num_freerunning(struct intel_uncore_box *box,
struct perf_event *event)
{
- unsigned int type = uncore_freerunning_type(event->attr.config);
+ unsigned int type = uncore_freerunning_type(event->hw.config);
return box->pmu->type->freerunning[type].num_counters;
}
@@ -399,8 +399,8 @@ static inline int uncore_num_freerunning_types(struct intel_uncore_box *box,
static inline bool check_valid_freerunning_event(struct intel_uncore_box *box,
struct perf_event *event)
{
- unsigned int type = uncore_freerunning_type(event->attr.config);
- unsigned int idx = uncore_freerunning_idx(event->attr.config);
+ unsigned int type = uncore_freerunning_type(event->hw.config);
+ unsigned int idx = uncore_freerunning_idx(event->hw.config);
return (type < uncore_num_freerunning_types(box, event)) &&
(idx < uncore_num_freerunning(box, event));
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index b12517fae77a..13493f43b247 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -442,9 +442,11 @@ static int snb_uncore_imc_event_init(struct perf_event *event)
/* must be done before validate_group */
event->hw.event_base = base;
- event->hw.config = cfg;
event->hw.idx = idx;
+ /* Convert to standard encoding format for freerunning counters */
+ event->hw.config = ((cfg - 1) << 8) | 0x10ff;
+
/* no group validation needed, we have free running counters */
return 0;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 7e75f474b076..b04ae6c8775e 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -243,6 +243,11 @@ struct cpu_hw_events {
int excl_thread_id; /* 0 or 1 */
/*
+ * SKL TSX_FORCE_ABORT shadow
+ */
+ u64 tfa_shadow;
+
+ /*
* AMD specific bits
*/
struct amd_nb *amd_nb;
@@ -682,6 +687,7 @@ do { \
#define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */
#define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */
#define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */
+#define PMU_FL_TFA 0x20 /* deal with TSX force abort */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -890,7 +896,8 @@ struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event);
-struct intel_shared_regs *allocate_shared_regs(int cpu);
+extern int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu);
+extern void intel_cpuc_finish(struct cpu_hw_events *cpuc);
int intel_pmu_init(void);
@@ -1026,9 +1033,13 @@ static inline int intel_pmu_init(void)
return 0;
}
-static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
+static inline int intel_cpuc_prepare(struct cpu_hw_event *cpuc, int cpu)
+{
+ return 0;
+}
+
+static inline void intel_cpuc_finish(struct cpu_hw_event *cpuc)
{
- return NULL;
}
static inline int is_ht_workaround_enabled(void)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 7abb09e2eeb8..6461a16b4559 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -96,6 +96,7 @@ void __percpu **hyperv_pcpu_input_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
u32 hv_max_vp_index;
+EXPORT_SYMBOL_GPL(hv_max_vp_index);
static int hv_cpu_init(unsigned int cpu)
{
@@ -406,6 +407,13 @@ void hyperv_cleanup(void)
/* Reset our OS id */
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
+ /*
+ * Reset hypercall page reference before reset the page,
+ * let hypercall operations fail safely rather than
+ * panic the kernel for using invalid hypercall page
+ */
+ hv_hypercall_pg = NULL;
+
/* Reset the hypercall page */
hypercall_msr.as_uint64 = 0;
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 1908214b9125..ce92c4acc913 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -7,7 +7,6 @@
#include <asm-generic/asm-prototypes.h>
-#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/special_insns.h>
#include <asm/preempt.h>
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6d6122524711..981ff9479648 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -344,6 +344,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index fa2c93cb42a2..fb04a3ded7dd 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -137,37 +137,25 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
- else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
+ else
return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
- /* See comment in copy_fxregs_to_kernel() below. */
- return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
}
static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
- if (IS_ENABLED(CONFIG_X86_32)) {
+ if (IS_ENABLED(CONFIG_X86_32))
kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- } else {
- if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
- kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
- } else {
- /* See comment in copy_fxregs_to_kernel() below. */
- kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
- }
- }
+ else
+ kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
+ else
return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-
- /* See comment in copy_fxregs_to_kernel() below. */
- return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
- "m" (*fx));
}
static inline void copy_kernel_to_fregs(struct fregs_state *fx)
@@ -184,34 +172,8 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
{
if (IS_ENABLED(CONFIG_X86_32))
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
- else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
+ else
asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
- else {
- /* Using "rex64; fxsave %0" is broken because, if the memory
- * operand uses any extended registers for addressing, a second
- * REX prefix will be generated (to the assembler, rex64
- * followed by semicolon is a separate instruction), and hence
- * the 64-bitness is lost.
- *
- * Using "fxsaveq %0" would be the ideal choice, but is only
- * supported starting with gas 2.16.
- *
- * Using, as a workaround, the properly prefixed form below
- * isn't accepted by any binutils version so far released,
- * complaining that the same type of prefix is used twice if
- * an extended register is needed for addressing (fix submitted
- * to mainline 2005-11-21).
- *
- * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
- *
- * This, however, we can work around by forcing the compiler to
- * select an addressing mode that doesn't require extended
- * registers.
- */
- asm volatile( "rex64/fxsave (%[fx])"
- : "=m" (fpu->state.fxsave)
- : [fx] "R" (&fpu->state.fxsave));
- }
}
/* These macros all use (%edi)/(%rdi) as the single memory argument. */
@@ -414,6 +376,13 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
{
if (likely(use_xsave())) {
copy_xregs_to_kernel(&fpu->state.xsave);
+
+ /*
+ * AVX512 state is tracked here because its use is
+ * known to slow the max clock speed of the core.
+ */
+ if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
+ fpu->avx512_timestamp = jiffies;
return 1;
}
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 202c53918ecf..2e32e178e064 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -303,6 +303,13 @@ struct fpu {
unsigned char initialized;
/*
+ * @avx512_timestamp:
+ *
+ * Records the timestamp of AVX512 use during last context switch.
+ */
+ unsigned long avx512_timestamp;
+
+ /*
* @state:
*
* In-memory copy of all FPU registers that we save/restore
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c1a812bd5a27..22d05e3835f0 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -48,6 +48,7 @@
#define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */
#define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */
+#define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */
/*
* McaX field if set indicates a given bank supports MCA extensions:
@@ -307,11 +308,17 @@ enum smca_bank_types {
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */
SMCA_CS, /* Coherent Slave */
+ SMCA_CS_V2, /* Coherent Slave */
SMCA_PIE, /* Power, Interrupts, etc. */
SMCA_UMC, /* Unified Memory Controller */
SMCA_PB, /* Parameter Block */
SMCA_PSP, /* Platform Security Processor */
+ SMCA_PSP_V2, /* Platform Security Processor */
SMCA_SMU, /* System Management Unit */
+ SMCA_SMU_V2, /* System Management Unit */
+ SMCA_MP5, /* Microprocessor 5 Unit */
+ SMCA_NBIO, /* Northbridge IO Unit */
+ SMCA_PCIE, /* PCI Express Unit */
N_SMCA_BANK_TYPES
};
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 8e40c2446fd1..ca5bc0eacb95 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -666,6 +666,12 @@
#define MSR_IA32_TSC_DEADLINE 0x000006E0
+
+#define MSR_TSX_FORCE_ABORT 0x0000010F
+
+#define MSR_TFA_RTM_FORCE_ABORT_BIT 0
+#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT)
+
/* P4/Xeon+ specific */
#define MSR_IA32_MCG_EAX 0x00000180
#define MSR_IA32_MCG_EBX 0x00000181
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 0ce558a8150d..8f657286d599 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -7,11 +7,7 @@
#endif
#ifdef CONFIG_KASAN
-#ifdef CONFIG_KASAN_EXTRA
-#define KASAN_STACK_ORDER 2
-#else
#define KASAN_STACK_ORDER 1
-#endif
#else
#define KASAN_STACK_ORDER 0
#endif
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 9c85b54bf03c..0bb566315621 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -259,8 +259,7 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
#define gup_fast_permitted gup_fast_permitted
-static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
- int write)
+static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
{
unsigned long len, end;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 33051436c864..2bb3a648fc12 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -742,7 +742,6 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
extern void enable_sep_cpu(void);
extern int sysenter_setup(void);
-void early_trap_pf_init(void);
/* Defined in head.S */
extern struct desc_ptr early_gdt_descr;
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 62004d22524a..1954dd5552a2 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -34,10 +34,7 @@ static inline void set_fs(mm_segment_t fs)
}
#define segment_eq(a, b) ((a).seg == (b).seg)
-
#define user_addr_max() (current->thread.addr_limit.seg)
-#define __addr_ok(addr) \
- ((unsigned long __force)(addr) < user_addr_max())
/*
* Test whether a block of memory is a valid user space address.
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 1f86e1b0a5cd..499578f7e6d7 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -23,6 +23,12 @@ struct unwind_state {
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
+ /*
+ * If non-NULL: The current frame is incomplete and doesn't contain a
+ * valid BP. When looking for the next frame, use this instead of the
+ * non-existent saved BP.
+ */
+ unsigned long *next_bp;
struct pt_regs *regs;
#else
unsigned long *sp;
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 3f697a9e3f59..8cfccc3cbbf4 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -141,7 +141,6 @@ enum uv_memprotect {
*/
extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
-extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *);
extern s64 uv_bios_freq_base(u64, u64 *);
@@ -152,11 +151,7 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
-#ifdef CONFIG_EFI
extern void uv_bios_init(void);
-#else
-void uv_bios_init(void) { }
-#endif
extern unsigned long sn_rtc_cycles_per_second;
extern int uv_type;
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index ef05bea7010d..de6f0d59a24f 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -332,15 +332,11 @@ HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
return _hypercall4(int, update_va_mapping, va,
new_val.pte, new_val.pte >> 32, flags);
}
-extern int __must_check xen_event_channel_op_compat(int, void *);
static inline int
HYPERVISOR_event_channel_op(int cmd, void *arg)
{
- int rc = _hypercall2(int, event_channel_op, cmd, arg);
- if (unlikely(rc == -ENOSYS))
- rc = xen_event_channel_op_compat(cmd, arg);
- return rc;
+ return _hypercall2(int, event_channel_op, cmd, arg);
}
static inline int
@@ -355,15 +351,10 @@ HYPERVISOR_console_io(int cmd, int count, char *str)
return _hypercall3(int, console_io, cmd, count, str);
}
-extern int __must_check xen_physdev_op_compat(int, void *);
-
static inline int
HYPERVISOR_physdev_op(int cmd, void *arg)
{
- int rc = _hypercall2(int, physdev_op, cmd, arg);
- if (unlikely(rc == -ENOSYS))
- rc = xen_physdev_op_compat(cmd, arg);
- return rc;
+ return _hypercall2(int, physdev_op, cmd, arg);
}
static inline int
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 2624de16cd7a..8dcbf6890714 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -935,6 +935,9 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
#define HPET_RESOURCE_NAME_SIZE 9
hpet_res = memblock_alloc(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE,
SMP_CACHE_BYTES);
+ if (!hpet_res)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
hpet_res->name = (void *)&hpet_res[1];
hpet_res->flags = IORESOURCE_MEM;
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 0c26b1b44e51..4203d4f0c68d 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -90,7 +90,7 @@ ret_point:
.data
ALIGN
ENTRY(saved_magic) .long 0
-ENTRY(saved_eip) .long 0
+saved_eip: .long 0
# saved registers
saved_idt: .long 0,0
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 50b8ed0317a3..510fa12aab73 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -125,12 +125,12 @@ ENTRY(do_suspend_lowlevel)
ENDPROC(do_suspend_lowlevel)
.data
-ENTRY(saved_rbp) .quad 0
-ENTRY(saved_rsi) .quad 0
-ENTRY(saved_rdi) .quad 0
-ENTRY(saved_rbx) .quad 0
+saved_rbp: .quad 0
+saved_rsi: .quad 0
+saved_rdi: .quad 0
+saved_rbx: .quad 0
-ENTRY(saved_rip) .quad 0
-ENTRY(saved_rsp) .quad 0
+saved_rip: .quad 0
+saved_rsp: .quad 0
ENTRY(saved_magic) .quad 0
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 2953bbf05c08..53aa234a6803 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -812,6 +812,7 @@ static int irq_polarity(int idx)
return IOAPIC_POL_HIGH;
case MP_IRQPOL_RESERVED:
pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n");
+ /* fall through */
case MP_IRQPOL_ACTIVE_LOW:
default: /* Pointless default required due to do gcc stupidity */
return IOAPIC_POL_LOW;
@@ -859,6 +860,7 @@ static int irq_trigger(int idx)
return IOAPIC_EDGE;
case MP_IRQTRIG_RESERVED:
pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n");
+ /* fall through */
case MP_IRQTRIG_LEVEL:
default: /* Pointless default required due to do gcc stupidity */
return IOAPIC_LEVEL;
@@ -2579,6 +2581,8 @@ static struct resource * __init ioapic_setup_resources(void)
n *= nr_ioapics;
mem = memblock_alloc(n, SMP_CACHE_BYTES);
+ if (!mem)
+ panic("%s: Failed to allocate %lu bytes\n", __func__, n);
res = (void *)mem;
mem += sizeof(struct resource) * nr_ioapics;
@@ -2623,6 +2627,9 @@ fake_ioapic_page:
#endif
ioapic_phys = (unsigned long)memblock_alloc(PAGE_SIZE,
PAGE_SIZE);
+ if (!ioapic_phys)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE);
ioapic_phys = __pa(ioapic_phys);
}
set_fixmap_nocache(idx, ioapic_phys);
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index c4d1023fb0ab..395d46f78582 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -248,6 +248,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
switch (leaf) {
case 1:
l1 = &l1i;
+ /* fall through */
case 0:
if (!l1->val)
return;
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 89298c83de53..e64de5149e50 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -88,11 +88,17 @@ static struct smca_bank_name smca_names[] = {
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
[SMCA_CS] = { "coherent_slave", "Coherent Slave" },
+ [SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
[SMCA_UMC] = { "umc", "Unified Memory Controller" },
[SMCA_PB] = { "param_block", "Parameter Block" },
[SMCA_PSP] = { "psp", "Platform Security Processor" },
+ [SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
[SMCA_SMU] = { "smu", "System Management Unit" },
+ [SMCA_SMU_V2] = { "smu", "System Management Unit" },
+ [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
+ [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
+ [SMCA_PCIE] = { "pcie", "PCI Express Unit" },
};
static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
@@ -138,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
/* ZN Core (HWID=0xB0) MCA types */
- { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
- { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+ { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
/* Data Fabric MCA types */
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
- { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF },
+ { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
+ { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
/* Unified Memory Controller MCA type */
- { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F },
+ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
/* Parameter Block MCA type */
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
/* Platform Security Processor MCA type */
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
+ { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
/* System Management Unit MCA type */
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
+ { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
+
+ /* Microprocessor 5 Unit MCA type */
+ { SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
+
+ /* Northbridge IO Unit MCA type */
+ { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
+
+ /* PCI Express Unit MCA type */
+ { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
};
struct smca_bank smca_banks[MAX_NR_BANKS];
@@ -545,6 +563,40 @@ out:
return offset;
}
+/*
+ * Turn off MC4_MISC thresholding banks on all family 0x15 models since
+ * they're not supported there.
+ */
+void disable_err_thresholding(struct cpuinfo_x86 *c)
+{
+ int i;
+ u64 hwcr;
+ bool need_toggle;
+ u32 msrs[] = {
+ 0x00000413, /* MC4_MISC0 */
+ 0xc0000408, /* MC4_MISC1 */
+ };
+
+ if (c->x86 != 0x15)
+ return;
+
+ rdmsrl(MSR_K7_HWCR, hwcr);
+
+ /* McStatusWrEn has to be set */
+ need_toggle = !(hwcr & BIT(18));
+
+ if (need_toggle)
+ wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+
+ /* Clear CntP bit safely */
+ for (i = 0; i < ARRAY_SIZE(msrs); i++)
+ msr_clear_bit(msrs[i], 62);
+
+ /* restore old settings */
+ if (need_toggle)
+ wrmsrl(MSR_K7_HWCR, hwcr);
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -552,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
unsigned int bank, block, cpu = smp_processor_id();
int offset = -1;
+ disable_err_thresholding(c);
+
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (mce_flags.smca)
smca_configure(bank, cpu);
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 1d9b3ce662a0..c038e5c00a59 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -64,11 +64,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
#define CPER_CREATOR_MCE \
- UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
- 0x64, 0x90, 0xb8, 0x9d)
+ GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
+ 0x64, 0x90, 0xb8, 0x9d)
#define CPER_SECTION_TYPE_MCE \
- UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
- 0x04, 0x4a, 0x38, 0xfc)
+ GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
+ 0x04, 0x4a, 0x38, 0xfc)
/*
* CPER specification (in UEFI specification 2.3 appendix N) requires
@@ -135,7 +135,7 @@ retry:
goto out;
/* try to skip other type records in storage */
else if (rc != sizeof(rcd) ||
- uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
+ !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
goto retry;
memcpy(m, &rcd.mce, sizeof(*m));
rc = sizeof(*m);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 6ce290c506d9..b7fb541a4873 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 0x15 && c->x86_model <= 0xf)
mce_flags.overflow_recov = 1;
- /*
- * Turn off MC4_MISC thresholding banks on those models since
- * they're not supported there.
- */
- if (c->x86 == 0x15 &&
- (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
- int i;
- u64 hwcr;
- bool need_toggle;
- u32 msrs[] = {
- 0x00000413, /* MC4_MISC0 */
- 0xc0000408, /* MC4_MISC1 */
- };
-
- rdmsrl(MSR_K7_HWCR, hwcr);
-
- /* McStatusWrEn has to be set */
- need_toggle = !(hwcr & BIT(18));
-
- if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
-
- /* Clear CntP bit safely */
- for (i = 0; i < ARRAY_SIZE(msrs); i++)
- msr_clear_bit(msrs[i], 62);
-
- /* restore old settings */
- if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr);
- }
}
if (c->x86_vendor == X86_VENDOR_INTEL) {
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index dc3e26e905a3..65201e180fe0 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -165,6 +165,11 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
KERNEL
),
+ MCESEV(
+ PANIC, "Instruction fetch error in kernel",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+ KERNEL
+ ),
#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e81a2db42df7..3fa238a137d2 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -328,6 +328,18 @@ static void __init ms_hyperv_init_platform(void)
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
# endif
+
+ /*
+ * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic,
+ * set x2apic destination mode to physcial mode when x2apic is available
+ * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs
+ * have 8-bit APIC id.
+ */
+# ifdef CONFIG_X86_X2APIC
+ if (x2apic_supported())
+ x2apic_phys = 1;
+# endif
+
#endif
}
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 3668c5df90c6..5bd011737272 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -296,7 +296,7 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
unsigned long sizek)
{
unsigned long hole_basek, hole_sizek;
- unsigned long second_basek, second_sizek;
+ unsigned long second_sizek;
unsigned long range0_basek, range0_sizek;
unsigned long range_basek, range_sizek;
unsigned long chunk_sizek;
@@ -304,7 +304,6 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
hole_basek = 0;
hole_sizek = 0;
- second_basek = 0;
second_sizek = 0;
chunk_sizek = state->chunk_sizek;
gran_sizek = state->gran_sizek;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 822b7db634ee..e49b77283924 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/kernfs.h>
+#include <linux/fs_context.h>
#include <linux/jump_label.h>
#define MSR_IA32_L3_QOS_CFG 0xc81
@@ -40,6 +41,21 @@
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
+
+struct rdt_fs_context {
+ struct kernfs_fs_context kfc;
+ bool enable_cdpl2;
+ bool enable_cdpl3;
+ bool enable_mba_mbps;
+};
+
+static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
+{
+ struct kernfs_fs_context *kfc = fc->fs_private;
+
+ return container_of(kfc, struct rdt_fs_context, kfc);
+}
+
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
/**
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 14bed6af8377..604c0e3bcc83 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -34,13 +34,6 @@
#include "pseudo_lock_event.h"
/*
- * MSR_MISC_FEATURE_CONTROL register enables the modification of hardware
- * prefetcher state. Details about this register can be found in the MSR
- * tables for specific platforms found in Intel's SDM.
- */
-#define MSR_MISC_FEATURE_CONTROL 0x000001a4
-
-/*
* The bits needed to disable hardware prefetching varies based on the
* platform. During initialization we will discover which bits to use.
*/
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 8388adf241b2..399601eda8e4 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -24,6 +24,7 @@
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
+#include <linux/fs_parser.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
#include <linux/seq_buf.h>
@@ -32,6 +33,7 @@
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/task_work.h>
+#include <linux/user_namespace.h>
#include <uapi/linux/magic.h>
@@ -1858,46 +1860,6 @@ static void cdp_disable_all(void)
cdpl2_disable();
}
-static int parse_rdtgroupfs_options(char *data)
-{
- char *token, *o = data;
- int ret = 0;
-
- while ((token = strsep(&o, ",")) != NULL) {
- if (!*token) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!strcmp(token, "cdp")) {
- ret = cdpl3_enable();
- if (ret)
- goto out;
- } else if (!strcmp(token, "cdpl2")) {
- ret = cdpl2_enable();
- if (ret)
- goto out;
- } else if (!strcmp(token, "mba_MBps")) {
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- ret = set_mba_sc(true);
- else
- ret = -EINVAL;
- if (ret)
- goto out;
- } else {
- ret = -EINVAL;
- goto out;
- }
- }
-
- return 0;
-
-out:
- pr_err("Invalid mount option \"%s\"\n", token);
-
- return ret;
-}
-
/*
* We don't allow rdtgroup directories to be created anywhere
* except the root directory. Thus when looking for the rdtgroup
@@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
struct rdtgroup *prgrp,
struct kernfs_node **mon_data_kn);
-static struct dentry *rdt_mount(struct file_system_type *fs_type,
- int flags, const char *unused_dev_name,
- void *data)
+static int rdt_enable_ctx(struct rdt_fs_context *ctx)
+{
+ int ret = 0;
+
+ if (ctx->enable_cdpl2)
+ ret = cdpl2_enable();
+
+ if (!ret && ctx->enable_cdpl3)
+ ret = cdpl3_enable();
+
+ if (!ret && ctx->enable_mba_mbps)
+ ret = set_mba_sc(true);
+
+ return ret;
+}
+
+static int rdt_get_tree(struct fs_context *fc)
{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct rdt_domain *dom;
struct rdt_resource *r;
- struct dentry *dentry;
int ret;
cpus_read_lock();
@@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
* resctrl file system can only be mounted once.
*/
if (static_branch_unlikely(&rdt_enable_key)) {
- dentry = ERR_PTR(-EBUSY);
+ ret = -EBUSY;
goto out;
}
- ret = parse_rdtgroupfs_options(data);
- if (ret) {
- dentry = ERR_PTR(ret);
+ ret = rdt_enable_ctx(ctx);
+ if (ret < 0)
goto out_cdp;
- }
closid_init();
ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
- if (ret) {
- dentry = ERR_PTR(ret);
- goto out_cdp;
- }
+ if (ret < 0)
+ goto out_mba;
if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
NULL, "mon_groups",
&kn_mongrp);
- if (ret) {
- dentry = ERR_PTR(ret);
+ if (ret < 0)
goto out_info;
- }
kernfs_get(kn_mongrp);
ret = mkdir_mondata_all(rdtgroup_default.kn,
&rdtgroup_default, &kn_mondata);
- if (ret) {
- dentry = ERR_PTR(ret);
+ if (ret < 0)
goto out_mongrp;
- }
kernfs_get(kn_mondata);
rdtgroup_default.mon.mon_data_kn = kn_mondata;
}
ret = rdt_pseudo_lock_init();
- if (ret) {
- dentry = ERR_PTR(ret);
+ if (ret)
goto out_mondata;
- }
- dentry = kernfs_mount(fs_type, flags, rdt_root,
- RDTGROUP_SUPER_MAGIC, NULL);
- if (IS_ERR(dentry))
+ ret = kernfs_get_tree(fc);
+ if (ret < 0)
goto out_psl;
if (rdt_alloc_capable)
@@ -2059,14 +2024,95 @@ out_mongrp:
kernfs_remove(kn_mongrp);
out_info:
kernfs_remove(kn_info);
+out_mba:
+ if (ctx->enable_mba_mbps)
+ set_mba_sc(false);
out_cdp:
cdp_disable_all();
out:
rdt_last_cmd_clear();
mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock();
+ return ret;
+}
+
+enum rdt_param {
+ Opt_cdp,
+ Opt_cdpl2,
+ Opt_mba_mpbs,
+ nr__rdt_params
+};
+
+static const struct fs_parameter_spec rdt_param_specs[] = {
+ fsparam_flag("cdp", Opt_cdp),
+ fsparam_flag("cdpl2", Opt_cdpl2),
+ fsparam_flag("mba_mpbs", Opt_mba_mpbs),
+ {}
+};
+
+static const struct fs_parameter_description rdt_fs_parameters = {
+ .name = "rdt",
+ .specs = rdt_param_specs,
+};
+
+static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
+ struct fs_parse_result result;
+ int opt;
+
+ opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
- return dentry;
+ switch (opt) {
+ case Opt_cdp:
+ ctx->enable_cdpl3 = true;
+ return 0;
+ case Opt_cdpl2:
+ ctx->enable_cdpl2 = true;
+ return 0;
+ case Opt_mba_mpbs:
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return -EINVAL;
+ ctx->enable_mba_mbps = true;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void rdt_fs_context_free(struct fs_context *fc)
+{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
+
+ kernfs_free_fs_context(fc);
+ kfree(ctx);
+}
+
+static const struct fs_context_operations rdt_fs_context_ops = {
+ .free = rdt_fs_context_free,
+ .parse_param = rdt_parse_param,
+ .get_tree = rdt_get_tree,
+};
+
+static int rdt_init_fs_context(struct fs_context *fc)
+{
+ struct rdt_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->kfc.root = rdt_root;
+ ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
+ fc->fs_private = &ctx->kfc;
+ fc->ops = &rdt_fs_context_ops;
+ if (fc->user_ns)
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(&init_user_ns);
+ fc->global = true;
+ return 0;
}
static int reset_all_ctrls(struct rdt_resource *r)
@@ -2239,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb)
}
static struct file_system_type rdt_fs_type = {
- .name = "resctrl",
- .mount = rdt_mount,
- .kill_sb = rdt_kill_sb,
+ .name = "resctrl",
+ .init_fs_context = rdt_init_fs_context,
+ .parameters = &rdt_fs_parameters,
+ .kill_sb = rdt_kill_sb,
};
static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 50895c2f937d..2879e234e193 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -14,6 +14,7 @@
#include <linux/acpi.h>
#include <linux/firmware-map.h>
#include <linux/sort.h>
+#include <linux/memory_hotplug.h>
#include <asm/e820/api.h>
#include <asm/setup.h>
@@ -671,21 +672,18 @@ __init void e820__reallocate_tables(void)
int size;
size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table->nr_entries;
- n = kmalloc(size, GFP_KERNEL);
+ n = kmemdup(e820_table, size, GFP_KERNEL);
BUG_ON(!n);
- memcpy(n, e820_table, size);
e820_table = n;
size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_kexec->nr_entries;
- n = kmalloc(size, GFP_KERNEL);
+ n = kmemdup(e820_table_kexec, size, GFP_KERNEL);
BUG_ON(!n);
- memcpy(n, e820_table_kexec, size);
e820_table_kexec = n;
size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries;
- n = kmalloc(size, GFP_KERNEL);
+ n = kmemdup(e820_table_firmware, size, GFP_KERNEL);
BUG_ON(!n);
- memcpy(n, e820_table_firmware, size);
e820_table_firmware = n;
}
@@ -778,7 +776,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
{
u64 addr;
- addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+ addr = memblock_phys_alloc(size, align);
if (addr) {
e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
@@ -881,6 +879,10 @@ static int __init parse_memopt(char *p)
e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
+#ifdef CONFIG_MEMORY_HOTPLUG
+ max_mem_size = mem_size;
+#endif
+
return 0;
}
early_param("mem", parse_memopt);
@@ -1095,6 +1097,9 @@ void __init e820__reserve_resources(void)
res = memblock_alloc(sizeof(*res) * e820_table->nr_entries,
SMP_CACHE_BYTES);
+ if (!res)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(*res) * e820_table->nr_entries);
e820_res = res;
for (i = 0; i < e820_table->nr_entries; i++) {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 9cc108456d0b..d7432c2b1051 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -669,7 +669,7 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size)
return false;
}
-static int init_xstate_size(void)
+static int __init init_xstate_size(void)
{
/* Recompute the context size for enabled features: */
unsigned int possible_xstate_size;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 3e3789c8f8e1..ef49517f6bb2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -49,7 +49,7 @@ int ftrace_arch_code_modify_post_process(void)
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
struct {
- unsigned char e8;
+ unsigned char op;
int offset;
} __attribute__((packed));
};
@@ -59,20 +59,23 @@ static int ftrace_calc_offset(long ip, long addr)
return (int)(addr - ip);
}
-static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+static unsigned char *
+ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr)
{
static union ftrace_code_union calc;
- calc.e8 = 0xe8;
+ calc.op = op;
calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
- /*
- * No locking needed, this must be called via kstop_machine
- * which in essence is like running on a uniprocessor machine.
- */
return calc.code;
}
+static unsigned char *
+ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+ return ftrace_text_replace(0xe8, ip, addr);
+}
+
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -665,22 +668,6 @@ int __init ftrace_dyn_arch_init(void)
return 0;
}
-#if defined(CONFIG_X86_64) || defined(CONFIG_FUNCTION_GRAPH_TRACER)
-static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
-{
- static union ftrace_code_union calc;
-
- /* Jmp not a call (ignore the .e8) */
- calc.e8 = 0xe9;
- calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
-
- /*
- * ftrace external locks synchronize the access to the static variable.
- */
- return calc.code;
-}
-#endif
-
/* Currently only x86_64 supports dynamic trampolines */
#ifdef CONFIG_X86_64
@@ -892,8 +879,8 @@ static void *addr_from_call(void *ptr)
return NULL;
/* Make sure this is a call */
- if (WARN_ON_ONCE(calc.e8 != 0xe8)) {
- pr_warn("Expected e8, got %x\n", calc.e8);
+ if (WARN_ON_ONCE(calc.op != 0xe8)) {
+ pr_warn("Expected e8, got %x\n", calc.op);
return NULL;
}
@@ -964,6 +951,11 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);
+static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
+{
+ return ftrace_text_replace(0xe9, ip, addr);
+}
+
static int ftrace_mod_jmp(unsigned long ip, void *func)
{
unsigned char *new;
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 34a5c1715148..ff9bfd40429e 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -261,12 +261,8 @@ static int arch_build_bp_info(struct perf_event *bp,
* allow kernel breakpoints at all.
*/
if (attr->bp_addr >= TASK_SIZE_MAX) {
-#ifdef CONFIG_KPROBES
if (within_kprobe_blacklist(attr->bp_addr))
return -EINVAL;
-#else
- return -EINVAL;
-#endif
}
hw->type = X86_BREAKPOINT_EXECUTE;
@@ -279,6 +275,7 @@ static int arch_build_bp_info(struct perf_event *bp,
hw->len = X86_BREAKPOINT_LEN_X;
return 0;
}
+ /* fall through */
default:
return -EINVAL;
}
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 53917a3ebf94..22f60dd26460 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -218,6 +218,9 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
params->screen_info.ext_mem_k = 0;
params->alt_mem_k = 0;
+ /* Always fill in RSDP: it is either 0 or a valid value */
+ params->acpi_rsdp_addr = boot_params.acpi_rsdp_addr;
+
/* Default APM info */
memset(&params->apm_bios_info, 0, sizeof(params->apm_bios_info));
@@ -256,7 +259,6 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz,
efi_setup_data_offset);
#endif
-
/* Setup EDD info */
memcpy(params->eddbuf, boot_params.eddbuf,
EDDMAXNR * sizeof(struct edd_info));
@@ -536,9 +538,17 @@ static int bzImage64_cleanup(void *loader_data)
#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
{
- return verify_pefile_signature(kernel, kernel_len,
- VERIFY_USE_SECONDARY_KEYRING,
- VERIFYING_KEXEC_PE_SIGNATURE);
+ int ret;
+
+ ret = verify_pefile_signature(kernel, kernel_len,
+ VERIFY_USE_SECONDARY_KEYRING,
+ VERIFYING_KEXEC_PE_SIGNATURE);
+ if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) {
+ ret = verify_pefile_signature(kernel, kernel_len,
+ VERIFY_USE_PLATFORM_KEYRING,
+ VERIFYING_KEXEC_PE_SIGNATURE);
+ }
+ return ret;
}
#endif
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 5db08425063e..4ff6b4cdb941 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -467,6 +467,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
ptr = &remcomInBuffer[1];
if (kgdb_hex2long(&ptr, &addr))
linux_regs->ip = addr;
+ /* fall through */
case 'D':
case 'k':
/* clear the trace bit */
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 4c8acdfdc5a7..ceba408ea982 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -352,6 +352,8 @@ void machine_kexec(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
+ u64 sme_mask = sme_me_mask;
+
VMCOREINFO_NUMBER(phys_base);
VMCOREINFO_SYMBOL(init_top_pgt);
vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n",
@@ -364,6 +366,7 @@ void arch_crash_save_vmcoreinfo(void)
vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
kaslr_offset());
VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
+ VMCOREINFO_NUMBER(sme_mask);
}
/* arch-dependent functionality related to kexec file-based syscall */
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 13af08827eef..4bf46575568a 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -106,22 +106,22 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
void *ptr;
if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from_nopanic(size, align, goal);
+ ptr = memblock_alloc_from(size, align, goal);
pr_info("cpu %d has no node %d or node-local memory\n",
cpu, node);
pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
cpu, size, __pa(ptr));
} else {
- ptr = memblock_alloc_try_nid_nopanic(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE,
- node);
+ ptr = memblock_alloc_try_nid(size, align, goal,
+ MEMBLOCK_ALLOC_ACCESSIBLE,
+ node);
pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
cpu, size, node, __pa(ptr));
}
return ptr;
#else
- return memblock_alloc_from_nopanic(size, align, goal);
+ return memblock_alloc_from(size, align, goal);
#endif
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c91ff9f9fe8a..ce1a67b70168 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -150,7 +150,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
*/
static void smp_callin(void)
{
- int cpuid, phys_id;
+ int cpuid;
/*
* If waken up by an INIT in an 82489DX configuration
@@ -161,11 +161,6 @@ static void smp_callin(void)
cpuid = smp_processor_id();
/*
- * (This works even if the APIC is not enabled.)
- */
- phys_id = read_apic_id();
-
- /*
* the boot CPU has finished the init stage and is spinning
* on callin_map until we finish. We are free to set up this
* CPU, first the APIC. (this is probably redundant on most
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index e289ce1332ab..d26f9e9c3d83 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -881,12 +881,12 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
- unsigned long cr0;
+ unsigned long cr0 = read_cr0();
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
#ifdef CONFIG_MATH_EMULATION
- if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) {
+ if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
struct math_emu_info info = { };
cond_local_irq_enable(regs);
@@ -898,7 +898,6 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#endif
/* This should not happen. */
- cr0 = read_cr0();
if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) {
/* Try to fix it up and carry on. */
write_cr0(cr0 & ~X86_CR0_TS);
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 3dc26f95d46e..9b9fd4826e7a 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -320,10 +320,14 @@ bool unwind_next_frame(struct unwind_state *state)
}
/* Get the next frame pointer: */
- if (state->regs)
+ if (state->next_bp) {
+ next_bp = state->next_bp;
+ state->next_bp = NULL;
+ } else if (state->regs) {
next_bp = (unsigned long *)state->regs->bp;
- else
+ } else {
next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task, *state->bp);
+ }
/* Move to the next frame if it's safe: */
if (!update_stack_state(state, next_bp))
@@ -398,6 +402,21 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
bp = get_frame_pointer(task, regs);
+ /*
+ * If we crash with IP==0, the last successfully executed instruction
+ * was probably an indirect function call with a NULL function pointer.
+ * That means that SP points into the middle of an incomplete frame:
+ * *SP is a return pointer, and *(SP-sizeof(unsigned long)) is where we
+ * would have written a frame pointer if we hadn't crashed.
+ * Pretend that the frame is complete and that BP points to it, but save
+ * the real BP so that we can use it when looking for the next frame.
+ */
+ if (regs && regs->ip == 0 &&
+ (unsigned long *)kernel_stack_pointer(regs) >= first_frame) {
+ state->next_bp = bp;
+ bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1;
+ }
+
/* Initialize stack info and make sure the frame data is accessible: */
get_stack_info(bp, state->task, &state->stack_info,
&state->stack_mask);
@@ -410,7 +429,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
*/
while (!unwind_done(state) &&
(!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
- state->bp < first_frame))
+ (state->next_bp == NULL && state->bp < first_frame)))
unwind_next_frame(state);
}
EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 26038eacf74a..89be1be1790c 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -113,6 +113,20 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip)
}
#endif
+/*
+ * If we crash with IP==0, the last successfully executed instruction
+ * was probably an indirect function call with a NULL function pointer,
+ * and we don't have unwind information for NULL.
+ * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function
+ * pointer into its parent and then continue normally from there.
+ */
+static struct orc_entry null_orc_entry = {
+ .sp_offset = sizeof(long),
+ .sp_reg = ORC_REG_SP,
+ .bp_reg = ORC_REG_UNDEFINED,
+ .type = ORC_TYPE_CALL
+};
+
static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
@@ -120,6 +134,9 @@ static struct orc_entry *orc_find(unsigned long ip)
if (!orc_init)
return NULL;
+ if (ip == 0)
+ return &null_orc_entry;
+
/* For non-init vmlinux addresses, use the fast lookup table: */
if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
unsigned int idx, start, stop;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 843feb94a950..ccf03416e434 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -745,6 +745,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
* OPCODE1() of the "short" jmp which checks the same condition.
*/
opc1 = OPCODE2(insn) - 0x10;
+ /* fall through */
default:
if (!is_cond_jmp_opcode(opc1))
return -ENOSYS;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0d618ee634ac..bad8c51fee6e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -31,7 +31,7 @@
#undef i386 /* in case the preprocessor is a 32bit one */
-OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT)
#ifdef CONFIG_X86_32
OUTPUT_ARCH(i386)
@@ -401,7 +401,7 @@ SECTIONS
* Per-cpu symbols which need to be offset from __per_cpu_load
* for the boot processor.
*/
-#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
+#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(irq_stack_union);
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 9119d8e41f1f..cf00ab6c6621 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -179,6 +179,8 @@ static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off)
if (insn->addr_bytes == 2)
return -EINVAL;
+ /* fall through */
+
case -EDOM:
case offsetof(struct pt_regs, bx):
case offsetof(struct pt_regs, si):
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 12d7e7fb4efd..19c6abf9ea31 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -52,7 +52,7 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}
-static void percpu_setup_debug_store(int cpu)
+static void __init percpu_setup_debug_store(int cpu)
{
#ifdef CONFIG_CPU_SUP_INTEL
int npages;
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index e3cdc85ce5b6..ee8f8ab46941 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -444,7 +444,6 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
int i;
pud_t *start, *pud_start;
pgprotval_t prot, eff;
- pud_t *prev_pud = NULL;
pud_start = start = (pud_t *)p4d_page_vaddr(addr);
@@ -462,7 +461,6 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
} else
note_page(m, st, __pgprot(0), 0, 3);
- prev_pud = start;
start++;
}
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9d5c75f02295..667f1da36208 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1031,7 +1031,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
- unsigned int fault)
+ vm_fault_t fault)
{
struct task_struct *tsk = current;
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 462fde83b515..8dc0fc0b1382 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -24,14 +24,16 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
-static __init void *early_alloc(size_t size, int nid, bool panic)
+static __init void *early_alloc(size_t size, int nid, bool should_panic)
{
- if (panic)
- return memblock_alloc_try_nid(size, size,
- __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
- else
- return memblock_alloc_try_nid_nopanic(size, size,
+ void *ptr = memblock_alloc_try_nid(size, size,
__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+
+ if (!ptr && should_panic)
+ panic("%pS: Failed to allocate page, nid=%d from=%lx\n",
+ (void *)_RET_IP_, nid, __pa(MAX_DMA_ADDRESS));
+
+ return ptr;
}
static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 12c1b7a83ed7..dfb6c4df639a 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -195,15 +195,11 @@ static void __init alloc_node_data(int nid)
* Allocate node data. Try node-local memory and then any node.
* Never allocate in DMA zone.
*/
- nd_pa = memblock_phys_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+ nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
if (!nd_pa) {
- nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES,
- MEMBLOCK_ALLOC_ACCESSIBLE);
- if (!nd_pa) {
- pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
- nd_size, nid);
- return;
- }
+ pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
+ nd_size, nid);
+ return;
}
nd = __va(nd_pa);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 14e6119838a6..4c570612e24e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -738,7 +738,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
{
unsigned long numpages, pmask, psize, lpaddr, pfn, old_pfn;
pgprot_t old_prot, new_prot, req_prot, chk_prot;
- pte_t new_pte, old_pte, *tmp;
+ pte_t new_pte, *tmp;
enum pg_level level;
/*
@@ -781,7 +781,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
* Convert protection attributes to 4k-format, as cpa->mask* are set
* up accordingly.
*/
- old_pte = *kpte;
+
/* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
req_prot = pgprot_large_2_4k(old_prot);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 999d6d8f0bef..bc4bc7b2f075 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -685,9 +685,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
* that UV should be updated so that smp_call_function_many(),
* etc, are optimal on UV.
*/
- unsigned int cpu;
-
- cpu = smp_processor_id();
cpumask = uv_flush_tlb_others(cpumask, info);
if (cpumask)
smp_call_function_many(cpumask, flush_tlb_func_remote,
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 30a5111ae5fd..527e69b12002 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -635,6 +635,22 @@ static void quirk_no_aersid(struct pci_dev *pdev)
DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
PCI_CLASS_BRIDGE_PCI, 8, quirk_no_aersid);
+static void quirk_intel_th_dnv(struct pci_dev *dev)
+{
+ struct resource *r = &dev->resource[4];
+
+ /*
+ * Denverton reports 2k of RTIT_BAR (intel_th resource 4), which
+ * appears to be 4 MB in reality.
+ */
+ if (r->end == r->start + 0x7ff) {
+ r->start = 0;
+ r->end = 0x3fffff;
+ r->flags |= IORESOURCE_UNSET;
+ }
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x19e1, quirk_intel_th_dnv);
+
#ifdef CONFIG_PHYS_ADDR_T_64BIT
#define AMD_141b_MMIO_BASE(x) (0x80 + (x) * 0x8)
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index b4ab779f1d47..ac9e7bf49b66 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -141,6 +141,9 @@ void * __init prom_early_alloc(unsigned long size)
* wasted bootmem) and hand off chunks of it to callers.
*/
res = memblock_alloc(chunk_size, SMP_CACHE_BYTES);
+ if (!res)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ chunk_size);
BUG_ON(!res);
prom_early_allocated += chunk_size;
memset(res, 0, chunk_size);
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index eb33432f2f24..ef60d789c76e 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -45,7 +45,7 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
* If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI
* callback method, which uses efi_call() directly, with the kernel page tables:
*/
- if (unlikely(test_bit(EFI_OLD_MEMMAP, &efi.flags)))
+ if (unlikely(efi_enabled(EFI_OLD_MEMMAP)))
ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);
else
ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
@@ -85,18 +85,6 @@ s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
return ret;
}
-s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
- u64 a4, u64 a5)
-{
- s64 ret;
-
- preempt_disable();
- ret = uv_bios_call(which, a1, a2, a3, a4, a5);
- preempt_enable();
-
- return ret;
-}
-
long sn_partition_id;
EXPORT_SYMBOL_GPL(sn_partition_id);
@@ -207,7 +195,6 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
}
EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
-#ifdef CONFIG_EFI
void uv_bios_init(void)
{
uv_systab = NULL;
@@ -237,4 +224,3 @@ void uv_bios_init(void)
}
pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision);
}
-#endif
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index a4130b84d1ff..2c53b0f19329 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -2010,8 +2010,7 @@ static void make_per_cpu_thp(struct bau_control *smaster)
int cpu;
size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus();
- smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
- memset(smaster->thp, 0, hpsz);
+ smaster->thp = kzalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
for_each_present_cpu(cpu) {
smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode;
smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
@@ -2135,15 +2134,12 @@ static int __init summarize_uvhub_sockets(int nuvhubs,
static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
{
unsigned char *uvhub_mask;
- void *vp;
struct uvhub_desc *uvhub_descs;
if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub())
timeout_us = calculate_destination_timeout();
- vp = kmalloc_array(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL);
- uvhub_descs = (struct uvhub_desc *)vp;
- memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
+ uvhub_descs = kcalloc(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL);
uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 4463fa72db94..f60501a384f9 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -37,8 +37,7 @@ REALMODE_OBJS = $(addprefix $(obj)/,$(realmode-y))
sed-pasyms := -n -r -e 's/^([0-9a-fA-F]+) [ABCDGRSTVW] (.+)$$/pa_\2 = \2;/p'
quiet_cmd_pasyms = PASYMS $@
- cmd_pasyms = $(NM) $(filter-out FORCE,$^) | \
- sed $(sed-pasyms) | sort | uniq > $@
+ cmd_pasyms = $(NM) $(real-prereqs) | sed $(sed-pasyms) | sort | uniq > $@
targets += pasyms.h
$(obj)/pasyms.h: $(REALMODE_OBJS) FORCE
@@ -47,7 +46,7 @@ $(obj)/pasyms.h: $(REALMODE_OBJS) FORCE
targets += realmode.lds
$(obj)/realmode.lds: $(obj)/pasyms.h
-LDFLAGS_realmode.elf := --emit-relocs -T
+LDFLAGS_realmode.elf := -m elf_i386 --emit-relocs -T
CPPFLAGS_realmode.lds += -P -C -I$(objtree)/$(obj)
targets += realmode.elf
diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S
index df8e11e26bc3..3bb980800c58 100644
--- a/arch/x86/realmode/rm/realmode.lds.S
+++ b/arch/x86/realmode/rm/realmode.lds.S
@@ -9,7 +9,7 @@
#undef i386
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_FORMAT("elf32-i386")
OUTPUT_ARCH(i386)
SECTIONS
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 856a85814f00..a21e1734fc1f 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2114,10 +2114,10 @@ void __init xen_relocate_p2m(void)
pt = early_memremap(pt_phys, PAGE_SIZE);
clear_page(pt);
for (idx_pte = 0;
- idx_pte < min(n_pte, PTRS_PER_PTE);
- idx_pte++) {
- set_pte(pt + idx_pte,
- pfn_pte(p2m_pfn, PAGE_KERNEL));
+ idx_pte < min(n_pte, PTRS_PER_PTE);
+ idx_pte++) {
+ pt[idx_pte] = pfn_pte(p2m_pfn,
+ PAGE_KERNEL);
p2m_pfn++;
}
n_pte -= PTRS_PER_PTE;
@@ -2125,8 +2125,7 @@ void __init xen_relocate_p2m(void)
make_lowmem_page_readonly(__va(pt_phys));
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
PFN_DOWN(pt_phys));
- set_pmd(pmd + idx_pt,
- __pmd(_PAGE_TABLE | pt_phys));
+ pmd[idx_pt] = __pmd(_PAGE_TABLE | pt_phys);
pt_phys += PAGE_SIZE;
}
n_pt -= PTRS_PER_PMD;
@@ -2134,7 +2133,7 @@ void __init xen_relocate_p2m(void)
make_lowmem_page_readonly(__va(pmd_phys));
pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
PFN_DOWN(pmd_phys));
- set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+ pud[idx_pmd] = __pud(_PAGE_TABLE | pmd_phys);
pmd_phys += PAGE_SIZE;
}
n_pmd -= PTRS_PER_PUD;
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 055e37e43541..95ce9b5be411 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -181,8 +181,15 @@ static void p2m_init_identity(unsigned long *p2m, unsigned long pfn)
static void * __ref alloc_p2m_page(void)
{
- if (unlikely(!slab_is_available()))
- return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (unlikely(!slab_is_available())) {
+ void *ptr = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+
+ if (!ptr)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE);
+
+ return ptr;
+ }
return (void *)__get_free_page(GFP_KERNEL);
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d5f303c0e656..548d1e0a5ba1 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -12,6 +12,7 @@
#include <linux/memblock.h>
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>
+#include <linux/memory_hotplug.h>
#include <asm/elf.h>
#include <asm/vdso.h>
@@ -589,6 +590,14 @@ static void __init xen_align_and_add_e820_region(phys_addr_t start,
if (type == E820_TYPE_RAM) {
start = PAGE_ALIGN(start);
end &= ~((phys_addr_t)PAGE_SIZE - 1);
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Don't allow adding memory not in E820 map while booting the
+ * system. Once the balloon driver is up it will remove that
+ * restriction again.
+ */
+ max_mem_size = end;
+#endif
}
e820__range_add(start, end - start, type);
@@ -748,6 +757,10 @@ char * __init xen_memory_setup(void)
memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries);
set_xen_guest_handle(memmap.buffer, xen_e820_table.entries);
+#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON)
+ xen_saved_max_mem_size = max_mem_size;
+#endif
+
op = xen_initial_domain() ?
XENMEM_machine_memory_map :
XENMEM_memory_map;