aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-03-16 09:01:55 +0100
committerIngo Molnar <mingo@kernel.org>2016-03-16 09:01:55 +0100
commitba4e06d68ea4fd2be401d7226c68941892d6bbaf (patch)
treea9a7125a8c88ba543e4fcfb907869b97688dee3c /arch
parentx86/video: Don't assume all FB devices are PCI devices (diff)
parentMerge branch 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (diff)
downloadlinux-dev-ba4e06d68ea4fd2be401d7226c68941892d6bbaf.tar.xz
linux-dev-ba4e06d68ea4fd2be401d7226c68941892d6bbaf.zip
Merge branch 'linus' into x86/urgent, to pick up dependencies for a fix
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/smp.c2
-rw-r--r--arch/arc/kernel/smp.c2
-rw-r--r--arch/arm/kernel/smp.c2
-rw-r--r--arch/arm/mach-mvebu/Kconfig6
-rw-r--r--arch/arm64/kernel/smp.c2
-rw-r--r--arch/blackfin/mach-common/smp.c2
-rw-r--r--arch/hexagon/kernel/smp.c2
-rw-r--r--arch/ia64/kernel/smpboot.c2
-rw-r--r--arch/m32r/kernel/smpboot.c2
-rw-r--r--arch/metag/kernel/smp.c2
-rw-r--r--arch/mips/Kconfig8
-rw-r--r--arch/mips/ath79/irq.c244
-rw-r--r--arch/mips/bmips/irq.c10
-rw-r--r--arch/mips/include/asm/mach-ath79/ath79.h4
-rw-r--r--arch/mips/include/asm/smp-ops.h5
-rw-r--r--arch/mips/kernel/Makefile1
-rw-r--r--arch/mips/kernel/smp-cmp.c4
-rw-r--r--arch/mips/kernel/smp-cps.c4
-rw-r--r--arch/mips/kernel/smp-mt.c2
-rw-r--r--arch/mips/kernel/smp.c138
-rw-r--r--arch/mn10300/kernel/smp.c2
-rw-r--r--arch/parisc/kernel/smp.c2
-rw-r--r--arch/powerpc/kernel/smp.c2
-rw-r--r--arch/s390/kernel/smp.c2
-rw-r--r--arch/sh/kernel/smp.c2
-rw-r--r--arch/sparc/kernel/smp_32.c2
-rw-r--r--arch/sparc/kernel/smp_64.c2
-rw-r--r--arch/tile/kernel/smpboot.c2
-rw-r--r--arch/x86/Kconfig27
-rw-r--r--arch/x86/Kconfig.debug10
-rw-r--r--arch/x86/boot/cpuflags.h2
-rw-r--r--arch/x86/boot/mkcpustr.c2
-rw-r--r--arch/x86/boot/tools/build.c1
-rw-r--r--arch/x86/configs/i386_defconfig2
-rw-r--r--arch/x86/crypto/crc32-pclmul_glue.c2
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c2
-rw-r--r--arch/x86/crypto/crct10dif-pclmul_glue.c2
-rw-r--r--arch/x86/entry/calling.h31
-rw-r--r--arch/x86/entry/common.c106
-rw-r--r--arch/x86/entry/entry_32.S268
-rw-r--r--arch/x86/entry/entry_64.S286
-rw-r--r--arch/x86/entry/entry_64_compat.S102
-rw-r--r--arch/x86/entry/syscall_32.c10
-rw-r--r--arch/x86/entry/syscall_64.c13
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl20
-rw-r--r--arch/x86/entry/syscalls/syscalltbl.sh58
-rw-r--r--arch/x86/entry/vdso/vdso2c.h7
-rw-r--r--arch/x86/entry/vdso/vdso32-setup.c1
-rw-r--r--arch/x86/entry/vdso/vdso32/system_call.S2
-rw-r--r--arch/x86/entry/vdso/vma.c127
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_gtod.c9
-rw-r--r--arch/x86/include/asm/alternative.h6
-rw-r--r--arch/x86/include/asm/apic.h1
-rw-r--r--arch/x86/include/asm/arch_hweight.h2
-rw-r--r--arch/x86/include/asm/bitops.h36
-rw-r--r--arch/x86/include/asm/clocksource.h9
-rw-r--r--arch/x86/include/asm/cmpxchg.h1
-rw-r--r--arch/x86/include/asm/cpufeature.h448
-rw-r--r--arch/x86/include/asm/cpufeatures.h300
-rw-r--r--arch/x86/include/asm/desc_defs.h23
-rw-r--r--arch/x86/include/asm/dmi.h2
-rw-r--r--arch/x86/include/asm/fixmap.h2
-rw-r--r--arch/x86/include/asm/fpu/internal.h18
-rw-r--r--arch/x86/include/asm/frame.h59
-rw-r--r--arch/x86/include/asm/imr.h2
-rw-r--r--arch/x86/include/asm/ipi.h58
-rw-r--r--arch/x86/include/asm/irq_work.h2
-rw-r--r--arch/x86/include/asm/mce.h1
-rw-r--r--arch/x86/include/asm/microcode.h26
-rw-r--r--arch/x86/include/asm/microcode_intel.h1
-rw-r--r--arch/x86/include/asm/mmu.h3
-rw-r--r--arch/x86/include/asm/msr-index.h7
-rw-r--r--arch/x86/include/asm/mwait.h2
-rw-r--r--arch/x86/include/asm/processor.h8
-rw-r--r--arch/x86/include/asm/proto.h15
-rw-r--r--arch/x86/include/asm/sighandling.h1
-rw-r--r--arch/x86/include/asm/smap.h2
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/thread_info.h9
-rw-r--r--arch/x86/include/asm/tlbflush.h58
-rw-r--r--arch/x86/include/asm/tsc.h2
-rw-r--r--arch/x86/include/asm/uaccess_64.h2
-rw-r--r--arch/x86/include/asm/vdso.h3
-rw-r--r--arch/x86/include/asm/vgtod.h6
-rw-r--r--arch/x86/include/uapi/asm/sigcontext.h32
-rw-r--r--arch/x86/include/uapi/asm/ucontext.h53
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c4
-rw-r--r--arch/x86/kernel/apic/ipi.c60
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/asm-offsets_32.c7
-rw-r--r--arch/x86/kernel/asm-offsets_64.c10
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/centaur.c2
-rw-r--r--arch/x86/kernel/cpu/common.c55
-rw-r--r--arch/x86/kernel/cpu/cyrix.c1
-rw-r--r--arch/x86/kernel/cpu/intel.c2
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/match.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c13
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c15
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c19
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c285
-rw-r--r--arch/x86/kernel/cpu/microcode/intel_lib.c58
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.sh6
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/cpu/transmeta.c2
-rw-r--r--arch/x86/kernel/e820.c1
-rw-r--r--arch/x86/kernel/fpu/core.c52
-rw-r--r--arch/x86/kernel/fpu/init.c13
-rw-r--r--arch/x86/kernel/fpu/xstate.c3
-rw-r--r--arch/x86/kernel/ftrace.c11
-rw-r--r--arch/x86/kernel/head64.c14
-rw-r--r--arch/x86/kernel/head_32.S8
-rw-r--r--arch/x86/kernel/head_64.S5
-rw-r--r--arch/x86/kernel/hpet.c1
-rw-r--r--arch/x86/kernel/mcount_64.S14
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/process.c3
-rw-r--r--arch/x86/kernel/signal.c127
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/traps.c141
-rw-r--r--arch/x86/kernel/tsc.c67
-rw-r--r--arch/x86/kernel/verify_cpu.S2
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S11
-rw-r--r--arch/x86/lib/clear_page_64.S2
-rw-r--r--arch/x86/lib/cmdline.c60
-rw-r--r--arch/x86/lib/copy_page_64.S2
-rw-r--r--arch/x86/lib/copy_user_64.S2
-rw-r--r--arch/x86/lib/memcpy_64.S2
-rw-r--r--arch/x86/lib/memmove_64.S2
-rw-r--r--arch/x86/lib/memset_64.S2
-rw-r--r--arch/x86/mm/dump_pagetables.c11
-rw-r--r--arch/x86/mm/init_32.c3
-rw-r--r--arch/x86/mm/init_64.c24
-rw-r--r--arch/x86/mm/kasan_init_64.c17
-rw-r--r--arch/x86/mm/kmmio.c88
-rw-r--r--arch/x86/mm/mmap.c14
-rw-r--r--arch/x86/mm/numa.c67
-rw-r--r--arch/x86/mm/pageattr.c4
-rw-r--r--arch/x86/mm/pat.c4
-rw-r--r--arch/x86/mm/setup_nx.c6
-rw-r--r--arch/x86/oprofile/op_model_amd.c1
-rw-r--r--arch/x86/platform/geode/alix.c14
-rw-r--r--arch/x86/platform/geode/geos.c8
-rw-r--r--arch/x86/platform/geode/net5501.c8
-rw-r--r--arch/x86/platform/intel-mid/mfld.c5
-rw-r--r--arch/x86/platform/intel-mid/mrfl.c5
-rw-r--r--arch/x86/platform/intel-quark/imr.c59
-rw-r--r--arch/x86/platform/intel-quark/imr_selftest.c30
-rw-r--r--arch/x86/um/asm/barrier.h2
-rw-r--r--arch/x86/um/sys_call_table_32.c4
-rw-r--r--arch/x86/um/sys_call_table_64.c7
-rw-r--r--arch/x86/um/user-offsets.c6
-rw-r--r--arch/x86/xen/smp.c2
-rw-r--r--arch/xtensa/kernel/smp.c2
157 files changed, 2303 insertions, 1940 deletions
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 2f24447fef92..46bf263c3153 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -168,7 +168,7 @@ smp_callin(void)
cpuid, current, current->active_mm));
preempt_disable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/* Wait until hwrpb->txrdy is clear for cpu. Return -1 on timeout. */
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 424e937da5c8..4cb3add77c75 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -142,7 +142,7 @@ void start_kernel_secondary(void)
local_irq_enable();
preempt_disable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/*
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 37312f6749f3..baee70267f29 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -409,7 +409,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index 64e3d2ce9a07..b003e3afd693 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -3,7 +3,6 @@ menuconfig ARCH_MVEBU
depends on ARCH_MULTI_V7 || ARCH_MULTI_V5
select ARCH_SUPPORTS_BIG_ENDIAN
select CLKSRC_MMIO
- select GENERIC_IRQ_CHIP
select PINCTRL
select PLAT_ORION
select SOC_BUS
@@ -29,6 +28,7 @@ config MACH_ARMADA_370
bool "Marvell Armada 370 boards"
depends on ARCH_MULTI_V7
select ARMADA_370_CLK
+ select ARMADA_370_XP_IRQ
select CPU_PJ4B
select MACH_MVEBU_V7
select PINCTRL_ARMADA_370
@@ -39,6 +39,7 @@ config MACH_ARMADA_370
config MACH_ARMADA_375
bool "Marvell Armada 375 boards"
depends on ARCH_MULTI_V7
+ select ARMADA_370_XP_IRQ
select ARM_ERRATA_720789
select ARM_ERRATA_753970
select ARM_GIC
@@ -58,6 +59,7 @@ config MACH_ARMADA_38X
select ARM_ERRATA_720789
select ARM_ERRATA_753970
select ARM_GIC
+ select ARMADA_370_XP_IRQ
select ARMADA_38X_CLK
select HAVE_ARM_SCU
select HAVE_ARM_TWD if SMP
@@ -72,6 +74,7 @@ config MACH_ARMADA_39X
bool "Marvell Armada 39x boards"
depends on ARCH_MULTI_V7
select ARM_GIC
+ select ARMADA_370_XP_IRQ
select ARMADA_39X_CLK
select CACHE_L2X0
select HAVE_ARM_SCU
@@ -86,6 +89,7 @@ config MACH_ARMADA_39X
config MACH_ARMADA_XP
bool "Marvell Armada XP boards"
depends on ARCH_MULTI_V7
+ select ARMADA_370_XP_IRQ
select ARMADA_XP_CLK
select CPU_PJ4B
select MACH_MVEBU_V7
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51b2c2e..460765799c64 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -195,7 +195,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 0030e21cfceb..23c4ef5f8bdc 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -333,7 +333,7 @@ void secondary_start_kernel(void)
/* We are done with local CPU inits, unblock the boot CPU. */
set_cpu_online(cpu, true);
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_prepare_boot_cpu(void)
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index ff759f26b96a..983bae7d2665 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -180,7 +180,7 @@ void start_secondary(void)
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 0e76fad27975..74fe317477e6 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -454,7 +454,7 @@ start_secondary (void *unused)
preempt_disable();
smp_callin();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index a468467542f4..f98d2f6519d6 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -432,7 +432,7 @@ int __init start_secondary(void *unused)
*/
local_flush_tlb_all();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index c3c6f0864881..bad13232de51 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -396,7 +396,7 @@ asmlinkage void secondary_start_kernel(void)
/*
* OK, it's off to the idle thread for us
*/
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d3da79dda629..a65eacf59918 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -151,6 +151,7 @@ config BMIPS_GENERIC
select CSRC_R4K
select SYNC_R4K
select COMMON_CLK
+ select BCM6345_L1_IRQ
select BCM7038_L1_IRQ
select BCM7120_L2_IRQ
select BRCMSTB_L2_IRQ
@@ -2169,7 +2170,6 @@ config MIPS_MT_SMP
select CPU_MIPSR2_IRQ_VI
select CPU_MIPSR2_IRQ_EI
select SYNC_R4K
- select MIPS_GIC_IPI if MIPS_GIC
select MIPS_MT
select SMP
select SMP_UP
@@ -2267,7 +2267,6 @@ config MIPS_VPE_APSP_API_MT
config MIPS_CMP
bool "MIPS CMP framework support (DEPRECATED)"
depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6
- select MIPS_GIC_IPI if MIPS_GIC
select SMP
select SYNC_R4K
select SYS_SUPPORTS_SMP
@@ -2287,7 +2286,6 @@ config MIPS_CPS
select MIPS_CM
select MIPS_CPC
select MIPS_CPS_PM if HOTPLUG_CPU
- select MIPS_GIC_IPI if MIPS_GIC
select SMP
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
select SYS_SUPPORTS_HOTPLUG_CPU
@@ -2305,10 +2303,6 @@ config MIPS_CPS_PM
select MIPS_CPC
bool
-config MIPS_GIC_IPI
- depends on MIPS_GIC
- bool
-
config MIPS_CM
bool
diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c
index 511c06560dc1..2dfff1f19004 100644
--- a/arch/mips/ath79/irq.c
+++ b/arch/mips/ath79/irq.c
@@ -26,90 +26,6 @@
#include "common.h"
#include "machtypes.h"
-static void __init ath79_misc_intc_domain_init(
- struct device_node *node, int irq);
-
-static void ath79_misc_irq_handler(struct irq_desc *desc)
-{
- struct irq_domain *domain = irq_desc_get_handler_data(desc);
- void __iomem *base = domain->host_data;
- u32 pending;
-
- pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) &
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
- if (!pending) {
- spurious_interrupt();
- return;
- }
-
- while (pending) {
- int bit = __ffs(pending);
-
- generic_handle_irq(irq_linear_revmap(domain, bit));
- pending &= ~BIT(bit);
- }
-}
-
-static void ar71xx_misc_irq_unmask(struct irq_data *d)
-{
- void __iomem *base = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->hwirq;
- u32 t;
-
- t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
- __raw_writel(t | (1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
- /* flush write */
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-}
-
-static void ar71xx_misc_irq_mask(struct irq_data *d)
-{
- void __iomem *base = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->hwirq;
- u32 t;
-
- t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
- __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-
- /* flush write */
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
-}
-
-static void ar724x_misc_irq_ack(struct irq_data *d)
-{
- void __iomem *base = irq_data_get_irq_chip_data(d);
- unsigned int irq = d->hwirq;
- u32 t;
-
- t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
- __raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
- /* flush write */
- __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
-}
-
-static struct irq_chip ath79_misc_irq_chip = {
- .name = "MISC",
- .irq_unmask = ar71xx_misc_irq_unmask,
- .irq_mask = ar71xx_misc_irq_mask,
-};
-
-static void __init ath79_misc_irq_init(void)
-{
- if (soc_is_ar71xx() || soc_is_ar913x())
- ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
- else if (soc_is_ar724x() ||
- soc_is_ar933x() ||
- soc_is_ar934x() ||
- soc_is_qca955x())
- ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
- else
- BUG();
-
- ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6));
-}
static void ar934x_ip2_irq_dispatch(struct irq_desc *desc)
{
@@ -212,142 +128,12 @@ static void qca955x_irq_init(void)
irq_set_chained_handler(ATH79_CPU_IRQ(3), qca955x_ip3_irq_dispatch);
}
-/*
- * The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for
- * these devices typically allocate coherent DMA memory, however the
- * DMA controller may still have some unsynchronized data in the FIFO.
- * Issue a flush in the handlers to ensure that the driver sees
- * the update.
- *
- * This array map the interrupt lines to the DDR write buffer channels.
- */
-
-static unsigned irq_wb_chan[8] = {
- -1, -1, -1, -1, -1, -1, -1, -1,
-};
-
-asmlinkage void plat_irq_dispatch(void)
-{
- unsigned long pending;
- int irq;
-
- pending = read_c0_status() & read_c0_cause() & ST0_IM;
-
- if (!pending) {
- spurious_interrupt();
- return;
- }
-
- pending >>= CAUSEB_IP;
- while (pending) {
- irq = fls(pending) - 1;
- if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1)
- ath79_ddr_wb_flush(irq_wb_chan[irq]);
- do_IRQ(MIPS_CPU_IRQ_BASE + irq);
- pending &= ~BIT(irq);
- }
-}
-
-static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
-{
- irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
- irq_set_chip_data(irq, d->host_data);
- return 0;
-}
-
-static const struct irq_domain_ops misc_irq_domain_ops = {
- .xlate = irq_domain_xlate_onecell,
- .map = misc_map,
-};
-
-static void __init ath79_misc_intc_domain_init(
- struct device_node *node, int irq)
-{
- void __iomem *base = ath79_reset_base;
- struct irq_domain *domain;
-
- domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT,
- ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base);
- if (!domain)
- panic("Failed to add MISC irqdomain");
-
- /* Disable and clear all interrupts */
- __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
- __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
- irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain);
-}
-
-static int __init ath79_misc_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- int irq;
-
- irq = irq_of_parse_and_map(node, 0);
- if (!irq)
- panic("Failed to get MISC IRQ");
-
- ath79_misc_intc_domain_init(node, irq);
- return 0;
-}
-
-static int __init ar7100_misc_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
- return ath79_misc_intc_of_init(node, parent);
-}
-
-IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc",
- ar7100_misc_intc_of_init);
-
-static int __init ar7240_misc_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
- return ath79_misc_intc_of_init(node, parent);
-}
-
-IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc",
- ar7240_misc_intc_of_init);
-
-static int __init ar79_cpu_intc_of_init(
- struct device_node *node, struct device_node *parent)
-{
- int err, i, count;
-
- /* Fill the irq_wb_chan table */
- count = of_count_phandle_with_args(
- node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells");
-
- for (i = 0; i < count; i++) {
- struct of_phandle_args args;
- u32 irq = i;
-
- of_property_read_u32_index(
- node, "qca,ddr-wb-channel-interrupts", i, &irq);
- if (irq >= ARRAY_SIZE(irq_wb_chan))
- continue;
-
- err = of_parse_phandle_with_args(
- node, "qca,ddr-wb-channels",
- "#qca,ddr-wb-channel-cells",
- i, &args);
- if (err)
- return err;
-
- irq_wb_chan[irq] = args.args[0];
- pr_info("IRQ: Set flush channel of IRQ%d to %d\n",
- irq, args.args[0]);
- }
-
- return mips_cpu_irq_of_init(node, parent);
-}
-IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc",
- ar79_cpu_intc_of_init);
-
void __init arch_init_irq(void)
{
+ unsigned irq_wb_chan2 = -1;
+ unsigned irq_wb_chan3 = -1;
+ bool misc_is_ar71xx;
+
if (mips_machtype == ATH79_MACH_GENERIC_OF) {
irqchip_init();
return;
@@ -355,14 +141,26 @@ void __init arch_init_irq(void)
if (soc_is_ar71xx() || soc_is_ar724x() ||
soc_is_ar913x() || soc_is_ar933x()) {
- irq_wb_chan[2] = 3;
- irq_wb_chan[3] = 2;
+ irq_wb_chan2 = 3;
+ irq_wb_chan3 = 2;
} else if (soc_is_ar934x()) {
- irq_wb_chan[3] = 2;
+ irq_wb_chan3 = 2;
}
- mips_cpu_irq_init();
- ath79_misc_irq_init();
+ ath79_cpu_irq_init(irq_wb_chan2, irq_wb_chan3);
+
+ if (soc_is_ar71xx() || soc_is_ar913x())
+ misc_is_ar71xx = true;
+ else if (soc_is_ar724x() ||
+ soc_is_ar933x() ||
+ soc_is_ar934x() ||
+ soc_is_qca955x())
+ misc_is_ar71xx = false;
+ else
+ BUG();
+ ath79_misc_irq_init(
+ ath79_reset_base + AR71XX_RESET_REG_MISC_INT_STATUS,
+ ATH79_CPU_IRQ(6), ATH79_MISC_IRQ_BASE, misc_is_ar71xx);
if (soc_is_ar934x())
ar934x_ip2_irq_init();
diff --git a/arch/mips/bmips/irq.c b/arch/mips/bmips/irq.c
index e7fc6f9348ba..7efefcf44033 100644
--- a/arch/mips/bmips/irq.c
+++ b/arch/mips/bmips/irq.c
@@ -15,6 +15,12 @@
#include <asm/irq_cpu.h>
#include <asm/time.h>
+static const struct of_device_id smp_intc_dt_match[] = {
+ { .compatible = "brcm,bcm7038-l1-intc" },
+ { .compatible = "brcm,bcm6345-l1-intc" },
+ {}
+};
+
unsigned int get_c0_compare_int(void)
{
return CP0_LEGACY_COMPARE_IRQ;
@@ -24,8 +30,8 @@ void __init arch_init_irq(void)
{
struct device_node *dn;
- /* Only the STB (bcm7038) controller supports SMP IRQ affinity */
- dn = of_find_compatible_node(NULL, NULL, "brcm,bcm7038-l1-intc");
+ /* Only these controllers support SMP IRQ affinity */
+ dn = of_find_matching_node(NULL, smp_intc_dt_match);
if (dn)
of_node_put(dn);
else
diff --git a/arch/mips/include/asm/mach-ath79/ath79.h b/arch/mips/include/asm/mach-ath79/ath79.h
index 2b3487213d1e..441faa92c3cd 100644
--- a/arch/mips/include/asm/mach-ath79/ath79.h
+++ b/arch/mips/include/asm/mach-ath79/ath79.h
@@ -144,4 +144,8 @@ static inline u32 ath79_reset_rr(unsigned reg)
void ath79_device_reset_set(u32 mask);
void ath79_device_reset_clear(u32 mask);
+void ath79_cpu_irq_init(unsigned irq_wb_chan2, unsigned irq_wb_chan3);
+void ath79_misc_irq_init(void __iomem *regs, int irq,
+ int irq_base, bool is_ar71xx);
+
#endif /* __ASM_MACH_ATH79_H */
diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h
index 6ba1fb8b11e2..db7c322f057f 100644
--- a/arch/mips/include/asm/smp-ops.h
+++ b/arch/mips/include/asm/smp-ops.h
@@ -44,8 +44,9 @@ static inline void plat_smp_setup(void)
mp_ops->smp_setup();
}
-extern void gic_send_ipi_single(int cpu, unsigned int action);
-extern void gic_send_ipi_mask(const struct cpumask *mask, unsigned int action);
+extern void mips_smp_send_ipi_single(int cpu, unsigned int action);
+extern void mips_smp_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action);
#else /* !CONFIG_SMP */
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 68e2b7db9348..b0988fd62fcc 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -52,7 +52,6 @@ obj-$(CONFIG_MIPS_MT_SMP) += smp-mt.o
obj-$(CONFIG_MIPS_CMP) += smp-cmp.o
obj-$(CONFIG_MIPS_CPS) += smp-cps.o cps-vec.o
obj-$(CONFIG_MIPS_CPS_NS16550) += cps-vec-ns16550.o
-obj-$(CONFIG_MIPS_GIC_IPI) += smp-gic.o
obj-$(CONFIG_MIPS_SPRAM) += spram.o
obj-$(CONFIG_MIPS_VPE_LOADER) += vpe.o
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index d5e0f949dc48..76923349b4fe 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -149,8 +149,8 @@ void __init cmp_prepare_cpus(unsigned int max_cpus)
}
struct plat_smp_ops cmp_smp_ops = {
- .send_ipi_single = gic_send_ipi_single,
- .send_ipi_mask = gic_send_ipi_mask,
+ .send_ipi_single = mips_smp_send_ipi_single,
+ .send_ipi_mask = mips_smp_send_ipi_mask,
.init_secondary = cmp_init_secondary,
.smp_finish = cmp_smp_finish,
.boot_secondary = cmp_boot_secondary,
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 2ad4e4c96d61..253e1409338c 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -472,8 +472,8 @@ static struct plat_smp_ops cps_smp_ops = {
.boot_secondary = cps_boot_secondary,
.init_secondary = cps_init_secondary,
.smp_finish = cps_smp_finish,
- .send_ipi_single = gic_send_ipi_single,
- .send_ipi_mask = gic_send_ipi_mask,
+ .send_ipi_single = mips_smp_send_ipi_single,
+ .send_ipi_mask = mips_smp_send_ipi_mask,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = cps_cpu_disable,
.cpu_die = cps_cpu_die,
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 86311a164ef1..4f9570a57e8d 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -121,7 +121,7 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action)
#ifdef CONFIG_MIPS_GIC
if (gic_present) {
- gic_send_ipi_single(cpu, action);
+ mips_smp_send_ipi_single(cpu, action);
return;
}
#endif
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 2b521e07b860..37708d9af638 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -33,12 +33,16 @@
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/ftrace.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
#include <linux/atomic.h>
#include <asm/cpu.h>
#include <asm/processor.h>
#include <asm/idle.h>
#include <asm/r4k-timer.h>
+#include <asm/mips-cpc.h>
#include <asm/mmu_context.h>
#include <asm/time.h>
#include <asm/setup.h>
@@ -79,6 +83,11 @@ static cpumask_t cpu_core_setup_map;
cpumask_t cpu_coherent_mask;
+#ifdef CONFIG_GENERIC_IRQ_IPI
+static struct irq_desc *call_desc;
+static struct irq_desc *sched_desc;
+#endif
+
static inline void set_cpu_sibling_map(int cpu)
{
int i;
@@ -146,6 +155,133 @@ void register_smp_ops(struct plat_smp_ops *ops)
mp_ops = ops;
}
+#ifdef CONFIG_GENERIC_IRQ_IPI
+void mips_smp_send_ipi_single(int cpu, unsigned int action)
+{
+ mips_smp_send_ipi_mask(cpumask_of(cpu), action);
+}
+
+void mips_smp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+ unsigned long flags;
+ unsigned int core;
+ int cpu;
+
+ local_irq_save(flags);
+
+ switch (action) {
+ case SMP_CALL_FUNCTION:
+ __ipi_send_mask(call_desc, mask);
+ break;
+
+ case SMP_RESCHEDULE_YOURSELF:
+ __ipi_send_mask(sched_desc, mask);
+ break;
+
+ default:
+ BUG();
+ }
+
+ if (mips_cpc_present()) {
+ for_each_cpu(cpu, mask) {
+ core = cpu_data[cpu].core;
+
+ if (core == current_cpu_data.core)
+ continue;
+
+ while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) {
+ mips_cpc_lock_other(core);
+ write_cpc_co_cmd(CPC_Cx_CMD_PWRUP);
+ mips_cpc_unlock_other();
+ }
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+
+static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
+{
+ scheduler_ipi();
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
+{
+ generic_smp_call_function_interrupt();
+
+ return IRQ_HANDLED;
+}
+
+static struct irqaction irq_resched = {
+ .handler = ipi_resched_interrupt,
+ .flags = IRQF_PERCPU,
+ .name = "IPI resched"
+};
+
+static struct irqaction irq_call = {
+ .handler = ipi_call_interrupt,
+ .flags = IRQF_PERCPU,
+ .name = "IPI call"
+};
+
+static __init void smp_ipi_init_one(unsigned int virq,
+ struct irqaction *action)
+{
+ int ret;
+
+ irq_set_handler(virq, handle_percpu_irq);
+ ret = setup_irq(virq, action);
+ BUG_ON(ret);
+}
+
+static int __init mips_smp_ipi_init(void)
+{
+ unsigned int call_virq, sched_virq;
+ struct irq_domain *ipidomain;
+ struct device_node *node;
+
+ node = of_irq_find_parent(of_root);
+ ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI);
+
+ /*
+ * Some platforms have half DT setup. So if we found irq node but
+ * didn't find an ipidomain, try to search for one that is not in the
+ * DT.
+ */
+ if (node && !ipidomain)
+ ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI);
+
+ BUG_ON(!ipidomain);
+
+ call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
+ BUG_ON(!call_virq);
+
+ sched_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
+ BUG_ON(!sched_virq);
+
+ if (irq_domain_is_ipi_per_cpu(ipidomain)) {
+ int cpu;
+
+ for_each_cpu(cpu, cpu_possible_mask) {
+ smp_ipi_init_one(call_virq + cpu, &irq_call);
+ smp_ipi_init_one(sched_virq + cpu, &irq_resched);
+ }
+ } else {
+ smp_ipi_init_one(call_virq, &irq_call);
+ smp_ipi_init_one(sched_virq, &irq_resched);
+ }
+
+ call_desc = irq_to_desc(call_virq);
+ sched_desc = irq_to_desc(sched_virq);
+
+ return 0;
+}
+early_initcall(mips_smp_ipi_init);
+#endif
+
/*
* First C code run on the secondary CPUs after being started up by
* the master.
@@ -192,7 +328,7 @@ asmlinkage void start_secondary(void)
WARN_ON_ONCE(!irqs_disabled());
mp_ops->smp_finish();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
static void stop_this_cpu(void *dummy)
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index f984193718b1..426173c4b0b9 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -675,7 +675,7 @@ int __init start_secondary(void *unused)
#ifdef CONFIG_GENERIC_CLOCKEVENTS
init_clockevents();
#endif
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
return 0;
}
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 52e85973a283..c2a9cc55a62f 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -305,7 +305,7 @@ void __init smp_callin(void)
local_irq_enable(); /* Interrupts have been off until now */
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
/* NOTREACHED */
panic("smp_callin() AAAAaaaaahhhh....\n");
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ec9ec2058d2d..cc13d4c83291 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -727,7 +727,7 @@ void start_secondary(void *unused)
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
BUG();
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3c65a8eae34d..40a6b4f9c36c 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid)
set_cpu_online(smp_processor_id(), true);
inc_irq_stat(CPU_RST);
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/* Upping and downing of CPUs */
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index de6be008fc01..13f633add29a 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -203,7 +203,7 @@ asmlinkage void start_secondary(void)
set_cpu_online(cpu, true);
per_cpu(cpu_state, cpu) = CPU_ONLINE;
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
extern struct {
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index b3a5d81b20f0..fb30e7c6a5b1 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -364,7 +364,7 @@ static void sparc_start_secondary(void *arg)
local_irq_enable();
wmb();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
/* We should never reach here! */
BUG();
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 19cd08d18672..8a6151a628ce 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -134,7 +134,7 @@ void smp_callin(void)
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
void cpu_panic(void)
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index 20d52a98e171..6c0abaacec33 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -208,7 +208,7 @@ void online_secondary(void)
/* Set up tile-timer clock-event device on this cpu */
setup_tile_timer();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b1051057e5b0..8f2e6659281b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1163,22 +1163,23 @@ config MICROCODE
bool "CPU microcode loading support"
default y
depends on CPU_SUP_AMD || CPU_SUP_INTEL
- depends on BLK_DEV_INITRD
select FW_LOADER
---help---
-
If you say Y here, you will be able to update the microcode on
- certain Intel and AMD processors. The Intel support is for the
- IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
- Xeon etc. The AMD support is for families 0x10 and later. You will
- obviously need the actual microcode binary data itself which is not
- shipped with the Linux kernel.
-
- This option selects the general module only, you need to select
- at least one vendor specific module as well.
-
- To compile this driver as a module, choose M here: the module
- will be called microcode.
+ Intel and AMD processors. The Intel support is for the IA32 family,
+ e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The
+ AMD support is for families 0x10 and later. You will obviously need
+ the actual microcode binary data itself which is not shipped with
+ the Linux kernel.
+
+ The preferred method to load microcode from a detached initrd is described
+ in Documentation/x86/early-microcode.txt. For that you need to enable
+ CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
+ initrd for microcode blobs.
+
+ In addition, you can build-in the microcode into the kernel. For that you
+ need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode
+ to the CONFIG_EXTRA_FIRMWARE config option.
config MICROCODE_INTEL
bool "Intel microcode loading support"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 7816b7b276f4..67eec55093a5 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -338,16 +338,6 @@ config DEBUG_IMR_SELFTEST
If unsure say N here.
-config X86_DEBUG_STATIC_CPU_HAS
- bool "Debug alternatives"
- depends on DEBUG_KERNEL
- ---help---
- This option causes additional code to be generated which
- fails if static_cpu_has() is used before alternatives have
- run.
-
- If unsure, say N.
-
config X86_DEBUG_FPU
bool "Debug the x86 FPU code"
depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
index ea97697e51e4..4cb404fd45ce 100644
--- a/arch/x86/boot/cpuflags.h
+++ b/arch/x86/boot/cpuflags.h
@@ -1,7 +1,7 @@
#ifndef BOOT_CPUFLAGS_H
#define BOOT_CPUFLAGS_H
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/processor-flags.h>
struct cpu_features {
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 637097e66a62..f72498dc90d2 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -17,7 +17,7 @@
#include "../include/asm/required-features.h"
#include "../include/asm/disabled-features.h"
-#include "../include/asm/cpufeature.h"
+#include "../include/asm/cpufeatures.h"
#include "../kernel/cpu/capflags.c"
int main(void)
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index a7661c430cd9..0702d2531bc7 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -49,7 +49,6 @@ typedef unsigned int u32;
/* This must be large enough to hold the entire setup */
u8 buf[SETUP_SECT_MAX*512];
-int is_big_kernel;
#define PECOFF_RELOC_RESERVE 0x20
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 028be48c8839..e25a1630320c 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -288,7 +288,7 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_FRAME_WARN=2048
+CONFIG_FRAME_WARN=1024
CONFIG_MAGIC_SYSRQ=y
# CONFIG_UNUSED_SYMBOLS is not set
CONFIG_DEBUG_KERNEL=y
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 07d2c6c86a54..27226df3f7d8 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -33,7 +33,7 @@
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0e9871693f24..0857b1a1de3b 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -30,7 +30,7 @@
#include <linux/kernel.h>
#include <crypto/internal/hash.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/internal.h>
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index a3fcfc97a311..cd4df9322501 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -30,7 +30,7 @@
#include <linux/string.h>
#include <linux/kernel.h>
#include <asm/fpu/api.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index e32206e09868..9a9e5884066c 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -201,37 +201,6 @@ For 32-bit we have the following conventions - kernel is built with
.byte 0xf1
.endm
-#else /* CONFIG_X86_64 */
-
-/*
- * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
- * are different from the entry_32.S versions in not changing the segment
- * registers. So only suitable for in kernel use, not when transitioning
- * from or to user space. The resulting stack frame is not a standard
- * pt_regs frame. The main use case is calling C code from assembler
- * when all the registers need to be preserved.
- */
-
- .macro SAVE_ALL
- pushl %eax
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- pushl %ecx
- pushl %ebx
- .endm
-
- .macro RESTORE_ALL
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- .endm
-
#endif /* CONFIG_X86_64 */
/*
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 03663740c866..e79d93d44ecd 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -26,6 +26,7 @@
#include <asm/traps.h>
#include <asm/vdso.h>
#include <asm/uaccess.h>
+#include <asm/cpufeature.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -44,6 +45,8 @@ __visible void enter_from_user_mode(void)
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit();
}
+#else
+static inline void enter_from_user_mode(void) {}
#endif
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
@@ -84,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
-#ifdef CONFIG_CONTEXT_TRACKING
- /*
- * If TIF_NOHZ is set, we are required to call user_exit() before
- * doing anything that could touch RCU.
- */
- if (work & _TIF_NOHZ) {
- enter_from_user_mode();
- work &= ~_TIF_NOHZ;
- }
-#endif
-
#ifdef CONFIG_SECCOMP
/*
* Do seccomp first -- it should minimize exposure of other
@@ -171,16 +163,6 @@ long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
BUG_ON(regs != task_pt_regs(current));
- /*
- * If we stepped into a sysenter/syscall insn, it trapped in
- * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
- * If user-mode had set TF itself, then it's still clear from
- * do_debug() and we need to set it again to restore the user
- * state. If we entered on the slow path, TF was already set.
- */
- if (work & _TIF_SINGLESTEP)
- regs->flags |= X86_EFLAGS_TF;
-
#ifdef CONFIG_SECCOMP
/*
* Call seccomp_phase2 before running the other hooks so that
@@ -268,6 +250,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
/* Called with IRQs disabled. */
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
{
+ struct thread_info *ti = pt_regs_to_thread_info(regs);
u32 cached_flags;
if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -275,12 +258,22 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
lockdep_sys_exit();
- cached_flags =
- READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+ cached_flags = READ_ONCE(ti->flags);
if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
exit_to_usermode_loop(regs, cached_flags);
+#ifdef CONFIG_COMPAT
+ /*
+ * Compat syscalls set TS_COMPAT. Make sure we clear it before
+ * returning to user mode. We need to clear it *after* signal
+ * handling, because syscall restart has a fixup for compat
+ * syscalls. The fixup is exercised by the ptrace_syscall_32
+ * selftest.
+ */
+ ti->status &= ~TS_COMPAT;
+#endif
+
user_enter();
}
@@ -332,33 +325,45 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
syscall_slow_exit_work(regs, cached_flags);
-#ifdef CONFIG_COMPAT
+ local_irq_disable();
+ prepare_exit_to_usermode(regs);
+}
+
+#ifdef CONFIG_X86_64
+__visible void do_syscall_64(struct pt_regs *regs)
+{
+ struct thread_info *ti = pt_regs_to_thread_info(regs);
+ unsigned long nr = regs->orig_ax;
+
+ enter_from_user_mode();
+ local_irq_enable();
+
+ if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
+ nr = syscall_trace_enter(regs);
+
/*
- * Compat syscalls set TS_COMPAT. Make sure we clear it before
- * returning to user mode.
+ * NB: Native and x32 syscalls are dispatched from the same
+ * table. The only functional difference is the x32 bit in
+ * regs->orig_ax, which changes the behavior of some syscalls.
*/
- ti->status &= ~TS_COMPAT;
-#endif
+ if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
+ regs->ax = sys_call_table[nr & __SYSCALL_MASK](
+ regs->di, regs->si, regs->dx,
+ regs->r10, regs->r8, regs->r9);
+ }
- local_irq_disable();
- prepare_exit_to_usermode(regs);
+ syscall_return_slowpath(regs);
}
+#endif
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
/*
- * Does a 32-bit syscall. Called with IRQs on and does all entry and
- * exit work and returns with IRQs off. This function is extremely hot
- * in workloads that use it, and it's usually called from
+ * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
+ * all entry and exit work and returns with IRQs off. This function is
+ * extremely hot in workloads that use it, and it's usually called from
* do_fast_syscall_32, so forcibly inline it to improve performance.
*/
-#ifdef CONFIG_X86_32
-/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
-__visible
-#else
-/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
-static
-#endif
-__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
{
struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned int nr = (unsigned int)regs->orig_ax;
@@ -393,14 +398,13 @@ __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
syscall_return_slowpath(regs);
}
-#ifdef CONFIG_X86_64
-/* Handles INT80 on 64-bit kernels */
-__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
+/* Handles int $0x80 */
+__visible void do_int80_syscall_32(struct pt_regs *regs)
{
+ enter_from_user_mode();
local_irq_enable();
do_syscall_32_irqs_on(regs);
}
-#endif
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs)
@@ -420,12 +424,11 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
*/
regs->ip = landing_pad;
- /*
- * Fetch EBP from where the vDSO stashed it.
- *
- * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
- */
+ enter_from_user_mode();
+
local_irq_enable();
+
+ /* Fetch EBP from where the vDSO stashed it. */
if (
#ifdef CONFIG_X86_64
/*
@@ -443,9 +446,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
/* User code screwed up. */
local_irq_disable();
regs->ax = -EFAULT;
-#ifdef CONFIG_CONTEXT_TRACKING
- enter_from_user_mode();
-#endif
prepare_exit_to_usermode(regs);
return 0; /* Keep it simple: use IRET. */
}
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index bb3e376d0f33..10868aa734dc 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -40,7 +40,7 @@
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
@@ -287,14 +287,64 @@ need_resched:
END(resume_kernel)
#endif
- # SYSENTER call handler stub
+GLOBAL(__begin_SYSENTER_singlestep_region)
+/*
+ * All code from here through __end_SYSENTER_singlestep_region is subject
+ * to being single-stepped if a user program sets TF and executes SYSENTER.
+ * There is absolutely nothing that we can do to prevent this from happening
+ * (thanks Intel!). To keep our handling of this situation as simple as
+ * possible, we handle TF just like AC and NT, except that our #DB handler
+ * will ignore all of the single-step traps generated in this range.
+ */
+
+#ifdef CONFIG_XEN
+/*
+ * Xen doesn't set %esp to be precisely what the normal SYSENTER
+ * entry point expects, so fix it up before using the normal path.
+ */
+ENTRY(xen_sysenter_target)
+ addl $5*4, %esp /* remove xen-provided frame */
+ jmp sysenter_past_esp
+#endif
+
+/*
+ * 32-bit SYSENTER entry.
+ *
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * if X86_FEATURE_SEP is available. This is the preferred system call
+ * entry on 32-bit systems.
+ *
+ * The SYSENTER instruction, in principle, should *only* occur in the
+ * vDSO. In practice, a small number of Android devices were shipped
+ * with a copy of Bionic that inlined a SYSENTER instruction. This
+ * never happened in any of Google's Bionic versions -- it only happened
+ * in a narrow range of Intel-provided versions.
+ *
+ * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old EIP (!!!), ESP, or EFLAGS.
+ *
+ * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
+ * user and/or vm86 state), we explicitly disable the SYSENTER
+ * instruction in vm86 mode by reprogramming the MSRs.
+ *
+ * Arguments:
+ * eax system call number
+ * ebx arg1
+ * ecx arg2
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * ebp user stack
+ * 0(%ebp) arg6
+ */
ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
pushl %ebp /* pt_regs->sp (stashed in bp) */
pushfl /* pt_regs->flags (except IF = 0) */
- ASM_CLAC /* Clear AC after saving FLAGS */
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */
pushl $0 /* pt_regs->ip = 0 (placeholder) */
@@ -302,6 +352,29 @@ sysenter_past_esp:
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
/*
+ * SYSENTER doesn't filter flags, so we need to clear NT, AC
+ * and TF ourselves. To save a few cycles, we can check whether
+ * either was set instead of doing an unconditional popfq.
+ * This needs to happen before enabling interrupts so that
+ * we don't get preempted with NT set.
+ *
+ * If TF is set, we will single-step all the way to here -- do_debug
+ * will ignore all the traps. (Yes, this is slow, but so is
+ * single-stepping in general. This allows us to avoid having
+ * a more complicated code to handle the case where a user program
+ * forces us to single-step through the SYSENTER entry code.)
+ *
+ * NB.: .Lsysenter_fix_flags is a label with the code under it moved
+ * out-of-line as an optimization: NT is unlikely to be set in the
+ * majority of the cases and instead of polluting the I$ unnecessarily,
+ * we're keeping that code behind a branch which will predict as
+ * not-taken and therefore its instructions won't be fetched.
+ */
+ testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
+ jnz .Lsysenter_fix_flags
+.Lsysenter_flags_fixed:
+
+ /*
* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
*/
@@ -327,6 +400,15 @@ sysenter_past_esp:
popl %eax /* pt_regs->ax */
/*
+ * Restore all flags except IF. (We restore IF separately because
+ * STI gives a one-instruction window in which we won't be interrupted,
+ * whereas POPF does not.)
+ */
+ addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */
+ btr $X86_EFLAGS_IF_BIT, (%esp)
+ popfl
+
+ /*
* Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS).
*/
@@ -339,28 +421,63 @@ sysenter_past_esp:
.popsection
_ASM_EXTABLE(1b, 2b)
PTGS_TO_GS_EX
+
+.Lsysenter_fix_flags:
+ pushl $X86_EFLAGS_FIXED
+ popfl
+ jmp .Lsysenter_flags_fixed
+GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)
- # system call handler stub
+/*
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction. INT $0x80 lands here.
+ *
+ * This entry point can be used by any 32-bit perform system calls.
+ * Instances of INT $0x80 can be found inline in various programs and
+ * libraries. It is also used by the vDSO's __kernel_vsyscall
+ * fallback for hardware that doesn't support a faster entry method.
+ * Restarted 32-bit system calls also fall back to INT $0x80
+ * regardless of what instruction was originally used to do the system
+ * call. (64-bit programs can use INT $0x80 as well, but they can
+ * only run on 64-bit kernels and therefore land in
+ * entry_INT80_compat.)
+ *
+ * This is considered a slow path. It is not used by most libc
+ * implementations on modern hardware except during process startup.
+ *
+ * Arguments:
+ * eax system call number
+ * ebx arg1
+ * ecx arg2
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * ebp arg6
+ */
ENTRY(entry_INT80_32)
ASM_CLAC
pushl %eax /* pt_regs->orig_ax */
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
/*
- * User mode is traced as though IRQs are on. Unlike the 64-bit
- * case, INT80 is a trap gate on 32-bit kernels, so interrupts
- * are already on (unless user code is messing around with iopl).
+ * User mode is traced as though IRQs are on, and the interrupt gate
+ * turned them off.
*/
+ TRACE_IRQS_OFF
movl %esp, %eax
- call do_syscall_32_irqs_on
+ call do_int80_syscall_32
.Lsyscall_32_done:
restore_all:
TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
+ ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX
+
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
/*
* Warning: PT_OLDSS(%esp) contains the wrong/random values if we
@@ -387,19 +504,6 @@ ENTRY(iret_exc )
#ifdef CONFIG_X86_ESPFIX32
ldt_ss:
-#ifdef CONFIG_PARAVIRT
- /*
- * The kernel can't run on a non-flat stack if paravirt mode
- * is active. Rather than try to fixup the high bits of
- * ESP, bypass this code entirely. This may break DOSemu
- * and/or Wine support in a paravirt VM, although the option
- * is still available to implement the setting of the high
- * 16-bits in the INTERRUPT_RETURN paravirt-op.
- */
- cmpl $0, pv_info+PARAVIRT_enabled
- jne restore_nocheck
-#endif
-
/*
* Setup and switch to ESPFIX stack
*
@@ -632,14 +736,6 @@ ENTRY(spurious_interrupt_bug)
END(spurious_interrupt_bug)
#ifdef CONFIG_XEN
-/*
- * Xen doesn't set %esp to be precisely what the normal SYSENTER
- * entry point expects, so fix it up before using the normal path.
- */
-ENTRY(xen_sysenter_target)
- addl $5*4, %esp /* remove xen-provided frame */
- jmp sysenter_past_esp
-
ENTRY(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
@@ -939,51 +1035,48 @@ error_code:
jmp ret_from_exception
END(page_fault)
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-.macro FIX_STACK offset ok label
- cmpw $__KERNEL_CS, 4(%esp)
- jne \ok
-\label:
- movl TSS_sysenter_sp0 + \offset(%esp), %esp
- pushfl
- pushl $__KERNEL_CS
- pushl $sysenter_past_esp
-.endm
-
ENTRY(debug)
+ /*
+ * #DB can happen at the first instruction of
+ * entry_SYSENTER_32 or in Xen's SYSENTER prologue. If this
+ * happens, then we will be running on a very small stack. We
+ * need to detect this condition and switch to the thread
+ * stack before calling any C code at all.
+ *
+ * If you edit this code, keep in mind that NMIs can happen in here.
+ */
ASM_CLAC
- cmpl $entry_SYSENTER_32, (%esp)
- jne debug_stack_correct
- FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
-debug_stack_correct:
pushl $-1 # mark this as an int
SAVE_ALL
- TRACE_IRQS_OFF
xorl %edx, %edx # error code 0
movl %esp, %eax # pt_regs pointer
+
+ /* Are we currently on the SYSENTER stack? */
+ PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+ subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
+ cmpl $SIZEOF_SYSENTER_stack, %ecx
+ jb .Ldebug_from_sysenter_stack
+
+ TRACE_IRQS_OFF
+ call do_debug
+ jmp ret_from_exception
+
+.Ldebug_from_sysenter_stack:
+ /* We're on the SYSENTER stack. Switch off. */
+ movl %esp, %ebp
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
+ TRACE_IRQS_OFF
call do_debug
+ movl %ebp, %esp
jmp ret_from_exception
END(debug)
/*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
+ * NMI is doubly nasty. It can happen on the first instruction of
+ * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
+ * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
+ * switched stacks. We handle both conditions by simply checking whether we
+ * interrupted kernel code running on the SYSENTER stack.
*/
ENTRY(nmi)
ASM_CLAC
@@ -994,41 +1087,32 @@ ENTRY(nmi)
popl %eax
je nmi_espfix_stack
#endif
- cmpl $entry_SYSENTER_32, (%esp)
- je nmi_stack_fixup
- pushl %eax
- movl %esp, %eax
- /*
- * Do not access memory above the end of our stack page,
- * it might not exist.
- */
- andl $(THREAD_SIZE-1), %eax
- cmpl $(THREAD_SIZE-20), %eax
- popl %eax
- jae nmi_stack_correct
- cmpl $entry_SYSENTER_32, 12(%esp)
- je nmi_debug_stack_check
-nmi_stack_correct:
- pushl %eax
+
+ pushl %eax # pt_regs->orig_ax
SAVE_ALL
xorl %edx, %edx # zero error code
movl %esp, %eax # pt_regs pointer
+
+ /* Are we currently on the SYSENTER stack? */
+ PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
+ subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
+ cmpl $SIZEOF_SYSENTER_stack, %ecx
+ jb .Lnmi_from_sysenter_stack
+
+ /* Not on SYSENTER stack. */
call do_nmi
jmp restore_all_notrace
-nmi_stack_fixup:
- FIX_STACK 12, nmi_stack_correct, 1
- jmp nmi_stack_correct
-
-nmi_debug_stack_check:
- cmpw $__KERNEL_CS, 16(%esp)
- jne nmi_stack_correct
- cmpl $debug, (%esp)
- jb nmi_stack_correct
- cmpl $debug_esp_fix_insn, (%esp)
- ja nmi_stack_correct
- FIX_STACK 24, nmi_stack_correct, 1
- jmp nmi_stack_correct
+.Lnmi_from_sysenter_stack:
+ /*
+ * We're on the SYSENTER stack. Switch off. No one (not even debug)
+ * is using the thread stack right now, so it's safe for us to use it.
+ */
+ movl %esp, %ebp
+ movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
+ call do_nmi
+ movl %ebp, %esp
+ jmp restore_all_notrace
#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9d34d3cfceb6..858b555e274b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -103,6 +103,16 @@ ENDPROC(native_usergs_sysret64)
/*
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
*
+ * This is the only entry point used for 64-bit system calls. The
+ * hardware interface is reasonably well designed and the register to
+ * argument mapping Linux uses fits well with the registers that are
+ * available when SYSCALL is used.
+ *
+ * SYSCALL instructions can be found inlined in libc implementations as
+ * well as some other programs and libraries. There are also a handful
+ * of SYSCALL instructions in the vDSO used, for example, as a
+ * clock_gettimeofday fallback.
+ *
* 64-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
* then loads new ss, cs, and rip from previously programmed MSRs.
* rflags gets masked by a value from another MSR (so CLD and CLAC
@@ -145,17 +155,11 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ TRACE_IRQS_OFF
+
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
- /*
- * Re-enable interrupts.
- * We use 'rsp_scratch' as a scratch space, hence irq-off block above
- * must execute atomically in the face of possible interrupt-driven
- * task preemption. We must enable interrupts only after we're done
- * with using rsp_scratch:
- */
- ENABLE_INTERRUPTS(CLBR_NONE)
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
@@ -171,9 +175,21 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
pushq %r11 /* pt_regs->r11 */
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
- testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz tracesys
+ /*
+ * If we need to do entry work or if we guess we'll need to do
+ * exit work, go straight to the slow path.
+ */
+ testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz entry_SYSCALL64_slow_path
+
entry_SYSCALL_64_fastpath:
+ /*
+ * Easy case: enable interrupts and issue the syscall. If the syscall
+ * needs pt_regs, we'll call a stub that disables interrupts again
+ * and jumps to the slow path.
+ */
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max, %rax
#else
@@ -182,103 +198,56 @@ entry_SYSCALL_64_fastpath:
#endif
ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10, %rcx
+
+ /*
+ * This call instruction is handled specially in stub_ptregs_64.
+ * It might end up jumping to the slow path. If it jumps, RAX
+ * and all argument registers are clobbered.
+ */
call *sys_call_table(, %rax, 8)
+.Lentry_SYSCALL_64_after_fastpath_call:
+
movq %rax, RAX(%rsp)
1:
-/*
- * Syscall return path ending with SYSRET (fast path).
- * Has incompletely filled pt_regs.
- */
- LOCKDEP_SYS_EXIT
- /*
- * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
- * it is too small to ever cause noticeable irq latency.
- */
- DISABLE_INTERRUPTS(CLBR_NONE)
/*
- * We must check ti flags with interrupts (or at least preemption)
- * off because we must *never* return to userspace without
- * processing exit work that is enqueued if we're preempted here.
- * In particular, returning to userspace with any of the one-shot
- * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
- * very bad.
+ * If we get here, then we know that pt_regs is clean for SYSRET64.
+ * If we see that no exit work is required (which we are required
+ * to check with IRQs off), then we can go straight to SYSRET64.
*/
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
- jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
+ jnz 1f
- RESTORE_C_REGS_EXCEPT_RCX_R11
+ LOCKDEP_SYS_EXIT
+ TRACE_IRQS_ON /* user mode is traced as IRQs on */
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
+ RESTORE_C_REGS_EXCEPT_RCX_R11
movq RSP(%rsp), %rsp
- /*
- * 64-bit SYSRET restores rip from rcx,
- * rflags from r11 (but RF and VM bits are forced to 0),
- * cs and ss are loaded from MSRs.
- * Restoration of rflags re-enables interrupts.
- *
- * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
- * descriptor is not reinitialized. This means that we should
- * avoid SYSRET with SS == NULL, which could happen if we schedule,
- * exit the kernel, and re-enter using an interrupt vector. (All
- * interrupt entries on x86_64 set SS to NULL.) We prevent that
- * from happening by reloading SS in __switch_to. (Actually
- * detecting the failure in 64-bit userspace is tricky but can be
- * done.)
- */
USERGS_SYSRET64
-GLOBAL(int_ret_from_sys_call_irqs_off)
+1:
+ /*
+ * The fast path looked good when we started, but something changed
+ * along the way and we need to switch to the slow path. Calling
+ * raise(3) will trigger this, for example. IRQs are off.
+ */
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- jmp int_ret_from_sys_call
-
- /* Do syscall entry tracing */
-tracesys:
- movq %rsp, %rdi
- movl $AUDIT_ARCH_X86_64, %esi
- call syscall_trace_enter_phase1
- test %rax, %rax
- jnz tracesys_phase2 /* if needed, run the slow path */
- RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
- movq ORIG_RAX(%rsp), %rax
- jmp entry_SYSCALL_64_fastpath /* and return to the fast path */
-
-tracesys_phase2:
SAVE_EXTRA_REGS
movq %rsp, %rdi
- movl $AUDIT_ARCH_X86_64, %esi
- movq %rax, %rdx
- call syscall_trace_enter_phase2
-
- /*
- * Reload registers from stack in case ptrace changed them.
- * We don't reload %rax because syscall_trace_entry_phase2() returned
- * the value it wants us to use in the table lookup.
- */
- RESTORE_C_REGS_EXCEPT_RAX
- RESTORE_EXTRA_REGS
-#if __SYSCALL_MASK == ~0
- cmpq $__NR_syscall_max, %rax
-#else
- andl $__SYSCALL_MASK, %eax
- cmpl $__NR_syscall_max, %eax
-#endif
- ja 1f /* return -ENOSYS (already in pt_regs->ax) */
- movq %r10, %rcx /* fixup for C */
- call *sys_call_table(, %rax, 8)
- movq %rax, RAX(%rsp)
-1:
- /* Use IRET because user could have changed pt_regs->foo */
+ call syscall_return_slowpath /* returns with IRQs disabled */
+ jmp return_from_SYSCALL_64
-/*
- * Syscall return path ending with IRET.
- * Has correct iret frame.
- */
-GLOBAL(int_ret_from_sys_call)
+entry_SYSCALL64_slow_path:
+ /* IRQs are off. */
SAVE_EXTRA_REGS
movq %rsp, %rdi
- call syscall_return_slowpath /* returns with IRQs disabled */
+ call do_syscall_64 /* returns with IRQs disabled */
+
+return_from_SYSCALL_64:
RESTORE_EXTRA_REGS
TRACE_IRQS_IRETQ /* we're about to change IF */
@@ -355,83 +324,45 @@ opportunistic_sysret_failed:
jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)
+ENTRY(stub_ptregs_64)
+ /*
+ * Syscalls marked as needing ptregs land here.
+ * If we are on the fast path, we need to save the extra regs,
+ * which we achieve by trying again on the slow path. If we are on
+ * the slow path, the extra regs are already saved.
+ *
+ * RAX stores a pointer to the C function implementing the syscall.
+ * IRQs are on.
+ */
+ cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
+ jne 1f
- .macro FORK_LIKE func
-ENTRY(stub_\func)
- SAVE_EXTRA_REGS 8
- jmp sys_\func
-END(stub_\func)
- .endm
-
- FORK_LIKE clone
- FORK_LIKE fork
- FORK_LIKE vfork
-
-ENTRY(stub_execve)
- call sys_execve
-return_from_execve:
- testl %eax, %eax
- jz 1f
- /* exec failed, can use fast SYSRET code path in this case */
- ret
-1:
- /* must use IRET code path (pt_regs->cs may have changed) */
- addq $8, %rsp
- ZERO_EXTRA_REGS
- movq %rax, RAX(%rsp)
- jmp int_ret_from_sys_call
-END(stub_execve)
-/*
- * Remaining execve stubs are only 7 bytes long.
- * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
- */
- .align 8
-GLOBAL(stub_execveat)
- call sys_execveat
- jmp return_from_execve
-END(stub_execveat)
-
-#if defined(CONFIG_X86_X32_ABI)
- .align 8
-GLOBAL(stub_x32_execve)
- call compat_sys_execve
- jmp return_from_execve
-END(stub_x32_execve)
- .align 8
-GLOBAL(stub_x32_execveat)
- call compat_sys_execveat
- jmp return_from_execve
-END(stub_x32_execveat)
-#endif
-
-/*
- * sigreturn is special because it needs to restore all registers on return.
- * This cannot be done with SYSRET, so use the IRET return path instead.
- */
-ENTRY(stub_rt_sigreturn)
/*
- * SAVE_EXTRA_REGS result is not normally needed:
- * sigreturn overwrites all pt_regs->GPREGS.
- * But sigreturn can fail (!), and there is no easy way to detect that.
- * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
- * we SAVE_EXTRA_REGS here.
+ * Called from fast path -- disable IRQs again, pop return address
+ * and jump to slow path
*/
- SAVE_EXTRA_REGS 8
- call sys_rt_sigreturn
-return_from_stub:
- addq $8, %rsp
- RESTORE_EXTRA_REGS
- movq %rax, RAX(%rsp)
- jmp int_ret_from_sys_call
-END(stub_rt_sigreturn)
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ popq %rax
+ jmp entry_SYSCALL64_slow_path
-#ifdef CONFIG_X86_X32_ABI
-ENTRY(stub_x32_rt_sigreturn)
- SAVE_EXTRA_REGS 8
- call sys32_x32_rt_sigreturn
- jmp return_from_stub
-END(stub_x32_rt_sigreturn)
-#endif
+1:
+ /* Called from C */
+ jmp *%rax /* called from C */
+END(stub_ptregs_64)
+
+.macro ptregs_stub func
+ENTRY(ptregs_\func)
+ leaq \func(%rip), %rax
+ jmp stub_ptregs_64
+END(ptregs_\func)
+.endm
+
+/* Instantiate ptregs_stub for each ptregs-using syscall */
+#define __SYSCALL_64_QUAL_(sym)
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+#include <asm/syscalls_64.h>
/*
* A newly forked process directly context switches into this address.
@@ -439,7 +370,6 @@ END(stub_x32_rt_sigreturn)
* rdi: prev task we switched from
*/
ENTRY(ret_from_fork)
-
LOCK ; btr $TIF_FORK, TI_flags(%r8)
pushq $0x0002
@@ -447,28 +377,32 @@ ENTRY(ret_from_fork)
call schedule_tail /* rdi: 'prev' task parameter */
- RESTORE_EXTRA_REGS
-
testb $3, CS(%rsp) /* from kernel_thread? */
+ jnz 1f
/*
- * By the time we get here, we have no idea whether our pt_regs,
- * ti flags, and ti status came from the 64-bit SYSCALL fast path,
- * the slow path, or one of the 32-bit compat paths.
- * Use IRET code path to return, since it can safely handle
- * all of the above.
+ * We came from kernel_thread. This code path is quite twisted, and
+ * someone should clean it up.
+ *
+ * copy_thread_tls stashes the function pointer in RBX and the
+ * parameter to be passed in RBP. The called function is permitted
+ * to call do_execve and thereby jump to user mode.
*/
- jnz int_ret_from_sys_call
+ movq RBP(%rsp), %rdi
+ call *RBX(%rsp)
+ movl $0, RAX(%rsp)
/*
- * We came from kernel_thread
- * nb: we depend on RESTORE_EXTRA_REGS above
+ * Fall through as though we're exiting a syscall. This makes a
+ * twisted sort of sense if we just called do_execve.
*/
- movq %rbp, %rdi
- call *%rbx
- movl $0, RAX(%rsp)
- RESTORE_EXTRA_REGS
- jmp int_ret_from_sys_call
+
+1:
+ movq %rsp, %rdi
+ call syscall_return_slowpath /* returns with IRQs disabled */
+ TRACE_IRQS_ON /* user mode is traced as IRQS on */
+ SWAPGS
+ jmp restore_regs_and_iret
END(ret_from_fork)
/*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 3c990eeee40b..847f2f0c31e5 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -19,12 +19,21 @@
.section .entry.text, "ax"
/*
- * 32-bit SYSENTER instruction entry.
+ * 32-bit SYSENTER entry.
*
- * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
- * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * on 64-bit kernels running on Intel CPUs.
+ *
+ * The SYSENTER instruction, in principle, should *only* occur in the
+ * vDSO. In practice, a small number of Android devices were shipped
+ * with a copy of Bionic that inlined a SYSENTER instruction. This
+ * never happened in any of Google's Bionic versions -- it only happened
+ * in a narrow range of Intel-provided versions.
+ *
+ * SYSENTER loads SS, RSP, CS, and RIP from previously programmed MSRs.
+ * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
* SYSENTER does not save anything on the stack,
- * and does not save old rip (!!!) and rflags.
+ * and does not save old RIP (!!!), RSP, or RFLAGS.
*
* Arguments:
* eax system call number
@@ -35,10 +44,6 @@
* edi arg5
* ebp user stack
* 0(%ebp) arg6
- *
- * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. We set up a complete hardware stack frame to share code
- * with the int 0x80 path.
*/
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
@@ -66,8 +71,6 @@ ENTRY(entry_SYSENTER_compat)
*/
pushfq /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
- ASM_CLAC /* Clear AC after saving FLAGS */
-
pushq $__USER32_CS /* pt_regs->cs */
xorq %r8,%r8
pushq %r8 /* pt_regs->ip = 0 (placeholder) */
@@ -90,19 +93,25 @@ ENTRY(entry_SYSENTER_compat)
cld
/*
- * Sysenter doesn't filter flags, so we need to clear NT
+ * SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
- * NT was set instead of doing an unconditional popfq.
+ * either was set instead of doing an unconditional popfq.
* This needs to happen before enabling interrupts so that
* we don't get preempted with NT set.
*
+ * If TF is set, we will single-step all the way to here -- do_debug
+ * will ignore all the traps. (Yes, this is slow, but so is
+ * single-stepping in general. This allows us to avoid having
+ * a more complicated code to handle the case where a user program
+ * forces us to single-step through the SYSENTER entry code.)
+ *
* NB.: .Lsysenter_fix_flags is a label with the code under it moved
* out-of-line as an optimization: NT is unlikely to be set in the
* majority of the cases and instead of polluting the I$ unnecessarily,
* we're keeping that code behind a branch which will predict as
* not-taken and therefore its instructions won't be fetched.
*/
- testl $X86_EFLAGS_NT, EFLAGS(%rsp)
+ testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, EFLAGS(%rsp)
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:
@@ -123,20 +132,42 @@ ENTRY(entry_SYSENTER_compat)
pushq $X86_EFLAGS_FIXED
popfq
jmp .Lsysenter_flags_fixed
+GLOBAL(__end_entry_SYSENTER_compat)
ENDPROC(entry_SYSENTER_compat)
/*
- * 32-bit SYSCALL instruction entry.
+ * 32-bit SYSCALL entry.
+ *
+ * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
+ * on 64-bit kernels running on AMD CPUs.
+ *
+ * The SYSCALL instruction, in principle, should *only* occur in the
+ * vDSO. In practice, it appears that this really is the case.
+ * As evidence:
+ *
+ * - The calling convention for SYSCALL has changed several times without
+ * anyone noticing.
*
- * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
- * then loads new ss, cs, and rip from previously programmed MSRs.
- * rflags gets masked by a value from another MSR (so CLD and CLAC
- * are not needed). SYSCALL does not save anything on the stack
- * and does not change rsp.
+ * - Prior to the in-kernel X86_BUG_SYSRET_SS_ATTRS fixup, anything
+ * user task that did SYSCALL without immediately reloading SS
+ * would randomly crash.
*
- * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * - Most programmers do not directly target AMD CPUs, and the 32-bit
+ * SYSCALL instruction does not exist on Intel CPUs. Even on AMD
+ * CPUs, Linux disables the SYSCALL instruction on 32-bit kernels
+ * because the SYSCALL instruction in legacy/native 32-bit mode (as
+ * opposed to compat mode) is sufficiently poorly designed as to be
+ * essentially unusable.
+ *
+ * 32-bit SYSCALL saves RIP to RCX, clears RFLAGS.RF, then saves
+ * RFLAGS to R11, then loads new SS, CS, and RIP from previously
+ * programmed MSRs. RFLAGS gets masked by a value from another MSR
+ * (so CLD and CLAC are not needed). SYSCALL does not save anything on
+ * the stack and does not change RSP.
+ *
+ * Note: RFLAGS saving+masking-with-MSR happens only in Long mode
* (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
- * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * Don't get confused: RFLAGS saving+masking depends on Long Mode Active bit
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
*
@@ -236,7 +267,21 @@ sysret32_from_system_call:
END(entry_SYSCALL_compat)
/*
- * Emulated IA32 system calls via int 0x80.
+ * 32-bit legacy system call entry.
+ *
+ * 32-bit x86 Linux system calls traditionally used the INT $0x80
+ * instruction. INT $0x80 lands here.
+ *
+ * This entry point can be used by 32-bit and 64-bit programs to perform
+ * 32-bit system calls. Instances of INT $0x80 can be found inline in
+ * various programs and libraries. It is also used by the vDSO's
+ * __kernel_vsyscall fallback for hardware that doesn't support a faster
+ * entry method. Restarted 32-bit system calls also fall back to INT
+ * $0x80 regardless of what instruction was originally used to do the
+ * system call.
+ *
+ * This is considered a slow path. It is not used by most libc
+ * implementations on modern hardware except during process startup.
*
* Arguments:
* eax system call number
@@ -245,17 +290,8 @@ END(entry_SYSCALL_compat)
* edx arg3
* esi arg4
* edi arg5
- * ebp arg6 (note: not saved in the stack frame, should not be touched)
- *
- * Notes:
- * Uses the same stack frame as the x86-64 version.
- * All registers except eax must be saved (but ptrace may violate that).
- * Arguments are zero extended. For system calls that want sign extension and
- * take long arguments a wrapper is needed. Most calls can just be called
- * directly.
- * Assumes it is only called from user space and entered with interrupts off.
+ * ebp arg6
*/
-
ENTRY(entry_INT80_compat)
/*
* Interrupts are off on entry.
@@ -300,7 +336,7 @@ ENTRY(entry_INT80_compat)
TRACE_IRQS_OFF
movq %rsp, %rdi
- call do_syscall_32_irqs_off
+ call do_int80_syscall_32
.Lsyscall_32_done:
/* Go back to user mode. */
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 9a6649857106..8f895ee13a1c 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -6,17 +6,11 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-#ifdef CONFIG_IA32_EMULATION
-#define SYM(sym, compat) compat
-#else
-#define SYM(sym, compat) sym
-#endif
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
+#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 41283d22be7a..9dbc5abb6162 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -6,19 +6,14 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
+#define __SYSCALL_64_QUAL_(sym) sym
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
-
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
+#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index dc1040a50bdc..2e5b565adacc 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -21,7 +21,7 @@
12 common brk sys_brk
13 64 rt_sigaction sys_rt_sigaction
14 common rt_sigprocmask sys_rt_sigprocmask
-15 64 rt_sigreturn stub_rt_sigreturn
+15 64 rt_sigreturn sys_rt_sigreturn/ptregs
16 64 ioctl sys_ioctl
17 common pread64 sys_pread64
18 common pwrite64 sys_pwrite64
@@ -62,10 +62,10 @@
53 common socketpair sys_socketpair
54 64 setsockopt sys_setsockopt
55 64 getsockopt sys_getsockopt
-56 common clone stub_clone
-57 common fork stub_fork
-58 common vfork stub_vfork
-59 64 execve stub_execve
+56 common clone sys_clone/ptregs
+57 common fork sys_fork/ptregs
+58 common vfork sys_vfork/ptregs
+59 64 execve sys_execve/ptregs
60 common exit sys_exit
61 common wait4 sys_wait4
62 common kill sys_kill
@@ -178,7 +178,7 @@
169 common reboot sys_reboot
170 common sethostname sys_sethostname
171 common setdomainname sys_setdomainname
-172 common iopl sys_iopl
+172 common iopl sys_iopl/ptregs
173 common ioperm sys_ioperm
174 64 create_module
175 common init_module sys_init_module
@@ -328,7 +328,7 @@
319 common memfd_create sys_memfd_create
320 common kexec_file_load sys_kexec_file_load
321 common bpf sys_bpf
-322 64 execveat stub_execveat
+322 64 execveat sys_execveat/ptregs
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
325 common mlock2 sys_mlock2
@@ -339,14 +339,14 @@
# for native 64-bit operation.
#
512 x32 rt_sigaction compat_sys_rt_sigaction
-513 x32 rt_sigreturn stub_x32_rt_sigreturn
+513 x32 rt_sigreturn sys32_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
515 x32 readv compat_sys_readv
516 x32 writev compat_sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
-520 x32 execve stub_x32_execve
+520 x32 execve compat_sys_execve/ptregs
521 x32 ptrace compat_sys_ptrace
522 x32 rt_sigpending compat_sys_rt_sigpending
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
@@ -371,4 +371,4 @@
542 x32 getsockopt compat_sys_getsockopt
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
-545 x32 execveat stub_x32_execveat
+545 x32 execveat compat_sys_execveat/ptregs
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 0e7f8ec071e7..cd3d3015d7df 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -3,13 +3,63 @@
in="$1"
out="$2"
+syscall_macro() {
+ abi="$1"
+ nr="$2"
+ entry="$3"
+
+ # Entry can be either just a function name or "function/qualifier"
+ real_entry="${entry%%/*}"
+ qualifier="${entry:${#real_entry}}" # Strip the function name
+ qualifier="${qualifier:1}" # Strip the slash, if any
+
+ echo "__SYSCALL_${abi}($nr, $real_entry, $qualifier)"
+}
+
+emit() {
+ abi="$1"
+ nr="$2"
+ entry="$3"
+ compat="$4"
+
+ if [ "$abi" == "64" -a -n "$compat" ]; then
+ echo "a compat entry for a 64-bit syscall makes no sense" >&2
+ exit 1
+ fi
+
+ if [ -z "$compat" ]; then
+ if [ -n "$entry" ]; then
+ syscall_macro "$abi" "$nr" "$entry"
+ fi
+ else
+ echo "#ifdef CONFIG_X86_32"
+ if [ -n "$entry" ]; then
+ syscall_macro "$abi" "$nr" "$entry"
+ fi
+ echo "#else"
+ syscall_macro "$abi" "$nr" "$compat"
+ echo "#endif"
+ fi
+}
+
grep '^[0-9]' "$in" | sort -n | (
while read nr abi name entry compat; do
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
- if [ -n "$compat" ]; then
- echo "__SYSCALL_${abi}($nr, $entry, $compat)"
- elif [ -n "$entry" ]; then
- echo "__SYSCALL_${abi}($nr, $entry, $entry)"
+ if [ "$abi" == "COMMON" -o "$abi" == "64" ]; then
+ # COMMON is the same as 64, except that we don't expect X32
+ # programs to use it. Our expectation has nothing to do with
+ # any generated code, so treat them the same.
+ emit 64 "$nr" "$entry" "$compat"
+ elif [ "$abi" == "X32" ]; then
+ # X32 is equivalent to 64 on an X32-compatible kernel.
+ echo "#ifdef CONFIG_X86_X32_ABI"
+ emit 64 "$nr" "$entry" "$compat"
+ echo "#endif"
+ elif [ "$abi" == "I386" ]; then
+ emit "$abi" "$nr" "$entry" "$compat"
+ else
+ echo "Unknown abi $abi" >&2
+ exit 1
fi
done
) > "$out"
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 3f69326ed545..63a03bb91497 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -150,16 +150,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
fprintf(outfile, "\n};\n\n");
- fprintf(outfile, "static struct page *pages[%lu];\n\n",
- mapping_size / 4096);
-
fprintf(outfile, "const struct vdso_image %s = {\n", name);
fprintf(outfile, "\t.data = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
- fprintf(outfile, "\t.text_mapping = {\n");
- fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
- fprintf(outfile, "\t\t.pages = pages,\n");
- fprintf(outfile, "\t},\n");
if (alt_sec) {
fprintf(outfile, "\t.alt = %lu,\n",
(unsigned long)GET_LE(&alt_sec->sh_offset));
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index 08a317a9ae4b..7853b53959cd 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/mm_types.h>
-#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/vdso.h>
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 3a1d9297074b..0109ac6cb79c 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -3,7 +3,7 @@
*/
#include <asm/dwarf2.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index b8f69e264ac4..10f704584922 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -20,6 +20,7 @@
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>
+#include <asm/cpufeature.h>
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
@@ -27,13 +28,7 @@ unsigned int __read_mostly vdso64_enabled = 1;
void __init init_vdso_image(const struct vdso_image *image)
{
- int i;
- int npages = (image->size) / PAGE_SIZE;
-
BUG_ON(image->size % PAGE_SIZE != 0);
- for (i = 0; i < npages; i++)
- image->text_mapping.pages[i] =
- virt_to_page(image->data + i*PAGE_SIZE);
apply_alternatives((struct alt_instr *)(image->data + image->alt),
(struct alt_instr *)(image->data + image->alt +
@@ -90,18 +85,87 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
#endif
}
+static int vdso_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+
+ if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
+ return VM_FAULT_SIGBUS;
+
+ vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
+ get_page(vmf->page);
+ return 0;
+}
+
+static const struct vm_special_mapping text_mapping = {
+ .name = "[vdso]",
+ .fault = vdso_fault,
+};
+
+static int vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+ long sym_offset;
+ int ret = -EFAULT;
+
+ if (!image)
+ return VM_FAULT_SIGBUS;
+
+ sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
+ image->sym_vvar_start;
+
+ /*
+ * Sanity check: a symbol offset of zero means that the page
+ * does not exist for this vdso image, not that the page is at
+ * offset zero relative to the text mapping. This should be
+ * impossible here, because sym_offset should only be zero for
+ * the page past the end of the vvar mapping.
+ */
+ if (sym_offset == 0)
+ return VM_FAULT_SIGBUS;
+
+ if (sym_offset == image->sym_vvar_page) {
+ ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+ __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+ } else if (sym_offset == image->sym_hpet_page) {
+#ifdef CONFIG_HPET_TIMER
+ if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
+ ret = vm_insert_pfn_prot(
+ vma,
+ (unsigned long)vmf->virtual_address,
+ hpet_address >> PAGE_SHIFT,
+ pgprot_noncached(PAGE_READONLY));
+ }
+#endif
+ } else if (sym_offset == image->sym_pvclock_page) {
+ struct pvclock_vsyscall_time_info *pvti =
+ pvclock_pvti_cpu0_va();
+ if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
+ ret = vm_insert_pfn(
+ vma,
+ (unsigned long)vmf->virtual_address,
+ __pa(pvti) >> PAGE_SHIFT);
+ }
+ }
+
+ if (ret == 0 || ret == -EBUSY)
+ return VM_FAULT_NOPAGE;
+
+ return VM_FAULT_SIGBUS;
+}
+
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long addr, text_start;
int ret = 0;
- static struct page *no_pages[] = {NULL};
- static struct vm_special_mapping vvar_mapping = {
+ static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
- .pages = no_pages,
+ .fault = vvar_fault,
};
- struct pvclock_vsyscall_time_info *pvti;
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
@@ -121,6 +185,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
text_start = addr - image->sym_vvar_start;
current->mm->context.vdso = (void __user *)text_start;
+ current->mm->context.vdso_image = image;
/*
* MAYWRITE to allow gdb to COW and set breakpoints
@@ -130,7 +195,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
image->size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &image->text_mapping);
+ &text_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
@@ -140,7 +205,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
vma = _install_special_mapping(mm,
addr,
-image->sym_vvar_start,
- VM_READ|VM_MAYREAD,
+ VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
+ VM_PFNMAP,
&vvar_mapping);
if (IS_ERR(vma)) {
@@ -148,41 +214,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
goto up_fail;
}
- if (image->sym_vvar_page)
- ret = remap_pfn_range(vma,
- text_start + image->sym_vvar_page,
- __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
- PAGE_SIZE,
- PAGE_READONLY);
-
- if (ret)
- goto up_fail;
-
-#ifdef CONFIG_HPET_TIMER
- if (hpet_address && image->sym_hpet_page) {
- ret = io_remap_pfn_range(vma,
- text_start + image->sym_hpet_page,
- hpet_address >> PAGE_SHIFT,
- PAGE_SIZE,
- pgprot_noncached(PAGE_READONLY));
-
- if (ret)
- goto up_fail;
- }
-#endif
-
- pvti = pvclock_pvti_cpu0_va();
- if (pvti && image->sym_pvclock_page) {
- ret = remap_pfn_range(vma,
- text_start + image->sym_pvclock_page,
- __pa(pvti) >> PAGE_SHIFT,
- PAGE_SIZE,
- PAGE_READONLY);
-
- if (ret)
- goto up_fail;
- }
-
up_fail:
if (ret)
current->mm->context.vdso = NULL;
@@ -254,7 +285,7 @@ static void vgetcpu_cpu_init(void *arg)
#ifdef CONFIG_NUMA
node = cpu_to_node(cpu);
#endif
- if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+ if (static_cpu_has(X86_FEATURE_RDTSCP))
write_rdtscp_aux((node << 12) | cpu);
/*
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
index 51e330416995..0fb3a104ac62 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -16,6 +16,8 @@
#include <asm/vgtod.h>
#include <asm/vvar.h>
+int vclocks_used __read_mostly;
+
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
void update_vsyscall_tz(void)
@@ -26,12 +28,17 @@ void update_vsyscall_tz(void)
void update_vsyscall(struct timekeeper *tk)
{
+ int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
+ /* Mark the new vclock used. */
+ BUILD_BUG_ON(VCLOCK_MAX >= 32);
+ WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
+
gtod_write_begin(vdata);
/* copy vsyscall data */
- vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+ vdata->vclock_mode = vclock_mode;
vdata->cycle_last = tk->tkr_mono.cycle_last;
vdata->mask = tk->tkr_mono.mask;
vdata->mult = tk->tkr_mono.mult;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 7bfc85bbb8ff..99afb665a004 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -152,12 +152,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
".popsection"
/*
- * This must be included *after* the definition of ALTERNATIVE due to
- * <asm/arch_hweight.h>
- */
-#include <asm/cpufeature.h>
-
-/*
* Alternative instructions for different CPU types or capabilities.
*
* This allows to use optimized instructions even on generic binary
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c80f6b6f3da2..0899cfc8dfe8 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -6,7 +6,6 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
-#include <asm/processor.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include <asm/fixmap.h>
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 259a7c1ef709..02e799fa43d1 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_HWEIGHT_H
#define _ASM_X86_HWEIGHT_H
+#include <asm/cpufeatures.h>
+
#ifdef CONFIG_64BIT
/* popcnt %edi, %eax -- redundant REX prefix for alignment */
#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index cfe3b954d5e4..7766d1cf096e 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -91,7 +91,7 @@ set_bit(long nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __set_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}
@@ -128,13 +128,13 @@ clear_bit(long nr, volatile unsigned long *addr)
* clear_bit() is atomic and implies release semantics before the memory
* operation. It can be used for an unlock.
*/
-static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
clear_bit(nr, addr);
}
-static inline void __clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}
@@ -151,7 +151,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr)
* No memory barrier is required here, because x86 cannot reorder stores past
* older loads. Same principle as spin_unlock.
*/
-static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
+static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
__clear_bit(nr, addr);
@@ -166,7 +166,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}
@@ -180,7 +180,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static inline void change_bit(long nr, volatile unsigned long *addr)
+static __always_inline void change_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -201,7 +201,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
}
@@ -228,7 +228,7 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr)
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
-static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -247,7 +247,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
}
@@ -268,7 +268,7 @@ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
* accessed from a hypervisor on the same CPU if running in a VM: don't change
* this without also updating arch/x86/kernel/kvm.c
*/
-static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -280,7 +280,7 @@ static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
}
/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -300,7 +300,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
+static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
}
@@ -311,7 +311,7 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo
(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}
-static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
+static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
int oldbit;
@@ -343,7 +343,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
*
* Undefined if no bit exists, so code should check against 0 first.
*/
-static inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned long __ffs(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
@@ -357,7 +357,7 @@ static inline unsigned long __ffs(unsigned long word)
*
* Undefined if no zero exists, so code should check against ~0UL first.
*/
-static inline unsigned long ffz(unsigned long word)
+static __always_inline unsigned long ffz(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
@@ -371,7 +371,7 @@ static inline unsigned long ffz(unsigned long word)
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static inline unsigned long __fls(unsigned long word)
+static __always_inline unsigned long __fls(unsigned long word)
{
asm("bsr %1,%0"
: "=r" (word)
@@ -393,7 +393,7 @@ static inline unsigned long __fls(unsigned long word)
* set bit if value is nonzero. The first (least significant) bit
* is at position 1.
*/
-static inline int ffs(int x)
+static __always_inline int ffs(int x)
{
int r;
@@ -434,7 +434,7 @@ static inline int ffs(int x)
* set bit if value is nonzero. The last (most significant) bit is
* at position 32.
*/
-static inline int fls(int x)
+static __always_inline int fls(int x)
{
int r;
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index eda81dc0f4ae..d194266acb28 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,10 +3,11 @@
#ifndef _ASM_X86_CLOCKSOURCE_H
#define _ASM_X86_CLOCKSOURCE_H
-#define VCLOCK_NONE 0 /* No vDSO clock available. */
-#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
-#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
-#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_NONE 0 /* No vDSO clock available. */
+#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
+#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
+#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_MAX 3
struct arch_clocksource_data {
int vclock_mode;
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index ad19841eddfe..9733361fed6f 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -2,6 +2,7 @@
#define ASM_X86_CMPXCHG_H
#include <linux/compiler.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
/*
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7ad8c9464297..68e4e8258b84 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -1,288 +1,7 @@
-/*
- * Defines x86 CPU feature bits
- */
#ifndef _ASM_X86_CPUFEATURE_H
#define _ASM_X86_CPUFEATURE_H
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
-#define NCAPINTS 16 /* N 32-bit words worth of info */
-#define NBUGINTS 1 /* N 32-bit bug flags */
-
-/*
- * Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name. If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
- */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
- /* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
-
-/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
-/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
-
-/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
-
-/* Other features, Linux-defined mapping, word 3 */
-/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
-/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
-#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
-/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
-#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
-#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
-#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
-#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
-#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
-
-/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
-
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
-#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
-#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
-#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
-
-/*
- * Auxiliary flags: Linux defined - For features scattered in various
- * CPUID levels like 0x6, 0xA etc, word 7.
- *
- * Reuse free bits when adding new feature flags!
- */
-
-#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-
-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-
-#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
-
-/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
-
-#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
-#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
-
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
-#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-
-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
-
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
-
-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
-#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
-
-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
-#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-
-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
-#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
-#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
-#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-
-/*
- * BUG word(s)
- */
-#define X86_BUG(x) (NCAPINTS*32 + (x))
-
-#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
-#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
-#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#include <asm/processor.h>
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
@@ -369,8 +88,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
* is not relevant.
*/
#define cpu_feature_enabled(bit) \
- (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : \
- cpu_has(&boot_cpu_data, bit))
+ (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
@@ -406,106 +124,19 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
/*
- * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
+ * Do not add any more of those clumsy macros - use static_cpu_has() for
* fast paths and boot_cpu_has() otherwise!
*/
-#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-extern void warn_pre_alternatives(void);
-extern bool __static_cpu_has_safe(u16 bit);
-
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
- * These are only valid after alternatives have run, but will statically
- * patch the target code for additional performance.
+ * These will statically patch the target code for additional
+ * performance.
*/
-static __always_inline __pure bool __static_cpu_has(u16 bit)
-{
-#ifdef CC_HAVE_ASM_GOTO
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-
- /*
- * Catch too early usage of this before alternatives
- * have run.
- */
- asm_volatile_goto("1: jmp %l[t_warn]\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n"
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* 1: do replace */
- " .byte 2b - 1b\n" /* source len */
- " .byte 0\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- /* skipping size check since replacement size = 0 */
- : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
-
-#endif
-
- asm_volatile_goto("1: jmp %l[t_no]\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n"
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* feature bit */
- " .byte 2b - 1b\n" /* source len */
- " .byte 0\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- /* skipping size check since replacement size = 0 */
- : : "i" (bit) : : t_no);
- return true;
- t_no:
- return false;
-
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
- t_warn:
- warn_pre_alternatives();
- return false;
-#endif
-
-#else /* CC_HAVE_ASM_GOTO */
-
- u8 flag;
- /* Open-coded due to __stringify() in ALTERNATIVE() */
- asm volatile("1: movb $0,%0\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n"
- " .long 3f - .\n"
- " .word %P1\n" /* feature bit */
- " .byte 2b - 1b\n" /* source len */
- " .byte 4f - 3f\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .discard,\"aw\",@progbits\n"
- " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "3: movb $1,%0\n"
- "4:\n"
- ".previous\n"
- : "=qm" (flag) : "i" (bit));
- return flag;
-
-#endif /* CC_HAVE_ASM_GOTO */
-}
-
-#define static_cpu_has(bit) \
-( \
- __builtin_constant_p(boot_cpu_has(bit)) ? \
- boot_cpu_has(bit) : \
- __builtin_constant_p(bit) ? \
- __static_cpu_has(bit) : \
- boot_cpu_has(bit) \
-)
-
-static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
+static __always_inline __pure bool _static_cpu_has(u16 bit)
{
-#ifdef CC_HAVE_ASM_GOTO
- asm_volatile_goto("1: jmp %l[t_dynamic]\n"
+ asm_volatile_goto("1: jmp 6f\n"
"2:\n"
".skip -(((5f-4f) - (2b-1b)) > 0) * "
"((5f-4f) - (2b-1b)),0x90\n"
@@ -530,66 +161,34 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .byte 0\n" /* repl len */
" .byte 0\n" /* pad len */
".previous\n"
- : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
- : : t_dynamic, t_no);
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
+ : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+ [bitnum] "i" (1 << (bit & 7)),
+ [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ : : t_yes, t_no);
+ t_yes:
return true;
t_no:
return false;
- t_dynamic:
- return __static_cpu_has_safe(bit);
-#else
- u8 flag;
- /* Open-coded due to __stringify() in ALTERNATIVE() */
- asm volatile("1: movb $2,%0\n"
- "2:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 3f - .\n" /* repl offset */
- " .word %P2\n" /* always replace */
- " .byte 2b - 1b\n" /* source len */
- " .byte 4f - 3f\n" /* replacement len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .discard,\"aw\",@progbits\n"
- " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "3: movb $0,%0\n"
- "4:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 5f - .\n" /* repl offset */
- " .word %P1\n" /* feature bit */
- " .byte 4b - 3b\n" /* src len */
- " .byte 6f - 5f\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .discard,\"aw\",@progbits\n"
- " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "5: movb $1,%0\n"
- "6:\n"
- ".previous\n"
- : "=qm" (flag)
- : "i" (bit), "i" (X86_FEATURE_ALWAYS));
- return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
-#endif /* CC_HAVE_ASM_GOTO */
}
-#define static_cpu_has_safe(bit) \
+#define static_cpu_has(bit) \
( \
__builtin_constant_p(boot_cpu_has(bit)) ? \
boot_cpu_has(bit) : \
- _static_cpu_has_safe(bit) \
+ _static_cpu_has(bit) \
)
#else
/*
- * gcc 3.x is too stupid to do the static test; fall back to dynamic.
+ * Fall back to dynamic for gcc versions which don't support asm goto. Should be
+ * a minority now anyway.
*/
#define static_cpu_has(bit) boot_cpu_has(bit)
-#define static_cpu_has_safe(bit) boot_cpu_has(bit)
#endif
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
@@ -597,7 +196,6 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit))
#define static_cpu_has_bug(bit) static_cpu_has((bit))
-#define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit))
#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
#define MAX_CPU_FEATURES (NCAPINTS * 32)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
new file mode 100644
index 000000000000..074b7604bd51
--- /dev/null
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -0,0 +1,300 @@
+#ifndef _ASM_X86_CPUFEATURES_H
+#define _ASM_X86_CPUFEATURES_H
+
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
+#include <asm/required-features.h>
+#endif
+
+#ifndef _ASM_X86_DISABLED_FEATURES_H
+#include <asm/disabled-features.h>
+#endif
+
+/*
+ * Defines x86 CPU feature bits
+ */
+#define NCAPINTS 16 /* N 32-bit words worth of info */
+#define NBUGINTS 1 /* N 32-bit bug flags */
+
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name. If the string is "",
+ * this feature bit is not displayed in /proc/cpuinfo at all.
+ */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
+ /* (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+/* cpu types for specific tunings: */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
+#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
+#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
+/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
+#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
+#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
+#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+
+/*
+ * Auxiliary flags: Linux defined - For features scattered in various
+ * CPUID levels like 0x6, 0xA etc, word 7.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+
+#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
+#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+
+/* Virtualization flags: Linux defined, word 8 */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
+
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
+
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+
+/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+
+/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
+
+/*
+ * BUG word(s)
+ */
+#define X86_BUG(x) (NCAPINTS*32 + (x))
+
+#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
+#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
+#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+
+#ifdef CONFIG_X86_32
+/*
+ * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
+ * to avoid confusion.
+ */
+#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#endif
+
+#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 278441f39856..eb5deb42484d 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -98,4 +98,27 @@ struct desc_ptr {
#endif /* !__ASSEMBLY__ */
+/* Access rights as returned by LAR */
+#define AR_TYPE_RODATA (0 * (1 << 9))
+#define AR_TYPE_RWDATA (1 * (1 << 9))
+#define AR_TYPE_RODATA_EXPDOWN (2 * (1 << 9))
+#define AR_TYPE_RWDATA_EXPDOWN (3 * (1 << 9))
+#define AR_TYPE_XOCODE (4 * (1 << 9))
+#define AR_TYPE_XRCODE (5 * (1 << 9))
+#define AR_TYPE_XOCODE_CONF (6 * (1 << 9))
+#define AR_TYPE_XRCODE_CONF (7 * (1 << 9))
+#define AR_TYPE_MASK (7 * (1 << 9))
+
+#define AR_DPL0 (0 * (1 << 13))
+#define AR_DPL3 (3 * (1 << 13))
+#define AR_DPL_MASK (3 * (1 << 13))
+
+#define AR_A (1 << 8) /* "Accessed" */
+#define AR_S (1 << 12) /* If clear, "System" segment */
+#define AR_P (1 << 15) /* "Present" */
+#define AR_AVL (1 << 20) /* "AVaiLable" (no HW effect) */
+#define AR_L (1 << 21) /* "Long mode" for code segments */
+#define AR_DB (1 << 22) /* D/B, effect depends on type */
+#define AR_G (1 << 23) /* "Granularity" (limit in pages) */
+
#endif /* _ASM_X86_DESC_DEFS_H */
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 535192f6bfad..3c69fed215c5 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len)
/* Use early IO mappings for DMI because it's initialized early */
#define dmi_early_remap early_ioremap
#define dmi_early_unmap early_iounmap
-#define dmi_remap ioremap
+#define dmi_remap ioremap_cache
#define dmi_unmap iounmap
#endif /* _ASM_X86_DMI_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 6d7d0e52ed5a..8554f960e21b 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve);
extern int fixmaps_set;
extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
+#define kmap_prot PAGE_KERNEL
extern pte_t *pkmap_page_table;
void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0fd440df63f1..a2124343edf5 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -17,6 +17,7 @@
#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
+#include <asm/cpufeature.h>
/*
* High level FPU state handling functions:
@@ -58,22 +59,22 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
*/
static __always_inline __pure bool use_eager_fpu(void)
{
- return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+ return static_cpu_has(X86_FEATURE_EAGER_FPU);
}
static __always_inline __pure bool use_xsaveopt(void)
{
- return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+ return static_cpu_has(X86_FEATURE_XSAVEOPT);
}
static __always_inline __pure bool use_xsave(void)
{
- return static_cpu_has_safe(X86_FEATURE_XSAVE);
+ return static_cpu_has(X86_FEATURE_XSAVE);
}
static __always_inline __pure bool use_fxsr(void)
{
- return static_cpu_has_safe(X86_FEATURE_FXSR);
+ return static_cpu_has(X86_FEATURE_FXSR);
}
/*
@@ -300,7 +301,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
- if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
else
XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -322,7 +323,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
- if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ if (static_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -460,7 +461,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
* pending. Clear the x87 state here by setting it to fixed values.
* "m" is a random variable that should be in L1.
*/
- if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
+ if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) {
asm volatile(
"fnclex\n\t"
"emms\n\t"
@@ -589,7 +590,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
* If the task has used the math, pre-load the FPU on xsave processors
* or if the past 5 consecutive context-switches used math.
*/
- fpu.preload = new_fpu->fpstate_active &&
+ fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
+ new_fpu->fpstate_active &&
(use_eager_fpu() || new_fpu->counter > 5);
if (old_fpu->fpregs_active) {
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 793179cf8e21..6e4d170726b7 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -1,23 +1,44 @@
-#ifdef __ASSEMBLY__
+#ifndef _ASM_X86_FRAME_H
+#define _ASM_X86_FRAME_H
#include <asm/asm.h>
-/* The annotation hides the frame from the unwinder and makes it look
- like a ordinary ebp save/restore. This avoids some special cases for
- frame pointer later */
+/*
+ * These are stack frame creation macros. They should be used by every
+ * callable non-leaf asm function to make kernel stack traces more reliable.
+ */
+
#ifdef CONFIG_FRAME_POINTER
- .macro FRAME
- __ASM_SIZE(push,) %__ASM_REG(bp)
- __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp)
- .endm
- .macro ENDFRAME
- __ASM_SIZE(pop,) %__ASM_REG(bp)
- .endm
-#else
- .macro FRAME
- .endm
- .macro ENDFRAME
- .endm
-#endif
-
-#endif /* __ASSEMBLY__ */
+
+#ifdef __ASSEMBLY__
+
+.macro FRAME_BEGIN
+ push %_ASM_BP
+ _ASM_MOV %_ASM_SP, %_ASM_BP
+.endm
+
+.macro FRAME_END
+ pop %_ASM_BP
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define FRAME_BEGIN \
+ "push %" _ASM_BP "\n" \
+ _ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n"
+
+#define FRAME_END "pop %" _ASM_BP "\n"
+
+#endif /* __ASSEMBLY__ */
+
+#define FRAME_OFFSET __ASM_SEL(4, 8)
+
+#else /* !CONFIG_FRAME_POINTER */
+
+#define FRAME_BEGIN
+#define FRAME_END
+#define FRAME_OFFSET 0
+
+#endif /* CONFIG_FRAME_POINTER */
+
+#endif /* _ASM_X86_FRAME_H */
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
index cd2ce4068441..ebea2c9d2cdc 100644
--- a/arch/x86/include/asm/imr.h
+++ b/arch/x86/include/asm/imr.h
@@ -53,7 +53,7 @@
#define IMR_MASK (IMR_ALIGN - 1)
int imr_add_range(phys_addr_t base, size_t size,
- unsigned int rmask, unsigned int wmask, bool lock);
+ unsigned int rmask, unsigned int wmask);
int imr_remove_range(phys_addr_t base, size_t size);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index cfc9a0d2d07c..a4fe16e42b7b 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -57,67 +57,13 @@ static inline void __xapic_wait_icr_idle(void)
cpu_relax();
}
-static inline void
-__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
-{
- /*
- * Subtle. In the case of the 'never do double writes' workaround
- * we have to lock out interrupts to be safe. As we don't care
- * of the value read we use an atomic rmw access to avoid costly
- * cli/sti. Otherwise we use an even cheaper single atomic write
- * to the APIC.
- */
- unsigned int cfg;
-
- /*
- * Wait for idle.
- */
- __xapic_wait_icr_idle();
-
- /*
- * No need to touch the target chip field
- */
- cfg = __prepare_ICR(shortcut, vector, dest);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- native_apic_mem_write(APIC_ICR, cfg);
-}
+void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
/*
* This is used to send an IPI with no shorthand notation (the destination is
* specified in bits 56 to 63 of the ICR).
*/
-static inline void
- __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
-{
- unsigned long cfg;
-
- /*
- * Wait for idle.
- */
- if (unlikely(vector == NMI_VECTOR))
- safe_apic_wait_icr_idle();
- else
- __xapic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR2(mask);
- native_apic_mem_write(APIC_ICR2, cfg);
-
- /*
- * program the ICR
- */
- cfg = __prepare_ICR(0, vector, dest);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- native_apic_mem_write(APIC_ICR, cfg);
-}
+void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest);
extern void default_send_IPI_single(int cpu, int vector);
extern void default_send_IPI_single_phys(int cpu, int vector);
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index 78162f8e248b..d0afb05c84fc 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -1,7 +1,7 @@
#ifndef _ASM_IRQ_WORK_H
#define _ASM_IRQ_WORK_H
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
static inline bool arch_irq_work_has_interrupt(void)
{
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index cfff34172be0..92b6f651fa4f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -135,6 +135,7 @@ struct mca_config {
bool ignore_ce;
bool disabled;
bool ser;
+ bool recovery;
bool bios_cmci_threshold;
u8 banks;
s8 bootlog;
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 1e1b07a5a738..9d3a96c4da78 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -3,6 +3,7 @@
#include <asm/cpu.h>
#include <linux/earlycpio.h>
+#include <linux/initrd.h>
#define native_rdmsr(msr, val1, val2) \
do { \
@@ -143,4 +144,29 @@ static inline void reload_early_microcode(void) { }
static inline bool
get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
#endif
+
+static inline unsigned long get_initrd_start(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ return initrd_start;
+#else
+ return 0;
+#endif
+}
+
+static inline unsigned long get_initrd_start_addr(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+#ifdef CONFIG_X86_32
+ unsigned long *initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
+
+ return (unsigned long)__pa_nodebug(*initrd_start_p);
+#else
+ return get_initrd_start();
+#endif
+#else /* CONFIG_BLK_DEV_INITRD */
+ return 0;
+#endif
+}
+
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 8559b0102ea1..603417f8dd6c 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -40,7 +40,6 @@ struct extended_sigtable {
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
-#define DWSIZE (sizeof(u32))
#define get_totalsize(mc) \
(((struct microcode_intel *)mc)->hdr.datasize ? \
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 55234d5e7160..1ea0baef1175 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -19,7 +19,8 @@ typedef struct {
#endif
struct mutex lock;
- void __user *vdso;
+ void __user *vdso; /* vdso base address */
+ const struct vdso_image *vdso_image; /* vdso image in use */
atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
} mm_context_t;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b05402ef3b84..984ab75bf621 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1,7 +1,12 @@
#ifndef _ASM_X86_MSR_INDEX_H
#define _ASM_X86_MSR_INDEX_H
-/* CPU model specific register (MSR) numbers */
+/*
+ * CPU model specific register (MSR) numbers.
+ *
+ * Do not add new entries to this file unless the definitions are shared
+ * between multiple compilation units.
+ */
/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index c70689b5e5aa..0deeb2d26df7 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -3,6 +3,8 @@
#include <linux/sched.h>
+#include <asm/cpufeature.h>
+
#define MWAIT_SUBSTATE_MASK 0xf
#define MWAIT_CSTATE_MASK 0xf
#define MWAIT_SUBSTATE_SIZE 4
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 813384ef811a..983738ac014c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -13,7 +13,7 @@ struct vm86;
#include <asm/types.h>
#include <uapi/asm/sigcontext.h>
#include <asm/current.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
@@ -24,7 +24,6 @@ struct vm86;
#include <asm/fpu/types.h>
#include <linux/personality.h>
-#include <linux/cpumask.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/math64.h>
@@ -300,10 +299,13 @@ struct tss_struct {
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+#ifdef CONFIG_X86_32
/*
- * Space for the temporary SYSENTER stack:
+ * Space for the temporary SYSENTER stack.
*/
+ unsigned long SYSENTER_stack_canary;
unsigned long SYSENTER_stack[64];
+#endif
} ____cacheline_aligned;
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index a4a77286cb1d..9b9b30b19441 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -7,12 +7,23 @@
void syscall_init(void);
+#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
-void entry_SYSCALL_compat(void);
+#endif
+
+#ifdef CONFIG_X86_32
void entry_INT80_32(void);
-void entry_INT80_compat(void);
void entry_SYSENTER_32(void);
+void __begin_SYSENTER_singlestep_region(void);
+void __end_SYSENTER_singlestep_region(void);
+#endif
+
+#ifdef CONFIG_IA32_EMULATION
void entry_SYSENTER_compat(void);
+void __end_entry_SYSENTER_compat(void);
+void entry_SYSCALL_compat(void);
+void entry_INT80_compat(void);
+#endif
void x86_configure_nx(void);
void x86_report_nx(void);
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 89db46752a8f..452c88b8ad06 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,7 +13,6 @@
X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask);
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index ba665ebd17bb..db333300bd4b 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -15,7 +15,7 @@
#include <linux/stringify.h>
#include <asm/nops.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
/* "Raw" instruction opcodes */
#define __ASM_CLAC .byte 0x0f,0x01,0xca
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index dfcf0727623b..20a3de5cb3b0 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -16,7 +16,6 @@
#endif
#include <asm/thread_info.h>
#include <asm/cpumask.h>
-#include <asm/cpufeature.h>
extern int smp_num_siblings;
extern unsigned int num_processors;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c7b551028740..82866697fcf1 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -49,7 +49,7 @@
*/
#ifndef __ASSEMBLY__
struct task_struct;
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <linux/atomic.h>
struct thread_info {
@@ -134,10 +134,13 @@ struct thread_info {
#define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32)
-/* work to do in syscall_trace_enter() */
+/*
+ * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
+ * enter_from_user_mode()
+ */
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
- _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
+ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
_TIF_NOHZ)
/* work to do on any return to user space */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6df2029405a3..c24b4224d439 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -5,8 +5,57 @@
#include <linux/sched.h>
#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/special_insns.h>
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+{
+ struct { u64 d[2]; } desc = { { pcid, addr } };
+
+ /*
+ * The memory clobber is because the whole point is to invalidate
+ * stale TLB entries and, especially if we're flushing global
+ * mappings, we don't want the compiler to reorder any subsequent
+ * memory accesses before the TLB flush.
+ *
+ * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+ * invpcid (%rcx), %rax in long mode.
+ */
+ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+ : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR 0
+#define INVPCID_TYPE_SINGLE_CTXT 1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
+#define INVPCID_TYPE_ALL_NON_GLOBAL 3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+ unsigned long addr)
+{
+ __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+ __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -104,6 +153,15 @@ static inline void __native_flush_tlb_global(void)
{
unsigned long flags;
+ if (static_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+ * to CR4 sandwiched inside an IRQ flag save/restore.
+ */
+ invpcid_flush_all();
+ return;
+ }
+
/*
* Read-modify-write to CR4 - protect it from preemption and
* from interrupts. (Use the raw variant because this code can
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 6d7c5479bcea..174c4212780a 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -29,6 +29,8 @@ static inline cycles_t get_cycles(void)
return rdtsc();
}
+extern struct system_counterval_t convert_art_to_tsc(cycle_t art);
+
extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index b89c34c4019b..307698688fa1 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -8,7 +8,7 @@
#include <linux/errno.h>
#include <linux/lockdep.h>
#include <asm/alternative.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/page.h>
/*
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index deabaf9759b6..43dc55be524e 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -13,9 +13,6 @@ struct vdso_image {
void *data;
unsigned long size; /* Always a multiple of PAGE_SIZE */
- /* text_mapping.pages is big enough for data/size page pointers */
- struct vm_special_mapping text_mapping;
-
unsigned long alt, alt_len;
long sym_vvar_start; /* Negative offset to the vvar area */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index f556c4843aa1..e728699db774 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -37,6 +37,12 @@ struct vsyscall_gtod_data {
};
extern struct vsyscall_gtod_data vsyscall_gtod_data;
+extern int vclocks_used;
+static inline bool vclock_was_used(int vclock)
+{
+ return READ_ONCE(vclocks_used) & (1 << vclock);
+}
+
static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
{
unsigned ret;
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index d485232f1e9f..62d4111c1c54 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -256,7 +256,7 @@ struct sigcontext_64 {
__u16 cs;
__u16 gs;
__u16 fs;
- __u16 __pad0;
+ __u16 ss;
__u64 err;
__u64 trapno;
__u64 oldmask;
@@ -341,9 +341,37 @@ struct sigcontext {
__u64 rip;
__u64 eflags; /* RFLAGS */
__u16 cs;
+
+ /*
+ * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
+ * Linux saved and restored fs and gs in these slots. This
+ * was counterproductive, as fsbase and gsbase were never
+ * saved, so arch_prctl was presumably unreliable.
+ *
+ * These slots should never be reused without extreme caution:
+ *
+ * - Some DOSEMU versions stash fs and gs in these slots manually,
+ * thus overwriting anything the kernel expects to be preserved
+ * in these slots.
+ *
+ * - If these slots are ever needed for any other purpose,
+ * there is some risk that very old 64-bit binaries could get
+ * confused. I doubt that many such binaries still work,
+ * though, since the same patch in 2.5.64 also removed the
+ * 64-bit set_thread_area syscall, so it appears that there
+ * is no TLS API beyond modify_ldt that works in both pre-
+ * and post-2.5.64 kernels.
+ *
+ * If the kernel ever adds explicit fs, gs, fsbase, and gsbase
+ * save/restore, it will most likely need to be opt-in and use
+ * different context slots.
+ */
__u16 gs;
__u16 fs;
- __u16 __pad0;
+ union {
+ __u16 ss; /* If UC_SIGCONTEXT_SS */
+ __u16 __pad0; /* Alias name for old (!UC_SIGCONTEXT_SS) user-space */
+ };
__u64 err;
__u64 trapno;
__u64 oldmask;
diff --git a/arch/x86/include/uapi/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h
index b7c29c8017f2..e3d1ec90616e 100644
--- a/arch/x86/include/uapi/asm/ucontext.h
+++ b/arch/x86/include/uapi/asm/ucontext.h
@@ -1,11 +1,54 @@
#ifndef _ASM_X86_UCONTEXT_H
#define _ASM_X86_UCONTEXT_H
-#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state
- * information in the memory layout pointed
- * by the fpstate pointer in the ucontext's
- * sigcontext struct (uc_mcontext).
- */
+/*
+ * Indicates the presence of extended state information in the memory
+ * layout pointed by the fpstate pointer in the ucontext's sigcontext
+ * struct (uc_mcontext).
+ */
+#define UC_FP_XSTATE 0x1
+
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext. All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ * saved CS is not 64-bit)
+ * new SS = saved SS (will fail IRET and signal if invalid)
+ * else
+ * new SS = a flat 32-bit data segment
+ *
+ * This behavior serves three purposes:
+ *
+ * - Legacy programs that construct a 64-bit sigcontext from scratch
+ * with zero or garbage in the SS slot (e.g. old CRIU) and call
+ * sigreturn will still work.
+ *
+ * - Old DOSEMU versions sometimes catch a signal from a segmented
+ * context, delete the old SS segment (with modify_ldt), and change
+ * the saved CS to a 64-bit segment. These DOSEMU versions expect
+ * sigreturn to send them back to 64-bit mode without killing them,
+ * despite the fact that the SS selector when the signal was raised is
+ * no longer valid. UC_STRICT_RESTORE_SS will be clear, so the kernel
+ * will fix up SS for these DOSEMU versions.
+ *
+ * - Old and new programs that catch a signal and return without
+ * modifying the saved context will end up in exactly the state they
+ * started in, even if they were running in a segmented context when
+ * the signal was raised.. Old kernels would lose track of the
+ * previous SS value.
+ */
+#define UC_SIGCONTEXT_SS 0x2
+#define UC_STRICT_RESTORE_SS 0x4
+#endif
#include <asm-generic/ucontext.h>
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 9968f30cca3e..76f89e2b245a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -53,7 +53,7 @@ void flat_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
-static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+static void _flat_send_IPI_mask(unsigned long mask, int vector)
{
unsigned long flags;
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c80c02c6ec49..ab5c2c685a3c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
unsigned long value;
unsigned int id = (x >> 24) & 0xff;
- if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, value);
id |= (value << 2) & 0xff00;
}
@@ -178,7 +178,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
this_cpu_write(cpu_llc_id, node);
/* Account for nodes per socket in multi-core-module processors */
- if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, val);
nodes = ((val >> 3) & 7) + 1;
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index eb45fc9b6124..28bde88b0085 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -18,6 +18,66 @@
#include <asm/proto.h>
#include <asm/ipi.h>
+void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
+{
+ /*
+ * Subtle. In the case of the 'never do double writes' workaround
+ * we have to lock out interrupts to be safe. As we don't care
+ * of the value read we use an atomic rmw access to avoid costly
+ * cli/sti. Otherwise we use an even cheaper single atomic write
+ * to the APIC.
+ */
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ __xapic_wait_icr_idle();
+
+ /*
+ * No need to touch the target chip field
+ */
+ cfg = __prepare_ICR(shortcut, vector, dest);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ native_apic_mem_write(APIC_ICR, cfg);
+}
+
+/*
+ * This is used to send an IPI with no shorthand notation (the destination is
+ * specified in bits 56 to 63 of the ICR).
+ */
+void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest)
+{
+ unsigned long cfg;
+
+ /*
+ * Wait for idle.
+ */
+ if (unlikely(vector == NMI_VECTOR))
+ safe_apic_wait_icr_idle();
+ else
+ __xapic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR2(mask);
+ native_apic_mem_write(APIC_ICR2, cfg);
+
+ /*
+ * program the ICR
+ */
+ cfg = __prepare_ICR(0, vector, dest);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ native_apic_mem_write(APIC_ICR, cfg);
+}
+
void default_send_IPI_single_phys(int cpu, int vector)
{
unsigned long flags;
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 84a7524b202c..5c042466f274 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -59,7 +59,6 @@ void common(void) {
#ifdef CONFIG_PARAVIRT
BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6ce39025f467..ecdc1d217dc0 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -7,7 +7,7 @@
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_32.h>
};
@@ -52,6 +52,11 @@ void foo(void)
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
offsetofend(struct tss_struct, SYSENTER_stack));
+ /* Offset from cpu_tss to SYSENTER_stack */
+ OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+ /* Size of SYSENTER_stack */
+ DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f2edafb5f24e..d875f97d4e0b 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -4,17 +4,11 @@
#include <asm/ia32.h>
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_X32(nr, sym, compat) [nr] = 1,
-#else
-# define __SYSCALL_X32(nr, sym, compat) /* nothing */
-#endif
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
static char syscalls_64[] = {
#include <asm/syscalls_64.h>
};
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls_ia32[] = {
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 7a60424d63fa..0d373d7affc8 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -42,7 +42,7 @@ ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
-cpufeature = $(src)/../../include/asm/cpufeature.h
+cpufeature = $(src)/../../include/asm/cpufeatures.h
targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index ce197bb7c129..1661d8ec9280 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,7 +1,7 @@
#include <linux/bitops.h>
#include <linux/kernel.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 81cf716f6f97..249461f95851 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -162,6 +162,22 @@ static int __init x86_mpx_setup(char *s)
}
__setup("nompx", x86_mpx_setup);
+static int __init x86_noinvpcid_setup(char *s)
+{
+ /* noinvpcid doesn't accept parameters */
+ if (s)
+ return -EINVAL;
+
+ /* do not emit a message if the feature is not present */
+ if (!boot_cpu_has(X86_FEATURE_INVPCID))
+ return 0;
+
+ setup_clear_cpu_cap(X86_FEATURE_INVPCID);
+ pr_info("noinvpcid: INVPCID feature disabled\n");
+ return 0;
+}
+early_param("noinvpcid", x86_noinvpcid_setup);
+
#ifdef CONFIG_X86_32
static int cachesize_override = -1;
static int disable_x86_serial_nr = 1;
@@ -801,6 +817,31 @@ static void detect_nopl(struct cpuinfo_x86 *c)
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
+
+ /*
+ * ESPFIX is a strange bug. All real CPUs have it. Paravirt
+ * systems that run Linux at CPL > 0 may or may not have the
+ * issue, but, even if they have the issue, there's absolutely
+ * nothing we can do about it because we can't use the real IRET
+ * instruction.
+ *
+ * NB: For the time being, only 32-bit kernels support
+ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
+ * whether to apply espfix using paravirt hooks. If any
+ * non-paravirt system ever shows up that does *not* have the
+ * ESPFIX issue, we can change this.
+ */
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_PARAVIRT
+ do {
+ extern void native_iret(void);
+ if (pv_cpu_ops.iret == native_iret)
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+ } while (0);
+#else
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+#endif
+#endif
}
static void generic_identify(struct cpuinfo_x86 *c)
@@ -1475,20 +1516,6 @@ void cpu_init(void)
}
#endif
-#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
-void warn_pre_alternatives(void)
-{
- WARN(1, "You're using static_cpu_has before alternatives have run!\n");
-}
-EXPORT_SYMBOL_GPL(warn_pre_alternatives);
-#endif
-
-inline bool __static_cpu_has_safe(u16 bit)
-{
- return boot_cpu_has(bit);
-}
-EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
-
static void bsp_resume(void)
{
if (this_cpu->c_bsp_resume)
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 187bb583d0df..6adef9cac23e 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -8,6 +8,7 @@
#include <linux/timer.h>
#include <asm/pci-direct.h>
#include <asm/tsc.h>
+#include <asm/cpufeature.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 38766c2b5b00..1f7fdb91a818 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -8,7 +8,7 @@
#include <linux/module.h>
#include <linux/uaccess.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/pgtable.h>
#include <asm/msr.h>
#include <asm/bugs.h>
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6ed779efff26..de6626c18e42 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -14,7 +14,7 @@
#include <linux/sysfs.h>
#include <linux/pci.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/amd_nb.h>
#include <asm/smp.h>
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index afa9f0d487ea..fbb5e90557a5 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -1,5 +1,5 @@
#include <asm/cpu_device_id.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/slab.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 524f2a8492d7..f0c921b03e42 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1578,6 +1578,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
+ /*
+ * MCG_CAP.MCG_SER_P is necessary but not sufficient to know
+ * whether this processor will actually generate recoverable
+ * machine checks. Check to see if this is an E7 model Xeon.
+ * We can't do a model number check because E5 and E7 use the
+ * same model number. E5 doesn't support recovery, E7 does.
+ */
+ if (mca_cfg.recovery || (mca_cfg.ser &&
+ !strncmp(c->x86_model_id,
+ "Intel(R) Xeon(R) CPU E7-", 24)))
+ set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
}
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
@@ -2030,6 +2041,8 @@ static int __init mcheck_enable(char *str)
cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
cfg->bios_cmci_threshold = true;
+ else if (!strcmp(str, "recovery"))
+ cfg->recovery = true;
else if (isdigit(str[0])) {
if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout));
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 75d3aab5f7b2..8581963894c7 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -431,10 +431,6 @@ int __init save_microcode_in_initrd_amd(void)
else
container = cont_va;
- if (ucode_new_rev)
- pr_info("microcode: updated early to new patch_level=0x%08x\n",
- ucode_new_rev);
-
eax = cpuid_eax(0x00000001);
eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
@@ -469,8 +465,7 @@ void reload_ucode_amd(void)
if (mc && rev < mc->hdr.patch_id) {
if (!__apply_microcode_amd(mc)) {
ucode_new_rev = mc->hdr.patch_id;
- pr_info("microcode: reload patch_level=0x%08x\n",
- ucode_new_rev);
+ pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
}
}
}
@@ -793,15 +788,13 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
return -EINVAL;
}
- patch->data = kzalloc(patch_size, GFP_KERNEL);
+ patch->data = kmemdup(fw + SECTION_HDR_SIZE, patch_size, GFP_KERNEL);
if (!patch->data) {
pr_err("Patch data allocation failure.\n");
kfree(patch);
return -EINVAL;
}
- /* All looks ok, copy patch... */
- memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
INIT_LIST_HEAD(&patch->plist);
patch->patch_id = mc_hdr->patch_id;
patch->equiv_cpu = proc_id;
@@ -957,6 +950,10 @@ struct microcode_ops * __init init_amd_microcode(void)
return NULL;
}
+ if (ucode_new_rev)
+ pr_info_once("microcode updated early to new patch_level=0x%08x\n",
+ ucode_new_rev);
+
return &microcode_amd_ops;
}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index faec7120c508..ac360bfbbdb6 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -43,16 +43,8 @@
#define MICROCODE_VERSION "2.01"
static struct microcode_ops *microcode_ops;
-
static bool dis_ucode_ldr;
-static int __init disable_loader(char *str)
-{
- dis_ucode_ldr = true;
- return 1;
-}
-__setup("dis_ucode_ldr", disable_loader);
-
/*
* Synchronization.
*
@@ -81,15 +73,16 @@ struct cpu_info_ctx {
static bool __init check_loader_disabled_bsp(void)
{
+ static const char *__dis_opt_str = "dis_ucode_ldr";
+
#ifdef CONFIG_X86_32
const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
- const char *opt = "dis_ucode_ldr";
- const char *option = (const char *)__pa_nodebug(opt);
+ const char *option = (const char *)__pa_nodebug(__dis_opt_str);
bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
#else /* CONFIG_X86_64 */
const char *cmdline = boot_command_line;
- const char *option = "dis_ucode_ldr";
+ const char *option = __dis_opt_str;
bool *res = &dis_ucode_ldr;
#endif
@@ -479,7 +472,7 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
enum ucode_state ustate;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- if (uci && uci->valid)
+ if (uci->valid)
return UCODE_OK;
if (collect_cpu_info(cpu))
@@ -630,7 +623,7 @@ int __init microcode_init(void)
struct cpuinfo_x86 *c = &boot_cpu_data;
int error;
- if (paravirt_enabled() || dis_ucode_ldr)
+ if (dis_ucode_ldr)
return -EINVAL;
if (c->x86_vendor == X86_VENDOR_INTEL)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ee81c544ee0d..cbb3cf09b065 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -39,9 +39,15 @@
#include <asm/setup.h>
#include <asm/msr.h>
-static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+/*
+ * Temporary microcode blobs pointers storage. We note here the pointers to
+ * microcode blobs we've got from whatever storage (detached initrd, builtin).
+ * Later on, we put those into final storage mc_saved_data.mc_saved.
+ */
+static unsigned long mc_tmp_ptrs[MAX_UCODE_COUNT];
+
static struct mc_saved_data {
- unsigned int mc_saved_count;
+ unsigned int num_saved;
struct microcode_intel **mc_saved;
} mc_saved_data;
@@ -78,53 +84,50 @@ load_microcode_early(struct microcode_intel **saved,
}
static inline void
-copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
- unsigned long off, int num_saved)
+copy_ptrs(struct microcode_intel **mc_saved, unsigned long *mc_ptrs,
+ unsigned long off, int num_saved)
{
int i;
for (i = 0; i < num_saved; i++)
- mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
+ mc_saved[i] = (struct microcode_intel *)(mc_ptrs[i] + off);
}
#ifdef CONFIG_X86_32
static void
-microcode_phys(struct microcode_intel **mc_saved_tmp,
- struct mc_saved_data *mc_saved_data)
+microcode_phys(struct microcode_intel **mc_saved_tmp, struct mc_saved_data *mcs)
{
int i;
struct microcode_intel ***mc_saved;
- mc_saved = (struct microcode_intel ***)
- __pa_nodebug(&mc_saved_data->mc_saved);
- for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
+ mc_saved = (struct microcode_intel ***)__pa_nodebug(&mcs->mc_saved);
+
+ for (i = 0; i < mcs->num_saved; i++) {
struct microcode_intel *p;
- p = *(struct microcode_intel **)
- __pa_nodebug(mc_saved_data->mc_saved + i);
+ p = *(struct microcode_intel **)__pa_nodebug(mcs->mc_saved + i);
mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
}
}
#endif
static enum ucode_state
-load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
- unsigned long initrd_start, struct ucode_cpu_info *uci)
+load_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
+ unsigned long offset, struct ucode_cpu_info *uci)
{
struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
- unsigned int count = mc_saved_data->mc_saved_count;
+ unsigned int count = mcs->num_saved;
- if (!mc_saved_data->mc_saved) {
- copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
+ if (!mcs->mc_saved) {
+ copy_ptrs(mc_saved_tmp, mc_ptrs, offset, count);
return load_microcode_early(mc_saved_tmp, count, uci);
} else {
#ifdef CONFIG_X86_32
- microcode_phys(mc_saved_tmp, mc_saved_data);
+ microcode_phys(mc_saved_tmp, mcs);
return load_microcode_early(mc_saved_tmp, count, uci);
#else
- return load_microcode_early(mc_saved_data->mc_saved,
- count, uci);
+ return load_microcode_early(mcs->mc_saved, count, uci);
#endif
}
}
@@ -175,25 +178,25 @@ matching_model_microcode(struct microcode_header_intel *mc_header,
}
static int
-save_microcode(struct mc_saved_data *mc_saved_data,
+save_microcode(struct mc_saved_data *mcs,
struct microcode_intel **mc_saved_src,
- unsigned int mc_saved_count)
+ unsigned int num_saved)
{
int i, j;
struct microcode_intel **saved_ptr;
int ret;
- if (!mc_saved_count)
+ if (!num_saved)
return -EINVAL;
/*
* Copy new microcode data.
*/
- saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
+ saved_ptr = kcalloc(num_saved, sizeof(struct microcode_intel *), GFP_KERNEL);
if (!saved_ptr)
return -ENOMEM;
- for (i = 0; i < mc_saved_count; i++) {
+ for (i = 0; i < num_saved; i++) {
struct microcode_header_intel *mc_hdr;
struct microcode_intel *mc;
unsigned long size;
@@ -207,20 +210,18 @@ save_microcode(struct mc_saved_data *mc_saved_data,
mc_hdr = &mc->hdr;
size = get_totalsize(mc_hdr);
- saved_ptr[i] = kmalloc(size, GFP_KERNEL);
+ saved_ptr[i] = kmemdup(mc, size, GFP_KERNEL);
if (!saved_ptr[i]) {
ret = -ENOMEM;
goto err;
}
-
- memcpy(saved_ptr[i], mc, size);
}
/*
* Point to newly saved microcode.
*/
- mc_saved_data->mc_saved = saved_ptr;
- mc_saved_data->mc_saved_count = mc_saved_count;
+ mcs->mc_saved = saved_ptr;
+ mcs->num_saved = num_saved;
return 0;
@@ -284,22 +285,20 @@ static unsigned int _save_mc(struct microcode_intel **mc_saved,
* BSP can stay in the platform.
*/
static enum ucode_state __init
-get_matching_model_microcode(int cpu, unsigned long start,
- void *data, size_t size,
- struct mc_saved_data *mc_saved_data,
- unsigned long *mc_saved_in_initrd,
+get_matching_model_microcode(unsigned long start, void *data, size_t size,
+ struct mc_saved_data *mcs, unsigned long *mc_ptrs,
struct ucode_cpu_info *uci)
{
- u8 *ucode_ptr = data;
- unsigned int leftover = size;
+ struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+ struct microcode_header_intel *mc_header;
+ unsigned int num_saved = mcs->num_saved;
enum ucode_state state = UCODE_OK;
+ unsigned int leftover = size;
+ u8 *ucode_ptr = data;
unsigned int mc_size;
- struct microcode_header_intel *mc_header;
- struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
- unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
int i;
- while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
+ while (leftover && num_saved < ARRAY_SIZE(mc_saved_tmp)) {
if (leftover < sizeof(mc_header))
break;
@@ -318,32 +317,31 @@ get_matching_model_microcode(int cpu, unsigned long start,
* the platform, we need to find and save microcode patches
* with the same family and model as the BSP.
*/
- if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
- UCODE_OK) {
+ if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != UCODE_OK) {
ucode_ptr += mc_size;
continue;
}
- mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
+ num_saved = _save_mc(mc_saved_tmp, ucode_ptr, num_saved);
ucode_ptr += mc_size;
}
if (leftover) {
state = UCODE_ERROR;
- goto out;
+ return state;
}
- if (mc_saved_count == 0) {
+ if (!num_saved) {
state = UCODE_NFOUND;
- goto out;
+ return state;
}
- for (i = 0; i < mc_saved_count; i++)
- mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
+ for (i = 0; i < num_saved; i++)
+ mc_ptrs[i] = (unsigned long)mc_saved_tmp[i] - start;
+
+ mcs->num_saved = num_saved;
- mc_saved_data->mc_saved_count = mc_saved_count;
-out:
return state;
}
@@ -373,7 +371,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
csig.pf = 1 << ((val[1] >> 18) & 7);
}
- native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ native_wrmsrl(MSR_IA32_UCODE_REV, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
@@ -396,11 +394,11 @@ static void show_saved_mc(void)
unsigned int sig, pf, rev, total_size, data_size, date;
struct ucode_cpu_info uci;
- if (mc_saved_data.mc_saved_count == 0) {
+ if (!mc_saved_data.num_saved) {
pr_debug("no microcode data saved.\n");
return;
}
- pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
+ pr_debug("Total microcode saved: %d\n", mc_saved_data.num_saved);
collect_cpu_info_early(&uci);
@@ -409,7 +407,7 @@ static void show_saved_mc(void)
rev = uci.cpu_sig.rev;
pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
- for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
+ for (i = 0; i < mc_saved_data.num_saved; i++) {
struct microcode_header_intel *mc_saved_header;
struct extended_sigtable *ext_header;
int ext_sigcount;
@@ -465,7 +463,7 @@ int save_mc_for_early(u8 *mc)
{
struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
unsigned int mc_saved_count_init;
- unsigned int mc_saved_count;
+ unsigned int num_saved;
struct microcode_intel **mc_saved;
int ret = 0;
int i;
@@ -476,23 +474,23 @@ int save_mc_for_early(u8 *mc)
*/
mutex_lock(&x86_cpu_microcode_mutex);
- mc_saved_count_init = mc_saved_data.mc_saved_count;
- mc_saved_count = mc_saved_data.mc_saved_count;
+ mc_saved_count_init = mc_saved_data.num_saved;
+ num_saved = mc_saved_data.num_saved;
mc_saved = mc_saved_data.mc_saved;
- if (mc_saved && mc_saved_count)
+ if (mc_saved && num_saved)
memcpy(mc_saved_tmp, mc_saved,
- mc_saved_count * sizeof(struct microcode_intel *));
+ num_saved * sizeof(struct microcode_intel *));
/*
* Save the microcode patch mc in mc_save_tmp structure if it's a newer
* version.
*/
- mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
+ num_saved = _save_mc(mc_saved_tmp, mc, num_saved);
/*
* Save the mc_save_tmp in global mc_saved_data.
*/
- ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
+ ret = save_microcode(&mc_saved_data, mc_saved_tmp, num_saved);
if (ret) {
pr_err("Cannot save microcode patch.\n");
goto out;
@@ -536,7 +534,7 @@ static bool __init load_builtin_intel_microcode(struct cpio_data *cp)
static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
static __init enum ucode_state
-scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+scan_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
unsigned long start, unsigned long size,
struct ucode_cpu_info *uci)
{
@@ -551,14 +549,18 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
cd.data = NULL;
cd.size = 0;
- cd = find_cpio_data(p, (void *)start, size, &offset);
- if (!cd.data) {
+ /* try built-in microcode if no initrd */
+ if (!size) {
if (!load_builtin_intel_microcode(&cd))
return UCODE_ERROR;
+ } else {
+ cd = find_cpio_data(p, (void *)start, size, &offset);
+ if (!cd.data)
+ return UCODE_ERROR;
}
- return get_matching_model_microcode(0, start, cd.data, cd.size,
- mc_saved_data, initrd, uci);
+ return get_matching_model_microcode(start, cd.data, cd.size,
+ mcs, mc_ptrs, uci);
}
/*
@@ -567,14 +569,11 @@ scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
static void
print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
{
- int cpu = smp_processor_id();
-
- pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
- cpu,
- uci->cpu_sig.rev,
- date & 0xffff,
- date >> 24,
- (date >> 16) & 0xff);
+ pr_info_once("microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
+ uci->cpu_sig.rev,
+ date & 0xffff,
+ date >> 24,
+ (date >> 16) & 0xff);
}
#ifdef CONFIG_X86_32
@@ -603,19 +602,19 @@ void show_ucode_info_early(void)
*/
static void print_ucode(struct ucode_cpu_info *uci)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
int *delay_ucode_info_p;
int *current_mc_date_p;
- mc_intel = uci->mc;
- if (mc_intel == NULL)
+ mc = uci->mc;
+ if (!mc)
return;
delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
*delay_ucode_info_p = 1;
- *current_mc_date_p = mc_intel->hdr.date;
+ *current_mc_date_p = mc->hdr.date;
}
#else
@@ -630,37 +629,35 @@ static inline void flush_tlb_early(void)
static inline void print_ucode(struct ucode_cpu_info *uci)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
- mc_intel = uci->mc;
- if (mc_intel == NULL)
+ mc = uci->mc;
+ if (!mc)
return;
- print_ucode_info(uci, mc_intel->hdr.date);
+ print_ucode_info(uci, mc->hdr.date);
}
#endif
static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
unsigned int val[2];
- mc_intel = uci->mc;
- if (mc_intel == NULL)
+ mc = uci->mc;
+ if (!mc)
return 0;
/* write microcode via MSR 0x79 */
- native_wrmsr(MSR_IA32_UCODE_WRITE,
- (unsigned long) mc_intel->bits,
- (unsigned long) mc_intel->bits >> 16 >> 16);
- native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+ native_wrmsrl(MSR_IA32_UCODE_REV, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
/* get the current revision from MSR 0x8B */
native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
- if (val[1] != mc_intel->hdr.rev)
+ if (val[1] != mc->hdr.rev)
return -1;
#ifdef CONFIG_X86_64
@@ -672,25 +669,26 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (early)
print_ucode(uci);
else
- print_ucode_info(uci, mc_intel->hdr.date);
+ print_ucode_info(uci, mc->hdr.date);
return 0;
}
/*
* This function converts microcode patch offsets previously stored in
- * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
+ * mc_tmp_ptrs to pointers and stores the pointers in mc_saved_data.
*/
int __init save_microcode_in_initrd_intel(void)
{
- unsigned int count = mc_saved_data.mc_saved_count;
+ unsigned int count = mc_saved_data.num_saved;
struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
int ret = 0;
- if (count == 0)
+ if (!count)
return ret;
- copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
+ copy_ptrs(mc_saved, mc_tmp_ptrs, get_initrd_start(), count);
+
ret = save_microcode(&mc_saved_data, mc_saved, count);
if (ret)
pr_err("Cannot save microcode patches from initrd.\n");
@@ -701,8 +699,7 @@ int __init save_microcode_in_initrd_intel(void)
}
static void __init
-_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
- unsigned long *initrd,
+_load_ucode_intel_bsp(struct mc_saved_data *mcs, unsigned long *mc_ptrs,
unsigned long start, unsigned long size)
{
struct ucode_cpu_info uci;
@@ -710,11 +707,11 @@ _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
collect_cpu_info_early(&uci);
- ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
+ ret = scan_microcode(mcs, mc_ptrs, start, size, &uci);
if (ret != UCODE_OK)
return;
- ret = load_microcode(mc_saved_data, initrd, start, &uci);
+ ret = load_microcode(mcs, mc_ptrs, start, &uci);
if (ret != UCODE_OK)
return;
@@ -728,53 +725,49 @@ void __init load_ucode_intel_bsp(void)
struct boot_params *p;
p = (struct boot_params *)__pa_nodebug(&boot_params);
- start = p->hdr.ramdisk_image;
size = p->hdr.ramdisk_size;
- _load_ucode_intel_bsp(
- (struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
- (unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
- start, size);
+ /*
+ * Set start only if we have an initrd image. We cannot use initrd_start
+ * because it is not set that early yet.
+ */
+ start = (size ? p->hdr.ramdisk_image : 0);
+
+ _load_ucode_intel_bsp((struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+ (unsigned long *)__pa_nodebug(&mc_tmp_ptrs),
+ start, size);
#else
- start = boot_params.hdr.ramdisk_image + PAGE_OFFSET;
size = boot_params.hdr.ramdisk_size;
+ start = (size ? boot_params.hdr.ramdisk_image + PAGE_OFFSET : 0);
- _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
+ _load_ucode_intel_bsp(&mc_saved_data, mc_tmp_ptrs, start, size);
#endif
}
void load_ucode_intel_ap(void)
{
- struct mc_saved_data *mc_saved_data_p;
+ unsigned long *mcs_tmp_p;
+ struct mc_saved_data *mcs_p;
struct ucode_cpu_info uci;
- unsigned long *mc_saved_in_initrd_p;
- unsigned long initrd_start_addr;
enum ucode_state ret;
#ifdef CONFIG_X86_32
- unsigned long *initrd_start_p;
- mc_saved_in_initrd_p =
- (unsigned long *)__pa_nodebug(mc_saved_in_initrd);
- mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
- initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
- initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
+ mcs_tmp_p = (unsigned long *)__pa_nodebug(mc_tmp_ptrs);
+ mcs_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
#else
- mc_saved_data_p = &mc_saved_data;
- mc_saved_in_initrd_p = mc_saved_in_initrd;
- initrd_start_addr = initrd_start;
+ mcs_tmp_p = mc_tmp_ptrs;
+ mcs_p = &mc_saved_data;
#endif
/*
* If there is no valid ucode previously saved in memory, no need to
* update ucode on this AP.
*/
- if (mc_saved_data_p->mc_saved_count == 0)
+ if (!mcs_p->num_saved)
return;
collect_cpu_info_early(&uci);
- ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
- initrd_start_addr, &uci);
-
+ ret = load_microcode(mcs_p, mcs_tmp_p, get_initrd_start_addr(), &uci);
if (ret != UCODE_OK)
return;
@@ -786,13 +779,13 @@ void reload_ucode_intel(void)
struct ucode_cpu_info uci;
enum ucode_state ret;
- if (!mc_saved_data.mc_saved_count)
+ if (!mc_saved_data.num_saved)
return;
collect_cpu_info_early(&uci);
ret = load_microcode_early(mc_saved_data.mc_saved,
- mc_saved_data.mc_saved_count, &uci);
+ mc_saved_data.num_saved, &uci);
if (ret != UCODE_OK)
return;
@@ -825,7 +818,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
* return 0 - no update found
* return 1 - found update
*/
-static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
+static int get_matching_mc(struct microcode_intel *mc, int cpu)
{
struct cpu_signature cpu_sig;
unsigned int csig, cpf, crev;
@@ -836,39 +829,36 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
cpf = cpu_sig.pf;
crev = cpu_sig.rev;
- return has_newer_microcode(mc_intel, csig, cpf, crev);
+ return has_newer_microcode(mc, csig, cpf, crev);
}
static int apply_microcode_intel(int cpu)
{
- struct microcode_intel *mc_intel;
+ struct microcode_intel *mc;
struct ucode_cpu_info *uci;
+ struct cpuinfo_x86 *c;
unsigned int val[2];
- int cpu_num = raw_smp_processor_id();
- struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-
- uci = ucode_cpu_info + cpu;
- mc_intel = uci->mc;
/* We should bind the task to the CPU */
- BUG_ON(cpu_num != cpu);
+ if (WARN_ON(raw_smp_processor_id() != cpu))
+ return -1;
- if (mc_intel == NULL)
+ uci = ucode_cpu_info + cpu;
+ mc = uci->mc;
+ if (!mc)
return 0;
/*
* Microcode on this CPU could be updated earlier. Only apply the
- * microcode patch in mc_intel when it is newer than the one on this
+ * microcode patch in mc when it is newer than the one on this
* CPU.
*/
- if (get_matching_mc(mc_intel, cpu) == 0)
+ if (!get_matching_mc(mc, cpu))
return 0;
/* write microcode via MSR 0x79 */
- wrmsr(MSR_IA32_UCODE_WRITE,
- (unsigned long) mc_intel->bits,
- (unsigned long) mc_intel->bits >> 16 >> 16);
- wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
+ wrmsrl(MSR_IA32_UCODE_REV, 0);
/* As documented in the SDM: Do a CPUID 1 here */
sync_core();
@@ -876,16 +866,19 @@ static int apply_microcode_intel(int cpu)
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
- if (val[1] != mc_intel->hdr.rev) {
+ if (val[1] != mc->hdr.rev) {
pr_err("CPU%d update to revision 0x%x failed\n",
- cpu_num, mc_intel->hdr.rev);
+ cpu, mc->hdr.rev);
return -1;
}
+
pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
- cpu_num, val[1],
- mc_intel->hdr.date & 0xffff,
- mc_intel->hdr.date >> 24,
- (mc_intel->hdr.date >> 16) & 0xff);
+ cpu, val[1],
+ mc->hdr.date & 0xffff,
+ mc->hdr.date >> 24,
+ (mc->hdr.date >> 16) & 0xff);
+
+ c = &cpu_data(cpu);
uci->cpu_sig.rev = val[1];
c->microcode = val[1];
diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c
index b96896bcbdaf..2ce1a7dc45b7 100644
--- a/arch/x86/kernel/cpu/microcode/intel_lib.c
+++ b/arch/x86/kernel/cpu/microcode/intel_lib.c
@@ -49,7 +49,7 @@ int microcode_sanity_check(void *mc, int print_err)
unsigned long total_size, data_size, ext_table_size;
struct microcode_header_intel *mc_header = mc;
struct extended_sigtable *ext_header = NULL;
- int sum, orig_sum, ext_sigcount = 0, i;
+ u32 sum, orig_sum, ext_sigcount = 0, i;
struct extended_signature *ext_sig;
total_size = get_totalsize(mc_header);
@@ -57,69 +57,85 @@ int microcode_sanity_check(void *mc, int print_err)
if (data_size + MC_HEADER_SIZE > total_size) {
if (print_err)
- pr_err("error! Bad data size in microcode data file\n");
+ pr_err("Error: bad microcode data file size.\n");
return -EINVAL;
}
if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
if (print_err)
- pr_err("error! Unknown microcode update format\n");
+ pr_err("Error: invalid/unknown microcode update format.\n");
return -EINVAL;
}
+
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
if (ext_table_size) {
+ u32 ext_table_sum = 0;
+ u32 *ext_tablep;
+
if ((ext_table_size < EXT_HEADER_SIZE)
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
if (print_err)
- pr_err("error! Small exttable size in microcode data file\n");
+ pr_err("Error: truncated extended signature table.\n");
return -EINVAL;
}
+
ext_header = mc + MC_HEADER_SIZE + data_size;
if (ext_table_size != exttable_size(ext_header)) {
if (print_err)
- pr_err("error! Bad exttable size in microcode data file\n");
+ pr_err("Error: extended signature table size mismatch.\n");
return -EFAULT;
}
+
ext_sigcount = ext_header->count;
- }
- /* check extended table checksum */
- if (ext_table_size) {
- int ext_table_sum = 0;
- int *ext_tablep = (int *)ext_header;
+ /*
+ * Check extended table checksum: the sum of all dwords that
+ * comprise a valid table must be 0.
+ */
+ ext_tablep = (u32 *)ext_header;
- i = ext_table_size / DWSIZE;
+ i = ext_table_size / sizeof(u32);
while (i--)
ext_table_sum += ext_tablep[i];
+
if (ext_table_sum) {
if (print_err)
- pr_warn("aborting, bad extended signature table checksum\n");
+ pr_warn("Bad extended signature table checksum, aborting.\n");
return -EINVAL;
}
}
- /* calculate the checksum */
+ /*
+ * Calculate the checksum of update data and header. The checksum of
+ * valid update data and header including the extended signature table
+ * must be 0.
+ */
orig_sum = 0;
- i = (MC_HEADER_SIZE + data_size) / DWSIZE;
+ i = (MC_HEADER_SIZE + data_size) / sizeof(u32);
while (i--)
- orig_sum += ((int *)mc)[i];
+ orig_sum += ((u32 *)mc)[i];
+
if (orig_sum) {
if (print_err)
- pr_err("aborting, bad checksum\n");
+ pr_err("Bad microcode data checksum, aborting.\n");
return -EINVAL;
}
+
if (!ext_table_size)
return 0;
- /* check extended signature checksum */
+
+ /*
+ * Check extended signature checksum: 0 => valid.
+ */
for (i = 0; i < ext_sigcount; i++) {
ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
EXT_SIGNATURE_SIZE * i;
- sum = orig_sum
- - (mc_header->sig + mc_header->pf + mc_header->cksum)
- + (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
+
+ sum = (mc_header->sig + mc_header->pf + mc_header->cksum) -
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
if (sum) {
if (print_err)
- pr_err("aborting, bad checksum\n");
+ pr_err("Bad extended signature checksum, aborting.\n");
return -EINVAL;
}
}
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 3f20710a5b23..6988c74409a8 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
+# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
#
IN=$1
@@ -49,8 +49,8 @@ dump_array()
trap 'rm "$OUT"' EXIT
(
- echo "#ifndef _ASM_X86_CPUFEATURE_H"
- echo "#include <asm/cpufeature.h>"
+ echo "#ifndef _ASM_X86_CPUFEATURES_H"
+ echo "#include <asm/cpufeatures.h>"
echo "#endif"
echo ""
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index ba80d68f683e..10f8d4796240 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -47,7 +47,7 @@
#include <linux/smp.h>
#include <linux/syscore_ops.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index e3b4d1841175..34178564be2a 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -1,6 +1,6 @@
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/msr.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 837365f10912..621b501f8935 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -24,6 +24,7 @@
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/setup.h>
+#include <asm/cpufeature.h>
/*
* The e820 map is the map that gets modified e.g. with command line parameters
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index d5804adfa6da..0b1b9abd4d5f 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -114,6 +114,10 @@ void __kernel_fpu_begin(void)
kernel_fpu_disable();
if (fpu->fpregs_active) {
+ /*
+ * Ignore return value -- we don't care if reg state
+ * is clobbered.
+ */
copy_fpregs_to_fpstate(fpu);
} else {
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
@@ -189,8 +193,12 @@ void fpu__save(struct fpu *fpu)
preempt_disable();
if (fpu->fpregs_active) {
- if (!copy_fpregs_to_fpstate(fpu))
- fpregs_deactivate(fpu);
+ if (!copy_fpregs_to_fpstate(fpu)) {
+ if (use_eager_fpu())
+ copy_kernel_to_fpregs(&fpu->state);
+ else
+ fpregs_deactivate(fpu);
+ }
}
preempt_enable();
}
@@ -223,14 +231,15 @@ void fpstate_init(union fpregs_state *state)
}
EXPORT_SYMBOL_GPL(fpstate_init);
-/*
- * Copy the current task's FPU state to a new task's FPU context.
- *
- * In both the 'eager' and the 'lazy' case we save hardware registers
- * directly to the destination buffer.
- */
-static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
+ dst_fpu->counter = 0;
+ dst_fpu->fpregs_active = 0;
+ dst_fpu->last_cpu = -1;
+
+ if (!src_fpu->fpstate_active || !cpu_has_fpu)
+ return 0;
+
WARN_ON_FPU(src_fpu != &current->thread.fpu);
/*
@@ -243,10 +252,9 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
/*
* Save current FPU registers directly into the child
* FPU context, without any memory-to-memory copying.
- *
- * If the FPU context got destroyed in the process (FNSAVE
- * done on old CPUs) then copy it back into the source
- * context and mark the current task for lazy restore.
+ * In lazy mode, if the FPU context isn't loaded into
+ * fpregs, CR0.TS will be set and do_device_not_available
+ * will load the FPU context.
*
* We have to do all this with preemption disabled,
* mostly because of the FNSAVE case, because in that
@@ -259,19 +267,13 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
preempt_disable();
if (!copy_fpregs_to_fpstate(dst_fpu)) {
memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
- fpregs_deactivate(src_fpu);
+
+ if (use_eager_fpu())
+ copy_kernel_to_fpregs(&src_fpu->state);
+ else
+ fpregs_deactivate(src_fpu);
}
preempt_enable();
-}
-
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
-{
- dst_fpu->counter = 0;
- dst_fpu->fpregs_active = 0;
- dst_fpu->last_cpu = -1;
-
- if (src_fpu->fpstate_active && cpu_has_fpu)
- fpu_copy(dst_fpu, src_fpu);
return 0;
}
@@ -425,7 +427,7 @@ void fpu__clear(struct fpu *fpu)
{
WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
- if (!use_eager_fpu()) {
+ if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
/* FPU state will be reallocated lazily at the first use. */
fpu__drop(fpu);
} else {
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index bd08fb77073d..54c86fffbf9f 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -262,7 +262,10 @@ static void __init fpu__init_system_xstate_size_legacy(void)
* not only saved the restores along the way, but we also have the
* FPU ready to be used for the original task.
*
- * 'eager' switching is used on modern CPUs, there we switch the FPU
+ * 'lazy' is deprecated because it's almost never a performance win
+ * and it's much more complicated than 'eager'.
+ *
+ * 'eager' switching is by default on all CPUs, there we switch the FPU
* state during every context switch, regardless of whether the task
* has used FPU instructions in that time slice or not. This is done
* because modern FPU context saving instructions are able to optimize
@@ -273,7 +276,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
* to use 'eager' restores, if we detect that a task is using the FPU
* frequently. See the fpu->counter logic in fpu/internal.h for that. ]
*/
-static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
+static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
/*
* Find supported xfeatures based on cpu features and command-line input.
@@ -344,15 +347,9 @@ static void __init fpu__init_system_ctx_switch(void)
*/
static void __init fpu__init_parse_early_param(void)
{
- /*
- * No need to check "eagerfpu=auto" again, since it is the
- * initial default.
- */
if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) {
eagerfpu = DISABLE;
fpu__clear_eager_fpu_features();
- } else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) {
- eagerfpu = ENABLE;
}
if (cmdline_find_option_bool(boot_command_line, "no387"))
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d425cda5ae6d..6e8354f5a593 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -51,6 +51,9 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 05c9e3f5b6d7..702547ce33c9 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -697,9 +697,8 @@ static inline void tramp_free(void *tramp) { }
#endif
/* Defined as markers to the end of the ftrace default trampolines */
-extern void ftrace_caller_end(void);
extern void ftrace_regs_caller_end(void);
-extern void ftrace_return(void);
+extern void ftrace_epilogue(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);
@@ -746,7 +745,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
} else {
start_offset = (unsigned long)ftrace_caller;
- end_offset = (unsigned long)ftrace_caller_end;
+ end_offset = (unsigned long)ftrace_epilogue;
op_offset = (unsigned long)ftrace_caller_op_ptr;
}
@@ -754,7 +753,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/*
* Allocate enough size to store the ftrace_caller code,
- * the jmp to ftrace_return, as well as the address of
+ * the jmp to ftrace_epilogue, as well as the address of
* the ftrace_ops this trampoline is used for.
*/
trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *));
@@ -772,8 +771,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = (unsigned long)trampoline + size;
- /* The trampoline ends with a jmp to ftrace_return */
- jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_return);
+ /* The trampoline ends with a jmp to ftrace_epilogue */
+ jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue);
memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE);
/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2c0f3407bd1f..1f4422d5c8d0 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -40,13 +40,8 @@ pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{
- unsigned long i;
-
- for (i = 0; i < PTRS_PER_PGD-1; i++)
- early_level4_pgt[i].pgd = 0;
-
+ memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
next_early_pgt = 0;
-
write_cr3(__pa_nodebug(early_level4_pgt));
}
@@ -54,7 +49,6 @@ static void __init reset_early_page_tables(void)
int __init early_make_pgtable(unsigned long address)
{
unsigned long physaddr = address - __PAGE_OFFSET;
- unsigned long i;
pgdval_t pgd, *pgd_p;
pudval_t pud, *pud_p;
pmdval_t pmd, *pmd_p;
@@ -81,8 +75,7 @@ again:
}
pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
- for (i = 0; i < PTRS_PER_PUD; i++)
- pud_p[i] = 0;
+ memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
pud_p += pud_index(address);
@@ -97,8 +90,7 @@ again:
}
pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
- for (i = 0; i < PTRS_PER_PMD; i++)
- pmd_p[i] = 0;
+ memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
pmd = (physaddr & PMD_MASK) + early_pmd_flags;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6bc9ae24b6d2..54cdbd2003fe 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,7 +19,7 @@
#include <asm/setup.h>
#include <asm/processor-flags.h>
#include <asm/msr-index.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/percpu.h>
#include <asm/nops.h>
#include <asm/bootparam.h>
@@ -389,6 +389,12 @@ default_entry:
/* Make changes effective */
wrmsr
+ /*
+ * And make sure that all the mappings we set up have NX set from
+ * the beginning.
+ */
+ orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4)
+
enable_paging:
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ffdc0e860390..22fbf9df61bb 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -38,7 +38,6 @@
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
-L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
@@ -76,9 +75,7 @@ startup_64:
subq $_text - __START_KERNEL_map, %rbp
/* Is the address not 2M aligned? */
- movq %rbp, %rax
- andl $~PMD_PAGE_MASK, %eax
- testl %eax, %eax
+ testl $~PMD_PAGE_MASK, %ebp
jnz bad_address
/*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b8e6ff5cd5d0..be0ebbb6d1d1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -12,6 +12,7 @@
#include <linux/pm.h>
#include <linux/io.h>
+#include <asm/cpufeature.h>
#include <asm/irqdomain.h>
#include <asm/fixmap.h>
#include <asm/hpet.h>
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 87e1762e2bca..ed48a9f465f8 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -168,12 +168,14 @@ GLOBAL(ftrace_call)
restore_mcount_regs
/*
- * The copied trampoline must call ftrace_return as it
+ * The copied trampoline must call ftrace_epilogue as it
* still may need to call the function graph tracer.
+ *
+ * The code up to this label is copied into trampolines so
+ * think twice before adding any new code or changing the
+ * layout here.
*/
-GLOBAL(ftrace_caller_end)
-
-GLOBAL(ftrace_return)
+GLOBAL(ftrace_epilogue)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
@@ -244,14 +246,14 @@ GLOBAL(ftrace_regs_call)
popfq
/*
- * As this jmp to ftrace_return can be a short jump
+ * As this jmp to ftrace_epilogue can be a short jump
* it must not be copied into the trampoline.
* The trampoline will add the code to jump
* to the return.
*/
GLOBAL(ftrace_regs_caller_end)
- jmp ftrace_return
+ jmp ftrace_epilogue
END(ftrace_regs_caller)
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 64f9616f93f1..7f3550acde1b 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -40,7 +40,7 @@
#include <linux/uaccess.h>
#include <linux/gfp.h>
-#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/msr.h>
static struct class *msr_class;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9decee2bfdbe..2915d54e9dd5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -57,6 +57,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
+#ifdef CONFIG_X86_32
+ .SYSENTER_stack_canary = STACK_END_MAGIC,
+#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cb6282c3638f..548ddf7d6fd2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -61,7 +61,38 @@
regs->seg = GET_SEG(seg) | 3; \
} while (0)
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+#ifdef CONFIG_X86_64
+/*
+ * If regs->ss will cause an IRET fault, change it. Otherwise leave it
+ * alone. Using this generally makes no sense unless
+ * user_64bit_mode(regs) would return true.
+ */
+static void force_valid_ss(struct pt_regs *regs)
+{
+ u32 ar;
+ asm volatile ("lar %[old_ss], %[ar]\n\t"
+ "jz 1f\n\t" /* If invalid: */
+ "xorl %[ar], %[ar]\n\t" /* set ar = 0 */
+ "1:"
+ : [ar] "=r" (ar)
+ : [old_ss] "rm" ((u16)regs->ss));
+
+ /*
+ * For a valid 64-bit user context, we need DPL 3, type
+ * read-write data or read-write exp-down data, and S and P
+ * set. We can't use VERW because VERW doesn't check the
+ * P bit.
+ */
+ ar &= AR_DPL_MASK | AR_S | AR_P | AR_TYPE_MASK;
+ if (ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA) &&
+ ar != (AR_DPL3 | AR_S | AR_P | AR_TYPE_RWDATA_EXPDOWN))
+ regs->ss = __USER_DS;
+}
+#endif
+
+static int restore_sigcontext(struct pt_regs *regs,
+ struct sigcontext __user *sc,
+ unsigned long uc_flags)
{
unsigned long buf_val;
void __user *buf;
@@ -94,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
COPY(r15);
#endif /* CONFIG_X86_64 */
-#ifdef CONFIG_X86_32
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
- /* Kernel saves and restores only the CS segment register on signals,
- * which is the bare minimum needed to allow mixed 32/64-bit code.
- * App's signal handler can save/restore other segments if needed. */
- COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+ /*
+ * Fix up SS if needed for the benefit of old DOSEMU and
+ * CRIU.
+ */
+ if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
+ user_64bit_mode(regs)))
+ force_valid_ss(regs);
+#endif
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -165,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
put_user_ex(regs->cs, &sc->cs);
put_user_ex(0, &sc->gs);
put_user_ex(0, &sc->fs);
+ put_user_ex(regs->ss, &sc->ss);
#endif /* CONFIG_X86_32 */
put_user_ex(fpstate, &sc->fpstate);
@@ -403,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
return 0;
}
#else /* !CONFIG_X86_32 */
+static unsigned long frame_uc_flags(struct pt_regs *regs)
+{
+ unsigned long flags;
+
+ if (cpu_has_xsave)
+ flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
+ else
+ flags = UC_SIGCONTEXT_SS;
+
+ if (likely(user_64bit_mode(regs)))
+ flags |= UC_STRICT_RESTORE_SS;
+
+ return flags;
+}
+
static int __setup_rt_frame(int sig, struct ksignal *ksig,
sigset_t *set, struct pt_regs *regs)
{
@@ -422,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
put_user_try {
/* Create the ucontext. */
- if (cpu_has_xsave)
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
@@ -459,10 +506,28 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
regs->sp = (unsigned long)frame;
- /* Set up the CS register to run signal handlers in 64-bit mode,
- even if the handler happens to be interrupting 32-bit code. */
+ /*
+ * Set up the CS and SS registers to run signal handlers in
+ * 64-bit mode, even if the handler happens to be interrupting
+ * 32-bit or 16-bit code.
+ *
+ * SS is subtle. In 64-bit mode, we don't need any particular
+ * SS descriptor, but we do need SS to be valid. It's possible
+ * that the old SS is entirely bogus -- this can happen if the
+ * signal we're trying to deliver is #GP or #SS caused by a bad
+ * SS value. We also have a compatbility issue here: DOSEMU
+ * relies on the contents of the SS register indicating the
+ * SS value at the time of the signal, even though that code in
+ * DOSEMU predates sigreturn's ability to restore SS. (DOSEMU
+ * avoids relying on sigreturn to restore SS; instead it uses
+ * a trampoline.) So we do our best: if the old SS was valid,
+ * we keep it. Otherwise we replace it.
+ */
regs->cs = __USER_CS;
+ if (unlikely(regs->ss != __USER_DS))
+ force_valid_ss(regs);
+
return 0;
}
#endif /* CONFIG_X86_32 */
@@ -489,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
put_user_try {
/* Create the ucontext. */
- if (cpu_has_xsave)
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
@@ -554,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void)
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->sc))
+ /*
+ * x86_32 has no uc_flags bits relevant to restore_sigcontext.
+ * Save a few cycles by skipping the __get_user.
+ */
+ if (restore_sigcontext(regs, &frame->sc, 0))
goto badframe;
return regs->ax;
@@ -570,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
sigset_t set;
+ unsigned long uc_flags;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
+ if (__get_user(uc_flags, &frame->uc.uc_flags))
+ goto badframe;
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
@@ -692,12 +761,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
-#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_64
+ if (is_ia32_task())
+ return __NR_ia32_restart_syscall;
+#endif
+#ifdef CONFIG_X86_X32_ABI
+ return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
+#else
return __NR_restart_syscall;
-#else /* !CONFIG_X86_32 && CONFIG_X86_64 */
- return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall :
- __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
-#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */
+#endif
}
/*
@@ -763,6 +835,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame;
sigset_t set;
+ unsigned long uc_flags;
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
@@ -770,10 +843,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
+ if (__get_user(uc_flags, &frame->uc.uc_flags))
+ goto badframe;
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3bf1e0b5f827..643dbdccf4bc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -256,7 +256,7 @@ static void notrace start_secondary(void *unused)
x86_cpuinit.setup_percpu_clockev();
wmb();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
int topology_update_package_map(unsigned int apicid, unsigned int cpu)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 211c11c7bba4..06cbe25861f1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,30 +83,16 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_bss;
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);
-static inline void conditional_sti(struct pt_regs *regs)
+static inline void cond_local_irq_enable(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
-static inline void preempt_conditional_sti(struct pt_regs *regs)
-{
- preempt_count_inc();
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
-}
-
-static inline void conditional_cli(struct pt_regs *regs)
-{
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_disable();
-}
-
-static inline void preempt_conditional_cli(struct pt_regs *regs)
+static inline void cond_local_irq_disable(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_disable();
- preempt_count_dec();
}
void ist_enter(struct pt_regs *regs)
@@ -262,7 +248,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
-#ifdef CONFIG_X86_64
if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
printk_ratelimit()) {
pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
@@ -271,7 +256,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
print_vma_addr(" in ", regs->ip);
pr_cont("\n");
}
-#endif
force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk);
}
@@ -286,7 +270,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
NOTIFY_STOP) {
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
do_trap(trapnr, signr, str, regs, error_code,
fill_trap_info(regs, signr, trapnr, &info));
}
@@ -368,7 +352,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
if (notify_die(DIE_TRAP, "bounds", regs, error_code,
X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
return;
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (!user_mode(regs))
die("bounds", regs, error_code);
@@ -443,7 +427,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
struct task_struct *tsk;
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
local_irq_enable();
@@ -517,9 +501,11 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
* as we may switch to the interrupt stack.
*/
debug_stack_usage_inc();
- preempt_conditional_sti(regs);
+ preempt_disable();
+ cond_local_irq_enable(regs);
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
ist_exit(regs);
@@ -571,6 +557,29 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
NOKPROBE_SYMBOL(fixup_bad_iret);
#endif
+static bool is_sysenter_singlestep(struct pt_regs *regs)
+{
+ /*
+ * We don't try for precision here. If we're anywhere in the region of
+ * code that can be single-stepped in the SYSENTER entry path, then
+ * assume that this is a useless single-step trap due to SYSENTER
+ * being invoked with TF set. (We don't know in advance exactly
+ * which instructions will be hit because BTF could plausibly
+ * be set.)
+ */
+#ifdef CONFIG_X86_32
+ return (regs->ip - (unsigned long)__begin_SYSENTER_singlestep_region) <
+ (unsigned long)__end_SYSENTER_singlestep_region -
+ (unsigned long)__begin_SYSENTER_singlestep_region;
+#elif defined(CONFIG_IA32_EMULATION)
+ return (regs->ip - (unsigned long)entry_SYSENTER_compat) <
+ (unsigned long)__end_entry_SYSENTER_compat -
+ (unsigned long)entry_SYSENTER_compat;
+#else
+ return false;
+#endif
+}
+
/*
* Our handling of the processor debug registers is non-trivial.
* We do not clear them on entry and exit from the kernel. Therefore
@@ -605,11 +614,42 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
ist_enter(regs);
get_debugreg(dr6, 6);
+ /*
+ * The Intel SDM says:
+ *
+ * Certain debug exceptions may clear bits 0-3. The remaining
+ * contents of the DR6 register are never cleared by the
+ * processor. To avoid confusion in identifying debug
+ * exceptions, debug handlers should clear the register before
+ * returning to the interrupted task.
+ *
+ * Keep it simple: clear DR6 immediately.
+ */
+ set_debugreg(0, 6);
/* Filter out all the reserved bits which are preset to 1 */
dr6 &= ~DR6_RESERVED;
/*
+ * The SDM says "The processor clears the BTF flag when it
+ * generates a debug exception." Clear TIF_BLOCKSTEP to keep
+ * TIF_BLOCKSTEP in sync with the hardware BTF flag.
+ */
+ clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
+
+ if (unlikely(!user_mode(regs) && (dr6 & DR_STEP) &&
+ is_sysenter_singlestep(regs))) {
+ dr6 &= ~DR_STEP;
+ if (!dr6)
+ goto exit;
+ /*
+ * else we might have gotten a single-step trap and hit a
+ * watchpoint at the same time, in which case we should fall
+ * through and handle the watchpoint.
+ */
+ }
+
+ /*
* If dr6 has no reason to give us about the origin of this trap,
* then it's very likely the result of an icebp/int01 trap.
* User wants a sigtrap for that.
@@ -617,18 +657,10 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
if (!dr6 && user_mode(regs))
user_icebp = 1;
- /* Catch kmemcheck conditions first of all! */
+ /* Catch kmemcheck conditions! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
goto exit;
- /* DR6 may or may not be cleared by the CPU */
- set_debugreg(0, 6);
-
- /*
- * The processor cleared BTF, so don't mark that we need it set.
- */
- clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
-
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
@@ -648,24 +680,25 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_inc();
/* It's safe to allow irq's after DR6 has been saved */
- preempt_conditional_sti(regs);
+ preempt_disable();
+ cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
X86_TRAP_DB);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
goto exit;
}
- /*
- * Single-stepping through system calls: ignore any exceptions in
- * kernel space, but re-enable TF when returning to user mode.
- *
- * We already checked v86 mode above, so we can check for kernel mode
- * by just checking the CPL of CS.
- */
- if ((dr6 & DR_STEP) && !user_mode(regs)) {
+ if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
+ /*
+ * Historical junk that used to handle SYSENTER single-stepping.
+ * This should be unreachable now. If we survive for a while
+ * without anyone hitting this warning, we'll turn this into
+ * an oops.
+ */
tsk->thread.debugreg6 &= ~DR_STEP;
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
@@ -673,10 +706,19 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
- preempt_conditional_cli(regs);
+ cond_local_irq_disable(regs);
+ preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
+#if defined(CONFIG_X86_32)
+ /*
+ * This is the most likely code path that involves non-trivial use
+ * of the SYSENTER stack. Check that we haven't overrun it.
+ */
+ WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
+ "Overran or corrupted SYSENTER stack\n");
+#endif
ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);
@@ -696,7 +738,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
return;
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
if (!user_mode(regs)) {
if (!fixup_exception(regs, trapnr)) {
@@ -743,20 +785,19 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
}
dotraplinkage void
do_device_not_available(struct pt_regs *regs, long error_code)
{
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
- BUG_ON(use_eager_fpu());
#ifdef CONFIG_MATH_EMULATION
- if (read_cr0() & X86_CR0_EM) {
+ if (!boot_cpu_has(X86_FEATURE_FPU) && (read_cr0() & X86_CR0_EM)) {
struct math_emu_info info = { };
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
info.regs = regs;
math_emulate(&info);
@@ -765,7 +806,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
#endif
fpu__restore(&current->thread.fpu); /* interrupts still off */
#ifdef CONFIG_X86_32
- conditional_sti(regs);
+ cond_local_irq_enable(regs);
#endif
}
NOKPROBE_SYMBOL(do_device_not_available);
@@ -868,7 +909,7 @@ void __init trap_init(void)
#endif
#ifdef CONFIG_X86_32
- set_system_trap_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
+ set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3d743da828d3..56380440d862 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -43,6 +43,11 @@ static DEFINE_STATIC_KEY_FALSE(__use_tsc);
int tsc_clocksource_reliable;
+static u32 art_to_tsc_numerator;
+static u32 art_to_tsc_denominator;
+static u64 art_to_tsc_offset;
+struct clocksource *art_related_clocksource;
+
/*
* Use a ring-buffer like data structure, where a writer advances the head by
* writing a new data entry and a reader advances the tail when it observes a
@@ -964,6 +969,37 @@ core_initcall(cpufreq_tsc);
#endif /* CONFIG_CPU_FREQ */
+#define ART_CPUID_LEAF (0x15)
+#define ART_MIN_DENOMINATOR (1)
+
+
+/*
+ * If ART is present detect the numerator:denominator to convert to TSC
+ */
+static void detect_art(void)
+{
+ unsigned int unused[2];
+
+ if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
+ return;
+
+ cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
+ &art_to_tsc_numerator, unused, unused+1);
+
+ /* Don't enable ART in a VM, non-stop TSC required */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
+ !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
+ art_to_tsc_denominator < ART_MIN_DENOMINATOR)
+ return;
+
+ if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset))
+ return;
+
+ /* Make this sticky over multiple CPU init calls */
+ setup_force_cpu_cap(X86_FEATURE_ART);
+}
+
+
/* clocksource code */
static struct clocksource clocksource_tsc;
@@ -1071,6 +1107,25 @@ int unsynchronized_tsc(void)
return 0;
}
+/*
+ * Convert ART to TSC given numerator/denominator found in detect_art()
+ */
+struct system_counterval_t convert_art_to_tsc(cycle_t art)
+{
+ u64 tmp, res, rem;
+
+ rem = do_div(art, art_to_tsc_denominator);
+
+ res = art * art_to_tsc_numerator;
+ tmp = rem * art_to_tsc_numerator;
+
+ do_div(tmp, art_to_tsc_denominator);
+ res += tmp + art_to_tsc_offset;
+
+ return (struct system_counterval_t) {.cs = art_related_clocksource,
+ .cycles = res};
+}
+EXPORT_SYMBOL(convert_art_to_tsc);
static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
@@ -1142,6 +1197,8 @@ static void tsc_refine_calibration_work(struct work_struct *work)
(unsigned long)tsc_khz % 1000);
out:
+ if (boot_cpu_has(X86_FEATURE_ART))
+ art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
}
@@ -1235,6 +1292,8 @@ void __init tsc_init(void)
mark_tsc_unstable("TSCs unsynchronized");
check_system_tsc_reliable();
+
+ detect_art();
}
#ifdef CONFIG_SMP
@@ -1246,14 +1305,14 @@ void __init tsc_init(void)
*/
unsigned long calibrate_delay_is_known(void)
{
- int i, cpu = smp_processor_id();
+ int sibling, cpu = smp_processor_id();
if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
return 0;
- for_each_online_cpu(i)
- if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
- return cpu_data(i).loops_per_jiffy;
+ sibling = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ if (sibling < nr_cpu_ids)
+ return cpu_data(sibling).loops_per_jiffy;
return 0;
}
#endif
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 07efb35ee4bc..014ea59aa153 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -30,7 +30,7 @@
* appropriately. Either display a message or halt.
*/
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
verify_cpu:
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e574b8546518..3dce1ca0a653 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -362,7 +362,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
/* make room for real-mode segments */
tsk->thread.sp0 += 16;
- if (static_cpu_has_safe(X86_FEATURE_SEP))
+ if (static_cpu_has(X86_FEATURE_SEP))
tsk->thread.sysenter_cs = 0;
load_sp0(tss, &tsk->thread);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index fe133b710bef..5af9958cbdb6 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -192,6 +192,17 @@ SECTIONS
:init
#endif
+ /*
+ * Section for code used exclusively before alternatives are run. All
+ * references to such code must be patched out by alternatives, normally
+ * by using X86_FEATURE_ALWAYS CPU feature bit.
+ *
+ * See static_cpu_has() for an example.
+ */
+ .altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
+ *(.altinstr_aux)
+ }
+
INIT_DATA_SECTION(16)
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index a2fe51b00cce..65be7cfaf947 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,5 @@
#include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c
index 422db000d727..5cc78bf57232 100644
--- a/arch/x86/lib/cmdline.c
+++ b/arch/x86/lib/cmdline.c
@@ -21,12 +21,16 @@ static inline int myisspace(u8 c)
* @option: option string to look for
*
* Returns the position of that @option (starts counting with 1)
- * or 0 on not found.
+ * or 0 on not found. @option will only be found if it is found
+ * as an entire word in @cmdline. For instance, if @option="car"
+ * then a cmdline which contains "cart" will not match.
*/
-int cmdline_find_option_bool(const char *cmdline, const char *option)
+static int
+__cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
+ const char *option)
{
char c;
- int len, pos = 0, wstart = 0;
+ int pos = 0, wstart = 0;
const char *opptr = NULL;
enum {
st_wordstart = 0, /* Start of word/after whitespace */
@@ -37,11 +41,11 @@ int cmdline_find_option_bool(const char *cmdline, const char *option)
if (!cmdline)
return -1; /* No command line */
- len = min_t(int, strlen(cmdline), COMMAND_LINE_SIZE);
- if (!len)
- return 0;
-
- while (len--) {
+ /*
+ * This 'pos' check ensures we do not overrun
+ * a non-NULL-terminated 'cmdline'
+ */
+ while (pos < max_cmdline_size) {
c = *(char *)cmdline++;
pos++;
@@ -58,18 +62,35 @@ int cmdline_find_option_bool(const char *cmdline, const char *option)
/* fall through */
case st_wordcmp:
- if (!*opptr)
+ if (!*opptr) {
+ /*
+ * We matched all the way to the end of the
+ * option we were looking for. If the
+ * command-line has a space _or_ ends, then
+ * we matched!
+ */
if (!c || myisspace(c))
return wstart;
- else
- state = st_wordskip;
- else if (!c)
+ /*
+ * We hit the end of the option, but _not_
+ * the end of a word on the cmdline. Not
+ * a match.
+ */
+ } else if (!c) {
+ /*
+ * Hit the NULL terminator on the end of
+ * cmdline.
+ */
return 0;
- else if (c != *opptr++)
- state = st_wordskip;
- else if (!len) /* last word and is matching */
- return wstart;
- break;
+ } else if (c == *opptr++) {
+ /*
+ * We are currently matching, so continue
+ * to the next character on the cmdline.
+ */
+ break;
+ }
+ state = st_wordskip;
+ /* fall through */
case st_wordskip:
if (!c)
@@ -82,3 +103,8 @@ int cmdline_find_option_bool(const char *cmdline, const char *option)
return 0; /* Buffer overrun */
}
+
+int cmdline_find_option_bool(const char *cmdline, const char *option)
+{
+ return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
+}
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 009f98216b7e..24ef1c2104d4 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -1,7 +1,7 @@
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
#include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 27f89c79a44b..2b0ef26da0bd 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -10,7 +10,7 @@
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 7d37641ada5b..cbb8ee5830ff 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,7 +1,7 @@
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index ca2afdd6d98e..90ce01bee00c 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -6,7 +6,7 @@
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
*/
#include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#undef memmove
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2661fad05827..c9c81227ea37 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -1,7 +1,7 @@
/* Copyright 2002 Andi Kleen, SuSE Labs */
#include <linux/linkage.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
.weak memset
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 4a6f1d9b5106..99bfb192803f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -358,20 +358,19 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
-#ifdef CONFIG_X86_64
static inline bool is_hypervisor_range(int idx)
{
+#ifdef CONFIG_X86_64
/*
* ffff800000000000 - ffff87ffffffffff is reserved for
* the hypervisor.
*/
- return paravirt_enabled() &&
- (idx >= pgd_index(__PAGE_OFFSET) - 16) &&
- (idx < pgd_index(__PAGE_OFFSET));
-}
+ return (idx >= pgd_index(__PAGE_OFFSET) - 16) &&
+ (idx < pgd_index(__PAGE_OFFSET));
#else
-static inline bool is_hypervisor_range(int idx) { return false; }
+ return false;
#endif
+}
static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
bool checkwx)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2ebfbaf61142..bd7a9b9e2e14 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -388,7 +388,6 @@ repeat:
}
pte_t *kmap_pte;
-pgprot_t kmap_prot;
static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
{
@@ -405,8 +404,6 @@ static void __init kmap_init(void)
*/
kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
- kmap_prot = PAGE_KERNEL;
}
#ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a40b755c67e3..214afda97911 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -53,6 +53,7 @@
#include <asm/numa.h>
#include <asm/cacheflush.h>
#include <asm/init.h>
+#include <asm/uv/uv.h>
#include <asm/setup.h>
#include "mm_internal.h"
@@ -1203,26 +1204,13 @@ int kern_addr_valid(unsigned long addr)
static unsigned long probe_memory_block_size(void)
{
- /* start from 2g */
- unsigned long bz = 1UL<<31;
+ unsigned long bz = MIN_MEMORY_BLOCK_SIZE;
- if (totalram_pages >= (64ULL << (30 - PAGE_SHIFT))) {
- pr_info("Using 2GB memory block size for large-memory system\n");
- return 2UL * 1024 * 1024 * 1024;
- }
-
- /* less than 64g installed */
- if ((max_pfn << PAGE_SHIFT) < (16UL << 32))
- return MIN_MEMORY_BLOCK_SIZE;
-
- /* get the tail size */
- while (bz > MIN_MEMORY_BLOCK_SIZE) {
- if (!((max_pfn << PAGE_SHIFT) & (bz - 1)))
- break;
- bz >>= 1;
- }
+ /* if system is UV or has 64GB of RAM or more, use large blocks */
+ if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30)))
+ bz = 2UL << 30; /* 2GB */
- printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20);
+ pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);
return bz;
}
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index d470cf219a2d..1b1110fa0057 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -120,11 +120,22 @@ void __init kasan_init(void)
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
(void *)KASAN_SHADOW_END);
- memset(kasan_zero_page, 0, PAGE_SIZE);
-
load_cr3(init_level4_pgt);
__flush_tlb_all();
- init_task.kasan_depth = 0;
+ /*
+ * kasan_zero_page has been used as early shadow memory, thus it may
+ * contain some garbage. Now we can clear and write protect it, since
+ * after the TLB flush no one should write to it.
+ */
+ memset(kasan_zero_page, 0, PAGE_SIZE);
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO);
+ set_pte(&kasan_zero_pte[i], pte);
+ }
+ /* Flush TLBs again to be sure that write protection applied. */
+ __flush_tlb_all();
+
+ init_task.kasan_depth = 0;
pr_info("KernelAddressSanitizer initialized\n");
}
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 637ab34ed632..ddb2244b06a1 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -33,7 +33,7 @@
struct kmmio_fault_page {
struct list_head list;
struct kmmio_fault_page *release_next;
- unsigned long page; /* location of the fault page */
+ unsigned long addr; /* the requested address */
pteval_t old_presence; /* page presence prior to arming */
bool armed;
@@ -70,9 +70,16 @@ unsigned int kmmio_count;
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);
-static struct list_head *kmmio_page_list(unsigned long page)
+static struct list_head *kmmio_page_list(unsigned long addr)
{
- return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
+
+ if (!pte)
+ return NULL;
+ addr &= page_level_mask(l);
+
+ return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
}
/* Accessed per-cpu */
@@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
}
/* You must be holding RCU read lock. */
-static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
{
struct list_head *head;
struct kmmio_fault_page *f;
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
- page &= PAGE_MASK;
- head = kmmio_page_list(page);
+ if (!pte)
+ return NULL;
+ addr &= page_level_mask(l);
+ head = kmmio_page_list(addr);
list_for_each_entry_rcu(f, head, list) {
- if (f->page == page)
+ if (f->addr == addr)
return f;
}
return NULL;
@@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
{
unsigned int level;
- pte_t *pte = lookup_address(f->page, &level);
+ pte_t *pte = lookup_address(f->addr, &level);
if (!pte) {
- pr_err("no pte for page 0x%08lx\n", f->page);
+ pr_err("no pte for addr 0x%08lx\n", f->addr);
return -1;
}
@@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
return -1;
}
- __flush_tlb_one(f->page);
+ __flush_tlb_one(f->addr);
return 0;
}
@@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
int ret;
WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
if (f->armed) {
- pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
- f->page, f->count, !!f->old_presence);
+ pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
+ f->addr, f->count, !!f->old_presence);
}
ret = clear_page_presence(f, true);
- WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
- f->page);
+ WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
+ f->addr);
f->armed = true;
return ret;
}
@@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
int ret = clear_page_presence(f, false);
WARN_ONCE(ret < 0,
- KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
+ KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
f->armed = false;
}
@@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
struct kmmio_context *ctx;
struct kmmio_fault_page *faultpage;
int ret = 0; /* default to fault not handled */
+ unsigned long page_base = addr;
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
+ if (!pte)
+ return -EINVAL;
+ page_base &= page_level_mask(l);
/*
* Preemption is now disabled to prevent process switch during
@@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
preempt_disable();
rcu_read_lock();
- faultpage = get_kmmio_fault_page(addr);
+ faultpage = get_kmmio_fault_page(page_base);
if (!faultpage) {
/*
* Either this page fault is not caused by kmmio, or
@@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
ctx = &get_cpu_var(kmmio_ctx);
if (ctx->active) {
- if (addr == ctx->addr) {
+ if (page_base == ctx->addr) {
/*
* A second fault on the same page means some other
* condition needs handling by do_page_fault(), the
@@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
ctx->active++;
ctx->fpage = faultpage;
- ctx->probe = get_kmmio_probe(addr);
+ ctx->probe = get_kmmio_probe(page_base);
ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
- ctx->addr = addr;
+ ctx->addr = page_base;
if (ctx->probe && ctx->probe->pre_handler)
ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -354,12 +371,11 @@ out:
}
/* You must be holding kmmio_lock. */
-static int add_kmmio_fault_page(unsigned long page)
+static int add_kmmio_fault_page(unsigned long addr)
{
struct kmmio_fault_page *f;
- page &= PAGE_MASK;
- f = get_kmmio_fault_page(page);
+ f = get_kmmio_fault_page(addr);
if (f) {
if (!f->count)
arm_kmmio_fault_page(f);
@@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page)
return -1;
f->count = 1;
- f->page = page;
+ f->addr = addr;
if (arm_kmmio_fault_page(f)) {
kfree(f);
return -1;
}
- list_add_rcu(&f->list, kmmio_page_list(f->page));
+ list_add_rcu(&f->list, kmmio_page_list(f->addr));
return 0;
}
/* You must be holding kmmio_lock. */
-static void release_kmmio_fault_page(unsigned long page,
+static void release_kmmio_fault_page(unsigned long addr,
struct kmmio_fault_page **release_list)
{
struct kmmio_fault_page *f;
- page &= PAGE_MASK;
- f = get_kmmio_fault_page(page);
+ f = get_kmmio_fault_page(addr);
if (!f)
return;
@@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p)
int ret = 0;
unsigned long size = 0;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+ unsigned int l;
+ pte_t *pte;
spin_lock_irqsave(&kmmio_lock, flags);
if (get_kmmio_probe(p->addr)) {
ret = -EEXIST;
goto out;
}
+
+ pte = lookup_address(p->addr, &l);
+ if (!pte) {
+ ret = -EINVAL;
+ goto out;
+ }
+
kmmio_count++;
list_add_rcu(&p->list, &kmmio_probes);
while (size < size_lim) {
if (add_kmmio_fault_page(p->addr + size))
pr_err("Unable to set page fault.\n");
- size += PAGE_SIZE;
+ size += page_level_size(l);
}
out:
spin_unlock_irqrestore(&kmmio_lock, flags);
@@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
struct kmmio_fault_page *release_list = NULL;
struct kmmio_delayed_release *drelease;
+ unsigned int l;
+ pte_t *pte;
+
+ pte = lookup_address(p->addr, &l);
+ if (!pte)
+ return;
spin_lock_irqsave(&kmmio_lock, flags);
while (size < size_lim) {
release_kmmio_fault_page(p->addr + size, &release_list);
- size += PAGE_SIZE;
+ size += page_level_size(l);
}
list_del_rcu(&p->list);
kmmio_count--;
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 72bb52f93c3d..d2dc0438d654 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -94,18 +94,6 @@ static unsigned long mmap_base(unsigned long rnd)
}
/*
- * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
- * does, but not when emulating X86_32
- */
-static unsigned long mmap_legacy_base(unsigned long rnd)
-{
- if (mmap_is_ia32())
- return TASK_UNMAPPED_BASE;
- else
- return TASK_UNMAPPED_BASE + rnd;
-}
-
-/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
@@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd();
- mm->mmap_legacy_base = mmap_legacy_base(random_factor);
+ mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
if (mmap_is_legacy()) {
mm->mmap_base = mm->mmap_legacy_base;
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index d04f8094bc23..f70c1ff46125 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -465,46 +465,67 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
return true;
}
+/*
+ * Mark all currently memblock-reserved physical memory (which covers the
+ * kernel's own memory ranges) as hot-unswappable.
+ */
static void __init numa_clear_kernel_node_hotplug(void)
{
- int i, nid;
- nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
- phys_addr_t start, end;
- struct memblock_region *r;
+ nodemask_t reserved_nodemask = NODE_MASK_NONE;
+ struct memblock_region *mb_region;
+ int i;
/*
+ * We have to do some preprocessing of memblock regions, to
+ * make them suitable for reservation.
+ *
* At this time, all memory regions reserved by memblock are
- * used by the kernel. Set the nid in memblock.reserved will
- * mark out all the nodes the kernel resides in.
+ * used by the kernel, but those regions are not split up
+ * along node boundaries yet, and don't necessarily have their
+ * node ID set yet either.
+ *
+ * So iterate over all memory known to the x86 architecture,
+ * and use those ranges to set the nid in memblock.reserved.
+ * This will split up the memblock regions along node
+ * boundaries and will set the node IDs as well.
*/
for (i = 0; i < numa_meminfo.nr_blks; i++) {
- struct numa_memblk *mb = &numa_meminfo.blk[i];
+ struct numa_memblk *mb = numa_meminfo.blk + i;
+ int ret;
- memblock_set_node(mb->start, mb->end - mb->start,
- &memblock.reserved, mb->nid);
+ ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid);
+ WARN_ON_ONCE(ret);
}
/*
- * Mark all kernel nodes.
+ * Now go over all reserved memblock regions, to construct a
+ * node mask of all kernel reserved memory areas.
*
- * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo
- * may not include all the memblock.reserved memory ranges because
- * trim_snb_memory() reserves specific pages for Sandy Bridge graphics.
+ * [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
+ * numa_meminfo might not include all memblock.reserved
+ * memory ranges, because quirks such as trim_snb_memory()
+ * reserve specific pages for Sandy Bridge graphics. ]
*/
- for_each_memblock(reserved, r)
- if (r->nid != MAX_NUMNODES)
- node_set(r->nid, numa_kernel_nodes);
+ for_each_memblock(reserved, mb_region) {
+ if (mb_region->nid != MAX_NUMNODES)
+ node_set(mb_region->nid, reserved_nodemask);
+ }
- /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
+ /*
+ * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
+ * belonging to the reserved node mask.
+ *
+ * Note that this will include memory regions that reside
+ * on nodes that contain kernel memory - entire nodes
+ * become hot-unpluggable:
+ */
for (i = 0; i < numa_meminfo.nr_blks; i++) {
- nid = numa_meminfo.blk[i].nid;
- if (!node_isset(nid, numa_kernel_nodes))
- continue;
+ struct numa_memblk *mb = numa_meminfo.blk + i;
- start = numa_meminfo.blk[i].start;
- end = numa_meminfo.blk[i].end;
+ if (!node_isset(mb->nid, reserved_nodemask))
+ continue;
- memblock_clear_hotplug(start, end - start);
+ memblock_clear_hotplug(mb->start, mb->end - mb->start);
}
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1c37e650acac..007ebe2d8157 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1128,8 +1128,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
/*
* Ignore all non primary paths.
*/
- if (!primary)
+ if (!primary) {
+ cpa->numpages = 1;
return 0;
+ }
/*
* Ignore the NULL PTE for kernel identity mapping, as it is expected
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index f4ae536b0914..04e2e7144bee 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -943,7 +943,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return -EINVAL;
}
- *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+ *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
cachemode2protval(pcm));
return 0;
@@ -959,7 +959,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
/* Set prot based on lookup */
pcm = lookup_memtype(pfn_t_to_phys(pfn));
- *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+ *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
cachemode2protval(pcm));
return 0;
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 92e2eacb3321..8bea84724a7d 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -4,6 +4,7 @@
#include <asm/pgtable.h>
#include <asm/proto.h>
+#include <asm/cpufeature.h>
static int disable_nx;
@@ -31,9 +32,8 @@ early_param("noexec", noexec_setup);
void x86_configure_nx(void)
{
- if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
- __supported_pte_mask |= _PAGE_NX;
- else
+ /* If disable_nx is set, clear NX on all new mappings going forward. */
+ if (disable_nx)
__supported_pte_mask &= ~_PAGE_NX;
}
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 50d86c0e9ba4..660a83c8287b 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -24,7 +24,6 @@
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/processor.h>
-#include <asm/cpufeature.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index 76b6632d3143..1865c196f136 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -21,7 +21,7 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/string.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
#include <linux/gpio.h>
@@ -35,6 +35,11 @@
#define BIOS_SIGNATURE_COREBOOT 0x500
#define BIOS_REGION_SIZE 0x10000
+/*
+ * This driver is not modular, but to keep back compatibility
+ * with existing use cases, continuing with module_param is
+ * the easiest way forward.
+ */
static bool force = 0;
module_param(force, bool, 0444);
/* FIXME: Award bios is not automatically detected as Alix platform */
@@ -192,9 +197,4 @@ static int __init alix_init(void)
return 0;
}
-
-module_init(alix_init);
-
-MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>");
-MODULE_DESCRIPTION("PCEngines ALIX System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(alix_init);
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
index aa733fba2471..4fcdb91318a0 100644
--- a/arch/x86/platform/geode/geos.c
+++ b/arch/x86/platform/geode/geos.c
@@ -19,7 +19,6 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/string.h>
-#include <linux/module.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
#include <linux/gpio.h>
@@ -120,9 +119,4 @@ static int __init geos_init(void)
return 0;
}
-
-module_init(geos_init);
-
-MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
-MODULE_DESCRIPTION("Traverse Technologies Geos System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(geos_init);
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
index 927e38c0089f..a2f6b982a729 100644
--- a/arch/x86/platform/geode/net5501.c
+++ b/arch/x86/platform/geode/net5501.c
@@ -20,7 +20,6 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/string.h>
-#include <linux/module.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
#include <linux/gpio.h>
@@ -146,9 +145,4 @@ static int __init net5501_init(void)
return 0;
}
-
-module_init(net5501_init);
-
-MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
-MODULE_DESCRIPTION("Soekris net5501 System Setup");
-MODULE_LICENSE("GPL");
+device_initcall(net5501_init);
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c
index 23381d2174ae..1eb47b6298c2 100644
--- a/arch/x86/platform/intel-mid/mfld.c
+++ b/arch/x86/platform/intel-mid/mfld.c
@@ -52,10 +52,7 @@ static unsigned long __init mfld_calibrate_tsc(void)
/* mark tsc clocksource as reliable */
set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
- if (fast_calibrate)
- return fast_calibrate;
-
- return 0;
+ return fast_calibrate;
}
static void __init penwell_arch_setup(void)
diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c
index aaca91753d32..bd1adc621781 100644
--- a/arch/x86/platform/intel-mid/mrfl.c
+++ b/arch/x86/platform/intel-mid/mrfl.c
@@ -81,10 +81,7 @@ static unsigned long __init tangier_calibrate_tsc(void)
/* mark tsc clocksource as reliable */
set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
- if (fast_calibrate)
- return fast_calibrate;
-
- return 0;
+ return fast_calibrate;
}
static void __init tangier_arch_setup(void)
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index bfadcd0f4944..17d6d2296e4d 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -1,5 +1,5 @@
/**
- * imr.c
+ * imr.c -- Intel Isolated Memory Region driver
*
* Copyright(c) 2013 Intel Corporation.
* Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
@@ -31,7 +31,6 @@
#include <linux/debugfs.h>
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/module.h>
#include <linux/types.h>
struct imr_device {
@@ -135,11 +134,9 @@ static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
* @idev: pointer to imr_device structure.
* @imr_id: IMR entry to write.
* @imr: IMR structure representing address and access masks.
- * @lock: indicates if the IMR lock bit should be applied.
* @return: 0 on success or error code passed from mbi_iosf on failure.
*/
-static int imr_write(struct imr_device *idev, u32 imr_id,
- struct imr_regs *imr, bool lock)
+static int imr_write(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
{
unsigned long flags;
u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
@@ -163,15 +160,6 @@ static int imr_write(struct imr_device *idev, u32 imr_id,
if (ret)
goto failed;
- /* Lock bit must be set separately to addr_lo address bits. */
- if (lock) {
- imr->addr_lo |= IMR_LOCK;
- ret = iosf_mbi_write(QRK_MBI_UNIT_MM, MBI_REG_WRITE,
- reg - IMR_NUM_REGS, imr->addr_lo);
- if (ret)
- goto failed;
- }
-
local_irq_restore(flags);
return 0;
failed:
@@ -270,17 +258,6 @@ static int imr_debugfs_register(struct imr_device *idev)
}
/**
- * imr_debugfs_unregister - unregister debugfs hooks.
- *
- * @idev: pointer to imr_device structure.
- * @return:
- */
-static void imr_debugfs_unregister(struct imr_device *idev)
-{
- debugfs_remove(idev->file);
-}
-
-/**
* imr_check_params - check passed address range IMR alignment and non-zero size
*
* @base: base address of intended IMR.
@@ -334,11 +311,10 @@ static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr)
* @size: physical size of region in bytes must be aligned to 1KiB.
* @read_mask: read access mask.
* @write_mask: write access mask.
- * @lock: indicates whether or not to permanently lock this region.
* @return: zero on success or negative value indicating error.
*/
int imr_add_range(phys_addr_t base, size_t size,
- unsigned int rmask, unsigned int wmask, bool lock)
+ unsigned int rmask, unsigned int wmask)
{
phys_addr_t end;
unsigned int i;
@@ -411,7 +387,7 @@ int imr_add_range(phys_addr_t base, size_t size,
imr.rmask = rmask;
imr.wmask = wmask;
- ret = imr_write(idev, reg, &imr, lock);
+ ret = imr_write(idev, reg, &imr);
if (ret < 0) {
/*
* In the highly unlikely event iosf_mbi_write failed
@@ -422,7 +398,7 @@ int imr_add_range(phys_addr_t base, size_t size,
imr.addr_hi = 0;
imr.rmask = IMR_READ_ACCESS_ALL;
imr.wmask = IMR_WRITE_ACCESS_ALL;
- imr_write(idev, reg, &imr, false);
+ imr_write(idev, reg, &imr);
}
failed:
mutex_unlock(&idev->lock);
@@ -518,7 +494,7 @@ static int __imr_remove_range(int reg, phys_addr_t base, size_t size)
imr.rmask = IMR_READ_ACCESS_ALL;
imr.wmask = IMR_WRITE_ACCESS_ALL;
- ret = imr_write(idev, reg, &imr, false);
+ ret = imr_write(idev, reg, &imr);
failed:
mutex_unlock(&idev->lock);
@@ -599,7 +575,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
* We don't round up @size since it is already PAGE_SIZE aligned.
* See vmlinux.lds.S for details.
*/
- ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
if (ret < 0) {
pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
size / 1024, start, end);
@@ -614,7 +590,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = {
{ X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */
{}
};
-MODULE_DEVICE_TABLE(x86cpu, imr_ids);
/**
* imr_init - entry point for IMR driver.
@@ -640,22 +615,4 @@ static int __init imr_init(void)
imr_fixup_memmap(idev);
return 0;
}
-
-/**
- * imr_exit - exit point for IMR code.
- *
- * Deregisters debugfs, leave IMR state as-is.
- *
- * return:
- */
-static void __exit imr_exit(void)
-{
- imr_debugfs_unregister(&imr_dev);
-}
-
-module_init(imr_init);
-module_exit(imr_exit);
-
-MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
-MODULE_DESCRIPTION("Intel Isolated Memory Region driver");
-MODULE_LICENSE("Dual BSD/GPL");
+device_initcall(imr_init);
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index 278e4da4222f..f5bad40936ac 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -1,5 +1,5 @@
/**
- * imr_selftest.c
+ * imr_selftest.c -- Intel Isolated Memory Region self-test driver
*
* Copyright(c) 2013 Intel Corporation.
* Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
@@ -15,7 +15,6 @@
#include <asm/imr.h>
#include <linux/init.h>
#include <linux/mm.h>
-#include <linux/module.h>
#include <linux/types.h>
#define SELFTEST KBUILD_MODNAME ": "
@@ -61,30 +60,30 @@ static void __init imr_self_test(void)
int ret;
/* Test zero zero. */
- ret = imr_add_range(0, 0, 0, 0, false);
+ ret = imr_add_range(0, 0, 0, 0);
imr_self_test_result(ret < 0, "zero sized IMR\n");
/* Test exact overlap. */
- ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
/* Test overlap with base inside of existing. */
base += size - IMR_ALIGN;
- ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
/* Test overlap with end inside of existing. */
base -= size + IMR_ALIGN * 2;
- ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU);
imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
/* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */
ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL,
- IMR_WRITE_ACCESS_ALL, false);
+ IMR_WRITE_ACCESS_ALL);
imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n");
/* Test that a 1 KiB IMR @ zero with CPU only will work. */
- ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false);
+ ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU);
imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n");
if (ret >= 0) {
ret = imr_remove_range(0, IMR_ALIGN);
@@ -93,8 +92,7 @@ static void __init imr_self_test(void)
/* Test 2 KiB works. */
size = IMR_ALIGN * 2;
- ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL,
- IMR_WRITE_ACCESS_ALL, false);
+ ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL, IMR_WRITE_ACCESS_ALL);
imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n");
if (ret >= 0) {
ret = imr_remove_range(0, size);
@@ -106,7 +104,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = {
{ X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */
{}
};
-MODULE_DEVICE_TABLE(x86cpu, imr_ids);
/**
* imr_self_test_init - entry point for IMR driver.
@@ -125,13 +122,4 @@ static int __init imr_self_test_init(void)
*
* return:
*/
-static void __exit imr_self_test_exit(void)
-{
-}
-
-module_init(imr_self_test_init);
-module_exit(imr_self_test_exit);
-
-MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
-MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver");
-MODULE_LICENSE("Dual BSD/GPL");
+device_initcall(imr_self_test_init);
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 174781a404ff..00c319048d52 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -3,7 +3,7 @@
#include <asm/asm.h>
#include <asm/segment.h>
-#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
#include <asm/cmpxchg.h>
#include <asm/nops.h>
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 439c0994b696..bfce503dffae 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -25,11 +25,11 @@
#define old_mmap sys_old_mmap
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
+#define __SYSCALL_I386(nr, sym, qual) [ nr ] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index b74ea6c2c0e7..f306413d3eb6 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -35,14 +35,11 @@
#define stub_execveat sys_execveat
#define stub_rt_sigreturn sys_rt_sigreturn
-#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
-#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
-
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym, compat) [ nr ] = sym,
+#define __SYSCALL_64(nr, sym, qual) [ nr ] = sym,
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index ce7e3607a870..470564bbd08e 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -9,14 +9,12 @@
#include <asm/types.h>
#ifdef __i386__
-#define __SYSCALL_I386(nr, sym, compat) [nr] = 1,
+#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_32.h>
};
#else
-#define __SYSCALL_64(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1,
-#define __SYSCALL_X32(nr, sym, compat) /* Not supported */
+#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
static char syscalls[] = {
#include <asm/syscalls_64.h>
};
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3f4ebf0261f2..3c6d17fd423a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -112,7 +112,7 @@ asmlinkage __visible void cpu_bringup_and_idle(int cpu)
xen_pvh_secondary_vcpu_init(cpu);
#endif
cpu_bringup();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
static void xen_smp_intr_free(unsigned int cpu)
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index 4d02e38514f5..fc4ad21a5ed4 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c
@@ -157,7 +157,7 @@ void secondary_start_kernel(void)
complete(&cpu_running);
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
static void mx_cpu_start(void *p)