aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/entry/Makefile2
-rw-r--r--arch/x86/entry/common.c6
-rw-r--r--arch/x86/entry/entry_64.S25
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c25
-rw-r--r--arch/x86/entry/vdso/vdso2c.h6
-rw-r--r--arch/x86/events/core.c11
-rw-r--r--arch/x86/events/intel/uncore_snb.c14
-rw-r--r--arch/x86/events/intel/uncore_snbep.c10
-rw-r--r--arch/x86/include/asm/apic.h2
-rw-r--r--arch/x86/include/asm/dma-mapping.h5
-rw-r--r--arch/x86/include/asm/elf.h4
-rw-r--r--arch/x86/include/asm/fpu/internal.h16
-rw-r--r--arch/x86/include/asm/hardirq.h4
-rw-r--r--arch/x86/include/asm/init.h4
-rw-r--r--arch/x86/include/asm/mc146818rtc.h1
-rw-r--r--arch/x86/include/asm/mmu_context.h2
-rw-r--r--arch/x86/include/asm/pgtable_64.h4
-rw-r--r--arch/x86/include/asm/pvclock.h39
-rw-r--r--arch/x86/include/asm/realmode.h10
-rw-r--r--arch/x86/include/asm/rtc.h1
-rw-r--r--arch/x86/include/asm/swiotlb.h4
-rw-r--r--arch/x86/include/asm/syscall.h5
-rw-r--r--arch/x86/include/asm/thread_info.h47
-rw-r--r--arch/x86/include/asm/tlbflush.h7
-rw-r--r--arch/x86/include/asm/uaccess.h26
-rw-r--r--arch/x86/include/asm/uaccess_32.h2
-rw-r--r--arch/x86/include/asm/uaccess_64.h2
-rw-r--r--arch/x86/include/asm/uv/bios.h5
-rw-r--r--arch/x86/include/asm/xen/page-coherent.h9
-rw-r--r--arch/x86/kernel/amd_gart_64.c20
-rw-r--r--arch/x86/kernel/apic/apic.c30
-rw-r--r--arch/x86/kernel/apic/vector.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c13
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c42
-rw-r--r--arch/x86/kernel/fpu/signal.c12
-rw-r--r--arch/x86/kernel/fpu/xstate.c138
-rw-r--r--arch/x86/kernel/head32.c2
-rw-r--r--arch/x86/kernel/head64.c1
-rw-r--r--arch/x86/kernel/hpet.c3
-rw-r--r--arch/x86/kernel/irq.c3
-rw-r--r--arch/x86/kernel/nmi.c1
-rw-r--r--arch/x86/kernel/pci-calgary_64.c14
-rw-r--r--arch/x86/kernel/pci-dma.c4
-rw-r--r--arch/x86/kernel/pci-nommu.c4
-rw-r--r--arch/x86/kernel/pci-swiotlb.c4
-rw-r--r--arch/x86/kernel/ptrace.c15
-rw-r--r--arch/x86/kernel/pvclock.c17
-rw-r--r--arch/x86/kernel/rtc.c3
-rw-r--r--arch/x86/kernel/setup.c27
-rw-r--r--arch/x86/kernel/signal.c40
-rw-r--r--arch/x86/kernel/tsc.c4
-rw-r--r--arch/x86/kernel/uprobes.c22
-rw-r--r--arch/x86/kvm/irq.h3
-rw-r--r--arch/x86/kvm/lapic.c3
-rw-r--r--arch/x86/kvm/vmx.c15
-rw-r--r--arch/x86/lib/hweight.S2
-rw-r--r--arch/x86/lib/kaslr.c2
-rw-r--r--arch/x86/mm/ident_map.c19
-rw-r--r--arch/x86/mm/init.c14
-rw-r--r--arch/x86/mm/kaslr.c2
-rw-r--r--arch/x86/pci/sta2x11-fixup.c2
-rw-r--r--arch/x86/pci/vmd.c16
-rw-r--r--arch/x86/platform/efi/efi.c1
-rw-r--r--arch/x86/platform/efi/efi_64.c1
-rw-r--r--arch/x86/platform/efi/quirks.c21
-rw-r--r--arch/x86/platform/intel-mid/intel_mid_vrtc.c1
-rw-r--r--arch/x86/platform/uv/bios_uv.c11
-rw-r--r--arch/x86/power/hibernate_64.c20
-rw-r--r--arch/x86/power/hibernate_asm_64.S2
-rw-r--r--arch/x86/realmode/init.c47
-rw-r--r--arch/x86/um/vdso/Makefile3
73 files changed, 513 insertions, 396 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3a9add58d794..c580d8c33562 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -80,6 +80,7 @@ config X86
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_AOUT if X86_32
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
@@ -91,6 +92,7 @@ config X86
select HAVE_ARCH_SOFT_DIRTY if X86_64
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_EBPF_JIT if X86_64
select HAVE_CC_STACKPROTECTOR
select HAVE_CMPXCHG_DOUBLE
@@ -152,6 +154,7 @@ config X86
select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
select PERF_EVENTS
select RTC_LIB
+ select RTC_MC146818_LIB
select SPARSE_IRQ
select SRCU
select SYSCTL_EXCEPTION_TRACE
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index fe91c25092da..77f28ce9c646 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -5,6 +5,8 @@
OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
+CFLAGS_syscall_64.o += -Wno-override-init
+CFLAGS_syscall_32.o += -Wno-override-init
obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
obj-y += common.o
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index a1e71d431fed..1433f6b4607d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -204,8 +204,12 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
* handling, because syscall restart has a fixup for compat
* syscalls. The fixup is exercised by the ptrace_syscall_32
* selftest.
+ *
+ * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer
+ * special case only applies after poking regs and before the
+ * very next return to user mode.
*/
- ti->status &= ~TS_COMPAT;
+ ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
#endif
user_enter_irqoff();
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b846875aeea6..d172c619c449 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -288,11 +288,15 @@ return_from_SYSCALL_64:
jne opportunistic_sysret_failed
/*
- * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
- * restoring TF results in a trap from userspace immediately after
- * SYSRET. This would cause an infinite loop whenever #DB happens
- * with register state that satisfies the opportunistic SYSRET
- * conditions. For example, single-stepping this user code:
+ * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
+ * restore RF properly. If the slowpath sets it for whatever reason, we
+ * need to restore it correctly.
+ *
+ * SYSRET can restore TF, but unlike IRET, restoring TF results in a
+ * trap from userspace immediately after SYSRET. This would cause an
+ * infinite loop whenever #DB happens with register state that satisfies
+ * the opportunistic SYSRET conditions. For example, single-stepping
+ * this user code:
*
* movq $stuck_here, %rcx
* pushfq
@@ -601,9 +605,20 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
.endm
#endif
+/* Make sure APIC interrupt handlers end up in the irqentry section: */
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
+# define POP_SECTION_IRQENTRY .popsection
+#else
+# define PUSH_SECTION_IRQENTRY
+# define POP_SECTION_IRQENTRY
+#endif
+
.macro apicinterrupt num sym do_sym
+PUSH_SECTION_IRQENTRY
apicinterrupt3 \num \sym \do_sym
trace_apicinterrupt \num \sym
+POP_SECTION_IRQENTRY
.endm
#ifdef CONFIG_SMP
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 4cddd17153fb..f848572169ea 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -294,7 +294,7 @@
# 285 sys_setaltroot
286 i386 add_key sys_add_key
287 i386 request_key sys_request_key
-288 i386 keyctl sys_keyctl
+288 i386 keyctl sys_keyctl compat_sys_keyctl
289 i386 ioprio_set sys_ioprio_set
290 i386 ioprio_get sys_ioprio_get
291 i386 inotify_init sys_inotify_init
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 2f02d23a05ef..94d54d0defa7 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -96,9 +96,8 @@ static notrace cycle_t vread_pvclock(int *mode)
{
const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
cycle_t ret;
- u64 tsc, pvti_tsc;
- u64 last, delta, pvti_system_time;
- u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
+ u64 last;
+ u32 version;
/*
* Note: The kernel and hypervisor must guarantee that cpu ID
@@ -123,29 +122,15 @@ static notrace cycle_t vread_pvclock(int *mode)
*/
do {
- version = pvti->version;
-
- smp_rmb();
+ version = pvclock_read_begin(pvti);
if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
*mode = VCLOCK_NONE;
return 0;
}
- tsc = rdtsc_ordered();
- pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
- pvti_tsc_shift = pvti->tsc_shift;
- pvti_system_time = pvti->system_time;
- pvti_tsc = pvti->tsc_timestamp;
-
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while (unlikely((version & 1) || version != pvti->version));
-
- delta = tsc - pvti_tsc;
- ret = pvti_system_time +
- pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
- pvti_tsc_shift);
+ ret = __pvclock_read_cycles(pvti);
+ } while (pvclock_read_retry(pvti, version));
/* refer to vread_tsc() comment for rationale */
last = gtod->cycle_last;
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 63a03bb91497..4f741192846d 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -22,6 +22,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));
+ if (hdr->e_type != ET_DYN)
+ fail("input is not a shared object\n");
+
/* Walk the segment table. */
for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
if (GET_LE(&pt[i].p_type) == PT_LOAD) {
@@ -49,6 +52,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
if (stripped_len < load_size)
fail("stripped input is too short\n");
+ if (!dyn)
+ fail("input has no PT_DYNAMIC section -- your toolchain is buggy\n");
+
/* Walk the dynamic table */
for (i = 0; dyn + i < dyn_end &&
GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index fad97886d8b1..d0efb5cb1b00 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -263,10 +263,13 @@ static bool check_hw_exists(void)
return true;
msr_fail:
- pr_cont("Broken PMU hardware detected, using software events only.\n");
- printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
- boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
- reg, val_new);
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ pr_cont("PMU not available due to virtualization, using software events only.\n");
+ } else {
+ pr_cont("Broken PMU hardware detected, using software events only.\n");
+ pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n",
+ reg, val_new);
+ }
return false;
}
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 97a69dbba649..9d35ec0cb8fc 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -100,6 +100,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
}
}
+static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
@@ -127,6 +133,7 @@ static struct attribute_group snb_uncore_format_group = {
static struct intel_uncore_ops snb_uncore_msr_ops = {
.init_box = snb_uncore_msr_init_box,
+ .enable_box = snb_uncore_msr_enable_box,
.exit_box = snb_uncore_msr_exit_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
@@ -192,6 +199,12 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
}
}
+static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
{
if (box->pmu->pmu_idx == 0)
@@ -200,6 +213,7 @@ static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
static struct intel_uncore_ops skl_uncore_msr_ops = {
.init_box = skl_uncore_msr_init_box,
+ .enable_box = skl_uncore_msr_enable_box,
.exit_box = skl_uncore_msr_exit_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 824e54086e07..8aee83bcf71f 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2626,7 +2626,7 @@ void hswep_uncore_cpu_init(void)
static struct intel_uncore_type hswep_uncore_ha = {
.name = "ha",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 2,
.perf_ctr_bits = 48,
SNBEP_UNCORE_PCI_COMMON_INIT(),
@@ -2645,7 +2645,7 @@ static struct uncore_event_desc hswep_uncore_imc_events[] = {
static struct intel_uncore_type hswep_uncore_imc = {
.name = "imc",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 8,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
@@ -2691,7 +2691,7 @@ static struct intel_uncore_type hswep_uncore_irp = {
static struct intel_uncore_type hswep_uncore_qpi = {
.name = "qpi",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 3,
.perf_ctr_bits = 48,
.perf_ctr = SNBEP_PCI_PMON_CTR0,
@@ -2773,7 +2773,7 @@ static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
static struct intel_uncore_type hswep_uncore_r3qpi = {
.name = "r3qpi",
- .num_counters = 4,
+ .num_counters = 3,
.num_boxes = 3,
.perf_ctr_bits = 44,
.constraints = hswep_uncore_r3qpi_constraints,
@@ -2972,7 +2972,7 @@ static struct intel_uncore_type bdx_uncore_ha = {
static struct intel_uncore_type bdx_uncore_imc = {
.name = "imc",
- .num_counters = 5,
+ .num_counters = 4,
.num_boxes = 8,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f5befd4945f2..124357773ffa 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -135,6 +135,7 @@ extern void init_apic_mappings(void);
void register_lapic_address(unsigned long address);
extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);
+extern void lapic_update_tsc_freq(void);
extern int APIC_init_uniprocessor(void);
#ifdef CONFIG_X86_64
@@ -170,6 +171,7 @@ static inline void init_apic_mappings(void) { }
static inline void disable_local_APIC(void) { }
# define setup_boot_APIC_clock x86_init_noop
# define setup_secondary_APIC_clock x86_init_noop
+static inline void lapic_update_tsc_freq(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 3a27b93e6261..44461626830e 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -9,7 +9,6 @@
#include <linux/kmemcheck.h>
#include <linux/scatterlist.h>
#include <linux/dma-debug.h>
-#include <linux/dma-attrs.h>
#include <asm/io.h>
#include <asm/swiotlb.h>
#include <linux/dma-contiguous.h>
@@ -48,11 +47,11 @@ extern int dma_supported(struct device *hwdev, u64 mask);
extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
- struct dma_attrs *attrs);
+ unsigned long attrs);
extern void dma_generic_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
- struct dma_attrs *attrs);
+ unsigned long attrs);
#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */
extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index fea7724141a0..e7f155c3045e 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -344,8 +344,8 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
*/
static inline int mmap_is_ia32(void)
{
- return config_enabled(CONFIG_X86_32) ||
- (config_enabled(CONFIG_COMPAT) &&
+ return IS_ENABLED(CONFIG_X86_32) ||
+ (IS_ENABLED(CONFIG_COMPAT) &&
test_thread_flag(TIF_ADDR32));
}
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 116b58347501..2737366ea583 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -137,9 +137,9 @@ static inline int copy_fregs_to_user(struct fregs_state __user *fx)
static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
{
- if (config_enabled(CONFIG_X86_32))
+ if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
/* See comment in copy_fxregs_to_kernel() below. */
@@ -150,10 +150,10 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
int err;
- if (config_enabled(CONFIG_X86_32)) {
+ if (IS_ENABLED(CONFIG_X86_32)) {
err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
} else {
- if (config_enabled(CONFIG_AS_FXSAVEQ)) {
+ if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
} else {
/* See comment in copy_fxregs_to_kernel() below. */
@@ -166,9 +166,9 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
- if (config_enabled(CONFIG_X86_32))
+ if (IS_ENABLED(CONFIG_X86_32))
return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
/* See comment in copy_fxregs_to_kernel() below. */
@@ -190,9 +190,9 @@ static inline int copy_user_to_fregs(struct fregs_state __user *fx)
static inline void copy_fxregs_to_kernel(struct fpu *fpu)
{
- if (config_enabled(CONFIG_X86_32))
+ if (IS_ENABLED(CONFIG_X86_32))
asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ else if (IS_ENABLED(CONFIG_AS_FXSAVEQ))
asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
else {
/* Using "rex64; fxsave %0" is broken because, if the memory
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 7178043b0e1d..59405a248fc2 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,10 +22,6 @@ typedef struct {
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
- /*
- * irq_tlb_count is double-counted in irq_call_count, so it must be
- * subtracted from irq_call_count when displaying irq_call_count
- */
unsigned int irq_tlb_count;
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 223042086f4e..737da62bfeb0 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -5,10 +5,10 @@ struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
void *context; /* context for alloc_pgt_page */
unsigned long pmd_flag; /* page flag for PMD entry */
- bool kernel_mapping; /* kernel mapping or ident mapping */
+ unsigned long offset; /* ident mapping offset */
};
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
- unsigned long addr, unsigned long end);
+ unsigned long pstart, unsigned long pend);
#endif /* _ASM_X86_INIT_H */
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 0f555cc31984..24acd9ba7837 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -6,7 +6,6 @@
#include <asm/io.h>
#include <asm/processor.h>
-#include <linux/mc146818rtc.h>
#ifndef RTC_PORT
#define RTC_PORT(x) (0x70 + (x))
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 396348196aa7..d8abfcf524d1 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -155,7 +155,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
#ifdef CONFIG_X86_64
static inline bool is_64bit_mm(struct mm_struct *mm)
{
- return !config_enabled(CONFIG_IA32_EMULATION) ||
+ return !IS_ENABLED(CONFIG_IA32_EMULATION) ||
!(mm->context.ia32_compat == TIF_IA32);
}
#else
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 7e8ec7ae10fa..1cc82ece9ac1 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -145,7 +145,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
*
* | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
* | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
- * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ * | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry
*
* G (8) is aliased and used as a PROT_NONE indicator for
* !present ptes. We need to start storing swap entries above
@@ -156,7 +156,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
#define SWP_TYPE_BITS 5
/* Place the offset above the type: */
-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
+#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7c1c89598688..d019f0cc80ec 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -25,6 +25,24 @@ void pvclock_resume(void);
void pvclock_touch_watchdogs(void);
+static __always_inline
+unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src)
+{
+ unsigned version = src->version & ~1;
+ /* Make sure that the version is read before the data. */
+ virt_rmb();
+ return version;
+}
+
+static __always_inline
+bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
+ unsigned version)
+{
+ /* Make sure that the version is re-read after the data. */
+ virt_rmb();
+ return unlikely(version != src->version);
+}
+
/*
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
* yielding a 64-bit result.
@@ -69,23 +87,12 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
}
static __always_inline
-unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
- cycle_t *cycles, u8 *flags)
+cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src)
{
- unsigned version;
- cycle_t offset;
- u64 delta;
-
- version = src->version;
- /* Make the latest version visible */
- smp_rmb();
-
- delta = rdtsc_ordered() - src->tsc_timestamp;
- offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
- src->tsc_shift);
- *cycles = src->system_time + offset;
- *flags = src->flags;
- return version;
+ u64 delta = rdtsc_ordered() - src->tsc_timestamp;
+ cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+ src->tsc_shift);
+ return src->system_time + offset;
}
struct pvclock_vsyscall_time_info {
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 9c6b890d5e7a..b2988c0ed829 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -58,7 +58,15 @@ extern unsigned char boot_gdt[];
extern unsigned char secondary_startup_64[];
#endif
+static inline size_t real_mode_size_needed(void)
+{
+ if (real_mode_header)
+ return 0; /* already allocated. */
+
+ return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
+}
+
+void set_real_mode_mem(phys_addr_t mem, size_t size);
void reserve_real_mode(void);
-void setup_real_mode(void);
#endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/rtc.h b/arch/x86/include/asm/rtc.h
deleted file mode 100644
index f71c3b0ed360..000000000000
--- a/arch/x86/include/asm/rtc.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/rtc.h>
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index ab05d73e2bb7..d2f69b9ff732 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -31,9 +31,9 @@ static inline void dma_mark_clean(void *addr, size_t size) {}
extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs);
+ unsigned long attrs);
extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
- struct dma_attrs *attrs);
+ unsigned long attrs);
#endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 999b7cd2e78c..4e23dd15c661 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
* TS_COMPAT is set for 32-bit syscall entries and then
* remains set until we return to user mode.
*/
- if (task_thread_info(task)->status & TS_COMPAT)
+ if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED))
/*
* Sign-extend the value so (int)-EFOO becomes (long)-EFOO
* and will match correctly in comparisons.
@@ -239,9 +239,6 @@ static inline int syscall_get_arch(void)
* TS_COMPAT is set for 32-bit syscall entry and then
* remains set until we return to user mode.
*
- * TIF_IA32 tasks should always have TS_COMPAT set at
- * system call time.
- *
* x32 tasks should be considered AUDIT_ARCH_X86_64.
*/
if (task_thread_info(current)->status & TS_COMPAT)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index b45ffdda3549..8b7c8d8e0852 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -176,6 +176,50 @@ static inline unsigned long current_stack_pointer(void)
return sp;
}
+/*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+ *
+ * Returns:
+ * 1 if within a frame
+ * -1 if placed across a frame boundary (or outside stack)
+ * 0 unable to determine (no frame pointers, etc)
+ */
+static inline int arch_within_stack_frames(const void * const stack,
+ const void * const stackend,
+ const void *obj, unsigned long len)
+{
+#if defined(CONFIG_FRAME_POINTER)
+ const void *frame = NULL;
+ const void *oldframe;
+
+ oldframe = __builtin_frame_address(1);
+ if (oldframe)
+ frame = __builtin_frame_address(2);
+ /*
+ * low ----------------------------------------------> high
+ * [saved bp][saved ip][args][local vars][saved bp][saved ip]
+ * ^----------------^
+ * allow copies only within here
+ */
+ while (stack <= frame && frame < stackend) {
+ /*
+ * If obj + len extends past the last frame, this
+ * check won't pass and the next frame will be 0,
+ * causing us to bail out and correctly report
+ * the copy as invalid.
+ */
+ if (obj + len <= frame)
+ return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
+ oldframe = frame;
+ frame = *(const void * const *)frame;
+ }
+ return -1;
+#else
+ return 0;
+#endif
+}
+
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_64
@@ -219,6 +263,9 @@ static inline unsigned long current_stack_pointer(void)
* have to worry about atomic accesses.
*/
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
+#ifdef CONFIG_COMPAT
+#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
+#endif
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4e5be94e079a..6fa85944af83 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
static inline void __native_flush_tlb(void)
{
+ /*
+ * If current->mm == NULL then we borrow a mm which may change during a
+ * task switch and therefore we must not be preempted while we write CR3
+ * back:
+ */
+ preempt_disable();
native_write_cr3(native_read_cr3());
+ preempt_enable();
}
static inline void __native_flush_tlb_global_irq_disabled(void)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index c03bfb68c503..a0ae610b9280 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -761,9 +761,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
* case, and do only runtime checking for non-constant sizes.
*/
- if (likely(sz < 0 || sz >= n))
+ if (likely(sz < 0 || sz >= n)) {
+ check_object_size(to, n, false);
n = _copy_from_user(to, from, n);
- else if(__builtin_constant_p(n))
+ } else if (__builtin_constant_p(n))
copy_from_user_overflow();
else
__copy_from_user_overflow(sz, n);
@@ -781,9 +782,10 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
might_fault();
/* See the comment in copy_from_user() above. */
- if (likely(sz < 0 || sz >= n))
+ if (likely(sz < 0 || sz >= n)) {
+ check_object_size(from, n, true);
n = _copy_to_user(to, from, n);
- else if(__builtin_constant_p(n))
+ } else if (__builtin_constant_p(n))
copy_to_user_overflow();
else
__copy_to_user_overflow(sz, n);
@@ -812,21 +814,21 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
#define user_access_begin() __uaccess_begin()
#define user_access_end() __uaccess_end()
-#define unsafe_put_user(x, ptr) \
-({ \
+#define unsafe_put_user(x, ptr, err_label) \
+do { \
int __pu_err; \
__put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \
- __builtin_expect(__pu_err, 0); \
-})
+ if (unlikely(__pu_err)) goto err_label; \
+} while (0)
-#define unsafe_get_user(x, ptr) \
-({ \
+#define unsafe_get_user(x, ptr, err_label) \
+do { \
int __gu_err; \
unsigned long __gu_val; \
__get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
- __builtin_expect(__gu_err, 0); \
-})
+ if (unlikely(__gu_err)) goto err_label; \
+} while (0)
#endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 4b32da24faaf..7d3bdd1ed697 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -37,6 +37,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero
static __always_inline unsigned long __must_check
__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
{
+ check_object_size(from, n, true);
return __copy_to_user_ll(to, from, n);
}
@@ -95,6 +96,7 @@ static __always_inline unsigned long
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
might_fault();
+ check_object_size(to, n, false);
if (__builtin_constant_p(n)) {
unsigned long ret;
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 2eac2aa3e37f..673059a109fe 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -54,6 +54,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
{
int ret = 0;
+ check_object_size(dst, size, false);
if (!__builtin_constant_p(size))
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
@@ -119,6 +120,7 @@ int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
{
int ret = 0;
+ check_object_size(src, size, true);
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst, src, size);
switch (size) {
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index c852590254d5..e652a7cc6186 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -79,7 +79,7 @@ struct uv_gam_range_entry {
u16 nasid; /* HNasid */
u16 sockid; /* Socket ID, high bits of APIC ID */
u16 pnode; /* Index to MMR and GRU spaces */
- u32 pxm; /* ACPI proximity domain number */
+ u32 unused2;
u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
};
@@ -88,7 +88,8 @@ struct uv_gam_range_entry {
#define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */
#define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */
#define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */
-#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_2
+#define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */
+#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3
#define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */
#define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
index acd844c017d3..f02f025ff988 100644
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ b/arch/x86/include/asm/xen/page-coherent.h
@@ -2,12 +2,11 @@
#define _ASM_X86_XEN_PAGE_COHERENT_H
#include <asm/page.h>
-#include <linux/dma-attrs.h>
#include <linux/dma-mapping.h>
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
void *vstart = (void*)__get_free_pages(flags, get_order(size));
*dma_handle = virt_to_phys(vstart);
@@ -16,18 +15,18 @@ static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
free_pages((unsigned long) cpu_addr, get_order(size));
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
- enum dma_data_direction dir, struct dma_attrs *attrs) { }
+ enum dma_data_direction dir, unsigned long attrs) { }
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs) { }
+ unsigned long attrs) { }
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 42d27a62a404..63ff468a7986 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -241,7 +241,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
static dma_addr_t gart_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unsigned long bus;
phys_addr_t paddr = page_to_phys(page) + offset;
@@ -263,7 +263,7 @@ static dma_addr_t gart_map_page(struct device *dev, struct page *page,
*/
static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unsigned long iommu_page;
int npages;
@@ -285,7 +285,7 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
* Wrapper for pci_unmap_single working with scatterlists.
*/
static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
struct scatterlist *s;
int i;
@@ -293,7 +293,7 @@ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
for_each_sg(sg, s, nents, i) {
if (!s->dma_length || !s->length)
break;
- gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL);
+ gart_unmap_page(dev, s->dma_address, s->dma_length, dir, 0);
}
}
@@ -315,7 +315,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
addr = dma_map_area(dev, addr, s->length, dir, 0);
if (addr == bad_dma_addr) {
if (i > 0)
- gart_unmap_sg(dev, sg, i, dir, NULL);
+ gart_unmap_sg(dev, sg, i, dir, 0);
nents = 0;
sg[0].dma_length = 0;
break;
@@ -386,7 +386,7 @@ dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
* Merge chunks that have page aligned sizes into a continuous mapping.
*/
static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
struct scatterlist *s, *ps, *start_sg, *sgmap;
int need = 0, nextneed, i, out, start;
@@ -456,7 +456,7 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
error:
flush_gart();
- gart_unmap_sg(dev, sg, out, dir, NULL);
+ gart_unmap_sg(dev, sg, out, dir, 0);
/* When it was forced or merged try again in a dumb way */
if (force_iommu || iommu_merge) {
@@ -476,7 +476,7 @@ error:
/* allocate and map a coherent mapping */
static void *
gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
- gfp_t flag, struct dma_attrs *attrs)
+ gfp_t flag, unsigned long attrs)
{
dma_addr_t paddr;
unsigned long align_mask;
@@ -508,9 +508,9 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
/* free a coherent mapping */
static void
gart_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, struct dma_attrs *attrs)
+ dma_addr_t dma_addr, unsigned long attrs)
{
- gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
+ gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 7943d38c57ca..cea4fc19e844 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -147,7 +147,7 @@ static int force_enable_local_apic __initdata;
*/
static int __init parse_lapic(char *arg)
{
- if (config_enabled(CONFIG_X86_32) && !arg)
+ if (IS_ENABLED(CONFIG_X86_32) && !arg)
force_enable_local_apic = 1;
else if (arg && !strncmp(arg, "notscdeadline", 13))
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
@@ -313,7 +313,7 @@ int lapic_get_maxlvt(void)
/* Clock divisor */
#define APIC_DIVISOR 16
-#define TSC_DIVISOR 32
+#define TSC_DIVISOR 8
/*
* This function sets up the local APIC timer, with a timeout of
@@ -565,13 +565,37 @@ static void setup_APIC_timer(void)
CLOCK_EVT_FEAT_DUMMY);
levt->set_next_event = lapic_next_deadline;
clockevents_config_and_register(levt,
- (tsc_khz / TSC_DIVISOR) * 1000,
+ tsc_khz * (1000 / TSC_DIVISOR),
0xF, ~0UL);
} else
clockevents_register_device(levt);
}
/*
+ * Install the updated TSC frequency from recalibration at the TSC
+ * deadline clockevent devices.
+ */
+static void __lapic_update_tsc_freq(void *info)
+{
+ struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+
+ if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return;
+
+ clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
+}
+
+void lapic_update_tsc_freq(void)
+{
+ /*
+ * The clockevent device's ->mult and ->shift can both be
+ * changed. In order to avoid races, schedule the frequency
+ * update code on each CPU.
+ */
+ on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
+}
+
+/*
* In this functions we calibrate APIC bus clocks to the external timer.
*
* We want to do the calibration only once since we want to have local timer
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index a5e400afc563..6066d945c40e 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -523,7 +523,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
struct apic_chip_data *data = irq_data->chip_data;
int err, irq = irq_data->irq;
- if (!config_enabled(CONFIG_SMP))
+ if (!IS_ENABLED(CONFIG_SMP))
return -EPERM;
if (!cpumask_intersects(dest, cpu_online_mask))
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 6368fa69d2af..54f35d988025 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
/*
* At CPU state changes, update the x2apic cluster sibling info.
*/
-int x2apic_prepare_cpu(unsigned int cpu)
+static int x2apic_prepare_cpu(unsigned int cpu)
{
if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
return -ENOMEM;
@@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
return 0;
}
-int x2apic_dead_cpu(unsigned int this_cpu)
+static int x2apic_dead_cpu(unsigned int this_cpu)
{
int cpu;
@@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
static int x2apic_cluster_probe(void)
{
int cpu = smp_processor_id();
+ int ret;
if (!x2apic_mode)
return 0;
+ ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+ x2apic_prepare_cpu, x2apic_dead_cpu);
+ if (ret < 0) {
+ pr_err("Failed to register X2APIC_PREPARE\n");
+ return 0;
+ }
cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
- cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
- x2apic_prepare_cpu, x2apic_dead_cpu);
return 1;
}
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 09b59adaea3f..cb0673c1e940 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
if (strncmp(oem_id, "SGI", 3) != 0)
return 0;
+ if (numa_off) {
+ pr_err("UV: NUMA is off, disabling UV support\n");
+ return 0;
+ }
+
/* Setup early hub type field in uv_hub_info for Node 0 */
uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
@@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
struct uv_gam_range_entry *gre = uv_gre_table;
struct uv_gam_range_s *grt;
unsigned long last_limit = 0, ram_limit = 0;
- int bytes, i, sid, lsid = -1;
+ int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
if (!gre)
return;
@@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
}
sid = gre->sockid - _min_socket;
if (lsid < sid) { /* new range */
- grt = &_gr_table[sid];
- grt->base = lsid;
+ grt = &_gr_table[indx];
+ grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
lsid = sid;
+ lindx = indx++;
continue;
}
if (lsid == sid && !ram_limit) { /* update range */
@@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
}
if (!ram_limit) { /* non-contiguous ram range */
grt++;
- grt->base = sid - 1;
+ grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
continue;
@@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (!index) {
pr_info("UV: GAM Range Table...\n");
- pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+ pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n",
"Range", "", "Size", "Type", "NASID",
- "SID", "PN", "PXM");
+ "SID", "PN");
}
pr_info(
- "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
+ "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
((unsigned long)(gre->limit - lgre)) >>
(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
- gre->type, gre->nasid, gre->sockid,
- gre->pnode, gre->pxm);
+ gre->type, gre->nasid, gre->sockid, gre->pnode);
lgre = gre->limit;
if (sock_min > gre->sockid)
@@ -1286,7 +1291,7 @@ static void __init build_socket_tables(void)
_pnode_to_socket[i] = SOCK_EMPTY;
/* fill in pnode/node/addr conversion list values */
- pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+ pr_info("UV: GAM Building socket/pnode conversion tables\n");
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
continue;
@@ -1294,20 +1299,18 @@ static void __init build_socket_tables(void)
if (_socket_to_pnode[i] != SOCK_EMPTY)
continue; /* duplicate */
_socket_to_pnode[i] = gre->pnode;
- _socket_to_node[i] = gre->pxm;
i = gre->pnode - minpnode;
_pnode_to_socket[i] = gre->sockid;
pr_info(
- "UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+ "UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
gre->sockid, gre->type, gre->nasid,
_socket_to_pnode[gre->sockid - minsock],
- _socket_to_node[gre->sockid - minsock],
_pnode_to_socket[gre->pnode - minpnode]);
}
- /* check socket -> node values */
+ /* Set socket -> node values */
lnid = -1;
for_each_present_cpu(cpu) {
int nid = cpu_to_node(cpu);
@@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
lnid = nid;
apicid = per_cpu(x86_cpu_to_apicid, cpu);
sockid = apicid >> uv_cpuid.socketid_shift;
- i = sockid - minsock;
-
- if (nid != _socket_to_node[i]) {
- pr_warn(
- "UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
- i, sockid, gre->type, _socket_to_node[i], nid);
- _socket_to_node[i] = nid;
- }
+ _socket_to_node[sockid - minsock] = nid;
+ pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
+ sockid, apicid, nid);
}
/* Setup physical blade to pnode translation from GAM Range Table */
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 9e231d88bb33..a184c210efba 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -159,8 +159,8 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
struct task_struct *tsk = current;
int ia32_fxstate = (buf != buf_fx);
- ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
- config_enabled(CONFIG_IA32_EMULATION));
+ ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
+ IS_ENABLED(CONFIG_IA32_EMULATION));
if (!access_ok(VERIFY_WRITE, buf, size))
return -EACCES;
@@ -268,8 +268,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
u64 xfeatures = 0;
int fx_only = 0;
- ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
- config_enabled(CONFIG_IA32_EMULATION));
+ ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
+ IS_ENABLED(CONFIG_IA32_EMULATION));
if (!buf) {
fpu__clear(fpu);
@@ -416,8 +416,8 @@ void fpu__init_prepare_fx_sw_frame(void)
fx_sw_reserved.xfeatures = xfeatures_mask;
fx_sw_reserved.xstate_size = fpu_user_xstate_size;
- if (config_enabled(CONFIG_IA32_EMULATION) ||
- config_enabled(CONFIG_X86_32)) {
+ if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
+ IS_ENABLED(CONFIG_X86_32)) {
int fsave_header_size = sizeof(struct fregs_state);
fx_sw_reserved_ia32 = fx_sw_reserved;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 680049aa4593..01567aa87503 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -866,105 +866,17 @@ const void *get_xsave_field_ptr(int xsave_state)
return get_xsave_addr(&fpu->state.xsave, xsave_state);
}
-
-/*
- * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
- * to take out of its "init state". This will ensure that an
- * XRSTOR actually restores the state.
- */
-static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
- int xstate_feature_mask)
-{
- xsave->header.xfeatures |= xstate_feature_mask;
-}
-
-/*
- * This function is safe to call whether the FPU is in use or not.
- *
- * Note that this only works on the current task.
- *
- * Inputs:
- * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- * XFEATURE_MASK_SSE, etc...)
- * @xsave_state_ptr: a pointer to a copy of the state that you would
- * like written in to the current task's FPU xsave state. This pointer
- * must not be located in the current tasks's xsave area.
- * Output:
- * address of the state in the xsave area or NULL if the state
- * is not present or is in its 'init state'.
- */
-static void fpu__xfeature_set_state(int xstate_feature_mask,
- void *xstate_feature_src, size_t len)
-{
- struct xregs_state *xsave = &current->thread.fpu.state.xsave;
- struct fpu *fpu = &current->thread.fpu;
- void *dst;
-
- if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
- WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
- return;
- }
-
- /*
- * Tell the FPU code that we need the FPU state to be in
- * 'fpu' (not in the registers), and that we need it to
- * be stable while we write to it.
- */
- fpu__current_fpstate_write_begin();
-
- /*
- * This method *WILL* *NOT* work for compact-format
- * buffers. If the 'xstate_feature_mask' is unset in
- * xcomp_bv then we may need to move other feature state
- * "up" in the buffer.
- */
- if (xsave->header.xcomp_bv & xstate_feature_mask) {
- WARN_ON_ONCE(1);
- goto out;
- }
-
- /* find the location in the xsave buffer of the desired state */
- dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
-
- /*
- * Make sure that the pointer being passed in did not
- * come from the xsave buffer itself.
- */
- WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
-
- /* put the caller-provided data in the location */
- memcpy(dst, xstate_feature_src, len);
-
- /*
- * Mark the xfeature so that the CPU knows there is state
- * in the buffer now.
- */
- fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
-out:
- /*
- * We are done writing to the 'fpu'. Reenable preeption
- * and (possibly) move the fpstate back in to the fpregs.
- */
- fpu__current_fpstate_write_end();
-}
-
#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
/*
- * This will go out and modify the XSAVE buffer so that PKRU is
- * set to a particular state for access to 'pkey'.
- *
- * PKRU state does affect kernel access to user memory. We do
- * not modfiy PKRU *itself* here, only the XSAVE state that will
- * be restored in to PKRU when we return back to userspace.
+ * This will go out and modify PKRU register to set the access
+ * rights for @pkey to @init_val.
*/
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val)
{
- struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
- struct pkru_state *old_pkru_state;
- struct pkru_state new_pkru_state;
+ u32 old_pkru;
int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
u32 new_pkru_bits = 0;
@@ -974,6 +886,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
*/
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return -EINVAL;
+ /*
+ * For most XSAVE components, this would be an arduous task:
+ * brining fpstate up to date with fpregs, updating fpstate,
+ * then re-populating fpregs. But, for components that are
+ * never lazily managed, we can just access the fpregs
+ * directly. PKRU is never managed lazily, so we can just
+ * manipulate it directly. Make sure it stays that way.
+ */
+ WARN_ON_ONCE(!use_eager_fpu());
/* Set the bits we need in PKRU: */
if (init_val & PKEY_DISABLE_ACCESS)
@@ -984,37 +905,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
/* Shift the bits in to the correct place in PKRU for pkey: */
new_pkru_bits <<= pkey_shift;
- /* Locate old copy of the state in the xsave buffer: */
- old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
-
- /*
- * When state is not in the buffer, it is in the init
- * state, set it manually. Otherwise, copy out the old
- * state.
- */
- if (!old_pkru_state)
- new_pkru_state.pkru = 0;
- else
- new_pkru_state.pkru = old_pkru_state->pkru;
-
- /* Mask off any old bits in place: */
- new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
-
- /* Set the newly-requested bits: */
- new_pkru_state.pkru |= new_pkru_bits;
-
- /*
- * We could theoretically live without zeroing pkru.pad.
- * The current XSAVE feature state definition says that
- * only bytes 0->3 are used. But we do not want to
- * chance leaking kernel stack out to userspace in case a
- * memcpy() of the whole xsave buffer was done.
- *
- * They're in the same cacheline anyway.
- */
- new_pkru_state.pad = 0;
+ /* Get old PKRU and mask off any old bits in place: */
+ old_pkru = read_pkru();
+ old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
- fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
+ /* Write old part along with new part: */
+ write_pkru(old_pkru | new_pkru_bits);
return 0;
}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 2dda0bc4576e..f16c55bfc090 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -25,8 +25,6 @@ static void __init i386_default_early_setup(void)
/* Initialize 32bit specific setup functions */
x86_init.resources.reserve_resources = i386_reserve_resources;
x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
-
- reserve_bios_regions();
}
asmlinkage __visible void __init i386_start_kernel(void)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 99d48e7d2974..54a2372f5dbb 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -183,7 +183,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
copy_bootdata(__va(real_mode_data));
x86_early_init_platform_quirks();
- reserve_bios_regions();
switch (boot_params.hdr.hardware_subarch) {
case X86_SUBARCH_INTEL_MID:
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 3d747070fe67..c6dfd801df97 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1019,7 +1019,6 @@ void hpet_disable(void)
*/
#include <linux/mc146818rtc.h>
#include <linux/rtc.h>
-#include <asm/rtc.h>
#define DEFAULT_RTC_INT_FREQ 64
#define DEFAULT_RTC_SHIFT 6
@@ -1243,7 +1242,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
memset(&curr_time, 0, sizeof(struct rtc_time));
if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
- get_rtc_time(&curr_time);
+ mc146818_get_time(&curr_time);
if (hpet_rtc_flags & RTC_UIE &&
curr_time.tm_sec != hpet_prev_update_sec) {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 61521dc19c10..9f669fdd2010 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Rescheduling interrupts\n");
seq_printf(p, "%*s: ", prec, "CAL");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
- irq_stats(j)->irq_tlb_count);
+ seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
seq_puts(p, " Function call interrupts\n");
seq_printf(p, "%*s: ", prec, "TLB");
for_each_online_cpu(j)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 04b132a767f1..bfe4d6c96fbd 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -17,6 +17,7 @@
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
+#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/export.h>
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 833b1d329c47..5d400ba1349d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -340,7 +340,7 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)
static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems,enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
struct iommu_table *tbl = find_iommu_table(dev);
struct scatterlist *s;
@@ -364,7 +364,7 @@ static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
struct iommu_table *tbl = find_iommu_table(dev);
struct scatterlist *s;
@@ -396,7 +396,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
return nelems;
error:
- calgary_unmap_sg(dev, sg, nelems, dir, NULL);
+ calgary_unmap_sg(dev, sg, nelems, dir, 0);
for_each_sg(sg, s, nelems, i) {
sg->dma_address = DMA_ERROR_CODE;
sg->dma_length = 0;
@@ -407,7 +407,7 @@ error:
static dma_addr_t calgary_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
void *vaddr = page_address(page) + offset;
unsigned long uaddr;
@@ -422,7 +422,7 @@ static dma_addr_t calgary_map_page(struct device *dev, struct page *page,
static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
struct iommu_table *tbl = find_iommu_table(dev);
unsigned int npages;
@@ -432,7 +432,7 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
}
static void* calgary_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs)
+ dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
{
void *ret = NULL;
dma_addr_t mapping;
@@ -466,7 +466,7 @@ error:
static void calgary_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unsigned int npages;
struct iommu_table *tbl = find_iommu_table(dev);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 6ba014c61d62..d30c37750765 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -77,7 +77,7 @@ void __init pci_iommu_alloc(void)
}
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unsigned long dma_mask;
struct page *page;
@@ -120,7 +120,7 @@ again:
}
void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, struct dma_attrs *attrs)
+ dma_addr_t dma_addr, unsigned long attrs)
{
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page *page = virt_to_page(vaddr);
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index da15918d1c81..00e71ce396a8 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -28,7 +28,7 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
dma_addr_t bus = page_to_phys(page) + offset;
WARN_ON(size == 0);
@@ -55,7 +55,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
*/
static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
struct scatterlist *s;
int i;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 5069ef560d83..b47edb8f5256 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -16,7 +16,7 @@ int swiotlb __read_mostly;
void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
void *vaddr;
@@ -37,7 +37,7 @@ void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
void x86_swiotlb_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_addr,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
swiotlb_free_coherent(dev, size, vaddr, dma_addr);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 600edd225e81..f79576a541ff 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -923,15 +923,18 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
case offsetof(struct user32, regs.orig_eax):
/*
- * A 32-bit debugger setting orig_eax means to restore
- * the state of the task restarting a 32-bit syscall.
- * Make sure we interpret the -ERESTART* codes correctly
- * in case the task is not actually still sitting at the
- * exit from a 32-bit syscall with TS_COMPAT still set.
+ * Warning: bizarre corner case fixup here. A 32-bit
+ * debugger setting orig_eax to -1 wants to disable
+ * syscall restart. Make sure that the syscall
+ * restart code sign-extends orig_ax. Also make sure
+ * we interpret the -ERESTART* codes correctly if
+ * loaded into regs->ax in case the task is not
+ * actually still sitting at the exit from a 32-bit
+ * syscall with TS_COMPAT still set.
*/
regs->orig_ax = value;
if (syscall_get_nr(child, regs) >= 0)
- task_thread_info(child)->status |= TS_COMPAT;
+ task_thread_info(child)->status |= TS_I386_REGS_POKED;
break;
case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 06c58ce46762..3599404e3089 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -64,14 +64,9 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
u8 flags;
do {
- version = src->version;
- /* Make the latest version visible */
- smp_rmb();
-
+ version = pvclock_read_begin(src);
flags = src->flags;
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while ((src->version & 1) || version != src->version);
+ } while (pvclock_read_retry(src, version));
return flags & valid_flags;
}
@@ -84,10 +79,10 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
u8 flags;
do {
- version = __pvclock_read_cycles(src, &ret, &flags);
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while ((src->version & 1) || version != src->version);
+ version = pvclock_read_begin(src);
+ ret = __pvclock_read_cycles(src);
+ flags = src->flags;
+ } while (pvclock_read_retry(src, version));
if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
src->flags &= ~PVCLOCK_GUEST_STOPPED;
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index eceaa082ec3f..79c6311cd912 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -13,7 +13,6 @@
#include <asm/x86_init.h>
#include <asm/time.h>
#include <asm/intel-mid.h>
-#include <asm/rtc.h>
#include <asm/setup.h>
#ifdef CONFIG_X86_32
@@ -47,7 +46,7 @@ int mach_set_rtc_mmss(const struct timespec *now)
rtc_time_to_tm(nowtime, &tm);
if (!rtc_valid_tm(&tm)) {
- retval = set_rtc_time(&tm);
+ retval = mc146818_set_time(&tm);
if (retval)
printk(KERN_ERR "%s: RTC write failed with error %d\n",
__func__, retval);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 991b77986d57..0fa60f5f5a16 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -936,8 +936,6 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.arch_setup();
- kernel_randomize_memory();
-
iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
setup_memory_map();
parse_setup_data();
@@ -1055,6 +1053,12 @@ void __init setup_arch(char **cmdline_p)
max_possible_pfn = max_pfn;
+ /*
+ * Define random base addresses for memory sections after max_pfn is
+ * defined and before each memory section base is used.
+ */
+ kernel_randomize_memory();
+
#ifdef CONFIG_X86_32
/* max_low_pfn get updated here */
find_low_pfn_range();
@@ -1097,6 +1101,8 @@ void __init setup_arch(char **cmdline_p)
efi_find_mirror();
}
+ reserve_bios_regions();
+
/*
* The EFI specification says that boot service code won't be called
* after ExitBootServices(). This is, in fact, a lie.
@@ -1125,7 +1131,15 @@ void __init setup_arch(char **cmdline_p)
early_trap_pf_init();
- setup_real_mode();
+ /*
+ * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
+ * with the current CR4 value. This may not be necessary, but
+ * auditing all the early-boot CR4 manipulation would be needed to
+ * rule it out.
+ */
+ if (boot_cpu_data.cpuid_level >= 0)
+ /* A CPU has %cr4 if and only if it has CPUID. */
+ mmu_cr4_features = __read_cr4();
memblock_set_current_limit(get_max_mapped());
@@ -1174,13 +1188,6 @@ void __init setup_arch(char **cmdline_p)
kasan_init();
- if (boot_cpu_data.cpuid_level >= 0) {
- /* A CPU has %cr4 if and only if it has CPUID */
- mmu_cr4_features = __read_cr4();
- if (trampoline_cr4_features)
- *trampoline_cr4_features = mmu_cr4_features;
- }
-
#ifdef CONFIG_X86_32
/* sync back kernel address range */
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 22cc2f9f8aec..04cb3212db2d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -146,7 +146,7 @@ static int restore_sigcontext(struct pt_regs *regs,
buf = (void __user *)buf_val;
} get_user_catch(err);
- err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32));
+ err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32));
force_iret();
@@ -245,14 +245,14 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
struct fpu *fpu = &current->thread.fpu;
/* redzone */
- if (config_enabled(CONFIG_X86_64))
+ if (IS_ENABLED(CONFIG_X86_64))
sp -= 128;
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (sas_ss_flags(sp) == 0)
sp = current->sas_ss_sp + current->sas_ss_size;
- } else if (config_enabled(CONFIG_X86_32) &&
+ } else if (IS_ENABLED(CONFIG_X86_32) &&
!onsigstack &&
(regs->ss & 0xffff) != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
@@ -262,7 +262,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
}
if (fpu->fpstate_active) {
- sp = fpu__alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
+ sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
&buf_fx, &math_size);
*fpstate = (void __user *)sp;
}
@@ -662,18 +662,18 @@ badframe:
static inline int is_ia32_compat_frame(void)
{
- return config_enabled(CONFIG_IA32_EMULATION) &&
+ return IS_ENABLED(CONFIG_IA32_EMULATION) &&
test_thread_flag(TIF_IA32);
}
static inline int is_ia32_frame(void)
{
- return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
+ return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame();
}
static inline int is_x32_frame(void)
{
- return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
+ return IS_ENABLED(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
}
static int
@@ -760,8 +760,30 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
-#ifdef CONFIG_X86_64
- if (in_ia32_syscall())
+ /*
+ * This function is fundamentally broken as currently
+ * implemented.
+ *
+ * The idea is that we want to trigger a call to the
+ * restart_block() syscall and that we want in_ia32_syscall(),
+ * in_x32_syscall(), etc. to match whatever they were in the
+ * syscall being restarted. We assume that the syscall
+ * instruction at (regs->ip - 2) matches whatever syscall
+ * instruction we used to enter in the first place.
+ *
+ * The problem is that we can get here when ptrace pokes
+ * syscall-like values into regs even if we're not in a syscall
+ * at all.
+ *
+ * For now, we maintain historical behavior and guess based on
+ * stored state. We could do better by saving the actual
+ * syscall arch in restart_block or (with caveats on x32) by
+ * checking if regs->ip points to 'int $0x80'. The current
+ * behavior is incorrect if a tracer has a different bitness
+ * than the tracee.
+ */
+#ifdef CONFIG_IA32_EMULATION
+ if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1ef87e887051..78b9cb5a26af 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -22,6 +22,7 @@
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/geode.h>
+#include <asm/apic.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -1249,6 +1250,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
(unsigned long)tsc_khz / 1000,
(unsigned long)tsc_khz % 1000);
+ /* Inform the TSC deadline clockevent devices about the recalibration */
+ lapic_update_tsc_freq();
+
out:
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 6c1ff31d99ff..495c776de4b4 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
*cursor &= 0xfe;
}
/*
- * Similar treatment for VEX3 prefix.
- * TODO: add XOP/EVEX treatment when insn decoder supports them
+ * Similar treatment for VEX3/EVEX prefix.
+ * TODO: add XOP treatment when insn decoder supports them
*/
- if (insn->vex_prefix.nbytes == 3) {
+ if (insn->vex_prefix.nbytes >= 3) {
/*
* vex2: c5 rvvvvLpp (has no b bit)
* vex3/xop: c4/8f rxbmmmmm wvvvvLpp
* evex: 62 rxbR00mm wvvvv1pp zllBVaaa
- * (evex will need setting of both b and x since
- * in non-sib encoding evex.x is 4th bit of MODRM.rm)
- * Setting VEX3.b (setting because it has inverted meaning):
+ * Setting VEX3.b (setting because it has inverted meaning).
+ * Setting EVEX.x since (in non-SIB encoding) EVEX.x
+ * is the 4th bit of MODRM.rm, and needs the same treatment.
+ * For VEX3-encoded insns, VEX3.x value has no effect in
+ * non-SIB encoding, the change is superfluous but harmless.
*/
cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
- *cursor |= 0x20;
+ *cursor |= 0x60;
}
/*
@@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
reg = MODRM_REG(insn); /* Fetch modrm.reg */
reg2 = 0xff; /* Fetch vex.vvvv */
- if (insn->vex_prefix.nbytes == 2)
- reg2 = insn->vex_prefix.bytes[1];
- else if (insn->vex_prefix.nbytes == 3)
+ if (insn->vex_prefix.nbytes)
reg2 = insn->vex_prefix.bytes[2];
/*
- * TODO: add XOP, EXEV vvvv reading.
+ * TODO: add XOP vvvv reading.
*
* vex.vvvv field is in bits 6-3, bits are inverted.
* But in 32-bit mode, high-order bit may be ignored.
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 61ebdc13a29a..035731eb3897 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -120,4 +120,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
int apic_has_pending_timer(struct kvm_vcpu *vcpu);
+int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_setup_empty_irq_routing(struct kvm *kvm);
+
#endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 730cf174090a..b62c85229711 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1349,6 +1349,9 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
{
+ if (!lapic_in_kernel(vcpu))
+ return false;
+
return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
}
EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bc354f003ce1..a45d8580f91e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2809,12 +2809,8 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_ept_caps |=
VMX_EPT_EXECUTE_ONLY_BIT;
vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
- /*
- * For nested guests, we don't do anything specific
- * for single context invalidation. Hence, only advertise
- * support for global context invalidation.
- */
- vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
+ vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+ VMX_EPT_EXTENT_CONTEXT_BIT;
} else
vmx->nested.nested_vmx_ept_caps = 0;
@@ -2945,7 +2941,6 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
vmx->nested.nested_vmx_secondary_ctls_high);
break;
case MSR_IA32_VMX_EPT_VPID_CAP:
- /* Currently, no nested vpid support */
*pdata = vmx->nested.nested_vmx_ept_caps |
((u64)vmx->nested.nested_vmx_vpid_caps << 32);
break;
@@ -7609,12 +7604,16 @@ static int handle_invept(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
+ /*
+ * TODO: track mappings and invalidate
+ * single context requests appropriately
+ */
+ case VMX_EPT_EXTENT_CONTEXT:
kvm_mmu_sync_roots(vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
nested_vmx_succeed(vcpu);
break;
default:
- /* Trap single context invalidation invept calls */
BUG_ON(1);
break;
}
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index 02de3d74d2c5..8a602a1e404a 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -35,6 +35,7 @@ ENDPROC(__sw_hweight32)
ENTRY(__sw_hweight64)
#ifdef CONFIG_X86_64
+ pushq %rdi
pushq %rdx
movq %rdi, %rdx # w -> t
@@ -60,6 +61,7 @@ ENTRY(__sw_hweight64)
shrq $56, %rax # w = w_tmp >> 56
popq %rdx
+ popq %rdi
ret
#else /* CONFIG_X86_32 */
/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index f7dfeda83e5c..121f59c6ee54 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -19,7 +19,7 @@
#include <asm/cpufeature.h>
#include <asm/setup.h>
-#define debug_putstr(v) early_printk(v)
+#define debug_putstr(v) early_printk("%s", v)
#define has_cpuflag(f) boot_cpu_has(f)
#define get_boot_seed() kaslr_offset()
#endif
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index ec21796ac5fd..4473cb4f8b90 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -3,15 +3,17 @@
* included by both the compressed kernel and the regular kernel.
*/
-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
unsigned long addr, unsigned long end)
{
addr &= PMD_MASK;
for (; addr < end; addr += PMD_SIZE) {
pmd_t *pmd = pmd_page + pmd_index(addr);
- if (!pmd_present(*pmd))
- set_pmd(pmd, __pmd(addr | pmd_flag));
+ if (pmd_present(*pmd))
+ continue;
+
+ set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
}
}
@@ -30,13 +32,13 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
if (pud_present(*pud)) {
pmd = pmd_offset(pud, 0);
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ ident_pmd_init(info, pmd, addr, next);
continue;
}
pmd = (pmd_t *)info->alloc_pgt_page(info->context);
if (!pmd)
return -ENOMEM;
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ ident_pmd_init(info, pmd, addr, next);
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
}
@@ -44,14 +46,15 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
}
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
- unsigned long addr, unsigned long end)
+ unsigned long pstart, unsigned long pend)
{
+ unsigned long addr = pstart + info->offset;
+ unsigned long end = pend + info->offset;
unsigned long next;
int result;
- int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
for (; addr < end; addr = next) {
- pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+ pgd_t *pgd = pgd_page + pgd_index(addr);
pud_t *pud;
next = (addr & PGDIR_MASK) + PGDIR_SIZE;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 620928903be3..d28a2d741f9e 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -122,8 +122,18 @@ __ref void *alloc_low_pages(unsigned int num)
return __va(pfn << PAGE_SHIFT);
}
-/* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */
-#define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE)
+/*
+ * By default need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS.
+ * With KASLR memory randomization, depending on the machine e820 memory
+ * and the PUD alignment. We may need twice more pages when KASLR memory
+ * randomization is enabled.
+ */
+#ifndef CONFIG_RANDOMIZE_MEMORY
+#define INIT_PGD_PAGE_COUNT 6
+#else
+#define INIT_PGD_PAGE_COUNT 12
+#endif
+#define INIT_PGT_BUF_SIZE (INIT_PGD_PAGE_COUNT * PAGE_SIZE)
RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
void __init early_alloc_pgt_buf(void)
{
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 26dccd6c0df1..ec8654f117d8 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -97,7 +97,7 @@ void __init kernel_randomize_memory(void)
* add padding if needed (especially for memory hotplug support).
*/
BUG_ON(kaslr_regions[0].base != &page_offset_base);
- memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) +
+ memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
/* Adapt phyiscal memory region size based on available memory */
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 5ceda85b8687..052c1cb76305 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -169,7 +169,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
size_t size,
dma_addr_t *dma_handle,
gfp_t flags,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
void *vaddr;
diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c
index e88b4176260f..b814ca675131 100644
--- a/arch/x86/pci/vmd.c
+++ b/arch/x86/pci/vmd.c
@@ -274,14 +274,14 @@ static struct dma_map_ops *vmd_dma_ops(struct device *dev)
}
static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr,
- gfp_t flag, struct dma_attrs *attrs)
+ gfp_t flag, unsigned long attrs)
{
return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag,
attrs);
}
static void vmd_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t addr, struct dma_attrs *attrs)
+ dma_addr_t addr, unsigned long attrs)
{
return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr,
attrs);
@@ -289,7 +289,7 @@ static void vmd_free(struct device *dev, size_t size, void *vaddr,
static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t addr, size_t size,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr,
size, attrs);
@@ -297,7 +297,7 @@ static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t addr, size_t size,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr,
addr, size, attrs);
@@ -306,26 +306,26 @@ static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
static dma_addr_t vmd_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size,
dir, attrs);
}
static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs);
}
static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
}
static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 17c8bbd4e2f0..1fbb408e2e72 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -51,7 +51,6 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
-#include <asm/rtc.h>
#include <asm/uv/uv.h>
static struct efi efi_phys __initdata;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 04db6fbce96d..677e29e29473 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -25,6 +25,7 @@
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/init.h>
+#include <linux/mc146818rtc.h>
#include <linux/efi.h>
#include <linux/uaccess.h>
#include <linux/io.h>
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 4480c06cade7..89d1146f5a6f 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -254,6 +254,7 @@ void __init efi_free_boot_services(void)
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+ size_t rm_size;
if (md->type != EFI_BOOT_SERVICES_CODE &&
md->type != EFI_BOOT_SERVICES_DATA)
@@ -263,6 +264,26 @@ void __init efi_free_boot_services(void)
if (md->attribute & EFI_MEMORY_RUNTIME)
continue;
+ /*
+ * Nasty quirk: if all sub-1MB memory is used for boot
+ * services, we can get here without having allocated the
+ * real mode trampoline. It's too late to hand boot services
+ * memory back to the memblock allocator, so instead
+ * try to manually allocate the trampoline if needed.
+ *
+ * I've seen this on a Dell XPS 13 9350 with firmware
+ * 1.4.4 with SGX enabled booting Linux via Fedora 24's
+ * grub2-efi on a hard disk. (And no, I don't know why
+ * this happened, but Linux should still try to boot rather
+ * panicing early.)
+ */
+ rm_size = real_mode_size_needed();
+ if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
+ set_real_mode_mem(start, rm_size);
+ start += rm_size;
+ size -= rm_size;
+ }
+
free_bootmem_late(start, size);
}
diff --git a/arch/x86/platform/intel-mid/intel_mid_vrtc.c b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
index ee40fcb6e54d..58024862a7eb 100644
--- a/arch/x86/platform/intel-mid/intel_mid_vrtc.c
+++ b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/sfi.h>
#include <linux/platform_device.h>
+#include <linux/mc146818rtc.h>
#include <asm/intel-mid.h>
#include <asm/intel_mid_vrtc.h>
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 66b2166ea4a1..23f2f3e41c7f 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -187,7 +187,8 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
void uv_bios_init(void)
{
uv_systab = NULL;
- if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
+ if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+ !efi.uv_systab || efi_runtime_disabled()) {
pr_crit("UV: UVsystab: missing\n");
return;
}
@@ -199,12 +200,14 @@ void uv_bios_init(void)
return;
}
+ /* Starting with UV4 the UV systab size is variable */
if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
+ int size = uv_systab->size;
+
iounmap(uv_systab);
- uv_systab = ioremap(efi.uv_systab, uv_systab->size);
+ uv_systab = ioremap(efi.uv_systab, size);
if (!uv_systab) {
- pr_err("UV: UVsystab: ioremap(%d) failed!\n",
- uv_systab->size);
+ pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
return;
}
}
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f2b5e6a5cf95..a3e3ccc87138 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -37,11 +37,11 @@ unsigned long jump_address_phys;
*/
unsigned long restore_cr3 __visible;
-pgd_t *temp_level4_pgt __visible;
+unsigned long temp_level4_pgt __visible;
unsigned long relocated_restore_code __visible;
-static int set_up_temporary_text_mapping(void)
+static int set_up_temporary_text_mapping(pgd_t *pgd)
{
pmd_t *pmd;
pud_t *pud;
@@ -71,7 +71,7 @@ static int set_up_temporary_text_mapping(void)
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
set_pud(pud + pud_index(restore_jump_address),
__pud(__pa(pmd) | _KERNPG_TABLE));
- set_pgd(temp_level4_pgt + pgd_index(restore_jump_address),
+ set_pgd(pgd + pgd_index(restore_jump_address),
__pgd(__pa(pud) | _KERNPG_TABLE));
return 0;
@@ -87,18 +87,19 @@ static int set_up_temporary_mappings(void)
struct x86_mapping_info info = {
.alloc_pgt_page = alloc_pgt_page,
.pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
- .kernel_mapping = true,
+ .offset = __PAGE_OFFSET,
};
unsigned long mstart, mend;
+ pgd_t *pgd;
int result;
int i;
- temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
- if (!temp_level4_pgt)
+ pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!pgd)
return -ENOMEM;
/* Prepare a temporary mapping for the kernel text */
- result = set_up_temporary_text_mapping();
+ result = set_up_temporary_text_mapping(pgd);
if (result)
return result;
@@ -107,13 +108,12 @@ static int set_up_temporary_mappings(void)
mstart = pfn_mapped[i].start << PAGE_SHIFT;
mend = pfn_mapped[i].end << PAGE_SHIFT;
- result = kernel_ident_mapping_init(&info, temp_level4_pgt,
- mstart, mend);
-
+ result = kernel_ident_mapping_init(&info, pgd, mstart, mend);
if (result)
return result;
}
+ temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET;
return 0;
}
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 8eee0e9c93f0..ce8da3a0412c 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -72,8 +72,6 @@ ENTRY(restore_image)
/* code below has been relocated to a safe page */
ENTRY(core_restore_code)
/* switch to temporary page tables */
- movq $__PAGE_OFFSET, %rcx
- subq %rcx, %rax
movq %rax, %cr3
/* flush TLB */
movq %rbx, %rcx
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 705e3fffb4a1..5db706f14111 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -1,9 +1,11 @@
#include <linux/io.h>
+#include <linux/slab.h>
#include <linux/memblock.h>
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
#include <asm/realmode.h>
+#include <asm/tlbflush.h>
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;
@@ -11,25 +13,37 @@ u32 *trampoline_cr4_features;
/* Hold the pgd entry used on booting additional CPUs */
pgd_t trampoline_pgd_entry;
+void __init set_real_mode_mem(phys_addr_t mem, size_t size)
+{
+ void *base = __va(mem);
+
+ real_mode_header = (struct real_mode_header *) base;
+ printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
+ base, (unsigned long long)mem, size);
+}
+
void __init reserve_real_mode(void)
{
phys_addr_t mem;
- unsigned char *base;
- size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
+ size_t size = real_mode_size_needed();
+
+ if (!size)
+ return;
+
+ WARN_ON(slab_is_available());
/* Has to be under 1M so we can execute real-mode AP code. */
mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
- if (!mem)
- panic("Cannot allocate trampoline\n");
+ if (!mem) {
+ pr_info("No sub-1M memory is available for the trampoline\n");
+ return;
+ }
- base = __va(mem);
memblock_reserve(mem, size);
- real_mode_header = (struct real_mode_header *) base;
- printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
- base, (unsigned long long)mem, size);
+ set_real_mode_mem(mem, size);
}
-void __init setup_real_mode(void)
+static void __init setup_real_mode(void)
{
u16 real_mode_seg;
const u32 *rel;
@@ -84,7 +98,7 @@ void __init setup_real_mode(void)
trampoline_header->start = (u64) secondary_startup_64;
trampoline_cr4_features = &trampoline_header->cr4;
- *trampoline_cr4_features = __read_cr4();
+ *trampoline_cr4_features = mmu_cr4_features;
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
@@ -100,7 +114,7 @@ void __init setup_real_mode(void)
* need to mark it executable at do_pre_smp_initcalls() at least,
* thus run it as a early_initcall().
*/
-static int __init set_real_mode_permissions(void)
+static void __init set_real_mode_permissions(void)
{
unsigned char *base = (unsigned char *) real_mode_header;
size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
@@ -119,7 +133,16 @@ static int __init set_real_mode_permissions(void)
set_memory_nx((unsigned long) base, size >> PAGE_SHIFT);
set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT);
set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT);
+}
+
+static int __init init_real_mode(void)
+{
+ if (!real_mode_header)
+ panic("Real mode trampoline was not allocated");
+
+ setup_real_mode();
+ set_real_mode_permissions();
return 0;
}
-early_initcall(set_real_mode_permissions);
+early_initcall(init_real_mode);
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 6c803ca49b5d..d72dec406ccb 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -2,6 +2,9 @@
# Building vDSO images for x86.
#
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
VDSO64-y := y
vdso-install-$(VDSO64-y) += vdso.so