aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/acpi.h23
-rw-r--r--arch/x86/include/asm/amd_nb.h2
-rw-r--r--arch/x86/include/asm/apic.h112
-rw-r--r--arch/x86/include/asm/arch_hweight.h13
-rw-r--r--arch/x86/include/asm/atomic.h29
-rw-r--r--arch/x86/include/asm/atomic64_32.h14
-rw-r--r--arch/x86/include/asm/atomic64_64.h19
-rw-r--r--arch/x86/include/asm/barrier.h19
-rw-r--r--arch/x86/include/asm/cacheflush.h73
-rw-r--r--arch/x86/include/asm/context_tracking.h10
-rw-r--r--arch/x86/include/asm/cpufeature.h10
-rw-r--r--arch/x86/include/asm/delay.h1
-rw-r--r--arch/x86/include/asm/dma-mapping.h34
-rw-r--r--arch/x86/include/asm/dwarf2.h84
-rw-r--r--arch/x86/include/asm/efi.h13
-rw-r--r--arch/x86/include/asm/elf.h27
-rw-r--r--arch/x86/include/asm/fpu/signal.h2
-rw-r--r--arch/x86/include/asm/fpu/types.h148
-rw-r--r--arch/x86/include/asm/fpu/xstate.h15
-rw-r--r--arch/x86/include/asm/ftrace.h4
-rw-r--r--arch/x86/include/asm/hpet.h6
-rw-r--r--arch/x86/include/asm/hw_irq.h6
-rw-r--r--arch/x86/include/asm/ia32.h13
-rw-r--r--arch/x86/include/asm/io.h8
-rw-r--r--arch/x86/include/asm/iosf_mbi.h8
-rw-r--r--arch/x86/include/asm/irq.h4
-rw-r--r--arch/x86/include/asm/irq_remapping.h10
-rw-r--r--arch/x86/include/asm/irq_vectors.h10
-rw-r--r--arch/x86/include/asm/jump_label.h23
-rw-r--r--arch/x86/include/asm/kasan.h3
-rw-r--r--arch/x86/include/asm/kdebug.h6
-rw-r--r--arch/x86/include/asm/kvm_emulate.h10
-rw-r--r--arch/x86/include/asm/kvm_host.h89
-rw-r--r--arch/x86/include/asm/math_emu.h6
-rw-r--r--arch/x86/include/asm/mce.h42
-rw-r--r--arch/x86/include/asm/microcode.h26
-rw-r--r--arch/x86/include/asm/microcode_amd.h3
-rw-r--r--arch/x86/include/asm/microcode_intel.h10
-rw-r--r--arch/x86/include/asm/mmu.h2
-rw-r--r--arch/x86/include/asm/mmu_context.h28
-rw-r--r--arch/x86/include/asm/mshyperv.h5
-rw-r--r--arch/x86/include/asm/msr-index.h30
-rw-r--r--arch/x86/include/asm/msr.h70
-rw-r--r--arch/x86/include/asm/mwait.h45
-rw-r--r--arch/x86/include/asm/numachip/numachip.h1
-rw-r--r--arch/x86/include/asm/numachip/numachip_csr.h153
-rw-r--r--arch/x86/include/asm/page_64_types.h3
-rw-r--r--arch/x86/include/asm/page_types.h3
-rw-r--r--arch/x86/include/asm/paravirt.h40
-rw-r--r--arch/x86/include/asm/paravirt_types.h3
-rw-r--r--arch/x86/include/asm/pci_x86.h2
-rw-r--r--arch/x86/include/asm/perf_event.h7
-rw-r--r--arch/x86/include/asm/pgtable.h35
-rw-r--r--arch/x86/include/asm/pgtable_types.h40
-rw-r--r--arch/x86/include/asm/pmc_atom.h29
-rw-r--r--arch/x86/include/asm/pmem.h153
-rw-r--r--arch/x86/include/asm/preempt.h9
-rw-r--r--arch/x86/include/asm/processor.h27
-rw-r--r--arch/x86/include/asm/ptrace.h1
-rw-r--r--arch/x86/include/asm/pvclock-abi.h1
-rw-r--r--arch/x86/include/asm/pvclock.h10
-rw-r--r--arch/x86/include/asm/qrwlock.h10
-rw-r--r--arch/x86/include/asm/qspinlock.h19
-rw-r--r--arch/x86/include/asm/sigcontext.h75
-rw-r--r--arch/x86/include/asm/sigframe.h18
-rw-r--r--arch/x86/include/asm/signal.h4
-rw-r--r--arch/x86/include/asm/stackprotector.h2
-rw-r--r--arch/x86/include/asm/string_64.h5
-rw-r--r--arch/x86/include/asm/syscall.h14
-rw-r--r--arch/x86/include/asm/syscalls.h1
-rw-r--r--arch/x86/include/asm/thread_info.h30
-rw-r--r--arch/x86/include/asm/tlbflush.h6
-rw-r--r--arch/x86/include/asm/trace/mpx.h7
-rw-r--r--arch/x86/include/asm/traps.h4
-rw-r--r--arch/x86/include/asm/tsc.h19
-rw-r--r--arch/x86/include/asm/uaccess.h14
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h2
-rw-r--r--arch/x86/include/asm/vdso.h10
-rw-r--r--arch/x86/include/asm/vm86.h57
-rw-r--r--arch/x86/include/asm/vmx.h50
-rw-r--r--arch/x86/include/asm/xen/events.h11
-rw-r--r--arch/x86/include/asm/xen/hypercall.h10
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h5
-rw-r--r--arch/x86/include/asm/xen/interface.h219
-rw-r--r--arch/x86/include/asm/xen/page.h53
-rw-r--r--arch/x86/include/uapi/asm/bitsperlong.h2
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h2
-rw-r--r--arch/x86/include/uapi/asm/e820.h2
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h20
-rw-r--r--arch/x86/include/uapi/asm/mce.h5
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h2
-rw-r--r--arch/x86/include/uapi/asm/sigcontext.h486
-rw-r--r--arch/x86/include/uapi/asm/sigcontext32.h73
-rw-r--r--arch/x86/include/uapi/asm/vmx.h6
94 files changed, 1772 insertions, 1103 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 3a45668f6dc3..94c18ebfd68c 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -32,6 +32,10 @@
#include <asm/mpspec.h>
#include <asm/realmode.h>
+#ifdef CONFIG_ACPI_APEI
+# include <asm/pgtable_types.h>
+#endif
+
#ifdef CONFIG_ACPI
extern int acpi_lapic;
extern int acpi_ioapic;
@@ -147,4 +151,23 @@ extern int x86_acpi_numa_init(void);
#define acpi_unlazy_tlb(x) leave_mm(x)
+#ifdef CONFIG_ACPI_APEI
+static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
+{
+ /*
+ * We currently have no way to look up the EFI memory map
+ * attributes for a region in a consistent way, because the
+ * memmap is discarded after efi_free_boot_services(). So if
+ * you call efi_mem_attributes() during boot and at runtime,
+ * you could theoretically see different attributes.
+ *
+ * Since we are yet to see any x86 platforms that require
+ * anything other than PAGE_KERNEL (some arm64 platforms
+ * require the equivalent of PAGE_KERNEL_NOCACHE), return that
+ * until we know differently.
+ */
+ return PAGE_KERNEL;
+}
+#endif
+
#endif /* _ASM_X86_ACPI_H */
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 1a5da2e63aee..3c56ef1ae068 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,7 +81,7 @@ static inline struct amd_northbridge *node_to_amd_nb(int node)
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
}
-static inline u16 amd_get_node_id(struct pci_dev *pdev)
+static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev)
{
struct pci_dev *misc;
int i;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c8393634ca0c..a30316bf801a 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -115,6 +115,59 @@ static inline bool apic_is_x2apic_enabled(void)
return msr & X2APIC_ENABLE;
}
+extern void enable_IR_x2apic(void);
+
+extern int get_physical_broadcast(void);
+
+extern int lapic_get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void disconnect_bsp_APIC(int virt_wire_setup);
+extern void disable_local_APIC(void);
+extern void lapic_shutdown(void);
+extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
+extern void setup_local_APIC(void);
+extern void init_apic_mappings(void);
+void register_lapic_address(unsigned long address);
+extern void setup_boot_APIC_clock(void);
+extern void setup_secondary_APIC_clock(void);
+extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+ return -1;
+}
+#else
+extern int apic_force_enable(unsigned long addr);
+#endif
+
+extern int apic_bsp_setup(bool upmode);
+extern void apic_ap_setup(void);
+
+/*
+ * On 32bit this is mach-xxx local
+ */
+#ifdef CONFIG_X86_64
+extern int apic_is_clustered_box(void);
+#else
+static inline int apic_is_clustered_box(void)
+{
+ return 0;
+}
+#endif
+
+extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
+
+#else /* !CONFIG_X86_LOCAL_APIC */
+static inline void lapic_shutdown(void) { }
+#define local_apic_timer_c2_ok 1
+static inline void init_apic_mappings(void) { }
+static inline void disable_local_APIC(void) { }
+# define setup_boot_APIC_clock x86_init_noop
+# define setup_secondary_APIC_clock x86_init_noop
+#endif /* !CONFIG_X86_LOCAL_APIC */
+
#ifdef CONFIG_X86_X2APIC
/*
* Make previous memory operations globally visible before
@@ -186,67 +239,14 @@ static inline int x2apic_enabled(void)
}
#define x2apic_supported() (cpu_has_x2apic)
-#else
+#else /* !CONFIG_X86_X2APIC */
static inline void check_x2apic(void) { }
static inline void x2apic_setup(void) { }
static inline int x2apic_enabled(void) { return 0; }
#define x2apic_mode (0)
#define x2apic_supported() (0)
-#endif
-
-extern void enable_IR_x2apic(void);
-
-extern int get_physical_broadcast(void);
-
-extern int lapic_get_maxlvt(void);
-extern void clear_local_APIC(void);
-extern void disconnect_bsp_APIC(int virt_wire_setup);
-extern void disable_local_APIC(void);
-extern void lapic_shutdown(void);
-extern void sync_Arb_IDs(void);
-extern void init_bsp_APIC(void);
-extern void setup_local_APIC(void);
-extern void init_apic_mappings(void);
-void register_lapic_address(unsigned long address);
-extern void setup_boot_APIC_clock(void);
-extern void setup_secondary_APIC_clock(void);
-extern int APIC_init_uniprocessor(void);
-
-#ifdef CONFIG_X86_64
-static inline int apic_force_enable(unsigned long addr)
-{
- return -1;
-}
-#else
-extern int apic_force_enable(unsigned long addr);
-#endif
-
-extern int apic_bsp_setup(bool upmode);
-extern void apic_ap_setup(void);
-
-/*
- * On 32bit this is mach-xxx local
- */
-#ifdef CONFIG_X86_64
-extern int apic_is_clustered_box(void);
-#else
-static inline int apic_is_clustered_box(void)
-{
- return 0;
-}
-#endif
-
-extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
-
-#else /* !CONFIG_X86_LOCAL_APIC */
-static inline void lapic_shutdown(void) { }
-#define local_apic_timer_c2_ok 1
-static inline void init_apic_mappings(void) { }
-static inline void disable_local_APIC(void) { }
-# define setup_boot_APIC_clock x86_init_noop
-# define setup_secondary_APIC_clock x86_init_noop
-#endif /* !CONFIG_X86_LOCAL_APIC */
+#endif /* !CONFIG_X86_X2APIC */
#ifdef CONFIG_X86_64
#define SET_APIC_ID(x) (apic->set_apic_id(x))
@@ -313,7 +313,6 @@ struct apic {
/* wakeup_secondary_cpu */
int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
- bool wait_for_init_deassert;
void (*inquire_remote_apic)(int apicid);
/* apic ops */
@@ -378,7 +377,6 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[];
* APIC functionality to boot other CPUs - only used on SMP:
*/
#ifdef CONFIG_SMP
-extern atomic_t init_deasserted;
extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
#endif
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 9686c3d9ff73..259a7c1ef709 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -21,7 +21,7 @@
* ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
* compiler switches.
*/
-static inline unsigned int __arch_hweight32(unsigned int w)
+static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res = 0;
@@ -42,20 +42,23 @@ static inline unsigned int __arch_hweight8(unsigned int w)
return __arch_hweight32(w & 0xff);
}
+#ifdef CONFIG_X86_32
static inline unsigned long __arch_hweight64(__u64 w)
{
- unsigned long res = 0;
-
-#ifdef CONFIG_X86_32
return __arch_hweight32((u32)w) +
__arch_hweight32((u32)(w >> 32));
+}
#else
+static __always_inline unsigned long __arch_hweight64(__u64 w)
+{
+ unsigned long res = 0;
+
asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
: "="REG_OUT (res)
: REG_IN (w));
-#endif /* CONFIG_X86_32 */
return res;
}
+#endif /* CONFIG_X86_32 */
#endif
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index e9168955c42f..ae5fb83e6d91 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -24,7 +24,7 @@
*/
static __always_inline int atomic_read(const atomic_t *v)
{
- return ACCESS_ONCE((v)->counter);
+ return READ_ONCE((v)->counter);
}
/**
@@ -36,7 +36,7 @@ static __always_inline int atomic_read(const atomic_t *v)
*/
static __always_inline void atomic_set(atomic_t *v, int i)
{
- v->counter = i;
+ WRITE_ONCE(v->counter, i);
}
/**
@@ -182,6 +182,21 @@ static inline int atomic_xchg(atomic_t *v, int new)
return xchg(&v->counter, new);
}
+#define ATOMIC_OP(op) \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ asm volatile(LOCK_PREFIX #op"l %1,%0" \
+ : "+m" (v->counter) \
+ : "ir" (i) \
+ : "memory"); \
+}
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_OP
+
/**
* __atomic_add_unless - add unless the number is already a given value
* @v: pointer of type atomic_t
@@ -219,16 +234,6 @@ static __always_inline short int atomic_inc_short(short int *v)
return *v;
}
-/* These are x86-specific, used by some header files */
-#define atomic_clear_mask(mask, addr) \
- asm volatile(LOCK_PREFIX "andl %0,%1" \
- : : "r" (~(mask)), "m" (*(addr)) : "memory")
-
-#define atomic_set_mask(mask, addr) \
- asm volatile(LOCK_PREFIX "orl %0,%1" \
- : : "r" ((unsigned)(mask)), "m" (*(addr)) \
- : "memory")
-
#ifdef CONFIG_X86_32
# include <asm/atomic64_32.h>
#else
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index b154de75c90c..a11c30b77fb5 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -313,4 +313,18 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
#undef alternative_atomic64
#undef __alternative_atomic64
+#define ATOMIC64_OP(op, c_op) \
+static inline void atomic64_##op(long long i, atomic64_t *v) \
+{ \
+ long long old, c = 0; \
+ while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \
+ c = old; \
+}
+
+ATOMIC64_OP(and, &)
+ATOMIC64_OP(or, |)
+ATOMIC64_OP(xor, ^)
+
+#undef ATOMIC64_OP
+
#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index b965f9e03f2a..037351022f54 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -18,7 +18,7 @@
*/
static inline long atomic64_read(const atomic64_t *v)
{
- return ACCESS_ONCE((v)->counter);
+ return READ_ONCE((v)->counter);
}
/**
@@ -30,7 +30,7 @@ static inline long atomic64_read(const atomic64_t *v)
*/
static inline void atomic64_set(atomic64_t *v, long i)
{
- v->counter = i;
+ WRITE_ONCE(v->counter, i);
}
/**
@@ -220,4 +220,19 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
return dec;
}
+#define ATOMIC64_OP(op) \
+static inline void atomic64_##op(long i, atomic64_t *v) \
+{ \
+ asm volatile(LOCK_PREFIX #op"q %1,%0" \
+ : "+m" (v->counter) \
+ : "er" (i) \
+ : "memory"); \
+}
+
+ATOMIC64_OP(and)
+ATOMIC64_OP(or)
+ATOMIC64_OP(xor)
+
+#undef ATOMIC64_OP
+
#endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index e51a8f803f55..0681d2532527 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -57,12 +57,12 @@
do { \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
- ACCESS_ONCE(*p) = (v); \
+ WRITE_ONCE(*p, v); \
} while (0)
#define smp_load_acquire(p) \
({ \
- typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
smp_mb(); \
___p1; \
@@ -74,12 +74,12 @@ do { \
do { \
compiletime_assert_atomic_type(*p); \
barrier(); \
- ACCESS_ONCE(*p) = (v); \
+ WRITE_ONCE(*p, v); \
} while (0)
#define smp_load_acquire(p) \
({ \
- typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
barrier(); \
___p1; \
@@ -91,15 +91,4 @@ do { \
#define smp_mb__before_atomic() barrier()
#define smp_mb__after_atomic() barrier()
-/*
- * Stop RDTSC speculation. This is needed when you need to use RDTSC
- * (or get_cycles or vread that possibly accesses the TSC) in a defined
- * code region.
- */
-static __always_inline void rdtsc_barrier(void)
-{
- alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
- "lfence", X86_FEATURE_LFENCE_RDTSC);
-}
-
#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 9bf3ea14b9f0..e63aa38e85fb 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -89,6 +89,8 @@ int set_pages_rw(struct page *page, int numpages);
void clflush_cache_range(void *addr, unsigned int size);
+#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
+
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
extern const int rodata_test_data;
@@ -109,75 +111,4 @@ static inline int rodata_test(void)
}
#endif
-#ifdef ARCH_HAS_NOCACHE_UACCESS
-
-/**
- * arch_memcpy_to_pmem - copy data to persistent memory
- * @dst: destination buffer for the copy
- * @src: source buffer for the copy
- * @n: length of the copy in bytes
- *
- * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent arch_wmb_pmem() can flush cpu and memory controller
- * write buffers to guarantee durability.
- */
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
- size_t n)
-{
- int unwritten;
-
- /*
- * We are copying between two kernel buffers, if
- * __copy_from_user_inatomic_nocache() returns an error (page
- * fault) we would have already reported a general protection fault
- * before the WARN+BUG.
- */
- unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
- (void __user *) src, n);
- if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
- __func__, dst, src, unwritten))
- BUG();
-}
-
-/**
- * arch_wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of arch_memcpy_to_pmem() operations this drains data
- * from cpu write buffers and any platform (memory controller) buffers
- * to ensure that written data is durable on persistent memory media.
- */
-static inline void arch_wmb_pmem(void)
-{
- /*
- * wmb() to 'sfence' all previous writes such that they are
- * architecturally visible to 'pcommit'. Note, that we've
- * already arranged for pmem writes to avoid the cache via
- * arch_memcpy_to_pmem().
- */
- wmb();
- pcommit_sfence();
-}
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-#ifdef CONFIG_X86_64
- /*
- * We require that wmb() be an 'sfence', that is only guaranteed on
- * 64-bit builds
- */
- return static_cpu_has(X86_FEATURE_PCOMMIT);
-#else
- return false;
-#endif
-}
-#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */
-extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n);
-extern void arch_wmb_pmem(void);
-
-static inline bool __arch_has_wmb_pmem(void)
-{
- return false;
-}
-#endif
-
#endif /* _ASM_X86_CACHEFLUSH_H */
diff --git a/arch/x86/include/asm/context_tracking.h b/arch/x86/include/asm/context_tracking.h
deleted file mode 100644
index 1fe49704b146..000000000000
--- a/arch/x86/include/asm/context_tracking.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_X86_CONTEXT_TRACKING_H
-#define _ASM_X86_CONTEXT_TRACKING_H
-
-#ifdef CONFIG_CONTEXT_TRACKING
-# define SCHEDULE_USER call schedule_user
-#else
-# define SCHEDULE_USER call schedule
-#endif
-
-#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3d6606fb97d0..e4f8010f22e0 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -12,7 +12,7 @@
#include <asm/disabled-features.h>
#endif
-#define NCAPINTS 13 /* N 32-bit words worth of info */
+#define NCAPINTS 14 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -119,6 +119,7 @@
#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
@@ -176,6 +177,7 @@
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -191,7 +193,7 @@
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */
-#define X86_FEATURE_HWP_NOITFY ( 7*32+ 11) /* Intel HWP_NOTIFY */
+#define X86_FEATURE_HWP_NOTIFY ( 7*32+ 11) /* Intel HWP_NOTIFY */
#define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */
#define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */
#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
@@ -239,6 +241,7 @@
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
@@ -252,6 +255,9 @@
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h
index 9b3b4f2754c7..36a760bda462 100644
--- a/arch/x86/include/asm/delay.h
+++ b/arch/x86/include/asm/delay.h
@@ -4,5 +4,6 @@
#include <asm-generic/delay.h>
void use_tsc_delay(void);
+void use_mwaitx_delay(void);
#endif /* _ASM_X86_DELAY_H */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 1f5b7287d1ad..953b7263f844 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -12,7 +12,6 @@
#include <linux/dma-attrs.h>
#include <asm/io.h>
#include <asm/swiotlb.h>
-#include <asm-generic/dma-coherent.h>
#include <linux/dma-contiguous.h>
#ifdef CONFIG_ISA
@@ -41,24 +40,13 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
#endif
}
-#include <asm-generic/dma-mapping-common.h>
-
-/* Make sure we keep the same behaviour */
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- debug_dma_mapping_error(dev, dma_addr);
- if (ops->mapping_error)
- return ops->mapping_error(dev, dma_addr);
-
- return (dma_addr == DMA_ERROR_CODE);
-}
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
+#define arch_dma_alloc_attrs arch_dma_alloc_attrs
+#define HAVE_ARCH_DMA_SUPPORTED 1
extern int dma_supported(struct device *hwdev, u64 mask);
-extern int dma_set_mask(struct device *dev, u64 mask);
+
+#include <asm-generic/dma-mapping-common.h>
extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
@@ -125,16 +113,4 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
return gfp;
}
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-void *
-dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, struct dma_attrs *attrs);
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-void dma_free_attrs(struct device *dev, size_t size,
- void *vaddr, dma_addr_t bus,
- struct dma_attrs *attrs);
-
#endif
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
new file mode 100644
index 000000000000..b7a1ab865d68
--- /dev/null
+++ b/arch/x86/include/asm/dwarf2.h
@@ -0,0 +1,84 @@
+#ifndef _ASM_X86_DWARF2_H
+#define _ASM_X86_DWARF2_H
+
+#ifndef __ASSEMBLY__
+#warning "asm/dwarf2.h should be only included in pure assembly files"
+#endif
+
+/*
+ * Macros for dwarf2 CFI unwind table entries.
+ * See "as.info" for details on these pseudo ops. Unfortunately
+ * they are only supported in very new binutils, so define them
+ * away for older version.
+ */
+
+#ifdef CONFIG_AS_CFI
+
+#define CFI_STARTPROC .cfi_startproc
+#define CFI_ENDPROC .cfi_endproc
+#define CFI_DEF_CFA .cfi_def_cfa
+#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
+#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
+#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
+#define CFI_OFFSET .cfi_offset
+#define CFI_REL_OFFSET .cfi_rel_offset
+#define CFI_REGISTER .cfi_register
+#define CFI_RESTORE .cfi_restore
+#define CFI_REMEMBER_STATE .cfi_remember_state
+#define CFI_RESTORE_STATE .cfi_restore_state
+#define CFI_UNDEFINED .cfi_undefined
+#define CFI_ESCAPE .cfi_escape
+
+#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
+#define CFI_SIGNAL_FRAME .cfi_signal_frame
+#else
+#define CFI_SIGNAL_FRAME
+#endif
+
+#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__)
+#ifndef BUILD_VDSO
+ /*
+ * Emit CFI data in .debug_frame sections, not .eh_frame sections.
+ * The latter we currently just discard since we don't do DWARF
+ * unwinding at runtime. So only the offline DWARF information is
+ * useful to anyone. Note we should not use this directive if
+ * vmlinux.lds.S gets changed so it doesn't discard .eh_frame.
+ */
+ .cfi_sections .debug_frame
+#else
+ /*
+ * For the vDSO, emit both runtime unwind information and debug
+ * symbols for the .dbg file.
+ */
+ .cfi_sections .eh_frame, .debug_frame
+#endif
+#endif
+
+#else
+
+/*
+ * Due to the structure of pre-exisiting code, don't use assembler line
+ * comment character # to ignore the arguments. Instead, use a dummy macro.
+ */
+.macro cfi_ignore a=0, b=0, c=0, d=0
+.endm
+
+#define CFI_STARTPROC cfi_ignore
+#define CFI_ENDPROC cfi_ignore
+#define CFI_DEF_CFA cfi_ignore
+#define CFI_DEF_CFA_REGISTER cfi_ignore
+#define CFI_DEF_CFA_OFFSET cfi_ignore
+#define CFI_ADJUST_CFA_OFFSET cfi_ignore
+#define CFI_OFFSET cfi_ignore
+#define CFI_REL_OFFSET cfi_ignore
+#define CFI_REGISTER cfi_ignore
+#define CFI_RESTORE cfi_ignore
+#define CFI_REMEMBER_STATE cfi_ignore
+#define CFI_RESTORE_STATE cfi_ignore
+#define CFI_UNDEFINED cfi_ignore
+#define CFI_ESCAPE cfi_ignore
+#define CFI_SIGNAL_FRAME cfi_ignore
+
+#endif
+
+#endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 155162ea0e00..0010c78c4998 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -86,6 +86,18 @@ extern u64 asmlinkage efi_call(void *fp, ...);
extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
u32 type, u64 attribute);
+#ifdef CONFIG_KASAN
+/*
+ * CONFIG_KASAN may redefine memset to __memset. __memset function is present
+ * only in kernel binary. Since the EFI stub linked into a separate binary it
+ * doesn't have __memset(). So we should use standard memset from
+ * arch/x86/boot/compressed/string.c. The same applies to memcpy and memmove.
+ */
+#undef memcpy
+#undef memset
+#undef memmove
+#endif
+
#endif /* CONFIG_X86_32 */
extern struct efi_scratch efi_scratch;
@@ -93,6 +105,7 @@ extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int __init efi_memblock_x86_reserve_range(void);
extern pgd_t * __init efi_call_phys_prolog(void);
extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
+extern void __init efi_print_memmap(void);
extern void __init efi_unmap_memmap(void);
extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index f161c189c27b..1514753fd435 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -78,7 +78,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
#ifdef CONFIG_X86_64
extern unsigned int vdso64_enabled;
#endif
-#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
extern unsigned int vdso32_enabled;
#endif
@@ -171,11 +171,11 @@ do { \
static inline void elf_common_init(struct thread_struct *t,
struct pt_regs *regs, const u16 ds)
{
- /* Commented-out registers are cleared in stub_execve */
- /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
- regs->si = regs->di /*= regs->bp*/ = 0;
+ /* ax gets execve's return value. */
+ /*regs->ax = */ regs->bx = regs->cx = regs->dx = 0;
+ regs->si = regs->di = regs->bp = 0;
regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
- /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
+ regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
t->fs = t->gs = 0;
t->fsindex = t->gsindex = 0;
t->ds = t->es = ds;
@@ -187,8 +187,8 @@ static inline void elf_common_init(struct thread_struct *t,
#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \
elf_common_init(&current->thread, regs, __USER_DS)
-void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp);
-#define compat_start_thread start_thread_ia32
+void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp);
+#define compat_start_thread compat_start_thread
void set_personality_ia32(bool);
#define COMPAT_SET_PERSONALITY(ex) \
@@ -328,7 +328,7 @@ else \
#define VDSO_ENTRY \
((unsigned long)current->mm->context.vdso + \
- selected_vdso32->sym___kernel_vsyscall)
+ vdso_image_32.sym___kernel_vsyscall)
struct linux_binprm;
@@ -344,14 +344,9 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
*/
static inline int mmap_is_ia32(void)
{
-#ifdef CONFIG_X86_32
- return 1;
-#endif
-#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_ADDR32))
- return 1;
-#endif
- return 0;
+ return config_enabled(CONFIG_X86_32) ||
+ (config_enabled(CONFIG_COMPAT) &&
+ test_thread_flag(TIF_ADDR32));
}
/* Do not change the values. See get_align_mask() */
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 7358e9d61f1e..0e970d00dfcd 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -5,7 +5,7 @@
#define _ASM_X86_FPU_SIGNAL_H
#ifdef CONFIG_X86_64
-# include <asm/sigcontext32.h>
+# include <uapi/asm/sigcontext.h>
# include <asm/user32.h>
struct ksignal;
int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index c49c5173158e..1c6f6ac52ad0 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -95,63 +95,122 @@ struct swregs_state {
/*
* List of XSAVE features Linux knows about:
*/
-enum xfeature_bit {
- XSTATE_BIT_FP,
- XSTATE_BIT_SSE,
- XSTATE_BIT_YMM,
- XSTATE_BIT_BNDREGS,
- XSTATE_BIT_BNDCSR,
- XSTATE_BIT_OPMASK,
- XSTATE_BIT_ZMM_Hi256,
- XSTATE_BIT_Hi16_ZMM,
-
- XFEATURES_NR_MAX,
+enum xfeature {
+ XFEATURE_FP,
+ XFEATURE_SSE,
+ /*
+ * Values above here are "legacy states".
+ * Those below are "extended states".
+ */
+ XFEATURE_YMM,
+ XFEATURE_BNDREGS,
+ XFEATURE_BNDCSR,
+ XFEATURE_OPMASK,
+ XFEATURE_ZMM_Hi256,
+ XFEATURE_Hi16_ZMM,
+
+ XFEATURE_MAX,
};
-#define XSTATE_FP (1 << XSTATE_BIT_FP)
-#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
-#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
-#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
-#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
-#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
-#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
-#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
+#define XFEATURE_MASK_FP (1 << XFEATURE_FP)
+#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE)
+#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM)
+#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS)
+#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR)
+#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
+#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
+#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
+
+#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
+#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
+ | XFEATURE_MASK_ZMM_Hi256 \
+ | XFEATURE_MASK_Hi16_ZMM)
+
+#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
-#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
-#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
+struct reg_128_bit {
+ u8 regbytes[128/8];
+};
+struct reg_256_bit {
+ u8 regbytes[256/8];
+};
+struct reg_512_bit {
+ u8 regbytes[512/8];
+};
/*
+ * State component 2:
+ *
* There are 16x 256-bit AVX registers named YMM0-YMM15.
* The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
- * and are stored in 'struct fxregs_state::xmm_space[]'.
+ * and are stored in 'struct fxregs_state::xmm_space[]' in the
+ * "legacy" area.
*
- * The high 128 bits are stored here:
- * 16x 128 bits == 256 bytes.
+ * The high 128 bits are stored here.
*/
struct ymmh_struct {
- u8 ymmh_space[256];
-};
-
-/* We don't support LWP yet: */
-struct lwp_struct {
- u8 reserved[128];
-};
+ struct reg_128_bit hi_ymm[16];
+} __packed;
/* Intel MPX support: */
-struct bndreg {
+
+struct mpx_bndreg {
u64 lower_bound;
u64 upper_bound;
} __packed;
+/*
+ * State component 3 is used for the 4 128-bit bounds registers
+ */
+struct mpx_bndreg_state {
+ struct mpx_bndreg bndreg[4];
+} __packed;
-struct bndcsr {
+/*
+ * State component 4 is used for the 64-bit user-mode MPX
+ * configuration register BNDCFGU and the 64-bit MPX status
+ * register BNDSTATUS. We call the pair "BNDCSR".
+ */
+struct mpx_bndcsr {
u64 bndcfgu;
u64 bndstatus;
} __packed;
-struct mpx_struct {
- struct bndreg bndreg[4];
- struct bndcsr bndcsr;
-};
+/*
+ * The BNDCSR state is padded out to be 64-bytes in size.
+ */
+struct mpx_bndcsr_state {
+ union {
+ struct mpx_bndcsr bndcsr;
+ u8 pad_to_64_bytes[64];
+ };
+} __packed;
+
+/* AVX-512 Components: */
+
+/*
+ * State component 5 is used for the 8 64-bit opmask registers
+ * k0-k7 (opmask state).
+ */
+struct avx_512_opmask_state {
+ u64 opmask_reg[8];
+} __packed;
+
+/*
+ * State component 6 is used for the upper 256 bits of the
+ * registers ZMM0-ZMM15. These 16 256-bit values are denoted
+ * ZMM0_H-ZMM15_H (ZMM_Hi256 state).
+ */
+struct avx_512_zmm_uppers_state {
+ struct reg_256_bit zmm_upper[16];
+} __packed;
+
+/*
+ * State component 7 is used for the 16 512-bit registers
+ * ZMM16-ZMM31 (Hi16_ZMM state).
+ */
+struct avx_512_hi16_state {
+ struct reg_512_bit hi16_zmm[16];
+} __packed;
struct xstate_header {
u64 xfeatures;
@@ -159,22 +218,19 @@ struct xstate_header {
u64 reserved[6];
} __attribute__((packed));
-/* New processor state extensions should be added here: */
-#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \
- sizeof(struct lwp_struct) + \
- sizeof(struct mpx_struct) )
/*
* This is our most modern FPU state format, as saved by the XSAVE
* and restored by the XRSTOR instructions.
*
* It consists of a legacy fxregs portion, an xstate header and
- * subsequent fixed size areas as defined by the xstate header.
- * Not all CPUs support all the extensions.
+ * subsequent areas as defined by the xstate header. Not all CPUs
+ * support all the extensions, so the size of the extended area
+ * can vary quite a bit between CPUs.
*/
struct xregs_state {
struct fxregs_state i387;
struct xstate_header header;
- u8 __reserved[XSTATE_RESERVE];
+ u8 extended_state_area[0];
} __attribute__ ((packed, aligned (64)));
/*
@@ -182,7 +238,9 @@ struct xregs_state {
* put together, so that we can pick the right one runtime.
*
* The size of the structure is determined by the largest
- * member - which is the xsave area:
+ * member - which is the xsave area. The padding is there
+ * to ensure that statically-allocated task_structs (just
+ * the init_task today) have enough space.
*/
union fpregs_state {
struct fregs_state fsave;
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 4656b25bb9a7..3a6c89b70307 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -6,7 +6,7 @@
#include <linux/uaccess.h>
/* Bit 63 of XCR0 is reserved for future expansion */
-#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
+#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
#define XSTATE_CPUID 0x0000000d
@@ -19,14 +19,18 @@
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
/* Supported features which support lazy state saving */
-#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
- | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
+#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
+ XFEATURE_MASK_SSE | \
+ XFEATURE_MASK_YMM | \
+ XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM)
/* Supported features which require eager state saving */
-#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
+#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
/* All currently supported features */
-#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
+#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
@@ -40,6 +44,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
+void fpu__xstate_clear_all_cpu_caps(void);
void *get_xsave_addr(struct xregs_state *xsave, int xstate);
const void *get_xsave_field_ptr(int xstate_field);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index f45acad3c4b6..24938852db30 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -3,9 +3,9 @@
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CC_USING_FENTRY
-# define MCOUNT_ADDR ((long)(__fentry__))
+# define MCOUNT_ADDR ((unsigned long)(__fentry__))
#else
-# define MCOUNT_ADDR ((long)(mcount))
+# define MCOUNT_ADDR ((unsigned long)(mcount))
#endif
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 5fa9fb0f8809..cc285ec4b2c1 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -63,10 +63,10 @@
/* hpet memory map physical address */
extern unsigned long hpet_address;
extern unsigned long force_hpet_address;
-extern int boot_hpet_disable;
+extern bool boot_hpet_disable;
extern u8 hpet_blockid;
-extern int hpet_force_user;
-extern u8 hpet_msi_disable;
+extern bool hpet_force_user;
+extern bool hpet_msi_disable;
extern int is_hpet_enabled(void);
extern int hpet_enable(void);
extern void hpet_disable(void);
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 6615032e19c8..1e3408e88604 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -182,10 +182,10 @@ extern char irq_entries_start[];
#define trace_irq_entries_start irq_entries_start
#endif
-#define VECTOR_UNDEFINED (-1)
-#define VECTOR_RETRIGGERED (-2)
+#define VECTOR_UNUSED NULL
+#define VECTOR_RETRIGGERED ((void *)~0UL)
-typedef int vector_irq_t[NR_VECTORS];
+typedef struct irq_desc* vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
#endif /* !ASSEMBLY_ */
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index d0e8e0141041..a9bdf5569ab3 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -10,7 +10,7 @@
* 32 bit structures for IA32 support.
*/
-#include <asm/sigcontext32.h>
+#include <uapi/asm/sigcontext.h>
/* signal.h */
@@ -18,16 +18,7 @@ struct ucontext_ia32 {
unsigned int uc_flags;
unsigned int uc_link;
compat_stack_t uc_stack;
- struct sigcontext_ia32 uc_mcontext;
- compat_sigset_t uc_sigmask; /* mask last for extensibility */
-};
-
-struct ucontext_x32 {
- unsigned int uc_flags;
- unsigned int uc_link;
- compat_stack_t uc_stack;
- unsigned int uc__pad0; /* needed for alignment */
- struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */
+ struct sigcontext_32 uc_mcontext;
compat_sigset_t uc_sigmask; /* mask last for extensibility */
};
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index cc9c61bc1abe..de25aad07853 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -180,6 +180,8 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
*/
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
+#define ioremap_uc ioremap_uc
+
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
unsigned long prot_val);
@@ -248,12 +250,6 @@ static inline void flush_write_buffers(void)
#endif
}
-static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
- unsigned long size)
-{
- return (void __force __pmem *) ioremap_cache(offset, size);
-}
-
#endif /* __KERNEL__ */
extern void native_io_delay(void);
diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
index 57995f0596a6..b72ad0faa6c5 100644
--- a/arch/x86/include/asm/iosf_mbi.h
+++ b/arch/x86/include/asm/iosf_mbi.h
@@ -52,20 +52,20 @@
/* Quark available units */
#define QRK_MBI_UNIT_HBA 0x00
-#define QRK_MBI_UNIT_HB 0x03
+#define QRK_MBI_UNIT_HB 0x03
#define QRK_MBI_UNIT_RMU 0x04
-#define QRK_MBI_UNIT_MM 0x05
+#define QRK_MBI_UNIT_MM 0x05
#define QRK_MBI_UNIT_MMESRAM 0x05
#define QRK_MBI_UNIT_SOC 0x31
/* Quark read/write opcodes */
#define QRK_MBI_HBA_READ 0x10
#define QRK_MBI_HBA_WRITE 0x11
-#define QRK_MBI_HB_READ 0x10
+#define QRK_MBI_HB_READ 0x10
#define QRK_MBI_HB_WRITE 0x11
#define QRK_MBI_RMU_READ 0x10
#define QRK_MBI_RMU_WRITE 0x11
-#define QRK_MBI_MM_READ 0x10
+#define QRK_MBI_MM_READ 0x10
#define QRK_MBI_MM_WRITE 0x11
#define QRK_MBI_MMESRAM_READ 0x12
#define QRK_MBI_MMESRAM_WRITE 0x13
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 8008d06581c7..881b4768644a 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -36,7 +36,9 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
extern void (*x86_platform_ipi_callback)(void);
extern void native_init_IRQ(void);
-extern bool handle_irq(unsigned irq, struct pt_regs *regs);
+
+struct irq_desc;
+extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
extern __visible unsigned int do_IRQ(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 046c7fb1ca43..a210eba2727c 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -33,6 +33,11 @@ enum irq_remap_cap {
IRQ_POSTING_CAP = 0,
};
+struct vcpu_data {
+ u64 pi_desc_addr; /* Physical address of PI Descriptor */
+ u32 vector; /* Guest vector of the interrupt */
+};
+
#ifdef CONFIG_IRQ_REMAP
extern bool irq_remapping_cap(enum irq_remap_cap cap);
@@ -58,11 +63,6 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void)
return x86_vector_domain;
}
-struct vcpu_data {
- u64 pi_desc_addr; /* Physical address of PI Descriptor */
- u32 vector; /* Guest vector of the interrupt */
-};
-
#else /* CONFIG_IRQ_REMAP */
static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; }
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 4c2d2eb2060a..6ca9fd6234e1 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -117,16 +117,6 @@
#define FPU_IRQ 13
-#define FIRST_VM86_IRQ 3
-#define LAST_VM86_IRQ 15
-
-#ifndef __ASSEMBLY__
-static inline int invalid_vm86_irq(int irq)
-{
- return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ;
-}
-#endif
-
/*
* Size the maximum number of interrupts.
*
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a4c1cf7e93f8..5daeca3d0f9e 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -16,15 +16,32 @@
# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
#endif
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:"
".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
- _ASM_PTR "1b, %l[l_yes], %c0 \n\t"
+ _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
".popsection \n\t"
- : : "i" (key) : : l_yes);
+ : : "i" (key), "i" (branch) : : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+ asm_volatile_goto("1:"
+ ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
+ "2:\n\t"
+ ".pushsection __jump_table, \"aw\" \n\t"
+ _ASM_ALIGN "\n\t"
+ _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
+ ".popsection \n\t"
+ : : "i" (key), "i" (branch) : : l_yes);
+
return false;
l_yes:
return true;
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index 74a2a8dc9908..1410b567ecde 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -1,6 +1,9 @@
#ifndef _ASM_X86_KASAN_H
#define _ASM_X86_KASAN_H
+#include <linux/const.h>
+#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
/*
* Compiler uses shadow offset assuming that addresses start
* from 0. Kernel addresses don't start from 0, so shadow
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 32ce71375b21..e5f5dc9787d5 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -29,11 +29,5 @@ extern void show_trace(struct task_struct *t, struct pt_regs *regs,
extern void __show_regs(struct pt_regs *regs, int all);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);
-#ifdef CONFIG_KEXEC
-extern int in_crash_kexec;
-#else
-/* no crash dump is ever in progress if no crash kernel can be kexec'd */
-#define in_crash_kexec 0
-#endif
#endif /* _ASM_X86_KDEBUG_H */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index e16466ec473c..e9cd7befcb76 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -112,6 +112,16 @@ struct x86_emulate_ops {
struct x86_exception *fault);
/*
+ * read_phys: Read bytes of standard (non-emulated/special) memory.
+ * Used for descriptor reading.
+ * @addr: [IN ] Physical address from which to read.
+ * @val: [OUT] Value read from memory.
+ * @bytes: [IN ] Number of bytes to read from memory.
+ */
+ int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+ void *val, unsigned int bytes);
+
+ /*
* write_std: Write bytes of standard (non-emulated/special) memory.
* Used for descriptor writing.
* @addr: [IN ] Linear address to which to write.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49ec9038ec14..9265196e877f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
#include <linux/perf_event.h>
#include <linux/pvclock_gtod.h>
#include <linux/clocksource.h>
+#include <linux/irqbypass.h>
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
@@ -40,6 +41,7 @@
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_HALT_POLL_NS_DEFAULT 500000
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
@@ -175,6 +177,8 @@ enum {
*/
#define KVM_APIC_PV_EOI_PENDING 1
+struct kvm_kernel_irq_routing_entry;
+
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
@@ -252,6 +256,11 @@ struct kvm_pio_request {
int size;
};
+struct rsvd_bits_validate {
+ u64 rsvd_bits_mask[2][4];
+ u64 bad_mt_xwr;
+};
+
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -289,8 +298,15 @@ struct kvm_mmu {
u64 *pae_root;
u64 *lm_root;
- u64 rsvd_bits_mask[2][4];
- u64 bad_mt_xwr;
+
+ /*
+ * check zero bits on shadow page table entries, these
+ * bits include not only hardware reserved bits but also
+ * the bits spte never used.
+ */
+ struct rsvd_bits_validate shadow_zero_check;
+
+ struct rsvd_bits_validate guest_rsvd_check;
/*
* Bitmap: bit set = last pte in walk
@@ -358,6 +374,12 @@ struct kvm_mtrr {
struct list_head head;
};
+/* Hyper-V per vcpu emulation context */
+struct kvm_vcpu_hv {
+ u64 hv_vapic;
+ s64 runtime_offset;
+};
+
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -378,6 +400,7 @@ struct kvm_vcpu_arch {
u64 efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
+ u64 eoi_exit_bitmap[4];
unsigned long apic_attention;
int32_t apic_arb_prio;
int mp_state;
@@ -514,8 +537,7 @@ struct kvm_vcpu_arch {
/* used for guest single stepping over the given code position */
unsigned long singlestep_rip;
- /* fields used by HYPER-V emulation */
- u64 hv_vapic;
+ struct kvm_vcpu_hv hyperv;
cpumask_var_t wbinvd_dirty_mask;
@@ -556,6 +578,9 @@ struct kvm_vcpu_arch {
struct {
bool pv_unhalted;
} pv;
+
+ int pending_ioapic_eoi;
+ int pending_external_vector;
};
struct kvm_lpage_info {
@@ -586,6 +611,17 @@ struct kvm_apic_map {
struct kvm_lapic *logical_map[16][16];
};
+/* Hyper-V emulation context */
+struct kvm_hv {
+ u64 hv_guest_os_id;
+ u64 hv_hypercall;
+ u64 hv_tsc_page;
+
+ /* Hyper-v based guest crash (NT kernel bugcheck) parameters */
+ u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
+ u64 hv_crash_ctl;
+};
+
struct kvm_arch {
unsigned int n_used_mmu_pages;
unsigned int n_requested_mmu_pages;
@@ -645,18 +681,19 @@ struct kvm_arch {
/* reads protected by irq_srcu, writes by irq_lock */
struct hlist_head mask_notifier_list;
- /* fields used by HYPER-V emulation */
- u64 hv_guest_os_id;
- u64 hv_hypercall;
- u64 hv_tsc_page;
+ struct kvm_hv hyperv;
#ifdef CONFIG_KVM_MMU_AUDIT
int audit_point;
#endif
bool boot_vcpu_runs_old_kvmclock;
+ u32 bsp_vcpu_id;
u64 disabled_quirks;
+
+ bool irqchip_split;
+ u8 nr_reserved_ioapic_pins;
};
struct kvm_vm_stat {
@@ -686,6 +723,7 @@ struct kvm_vcpu_stat {
u32 nmi_window_exits;
u32 halt_exits;
u32 halt_successful_poll;
+ u32 halt_attempted_poll;
u32 halt_wakeup;
u32 request_irq_exits;
u32 irq_exits;
@@ -792,10 +830,10 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- int (*vm_has_apicv)(struct kvm *kvm);
+ int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
- void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
+ void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
@@ -860,6 +898,20 @@ struct kvm_x86_ops {
gfn_t offset, unsigned long mask);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
+
+ /*
+ * Architecture specific hooks for vCPU blocking due to
+ * HLT instruction.
+ * Returns for .pre_block():
+ * - 0 means continue to block the vCPU.
+ * - 1 means we cannot block the vCPU since some event
+ * happens during this period, such as, 'ON' bit in
+ * posted-interrupts descriptor is set.
+ */
+ int (*pre_block)(struct kvm_vcpu *vcpu);
+ void (*post_block)(struct kvm_vcpu *vcpu);
+ int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set);
};
struct kvm_arch_async_pf {
@@ -1199,9 +1251,18 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
int kvm_is_in_guest(void);
-int __x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
-int x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
+bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
+
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+ struct kvm_vcpu **dest_vcpu);
+
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm_lapic_irq *irq);
+
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h
index 031f6266f425..0d9b14f60d2c 100644
--- a/arch/x86/include/asm/math_emu.h
+++ b/arch/x86/include/asm/math_emu.h
@@ -2,7 +2,6 @@
#define _ASM_X86_MATH_EMU_H
#include <asm/ptrace.h>
-#include <asm/vm86.h>
/* This structure matches the layout of the data saved to the stack
following a device-not-present interrupt, part of it saved
@@ -10,9 +9,6 @@
*/
struct math_emu_info {
long ___orig_eip;
- union {
- struct pt_regs *regs;
- struct kernel_vm86_regs *vm86;
- };
+ struct pt_regs *regs;
};
#endif /* _ASM_X86_MATH_EMU_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 982dfc3679ad..2ea4527e462f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -123,19 +123,27 @@ struct mca_config {
};
struct mce_vendor_flags {
- /*
- * overflow recovery cpuid bit indicates that overflow
- * conditions are not fatal
- */
- __u64 overflow_recov : 1,
-
- /*
- * SUCCOR stands for S/W UnCorrectable error COntainment
- * and Recovery. It indicates support for data poisoning
- * in HW and deferred error interrupts.
- */
- succor : 1,
- __reserved_0 : 62;
+ /*
+ * Indicates that overflow conditions are not fatal, when set.
+ */
+ __u64 overflow_recov : 1,
+
+ /*
+ * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
+ * Recovery. It indicates support for data poisoning in HW and deferred
+ * error interrupts.
+ */
+ succor : 1,
+
+ /*
+ * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
+ * the register space for each MCA bank and also increases number of
+ * banks. Also, to accommodate the new banks and registers, the MCA
+ * register space is moved to a new MSR range.
+ */
+ smca : 1,
+
+ __reserved_0 : 61;
};
extern struct mce_vendor_flags mce_flags;
@@ -151,10 +159,12 @@ extern int mce_p5_enabled;
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
+void mcheck_cpu_clear(struct cpuinfo_x86 *c);
void mcheck_vendor_init_severity(void);
#else
static inline int mcheck_init(void) { return 0; }
static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
static inline void mcheck_vendor_init_severity(void) {}
#endif
@@ -181,20 +191,18 @@ DECLARE_PER_CPU(struct device *, mce_device);
#ifdef CONFIG_X86_MCE_INTEL
void mce_intel_feature_init(struct cpuinfo_x86 *c);
+void mce_intel_feature_clear(struct cpuinfo_x86 *c);
void cmci_clear(void);
void cmci_reenable(void);
void cmci_rediscover(void);
void cmci_recheck(void);
-void lmce_clear(void);
-void lmce_enable(void);
#else
static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
+static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { }
static inline void cmci_clear(void) {}
static inline void cmci_reenable(void) {}
static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {}
-static inline void lmce_clear(void) {}
-static inline void lmce_enable(void) {}
#endif
#ifdef CONFIG_X86_MCE_AMD
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 9e6278c7140e..34e62b1dcfce 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -27,7 +27,6 @@ struct cpu_signature {
struct device;
enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
-extern bool dis_ucode_ldr;
struct microcode_ops {
enum ucode_state (*request_microcode_user) (int cpu,
@@ -55,6 +54,12 @@ struct ucode_cpu_info {
};
extern struct ucode_cpu_info ucode_cpu_info[];
+#ifdef CONFIG_MICROCODE
+int __init microcode_init(void);
+#else
+static inline int __init microcode_init(void) { return 0; };
+#endif
+
#ifdef CONFIG_MICROCODE_INTEL
extern struct microcode_ops * __init init_intel_microcode(void);
#else
@@ -75,7 +80,6 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
static inline void __exit exit_amd_microcode(void) {}
#endif
-#ifdef CONFIG_MICROCODE_EARLY
#define MAX_UCODE_COUNT 128
#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
@@ -150,22 +154,18 @@ static inline unsigned int x86_model(unsigned int sig)
return model;
}
+#ifdef CONFIG_MICROCODE
extern void __init load_ucode_bsp(void);
extern void load_ucode_ap(void);
extern int __init save_microcode_in_initrd(void);
void reload_early_microcode(void);
extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
#else
-static inline void __init load_ucode_bsp(void) {}
-static inline void load_ucode_ap(void) {}
-static inline int __init save_microcode_in_initrd(void)
-{
- return 0;
-}
-static inline void reload_early_microcode(void) {}
-static inline bool get_builtin_firmware(struct cpio_data *cd, const char *name)
-{
- return false;
-}
+static inline void __init load_ucode_bsp(void) { }
+static inline void load_ucode_ap(void) { }
+static inline int __init save_microcode_in_initrd(void) { return 0; }
+static inline void reload_early_microcode(void) { }
+static inline bool
+get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; }
#endif
#endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index ac6d328977a6..adfc847a395e 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -64,7 +64,7 @@ extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, s
#define PATCH_MAX_SIZE PAGE_SIZE
extern u8 amd_ucode_patch[PATCH_MAX_SIZE];
-#ifdef CONFIG_MICROCODE_AMD_EARLY
+#ifdef CONFIG_MICROCODE_AMD
extern void __init load_ucode_amd_bsp(unsigned int family);
extern void load_ucode_amd_ap(void);
extern int __init save_microcode_in_initrd_amd(void);
@@ -76,4 +76,5 @@ static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; }
void reload_ucode_amd(void) {}
#endif
+extern bool check_current_patch_level(u32 *rev, bool early);
#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 7991c606125d..8559b0102ea1 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -57,7 +57,7 @@ extern int has_newer_microcode(void *mc, unsigned int csig, int cpf, int rev);
extern int microcode_sanity_check(void *mc, int print_err);
extern int find_matching_signature(void *mc, unsigned int csig, int cpf);
-#ifdef CONFIG_MICROCODE_INTEL_EARLY
+#ifdef CONFIG_MICROCODE_INTEL
extern void __init load_ucode_intel_bsp(void);
extern void load_ucode_intel_ap(void);
extern void show_ucode_info_early(void);
@@ -71,13 +71,9 @@ static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL;
static inline void reload_ucode_intel(void) {}
#endif
-#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
+#ifdef CONFIG_HOTPLUG_CPU
extern int save_mc_for_early(u8 *mc);
#else
-static inline int save_mc_for_early(u8 *mc)
-{
- return 0;
-}
+static inline int save_mc_for_early(u8 *mc) { return 0; }
#endif
-
#endif /* _ASM_X86_MICROCODE_INTEL_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 364d27481a52..55234d5e7160 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -9,7 +9,9 @@
* we put the segment information here.
*/
typedef struct {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
+#endif
#ifdef CONFIG_X86_64
/* True if mm supports a task running in 32 bit compatibility mode. */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 984abfe47edc..379cd3658799 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -33,6 +33,7 @@ static inline void load_mm_cr4(struct mm_struct *mm)
static inline void load_mm_cr4(struct mm_struct *mm) {}
#endif
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
* ldt_structs can be allocated, used, and freed, but they are never
* modified while live.
@@ -48,8 +49,23 @@ struct ldt_struct {
int size;
};
+/*
+ * Used for LDT copy/destruction.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+void destroy_context(struct mm_struct *mm);
+#else /* CONFIG_MODIFY_LDT_SYSCALL */
+static inline int init_new_context(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ return 0;
+}
+static inline void destroy_context(struct mm_struct *mm) {}
+#endif
+
static inline void load_mm_ldt(struct mm_struct *mm)
{
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
/* lockless_dereference synchronizes with smp_store_release */
@@ -73,17 +89,13 @@ static inline void load_mm_ldt(struct mm_struct *mm)
set_ldt(ldt->entries, ldt->size);
else
clear_LDT();
+#else
+ clear_LDT();
+#endif
DEBUG_LOCKS_WARN_ON(preemptible());
}
-/*
- * Used for LDT copy/destruction.
- */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void destroy_context(struct mm_struct *mm);
-
-
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
@@ -114,6 +126,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
/* Load per-mm CR4 state */
load_mm_cr4(next);
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
* Load the LDT, if the LDT is different.
*
@@ -128,6 +141,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_mm_ldt(next);
+#endif
}
#ifdef CONFIG_SMP
else {
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index c163215abb9a..aaf59b7da98a 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -7,6 +7,7 @@
struct ms_hyperv_info {
u32 features;
+ u32 misc_features;
u32 hints;
};
@@ -20,4 +21,8 @@ void hyperv_vector_handler(struct pt_regs *regs);
void hv_setup_vmbus_irq(void (*handler)(void));
void hv_remove_vmbus_irq(void);
+void hv_setup_kexec_handler(void (*handler)(void));
+void hv_remove_kexec_handler(void);
+void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
+void hv_remove_crash_handler(void);
#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 9ebc3d009373..9f3905697f12 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -73,6 +73,12 @@
#define MSR_LBR_CORE_FROM 0x00000040
#define MSR_LBR_CORE_TO 0x00000060
+#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
+#define LBR_INFO_MISPRED BIT_ULL(63)
+#define LBR_INFO_IN_TX BIT_ULL(62)
+#define LBR_INFO_ABORT BIT_ULL(61)
+#define LBR_INFO_CYCLES 0xffff
+
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
@@ -80,13 +86,21 @@
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
+#define RTIT_CTL_CYCLEACC BIT(1)
#define RTIT_CTL_OS BIT(2)
#define RTIT_CTL_USR BIT(3)
#define RTIT_CTL_CR3EN BIT(7)
#define RTIT_CTL_TOPA BIT(8)
+#define RTIT_CTL_MTC_EN BIT(9)
#define RTIT_CTL_TSC_EN BIT(10)
#define RTIT_CTL_DISRETC BIT(11)
#define RTIT_CTL_BRANCH_EN BIT(13)
+#define RTIT_CTL_MTC_RANGE_OFFSET 14
+#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
+#define RTIT_CTL_CYC_THRESH_OFFSET 19
+#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
+#define RTIT_CTL_PSB_FREQ_OFFSET 24
+#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
#define MSR_IA32_RTIT_STATUS 0x00000571
#define RTIT_STATUS_CONTEXTEN BIT(1)
#define RTIT_STATUS_TRIGGEREN BIT(2)
@@ -127,6 +141,8 @@
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
+#define MSR_PEBS_FRONTEND 0x000003f7
+
#define MSR_IA32_POWER_CTL 0x000001fc
#define MSR_IA32_MC0_CTL 0x00000400
@@ -170,6 +186,12 @@
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
+#define MSR_CONFIG_TDP_NOMINAL 0x00000648
+#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
+#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A
+#define MSR_CONFIG_TDP_CONTROL 0x0000064B
+#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
+
#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
@@ -184,6 +206,13 @@
#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
+/* Config TDP MSRs */
+#define MSR_CONFIG_TDP_NOMINAL 0x00000648
+#define MSR_CONFIG_TDP_LEVEL1 0x00000649
+#define MSR_CONFIG_TDP_LEVEL2 0x0000064A
+#define MSR_CONFIG_TDP_CONTROL 0x0000064B
+#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
+
/* Hardware P state interface */
#define MSR_PPERF 0x0000064e
#define MSR_PERF_LIMIT_REASONS 0x0000064f
@@ -311,6 +340,7 @@
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
#define MSR_K8_TSEG_ADDR 0xc0010112
+#define MSR_K8_TSEG_MASK 0xc0010113
#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index e6a707eb5081..77d8b284e4a7 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -47,14 +47,13 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
* it means rax *or* rdx.
*/
#ifdef CONFIG_X86_64
-#define DECLARE_ARGS(val, low, high) unsigned low, high
-#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32))
-#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high)
+/* Using 64-bit values saves one instruction clearing the high half of low */
+#define DECLARE_ARGS(val, low, high) unsigned long low, high
+#define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32)
#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
#else
#define DECLARE_ARGS(val, low, high) unsigned long long val
#define EAX_EDX_VAL(val, low, high) (val)
-#define EAX_EDX_ARGS(val, low, high) "A" (val)
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif
@@ -106,12 +105,19 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
return err;
}
-extern unsigned long long native_read_tsc(void);
-
extern int rdmsr_safe_regs(u32 regs[8]);
extern int wrmsr_safe_regs(u32 regs[8]);
-static __always_inline unsigned long long __native_read_tsc(void)
+/**
+ * rdtsc() - returns the current TSC without ordering constraints
+ *
+ * rdtsc() returns the result of RDTSC as a 64-bit integer. The
+ * only ordering constraint it supplies is the ordering implied by
+ * "asm volatile": it will put the RDTSC in the place you expect. The
+ * CPU can and will speculatively execute that RDTSC, though, so the
+ * results can be non-monotonic if compared on different CPUs.
+ */
+static __always_inline unsigned long long rdtsc(void)
{
DECLARE_ARGS(val, low, high);
@@ -120,6 +126,35 @@ static __always_inline unsigned long long __native_read_tsc(void)
return EAX_EDX_VAL(val, low, high);
}
+/**
+ * rdtsc_ordered() - read the current TSC in program order
+ *
+ * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer.
+ * It is ordered like a load to a global in-memory counter. It should
+ * be impossible to observe non-monotonic rdtsc_unordered() behavior
+ * across multiple CPUs as long as the TSC is synced.
+ */
+static __always_inline unsigned long long rdtsc_ordered(void)
+{
+ /*
+ * The RDTSC instruction is not ordered relative to memory
+ * access. The Intel SDM and the AMD APM are both vague on this
+ * point, but empirically an RDTSC instruction can be
+ * speculatively executed before prior loads. An RDTSC
+ * immediately after an appropriate barrier appears to be
+ * ordered as a normal load, that is, it provides the same
+ * ordering guarantees as reading from a global memory location
+ * that some other imaginary CPU is updating continuously with a
+ * time stamp.
+ */
+ alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+ "lfence", X86_FEATURE_LFENCE_RDTSC);
+ return rdtsc();
+}
+
+/* Deprecated, keep it for a cycle for easier merging: */
+#define rdtscll(now) do { (now) = rdtsc_ordered(); } while (0)
+
static inline unsigned long long native_read_pmc(int counter)
{
DECLARE_ARGS(val, low, high);
@@ -153,8 +188,10 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
#define rdmsrl(msr, val) \
((val) = native_read_msr((msr)))
-#define wrmsrl(msr, val) \
- native_write_msr((msr), (u32)((u64)(val)), (u32)((u64)(val) >> 32))
+static inline void wrmsrl(unsigned msr, u64 val)
+{
+ native_write_msr(msr, (u32)val, (u32)(val >> 32));
+}
/* wrmsr with exception handling */
static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
@@ -180,12 +217,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
return err;
}
-#define rdtscl(low) \
- ((low) = (u32)__native_read_tsc())
-
-#define rdtscll(val) \
- ((val) = __native_read_tsc())
-
#define rdpmc(counter, low, high) \
do { \
u64 _l = native_read_pmc((counter)); \
@@ -195,15 +226,6 @@ do { \
#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
-#define rdtscp(low, high, aux) \
-do { \
- unsigned long long _val = native_read_tscp(&(aux)); \
- (low) = (u32)_val; \
- (high) = (u32)(_val >> 32); \
-} while (0)
-
-#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux))
-
#endif /* !CONFIG_PARAVIRT */
/*
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 653dfa7662e1..c70689b5e5aa 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -14,6 +14,9 @@
#define CPUID5_ECX_INTERRUPT_BREAK 0x2
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
+#define MWAITX_ECX_TIMER_ENABLE BIT(1)
+#define MWAITX_MAX_LOOPS ((u32)-1)
+#define MWAITX_DISABLE_CSTATES 0xf
static inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
@@ -23,6 +26,14 @@ static inline void __monitor(const void *eax, unsigned long ecx,
:: "a" (eax), "c" (ecx), "d"(edx));
}
+static inline void __monitorx(const void *eax, unsigned long ecx,
+ unsigned long edx)
+{
+ /* "monitorx %eax, %ecx, %edx;" */
+ asm volatile(".byte 0x0f, 0x01, 0xfa;"
+ :: "a" (eax), "c" (ecx), "d"(edx));
+}
+
static inline void __mwait(unsigned long eax, unsigned long ecx)
{
/* "mwait %eax, %ecx;" */
@@ -30,6 +41,40 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
:: "a" (eax), "c" (ecx));
}
+/*
+ * MWAITX allows for a timer expiration to get the core out a wait state in
+ * addition to the default MWAIT exit condition of a store appearing at a
+ * monitored virtual address.
+ *
+ * Registers:
+ *
+ * MWAITX ECX[1]: enable timer if set
+ * MWAITX EBX[31:0]: max wait time expressed in SW P0 clocks. The software P0
+ * frequency is the same as the TSC frequency.
+ *
+ * Below is a comparison between MWAIT and MWAITX on AMD processors:
+ *
+ * MWAIT MWAITX
+ * opcode 0f 01 c9 | 0f 01 fb
+ * ECX[0] value of RFLAGS.IF seen by instruction
+ * ECX[1] unused/#GP if set | enable timer if set
+ * ECX[31:2] unused/#GP if set
+ * EAX unused (reserve for hint)
+ * EBX[31:0] unused | max wait time (P0 clocks)
+ *
+ * MONITOR MONITORX
+ * opcode 0f 01 c8 | 0f 01 fa
+ * EAX (logical) address to monitor
+ * ECX #GP if not zero
+ */
+static inline void __mwaitx(unsigned long eax, unsigned long ebx,
+ unsigned long ecx)
+{
+ /* "mwaitx %eax, %ebx, %ecx;" */
+ asm volatile(".byte 0x0f, 0x01, 0xfb;"
+ :: "a" (eax), "b" (ebx), "c" (ecx));
+}
+
static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
trace_hardirqs_on();
diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h
index 1c6f7f6212c1..c64373a2d731 100644
--- a/arch/x86/include/asm/numachip/numachip.h
+++ b/arch/x86/include/asm/numachip/numachip.h
@@ -14,6 +14,7 @@
#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H
#define _ASM_X86_NUMACHIP_NUMACHIP_H
+extern u8 numachip_system;
extern int __init pci_numachip_init(void);
#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */
diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h
index 660f843df928..29719eecdc2e 100644
--- a/arch/x86/include/asm/numachip/numachip_csr.h
+++ b/arch/x86/include/asm/numachip/numachip_csr.h
@@ -14,12 +14,8 @@
#ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
#define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
-#include <linux/numa.h>
-#include <linux/percpu.h>
+#include <linux/smp.h>
#include <linux/io.h>
-#include <linux/swab.h>
-#include <asm/types.h>
-#include <asm/processor.h>
#define CSR_NODE_SHIFT 16
#define CSR_NODE_BITS(p) (((unsigned long)(p)) << CSR_NODE_SHIFT)
@@ -27,11 +23,8 @@
/* 32K CSR space, b15 indicates geo/non-geo */
#define CSR_OFFSET_MASK 0x7fffUL
-
-/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */
-#define NUMACHIP_GCSR_BASE 0x3fff00000000ULL
-#define NUMACHIP_GCSR_LIM 0x3fff0fffffffULL
-#define NUMACHIP_GCSR_SIZE (NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1)
+#define CSR_G0_NODE_IDS (0x008 + (0 << 12))
+#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12))
/*
* Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however
@@ -41,12 +34,7 @@
#define NUMACHIP_LCSR_BASE 0x3ffffe000000ULL
#define NUMACHIP_LCSR_LIM 0x3fffffffffffULL
#define NUMACHIP_LCSR_SIZE (NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1)
-
-static inline void *gcsr_address(int node, unsigned long offset)
-{
- return __va(NUMACHIP_GCSR_BASE | (1UL << 15) |
- CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK));
-}
+#define NUMACHIP_LAPIC_BITS 8
static inline void *lcsr_address(unsigned long offset)
{
@@ -54,114 +42,57 @@ static inline void *lcsr_address(unsigned long offset)
CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK));
}
-static inline unsigned int read_gcsr(int node, unsigned long offset)
+static inline unsigned int read_lcsr(unsigned long offset)
{
- return swab32(readl(gcsr_address(node, offset)));
+ return swab32(readl(lcsr_address(offset)));
}
-static inline void write_gcsr(int node, unsigned long offset, unsigned int val)
+static inline void write_lcsr(unsigned long offset, unsigned int val)
{
- writel(swab32(val), gcsr_address(node, offset));
+ writel(swab32(val), lcsr_address(offset));
}
-static inline unsigned int read_lcsr(unsigned long offset)
+/*
+ * On NumaChip2, local CSR space is 16MB and starts at fixed offset below 4G
+ */
+
+#define NUMACHIP2_LCSR_BASE 0xf0000000UL
+#define NUMACHIP2_LCSR_SIZE 0x1000000UL
+#define NUMACHIP2_APIC_ICR 0x100000
+#define NUMACHIP2_TIMER_DEADLINE 0x200000
+#define NUMACHIP2_TIMER_INT 0x200008
+#define NUMACHIP2_TIMER_NOW 0x200018
+#define NUMACHIP2_TIMER_RESET 0x200020
+
+static inline void __iomem *numachip2_lcsr_address(unsigned long offset)
{
- return swab32(readl(lcsr_address(offset)));
+ return (void __iomem *)__va(NUMACHIP2_LCSR_BASE |
+ (offset & (NUMACHIP2_LCSR_SIZE - 1)));
}
-static inline void write_lcsr(unsigned long offset, unsigned int val)
+static inline u32 numachip2_read32_lcsr(unsigned long offset)
{
- writel(swab32(val), lcsr_address(offset));
+ return readl(numachip2_lcsr_address(offset));
}
-/* ========================================================================= */
-/* CSR_G0_STATE_CLEAR */
-/* ========================================================================= */
-
-#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12))
-union numachip_csr_g0_state_clear {
- unsigned int v;
- struct numachip_csr_g0_state_clear_s {
- unsigned int _state:2;
- unsigned int _rsvd_2_6:5;
- unsigned int _lost:1;
- unsigned int _rsvd_8_31:24;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G0_NODE_IDS */
-/* ========================================================================= */
+static inline u64 numachip2_read64_lcsr(unsigned long offset)
+{
+ return readq(numachip2_lcsr_address(offset));
+}
-#define CSR_G0_NODE_IDS (0x008 + (0 << 12))
-union numachip_csr_g0_node_ids {
- unsigned int v;
- struct numachip_csr_g0_node_ids_s {
- unsigned int _initialid:16;
- unsigned int _nodeid:12;
- unsigned int _rsvd_28_31:4;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G3_EXT_IRQ_GEN */
-/* ========================================================================= */
+static inline void numachip2_write32_lcsr(unsigned long offset, u32 val)
+{
+ writel(val, numachip2_lcsr_address(offset));
+}
-#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12))
-union numachip_csr_g3_ext_irq_gen {
- unsigned int v;
- struct numachip_csr_g3_ext_irq_gen_s {
- unsigned int _vector:8;
- unsigned int _msgtype:3;
- unsigned int _index:5;
- unsigned int _destination_apic_id:16;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G3_EXT_IRQ_STATUS */
-/* ========================================================================= */
-
-#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12))
-union numachip_csr_g3_ext_irq_status {
- unsigned int v;
- struct numachip_csr_g3_ext_irq_status_s {
- unsigned int _result:32;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G3_EXT_IRQ_DEST */
-/* ========================================================================= */
-
-#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12))
-union numachip_csr_g3_ext_irq_dest {
- unsigned int v;
- struct numachip_csr_g3_ext_irq_dest_s {
- unsigned int _irq:8;
- unsigned int _rsvd_8_31:24;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G3_NC_ATT_MAP_SELECT */
-/* ========================================================================= */
-
-#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12))
-union numachip_csr_g3_nc_att_map_select {
- unsigned int v;
- struct numachip_csr_g3_nc_att_map_select_s {
- unsigned int _upper_address_bits:4;
- unsigned int _select_ram:4;
- unsigned int _rsvd_8_31:24;
- } s;
-};
-
-/* ========================================================================= */
-/* CSR_G3_NC_ATT_MAP_SELECT_0-255 */
-/* ========================================================================= */
-
-#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12))
+static inline void numachip2_write64_lcsr(unsigned long offset, u64 val)
+{
+ writeq(val, numachip2_lcsr_address(offset));
+}
-#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
+static inline unsigned int numachip2_timer(void)
+{
+ return (smp_processor_id() % 48) << 6;
+}
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 4edd53b79a81..4928cf0d5af0 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -26,9 +26,6 @@
#define MCE_STACK 4
#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
-#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
-#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
-
/*
* Set __PAGE_OFFSET to the most negative possible address +
* PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index c7c712f2648b..c5b7fb2774d0 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -20,6 +20,9 @@
#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
+#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
+
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index d143bfad45d7..10d0596433f8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -153,7 +153,11 @@ do { \
val = paravirt_read_msr(msr, &_err); \
} while (0)
-#define wrmsrl(msr, val) wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
+static inline void wrmsrl(unsigned msr, u64 val)
+{
+ wrmsr(msr, (u32)val, (u32)(val>>32));
+}
+
#define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b)
/* rdmsr with exception handling */
@@ -174,19 +178,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
return err;
}
-static inline u64 paravirt_read_tsc(void)
-{
- return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
-}
-
-#define rdtscl(low) \
-do { \
- u64 _l = paravirt_read_tsc(); \
- low = (int)_l; \
-} while (0)
-
-#define rdtscll(val) (val = paravirt_read_tsc())
-
static inline unsigned long long paravirt_sched_clock(void)
{
return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
@@ -215,27 +206,6 @@ do { \
#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))
-static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
-{
- return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
-}
-
-#define rdtscp(low, high, aux) \
-do { \
- int __aux; \
- unsigned long __val = paravirt_rdtscp(&__aux); \
- (low) = (u32)__val; \
- (high) = (u32)(__val >> 32); \
- (aux) = __aux; \
-} while (0)
-
-#define rdtscpll(val, aux) \
-do { \
- unsigned long __aux; \
- val = paravirt_rdtscp(&__aux); \
- (aux) = __aux; \
-} while (0)
-
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index a6b8f9fadb06..31247b5bff7c 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -97,7 +97,6 @@ struct pv_lazy_ops {
struct pv_time_ops {
unsigned long long (*sched_clock)(void);
unsigned long long (*steal_clock)(int cpu);
- unsigned long (*get_tsc_khz)(void);
};
struct pv_cpu_ops {
@@ -156,9 +155,7 @@ struct pv_cpu_ops {
u64 (*read_msr)(unsigned int msr, int *err);
int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
- u64 (*read_tsc)(void);
u64 (*read_pmc)(int counter);
- unsigned long long (*read_tscp)(unsigned int *aux);
#ifdef CONFIG_X86_32
/*
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 164e3f8d3c3d..fa1195dae425 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,8 +93,6 @@ extern raw_spinlock_t pci_config_lock;
extern int (*pcibios_enable_irq)(struct pci_dev *dev);
extern void (*pcibios_disable_irq)(struct pci_dev *dev);
-extern bool mp_should_keep_irq(struct device *dev);
-
struct pci_raw_ops {
int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 *val);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index dc0f6ed35b08..7bcb861a04e5 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -159,6 +159,13 @@ struct x86_pmu_capability {
*/
#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16)
+#define GLOBAL_STATUS_COND_CHG BIT_ULL(63)
+#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62)
+#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
+#define GLOBAL_STATUS_ASIF BIT_ULL(60)
+#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
+#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58)
+
/*
* IBS cpuid feature detection
*/
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 867da5bbb4a3..6ec0c8b2e9df 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -19,6 +19,13 @@
#include <asm/x86_init.h>
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_checkwx(void);
+
+#ifdef CONFIG_DEBUG_WX
+#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
+#else
+#define debug_checkwx() do { } while (0)
+#endif
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -142,12 +149,12 @@ static inline unsigned long pte_pfn(pte_t pte)
static inline unsigned long pmd_pfn(pmd_t pmd)
{
- return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+ return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
}
static inline unsigned long pud_pfn(pud_t pud)
{
- return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
+ return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
}
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
@@ -318,6 +325,16 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
}
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+ return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+ return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
+}
+
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
/*
@@ -379,7 +396,9 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
return __pgprot(preservebits | addbits);
}
-#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
+#define pte_pgprot(x) __pgprot(pte_flags(x))
+#define pmd_pgprot(x) __pgprot(pmd_flags(x))
+#define pud_pgprot(x) __pgprot(pud_flags(x))
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
@@ -502,14 +521,15 @@ static inline int pmd_none(pmd_t pmd)
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
- return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK);
+ return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd));
}
/*
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
-#define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT)
+#define pmd_page(pmd) \
+ pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
/*
* the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
@@ -570,14 +590,15 @@ static inline int pud_present(pud_t pud)
static inline unsigned long pud_page_vaddr(pud_t pud)
{
- return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK);
+ return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud));
}
/*
* Currently stuck as a macro due to indirect forward reference to
* linux/mmzone.h's __section_mem_map_addr() definition:
*/
-#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
+#define pud_page(pud) \
+ pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
/* Find an entry in the second-level page table.. */
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 13f310bfc09a..dd5b0aa9dd2f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -209,10 +209,10 @@ enum page_cache_mode {
#include <linux/types.h>
-/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
+/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */
#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
-/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
+/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */
#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
@@ -276,14 +276,46 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
}
#endif
+static inline pudval_t pud_pfn_mask(pud_t pud)
+{
+ if (native_pud_val(pud) & _PAGE_PSE)
+ return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+ else
+ return PTE_PFN_MASK;
+}
+
+static inline pudval_t pud_flags_mask(pud_t pud)
+{
+ if (native_pud_val(pud) & _PAGE_PSE)
+ return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK);
+ else
+ return ~PTE_PFN_MASK;
+}
+
static inline pudval_t pud_flags(pud_t pud)
{
- return native_pud_val(pud) & PTE_FLAGS_MASK;
+ return native_pud_val(pud) & pud_flags_mask(pud);
+}
+
+static inline pmdval_t pmd_pfn_mask(pmd_t pmd)
+{
+ if (native_pmd_val(pmd) & _PAGE_PSE)
+ return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+ else
+ return PTE_PFN_MASK;
+}
+
+static inline pmdval_t pmd_flags_mask(pmd_t pmd)
+{
+ if (native_pmd_val(pmd) & _PAGE_PSE)
+ return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK);
+ else
+ return ~PTE_PFN_MASK;
}
static inline pmdval_t pmd_flags(pmd_t pmd)
{
- return native_pmd_val(pmd) & PTE_FLAGS_MASK;
+ return native_pmd_val(pmd) & pmd_flags_mask(pmd);
}
static inline pte_t native_make_pte(pteval_t val)
diff --git a/arch/x86/include/asm/pmc_atom.h b/arch/x86/include/asm/pmc_atom.h
index bc0fc0866553..aa8744c77c6d 100644
--- a/arch/x86/include/asm/pmc_atom.h
+++ b/arch/x86/include/asm/pmc_atom.h
@@ -18,6 +18,8 @@
/* ValleyView Power Control Unit PCI Device ID */
#define PCI_DEVICE_ID_VLV_PMC 0x0F1C
+/* CherryTrail Power Control Unit PCI Device ID */
+#define PCI_DEVICE_ID_CHT_PMC 0x229C
/* PMC Memory mapped IO registers */
#define PMC_BASE_ADDR_OFFSET 0x44
@@ -29,6 +31,10 @@
#define PMC_FUNC_DIS 0x34
#define PMC_FUNC_DIS_2 0x38
+/* CHT specific bits in FUNC_DIS2 register */
+#define BIT_FD_GMM BIT(3)
+#define BIT_FD_ISH BIT(4)
+
/* S0ix wake event control */
#define PMC_S0IX_WAKE_EN 0x3C
@@ -75,6 +81,21 @@
#define PMC_PSS_BIT_USB BIT(16)
#define PMC_PSS_BIT_USB_SUS BIT(17)
+/* CHT specific bits in PSS register */
+#define PMC_PSS_BIT_CHT_UFS BIT(7)
+#define PMC_PSS_BIT_CHT_UXD BIT(11)
+#define PMC_PSS_BIT_CHT_UXD_FD BIT(12)
+#define PMC_PSS_BIT_CHT_UX_ENG BIT(15)
+#define PMC_PSS_BIT_CHT_USB_SUS BIT(16)
+#define PMC_PSS_BIT_CHT_GMM BIT(17)
+#define PMC_PSS_BIT_CHT_ISH BIT(18)
+#define PMC_PSS_BIT_CHT_DFX_MASTER BIT(26)
+#define PMC_PSS_BIT_CHT_DFX_CLUSTER1 BIT(27)
+#define PMC_PSS_BIT_CHT_DFX_CLUSTER2 BIT(28)
+#define PMC_PSS_BIT_CHT_DFX_CLUSTER3 BIT(29)
+#define PMC_PSS_BIT_CHT_DFX_CLUSTER4 BIT(30)
+#define PMC_PSS_BIT_CHT_DFX_CLUSTER5 BIT(31)
+
/* These registers reflect D3 status of functions */
#define PMC_D3_STS_0 0xA0
@@ -117,6 +138,10 @@
#define BIT_USH_SS_PHY BIT(2)
#define BIT_DFX BIT(3)
+/* CHT specific bits in PMC_D3_STS_1 register */
+#define BIT_STS_GMM BIT(1)
+#define BIT_STS_ISH BIT(2)
+
/* PMC I/O Registers */
#define ACPI_BASE_ADDR_OFFSET 0x40
#define ACPI_BASE_ADDR_MASK 0xFFFFFE00
@@ -126,4 +151,8 @@
#define SLEEP_TYPE_MASK 0xFFFFECFF
#define SLEEP_TYPE_S5 0x1C00
#define SLEEP_ENABLE 0x2000
+
+extern int pmc_atom_read(int offset, u32 *value);
+extern int pmc_atom_write(int offset, u32 value);
+
#endif /* PMC_ATOM_H */
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
new file mode 100644
index 000000000000..d8ce3ec816ab
--- /dev/null
+++ b/arch/x86/include/asm/pmem.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#ifndef __ASM_X86_PMEM_H__
+#define __ASM_X86_PMEM_H__
+
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/special_insns.h>
+
+#ifdef CONFIG_ARCH_HAS_PMEM_API
+/**
+ * arch_memcpy_to_pmem - copy data to persistent memory
+ * @dst: destination buffer for the copy
+ * @src: source buffer for the copy
+ * @n: length of the copy in bytes
+ *
+ * Copy data to persistent memory media via non-temporal stores so that
+ * a subsequent arch_wmb_pmem() can flush cpu and memory controller
+ * write buffers to guarantee durability.
+ */
+static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
+ size_t n)
+{
+ int unwritten;
+
+ /*
+ * We are copying between two kernel buffers, if
+ * __copy_from_user_inatomic_nocache() returns an error (page
+ * fault) we would have already reported a general protection fault
+ * before the WARN+BUG.
+ */
+ unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
+ (void __user *) src, n);
+ if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
+ __func__, dst, src, unwritten))
+ BUG();
+}
+
+/**
+ * arch_wmb_pmem - synchronize writes to persistent memory
+ *
+ * After a series of arch_memcpy_to_pmem() operations this drains data
+ * from cpu write buffers and any platform (memory controller) buffers
+ * to ensure that written data is durable on persistent memory media.
+ */
+static inline void arch_wmb_pmem(void)
+{
+ /*
+ * wmb() to 'sfence' all previous writes such that they are
+ * architecturally visible to 'pcommit'. Note, that we've
+ * already arranged for pmem writes to avoid the cache via
+ * arch_memcpy_to_pmem().
+ */
+ wmb();
+ pcommit_sfence();
+}
+
+/**
+ * __arch_wb_cache_pmem - write back a cache range with CLWB
+ * @vaddr: virtual start address
+ * @size: number of bytes to write back
+ *
+ * Write back a cache range using the CLWB (cache line write back)
+ * instruction. This function requires explicit ordering with an
+ * arch_wmb_pmem() call. This API is internal to the x86 PMEM implementation.
+ */
+static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
+{
+ u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
+ unsigned long clflush_mask = x86_clflush_size - 1;
+ void *vend = vaddr + size;
+ void *p;
+
+ for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+ p < vend; p += x86_clflush_size)
+ clwb(p);
+}
+
+/*
+ * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
+ * iterators, so for other types (bvec & kvec) we must do a cache write-back.
+ */
+static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
+{
+ return iter_is_iovec(i) == false;
+}
+
+/**
+ * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
+ * @addr: PMEM destination address
+ * @bytes: number of bytes to copy
+ * @i: iterator with source data
+ *
+ * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
+ * This function requires explicit ordering with an arch_wmb_pmem() call.
+ */
+static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+ struct iov_iter *i)
+{
+ void *vaddr = (void __force *)addr;
+ size_t len;
+
+ /* TODO: skip the write-back by always using non-temporal stores */
+ len = copy_from_iter_nocache(vaddr, bytes, i);
+
+ if (__iter_needs_pmem_wb(i))
+ __arch_wb_cache_pmem(vaddr, bytes);
+
+ return len;
+}
+
+/**
+ * arch_clear_pmem - zero a PMEM memory range
+ * @addr: virtual start address
+ * @size: number of bytes to zero
+ *
+ * Write zeros into the memory range starting at 'addr' for 'size' bytes.
+ * This function requires explicit ordering with an arch_wmb_pmem() call.
+ */
+static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+{
+ void *vaddr = (void __force *)addr;
+
+ /* TODO: implement the zeroing via non-temporal writes */
+ if (size == PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0)
+ clear_page(vaddr);
+ else
+ memset(vaddr, 0, size);
+
+ __arch_wb_cache_pmem(vaddr, size);
+}
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+ /*
+ * We require that wmb() be an 'sfence', that is only guaranteed on
+ * 64-bit builds
+ */
+ return static_cpu_has(X86_FEATURE_PCOMMIT);
+}
+#endif /* CONFIG_ARCH_HAS_PMEM_API */
+#endif /* __ASM_X86_PMEM_H__ */
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index dca71714f860..01bcde84d3e4 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -30,12 +30,9 @@ static __always_inline void preempt_count_set(int pc)
/*
* must be macros to avoid header recursion hell
*/
-#define init_task_preempt_count(p) do { \
- task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
-} while (0)
+#define init_task_preempt_count(p) do { } while (0)
#define init_idle_preempt_count(p, cpu) do { \
- task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \
per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
} while (0)
@@ -90,9 +87,9 @@ static __always_inline bool __preempt_count_dec_and_test(void)
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
-static __always_inline bool should_resched(void)
+static __always_inline bool should_resched(int preempt_offset)
{
- return unlikely(!raw_cpu_read_4(__preempt_count));
+ return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}
#ifdef CONFIG_PREEMPT
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 944f1785ed0d..67522256c7ff 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -6,12 +6,12 @@
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
+struct vm86;
-#include <asm/vm86.h>
#include <asm/math_emu.h>
#include <asm/segment.h>
#include <asm/types.h>
-#include <asm/sigcontext.h>
+#include <uapi/asm/sigcontext.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/page.h>
@@ -400,15 +400,9 @@ struct thread_struct {
unsigned long cr2;
unsigned long trap_nr;
unsigned long error_code;
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_VM86
/* Virtual 86 mode info */
- struct vm86_struct __user *vm86_info;
- unsigned long screen_bitmap;
- unsigned long v86flags;
- unsigned long v86mask;
- unsigned long saved_sp0;
- unsigned int saved_fs;
- unsigned int saved_gs;
+ struct vm86 *vm86;
#endif
/* IO permissions: */
unsigned long *io_bitmap_ptr;
@@ -562,12 +556,12 @@ static inline unsigned int cpuid_edx(unsigned int op)
}
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
+static __always_inline void rep_nop(void)
{
asm volatile("rep; nop" ::: "memory");
}
-static inline void cpu_relax(void)
+static __always_inline void cpu_relax(void)
{
rep_nop();
}
@@ -651,14 +645,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
extern void set_task_blockstep(struct task_struct *task, bool on);
-/*
- * from system description table in BIOS. Mostly for MCA use, but
- * others may find it useful:
- */
-extern unsigned int machine_id;
-extern unsigned int machine_submodel_id;
-extern unsigned int BIOS_revision;
-
/* Boot loader type from the setup header: */
extern int bootloader_type;
extern int bootloader_version;
@@ -720,7 +706,6 @@ static inline void spin_lock_prefetch(const void *x)
#define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \
- .vm86_info = NULL, \
.sysenter_cs = __KERNEL_CS, \
.io_bitmap_ptr = NULL, \
}
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 5fabf1362942..6271281f947d 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -88,7 +88,6 @@ extern long syscall_trace_enter_phase2(struct pt_regs *, u32 arch,
unsigned long phase1_result);
extern long syscall_trace_enter(struct pt_regs *);
-extern void syscall_trace_leave(struct pt_regs *);
static inline unsigned long regs_return_value(struct pt_regs *regs)
{
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 655e07a48f6c..67f08230103a 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -41,6 +41,7 @@ struct pvclock_wall_clock {
#define PVCLOCK_TSC_STABLE_BIT (1 << 0)
#define PVCLOCK_GUEST_STOPPED (1 << 1)
+/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 628954ceede1..7a6bed5c08bc 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -62,7 +62,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
static __always_inline
u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src)
{
- u64 delta = __native_read_tsc() - src->tsc_timestamp;
+ u64 delta = rdtsc_ordered() - src->tsc_timestamp;
return pvclock_scale_delta(delta, src->tsc_to_system_mul,
src->tsc_shift);
}
@@ -76,13 +76,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
u8 ret_flags;
version = src->version;
- /* Note: emulated platforms which do not advertise SSE2 support
- * result in kvmclock not using the necessary RDTSC barriers.
- * Without barriers, it is possible that RDTSC instruction reads from
- * the time stamp counter outside rdtsc_barrier protected section
- * below, resulting in violation of monotonicity.
- */
- rdtsc_barrier();
+
offset = pvclock_get_nsec_offset(src);
ret = src->system_time + offset;
ret_flags = src->flags;
diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h
index ae0e241e228b..c537cbb038a7 100644
--- a/arch/x86/include/asm/qrwlock.h
+++ b/arch/x86/include/asm/qrwlock.h
@@ -2,16 +2,6 @@
#define _ASM_X86_QRWLOCK_H
#include <asm-generic/qrwlock_types.h>
-
-#ifndef CONFIG_X86_PPRO_FENCE
-#define queue_write_unlock queue_write_unlock
-static inline void queue_write_unlock(struct qrwlock *lock)
-{
- barrier();
- ACCESS_ONCE(*(u8 *)&lock->cnts) = 0;
-}
-#endif
-
#include <asm-generic/qrwlock.h>
#endif /* _ASM_X86_QRWLOCK_H */
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 9d51fae1cba3..eaba08076030 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -39,18 +39,27 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
}
#endif
-#define virt_queued_spin_lock virt_queued_spin_lock
-
-static inline bool virt_queued_spin_lock(struct qspinlock *lock)
+#ifdef CONFIG_PARAVIRT
+#define virt_spin_lock virt_spin_lock
+static inline bool virt_spin_lock(struct qspinlock *lock)
{
if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
return false;
- while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
- cpu_relax();
+ /*
+ * On hypervisors without PARAVIRT_SPINLOCKS support we fall
+ * back to a Test-and-Set spinlock, because fair locks have
+ * horrible lock 'holder' preemption issues.
+ */
+
+ do {
+ while (atomic_read(&lock->val) != 0)
+ cpu_relax();
+ } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
return true;
}
+#endif /* CONFIG_PARAVIRT */
#include <asm-generic/qspinlock.h>
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 6fe6b182c998..e6cd2c489dbb 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -1,79 +1,8 @@
#ifndef _ASM_X86_SIGCONTEXT_H
#define _ASM_X86_SIGCONTEXT_H
-#include <uapi/asm/sigcontext.h>
-
-#ifdef __i386__
-struct sigcontext {
- unsigned short gs, __gsh;
- unsigned short fs, __fsh;
- unsigned short es, __esh;
- unsigned short ds, __dsh;
- unsigned long di;
- unsigned long si;
- unsigned long bp;
- unsigned long sp;
- unsigned long bx;
- unsigned long dx;
- unsigned long cx;
- unsigned long ax;
- unsigned long trapno;
- unsigned long err;
- unsigned long ip;
- unsigned short cs, __csh;
- unsigned long flags;
- unsigned long sp_at_signal;
- unsigned short ss, __ssh;
+/* This is a legacy header - all kernel code includes <uapi/asm/sigcontext.h> directly. */
- /*
- * fpstate is really (struct _fpstate *) or (struct _xstate *)
- * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
- * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
- * of extended memory layout. See comments at the definition of
- * (struct _fpx_sw_bytes)
- */
- void __user *fpstate; /* zero when no FPU/extended context */
- unsigned long oldmask;
- unsigned long cr2;
-};
-#else /* __i386__ */
-struct sigcontext {
- unsigned long r8;
- unsigned long r9;
- unsigned long r10;
- unsigned long r11;
- unsigned long r12;
- unsigned long r13;
- unsigned long r14;
- unsigned long r15;
- unsigned long di;
- unsigned long si;
- unsigned long bp;
- unsigned long bx;
- unsigned long dx;
- unsigned long ax;
- unsigned long cx;
- unsigned long sp;
- unsigned long ip;
- unsigned long flags;
- unsigned short cs;
- unsigned short __pad2; /* Was called gs, but was always zero. */
- unsigned short __pad1; /* Was called fs, but was always zero. */
- unsigned short ss;
- unsigned long err;
- unsigned long trapno;
- unsigned long oldmask;
- unsigned long cr2;
+#include <uapi/asm/sigcontext.h>
- /*
- * fpstate is really (struct _fpstate *) or (struct _xstate *)
- * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
- * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
- * of extended memory layout. See comments at the definition of
- * (struct _fpx_sw_bytes)
- */
- void __user *fpstate; /* zero when no FPU/extended context */
- unsigned long reserved1[8];
-};
-#endif /* !__i386__ */
#endif /* _ASM_X86_SIGCONTEXT_H */
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
index 7c7c27c97daa..34edd1650bae 100644
--- a/arch/x86/include/asm/sigframe.h
+++ b/arch/x86/include/asm/sigframe.h
@@ -1,15 +1,14 @@
#ifndef _ASM_X86_SIGFRAME_H
#define _ASM_X86_SIGFRAME_H
-#include <asm/sigcontext.h>
+#include <uapi/asm/sigcontext.h>
#include <asm/siginfo.h>
#include <asm/ucontext.h>
+#include <linux/compat.h>
#ifdef CONFIG_X86_32
#define sigframe_ia32 sigframe
#define rt_sigframe_ia32 rt_sigframe
-#define sigcontext_ia32 sigcontext
-#define _fpstate_ia32 _fpstate
#define ucontext_ia32 ucontext
#else /* !CONFIG_X86_32 */
@@ -23,7 +22,7 @@
struct sigframe_ia32 {
u32 pretcode;
int sig;
- struct sigcontext_ia32 sc;
+ struct sigcontext_32 sc;
/*
* fpstate is unused. fpstate is moved/allocated after
* retcode[] below. This movement allows to have the FP state and the
@@ -32,7 +31,7 @@ struct sigframe_ia32 {
* the offset of extramask[] in the sigframe and thus prevent any
* legacy application accessing/modifying it.
*/
- struct _fpstate_ia32 fpstate_unused;
+ struct _fpstate_32 fpstate_unused;
#ifdef CONFIG_IA32_EMULATION
unsigned int extramask[_COMPAT_NSIG_WORDS-1];
#else /* !CONFIG_IA32_EMULATION */
@@ -69,6 +68,15 @@ struct rt_sigframe {
#ifdef CONFIG_X86_X32_ABI
+struct ucontext_x32 {
+ unsigned int uc_flags;
+ unsigned int uc_link;
+ compat_stack_t uc_stack;
+ unsigned int uc__pad0; /* needed for alignment */
+ struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */
+ compat_sigset_t uc_sigmask; /* mask last for extensibility */
+};
+
struct rt_sigframe_x32 {
u64 pretcode;
struct ucontext_x32 uc;
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 31eab867e6d3..2138c9ae19ee 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -30,11 +30,11 @@ typedef sigset_t compat_sigset_t;
#endif /* __ASSEMBLY__ */
#include <uapi/asm/signal.h>
#ifndef __ASSEMBLY__
-extern void do_notify_resume(struct pt_regs *, void *, __u32);
+extern void do_signal(struct pt_regs *regs);
#define __ARCH_HAS_SA_RESTORER
-#include <asm/sigcontext.h>
+#include <uapi/asm/sigcontext.h>
#ifdef __i386__
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index c2e00bb2a136..58505f01962f 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -72,7 +72,7 @@ static __always_inline void boot_init_stack_canary(void)
* on during the bootup the random pool has true entropy too.
*/
get_random_bytes(&canary, sizeof(canary));
- tsc = __native_read_tsc();
+ tsc = rdtsc();
canary += tsc + (tsc << 32UL);
current->stack_canary = canary;
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index e4661196994e..ff8b9a17dc4b 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -27,12 +27,11 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
function. */
#define __HAVE_ARCH_MEMCPY 1
+extern void *memcpy(void *to, const void *from, size_t len);
extern void *__memcpy(void *to, const void *from, size_t len);
#ifndef CONFIG_KMEMCHECK
-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
-extern void *memcpy(void *to, const void *from, size_t len);
-#else
+#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
#define memcpy(dst, src, len) \
({ \
size_t __len = (len); \
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index d6a756ae04c8..999b7cd2e78c 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -20,9 +20,21 @@
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
-typedef void (*sys_call_ptr_t)(void);
+typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
+ unsigned long, unsigned long,
+ unsigned long, unsigned long);
extern const sys_call_ptr_t sys_call_table[];
+#if defined(CONFIG_X86_32)
+#define ia32_sys_call_table sys_call_table
+#define __NR_syscall_compat_max __NR_syscall_max
+#define IA32_NR_syscalls NR_syscalls
+#endif
+
+#if defined(CONFIG_IA32_EMULATION)
+extern const sys_call_ptr_t ia32_sys_call_table[];
+#endif
+
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
* This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 592a6a672e07..91dfcafe27a6 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -37,6 +37,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *);
asmlinkage unsigned long sys_sigreturn(void);
/* kernel/vm86_32.c */
+struct vm86_struct;
asmlinkage long sys_vm86old(struct vm86_struct __user *);
asmlinkage long sys_vm86(unsigned long, unsigned long);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 225ee545e1a0..c7b551028740 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -27,14 +27,17 @@
* Without this offset, that can result in a page fault. (We are
* careful that, in this case, the value we read doesn't matter.)
*
- * In vm86 mode, the hardware frame is much longer still, but we neither
- * access the extra members from NMI context, nor do we write such a
- * frame at sp0 at all.
+ * In vm86 mode, the hardware frame is much longer still, so add 16
+ * bytes to make room for the real-mode segments.
*
* x86_64 has a fixed-length stack frame.
*/
#ifdef CONFIG_X86_32
-# define TOP_OF_KERNEL_STACK_PADDING 8
+# ifdef CONFIG_VM86
+# define TOP_OF_KERNEL_STACK_PADDING 16
+# else
+# define TOP_OF_KERNEL_STACK_PADDING 8
+# endif
#else
# define TOP_OF_KERNEL_STACK_PADDING 0
#endif
@@ -54,9 +57,7 @@ struct thread_info {
__u32 flags; /* low level flags */
__u32 status; /* thread synchronous flags */
__u32 cpu; /* current CPU */
- int saved_preempt_count;
mm_segment_t addr_limit;
- void __user *sysenter_return;
unsigned int sig_on_uaccess_error:1;
unsigned int uaccess_err:1; /* uaccess failed */
};
@@ -66,7 +67,6 @@ struct thread_info {
.task = &tsk, \
.flags = 0, \
.cpu = 0, \
- .saved_preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
}
@@ -140,27 +140,11 @@ struct thread_info {
_TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
_TIF_NOHZ)
-/* work to do in syscall_trace_leave() */
-#define _TIF_WORK_SYSCALL_EXIT \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
- _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
-
-/* work to do on interrupt/exception return */
-#define _TIF_WORK_MASK \
- (0x0000FFFF & \
- ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \
- _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
-
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK \
((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
_TIF_NOHZ)
-/* Only used for 64 bit */
-#define _TIF_DO_NOTIFY_MASK \
- (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
- _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
-
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index cd791948b286..6df2029405a3 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -261,6 +261,12 @@ static inline void reset_lazy_tlbstate(void)
#endif /* SMP */
+/* Not inlined due to inc_irq_stat not being defined yet */
+#define flush_tlb_local() { \
+ inc_irq_stat(irq_tlb_count); \
+ local_flush_tlb(); \
+}
+
#ifndef CONFIG_PARAVIRT
#define flush_tlb_others(mask, mm, start, end) \
native_flush_tlb_others(mask, mm, start, end)
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h
index 173dd3ba108c..0f492fc50bce 100644
--- a/arch/x86/include/asm/trace/mpx.h
+++ b/arch/x86/include/asm/trace/mpx.h
@@ -11,7 +11,7 @@
TRACE_EVENT(mpx_bounds_register_exception,
TP_PROTO(void *addr_referenced,
- const struct bndreg *bndreg),
+ const struct mpx_bndreg *bndreg),
TP_ARGS(addr_referenced, bndreg),
TP_STRUCT__entry(
@@ -44,7 +44,7 @@ TRACE_EVENT(mpx_bounds_register_exception,
TRACE_EVENT(bounds_exception_mpx,
- TP_PROTO(const struct bndcsr *bndcsr),
+ TP_PROTO(const struct mpx_bndcsr *bndcsr),
TP_ARGS(bndcsr),
TP_STRUCT__entry(
@@ -116,7 +116,8 @@ TRACE_EVENT(mpx_new_bounds_table,
/*
* This gets used outside of MPX-specific code, so we need a stub.
*/
-static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr)
+static inline
+void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr)
{
}
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index c5380bea2a36..c3496619740a 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -112,8 +112,8 @@ asmlinkage void smp_threshold_interrupt(void);
asmlinkage void smp_deferred_error_interrupt(void);
#endif
-extern enum ctx_state ist_enter(struct pt_regs *regs);
-extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state);
+extern void ist_enter(struct pt_regs *regs);
+extern void ist_exit(struct pt_regs *regs);
extern void ist_begin_non_atomic(struct pt_regs *regs);
extern void ist_end_non_atomic(void);
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 94605c0e9cee..6d7c5479bcea 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -21,28 +21,12 @@ extern void disable_TSC(void);
static inline cycles_t get_cycles(void)
{
- unsigned long long ret = 0;
-
#ifndef CONFIG_X86_TSC
if (!cpu_has_tsc)
return 0;
#endif
- rdtscll(ret);
-
- return ret;
-}
-static __always_inline cycles_t vget_cycles(void)
-{
- /*
- * We only do VDSOs on TSC capable CPUs, so this shouldn't
- * access boot_cpu_data (which is not VDSO-safe):
- */
-#ifndef CONFIG_X86_TSC
- if (!cpu_has_tsc)
- return 0;
-#endif
- return (cycles_t)__native_read_tsc();
+ return rdtsc();
}
extern void tsc_init(void);
@@ -51,6 +35,7 @@ extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
extern int check_tsc_disabled(void);
extern unsigned long native_calibrate_tsc(void);
+extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
extern int tsc_clocksource_reliable;
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a8df874f3e88..09b1b0ab94b7 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -51,13 +51,13 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
* limit, not add it to the address).
*/
if (__builtin_constant_p(size))
- return addr > limit - size;
+ return unlikely(addr > limit - size);
/* Arbitrary sizes? Be careful about overflow */
addr += size;
- if (addr < size)
+ if (unlikely(addr < size))
return true;
- return addr > limit;
+ return unlikely(addr > limit);
}
#define __range_not_ok(addr, size, limit) \
@@ -182,7 +182,7 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
: "=a" (__ret_gu), "=r" (__val_gu) \
: "0" (ptr), "i" (sizeof(*(ptr)))); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
- __ret_gu; \
+ __builtin_expect(__ret_gu, 0); \
})
#define __put_user_x(size, x, ptr, __ret_pu) \
@@ -278,7 +278,7 @@ extern void __put_user_8(void);
__put_user_x(X, __pu_val, ptr, __ret_pu); \
break; \
} \
- __ret_pu; \
+ __builtin_expect(__ret_pu, 0); \
})
#define __put_user_size(x, ptr, size, retval, errret) \
@@ -401,7 +401,7 @@ do { \
({ \
int __pu_err; \
__put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
- __pu_err; \
+ __builtin_expect(__pu_err, 0); \
})
#define __get_user_nocheck(x, ptr, size) \
@@ -410,7 +410,7 @@ do { \
unsigned long __gu_val; \
__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
- __gu_err; \
+ __builtin_expect(__gu_err, 0); \
})
/* FIXME: this hack is definitely wrong -AK */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a00ad8f2a657..ea7074784cc4 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -609,7 +609,7 @@ struct uv_cpu_nmi_s {
DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
-#define uv_hub_nmi (uv_cpu_nmi.hub)
+#define uv_hub_nmi this_cpu_read(uv_cpu_nmi.hub)
#define uv_cpu_nmi_per(cpu) (per_cpu(uv_cpu_nmi, cpu))
#define uv_hub_nmi_per(cpu) (uv_cpu_nmi_per(cpu).hub)
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 8021bd28c0f1..756de9190aec 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -26,7 +26,7 @@ struct vdso_image {
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
long sym___kernel_vsyscall;
- long sym_VDSO32_SYSENTER_RETURN;
+ long sym_int80_landing_pad;
};
#ifdef CONFIG_X86_64
@@ -38,13 +38,7 @@ extern const struct vdso_image vdso_image_x32;
#endif
#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
-extern const struct vdso_image vdso_image_32_int80;
-#ifdef CONFIG_COMPAT
-extern const struct vdso_image vdso_image_32_syscall;
-#endif
-extern const struct vdso_image vdso_image_32_sysenter;
-
-extern const struct vdso_image *selected_vdso32;
+extern const struct vdso_image vdso_image_32;
#endif
extern void __init init_vdso_image(const struct vdso_image *image);
diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h
index 1d8de3f3feca..1e491f3af317 100644
--- a/arch/x86/include/asm/vm86.h
+++ b/arch/x86/include/asm/vm86.h
@@ -1,7 +1,6 @@
#ifndef _ASM_X86_VM86_H
#define _ASM_X86_VM86_H
-
#include <asm/ptrace.h>
#include <uapi/asm/vm86.h>
@@ -28,43 +27,49 @@ struct kernel_vm86_regs {
unsigned short gs, __gsh;
};
-struct kernel_vm86_struct {
- struct kernel_vm86_regs regs;
-/*
- * the below part remains on the kernel stack while we are in VM86 mode.
- * 'tss.esp0' then contains the address of VM86_TSS_ESP0 below, and when we
- * get forced back from VM86, the CPU and "SAVE_ALL" will restore the above
- * 'struct kernel_vm86_regs' with the then actual values.
- * Therefore, pt_regs in fact points to a complete 'kernel_vm86_struct'
- * in kernelspace, hence we need not reget the data from userspace.
- */
-#define VM86_TSS_ESP0 flags
+struct vm86 {
+ struct vm86plus_struct __user *user_vm86;
+ struct pt_regs regs32;
+ unsigned long veflags;
+ unsigned long veflags_mask;
+ unsigned long saved_sp0;
+
unsigned long flags;
unsigned long screen_bitmap;
unsigned long cpu_type;
struct revectored_struct int_revectored;
struct revectored_struct int21_revectored;
struct vm86plus_info_struct vm86plus;
- struct pt_regs *regs32; /* here we save the pointer to the old regs */
-/*
- * The below is not part of the structure, but the stack layout continues
- * this way. In front of 'return-eip' may be some data, depending on
- * compilation, so we don't rely on this and save the pointer to 'oldregs'
- * in 'regs32' above.
- * However, with GCC-2.7.2 and the current CFLAGS you see exactly this:
-
- long return-eip; from call to vm86()
- struct pt_regs oldregs; user space registers as saved by syscall
- */
};
#ifdef CONFIG_VM86
void handle_vm86_fault(struct kernel_vm86_regs *, long);
int handle_vm86_trap(struct kernel_vm86_regs *, long, int);
-struct pt_regs *save_v86_state(struct kernel_vm86_regs *);
+void save_v86_state(struct kernel_vm86_regs *, int);
struct task_struct;
+
+#define free_vm86(t) do { \
+ struct thread_struct *__t = (t); \
+ if (__t->vm86 != NULL) { \
+ kfree(__t->vm86); \
+ __t->vm86 = NULL; \
+ } \
+} while (0)
+
+/*
+ * Support for VM86 programs to request interrupts for
+ * real mode hardware drivers:
+ */
+#define FIRST_VM86_IRQ 3
+#define LAST_VM86_IRQ 15
+
+static inline int invalid_vm86_irq(int irq)
+{
+ return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ;
+}
+
void release_vm86_irqs(struct task_struct *);
#else
@@ -77,6 +82,10 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c)
return 0;
}
+static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { }
+
+#define free_vm86(t) do { } while(0)
+
#endif /* CONFIG_VM86 */
#endif /* _ASM_X86_VM86_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index da772edd19ab..aa336ff3e03e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -47,6 +47,7 @@
#define CPU_BASED_MOV_DR_EXITING 0x00800000
#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
#define CPU_BASED_USE_IO_BITMAPS 0x02000000
+#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000
#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
#define CPU_BASED_MONITOR_EXITING 0x20000000
#define CPU_BASED_PAUSE_EXITING 0x40000000
@@ -71,7 +72,7 @@
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
-
+#define SECONDARY_EXEC_PCOMMIT 0x00200000
#define PIN_BASED_EXT_INTR_MASK 0x00000001
#define PIN_BASED_NMI_EXITING 0x00000008
@@ -367,29 +368,29 @@ enum vmcs_field {
#define TYPE_PHYSICAL_APIC_EVENT (10 << 12)
#define TYPE_PHYSICAL_APIC_INST (15 << 12)
-/* segment AR */
-#define SEGMENT_AR_L_MASK (1 << 13)
-
-#define AR_TYPE_ACCESSES_MASK 1
-#define AR_TYPE_READABLE_MASK (1 << 1)
-#define AR_TYPE_WRITEABLE_MASK (1 << 2)
-#define AR_TYPE_CODE_MASK (1 << 3)
-#define AR_TYPE_MASK 0x0f
-#define AR_TYPE_BUSY_64_TSS 11
-#define AR_TYPE_BUSY_32_TSS 11
-#define AR_TYPE_BUSY_16_TSS 3
-#define AR_TYPE_LDT 2
-
-#define AR_UNUSABLE_MASK (1 << 16)
-#define AR_S_MASK (1 << 4)
-#define AR_P_MASK (1 << 7)
-#define AR_L_MASK (1 << 13)
-#define AR_DB_MASK (1 << 14)
-#define AR_G_MASK (1 << 15)
-#define AR_DPL_SHIFT 5
-#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
-
-#define AR_RESERVD_MASK 0xfffe0f00
+/* segment AR in VMCS -- these are different from what LAR reports */
+#define VMX_SEGMENT_AR_L_MASK (1 << 13)
+
+#define VMX_AR_TYPE_ACCESSES_MASK 1
+#define VMX_AR_TYPE_READABLE_MASK (1 << 1)
+#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2)
+#define VMX_AR_TYPE_CODE_MASK (1 << 3)
+#define VMX_AR_TYPE_MASK 0x0f
+#define VMX_AR_TYPE_BUSY_64_TSS 11
+#define VMX_AR_TYPE_BUSY_32_TSS 11
+#define VMX_AR_TYPE_BUSY_16_TSS 3
+#define VMX_AR_TYPE_LDT 2
+
+#define VMX_AR_UNUSABLE_MASK (1 << 16)
+#define VMX_AR_S_MASK (1 << 4)
+#define VMX_AR_P_MASK (1 << 7)
+#define VMX_AR_L_MASK (1 << 13)
+#define VMX_AR_DB_MASK (1 << 14)
+#define VMX_AR_G_MASK (1 << 15)
+#define VMX_AR_DPL_SHIFT 5
+#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3)
+
+#define VMX_AR_RESERVD_MASK 0xfffe0f00
#define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1)
@@ -415,6 +416,7 @@ enum vmcs_field {
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
+#define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */
#define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */
#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index 608a79d5a466..e6911caf5bbf 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -20,4 +20,15 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
/* No need for a barrier -- XCHG is a barrier on x86. */
#define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
+extern int xen_have_vector_callback;
+
+/*
+ * Events delivered via platform PCI interrupts are always
+ * routed to vcpu 0 and hence cannot be rebound.
+ */
+static inline bool xen_support_evtchn_rebind(void)
+{
+ return (!xen_hvm_domain() || xen_have_vector_callback);
+}
+
#endif /* _ASM_X86_XEN_EVENTS_H */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index ca08a27b90b3..4c20dd333412 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -336,10 +336,10 @@ HYPERVISOR_update_descriptor(u64 ma, u64 desc)
return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
}
-static inline int
+static inline long
HYPERVISOR_memory_op(unsigned int cmd, void *arg)
{
- return _hypercall2(int, memory_op, cmd, arg);
+ return _hypercall2(long, memory_op, cmd, arg);
}
static inline int
@@ -465,6 +465,12 @@ HYPERVISOR_tmem_op(
return _hypercall1(int, tmem_op, op);
}
+static inline int
+HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
+{
+ return _hypercall2(int, xenpmu_op, op, arg);
+}
+
static inline void
MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
{
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index d866959e5685..8b2d4bea9962 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -57,4 +57,9 @@ static inline bool xen_x2apic_para_available(void)
}
#endif
+#ifdef CONFIG_HOTPLUG_CPU
+void xen_arch_register_cpu(int num);
+void xen_arch_unregister_cpu(int num);
+#endif
+
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 3400dbaec3c3..62ca03ef5c65 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -3,12 +3,38 @@
*
* Guest OS interface to x86 Xen.
*
- * Copyright (c) 2004, K A Fraser
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
*/
#ifndef _ASM_X86_XEN_INTERFACE_H
#define _ASM_X86_XEN_INTERFACE_H
+/*
+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
+ * in a struct in memory.
+ * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an
+ * hypercall argument.
+ * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
+ * they might not be on other architectures.
+ */
#ifdef __XEN__
#define __DEFINE_GUEST_HANDLE(name, type) \
typedef struct { type *p; } __guest_handle_ ## name
@@ -88,13 +114,16 @@ DEFINE_GUEST_HANDLE(xen_ulong_t);
* start of the GDT because some stupid OSes export hard-coded selector values
* in their ABI. These hard-coded values are always near the start of the GDT,
* so Xen places itself out of the way, at the far end of the GDT.
+ *
+ * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
*/
#define FIRST_RESERVED_GDT_PAGE 14
#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096)
#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
/*
- * Send an array of these to HYPERVISOR_set_trap_table()
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * Terminate the array with a sentinel entry, with traps[].address==0.
* The privilege level specifies which modes may enter a trap via a software
* interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
* privilege levels as follows:
@@ -118,10 +147,41 @@ struct trap_info {
DEFINE_GUEST_HANDLE_STRUCT(trap_info);
struct arch_shared_info {
- unsigned long max_pfn; /* max pfn that appears in table */
- /* Frame containing list of mfns containing list of mfns containing p2m. */
- unsigned long pfn_to_mfn_frame_list_list;
- unsigned long nmi_reason;
+ /*
+ * Number of valid entries in the p2m table(s) anchored at
+ * pfn_to_mfn_frame_list_list and/or p2m_vaddr.
+ */
+ unsigned long max_pfn;
+ /*
+ * Frame containing list of mfns containing list of mfns containing p2m.
+ * A value of 0 indicates it has not yet been set up, ~0 indicates it
+ * has been set to invalid e.g. due to the p2m being too large for the
+ * 3-level p2m tree. In this case the linear mapper p2m list anchored
+ * at p2m_vaddr is to be used.
+ */
+ xen_pfn_t pfn_to_mfn_frame_list_list;
+ unsigned long nmi_reason;
+ /*
+ * Following three fields are valid if p2m_cr3 contains a value
+ * different from 0.
+ * p2m_cr3 is the root of the address space where p2m_vaddr is valid.
+ * p2m_cr3 is in the same format as a cr3 value in the vcpu register
+ * state and holds the folded machine frame number (via xen_pfn_to_cr3)
+ * of a L3 or L4 page table.
+ * p2m_vaddr holds the virtual address of the linear p2m list. All
+ * entries in the range [0...max_pfn[ are accessible via this pointer.
+ * p2m_generation will be incremented by the guest before and after each
+ * change of the mappings of the p2m list. p2m_generation starts at 0
+ * and a value with the least significant bit set indicates that a
+ * mapping update is in progress. This allows guest external software
+ * (e.g. in Dom0) to verify that read mappings are consistent and
+ * whether they have changed since the last check.
+ * Modifying a p2m element in the linear p2m list is allowed via an
+ * atomic write only.
+ */
+ unsigned long p2m_cr3; /* cr3 value of the p2m address space */
+ unsigned long p2m_vaddr; /* virtual address of the p2m list */
+ unsigned long p2m_generation; /* generation count of p2m mapping */
};
#endif /* !__ASSEMBLY__ */
@@ -137,13 +197,31 @@ struct arch_shared_info {
/*
* The following is all CPU context. Note that the fpu_ctxt block is filled
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ *
+ * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise
+ * for HVM and PVH guests, not all information in this structure is updated:
+ *
+ * - For HVM guests, the structures read include: fpu_ctxt (if
+ * VGCT_I387_VALID is set), flags, user_regs, debugreg[*]
+ *
+ * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to
+ * set cr3. All other fields not used should be set to 0.
*/
struct vcpu_guest_context {
/* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */
-#define VGCF_I387_VALID (1<<0)
-#define VGCF_HVM_GUEST (1<<1)
-#define VGCF_IN_KERNEL (1<<2)
+#define VGCF_I387_VALID (1<<0)
+#define VGCF_IN_KERNEL (1<<2)
+#define _VGCF_i387_valid 0
+#define VGCF_i387_valid (1<<_VGCF_i387_valid)
+#define _VGCF_in_kernel 2
+#define VGCF_in_kernel (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events 4
+#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events)
+#define _VGCF_online 5
+#define VGCF_online (1<<_VGCF_online)
unsigned long flags; /* VGCF_* flags */
struct cpu_user_regs user_regs; /* User-level CPU registers */
struct trap_info trap_ctxt[256]; /* Virtual IDT */
@@ -172,6 +250,129 @@ struct vcpu_guest_context {
#endif
};
DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
+
+/* AMD PMU registers and structures */
+struct xen_pmu_amd_ctxt {
+ /*
+ * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
+ * For PV(H) guests these fields are RO.
+ */
+ uint32_t counters;
+ uint32_t ctrls;
+
+ /* Counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ uint64_t regs[];
+#elif defined(__GNUC__)
+ uint64_t regs[0];
+#endif
+};
+
+/* Intel PMU registers and structures */
+struct xen_pmu_cntr_pair {
+ uint64_t counter;
+ uint64_t control;
+};
+
+struct xen_pmu_intel_ctxt {
+ /*
+ * Offsets to fixed and architectural counter MSRs (relative to
+ * xen_pmu_arch.c.intel).
+ * For PV(H) guests these fields are RO.
+ */
+ uint32_t fixed_counters;
+ uint32_t arch_counters;
+
+ /* PMU registers */
+ uint64_t global_ctrl;
+ uint64_t global_ovf_ctrl;
+ uint64_t global_status;
+ uint64_t fixed_ctrl;
+ uint64_t ds_area;
+ uint64_t pebs_enable;
+ uint64_t debugctl;
+
+ /* Fixed and architectural counter MSRs */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ uint64_t regs[];
+#elif defined(__GNUC__)
+ uint64_t regs[0];
+#endif
+};
+
+/* Sampled domain's registers */
+struct xen_pmu_regs {
+ uint64_t ip;
+ uint64_t sp;
+ uint64_t flags;
+ uint16_t cs;
+ uint16_t ss;
+ uint8_t cpl;
+ uint8_t pad[3];
+};
+
+/* PMU flags */
+#define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */
+#define PMU_SAMPLE_USER (1<<1) /* Sample is from user or kernel mode */
+#define PMU_SAMPLE_REAL (1<<2) /* Sample is from realmode */
+#define PMU_SAMPLE_PV (1<<3) /* Sample from a PV guest */
+
+/*
+ * Architecture-specific information describing state of the processor at
+ * the time of PMU interrupt.
+ * Fields of this structure marked as RW for guest should only be written by
+ * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
+ * hypervisor during PMU interrupt). Hypervisor will read updated data in
+ * XENPMU_flush hypercall and clear PMU_CACHED bit.
+ */
+struct xen_pmu_arch {
+ union {
+ /*
+ * Processor's registers at the time of interrupt.
+ * WO for hypervisor, RO for guests.
+ */
+ struct xen_pmu_regs regs;
+ /*
+ * Padding for adding new registers to xen_pmu_regs in
+ * the future
+ */
+#define XENPMU_REGS_PAD_SZ 64
+ uint8_t pad[XENPMU_REGS_PAD_SZ];
+ } r;
+
+ /* WO for hypervisor, RO for guest */
+ uint64_t pmu_flags;
+
+ /*
+ * APIC LVTPC register.
+ * RW for both hypervisor and guest.
+ * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
+ * during XENPMU_flush or XENPMU_lvtpc_set.
+ */
+ union {
+ uint32_t lapic_lvtpc;
+ uint64_t pad;
+ } l;
+
+ /*
+ * Vendor-specific PMU registers.
+ * RW for both hypervisor and guest (see exceptions above).
+ * Guest's updates to this field are verified and then loaded by the
+ * hypervisor into hardware during XENPMU_flush
+ */
+ union {
+ struct xen_pmu_amd_ctxt amd;
+ struct xen_pmu_intel_ctxt intel;
+
+ /*
+ * Padding for contexts (fixed parts only, does not include
+ * MSR banks that are specified by offsets)
+ */
+#define XENPMU_CTXT_PAD_SZ 128
+ uint8_t pad[XENPMU_CTXT_PAD_SZ];
+ } c;
+};
+
#endif /* !__ASSEMBLY__ */
/*
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c44a5d53e464..f5fb840b43e8 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -12,7 +12,7 @@
#include <asm/pgtable.h>
#include <xen/interface/xen.h>
-#include <xen/grant_table.h>
+#include <xen/interface/grant_table.h>
#include <xen/features.h>
/* Xen machine address */
@@ -35,9 +35,7 @@ typedef struct xpaddr {
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT)
-/* Maximum amount of memory we can handle in a domain in pages */
-#define MAX_DOMAIN_PAGES \
- ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
+#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
extern unsigned long *machine_to_phys_mapping;
extern unsigned long machine_to_phys_nr;
@@ -45,11 +43,13 @@ extern unsigned long *xen_p2m_addr;
extern unsigned long xen_p2m_size;
extern unsigned long xen_max_p2m_pfn;
+extern int xen_alloc_p2m_entry(unsigned long pfn);
+
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-extern unsigned long set_phys_range_identity(unsigned long pfn_s,
- unsigned long pfn_e);
+extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e);
extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops,
@@ -103,6 +103,11 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
unsigned long mfn;
+ /*
+ * Some x86 code are still using pfn_to_mfn instead of
+ * pfn_to_mfn. This will have to be removed when we figured
+ * out which call.
+ */
if (xen_feature(XENFEAT_auto_translated_physmap))
return pfn;
@@ -149,6 +154,11 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
{
unsigned long pfn;
+ /*
+ * Some x86 code are still using mfn_to_pfn instead of
+ * gfn_to_pfn. This will have to be removed when we figure
+ * out which call.
+ */
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
@@ -178,6 +188,27 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
return XPADDR(PFN_PHYS(mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
}
+/* Pseudo-physical <-> Guest conversion */
+static inline unsigned long pfn_to_gfn(unsigned long pfn)
+{
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return pfn;
+ else
+ return pfn_to_mfn(pfn);
+}
+
+static inline unsigned long gfn_to_pfn(unsigned long gfn)
+{
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return gfn;
+ else
+ return mfn_to_pfn(gfn);
+}
+
+/* Pseudo-physical <-> Bus conversion */
+#define pfn_to_bfn(pfn) pfn_to_gfn(pfn)
+#define bfn_to_pfn(bfn) gfn_to_pfn(bfn)
+
/*
* We detect special mappings in one of two ways:
* 1. If the MFN is an I/O page then Xen will set the m2p entry
@@ -198,7 +229,7 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
* require. In all the cases we care about, the FOREIGN_FRAME bit is
* masked (e.g., pfn_to_mfn()) so behaviour there is correct.
*/
-static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
+static inline unsigned long bfn_to_local_pfn(unsigned long mfn)
{
unsigned long pfn;
@@ -217,6 +248,10 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v)))
#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
+/* VIRT <-> GUEST conversion */
+#define virt_to_gfn(v) (pfn_to_gfn(virt_to_pfn(v)))
+#define gfn_to_virt(g) (__va(gfn_to_pfn(g) << PAGE_SHIFT))
+
static inline unsigned long pte_mfn(pte_t pte)
{
return (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
@@ -263,8 +298,8 @@ void make_lowmem_page_readwrite(void *vaddr);
#define xen_unmap(cookie) iounmap((cookie))
static inline bool xen_arch_need_swiotlb(struct device *dev,
- unsigned long pfn,
- unsigned long mfn)
+ phys_addr_t phys,
+ dma_addr_t dev_addr)
{
return false;
}
diff --git a/arch/x86/include/uapi/asm/bitsperlong.h b/arch/x86/include/uapi/asm/bitsperlong.h
index b0ae1c4dc791..217909b4d6f5 100644
--- a/arch/x86/include/uapi/asm/bitsperlong.h
+++ b/arch/x86/include/uapi/asm/bitsperlong.h
@@ -1,7 +1,7 @@
#ifndef __ASM_X86_BITSPERLONG_H
#define __ASM_X86_BITSPERLONG_H
-#ifdef __x86_64__
+#if defined(__x86_64__) && !defined(__ILP32__)
# define __BITS_PER_LONG 64
#else
# define __BITS_PER_LONG 32
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index ab456dc233b5..329254373479 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -120,7 +120,7 @@ struct boot_params {
__u8 _pad3[16]; /* 0x070 */
__u8 hd0_info[16]; /* obsolete! */ /* 0x080 */
__u8 hd1_info[16]; /* obsolete! */ /* 0x090 */
- struct sys_desc_table sys_desc_table; /* 0x0a0 */
+ struct sys_desc_table sys_desc_table; /* obsolete! */ /* 0x0a0 */
struct olpc_ofw_header olpc_ofw_header; /* 0x0b0 */
__u32 ext_ramdisk_image; /* 0x0c0 */
__u32 ext_ramdisk_size; /* 0x0c4 */
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
index 0f457e6eab18..9dafe59cf6e2 100644
--- a/arch/x86/include/uapi/asm/e820.h
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -37,7 +37,7 @@
/*
* This is a non-standardized way to represent ADR or NVDIMM regions that
* persist over a reboot. The kernel will ignore their special capabilities
- * unless the CONFIG_X86_PMEM_LEGACY=y option is set.
+ * unless the CONFIG_X86_PMEM_LEGACY option is set.
*
* ( Note that older platforms also used 6 for the same type of memory,
* but newer versions switched to 12 as 6 was assigned differently. Some
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index f36d56bd7632..040d4083c24f 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -27,6 +27,8 @@
#define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0)
/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
+/* Partition reference TSC MSR is available */
+#define HV_X64_MSR_REFERENCE_TSC_AVAILABLE (1 << 9)
/* A partition's reference time stamp counter (TSC) page */
#define HV_X64_MSR_REFERENCE_TSC 0x40000021
@@ -151,6 +153,12 @@
/* MSR used to provide vcpu index */
#define HV_X64_MSR_VP_INDEX 0x40000002
+/* MSR used to reset the guest OS. */
+#define HV_X64_MSR_RESET 0x40000003
+
+/* MSR used to provide vcpu runtime in 100ns units */
+#define HV_X64_MSR_VP_RUNTIME 0x40000010
+
/* MSR used to read the per-partition time reference counter */
#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
@@ -249,4 +257,16 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
__s64 tsc_offset;
} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
+/* Define the number of synthetic interrupt sources. */
+#define HV_SYNIC_SINT_COUNT (16)
+/* Define the expected SynIC version. */
+#define HV_SYNIC_VERSION_1 (0x1)
+
+#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
+#define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
+#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0)
+#define HV_SYNIC_SINT_MASKED (1ULL << 16)
+#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17)
+#define HV_SYNIC_SINT_VECTOR_MASK (0xFF)
+
#endif
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index a0eab85ce7b8..03429da2fa80 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -2,7 +2,7 @@
#define _UAPI_ASM_X86_MCE_H
#include <linux/types.h>
-#include <asm/ioctls.h>
+#include <linux/ioctl.h>
/* Fields are zero when not available */
struct mce {
@@ -15,7 +15,8 @@ struct mce {
__u64 time; /* wall time_t when error was detected */
__u8 cpuvendor; /* cpu vendor as encoded in system.h */
__u8 inject_flags; /* software inject flags */
- __u16 pad;
+ __u8 severity;
+ __u8 usable_addr;
__u32 cpuid; /* CPUID 1 EAX */
__u8 cs; /* code segment */
__u8 bank; /* machine check bank */
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 180a0c3c224d..79887abcb5e1 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -37,8 +37,6 @@
#define X86_EFLAGS_VM _BITUL(X86_EFLAGS_VM_BIT)
#define X86_EFLAGS_AC_BIT 18 /* Alignment Check/Access Control */
#define X86_EFLAGS_AC _BITUL(X86_EFLAGS_AC_BIT)
-#define X86_EFLAGS_AC_BIT 18 /* Alignment Check/Access Control */
-#define X86_EFLAGS_AC _BITUL(X86_EFLAGS_AC_BIT)
#define X86_EFLAGS_VIF_BIT 19 /* Virtual Interrupt Flag */
#define X86_EFLAGS_VIF _BITUL(X86_EFLAGS_VIF_BIT)
#define X86_EFLAGS_VIP_BIT 20 /* Virtual Interrupt Pending */
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index 0e8a973de9ee..d485232f1e9f 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -1,236 +1,360 @@
#ifndef _UAPI_ASM_X86_SIGCONTEXT_H
#define _UAPI_ASM_X86_SIGCONTEXT_H
+/*
+ * Linux signal context definitions. The sigcontext includes a complex
+ * hierarchy of CPU and FPU state, available to user-space (on the stack) when
+ * a signal handler is executed.
+ *
+ * As over the years this ABI grew from its very simple roots towards
+ * supporting more and more CPU state organically, some of the details (which
+ * were rather clever hacks back in the days) became a bit quirky by today.
+ *
+ * The current ABI includes flexible provisions for future extensions, so we
+ * won't have to grow new quirks for quite some time. Promise!
+ */
+
#include <linux/compiler.h>
#include <linux/types.h>
-#define FP_XSTATE_MAGIC1 0x46505853U
-#define FP_XSTATE_MAGIC2 0x46505845U
-#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2)
+#define FP_XSTATE_MAGIC1 0x46505853U
+#define FP_XSTATE_MAGIC2 0x46505845U
+#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2)
/*
- * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame
- * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes
- * are used to extended the fpstate pointer in the sigcontext, which now
- * includes the extended state information along with fpstate information.
+ * Bytes 464..511 in the current 512-byte layout of the FXSAVE/FXRSTOR frame
+ * are reserved for SW usage. On CPUs supporting XSAVE/XRSTOR, these bytes are
+ * used to extend the fpstate pointer in the sigcontext, which now includes the
+ * extended state information along with fpstate information.
*
- * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved
- * area and FP_XSTATE_MAGIC2 at the end of memory layout
- * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the
- * extended state information in the memory layout pointed by the fpstate
- * pointer in sigcontext.
+ * If sw_reserved.magic1 == FP_XSTATE_MAGIC1 then there's a
+ * sw_reserved.extended_size bytes large extended context area present. (The
+ * last 32-bit word of this extended area (at the
+ * fpstate+extended_size-FP_XSTATE_MAGIC2_SIZE address) is set to
+ * FP_XSTATE_MAGIC2 so that you can sanity check your size calculations.)
+ *
+ * This extended area typically grows with newer CPUs that have larger and
+ * larger XSAVE areas.
*/
struct _fpx_sw_bytes {
- __u32 magic1; /* FP_XSTATE_MAGIC1 */
- __u32 extended_size; /* total size of the layout referred by
- * fpstate pointer in the sigcontext.
- */
- __u64 xfeatures;
- /* feature bit mask (including fp/sse/extended
- * state) that is present in the memory
- * layout.
- */
- __u32 xstate_size; /* actual xsave state size, based on the
- * features saved in the layout.
- * 'extended_size' will be greater than
- * 'xstate_size'.
- */
- __u32 padding[7]; /* for future use. */
+ /*
+ * If set to FP_XSTATE_MAGIC1 then this is an xstate context.
+ * 0 if a legacy frame.
+ */
+ __u32 magic1;
+
+ /*
+ * Total size of the fpstate area:
+ *
+ * - if magic1 == 0 then it's sizeof(struct _fpstate)
+ * - if magic1 == FP_XSTATE_MAGIC1 then it's sizeof(struct _xstate)
+ * plus extensions (if any)
+ */
+ __u32 extended_size;
+
+ /*
+ * Feature bit mask (including FP/SSE/extended state) that is present
+ * in the memory layout:
+ */
+ __u64 xfeatures;
+
+ /*
+ * Actual XSAVE state size, based on the xfeatures saved in the layout.
+ * 'extended_size' is greater than 'xstate_size':
+ */
+ __u32 xstate_size;
+
+ /* For future use: */
+ __u32 padding[7];
};
-#ifdef __i386__
/*
- * As documented in the iBCS2 standard..
- *
- * The first part of "struct _fpstate" is just the normal i387
- * hardware setup, the extra "status" word is used to save the
- * coprocessor status word before entering the handler.
+ * As documented in the iBCS2 standard:
*
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
+ * The first part of "struct _fpstate" is just the normal i387 hardware setup,
+ * the extra "status" word is used to save the coprocessor status word before
+ * entering the handler.
*
- * The FPU state data structure has had to grow to accommodate the
- * extended FPU state required by the Streaming SIMD Extensions.
- * There is no documented standard to accomplish this at the moment.
+ * The FPU state data structure has had to grow to accommodate the extended FPU
+ * state required by the Streaming SIMD Extensions. There is no documented
+ * standard to accomplish this at the moment.
*/
+
+/* 10-byte legacy floating point register: */
struct _fpreg {
- unsigned short significand[4];
- unsigned short exponent;
+ __u16 significand[4];
+ __u16 exponent;
};
+/* 16-byte floating point register: */
struct _fpxreg {
- unsigned short significand[4];
- unsigned short exponent;
- unsigned short padding[3];
+ __u16 significand[4];
+ __u16 exponent;
+ __u16 padding[3];
};
+/* 16-byte XMM register: */
struct _xmmreg {
- unsigned long element[4];
+ __u32 element[4];
};
-struct _fpstate {
- /* Regular FPU environment */
- unsigned long cw;
- unsigned long sw;
- unsigned long tag;
- unsigned long ipoff;
- unsigned long cssel;
- unsigned long dataoff;
- unsigned long datasel;
- struct _fpreg _st[8];
- unsigned short status;
- unsigned short magic; /* 0xffff = regular FPU data only */
+#define X86_FXSR_MAGIC 0x0000
+
+/*
+ * The 32-bit FPU frame:
+ */
+struct _fpstate_32 {
+ /* Legacy FPU environment: */
+ __u32 cw;
+ __u32 sw;
+ __u32 tag;
+ __u32 ipoff;
+ __u32 cssel;
+ __u32 dataoff;
+ __u32 datasel;
+ struct _fpreg _st[8];
+ __u16 status;
+ __u16 magic; /* 0xffff: regular FPU data only */
+ /* 0x0000: FXSR FPU data */
/* FXSR FPU environment */
- unsigned long _fxsr_env[6]; /* FXSR FPU env is ignored */
- unsigned long mxcsr;
- unsigned long reserved;
- struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
- struct _xmmreg _xmm[8];
- unsigned long padding1[44];
+ __u32 _fxsr_env[6]; /* FXSR FPU env is ignored */
+ __u32 mxcsr;
+ __u32 reserved;
+ struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
+ struct _xmmreg _xmm[8]; /* First 8 XMM registers */
+ union {
+ __u32 padding1[44]; /* Second 8 XMM registers plus padding */
+ __u32 padding[44]; /* Alias name for old user-space */
+ };
union {
- unsigned long padding2[12];
- struct _fpx_sw_bytes sw_reserved; /* represents the extended
- * state info */
+ __u32 padding2[12];
+ struct _fpx_sw_bytes sw_reserved; /* Potential extended state is encoded here */
};
};
-#define X86_FXSR_MAGIC 0x0000
-
-#ifndef __KERNEL__
/*
- * User-space might still rely on the old definition:
+ * The 64-bit FPU frame. (FXSAVE format and later)
+ *
+ * Note1: If sw_reserved.magic1 == FP_XSTATE_MAGIC1 then the structure is
+ * larger: 'struct _xstate'. Note that 'struct _xstate' embedds
+ * 'struct _fpstate' so that you can always assume the _fpstate portion
+ * exists so that you can check the magic value.
+ *
+ * Note2: Reserved fields may someday contain valuable data. Always
+ * save/restore them when you change signal frames.
*/
-struct sigcontext {
- unsigned short gs, __gsh;
- unsigned short fs, __fsh;
- unsigned short es, __esh;
- unsigned short ds, __dsh;
- unsigned long edi;
- unsigned long esi;
- unsigned long ebp;
- unsigned long esp;
- unsigned long ebx;
- unsigned long edx;
- unsigned long ecx;
- unsigned long eax;
- unsigned long trapno;
- unsigned long err;
- unsigned long eip;
- unsigned short cs, __csh;
- unsigned long eflags;
- unsigned long esp_at_signal;
- unsigned short ss, __ssh;
- struct _fpstate __user *fpstate;
- unsigned long oldmask;
- unsigned long cr2;
-};
-#endif /* !__KERNEL__ */
-
-#else /* __i386__ */
-
-/* FXSAVE frame */
-/* Note: reserved1/2 may someday contain valuable data. Always save/restore
- them when you change signal frames. */
-struct _fpstate {
- __u16 cwd;
- __u16 swd;
- __u16 twd; /* Note this is not the same as the
- 32bit/x87/FSAVE twd */
- __u16 fop;
- __u64 rip;
- __u64 rdp;
- __u32 mxcsr;
- __u32 mxcsr_mask;
- __u32 st_space[32]; /* 8*16 bytes for each FP-reg */
- __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */
- __u32 reserved2[12];
+struct _fpstate_64 {
+ __u16 cwd;
+ __u16 swd;
+ /* Note this is not the same as the 32-bit/x87/FSAVE twd: */
+ __u16 twd;
+ __u16 fop;
+ __u64 rip;
+ __u64 rdp;
+ __u32 mxcsr;
+ __u32 mxcsr_mask;
+ __u32 st_space[32]; /* 8x FP registers, 16 bytes each */
+ __u32 xmm_space[64]; /* 16x XMM registers, 16 bytes each */
+ __u32 reserved2[12];
union {
- __u32 reserved3[12];
- struct _fpx_sw_bytes sw_reserved; /* represents the extended
- * state information */
+ __u32 reserved3[12];
+ struct _fpx_sw_bytes sw_reserved; /* Potential extended state is encoded here */
};
};
-#ifndef __KERNEL__
+#ifdef __i386__
+# define _fpstate _fpstate_32
+#else
+# define _fpstate _fpstate_64
+#endif
+
+struct _header {
+ __u64 xfeatures;
+ __u64 reserved1[2];
+ __u64 reserved2[5];
+};
+
+struct _ymmh_state {
+ /* 16x YMM registers, 16 bytes each: */
+ __u32 ymmh_space[64];
+};
+
/*
- * User-space might still rely on the old definition:
+ * Extended state pointed to by sigcontext::fpstate.
+ *
+ * In addition to the fpstate, information encoded in _xstate::xstate_hdr
+ * indicates the presence of other extended state information supported
+ * by the CPU and kernel:
*/
-struct sigcontext {
- __u64 r8;
- __u64 r9;
- __u64 r10;
- __u64 r11;
- __u64 r12;
- __u64 r13;
- __u64 r14;
- __u64 r15;
- __u64 rdi;
- __u64 rsi;
- __u64 rbp;
- __u64 rbx;
- __u64 rdx;
- __u64 rax;
- __u64 rcx;
- __u64 rsp;
- __u64 rip;
- __u64 eflags; /* RFLAGS */
- __u16 cs;
+struct _xstate {
+ struct _fpstate fpstate;
+ struct _header xstate_hdr;
+ struct _ymmh_state ymmh;
+ /* New processor state extensions go here: */
+};
+
+/*
+ * The 32-bit signal frame:
+ */
+struct sigcontext_32 {
+ __u16 gs, __gsh;
+ __u16 fs, __fsh;
+ __u16 es, __esh;
+ __u16 ds, __dsh;
+ __u32 di;
+ __u32 si;
+ __u32 bp;
+ __u32 sp;
+ __u32 bx;
+ __u32 dx;
+ __u32 cx;
+ __u32 ax;
+ __u32 trapno;
+ __u32 err;
+ __u32 ip;
+ __u16 cs, __csh;
+ __u32 flags;
+ __u32 sp_at_signal;
+ __u16 ss, __ssh;
/*
- * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
- * Linux saved and restored fs and gs in these slots. This
- * was counterproductive, as fsbase and gsbase were never
- * saved, so arch_prctl was presumably unreliable.
- *
- * If these slots are ever needed for any other purpose, there
- * is some risk that very old 64-bit binaries could get
- * confused. I doubt that many such binaries still work,
- * though, since the same patch in 2.5.64 also removed the
- * 64-bit set_thread_area syscall, so it appears that there is
- * no TLS API that works in both pre- and post-2.5.64 kernels.
+ * fpstate is really (struct _fpstate *) or (struct _xstate *)
+ * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
+ * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
+ * of extended memory layout. See comments at the definition of
+ * (struct _fpx_sw_bytes)
*/
- __u16 __pad2; /* Was gs. */
- __u16 __pad1; /* Was fs. */
-
- __u16 ss;
- __u64 err;
- __u64 trapno;
- __u64 oldmask;
- __u64 cr2;
- struct _fpstate __user *fpstate; /* zero when no FPU context */
-#ifdef __ILP32__
- __u32 __fpstate_pad;
-#endif
- __u64 reserved1[8];
+ __u32 fpstate; /* Zero when no FPU/extended context */
+ __u32 oldmask;
+ __u32 cr2;
};
-#endif /* !__KERNEL__ */
-#endif /* !__i386__ */
+/*
+ * The 64-bit signal frame:
+ */
+struct sigcontext_64 {
+ __u64 r8;
+ __u64 r9;
+ __u64 r10;
+ __u64 r11;
+ __u64 r12;
+ __u64 r13;
+ __u64 r14;
+ __u64 r15;
+ __u64 di;
+ __u64 si;
+ __u64 bp;
+ __u64 bx;
+ __u64 dx;
+ __u64 ax;
+ __u64 cx;
+ __u64 sp;
+ __u64 ip;
+ __u64 flags;
+ __u16 cs;
+ __u16 gs;
+ __u16 fs;
+ __u16 __pad0;
+ __u64 err;
+ __u64 trapno;
+ __u64 oldmask;
+ __u64 cr2;
-struct _header {
- __u64 xfeatures;
- __u64 reserved1[2];
- __u64 reserved2[5];
+ /*
+ * fpstate is really (struct _fpstate *) or (struct _xstate *)
+ * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
+ * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
+ * of extended memory layout. See comments at the definition of
+ * (struct _fpx_sw_bytes)
+ */
+ __u64 fpstate; /* Zero when no FPU/extended context */
+ __u64 reserved1[8];
};
-struct _ymmh_state {
- /* 16 * 16 bytes for each YMMH-reg */
- __u32 ymmh_space[64];
-};
+/*
+ * Create the real 'struct sigcontext' type:
+ */
+#ifdef __KERNEL__
+# ifdef __i386__
+# define sigcontext sigcontext_32
+# else
+# define sigcontext sigcontext_64
+# endif
+#endif
/*
- * Extended state pointed by the fpstate pointer in the sigcontext.
- * In addition to the fpstate, information encoded in the xstate_hdr
- * indicates the presence of other extended state information
- * supported by the processor and OS.
+ * The old user-space sigcontext definition, just in case user-space still
+ * relies on it. The kernel definition (in asm/sigcontext.h) has unified
+ * field names but otherwise the same layout.
*/
-struct _xstate {
- struct _fpstate fpstate;
- struct _header xstate_hdr;
- struct _ymmh_state ymmh;
- /* new processor state extensions go here */
+#ifndef __KERNEL__
+
+#define _fpstate_ia32 _fpstate_32
+#define sigcontext_ia32 sigcontext_32
+
+
+# ifdef __i386__
+struct sigcontext {
+ __u16 gs, __gsh;
+ __u16 fs, __fsh;
+ __u16 es, __esh;
+ __u16 ds, __dsh;
+ __u32 edi;
+ __u32 esi;
+ __u32 ebp;
+ __u32 esp;
+ __u32 ebx;
+ __u32 edx;
+ __u32 ecx;
+ __u32 eax;
+ __u32 trapno;
+ __u32 err;
+ __u32 eip;
+ __u16 cs, __csh;
+ __u32 eflags;
+ __u32 esp_at_signal;
+ __u16 ss, __ssh;
+ struct _fpstate __user *fpstate;
+ __u32 oldmask;
+ __u32 cr2;
+};
+# else /* __x86_64__: */
+struct sigcontext {
+ __u64 r8;
+ __u64 r9;
+ __u64 r10;
+ __u64 r11;
+ __u64 r12;
+ __u64 r13;
+ __u64 r14;
+ __u64 r15;
+ __u64 rdi;
+ __u64 rsi;
+ __u64 rbp;
+ __u64 rbx;
+ __u64 rdx;
+ __u64 rax;
+ __u64 rcx;
+ __u64 rsp;
+ __u64 rip;
+ __u64 eflags; /* RFLAGS */
+ __u16 cs;
+ __u16 gs;
+ __u16 fs;
+ __u16 __pad0;
+ __u64 err;
+ __u64 trapno;
+ __u64 oldmask;
+ __u64 cr2;
+ struct _fpstate __user *fpstate; /* Zero when no FPU context */
+# ifdef __ILP32__
+ __u32 __fpstate_pad;
+# endif
+ __u64 reserved1[8];
};
+# endif /* __x86_64__ */
+#endif /* !__KERNEL__ */
#endif /* _UAPI_ASM_X86_SIGCONTEXT_H */
diff --git a/arch/x86/include/uapi/asm/sigcontext32.h b/arch/x86/include/uapi/asm/sigcontext32.h
index ad1478c4ae12..a92b0f0dc09e 100644
--- a/arch/x86/include/uapi/asm/sigcontext32.h
+++ b/arch/x86/include/uapi/asm/sigcontext32.h
@@ -1,77 +1,8 @@
#ifndef _ASM_X86_SIGCONTEXT32_H
#define _ASM_X86_SIGCONTEXT32_H
-#include <linux/types.h>
+/* This is a legacy file - all the type definitions are in sigcontext.h: */
-/* signal context for 32bit programs. */
-
-#define X86_FXSR_MAGIC 0x0000
-
-struct _fpreg {
- unsigned short significand[4];
- unsigned short exponent;
-};
-
-struct _fpxreg {
- unsigned short significand[4];
- unsigned short exponent;
- unsigned short padding[3];
-};
-
-struct _xmmreg {
- __u32 element[4];
-};
-
-/* FSAVE frame with extensions */
-struct _fpstate_ia32 {
- /* Regular FPU environment */
- __u32 cw;
- __u32 sw;
- __u32 tag; /* not compatible to 64bit twd */
- __u32 ipoff;
- __u32 cssel;
- __u32 dataoff;
- __u32 datasel;
- struct _fpreg _st[8];
- unsigned short status;
- unsigned short magic; /* 0xffff = regular FPU data only */
-
- /* FXSR FPU environment */
- __u32 _fxsr_env[6];
- __u32 mxcsr;
- __u32 reserved;
- struct _fpxreg _fxsr_st[8];
- struct _xmmreg _xmm[8]; /* It's actually 16 */
- __u32 padding[44];
- union {
- __u32 padding2[12];
- struct _fpx_sw_bytes sw_reserved;
- };
-};
-
-struct sigcontext_ia32 {
- unsigned short gs, __gsh;
- unsigned short fs, __fsh;
- unsigned short es, __esh;
- unsigned short ds, __dsh;
- unsigned int di;
- unsigned int si;
- unsigned int bp;
- unsigned int sp;
- unsigned int bx;
- unsigned int dx;
- unsigned int cx;
- unsigned int ax;
- unsigned int trapno;
- unsigned int err;
- unsigned int ip;
- unsigned short cs, __csh;
- unsigned int flags;
- unsigned int sp_at_signal;
- unsigned short ss, __ssh;
- unsigned int fpstate; /* really (struct _fpstate_ia32 *) */
- unsigned int oldmask;
- unsigned int cr2;
-};
+#include <asm/sigcontext.h>
#endif /* _ASM_X86_SIGCONTEXT32_H */
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 1fe92181ee9e..5b15d94a33f8 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -58,6 +58,7 @@
#define EXIT_REASON_INVALID_STATE 33
#define EXIT_REASON_MSR_LOAD_FAIL 34
#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_TRAP_FLAG 37
#define EXIT_REASON_MONITOR_INSTRUCTION 39
#define EXIT_REASON_PAUSE_INSTRUCTION 40
#define EXIT_REASON_MCE_DURING_VMENTRY 41
@@ -77,6 +78,7 @@
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
+#define EXIT_REASON_PCOMMIT 65
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -106,6 +108,7 @@
{ EXIT_REASON_MSR_READ, "MSR_READ" }, \
{ EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
{ EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
+ { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \
{ EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
{ EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
{ EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
@@ -124,7 +127,8 @@
{ EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
- { EXIT_REASON_XRSTORS, "XRSTORS" }
+ { EXIT_REASON_XRSTORS, "XRSTORS" }, \
+ { EXIT_REASON_PCOMMIT, "PCOMMIT" }
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4